[llvm] r290663 - This is a large X86 AVX-512 patch that adds an optimization for reducing code size by encoding EVEX AVX-512 instructions with the shorter VEX encoding when possible.
Gadi Haber via llvm-commits
llvm-commits at lists.llvm.org
Wed Dec 28 02:12:50 PST 2016
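For a concrete sense of the size win, the pair of encodings below (copied verbatim from the avx512vl-mov.ll hunk in this diff) shows the same 256-bit unaligned load before and after the new pass: the EVEX form takes 6 bytes, the compressed VEX form takes 4. The "EVEX TO VEX Compression" text is the comment the updated tests now expect on instructions the pass has rewritten.

    EVEX: vmovups (%rdi), %ymm0   ## encoding: [0x62,0xf1,0x7c,0x28,0x10,0x07]   (6 bytes)
    VEX:  vmovups (%rdi), %ymm0   ## encoding: [0xc5,0xfc,0x10,0x07]             (4 bytes)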
Modified: llvm/trunk/test/CodeGen/X86/avx512vl-mov.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512vl-mov.ll?rev=290663&r1=290662&r2=290663&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512vl-mov.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512vl-mov.ll Wed Dec 28 04:12:48 2016
@@ -4,7 +4,7 @@
define <8 x i32> @test_256_1(i8 * %addr) {
; CHECK-LABEL: test_256_1:
; CHECK: ## BB#0:
-; CHECK-NEXT: vmovups (%rdi), %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x10,0x07]
+; CHECK-NEXT: vmovups (%rdi), %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x10,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%vaddr = bitcast i8* %addr to <8 x i32>*
%res = load <8 x i32>, <8 x i32>* %vaddr, align 1
@@ -14,7 +14,7 @@ define <8 x i32> @test_256_1(i8 * %addr)
define <8 x i32> @test_256_2(i8 * %addr) {
; CHECK-LABEL: test_256_2:
; CHECK: ## BB#0:
-; CHECK-NEXT: vmovaps (%rdi), %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0x07]
+; CHECK-NEXT: vmovaps (%rdi), %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%vaddr = bitcast i8* %addr to <8 x i32>*
%res = load <8 x i32>, <8 x i32>* %vaddr, align 32
@@ -24,7 +24,7 @@ define <8 x i32> @test_256_2(i8 * %addr)
define void @test_256_3(i8 * %addr, <4 x i64> %data) {
; CHECK-LABEL: test_256_3:
; CHECK: ## BB#0:
-; CHECK-NEXT: vmovaps %ymm0, (%rdi) ## encoding: [0x62,0xf1,0x7c,0x28,0x29,0x07]
+; CHECK-NEXT: vmovaps %ymm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x29,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%vaddr = bitcast i8* %addr to <4 x i64>*
store <4 x i64>%data, <4 x i64>* %vaddr, align 32
@@ -34,7 +34,7 @@ define void @test_256_3(i8 * %addr, <4 x
define void @test_256_4(i8 * %addr, <8 x i32> %data) {
; CHECK-LABEL: test_256_4:
; CHECK: ## BB#0:
-; CHECK-NEXT: vmovups %ymm0, (%rdi) ## encoding: [0x62,0xf1,0x7c,0x28,0x11,0x07]
+; CHECK-NEXT: vmovups %ymm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x11,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%vaddr = bitcast i8* %addr to <8 x i32>*
store <8 x i32>%data, <8 x i32>* %vaddr, align 1
@@ -44,7 +44,7 @@ define void @test_256_4(i8 * %addr, <8 x
define void @test_256_5(i8 * %addr, <8 x i32> %data) {
; CHECK-LABEL: test_256_5:
; CHECK: ## BB#0:
-; CHECK-NEXT: vmovaps %ymm0, (%rdi) ## encoding: [0x62,0xf1,0x7c,0x28,0x29,0x07]
+; CHECK-NEXT: vmovaps %ymm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x29,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%vaddr = bitcast i8* %addr to <8 x i32>*
store <8 x i32>%data, <8 x i32>* %vaddr, align 32
@@ -54,7 +54,7 @@ define void @test_256_5(i8 * %addr, <8 x
define <4 x i64> @test_256_6(i8 * %addr) {
; CHECK-LABEL: test_256_6:
; CHECK: ## BB#0:
-; CHECK-NEXT: vmovaps (%rdi), %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0x07]
+; CHECK-NEXT: vmovaps (%rdi), %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%vaddr = bitcast i8* %addr to <4 x i64>*
%res = load <4 x i64>, <4 x i64>* %vaddr, align 32
@@ -64,7 +64,7 @@ define <4 x i64> @test_256_6(i8 * %addr
define void @test_256_7(i8 * %addr, <4 x i64> %data) {
; CHECK-LABEL: test_256_7:
; CHECK: ## BB#0:
-; CHECK-NEXT: vmovups %ymm0, (%rdi) ## encoding: [0x62,0xf1,0x7c,0x28,0x11,0x07]
+; CHECK-NEXT: vmovups %ymm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x11,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%vaddr = bitcast i8* %addr to <4 x i64>*
store <4 x i64>%data, <4 x i64>* %vaddr, align 1
@@ -74,7 +74,7 @@ define void @test_256_7(i8 * %addr, <4 x
define <4 x i64> @test_256_8(i8 * %addr) {
; CHECK-LABEL: test_256_8:
; CHECK: ## BB#0:
-; CHECK-NEXT: vmovups (%rdi), %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x10,0x07]
+; CHECK-NEXT: vmovups (%rdi), %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x10,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%vaddr = bitcast i8* %addr to <4 x i64>*
%res = load <4 x i64>, <4 x i64>* %vaddr, align 1
@@ -84,7 +84,7 @@ define <4 x i64> @test_256_8(i8 * %addr)
define void @test_256_9(i8 * %addr, <4 x double> %data) {
; CHECK-LABEL: test_256_9:
; CHECK: ## BB#0:
-; CHECK-NEXT: vmovaps %ymm0, (%rdi) ## encoding: [0x62,0xf1,0x7c,0x28,0x29,0x07]
+; CHECK-NEXT: vmovaps %ymm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x29,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%vaddr = bitcast i8* %addr to <4 x double>*
store <4 x double>%data, <4 x double>* %vaddr, align 32
@@ -94,7 +94,7 @@ define void @test_256_9(i8 * %addr, <4 x
define <4 x double> @test_256_10(i8 * %addr) {
; CHECK-LABEL: test_256_10:
; CHECK: ## BB#0:
-; CHECK-NEXT: vmovaps (%rdi), %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0x07]
+; CHECK-NEXT: vmovaps (%rdi), %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%vaddr = bitcast i8* %addr to <4 x double>*
%res = load <4 x double>, <4 x double>* %vaddr, align 32
@@ -104,7 +104,7 @@ define <4 x double> @test_256_10(i8 * %a
define void @test_256_11(i8 * %addr, <8 x float> %data) {
; CHECK-LABEL: test_256_11:
; CHECK: ## BB#0:
-; CHECK-NEXT: vmovaps %ymm0, (%rdi) ## encoding: [0x62,0xf1,0x7c,0x28,0x29,0x07]
+; CHECK-NEXT: vmovaps %ymm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x29,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%vaddr = bitcast i8* %addr to <8 x float>*
store <8 x float>%data, <8 x float>* %vaddr, align 32
@@ -114,7 +114,7 @@ define void @test_256_11(i8 * %addr, <8
define <8 x float> @test_256_12(i8 * %addr) {
; CHECK-LABEL: test_256_12:
; CHECK: ## BB#0:
-; CHECK-NEXT: vmovaps (%rdi), %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0x07]
+; CHECK-NEXT: vmovaps (%rdi), %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%vaddr = bitcast i8* %addr to <8 x float>*
%res = load <8 x float>, <8 x float>* %vaddr, align 32
@@ -124,7 +124,7 @@ define <8 x float> @test_256_12(i8 * %ad
define void @test_256_13(i8 * %addr, <4 x double> %data) {
; CHECK-LABEL: test_256_13:
; CHECK: ## BB#0:
-; CHECK-NEXT: vmovups %ymm0, (%rdi) ## encoding: [0x62,0xf1,0x7c,0x28,0x11,0x07]
+; CHECK-NEXT: vmovups %ymm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x11,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%vaddr = bitcast i8* %addr to <4 x double>*
store <4 x double>%data, <4 x double>* %vaddr, align 1
@@ -134,7 +134,7 @@ define void @test_256_13(i8 * %addr, <4
define <4 x double> @test_256_14(i8 * %addr) {
; CHECK-LABEL: test_256_14:
; CHECK: ## BB#0:
-; CHECK-NEXT: vmovups (%rdi), %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x10,0x07]
+; CHECK-NEXT: vmovups (%rdi), %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x10,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%vaddr = bitcast i8* %addr to <4 x double>*
%res = load <4 x double>, <4 x double>* %vaddr, align 1
@@ -144,7 +144,7 @@ define <4 x double> @test_256_14(i8 * %a
define void @test_256_15(i8 * %addr, <8 x float> %data) {
; CHECK-LABEL: test_256_15:
; CHECK: ## BB#0:
-; CHECK-NEXT: vmovups %ymm0, (%rdi) ## encoding: [0x62,0xf1,0x7c,0x28,0x11,0x07]
+; CHECK-NEXT: vmovups %ymm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x11,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%vaddr = bitcast i8* %addr to <8 x float>*
store <8 x float>%data, <8 x float>* %vaddr, align 1
@@ -154,7 +154,7 @@ define void @test_256_15(i8 * %addr, <8
define <8 x float> @test_256_16(i8 * %addr) {
; CHECK-LABEL: test_256_16:
; CHECK: ## BB#0:
-; CHECK-NEXT: vmovups (%rdi), %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x10,0x07]
+; CHECK-NEXT: vmovups (%rdi), %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x10,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%vaddr = bitcast i8* %addr to <8 x float>*
%res = load <8 x float>, <8 x float>* %vaddr, align 1
@@ -164,7 +164,7 @@ define <8 x float> @test_256_16(i8 * %ad
define <8 x i32> @test_256_17(i8 * %addr, <8 x i32> %old, <8 x i32> %mask1) {
; CHECK-LABEL: test_256_17:
; CHECK: ## BB#0:
-; CHECK-NEXT: vpxord %ymm2, %ymm2, %ymm2 ## encoding: [0x62,0xf1,0x6d,0x28,0xef,0xd2]
+; CHECK-NEXT: vpxor %ymm2, %ymm2, %ymm2 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xef,0xd2]
; CHECK-NEXT: vpcmpneqd %ymm2, %ymm1, %k1 ## encoding: [0x62,0xf3,0x75,0x28,0x1f,0xca,0x04]
; CHECK-NEXT: vpblendmd (%rdi), %ymm0, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x64,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
@@ -178,7 +178,7 @@ define <8 x i32> @test_256_17(i8 * %addr
define <8 x i32> @test_256_18(i8 * %addr, <8 x i32> %old, <8 x i32> %mask1) {
; CHECK-LABEL: test_256_18:
; CHECK: ## BB#0:
-; CHECK-NEXT: vpxord %ymm2, %ymm2, %ymm2 ## encoding: [0x62,0xf1,0x6d,0x28,0xef,0xd2]
+; CHECK-NEXT: vpxor %ymm2, %ymm2, %ymm2 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xef,0xd2]
; CHECK-NEXT: vpcmpneqd %ymm2, %ymm1, %k1 ## encoding: [0x62,0xf3,0x75,0x28,0x1f,0xca,0x04]
; CHECK-NEXT: vpblendmd (%rdi), %ymm0, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x64,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
@@ -192,7 +192,7 @@ define <8 x i32> @test_256_18(i8 * %addr
define <8 x i32> @test_256_19(i8 * %addr, <8 x i32> %mask1) {
; CHECK-LABEL: test_256_19:
; CHECK: ## BB#0:
-; CHECK-NEXT: vpxord %ymm1, %ymm1, %ymm1 ## encoding: [0x62,0xf1,0x75,0x28,0xef,0xc9]
+; CHECK-NEXT: vpxor %ymm1, %ymm1, %ymm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xef,0xc9]
; CHECK-NEXT: vpcmpneqd %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x28,0x1f,0xc9,0x04]
; CHECK-NEXT: vmovdqa32 (%rdi), %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0x6f,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
@@ -206,7 +206,7 @@ define <8 x i32> @test_256_19(i8 * %addr
define <8 x i32> @test_256_20(i8 * %addr, <8 x i32> %mask1) {
; CHECK-LABEL: test_256_20:
; CHECK: ## BB#0:
-; CHECK-NEXT: vpxord %ymm1, %ymm1, %ymm1 ## encoding: [0x62,0xf1,0x75,0x28,0xef,0xc9]
+; CHECK-NEXT: vpxor %ymm1, %ymm1, %ymm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xef,0xc9]
; CHECK-NEXT: vpcmpneqd %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x28,0x1f,0xc9,0x04]
; CHECK-NEXT: vmovdqu32 (%rdi), %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0xa9,0x6f,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
@@ -220,7 +220,7 @@ define <8 x i32> @test_256_20(i8 * %addr
define <4 x i64> @test_256_21(i8 * %addr, <4 x i64> %old, <4 x i64> %mask1) {
; CHECK-LABEL: test_256_21:
; CHECK: ## BB#0:
-; CHECK-NEXT: vpxord %ymm2, %ymm2, %ymm2 ## encoding: [0x62,0xf1,0x6d,0x28,0xef,0xd2]
+; CHECK-NEXT: vpxor %ymm2, %ymm2, %ymm2 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xef,0xd2]
; CHECK-NEXT: vpcmpneqq %ymm2, %ymm1, %k1 ## encoding: [0x62,0xf3,0xf5,0x28,0x1f,0xca,0x04]
; CHECK-NEXT: vpblendmq (%rdi), %ymm0, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x64,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
@@ -234,7 +234,7 @@ define <4 x i64> @test_256_21(i8 * %addr
define <4 x i64> @test_256_22(i8 * %addr, <4 x i64> %old, <4 x i64> %mask1) {
; CHECK-LABEL: test_256_22:
; CHECK: ## BB#0:
-; CHECK-NEXT: vpxord %ymm2, %ymm2, %ymm2 ## encoding: [0x62,0xf1,0x6d,0x28,0xef,0xd2]
+; CHECK-NEXT: vpxor %ymm2, %ymm2, %ymm2 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xef,0xd2]
; CHECK-NEXT: vpcmpneqq %ymm2, %ymm1, %k1 ## encoding: [0x62,0xf3,0xf5,0x28,0x1f,0xca,0x04]
; CHECK-NEXT: vpblendmq (%rdi), %ymm0, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x64,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
@@ -248,7 +248,7 @@ define <4 x i64> @test_256_22(i8 * %addr
define <4 x i64> @test_256_23(i8 * %addr, <4 x i64> %mask1) {
; CHECK-LABEL: test_256_23:
; CHECK: ## BB#0:
-; CHECK-NEXT: vpxord %ymm1, %ymm1, %ymm1 ## encoding: [0x62,0xf1,0x75,0x28,0xef,0xc9]
+; CHECK-NEXT: vpxor %ymm1, %ymm1, %ymm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xef,0xc9]
; CHECK-NEXT: vpcmpneqq %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x28,0x1f,0xc9,0x04]
; CHECK-NEXT: vmovdqa64 (%rdi), %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xa9,0x6f,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
@@ -262,7 +262,7 @@ define <4 x i64> @test_256_23(i8 * %addr
define <4 x i64> @test_256_24(i8 * %addr, <4 x i64> %mask1) {
; CHECK-LABEL: test_256_24:
; CHECK: ## BB#0:
-; CHECK-NEXT: vpxord %ymm1, %ymm1, %ymm1 ## encoding: [0x62,0xf1,0x75,0x28,0xef,0xc9]
+; CHECK-NEXT: vpxor %ymm1, %ymm1, %ymm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xef,0xc9]
; CHECK-NEXT: vpcmpneqq %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x28,0x1f,0xc9,0x04]
; CHECK-NEXT: vmovdqu64 (%rdi), %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfe,0xa9,0x6f,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
@@ -276,7 +276,7 @@ define <4 x i64> @test_256_24(i8 * %addr
define <8 x float> @test_256_25(i8 * %addr, <8 x float> %old, <8 x float> %mask1) {
; CHECK-LABEL: test_256_25:
; CHECK: ## BB#0:
-; CHECK-NEXT: vpxord %ymm2, %ymm2, %ymm2 ## encoding: [0x62,0xf1,0x6d,0x28,0xef,0xd2]
+; CHECK-NEXT: vpxor %ymm2, %ymm2, %ymm2 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xef,0xd2]
; CHECK-NEXT: vcmpordps %ymm2, %ymm1, %k1 ## encoding: [0x62,0xf1,0x74,0x28,0xc2,0xca,0x07]
; CHECK-NEXT: vcmpneqps %ymm2, %ymm1, %k1 {%k1} ## encoding: [0x62,0xf1,0x74,0x29,0xc2,0xca,0x04]
; CHECK-NEXT: vblendmps (%rdi), %ymm0, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x65,0x07]
@@ -291,7 +291,7 @@ define <8 x float> @test_256_25(i8 * %ad
define <8 x float> @test_256_26(i8 * %addr, <8 x float> %old, <8 x float> %mask1) {
; CHECK-LABEL: test_256_26:
; CHECK: ## BB#0:
-; CHECK-NEXT: vpxord %ymm2, %ymm2, %ymm2 ## encoding: [0x62,0xf1,0x6d,0x28,0xef,0xd2]
+; CHECK-NEXT: vpxor %ymm2, %ymm2, %ymm2 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xef,0xd2]
; CHECK-NEXT: vcmpordps %ymm2, %ymm1, %k1 ## encoding: [0x62,0xf1,0x74,0x28,0xc2,0xca,0x07]
; CHECK-NEXT: vcmpneqps %ymm2, %ymm1, %k1 {%k1} ## encoding: [0x62,0xf1,0x74,0x29,0xc2,0xca,0x04]
; CHECK-NEXT: vblendmps (%rdi), %ymm0, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x65,0x07]
@@ -306,7 +306,7 @@ define <8 x float> @test_256_26(i8 * %ad
define <8 x float> @test_256_27(i8 * %addr, <8 x float> %mask1) {
; CHECK-LABEL: test_256_27:
; CHECK: ## BB#0:
-; CHECK-NEXT: vpxord %ymm1, %ymm1, %ymm1 ## encoding: [0x62,0xf1,0x75,0x28,0xef,0xc9]
+; CHECK-NEXT: vpxor %ymm1, %ymm1, %ymm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xef,0xc9]
; CHECK-NEXT: vcmpordps %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x28,0xc2,0xc9,0x07]
; CHECK-NEXT: vcmpneqps %ymm1, %ymm0, %k1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0xc2,0xc9,0x04]
; CHECK-NEXT: vmovaps (%rdi), %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xa9,0x28,0x07]
@@ -321,7 +321,7 @@ define <8 x float> @test_256_27(i8 * %ad
define <8 x float> @test_256_28(i8 * %addr, <8 x float> %mask1) {
; CHECK-LABEL: test_256_28:
; CHECK: ## BB#0:
-; CHECK-NEXT: vpxord %ymm1, %ymm1, %ymm1 ## encoding: [0x62,0xf1,0x75,0x28,0xef,0xc9]
+; CHECK-NEXT: vpxor %ymm1, %ymm1, %ymm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xef,0xc9]
; CHECK-NEXT: vcmpordps %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x28,0xc2,0xc9,0x07]
; CHECK-NEXT: vcmpneqps %ymm1, %ymm0, %k1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0xc2,0xc9,0x04]
; CHECK-NEXT: vmovups (%rdi), %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xa9,0x10,0x07]
@@ -336,7 +336,7 @@ define <8 x float> @test_256_28(i8 * %ad
define <4 x double> @test_256_29(i8 * %addr, <4 x double> %old, <4 x i64> %mask1) {
; CHECK-LABEL: test_256_29:
; CHECK: ## BB#0:
-; CHECK-NEXT: vpxord %ymm2, %ymm2, %ymm2 ## encoding: [0x62,0xf1,0x6d,0x28,0xef,0xd2]
+; CHECK-NEXT: vpxor %ymm2, %ymm2, %ymm2 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xef,0xd2]
; CHECK-NEXT: vpcmpneqq %ymm2, %ymm1, %k1 ## encoding: [0x62,0xf3,0xf5,0x28,0x1f,0xca,0x04]
; CHECK-NEXT: vblendmpd (%rdi), %ymm0, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x65,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
@@ -350,7 +350,7 @@ define <4 x double> @test_256_29(i8 * %a
define <4 x double> @test_256_30(i8 * %addr, <4 x double> %old, <4 x i64> %mask1) {
; CHECK-LABEL: test_256_30:
; CHECK: ## BB#0:
-; CHECK-NEXT: vpxord %ymm2, %ymm2, %ymm2 ## encoding: [0x62,0xf1,0x6d,0x28,0xef,0xd2]
+; CHECK-NEXT: vpxor %ymm2, %ymm2, %ymm2 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xef,0xd2]
; CHECK-NEXT: vpcmpneqq %ymm2, %ymm1, %k1 ## encoding: [0x62,0xf3,0xf5,0x28,0x1f,0xca,0x04]
; CHECK-NEXT: vblendmpd (%rdi), %ymm0, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x65,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
@@ -364,7 +364,7 @@ define <4 x double> @test_256_30(i8 * %a
define <4 x double> @test_256_31(i8 * %addr, <4 x i64> %mask1) {
; CHECK-LABEL: test_256_31:
; CHECK: ## BB#0:
-; CHECK-NEXT: vpxord %ymm1, %ymm1, %ymm1 ## encoding: [0x62,0xf1,0x75,0x28,0xef,0xc9]
+; CHECK-NEXT: vpxor %ymm1, %ymm1, %ymm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xef,0xc9]
; CHECK-NEXT: vpcmpneqq %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x28,0x1f,0xc9,0x04]
; CHECK-NEXT: vmovapd (%rdi), %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xa9,0x28,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
@@ -378,7 +378,7 @@ define <4 x double> @test_256_31(i8 * %a
define <4 x double> @test_256_32(i8 * %addr, <4 x i64> %mask1) {
; CHECK-LABEL: test_256_32:
; CHECK: ## BB#0:
-; CHECK-NEXT: vpxord %ymm1, %ymm1, %ymm1 ## encoding: [0x62,0xf1,0x75,0x28,0xef,0xc9]
+; CHECK-NEXT: vpxor %ymm1, %ymm1, %ymm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xef,0xc9]
; CHECK-NEXT: vpcmpneqq %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x28,0x1f,0xc9,0x04]
; CHECK-NEXT: vmovupd (%rdi), %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xa9,0x10,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
@@ -392,7 +392,7 @@ define <4 x double> @test_256_32(i8 * %a
define <4 x i32> @test_128_1(i8 * %addr) {
; CHECK-LABEL: test_128_1:
; CHECK: ## BB#0:
-; CHECK-NEXT: vmovups (%rdi), %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x10,0x07]
+; CHECK-NEXT: vmovups (%rdi), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x10,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%vaddr = bitcast i8* %addr to <4 x i32>*
%res = load <4 x i32>, <4 x i32>* %vaddr, align 1
@@ -402,7 +402,7 @@ define <4 x i32> @test_128_1(i8 * %addr)
define <4 x i32> @test_128_2(i8 * %addr) {
; CHECK-LABEL: test_128_2:
; CHECK: ## BB#0:
-; CHECK-NEXT: vmovaps (%rdi), %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0x07]
+; CHECK-NEXT: vmovaps (%rdi), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%vaddr = bitcast i8* %addr to <4 x i32>*
%res = load <4 x i32>, <4 x i32>* %vaddr, align 16
@@ -412,7 +412,7 @@ define <4 x i32> @test_128_2(i8 * %addr)
define void @test_128_3(i8 * %addr, <2 x i64> %data) {
; CHECK-LABEL: test_128_3:
; CHECK: ## BB#0:
-; CHECK-NEXT: vmovaps %xmm0, (%rdi) ## encoding: [0x62,0xf1,0x7c,0x08,0x29,0x07]
+; CHECK-NEXT: vmovaps %xmm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%vaddr = bitcast i8* %addr to <2 x i64>*
store <2 x i64>%data, <2 x i64>* %vaddr, align 16
@@ -422,7 +422,7 @@ define void @test_128_3(i8 * %addr, <2 x
define void @test_128_4(i8 * %addr, <4 x i32> %data) {
; CHECK-LABEL: test_128_4:
; CHECK: ## BB#0:
-; CHECK-NEXT: vmovups %xmm0, (%rdi) ## encoding: [0x62,0xf1,0x7c,0x08,0x11,0x07]
+; CHECK-NEXT: vmovups %xmm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x11,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%vaddr = bitcast i8* %addr to <4 x i32>*
store <4 x i32>%data, <4 x i32>* %vaddr, align 1
@@ -432,7 +432,7 @@ define void @test_128_4(i8 * %addr, <4 x
define void @test_128_5(i8 * %addr, <4 x i32> %data) {
; CHECK-LABEL: test_128_5:
; CHECK: ## BB#0:
-; CHECK-NEXT: vmovaps %xmm0, (%rdi) ## encoding: [0x62,0xf1,0x7c,0x08,0x29,0x07]
+; CHECK-NEXT: vmovaps %xmm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%vaddr = bitcast i8* %addr to <4 x i32>*
store <4 x i32>%data, <4 x i32>* %vaddr, align 16
@@ -442,7 +442,7 @@ define void @test_128_5(i8 * %addr, <4 x
define <2 x i64> @test_128_6(i8 * %addr) {
; CHECK-LABEL: test_128_6:
; CHECK: ## BB#0:
-; CHECK-NEXT: vmovaps (%rdi), %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0x07]
+; CHECK-NEXT: vmovaps (%rdi), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%vaddr = bitcast i8* %addr to <2 x i64>*
%res = load <2 x i64>, <2 x i64>* %vaddr, align 16
@@ -452,7 +452,7 @@ define <2 x i64> @test_128_6(i8 * %addr
define void @test_128_7(i8 * %addr, <2 x i64> %data) {
; CHECK-LABEL: test_128_7:
; CHECK: ## BB#0:
-; CHECK-NEXT: vmovups %xmm0, (%rdi) ## encoding: [0x62,0xf1,0x7c,0x08,0x11,0x07]
+; CHECK-NEXT: vmovups %xmm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x11,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%vaddr = bitcast i8* %addr to <2 x i64>*
store <2 x i64>%data, <2 x i64>* %vaddr, align 1
@@ -462,7 +462,7 @@ define void @test_128_7(i8 * %addr, <2 x
define <2 x i64> @test_128_8(i8 * %addr) {
; CHECK-LABEL: test_128_8:
; CHECK: ## BB#0:
-; CHECK-NEXT: vmovups (%rdi), %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x10,0x07]
+; CHECK-NEXT: vmovups (%rdi), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x10,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%vaddr = bitcast i8* %addr to <2 x i64>*
%res = load <2 x i64>, <2 x i64>* %vaddr, align 1
@@ -472,7 +472,7 @@ define <2 x i64> @test_128_8(i8 * %addr)
define void @test_128_9(i8 * %addr, <2 x double> %data) {
; CHECK-LABEL: test_128_9:
; CHECK: ## BB#0:
-; CHECK-NEXT: vmovaps %xmm0, (%rdi) ## encoding: [0x62,0xf1,0x7c,0x08,0x29,0x07]
+; CHECK-NEXT: vmovaps %xmm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%vaddr = bitcast i8* %addr to <2 x double>*
store <2 x double>%data, <2 x double>* %vaddr, align 16
@@ -482,7 +482,7 @@ define void @test_128_9(i8 * %addr, <2 x
define <2 x double> @test_128_10(i8 * %addr) {
; CHECK-LABEL: test_128_10:
; CHECK: ## BB#0:
-; CHECK-NEXT: vmovaps (%rdi), %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0x07]
+; CHECK-NEXT: vmovaps (%rdi), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%vaddr = bitcast i8* %addr to <2 x double>*
%res = load <2 x double>, <2 x double>* %vaddr, align 16
@@ -492,7 +492,7 @@ define <2 x double> @test_128_10(i8 * %a
define void @test_128_11(i8 * %addr, <4 x float> %data) {
; CHECK-LABEL: test_128_11:
; CHECK: ## BB#0:
-; CHECK-NEXT: vmovaps %xmm0, (%rdi) ## encoding: [0x62,0xf1,0x7c,0x08,0x29,0x07]
+; CHECK-NEXT: vmovaps %xmm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%vaddr = bitcast i8* %addr to <4 x float>*
store <4 x float>%data, <4 x float>* %vaddr, align 16
@@ -502,7 +502,7 @@ define void @test_128_11(i8 * %addr, <4
define <4 x float> @test_128_12(i8 * %addr) {
; CHECK-LABEL: test_128_12:
; CHECK: ## BB#0:
-; CHECK-NEXT: vmovaps (%rdi), %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0x07]
+; CHECK-NEXT: vmovaps (%rdi), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%vaddr = bitcast i8* %addr to <4 x float>*
%res = load <4 x float>, <4 x float>* %vaddr, align 16
@@ -512,7 +512,7 @@ define <4 x float> @test_128_12(i8 * %ad
define void @test_128_13(i8 * %addr, <2 x double> %data) {
; CHECK-LABEL: test_128_13:
; CHECK: ## BB#0:
-; CHECK-NEXT: vmovups %xmm0, (%rdi) ## encoding: [0x62,0xf1,0x7c,0x08,0x11,0x07]
+; CHECK-NEXT: vmovups %xmm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x11,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%vaddr = bitcast i8* %addr to <2 x double>*
store <2 x double>%data, <2 x double>* %vaddr, align 1
@@ -522,7 +522,7 @@ define void @test_128_13(i8 * %addr, <2
define <2 x double> @test_128_14(i8 * %addr) {
; CHECK-LABEL: test_128_14:
; CHECK: ## BB#0:
-; CHECK-NEXT: vmovups (%rdi), %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x10,0x07]
+; CHECK-NEXT: vmovups (%rdi), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x10,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%vaddr = bitcast i8* %addr to <2 x double>*
%res = load <2 x double>, <2 x double>* %vaddr, align 1
@@ -532,7 +532,7 @@ define <2 x double> @test_128_14(i8 * %a
define void @test_128_15(i8 * %addr, <4 x float> %data) {
; CHECK-LABEL: test_128_15:
; CHECK: ## BB#0:
-; CHECK-NEXT: vmovups %xmm0, (%rdi) ## encoding: [0x62,0xf1,0x7c,0x08,0x11,0x07]
+; CHECK-NEXT: vmovups %xmm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x11,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%vaddr = bitcast i8* %addr to <4 x float>*
store <4 x float>%data, <4 x float>* %vaddr, align 1
@@ -542,7 +542,7 @@ define void @test_128_15(i8 * %addr, <4
define <4 x float> @test_128_16(i8 * %addr) {
; CHECK-LABEL: test_128_16:
; CHECK: ## BB#0:
-; CHECK-NEXT: vmovups (%rdi), %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x10,0x07]
+; CHECK-NEXT: vmovups (%rdi), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x10,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%vaddr = bitcast i8* %addr to <4 x float>*
%res = load <4 x float>, <4 x float>* %vaddr, align 1
@@ -552,7 +552,7 @@ define <4 x float> @test_128_16(i8 * %ad
define <4 x i32> @test_128_17(i8 * %addr, <4 x i32> %old, <4 x i32> %mask1) {
; CHECK-LABEL: test_128_17:
; CHECK: ## BB#0:
-; CHECK-NEXT: vpxord %xmm2, %xmm2, %xmm2 ## encoding: [0x62,0xf1,0x6d,0x08,0xef,0xd2]
+; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xef,0xd2]
; CHECK-NEXT: vpcmpneqd %xmm2, %xmm1, %k1 ## encoding: [0x62,0xf3,0x75,0x08,0x1f,0xca,0x04]
; CHECK-NEXT: vpblendmd (%rdi), %xmm0, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x64,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
@@ -566,7 +566,7 @@ define <4 x i32> @test_128_17(i8 * %addr
define <4 x i32> @test_128_18(i8 * %addr, <4 x i32> %old, <4 x i32> %mask1) {
; CHECK-LABEL: test_128_18:
; CHECK: ## BB#0:
-; CHECK-NEXT: vpxord %xmm2, %xmm2, %xmm2 ## encoding: [0x62,0xf1,0x6d,0x08,0xef,0xd2]
+; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xef,0xd2]
; CHECK-NEXT: vpcmpneqd %xmm2, %xmm1, %k1 ## encoding: [0x62,0xf3,0x75,0x08,0x1f,0xca,0x04]
; CHECK-NEXT: vpblendmd (%rdi), %xmm0, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x64,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
@@ -580,7 +580,7 @@ define <4 x i32> @test_128_18(i8 * %addr
define <4 x i32> @test_128_19(i8 * %addr, <4 x i32> %mask1) {
; CHECK-LABEL: test_128_19:
; CHECK: ## BB#0:
-; CHECK-NEXT: vpxord %xmm1, %xmm1, %xmm1 ## encoding: [0x62,0xf1,0x75,0x08,0xef,0xc9]
+; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xef,0xc9]
; CHECK-NEXT: vpcmpneqd %xmm1, %xmm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x08,0x1f,0xc9,0x04]
; CHECK-NEXT: vmovdqa32 (%rdi), %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0x6f,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
@@ -594,7 +594,7 @@ define <4 x i32> @test_128_19(i8 * %addr
define <4 x i32> @test_128_20(i8 * %addr, <4 x i32> %mask1) {
; CHECK-LABEL: test_128_20:
; CHECK: ## BB#0:
-; CHECK-NEXT: vpxord %xmm1, %xmm1, %xmm1 ## encoding: [0x62,0xf1,0x75,0x08,0xef,0xc9]
+; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xef,0xc9]
; CHECK-NEXT: vpcmpneqd %xmm1, %xmm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x08,0x1f,0xc9,0x04]
; CHECK-NEXT: vmovdqu32 (%rdi), %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0x89,0x6f,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
@@ -608,7 +608,7 @@ define <4 x i32> @test_128_20(i8 * %addr
define <2 x i64> @test_128_21(i8 * %addr, <2 x i64> %old, <2 x i64> %mask1) {
; CHECK-LABEL: test_128_21:
; CHECK: ## BB#0:
-; CHECK-NEXT: vpxord %xmm2, %xmm2, %xmm2 ## encoding: [0x62,0xf1,0x6d,0x08,0xef,0xd2]
+; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xef,0xd2]
; CHECK-NEXT: vpcmpneqq %xmm2, %xmm1, %k1 ## encoding: [0x62,0xf3,0xf5,0x08,0x1f,0xca,0x04]
; CHECK-NEXT: vpblendmq (%rdi), %xmm0, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x64,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
@@ -622,7 +622,7 @@ define <2 x i64> @test_128_21(i8 * %addr
define <2 x i64> @test_128_22(i8 * %addr, <2 x i64> %old, <2 x i64> %mask1) {
; CHECK-LABEL: test_128_22:
; CHECK: ## BB#0:
-; CHECK-NEXT: vpxord %xmm2, %xmm2, %xmm2 ## encoding: [0x62,0xf1,0x6d,0x08,0xef,0xd2]
+; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xef,0xd2]
; CHECK-NEXT: vpcmpneqq %xmm2, %xmm1, %k1 ## encoding: [0x62,0xf3,0xf5,0x08,0x1f,0xca,0x04]
; CHECK-NEXT: vpblendmq (%rdi), %xmm0, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x64,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
@@ -636,7 +636,7 @@ define <2 x i64> @test_128_22(i8 * %addr
define <2 x i64> @test_128_23(i8 * %addr, <2 x i64> %mask1) {
; CHECK-LABEL: test_128_23:
; CHECK: ## BB#0:
-; CHECK-NEXT: vpxord %xmm1, %xmm1, %xmm1 ## encoding: [0x62,0xf1,0x75,0x08,0xef,0xc9]
+; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xef,0xc9]
; CHECK-NEXT: vpcmpneqq %xmm1, %xmm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x08,0x1f,0xc9,0x04]
; CHECK-NEXT: vmovdqa64 (%rdi), %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x89,0x6f,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
@@ -650,7 +650,7 @@ define <2 x i64> @test_128_23(i8 * %addr
define <2 x i64> @test_128_24(i8 * %addr, <2 x i64> %mask1) {
; CHECK-LABEL: test_128_24:
; CHECK: ## BB#0:
-; CHECK-NEXT: vpxord %xmm1, %xmm1, %xmm1 ## encoding: [0x62,0xf1,0x75,0x08,0xef,0xc9]
+; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xef,0xc9]
; CHECK-NEXT: vpcmpneqq %xmm1, %xmm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x08,0x1f,0xc9,0x04]
; CHECK-NEXT: vmovdqu64 (%rdi), %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfe,0x89,0x6f,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
@@ -664,7 +664,7 @@ define <2 x i64> @test_128_24(i8 * %addr
define <4 x float> @test_128_25(i8 * %addr, <4 x float> %old, <4 x i32> %mask1) {
; CHECK-LABEL: test_128_25:
; CHECK: ## BB#0:
-; CHECK-NEXT: vpxord %xmm2, %xmm2, %xmm2 ## encoding: [0x62,0xf1,0x6d,0x08,0xef,0xd2]
+; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xef,0xd2]
; CHECK-NEXT: vpcmpneqd %xmm2, %xmm1, %k1 ## encoding: [0x62,0xf3,0x75,0x08,0x1f,0xca,0x04]
; CHECK-NEXT: vblendmps (%rdi), %xmm0, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x65,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
@@ -678,7 +678,7 @@ define <4 x float> @test_128_25(i8 * %ad
define <4 x float> @test_128_26(i8 * %addr, <4 x float> %old, <4 x i32> %mask1) {
; CHECK-LABEL: test_128_26:
; CHECK: ## BB#0:
-; CHECK-NEXT: vpxord %xmm2, %xmm2, %xmm2 ## encoding: [0x62,0xf1,0x6d,0x08,0xef,0xd2]
+; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xef,0xd2]
; CHECK-NEXT: vpcmpneqd %xmm2, %xmm1, %k1 ## encoding: [0x62,0xf3,0x75,0x08,0x1f,0xca,0x04]
; CHECK-NEXT: vblendmps (%rdi), %xmm0, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x65,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
@@ -692,7 +692,7 @@ define <4 x float> @test_128_26(i8 * %ad
define <4 x float> @test_128_27(i8 * %addr, <4 x i32> %mask1) {
; CHECK-LABEL: test_128_27:
; CHECK: ## BB#0:
-; CHECK-NEXT: vpxord %xmm1, %xmm1, %xmm1 ## encoding: [0x62,0xf1,0x75,0x08,0xef,0xc9]
+; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xef,0xc9]
; CHECK-NEXT: vpcmpneqd %xmm1, %xmm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x08,0x1f,0xc9,0x04]
; CHECK-NEXT: vmovaps (%rdi), %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x89,0x28,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
@@ -706,7 +706,7 @@ define <4 x float> @test_128_27(i8 * %ad
define <4 x float> @test_128_28(i8 * %addr, <4 x i32> %mask1) {
; CHECK-LABEL: test_128_28:
; CHECK: ## BB#0:
-; CHECK-NEXT: vpxord %xmm1, %xmm1, %xmm1 ## encoding: [0x62,0xf1,0x75,0x08,0xef,0xc9]
+; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xef,0xc9]
; CHECK-NEXT: vpcmpneqd %xmm1, %xmm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x08,0x1f,0xc9,0x04]
; CHECK-NEXT: vmovups (%rdi), %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x89,0x10,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
@@ -720,7 +720,7 @@ define <4 x float> @test_128_28(i8 * %ad
define <2 x double> @test_128_29(i8 * %addr, <2 x double> %old, <2 x i64> %mask1) {
; CHECK-LABEL: test_128_29:
; CHECK: ## BB#0:
-; CHECK-NEXT: vpxord %xmm2, %xmm2, %xmm2 ## encoding: [0x62,0xf1,0x6d,0x08,0xef,0xd2]
+; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xef,0xd2]
; CHECK-NEXT: vpcmpneqq %xmm2, %xmm1, %k1 ## encoding: [0x62,0xf3,0xf5,0x08,0x1f,0xca,0x04]
; CHECK-NEXT: vblendmpd (%rdi), %xmm0, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x65,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
@@ -734,7 +734,7 @@ define <2 x double> @test_128_29(i8 * %a
define <2 x double> @test_128_30(i8 * %addr, <2 x double> %old, <2 x i64> %mask1) {
; CHECK-LABEL: test_128_30:
; CHECK: ## BB#0:
-; CHECK-NEXT: vpxord %xmm2, %xmm2, %xmm2 ## encoding: [0x62,0xf1,0x6d,0x08,0xef,0xd2]
+; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xef,0xd2]
; CHECK-NEXT: vpcmpneqq %xmm2, %xmm1, %k1 ## encoding: [0x62,0xf3,0xf5,0x08,0x1f,0xca,0x04]
; CHECK-NEXT: vblendmpd (%rdi), %xmm0, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x65,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
@@ -748,7 +748,7 @@ define <2 x double> @test_128_30(i8 * %a
define <2 x double> @test_128_31(i8 * %addr, <2 x i64> %mask1) {
; CHECK-LABEL: test_128_31:
; CHECK: ## BB#0:
-; CHECK-NEXT: vpxord %xmm1, %xmm1, %xmm1 ## encoding: [0x62,0xf1,0x75,0x08,0xef,0xc9]
+; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xef,0xc9]
; CHECK-NEXT: vpcmpneqq %xmm1, %xmm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x08,0x1f,0xc9,0x04]
; CHECK-NEXT: vmovapd (%rdi), %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x89,0x28,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
@@ -762,7 +762,7 @@ define <2 x double> @test_128_31(i8 * %a
define <2 x double> @test_128_32(i8 * %addr, <2 x i64> %mask1) {
; CHECK-LABEL: test_128_32:
; CHECK: ## BB#0:
-; CHECK-NEXT: vpxord %xmm1, %xmm1, %xmm1 ## encoding: [0x62,0xf1,0x75,0x08,0xef,0xc9]
+; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xef,0xc9]
; CHECK-NEXT: vpcmpneqq %xmm1, %xmm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x08,0x1f,0xc9,0x04]
; CHECK-NEXT: vmovupd (%rdi), %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x89,0x10,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
Modified: llvm/trunk/test/CodeGen/X86/avx512vl-nontemporal.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512vl-nontemporal.ll?rev=290663&r1=290662&r2=290663&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512vl-nontemporal.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512vl-nontemporal.ll Wed Dec 28 04:12:48 2016
@@ -1,15 +1,15 @@
; RUN: llc < %s -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=skx --show-mc-encoding | FileCheck %s
define void @f256(<8 x float> %A, <8 x float> %AA, i8* %B, <4 x double> %C, <4 x double> %CC, i32 %D, <4 x i64> %E, <4 x i64> %EE) {
-; CHECK: vmovntps %ymm{{.*}} ## encoding: [0x62
+; CHECK: vmovntps %ymm{{.*}} ## EVEX TO VEX Compression encoding: [0xc5
%cast = bitcast i8* %B to <8 x float>*
%A2 = fadd <8 x float> %A, %AA
store <8 x float> %A2, <8 x float>* %cast, align 64, !nontemporal !0
-; CHECK: vmovntdq %ymm{{.*}} ## encoding: [0x62
+; CHECK: vmovntdq %ymm{{.*}} ## EVEX TO VEX Compression encoding: [0xc5
%cast1 = bitcast i8* %B to <4 x i64>*
%E2 = add <4 x i64> %E, %EE
store <4 x i64> %E2, <4 x i64>* %cast1, align 64, !nontemporal !0
-; CHECK: vmovntpd %ymm{{.*}} ## encoding: [0x62
+; CHECK: vmovntpd %ymm{{.*}} ## EVEX TO VEX Compression encoding: [0xc5
%cast2 = bitcast i8* %B to <4 x double>*
%C2 = fadd <4 x double> %C, %CC
store <4 x double> %C2, <4 x double>* %cast2, align 64, !nontemporal !0
@@ -17,15 +17,15 @@ define void @f256(<8 x float> %A, <8 x f
}
define void @f128(<4 x float> %A, <4 x float> %AA, i8* %B, <2 x double> %C, <2 x double> %CC, i32 %D, <2 x i64> %E, <2 x i64> %EE) {
-; CHECK: vmovntps %xmm{{.*}} ## encoding: [0x62
+; CHECK: vmovntps %xmm{{.*}} ## EVEX TO VEX Compression encoding: [0xc5
%cast = bitcast i8* %B to <4 x float>*
%A2 = fadd <4 x float> %A, %AA
store <4 x float> %A2, <4 x float>* %cast, align 64, !nontemporal !0
-; CHECK: vmovntdq %xmm{{.*}} ## encoding: [0x62
+; CHECK: vmovntdq %xmm{{.*}} ## EVEX TO VEX Compression encoding: [0xc5
%cast1 = bitcast i8* %B to <2 x i64>*
%E2 = add <2 x i64> %E, %EE
store <2 x i64> %E2, <2 x i64>* %cast1, align 64, !nontemporal !0
-; CHECK: vmovntpd %xmm{{.*}} ## encoding: [0x62
+; CHECK: vmovntpd %xmm{{.*}} ## EVEX TO VEX Compression encoding: [0xc5
%cast2 = bitcast i8* %B to <2 x double>*
%C2 = fadd <2 x double> %C, %CC
store <2 x double> %C2, <2 x double>* %cast2, align 64, !nontemporal !0
Modified: llvm/trunk/test/CodeGen/X86/avx512vl-vbroadcast.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512vl-vbroadcast.ll?rev=290663&r1=290662&r2=290663&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512vl-vbroadcast.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512vl-vbroadcast.ll Wed Dec 28 04:12:48 2016
@@ -73,7 +73,7 @@ define <8 x float> @_inreg8xfloat(floa
define <8 x float> @_ss8xfloat_mask(<8 x float> %i, float %a, <8 x i32> %mask1) {
; CHECK-LABEL: _ss8xfloat_mask:
; CHECK: # BB#0:
-; CHECK-NEXT: vpxord %ymm3, %ymm3, %ymm3
+; CHECK-NEXT: vpxor %ymm3, %ymm3, %ymm3
; CHECK-NEXT: vpcmpneqd %ymm3, %ymm2, %k1
; CHECK-NEXT: vbroadcastss %xmm1, %ymm0 {%k1}
; CHECK-NEXT: retq
@@ -87,7 +87,7 @@ define <8 x float> @_ss8xfloat_mask(<8
define <8 x float> @_ss8xfloat_maskz(float %a, <8 x i32> %mask1) {
; CHECK-LABEL: _ss8xfloat_maskz:
; CHECK: # BB#0:
-; CHECK-NEXT: vpxord %ymm2, %ymm2, %ymm2
+; CHECK-NEXT: vpxor %ymm2, %ymm2, %ymm2
; CHECK-NEXT: vpcmpneqd %ymm2, %ymm1, %k1
; CHECK-NEXT: vbroadcastss %xmm0, %ymm0 {%k1} {z}
; CHECK-NEXT: retq
@@ -111,7 +111,7 @@ define <4 x float> @_inreg4xfloat(floa
define <4 x float> @_ss4xfloat_mask(<4 x float> %i, float %a, <4 x i32> %mask1) {
; CHECK-LABEL: _ss4xfloat_mask:
; CHECK: # BB#0:
-; CHECK-NEXT: vpxord %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vpcmpneqd %xmm3, %xmm2, %k1
; CHECK-NEXT: vbroadcastss %xmm1, %xmm0 {%k1}
; CHECK-NEXT: retq
@@ -125,7 +125,7 @@ define <4 x float> @_ss4xfloat_mask(<4
define <4 x float> @_ss4xfloat_maskz(float %a, <4 x i32> %mask1) {
; CHECK-LABEL: _ss4xfloat_maskz:
; CHECK: # BB#0:
-; CHECK-NEXT: vpxord %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vpcmpneqd %xmm2, %xmm1, %k1
; CHECK-NEXT: vbroadcastss %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT: retq
@@ -149,7 +149,7 @@ define <4 x double> @_inreg4xdouble(do
define <4 x double> @_ss4xdouble_mask(<4 x double> %i, double %a, <4 x i32> %mask1) {
; CHECK-LABEL: _ss4xdouble_mask:
; CHECK: # BB#0:
-; CHECK-NEXT: vpxord %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vpcmpneqd %xmm3, %xmm2, %k1
; CHECK-NEXT: vbroadcastsd %xmm1, %ymm0 {%k1}
; CHECK-NEXT: retq
@@ -163,7 +163,7 @@ define <4 x double> @_ss4xdouble_mask(
define <4 x double> @_ss4xdouble_maskz(double %a, <4 x i32> %mask1) {
; CHECK-LABEL: _ss4xdouble_maskz:
; CHECK: # BB#0:
-; CHECK-NEXT: vpxord %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vpcmpneqd %xmm2, %xmm1, %k1
; CHECK-NEXT: vbroadcastsd %xmm0, %ymm0 {%k1} {z}
; CHECK-NEXT: retq
Modified: llvm/trunk/test/CodeGen/X86/compress_expand.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/compress_expand.ll?rev=290663&r1=290662&r2=290663&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/compress_expand.ll (original)
+++ llvm/trunk/test/CodeGen/X86/compress_expand.ll Wed Dec 28 04:12:48 2016
@@ -238,7 +238,7 @@ define void @test12(float* %base, <4 x f
define <2 x float> @test13(float* %base, <2 x float> %src0, <2 x i32> %trigger) {
; SKX-LABEL: test13:
; SKX: # BB#0:
-; SKX-NEXT: vpxord %xmm2, %xmm2, %xmm2
+; SKX-NEXT: vpxor %xmm2, %xmm2, %xmm2
; SKX-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
; SKX-NEXT: vpcmpeqq %xmm2, %xmm1, %k0
; SKX-NEXT: kshiftlb $6, %k0, %k0
@@ -268,7 +268,7 @@ define <2 x float> @test13(float* %base,
define void @test14(float* %base, <2 x float> %V, <2 x i32> %trigger) {
; SKX-LABEL: test14:
; SKX: # BB#0:
-; SKX-NEXT: vpxord %xmm2, %xmm2, %xmm2
+; SKX-NEXT: vpxor %xmm2, %xmm2, %xmm2
; SKX-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
; SKX-NEXT: vpcmpeqq %xmm2, %xmm1, %k0
; SKX-NEXT: kshiftlb $6, %k0, %k0
@@ -314,7 +314,7 @@ define <16 x double> @test16(double* %ba
; SKX-LABEL: test16:
; SKX: # BB#0:
; SKX-NEXT: vextracti32x8 $1, %zmm2, %ymm3
-; SKX-NEXT: vpxord %ymm4, %ymm4, %ymm4
+; SKX-NEXT: vpxor %ymm4, %ymm4, %ymm4
; SKX-NEXT: vpcmpeqd %ymm4, %ymm3, %k1
; SKX-NEXT: vpcmpeqd %ymm4, %ymm2, %k2
; SKX-NEXT: kmovb %k2, %eax
Added: llvm/trunk/test/CodeGen/X86/evex-to-vex-compress.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/evex-to-vex-compress.mir?rev=290663&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/X86/evex-to-vex-compress.mir (added)
+++ llvm/trunk/test/CodeGen/X86/evex-to-vex-compress.mir Wed Dec 28 04:12:48 2016
@@ -0,0 +1,4485 @@
+# RUN: llc -march=x86-64 -run-pass x86-evex-to-vex-compress -verify-machineinstrs -mcpu=skx -o - %s | FileCheck %s
+# This test verifies VEX encoding for AVX-512 instructions that use registers with low indexes and
+# do not use zmm or mask registers and have a corresponding AVX/AVX2 opcode
+
+--- |
+ define void @evex_z256_to_vex_test() { ret void }
+ define void @evex_z128_to_vex_test() { ret void }
+ define void @evex_scalar_to_vex_test() { ret void }
+ define void @evex_z256_to_evex_test() { ret void }
+ define void @evex_z128_to_evex_test() { ret void }
+ define void @evex_scalar_to_evex_test() { ret void }
+...
+---
+ # CHECK-LABEL: name: evex_z256_to_vex_test
+ # CHECK: bb.0:
+
+name: evex_z256_to_vex_test
+body: |
+ bb.0:
+ ; CHECK: VMOVAPDYmr %rdi, 1, _, 0, _, %ymm0
+ VMOVAPDZ256mr %rdi, 1, _, 0, _, %ymm0
+ ; CHECK: %ymm0 = VMOVAPDYrm %rip, 1, _, %rax, _
+ %ymm0 = VMOVAPDZ256rm %rip, 1, _, %rax, _
+ ; CHECK: %ymm0 = VMOVAPDYrr %ymm0
+ %ymm0 = VMOVAPDZ256rr %ymm0
+ ; CHECK: %ymm0 = VMOVAPDYrr_REV %ymm0
+ %ymm0 = VMOVAPDZ256rr_REV %ymm0
+ ; CHECK: VMOVAPSYmr %rdi, 1, _, 0, _, %ymm0
+ VMOVAPSZ256mr %rdi, 1, _, 0, _, %ymm0
+ ; CHECK: %ymm0 = VMOVAPSYrm %rip, 1, _, %rax, _
+ %ymm0 = VMOVAPSZ256rm %rip, 1, _, %rax, _
+ ; CHECK: %ymm0 = VMOVAPSYrr %ymm0
+ %ymm0 = VMOVAPSZ256rr %ymm0
+ ; CHECK: %ymm0 = VMOVAPSYrr_REV %ymm0
+ %ymm0 = VMOVAPSZ256rr_REV %ymm0
+ ; CHECK: %ymm0 = VMOVDDUPYrm %rip, 1, _, %rax, _
+ %ymm0 = VMOVDDUPZ256rm %rip, 1, _, %rax, _
+ ; CHECK: %ymm0 = VMOVDDUPYrr %ymm0
+ %ymm0 = VMOVDDUPZ256rr %ymm0
+ ; CHECK: VMOVDQAYmr %rdi, 1, _, 0, _, %ymm0
+ VMOVDQA32Z256mr %rdi, 1, _, 0, _, %ymm0
+ ; CHECK: %ymm0 = VMOVDQAYrm %rip, 1, _, %rax, _
+ %ymm0 = VMOVDQA32Z256rm %rip, 1, _, %rax, _
+ ; CHECK: %ymm0 = VMOVDQAYrr %ymm0
+ %ymm0 = VMOVDQA32Z256rr %ymm0
+ ; CHECK: %ymm0 = VMOVDQAYrr_REV %ymm0
+ %ymm0 = VMOVDQA32Z256rr_REV %ymm0
+ ; CHECK: VMOVDQAYmr %rdi, 1, _, 0, _, %ymm0
+ VMOVDQA64Z256mr %rdi, 1, _, 0, _, %ymm0
+ ; CHECK: %ymm0 = VMOVDQAYrm %rip, 1, _, %rax, _
+ %ymm0 = VMOVDQA64Z256rm %rip, 1, _, %rax, _
+ ; CHECK: %ymm0 = VMOVDQAYrr %ymm0
+ %ymm0 = VMOVDQA64Z256rr %ymm0
+ ; CHECK: %ymm0 = VMOVDQAYrr_REV %ymm0
+ %ymm0 = VMOVDQA64Z256rr_REV %ymm0
+ ; CHECK: VMOVDQUYmr %rdi, 1, _, 0, _, %ymm0
+ VMOVDQU16Z256mr %rdi, 1, _, 0, _, %ymm0
+ ; CHECK: %ymm0 = VMOVDQUYrm %rip, 1, _, %rax, _
+ %ymm0 = VMOVDQU16Z256rm %rip, 1, _, %rax, _
+ ; CHECK: %ymm0 = VMOVDQUYrr %ymm0
+ %ymm0 = VMOVDQU16Z256rr %ymm0
+ ; CHECK: %ymm0 = VMOVDQUYrr_REV %ymm0
+ %ymm0 = VMOVDQU16Z256rr_REV %ymm0
+ ; CHECK: VMOVDQUYmr %rdi, 1, _, 0, _, %ymm0
+ VMOVDQU32Z256mr %rdi, 1, _, 0, _, %ymm0
+ ; CHECK: %ymm0 = VMOVDQUYrm %rip, 1, _, %rax, _
+ %ymm0 = VMOVDQU32Z256rm %rip, 1, _, %rax, _
+ ; CHECK: %ymm0 = VMOVDQUYrr %ymm0
+ %ymm0 = VMOVDQU32Z256rr %ymm0
+ ; CHECK: %ymm0 = VMOVDQUYrr_REV %ymm0
+ %ymm0 = VMOVDQU32Z256rr_REV %ymm0
+ ; CHECK: VMOVDQUYmr %rdi, 1, _, 0, _, %ymm0
+ VMOVDQU64Z256mr %rdi, 1, _, 0, _, %ymm0
+ ; CHECK: %ymm0 = VMOVDQUYrm %rip, 1, _, %rax, _
+ %ymm0 = VMOVDQU64Z256rm %rip, 1, _, %rax, _
+ ; CHECK: %ymm0 = VMOVDQUYrr %ymm0
+ %ymm0 = VMOVDQU64Z256rr %ymm0
+ ; CHECK: %ymm0 = VMOVDQUYrr_REV %ymm0
+ %ymm0 = VMOVDQU64Z256rr_REV %ymm0
+ ; CHECK: VMOVDQUYmr %rdi, 1, _, 0, _, %ymm0
+ VMOVDQU8Z256mr %rdi, 1, _, 0, _, %ymm0
+ ; CHECK: %ymm0 = VMOVDQUYrm %rip, 1, _, %rax, _
+ %ymm0 = VMOVDQU8Z256rm %rip, 1, _, %rax, _
+ ; CHECK: %ymm0 = VMOVDQUYrr %ymm0
+ %ymm0 = VMOVDQU8Z256rr %ymm0
+ ; CHECK: %ymm0 = VMOVDQUYrr_REV %ymm0
+ %ymm0 = VMOVDQU8Z256rr_REV %ymm0
+ ; CHECK: %ymm0 = VMOVNTDQAYrm %rip, 1, _, %rax, _
+ %ymm0 = VMOVNTDQAZ256rm %rip, 1, _, %rax, _
+ ; CHECK: VMOVNTDQYmr %rdi, 1, _, 0, _, %ymm0
+ VMOVNTDQZ256mr %rdi, 1, _, 0, _, %ymm0
+ ; CHECK: VMOVNTPDYmr %rdi, 1, _, 0, _, %ymm0
+ VMOVNTPDZ256mr %rdi, 1, _, 0, _, %ymm0
+ ; CHECK: VMOVNTPSYmr %rdi, 1, _, 0, _, %ymm0
+ VMOVNTPSZ256mr %rdi, 1, _, 0, _, %ymm0
+ ; CHECK: %ymm0 = VMOVSHDUPYrm %rip, 1, _, %rax, _
+ %ymm0 = VMOVSHDUPZ256rm %rip, 1, _, %rax, _
+ ; CHECK: %ymm0 = VMOVSHDUPYrr %ymm0
+ %ymm0 = VMOVSHDUPZ256rr %ymm0
+ ; CHECK: %ymm0 = VMOVSLDUPYrm %rip, 1, _, %rax, _
+ %ymm0 = VMOVSLDUPZ256rm %rip, 1, _, %rax, _
+ ; CHECK: %ymm0 = VMOVSLDUPYrr %ymm0
+ %ymm0 = VMOVSLDUPZ256rr %ymm0
+ ; CHECK: VMOVUPDYmr %rdi, 1, _, 0, _, %ymm0
+ VMOVUPDZ256mr %rdi, 1, _, 0, _, %ymm0
+ ; CHECK: %ymm0 = VMOVUPDYrm %rip, 1, _, %rax, _
+ %ymm0 = VMOVUPDZ256rm %rip, 1, _, %rax, _
+ ; CHECK: %ymm0 = VMOVUPDYrr %ymm0
+ %ymm0 = VMOVUPDZ256rr %ymm0
+ ; CHECK: %ymm0 = VMOVUPDYrr_REV %ymm0
+ %ymm0 = VMOVUPDZ256rr_REV %ymm0
+ ; CHECK: VMOVUPSYmr %rdi, 1, _, 0, _, %ymm0
+ VMOVUPSZ256mr %rdi, 1, _, 0, _, %ymm0
+ ; CHECK: %ymm0 = VPANDYrm %ymm0, %rip, 1, _, %rax, _
+ %ymm0 = VPANDDZ256rm %ymm0, %rip, 1, _, %rax, _
+ ; CHECK: %ymm0 = VPANDYrr %ymm0, %ymm1
+ %ymm0 = VPANDDZ256rr %ymm0, %ymm1
+ ; CHECK: %ymm0 = VPANDYrm %ymm0, %rip, 1, _, %rax, _
+ %ymm0 = VPANDQZ256rm %ymm0, %rip, 1, _, %rax, _
+ ; CHECK: %ymm0 = VPANDYrr %ymm0, %ymm1
+ %ymm0 = VPANDQZ256rr %ymm0, %ymm1
+ ; CHECK: %ymm0 = VPAVGBYrm %ymm0, %rip, 1, _, %rax, _
+ %ymm0 = VPAVGBZ256rm %ymm0, %rip, 1, _, %rax, _
+ ; CHECK: %ymm0 = VPAVGBYrr %ymm0, %ymm1
+ %ymm0 = VPAVGBZ256rr %ymm0, %ymm1
+ ; CHECK: %ymm0 = VPAVGWYrm %ymm0, %rip, 1, _, %rax, _
+ %ymm0 = VPAVGWZ256rm %ymm0, %rip, 1, _, %rax, _
+ ; CHECK: %ymm0 = VPAVGWYrr %ymm0, %ymm1
+ %ymm0 = VPAVGWZ256rr %ymm0, %ymm1
+ ; CHECK: %ymm0 = VPADDBYrm %ymm0, %rip, 1, _, %rax, _
+ %ymm0 = VPADDBZ256rm %ymm0, %rip, 1, _, %rax, _
+ ; CHECK: %ymm0 = VPADDBYrr %ymm0, %ymm1
+ %ymm0 = VPADDBZ256rr %ymm0, %ymm1
+ ; CHECK: %ymm0 = VPADDDYrm %ymm0, %rip, 1, _, %rax, _
+ %ymm0 = VPADDDZ256rm %ymm0, %rip, 1, _, %rax, _
+ ; CHECK: %ymm0 = VPADDDYrr %ymm0, %ymm1
+ %ymm0 = VPADDDZ256rr %ymm0, %ymm1
+ ; CHECK: %ymm0 = VPADDQYrm %ymm0, %rip, 1, _, %rax, _
+ %ymm0 = VPADDQZ256rm %ymm0, %rip, 1, _, %rax, _
+ ; CHECK: %ymm0 = VPADDQYrr %ymm0, %ymm1
+ %ymm0 = VPADDQZ256rr %ymm0, %ymm1
+ ; CHECK: %ymm0 = VPADDSBYrm %ymm0, %rip, 1, _, %rax, _
+ %ymm0 = VPADDSBZ256rm %ymm0, %rip, 1, _, %rax, _
+ ; CHECK: %ymm0 = VPADDSBYrr %ymm0, %ymm1
+ %ymm0 = VPADDSBZ256rr %ymm0, %ymm1
+ ; CHECK: %ymm0 = VPADDSWYrm %ymm0, %rip, 1, _, %rax, _
+ %ymm0 = VPADDSWZ256rm %ymm0, %rip, 1, _, %rax, _
+ ; CHECK: %ymm0 = VPADDSWYrr %ymm0, %ymm1
+ %ymm0 = VPADDSWZ256rr %ymm0, %ymm1
+ ; CHECK: %ymm0 = VPADDUSBYrm %ymm0, %rip, 1, _, %rax, _
+ %ymm0 = VPADDUSBZ256rm %ymm0, %rip, 1, _, %rax, _
+ ; CHECK: %ymm0 = VPADDUSBYrr %ymm0, %ymm1
+ %ymm0 = VPADDUSBZ256rr %ymm0, %ymm1
+ ; CHECK: %ymm0 = VPADDUSWYrm %ymm0, %rip, 1, _, %rax, _
+ %ymm0 = VPADDUSWZ256rm %ymm0, %rip, 1, _, %rax, _
+ ; CHECK: %ymm0 = VPADDUSWYrr %ymm0, %ymm1
+ %ymm0 = VPADDUSWZ256rr %ymm0, %ymm1
+ ; CHECK: %ymm0 = VPADDWYrm %ymm0, %rip, 1, _, %rax, _
+ %ymm0 = VPADDWZ256rm %ymm0, %rip, 1, _, %rax, _
+ ; CHECK: %ymm0 = VPADDWYrr %ymm0, %ymm1
+ %ymm0 = VPADDWZ256rr %ymm0, %ymm1
+ ; CHECK: %ymm0 = VMULPDYrm %ymm0, %rip, 1, _, %rax, _
+ %ymm0 = VMULPDZ256rm %ymm0, %rip, 1, _, %rax, _
+ ; CHECK: %ymm0 = VMULPDYrr %ymm0, %ymm1
+ %ymm0 = VMULPDZ256rr %ymm0, %ymm1
+ ; CHECK: %ymm0 = VMULPSYrm %ymm0, %rip, 1, _, %rax, _
+ %ymm0 = VMULPSZ256rm %ymm0, %rip, 1, _, %rax, _
+ ; CHECK: %ymm0 = VMULPSYrr %ymm0, %ymm1
+ %ymm0 = VMULPSZ256rr %ymm0, %ymm1
+ ; CHECK: %ymm0 = VORPDYrm %ymm0, %rip, 1, _, %rax, _
+ %ymm0 = VORPDZ256rm %ymm0, %rip, 1, _, %rax, _
+ ; CHECK: %ymm0 = VORPDYrr %ymm0, %ymm1
+ %ymm0 = VORPDZ256rr %ymm0, %ymm1
+ ; CHECK: %ymm0 = VORPSYrm %ymm0, %rip, 1, _, %rax, _
+ %ymm0 = VORPSZ256rm %ymm0, %rip, 1, _, %rax, _
+ ; CHECK: %ymm0 = VORPSYrr %ymm0, %ymm1
+ %ymm0 = VORPSZ256rr %ymm0, %ymm1
+ ; CHECK: %ymm0 = VPMADDUBSWYrm %ymm0, %rip, 1, _, %rax, _
+ %ymm0 = VPMADDUBSWZ256rm %ymm0, %rip, 1, _, %rax, _
+ ; CHECK: %ymm0 = VPMADDUBSWYrr %ymm0, %ymm1
+ %ymm0 = VPMADDUBSWZ256rr %ymm0, %ymm1
+ ; CHECK: %ymm0 = VPMADDWDYrm %ymm0, %rip, 1, _, %rax, _
+ %ymm0 = VPMADDWDZ256rm %ymm0, %rip, 1, _, %rax, _
+ ; CHECK: %ymm0 = VPMADDWDYrr %ymm0, %ymm1
+ %ymm0 = VPMADDWDZ256rr %ymm0, %ymm1
+ ; CHECK: %ymm0 = VPMAXSBYrm %ymm0, %rip, 1, _, %rax, _
+ %ymm0 = VPMAXSBZ256rm %ymm0, %rip, 1, _, %rax, _
+ ; CHECK: %ymm0 = VPMAXSBYrr %ymm0, %ymm1
+ %ymm0 = VPMAXSBZ256rr %ymm0, %ymm1
+ ; CHECK: %ymm0 = VPMAXSDYrm %ymm0, %rip, 1, _, %rax, _
+ %ymm0 = VPMAXSDZ256rm %ymm0, %rip, 1, _, %rax, _
+ ; CHECK: %ymm0 = VPMAXSDYrr %ymm0, %ymm1
+ %ymm0 = VPMAXSDZ256rr %ymm0, %ymm1
+ ; CHECK: %ymm0 = VPMAXSWYrm %ymm0, %rip, 1, _, %rax, _
+ %ymm0 = VPMAXSWZ256rm %ymm0, %rip, 1, _, %rax, _
+ ; CHECK: %ymm0 = VPMAXSWYrr %ymm0, %ymm1
+ %ymm0 = VPMAXSWZ256rr %ymm0, %ymm1
+ ; CHECK: %ymm0 = VPMAXUBYrm %ymm0, %rip, 1, _, %rax, _
+ %ymm0 = VPMAXUBZ256rm %ymm0, %rip, 1, _, %rax, _
+ ; CHECK: %ymm0 = VPMAXUBYrr %ymm0, %ymm1
+ %ymm0 = VPMAXUBZ256rr %ymm0, %ymm1
+ ; CHECK: %ymm0 = VPMAXUDYrm %ymm0, %rip, 1, _, %rax, _
+ %ymm0 = VPMAXUDZ256rm %ymm0, %rip, 1, _, %rax, _
+ ; CHECK: %ymm0 = VPMAXUDYrr %ymm0, %ymm1
+ %ymm0 = VPMAXUDZ256rr %ymm0, %ymm1
+ ; CHECK: %ymm0 = VPMAXUWYrm %ymm0, %rip, 1, _, %rax, _
+ %ymm0 = VPMAXUWZ256rm %ymm0, %rip, 1, _, %rax, _
+ ; CHECK: %ymm0 = VPMAXUWYrr %ymm0, %ymm1
+ %ymm0 = VPMAXUWZ256rr %ymm0, %ymm1
+ ; CHECK: %ymm0 = VPMINSBYrm %ymm0, %rip, 1, _, %rax, _
+ %ymm0 = VPMINSBZ256rm %ymm0, %rip, 1, _, %rax, _
+ ; CHECK: %ymm0 = VPMINSBYrr %ymm0, %ymm1
+ %ymm0 = VPMINSBZ256rr %ymm0, %ymm1
+ ; CHECK: %ymm0 = VPMINSDYrm %ymm0, %rip, 1, _, %rax, _
+ %ymm0 = VPMINSDZ256rm %ymm0, %rip, 1, _, %rax, _
+ ; CHECK: %ymm0 = VPMINSDYrr %ymm0, %ymm1
+ %ymm0 = VPMINSDZ256rr %ymm0, %ymm1
+ ; CHECK: %ymm0 = VPMINSWYrm %ymm0, %rip, 1, _, %rax, _
+ %ymm0 = VPMINSWZ256rm %ymm0, %rip, 1, _, %rax, _
+ ; CHECK: %ymm0 = VPMINSWYrr %ymm0, %ymm1
+ %ymm0 = VPMINSWZ256rr %ymm0, %ymm1
+ ; CHECK: %ymm0 = VPMINUBYrm %ymm0, %rip, 1, _, %rax, _
+ %ymm0 = VPMINUBZ256rm %ymm0, %rip, 1, _, %rax, _
+ ; CHECK: %ymm0 = VPMINUBYrr %ymm0, %ymm1
+ %ymm0 = VPMINUBZ256rr %ymm0, %ymm1
+ ; CHECK: %ymm0 = VPMINUDYrm %ymm0, %rip, 1, _, %rax, _
+ %ymm0 = VPMINUDZ256rm %ymm0, %rip, 1, _, %rax, _
+ ; CHECK: %ymm0 = VPMINUDYrr %ymm0, %ymm1
+ %ymm0 = VPMINUDZ256rr %ymm0, %ymm1
+ ; CHECK: %ymm0 = VPMINUWYrm %ymm0, %rip, 1, _, %rax, _
+ %ymm0 = VPMINUWZ256rm %ymm0, %rip, 1, _, %rax, _
+ ; CHECK: %ymm0 = VPMINUWYrr %ymm0, %ymm1
+ %ymm0 = VPMINUWZ256rr %ymm0, %ymm1
+ ; CHECK: %ymm0 = VPMULDQYrm %ymm0, %rip, 1, _, %rax, _
+ %ymm0 = VPMULDQZ256rm %ymm0, %rip, 1, _, %rax, _
+ ; CHECK: %ymm0 = VPMULDQYrr %ymm0, %ymm1
+ %ymm0 = VPMULDQZ256rr %ymm0, %ymm1
+ ; CHECK: %ymm0 = VPMULHRSWYrm %ymm0, %rip, 1, _, %rax, _
+ %ymm0 = VPMULHRSWZ256rm %ymm0, %rip, 1, _, %rax, _
+ ; CHECK: %ymm0 = VPMULHRSWYrr %ymm0, %ymm1
+ %ymm0 = VPMULHRSWZ256rr %ymm0, %ymm1
+ ; CHECK: %ymm0 = VPMULHUWYrm %ymm0, %rip, 1, _, %rax, _
+ %ymm0 = VPMULHUWZ256rm %ymm0, %rip, 1, _, %rax, _
+ ; CHECK: %ymm0 = VPMULHUWYrr %ymm0, %ymm1
+ %ymm0 = VPMULHUWZ256rr %ymm0, %ymm1
+ ; CHECK: %ymm0 = VPMULHWYrm %ymm0, %rip, 1, _, %rax, _
+ %ymm0 = VPMULHWZ256rm %ymm0, %rip, 1, _, %rax, _
+ ; CHECK: %ymm0 = VPMULHWYrr %ymm0, %ymm1
+ %ymm0 = VPMULHWZ256rr %ymm0, %ymm1
+ ; CHECK: %ymm0 = VPMULLDYrm %ymm0, %rip, 1, _, %rax, _
+ %ymm0 = VPMULLDZ256rm %ymm0, %rip, 1, _, %rax, _
+ ; CHECK: %ymm0 = VPMULLDYrr %ymm0, %ymm1
+ %ymm0 = VPMULLDZ256rr %ymm0, %ymm1
+ ; CHECK: %ymm0 = VPMULLWYrm %ymm0, %rip, 1, _, %rax, _
+ %ymm0 = VPMULLWZ256rm %ymm0, %rip, 1, _, %rax, _
+ ; CHECK: %ymm0 = VPMULLWYrr %ymm0, %ymm1
+ %ymm0 = VPMULLWZ256rr %ymm0, %ymm1
+ ; CHECK: %ymm0 = VPMULUDQYrm %ymm0, %rip, 1, _, %rax, _
+ %ymm0 = VPMULUDQZ256rm %ymm0, %rip, 1, _, %rax, _
+ ; CHECK: %ymm0 = VPMULUDQYrr %ymm0, %ymm1
+ %ymm0 = VPMULUDQZ256rr %ymm0, %ymm1
+ ; CHECK: %ymm0 = VPORYrm %ymm0, %rip, 1, _, %rax, _
+ %ymm0 = VPORDZ256rm %ymm0, %rip, 1, _, %rax, _
+ ; CHECK: %ymm0 = VPORYrr %ymm0, %ymm1
+ %ymm0 = VPORDZ256rr %ymm0, %ymm1
+ ; CHECK: %ymm0 = VPORYrm %ymm0, %rip, 1, _, %rax, _
+ %ymm0 = VPORQZ256rm %ymm0, %rip, 1, _, %rax, _
+ ; CHECK: %ymm0 = VPORYrr %ymm0, %ymm1
+ %ymm0 = VPORQZ256rr %ymm0, %ymm1
+ ; CHECK: %ymm0 = VPSUBBYrm %ymm0, %rip, 1, _, %rax, _
+ %ymm0 = VPSUBBZ256rm %ymm0, %rip, 1, _, %rax, _
+ ; CHECK: %ymm0 = VPSUBBYrr %ymm0, %ymm1
+ %ymm0 = VPSUBBZ256rr %ymm0, %ymm1
+ ; CHECK: %ymm0 = VPSUBDYrm %ymm0, %rip, 1, _, %rax, _
+ %ymm0 = VPSUBDZ256rm %ymm0, %rip, 1, _, %rax, _
+ ; CHECK: %ymm0 = VPSUBDYrr %ymm0, %ymm1
+ %ymm0 = VPSUBDZ256rr %ymm0, %ymm1
+ ; CHECK: %ymm0 = VPSUBQYrm %ymm0, %rip, 1, _, %rax, _
+ %ymm0 = VPSUBQZ256rm %ymm0, %rip, 1, _, %rax, _
+ ; CHECK: %ymm0 = VPSUBQYrr %ymm0, %ymm1
+ %ymm0 = VPSUBQZ256rr %ymm0, %ymm1
+ ; CHECK: %ymm0 = VPSUBSBYrm %ymm0, %rip, 1, _, %rax, _
+ %ymm0 = VPSUBSBZ256rm %ymm0, %rip, 1, _, %rax, _
+ ; CHECK: %ymm0 = VPSUBSBYrr %ymm0, %ymm1
+ %ymm0 = VPSUBSBZ256rr %ymm0, %ymm1
+ ; CHECK: %ymm0 = VPSUBSWYrm %ymm0, %rip, 1, _, %rax, _
+ %ymm0 = VPSUBSWZ256rm %ymm0, %rip, 1, _, %rax, _
+ ; CHECK: %ymm0 = VPSUBSWYrr %ymm0, %ymm1
+ %ymm0 = VPSUBSWZ256rr %ymm0, %ymm1
+ ; CHECK: %ymm0 = VPSUBUSBYrm %ymm0, %rip, 1, _, %rax, _
+ %ymm0 = VPSUBUSBZ256rm %ymm0, %rip, 1, _, %rax, _
+ ; CHECK: %ymm0 = VPSUBUSBYrr %ymm0, %ymm1
+ %ymm0 = VPSUBUSBZ256rr %ymm0, %ymm1
+ ; CHECK: %ymm0 = VPSUBUSWYrm %ymm0, %rip, 1, _, %rax, _
+ %ymm0 = VPSUBUSWZ256rm %ymm0, %rip, 1, _, %rax, _
+ ; CHECK: %ymm0 = VPSUBUSWYrr %ymm0, %ymm1
+ %ymm0 = VPSUBUSWZ256rr %ymm0, %ymm1
+ ; CHECK: %ymm0 = VPSUBWYrm %ymm0, %rip, 1, _, %rax, _
+ %ymm0 = VPSUBWZ256rm %ymm0, %rip, 1, _, %rax, _
+ ; CHECK: %ymm0 = VPSUBWYrr %ymm0, %ymm1
+ %ymm0 = VPSUBWZ256rr %ymm0, %ymm1
+ ; CHECK: %ymm0 = VPXORYrm %ymm0, %rip, 1, _, %rax, _
+ %ymm0 = VPXORDZ256rm %ymm0, %rip, 1, _, %rax, _
+ ; CHECK: %ymm0 = VPXORYrr %ymm0, %ymm1
+ %ymm0 = VPXORDZ256rr %ymm0, %ymm1
+ ; CHECK: %ymm0 = VPXORYrm %ymm0, %rip, 1, _, %rax, _
+ %ymm0 = VPXORQZ256rm %ymm0, %rip, 1, _, %rax, _
+ ; CHECK: %ymm0 = VPXORYrr %ymm0, %ymm1
+ %ymm0 = VPXORQZ256rr %ymm0, %ymm1
+ ; CHECK: %ymm0 = VADDPDYrm %ymm0, %rip, 1, _, %rax, _
+ %ymm0 = VADDPDZ256rm %ymm0, %rip, 1, _, %rax, _
+ ; CHECK: %ymm0 = VADDPDYrr %ymm0, %ymm1
+ %ymm0 = VADDPDZ256rr %ymm0, %ymm1
+ ; CHECK: %ymm0 = VADDPSYrm %ymm0, %rip, 1, _, %rax, _
+ %ymm0 = VADDPSZ256rm %ymm0, %rip, 1, _, %rax, _
+ ; CHECK: %ymm0 = VADDPSYrr %ymm0, %ymm1
+ %ymm0 = VADDPSZ256rr %ymm0, %ymm1
+ ; CHECK: %ymm0 = VANDNPDYrm %ymm0, %rip, 1, _, %rax, _
+ %ymm0 = VANDNPDZ256rm %ymm0, %rip, 1, _, %rax, _
+ ; CHECK: %ymm0 = VANDNPDYrr %ymm0, %ymm1
+ %ymm0 = VANDNPDZ256rr %ymm0, %ymm1
+ ; CHECK: %ymm0 = VANDNPSYrm %ymm0, %rip, 1, _, %rax, _
+ %ymm0 = VANDNPSZ256rm %ymm0, %rip, 1, _, %rax, _
+ ; CHECK: %ymm0 = VANDNPSYrr %ymm0, %ymm1
+ %ymm0 = VANDNPSZ256rr %ymm0, %ymm1
+ ; CHECK: %ymm0 = VANDPDYrm %ymm0, %rip, 1, _, %rax, _
+ %ymm0 = VANDPDZ256rm %ymm0, %rip, 1, _, %rax, _
+ ; CHECK: %ymm0 = VANDPDYrr %ymm0, %ymm1
+ %ymm0 = VANDPDZ256rr %ymm0, %ymm1
+ ; CHECK: %ymm0 = VANDPSYrm %ymm0, %rip, 1, _, %rax, _
+ %ymm0 = VANDPSZ256rm %ymm0, %rip, 1, _, %rax, _
+ ; CHECK: %ymm0 = VANDPSYrr %ymm0, %ymm1
+ %ymm0 = VANDPSZ256rr %ymm0, %ymm1
+ ; CHECK: %ymm0 = VDIVPDYrm %ymm0, %rip, 1, _, %rax, _
+ %ymm0 = VDIVPDZ256rm %ymm0, %rip, 1, _, %rax, _
+ ; CHECK: %ymm0 = VDIVPDYrr %ymm0, %ymm1
+ %ymm0 = VDIVPDZ256rr %ymm0, %ymm1
+ ; CHECK: %ymm0 = VDIVPSYrm %ymm0, %rip, 1, _, %rax, _
+ %ymm0 = VDIVPSZ256rm %ymm0, %rip, 1, _, %rax, _
+ ; CHECK: %ymm0 = VDIVPSYrr %ymm0, %ymm1
+ %ymm0 = VDIVPSZ256rr %ymm0, %ymm1
+ ; CHECK: %ymm0 = VMAXCPDYrm %ymm0, %rip, 1, _, %rax, _
+ %ymm0 = VMAXCPDZ256rm %ymm0, %rip, 1, _, %rax, _
+ ; CHECK: %ymm0 = VMAXCPDYrr %ymm0, %ymm1
+ %ymm0 = VMAXCPDZ256rr %ymm0, %ymm1
+ ; CHECK: %ymm0 = VMAXCPSYrm %ymm0, %rip, 1, _, %rax, _
+ %ymm0 = VMAXCPSZ256rm %ymm0, %rip, 1, _, %rax, _
+ ; CHECK: %ymm0 = VMAXCPSYrr %ymm0, %ymm1
+ %ymm0 = VMAXCPSZ256rr %ymm0, %ymm1
+ ; CHECK: %ymm0 = VMAXPDYrm %ymm0, %rip, 1, _, %rax, _
+ %ymm0 = VMAXPDZ256rm %ymm0, %rip, 1, _, %rax, _
+ ; CHECK: %ymm0 = VMAXPDYrr %ymm0, %ymm1
+ %ymm0 = VMAXPDZ256rr %ymm0, %ymm1
+ ; CHECK: %ymm0 = VMAXPSYrm %ymm0, %rip, 1, _, %rax, _
+ %ymm0 = VMAXPSZ256rm %ymm0, %rip, 1, _, %rax, _
+ ; CHECK: %ymm0 = VMAXPSYrr %ymm0, %ymm1
+ %ymm0 = VMAXPSZ256rr %ymm0, %ymm1
+ ; CHECK: %ymm0 = VMINCPDYrm %ymm0, %rip, 1, _, %rax, _
+ %ymm0 = VMINCPDZ256rm %ymm0, %rip, 1, _, %rax, _
+ ; CHECK: %ymm0 = VMINCPDYrr %ymm0, %ymm1
+ %ymm0 = VMINCPDZ256rr %ymm0, %ymm1
+ ; CHECK: %ymm0 = VMINCPSYrm %ymm0, %rip, 1, _, %rax, _
+ %ymm0 = VMINCPSZ256rm %ymm0, %rip, 1, _, %rax, _
+ ; CHECK: %ymm0 = VMINCPSYrr %ymm0, %ymm1
+ %ymm0 = VMINCPSZ256rr %ymm0, %ymm1
+ ; CHECK: %ymm0 = VMINPDYrm %ymm0, %rip, 1, _, %rax, _
+ %ymm0 = VMINPDZ256rm %ymm0, %rip, 1, _, %rax, _
+ ; CHECK: %ymm0 = VMINPDYrr %ymm0, %ymm1
+ %ymm0 = VMINPDZ256rr %ymm0, %ymm1
+ ; CHECK: %ymm0 = VMINPSYrm %ymm0, %rip, 1, _, %rax, _
+ %ymm0 = VMINPSZ256rm %ymm0, %rip, 1, _, %rax, _
+ ; CHECK: %ymm0 = VMINPSYrr %ymm0, %ymm1
+ %ymm0 = VMINPSZ256rr %ymm0, %ymm1
+ ; CHECK: %ymm0 = VXORPDYrm %ymm0, %rip, 1, _, %rax, _
+ %ymm0 = VXORPDZ256rm %ymm0, %rip, 1, _, %rax, _
+ ; CHECK: %ymm0 = VXORPDYrr %ymm0, %ymm1
+ %ymm0 = VXORPDZ256rr %ymm0, %ymm1
+ ; CHECK: %ymm0 = VXORPSYrm %ymm0, %rip, 1, _, %rax, _
+ %ymm0 = VXORPSZ256rm %ymm0, %rip, 1, _, %rax, _
+ ; CHECK: %ymm0 = VXORPSYrr %ymm0, %ymm1
+ %ymm0 = VXORPSZ256rr %ymm0, %ymm1
+ ; CHECK: %ymm0 = VPACKSSDWYrm %ymm0, %rip, 1, _, %rax, _
+ %ymm0 = VPACKSSDWZ256rm %ymm0, %rip, 1, _, %rax, _
+ ; CHECK: %ymm0 = VPACKSSDWYrr %ymm0, %ymm1
+ %ymm0 = VPACKSSDWZ256rr %ymm0, %ymm1
+ ; CHECK: %ymm0 = VPACKSSWBYrm %ymm0, %rip, 1, _, %rax, _
+ %ymm0 = VPACKSSWBZ256rm %ymm0, %rip, 1, _, %rax, _
+ ; CHECK: %ymm0 = VPACKSSWBYrr %ymm0, %ymm1
+ %ymm0 = VPACKSSWBZ256rr %ymm0, %ymm1
+ ; CHECK: %ymm0 = VPACKUSDWYrm %ymm0, %rip, 1, _, %rax, _
+ %ymm0 = VPACKUSDWZ256rm %ymm0, %rip, 1, _, %rax, _
+ ; CHECK: %ymm0 = VPACKUSDWYrr %ymm0, %ymm1
+ %ymm0 = VPACKUSDWZ256rr %ymm0, %ymm1
+ ; CHECK: %ymm0 = VPACKUSWBYrm %ymm0, %rip, 1, _, %rax, _
+ %ymm0 = VPACKUSWBZ256rm %ymm0, %rip, 1, _, %rax, _
+ ; CHECK: %ymm0 = VPACKUSWBYrr %ymm0, %ymm1
+ %ymm0 = VPACKUSWBZ256rr %ymm0, %ymm1
+ ; CHECK: %ymm0 = VUNPCKHPDYrm %ymm0, %rip, 1, _, %rax, _
+ %ymm0 = VUNPCKHPDZ256rm %ymm0, %rip, 1, _, %rax, _
+ ; CHECK: %ymm0 = VUNPCKHPDYrr %ymm0, %ymm1
+ %ymm0 = VUNPCKHPDZ256rr %ymm0, %ymm1
+ ; CHECK: %ymm0 = VUNPCKHPSYrm %ymm0, %rip, 1, _, %rax, _
+ %ymm0 = VUNPCKHPSZ256rm %ymm0, %rip, 1, _, %rax, _
+ ; CHECK: %ymm0 = VUNPCKHPSYrr %ymm0, %ymm1
+ %ymm0 = VUNPCKHPSZ256rr %ymm0, %ymm1
+ ; CHECK: %ymm0 = VUNPCKLPDYrm %ymm0, %rip, 1, _, %rax, _
+ %ymm0 = VUNPCKLPDZ256rm %ymm0, %rip, 1, _, %rax, _
+ ; CHECK: %ymm0 = VUNPCKLPDYrr %ymm0, %ymm1
+ %ymm0 = VUNPCKLPDZ256rr %ymm0, %ymm1
+ ; CHECK: %ymm0 = VUNPCKLPSYrm %ymm0, %rip, 1, _, %rax, _
+ %ymm0 = VUNPCKLPSZ256rm %ymm0, %rip, 1, _, %rax, _
+ ; CHECK: %ymm0 = VUNPCKLPSYrr %ymm0, %ymm1
+ %ymm0 = VUNPCKLPSZ256rr %ymm0, %ymm1
+ ; CHECK: %ymm0 = VSUBPDYrm %ymm0, %rip, 1, _, %rax, _
+ %ymm0 = VSUBPDZ256rm %ymm0, %rip, 1, _, %rax, _
+ ; CHECK: %ymm0 = VSUBPDYrr %ymm0, %ymm1
+ %ymm0 = VSUBPDZ256rr %ymm0, %ymm1
+ ; CHECK: %ymm0 = VSUBPSYrm %ymm0, %rip, 1, _, %rax, _
+ %ymm0 = VSUBPSZ256rm %ymm0, %rip, 1, _, %rax, _
+ ; CHECK: %ymm0 = VSUBPSYrr %ymm0, %ymm1
+ %ymm0 = VSUBPSZ256rr %ymm0, %ymm1
+ ; CHECK: %ymm0 = VPUNPCKHBWYrm %ymm0, %rip, 1, _, %rax, _
+ %ymm0 = VPUNPCKHBWZ256rm %ymm0, %rip, 1, _, %rax, _
+ ; CHECK: %ymm0 = VPUNPCKHBWYrr %ymm0, %ymm1
+ %ymm0 = VPUNPCKHBWZ256rr %ymm0, %ymm1
+ ; CHECK: %ymm0 = VPUNPCKHDQYrm %ymm0, %rip, 1, _, %rax, _
+ %ymm0 = VPUNPCKHDQZ256rm %ymm0, %rip, 1, _, %rax, _
+ ; CHECK: %ymm0 = VPUNPCKHDQYrr %ymm0, %ymm1
+ %ymm0 = VPUNPCKHDQZ256rr %ymm0, %ymm1
+ ; CHECK: %ymm0 = VPUNPCKHQDQYrm %ymm0, %rip, 1, _, %rax, _
+ %ymm0 = VPUNPCKHQDQZ256rm %ymm0, %rip, 1, _, %rax, _
+ ; CHECK: %ymm0 = VPUNPCKHQDQYrr %ymm0, %ymm1
+ %ymm0 = VPUNPCKHQDQZ256rr %ymm0, %ymm1
+ ; CHECK: %ymm0 = VPUNPCKHWDYrm %ymm0, %rip, 1, _, %rax, _
+ %ymm0 = VPUNPCKHWDZ256rm %ymm0, %rip, 1, _, %rax, _
+ ; CHECK: %ymm0 = VPUNPCKHWDYrr %ymm0, %ymm1
+ %ymm0 = VPUNPCKHWDZ256rr %ymm0, %ymm1
+ ; CHECK: %ymm0 = VPUNPCKLBWYrm %ymm0, %rip, 1, _, %rax, _
+ %ymm0 = VPUNPCKLBWZ256rm %ymm0, %rip, 1, _, %rax, _
+ ; CHECK: %ymm0 = VPUNPCKLBWYrr %ymm0, %ymm1
+ %ymm0 = VPUNPCKLBWZ256rr %ymm0, %ymm1
+ ; CHECK: %ymm0 = VPUNPCKLDQYrm %ymm0, %rip, 1, _, %rax, _
+ %ymm0 = VPUNPCKLDQZ256rm %ymm0, %rip, 1, _, %rax, _
+ ; CHECK: %ymm0 = VPUNPCKLDQYrr %ymm0, %ymm1
+ %ymm0 = VPUNPCKLDQZ256rr %ymm0, %ymm1
+ ; CHECK: %ymm0 = VPUNPCKLQDQYrm %ymm0, %rip, 1, _, %rax, _
+ %ymm0 = VPUNPCKLQDQZ256rm %ymm0, %rip, 1, _, %rax, _
+ ; CHECK: %ymm0 = VPUNPCKLQDQYrr %ymm0, %ymm1
+ %ymm0 = VPUNPCKLQDQZ256rr %ymm0, %ymm1
+ ; CHECK: %ymm0 = VPUNPCKLWDYrm %ymm0, %rip, 1, _, %rax, _
+ %ymm0 = VPUNPCKLWDZ256rm %ymm0, %rip, 1, _, %rax, _
+ ; CHECK: %ymm0 = VPUNPCKLWDYrr %ymm0, %ymm1
+ %ymm0 = VPUNPCKLWDZ256rr %ymm0, %ymm1
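+ ; 256-bit FMA-family forms (VFMADD*, VFMADDSUB*, VFMSUB*, VFMSUBADD*, VFNMADD*, VFNMSUB*)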
+ ; CHECK: %ymm0 = VFMADD132PDYm %ymm0, %ymm0, %rsi, 1, _, 0, _
+ %ymm0 = VFMADD132PDZ256m %ymm0, %ymm0, %rsi, 1, _, 0, _
+ ; CHECK: %ymm0 = VFMADD132PDYr %ymm0, %ymm1, %ymm2
+ %ymm0 = VFMADD132PDZ256r %ymm0, %ymm1, %ymm2
+ ; CHECK: %ymm0 = VFMADD132PSYm %ymm0, %ymm0, %rsi, 1, _, 0, _
+ %ymm0 = VFMADD132PSZ256m %ymm0, %ymm0, %rsi, 1, _, 0, _
+ ; CHECK: %ymm0 = VFMADD132PSYr %ymm0, %ymm1, %ymm2
+ %ymm0 = VFMADD132PSZ256r %ymm0, %ymm1, %ymm2
+ ; CHECK: %ymm0 = VFMADD213PDYm %ymm0, %ymm0, %rsi, 1, _, 0, _
+ %ymm0 = VFMADD213PDZ256m %ymm0, %ymm0, %rsi, 1, _, 0, _
+ ; CHECK: %ymm0 = VFMADD213PDYr %ymm0, %ymm1, %ymm2
+ %ymm0 = VFMADD213PDZ256r %ymm0, %ymm1, %ymm2
+ ; CHECK: %ymm0 = VFMADD213PSYm %ymm0, %ymm0, %rsi, 1, _, 0, _
+ %ymm0 = VFMADD213PSZ256m %ymm0, %ymm0, %rsi, 1, _, 0, _
+ ; CHECK: %ymm0 = VFMADD213PSYr %ymm0, %ymm1, %ymm2
+ %ymm0 = VFMADD213PSZ256r %ymm0, %ymm1, %ymm2
+ ; CHECK: %ymm0 = VFMADD231PDYm %ymm0, %ymm0, %rsi, 1, _, 0, _
+ %ymm0 = VFMADD231PDZ256m %ymm0, %ymm0, %rsi, 1, _, 0, _
+ ; CHECK: %ymm0 = VFMADD231PDYr %ymm0, %ymm1, %ymm2
+ %ymm0 = VFMADD231PDZ256r %ymm0, %ymm1, %ymm2
+ ; CHECK: %ymm0 = VFMADD231PSYm %ymm0, %ymm0, %rsi, 1, _, 0, _
+ %ymm0 = VFMADD231PSZ256m %ymm0, %ymm0, %rsi, 1, _, 0, _
+ ; CHECK: %ymm0 = VFMADD231PSYr %ymm0, %ymm1, %ymm2
+ %ymm0 = VFMADD231PSZ256r %ymm0, %ymm1, %ymm2
+ ; CHECK: %ymm0 = VFMADDSUB132PDYm %ymm0, %ymm0, %rsi, 1, _, 0, _
+ %ymm0 = VFMADDSUB132PDZ256m %ymm0, %ymm0, %rsi, 1, _, 0, _
+ ; CHECK: %ymm0 = VFMADDSUB132PDYr %ymm0, %ymm1, %ymm2
+ %ymm0 = VFMADDSUB132PDZ256r %ymm0, %ymm1, %ymm2
+ ; CHECK: %ymm0 = VFMADDSUB132PSYm %ymm0, %ymm0, %rsi, 1, _, 0, _
+ %ymm0 = VFMADDSUB132PSZ256m %ymm0, %ymm0, %rsi, 1, _, 0, _
+ ; CHECK: %ymm0 = VFMADDSUB132PSYr %ymm0, %ymm1, %ymm2
+ %ymm0 = VFMADDSUB132PSZ256r %ymm0, %ymm1, %ymm2
+ ; CHECK: %ymm0 = VFMADDSUB213PDYm %ymm0, %ymm0, %rsi, 1, _, 0, _
+ %ymm0 = VFMADDSUB213PDZ256m %ymm0, %ymm0, %rsi, 1, _, 0, _
+ ; CHECK: %ymm0 = VFMADDSUB213PDYr %ymm0, %ymm1, %ymm2
+ %ymm0 = VFMADDSUB213PDZ256r %ymm0, %ymm1, %ymm2
+ ; CHECK: %ymm0 = VFMADDSUB213PSYm %ymm0, %ymm0, %rsi, 1, _, 0, _
+ %ymm0 = VFMADDSUB213PSZ256m %ymm0, %ymm0, %rsi, 1, _, 0, _
+ ; CHECK: %ymm0 = VFMADDSUB213PSYr %ymm0, %ymm1, %ymm2
+ %ymm0 = VFMADDSUB213PSZ256r %ymm0, %ymm1, %ymm2
+ ; CHECK: %ymm0 = VFMADDSUB231PDYm %ymm0, %ymm0, %rsi, 1, _, 0, _
+ %ymm0 = VFMADDSUB231PDZ256m %ymm0, %ymm0, %rsi, 1, _, 0, _
+ ; CHECK: %ymm0 = VFMADDSUB231PDYr %ymm0, %ymm1, %ymm2
+ %ymm0 = VFMADDSUB231PDZ256r %ymm0, %ymm1, %ymm2
+ ; CHECK: %ymm0 = VFMADDSUB231PSYm %ymm0, %ymm0, %rsi, 1, _, 0, _
+ %ymm0 = VFMADDSUB231PSZ256m %ymm0, %ymm0, %rsi, 1, _, 0, _
+ ; CHECK: %ymm0 = VFMADDSUB231PSYr %ymm0, %ymm1, %ymm2
+ %ymm0 = VFMADDSUB231PSZ256r %ymm0, %ymm1, %ymm2
+ ; CHECK: %ymm0 = VFMSUB132PDYm %ymm0, %ymm0, %rsi, 1, _, 0, _
+ %ymm0 = VFMSUB132PDZ256m %ymm0, %ymm0, %rsi, 1, _, 0, _
+ ; CHECK: %ymm0 = VFMSUB132PDYr %ymm0, %ymm1, %ymm2
+ %ymm0 = VFMSUB132PDZ256r %ymm0, %ymm1, %ymm2
+ ; CHECK: %ymm0 = VFMSUB132PSYm %ymm0, %ymm0, %rsi, 1, _, 0, _
+ %ymm0 = VFMSUB132PSZ256m %ymm0, %ymm0, %rsi, 1, _, 0, _
+ ; CHECK: %ymm0 = VFMSUB132PSYr %ymm0, %ymm1, %ymm2
+ %ymm0 = VFMSUB132PSZ256r %ymm0, %ymm1, %ymm2
+ ; CHECK: %ymm0 = VFMSUB213PDYm %ymm0, %ymm0, %rsi, 1, _, 0, _
+ %ymm0 = VFMSUB213PDZ256m %ymm0, %ymm0, %rsi, 1, _, 0, _
+ ; CHECK: %ymm0 = VFMSUB213PDYr %ymm0, %ymm1, %ymm2
+ %ymm0 = VFMSUB213PDZ256r %ymm0, %ymm1, %ymm2
+ ; CHECK: %ymm0 = VFMSUB213PSYm %ymm0, %ymm0, %rsi, 1, _, 0, _
+ %ymm0 = VFMSUB213PSZ256m %ymm0, %ymm0, %rsi, 1, _, 0, _
+ ; CHECK: %ymm0 = VFMSUB213PSYr %ymm0, %ymm1, %ymm2
+ %ymm0 = VFMSUB213PSZ256r %ymm0, %ymm1, %ymm2
+ ; CHECK: %ymm0 = VFMSUB231PDYm %ymm0, %ymm0, %rsi, 1, _, 0, _
+ %ymm0 = VFMSUB231PDZ256m %ymm0, %ymm0, %rsi, 1, _, 0, _
+ ; CHECK: %ymm0 = VFMSUB231PDYr %ymm0, %ymm1, %ymm2
+ %ymm0 = VFMSUB231PDZ256r %ymm0, %ymm1, %ymm2
+ ; CHECK: %ymm0 = VFMSUB231PSYm %ymm0, %ymm0, %rsi, 1, _, 0, _
+ %ymm0 = VFMSUB231PSZ256m %ymm0, %ymm0, %rsi, 1, _, 0, _
+ ; CHECK: %ymm0 = VFMSUB231PSYr %ymm0, %ymm1, %ymm2
+ %ymm0 = VFMSUB231PSZ256r %ymm0, %ymm1, %ymm2
+ ; CHECK: %ymm0 = VFMSUBADD132PDYm %ymm0, %ymm0, %rsi, 1, _, 0, _
+ %ymm0 = VFMSUBADD132PDZ256m %ymm0, %ymm0, %rsi, 1, _, 0, _
+ ; CHECK: %ymm0 = VFMSUBADD132PDYr %ymm0, %ymm1, %ymm2
+ %ymm0 = VFMSUBADD132PDZ256r %ymm0, %ymm1, %ymm2
+ ; CHECK: %ymm0 = VFMSUBADD132PSYm %ymm0, %ymm0, %rsi, 1, _, 0, _
+ %ymm0 = VFMSUBADD132PSZ256m %ymm0, %ymm0, %rsi, 1, _, 0, _
+ ; CHECK: %ymm0 = VFMSUBADD132PSYr %ymm0, %ymm1, %ymm2
+ %ymm0 = VFMSUBADD132PSZ256r %ymm0, %ymm1, %ymm2
+ ; CHECK: %ymm0 = VFMSUBADD213PDYm %ymm0, %ymm0, %rsi, 1, _, 0, _
+ %ymm0 = VFMSUBADD213PDZ256m %ymm0, %ymm0, %rsi, 1, _, 0, _
+ ; CHECK: %ymm0 = VFMSUBADD213PDYr %ymm0, %ymm1, %ymm2
+ %ymm0 = VFMSUBADD213PDZ256r %ymm0, %ymm1, %ymm2
+ ; CHECK: %ymm0 = VFMSUBADD213PSYm %ymm0, %ymm0, %rsi, 1, _, 0, _
+ %ymm0 = VFMSUBADD213PSZ256m %ymm0, %ymm0, %rsi, 1, _, 0, _
+ ; CHECK: %ymm0 = VFMSUBADD213PSYr %ymm0, %ymm1, %ymm2
+ %ymm0 = VFMSUBADD213PSZ256r %ymm0, %ymm1, %ymm2
+ ; CHECK: %ymm0 = VFMSUBADD231PDYm %ymm0, %ymm0, %rsi, 1, _, 0, _
+ %ymm0 = VFMSUBADD231PDZ256m %ymm0, %ymm0, %rsi, 1, _, 0, _
+ ; CHECK: %ymm0 = VFMSUBADD231PDYr %ymm0, %ymm1, %ymm2
+ %ymm0 = VFMSUBADD231PDZ256r %ymm0, %ymm1, %ymm2
+ ; CHECK: %ymm0 = VFMSUBADD231PSYm %ymm0, %ymm0, %rsi, 1, _, 0, _
+ %ymm0 = VFMSUBADD231PSZ256m %ymm0, %ymm0, %rsi, 1, _, 0, _
+ ; CHECK: %ymm0 = VFMSUBADD231PSYr %ymm0, %ymm1, %ymm2
+ %ymm0 = VFMSUBADD231PSZ256r %ymm0, %ymm1, %ymm2
+ ; CHECK: %ymm0 = VFNMADD132PDYm %ymm0, %ymm0, %rsi, 1, _, 0, _
+ %ymm0 = VFNMADD132PDZ256m %ymm0, %ymm0, %rsi, 1, _, 0, _
+ ; CHECK: %ymm0 = VFNMADD132PDYr %ymm0, %ymm1, %ymm2
+ %ymm0 = VFNMADD132PDZ256r %ymm0, %ymm1, %ymm2
+ ; CHECK: %ymm0 = VFNMADD132PSYm %ymm0, %ymm0, %rsi, 1, _, 0, _
+ %ymm0 = VFNMADD132PSZ256m %ymm0, %ymm0, %rsi, 1, _, 0, _
+ ; CHECK: %ymm0 = VFNMADD132PSYr %ymm0, %ymm1, %ymm2
+ %ymm0 = VFNMADD132PSZ256r %ymm0, %ymm1, %ymm2
+ ; CHECK: %ymm0 = VFNMADD213PDYm %ymm0, %ymm0, %rsi, 1, _, 0, _
+ %ymm0 = VFNMADD213PDZ256m %ymm0, %ymm0, %rsi, 1, _, 0, _
+ ; CHECK: %ymm0 = VFNMADD213PDYr %ymm0, %ymm1, %ymm2
+ %ymm0 = VFNMADD213PDZ256r %ymm0, %ymm1, %ymm2
+ ; CHECK: %ymm0 = VFNMADD213PSYm %ymm0, %ymm0, %rsi, 1, _, 0, _
+ %ymm0 = VFNMADD213PSZ256m %ymm0, %ymm0, %rsi, 1, _, 0, _
+ ; CHECK: %ymm0 = VFNMADD213PSYr %ymm0, %ymm1, %ymm2
+ %ymm0 = VFNMADD213PSZ256r %ymm0, %ymm1, %ymm2
+ ; CHECK: %ymm0 = VFNMADD231PDYm %ymm0, %ymm0, %rsi, 1, _, 0, _
+ %ymm0 = VFNMADD231PDZ256m %ymm0, %ymm0, %rsi, 1, _, 0, _
+ ; CHECK: %ymm0 = VFNMADD231PDYr %ymm0, %ymm1, %ymm2
+ %ymm0 = VFNMADD231PDZ256r %ymm0, %ymm1, %ymm2
+ ; CHECK: %ymm0 = VFNMADD231PSYm %ymm0, %ymm0, %rsi, 1, _, 0, _
+ %ymm0 = VFNMADD231PSZ256m %ymm0, %ymm0, %rsi, 1, _, 0, _
+ ; CHECK: %ymm0 = VFNMADD231PSYr %ymm0, %ymm1, %ymm2
+ %ymm0 = VFNMADD231PSZ256r %ymm0, %ymm1, %ymm2
+ ; CHECK: %ymm0 = VFNMSUB132PDYm %ymm0, %ymm0, %rsi, 1, _, 0, _
+ %ymm0 = VFNMSUB132PDZ256m %ymm0, %ymm0, %rsi, 1, _, 0, _
+ ; CHECK: %ymm0 = VFNMSUB132PDYr %ymm0, %ymm1, %ymm2
+ %ymm0 = VFNMSUB132PDZ256r %ymm0, %ymm1, %ymm2
+ ; CHECK: %ymm0 = VFNMSUB132PSYm %ymm0, %ymm0, %rsi, 1, _, 0, _
+ %ymm0 = VFNMSUB132PSZ256m %ymm0, %ymm0, %rsi, 1, _, 0, _
+ ; CHECK: %ymm0 = VFNMSUB132PSYr %ymm0, %ymm1, %ymm2
+ %ymm0 = VFNMSUB132PSZ256r %ymm0, %ymm1, %ymm2
+ ; CHECK: %ymm0 = VFNMSUB213PDYm %ymm0, %ymm0, %rsi, 1, _, 0, _
+ %ymm0 = VFNMSUB213PDZ256m %ymm0, %ymm0, %rsi, 1, _, 0, _
+ ; CHECK: %ymm0 = VFNMSUB213PDYr %ymm0, %ymm1, %ymm2
+ %ymm0 = VFNMSUB213PDZ256r %ymm0, %ymm1, %ymm2
+ ; CHECK: %ymm0 = VFNMSUB213PSYm %ymm0, %ymm0, %rsi, 1, _, 0, _
+ %ymm0 = VFNMSUB213PSZ256m %ymm0, %ymm0, %rsi, 1, _, 0, _
+ ; CHECK: %ymm0 = VFNMSUB213PSYr %ymm0, %ymm1, %ymm2
+ %ymm0 = VFNMSUB213PSZ256r %ymm0, %ymm1, %ymm2
+ ; CHECK: %ymm0 = VFNMSUB231PDYm %ymm0, %ymm0, %rsi, 1, _, 0, _
+ %ymm0 = VFNMSUB231PDZ256m %ymm0, %ymm0, %rsi, 1, _, 0, _
+ ; CHECK: %ymm0 = VFNMSUB231PDYr %ymm0, %ymm1, %ymm2
+ %ymm0 = VFNMSUB231PDZ256r %ymm0, %ymm1, %ymm2
+ ; CHECK: %ymm0 = VFNMSUB231PSYm %ymm0, %ymm0, %rsi, 1, _, 0, _
+ %ymm0 = VFNMSUB231PSZ256m %ymm0, %ymm0, %rsi, 1, _, 0, _
+ ; CHECK: %ymm0 = VFNMSUB231PSYr %ymm0, %ymm1, %ymm2
+ %ymm0 = VFNMSUB231PSZ256r %ymm0, %ymm1, %ymm2
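+ ; 256-bit packed right-shift forms (arithmetic and logical)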
+ ; CHECK: %ymm0 = VPSRADYri %ymm0, 7
+ %ymm0 = VPSRADZ256ri %ymm0, 7
+ ; CHECK: %ymm0 = VPSRADYrm %ymm0, %rip, 1, _, %rax, _
+ %ymm0 = VPSRADZ256rm %ymm0, %rip, 1, _, %rax, _
+ ; CHECK: %ymm0 = VPSRADYrr %ymm0, %xmm1
+ %ymm0 = VPSRADZ256rr %ymm0, %xmm1
+ ; CHECK: %ymm0 = VPSRAVDYrm %ymm0, %rip, 1, _, %rax, _
+ %ymm0 = VPSRAVDZ256rm %ymm0, %rip, 1, _, %rax, _
+ ; CHECK: %ymm0 = VPSRAVDYrr %ymm0, %ymm1
+ %ymm0 = VPSRAVDZ256rr %ymm0, %ymm1
+ ; CHECK: %ymm0 = VPSRAWYri %ymm0, 7
+ %ymm0 = VPSRAWZ256ri %ymm0, 7
+ ; CHECK: %ymm0 = VPSRAWYrm %ymm0, %rip, 1, _, %rax, _
+ %ymm0 = VPSRAWZ256rm %ymm0, %rip, 1, _, %rax, _
+ ; CHECK: %ymm0 = VPSRAWYrr %ymm0, %xmm1
+ %ymm0 = VPSRAWZ256rr %ymm0, %xmm1
+ ; CHECK: %ymm0 = VPSRLDQYri %ymm0, %ymm1
+ %ymm0 = VPSRLDQZ256rr %ymm0, %ymm1
+ ; CHECK: %ymm0 = VPSRLDYri %ymm0, 7
+ %ymm0 = VPSRLDZ256ri %ymm0, 7
+ ; CHECK: %ymm0 = VPSRLDYrm %ymm0, %rip, 1, _, %rax, _
+ %ymm0 = VPSRLDZ256rm %ymm0, %rip, 1, _, %rax, _
+ ; CHECK: %ymm0 = VPSRLDYrr %ymm0, %xmm1
+ %ymm0 = VPSRLDZ256rr %ymm0, %xmm1
+ ; CHECK: %ymm0 = VPSRLQYri %ymm0, 7
+ %ymm0 = VPSRLQZ256ri %ymm0, 7
+ ; CHECK: %ymm0 = VPSRLQYrm %ymm0, %rip, 1, _, %rax, _
+ %ymm0 = VPSRLQZ256rm %ymm0, %rip, 1, _, %rax, _
+ ; CHECK: %ymm0 = VPSRLQYrr %ymm0, %xmm1
+ %ymm0 = VPSRLQZ256rr %ymm0, %xmm1
+ ; CHECK: %ymm0 = VPSRLVDYrm %ymm0, %rip, 1, _, %rax, _
+ %ymm0 = VPSRLVDZ256rm %ymm0, %rip, 1, _, %rax, _
+ ; CHECK: %ymm0 = VPSRLVDYrr %ymm0, %ymm1
+ %ymm0 = VPSRLVDZ256rr %ymm0, %ymm1
+ ; CHECK: %ymm0 = VPSRLVQYrm %ymm0, %rip, 1, _, %rax, _
+ %ymm0 = VPSRLVQZ256rm %ymm0, %rip, 1, _, %rax, _
+ ; CHECK: %ymm0 = VPSRLVQYrr %ymm0, %ymm1
+ %ymm0 = VPSRLVQZ256rr %ymm0, %ymm1
+ ; CHECK: %ymm0 = VPSRLWYri %ymm0, 7
+ %ymm0 = VPSRLWZ256ri %ymm0, 7
+ ; CHECK: %ymm0 = VPSRLWYrm %ymm0, %rip, 1, _, %rax, _
+ %ymm0 = VPSRLWZ256rm %ymm0, %rip, 1, _, %rax, _
+ ; CHECK: %ymm0 = VPSRLWYrr %ymm0, %xmm1
+ %ymm0 = VPSRLWZ256rr %ymm0, %xmm1
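+ ; 256-bit packed sign- and zero-extension forms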
+ ; CHECK: %ymm0 = VPMOVSXBDYrm %rip, 1, _, %rax, _
+ %ymm0 = VPMOVSXBDZ256rm %rip, 1, _, %rax, _
+ ; CHECK: %ymm0 = VPMOVSXBDYrr %xmm0
+ %ymm0 = VPMOVSXBDZ256rr %xmm0
+ ; CHECK: %ymm0 = VPMOVSXBQYrm %rip, 1, _, %rax, _
+ %ymm0 = VPMOVSXBQZ256rm %rip, 1, _, %rax, _
+ ; CHECK: %ymm0 = VPMOVSXBQYrr %xmm0
+ %ymm0 = VPMOVSXBQZ256rr %xmm0
+ ; CHECK: %ymm0 = VPMOVSXBWYrm %rip, 1, _, %rax, _
+ %ymm0 = VPMOVSXBWZ256rm %rip, 1, _, %rax, _
+ ; CHECK: %ymm0 = VPMOVSXBWYrr %xmm0
+ %ymm0 = VPMOVSXBWZ256rr %xmm0
+ ; CHECK: %ymm0 = VPMOVSXDQYrm %rip, 1, _, %rax, _
+ %ymm0 = VPMOVSXDQZ256rm %rip, 1, _, %rax, _
+ ; CHECK: %ymm0 = VPMOVSXDQYrr %xmm0
+ %ymm0 = VPMOVSXDQZ256rr %xmm0
+ ; CHECK: %ymm0 = VPMOVSXWDYrm %rip, 1, _, %rax, _
+ %ymm0 = VPMOVSXWDZ256rm %rip, 1, _, %rax, _
+ ; CHECK: %ymm0 = VPMOVSXWDYrr %xmm0
+ %ymm0 = VPMOVSXWDZ256rr %xmm0
+ ; CHECK: %ymm0 = VPMOVSXWQYrm %rip, 1, _, %rax, _
+ %ymm0 = VPMOVSXWQZ256rm %rip, 1, _, %rax, _
+ ; CHECK: %ymm0 = VPMOVSXWQYrr %xmm0
+ %ymm0 = VPMOVSXWQZ256rr %xmm0
+ ; CHECK: %ymm0 = VPMOVZXBDYrm %rip, 1, _, %rax, _
+ %ymm0 = VPMOVZXBDZ256rm %rip, 1, _, %rax, _
+ ; CHECK: %ymm0 = VPMOVZXBDYrr %xmm0
+ %ymm0 = VPMOVZXBDZ256rr %xmm0
+ ; CHECK: %ymm0 = VPMOVZXBQYrm %rip, 1, _, %rax, _
+ %ymm0 = VPMOVZXBQZ256rm %rip, 1, _, %rax, _
+ ; CHECK: %ymm0 = VPMOVZXBQYrr %xmm0
+ %ymm0 = VPMOVZXBQZ256rr %xmm0
+ ; CHECK: %ymm0 = VPMOVZXBWYrm %rip, 1, _, %rax, _
+ %ymm0 = VPMOVZXBWZ256rm %rip, 1, _, %rax, _
+ ; CHECK: %ymm0 = VPMOVZXBWYrr %xmm0
+ %ymm0 = VPMOVZXBWZ256rr %xmm0
+ ; CHECK: %ymm0 = VPMOVZXDQYrm %rip, 1, _, %rax, _
+ %ymm0 = VPMOVZXDQZ256rm %rip, 1, _, %rax, _
+ ; CHECK: %ymm0 = VPMOVZXDQYrr %xmm0
+ %ymm0 = VPMOVZXDQZ256rr %xmm0
+ ; CHECK: %ymm0 = VPMOVZXWDYrm %rip, 1, _, %rax, _
+ %ymm0 = VPMOVZXWDZ256rm %rip, 1, _, %rax, _
+ ; CHECK: %ymm0 = VPMOVZXWDYrr %xmm0
+ %ymm0 = VPMOVZXWDZ256rr %xmm0
+ ; CHECK: %ymm0 = VPMOVZXWQYrm %rip, 1, _, %rax, _
+ %ymm0 = VPMOVZXWQZ256rm %rip, 1, _, %rax, _
+ ; CHECK: %ymm0 = VPMOVZXWQYrr %xmm0
+ %ymm0 = VPMOVZXWQZ256rr %xmm0
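+ ; 256-bit broadcast forms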
+ ; CHECK: %ymm0 = VBROADCASTSDYrm %rip, 1, _, %rax, _
+ %ymm0 = VBROADCASTSDZ256m %rip, 1, _, %rax, _
+ ; CHECK: %ymm0 = VBROADCASTSDYrr %xmm0
+ %ymm0 = VBROADCASTSDZ256r %xmm0
+ ; CHECK: %ymm0 = VBROADCASTSDYrr %xmm0
+ %ymm0 = VBROADCASTSDZ256r_s %xmm0
+ ; CHECK: %ymm0 = VBROADCASTSSYrm %rip, 1, _, %rax, _
+ %ymm0 = VBROADCASTSSZ256m %rip, 1, _, %rax, _
+ ; CHECK: %ymm0 = VBROADCASTSSYrr %xmm0
+ %ymm0 = VBROADCASTSSZ256r %xmm0
+ ; CHECK: %ymm0 = VBROADCASTSSYrr %xmm0
+ %ymm0 = VBROADCASTSSZ256r_s %xmm0
+ ; CHECK: %ymm0 = VPBROADCASTBYrm %rip, 1, _, %rax, _
+ %ymm0 = VPBROADCASTBZ256m %rip, 1, _, %rax, _
+ ; CHECK: %ymm0 = VPBROADCASTBYrr %xmm0
+ %ymm0 = VPBROADCASTBZ256r %xmm0
+ ; CHECK: %ymm0 = VPBROADCASTDYrm %rip, 1, _, %rax, _
+ %ymm0 = VPBROADCASTDZ256m %rip, 1, _, %rax, _
+ ; CHECK: %ymm0 = VPBROADCASTDYrr %xmm0
+ %ymm0 = VPBROADCASTDZ256r %xmm0
+ ; CHECK: %ymm0 = VPBROADCASTWYrm %rip, 1, _, %rax, _
+ %ymm0 = VPBROADCASTWZ256m %rip, 1, _, %rax, _
+ ; CHECK: %ymm0 = VPBROADCASTWYrr %xmm0
+ %ymm0 = VPBROADCASTWZ256r %xmm0
+ ; CHECK: %ymm0 = VPBROADCASTQYrm %rip, 1, _, %rax, _
+ %ymm0 = VPBROADCASTQZ256m %rip, 1, _, %rax, _
+ ; CHECK: %ymm0 = VPBROADCASTQYrr %xmm0
+ %ymm0 = VPBROADCASTQZ256r %xmm0
+ ; CHECK: %ymm0 = VPABSBYrm %rip, 1, _, %rax, _
+ %ymm0 = VPABSBZ256rm %rip, 1, _, %rax, _
+ ; CHECK: %ymm0 = VPABSBYrr %ymm0
+ %ymm0 = VPABSBZ256rr %ymm0
+ ; CHECK: %ymm0 = VPABSDYrm %rip, 1, _, %rax, _
+ %ymm0 = VPABSDZ256rm %rip, 1, _, %rax, _
+ ; CHECK: %ymm0 = VPABSDYrr %ymm0
+ %ymm0 = VPABSDZ256rr %ymm0
+ ; CHECK: %ymm0 = VPABSWYrm %rip, 1, _, %rax, _
+ %ymm0 = VPABSWZ256rm %rip, 1, _, %rax, _
+ ; CHECK: %ymm0 = VPABSWYrr %ymm0
+ %ymm0 = VPABSWZ256rr %ymm0
+ ; CHECK: %ymm0 = VPSADBWYrm %ymm0, 1, _, %rax, _, _
+ %ymm0 = VPSADBWZ256rm %ymm0, 1, _, %rax, _, _
+ ; CHECK: %ymm0 = VPSADBWYrr %ymm0, %ymm1
+ %ymm0 = VPSADBWZ256rr %ymm0, %ymm1
+ ; CHECK: %ymm0 = VPERMDYrm %ymm0, %rdi, 1, _, 0, _
+ %ymm0 = VPERMDZ256rm %ymm0, %rdi, 1, _, 0, _
+ ; CHECK: %ymm0 = VPERMDYrr %ymm1, %ymm0
+ %ymm0 = VPERMDZ256rr %ymm1, %ymm0
+ ; CHECK: %ymm0 = VPERMILPDYmi %rdi, 1, _, 0, _, _
+ %ymm0 = VPERMILPDZ256mi %rdi, 1, _, 0, _, _
+ ; CHECK: %ymm0 = VPERMILPDYri %ymm0, 7
+ %ymm0 = VPERMILPDZ256ri %ymm0, 7
+ ; CHECK: %ymm0 = VPERMILPDYrm %ymm0, %rdi, 1, _, 0, _
+ %ymm0 = VPERMILPDZ256rm %ymm0, %rdi, 1, _, 0, _
+ ; CHECK: %ymm0 = VPERMILPDYrr %ymm1, %ymm0
+ %ymm0 = VPERMILPDZ256rr %ymm1, %ymm0
+ ; CHECK: %ymm0 = VPERMILPSYmi %rdi, 1, _, 0, _, _
+ %ymm0 = VPERMILPSZ256mi %rdi, 1, _, 0, _, _
+ ; CHECK: %ymm0 = VPERMILPSYri %ymm0, 7
+ %ymm0 = VPERMILPSZ256ri %ymm0, 7
+ ; CHECK: %ymm0 = VPERMILPSYrm %ymm0, %rdi, 1, _, 0, _
+ %ymm0 = VPERMILPSZ256rm %ymm0, %rdi, 1, _, 0, _
+ ; CHECK: %ymm0 = VPERMILPSYrr %ymm1, %ymm0
+ %ymm0 = VPERMILPSZ256rr %ymm1, %ymm0
+ ; CHECK: %ymm0 = VPERMPDYmi %rdi, 1, _, 0, _, _
+ %ymm0 = VPERMPDZ256mi %rdi, 1, _, 0, _, _
+ ; CHECK: %ymm0 = VPERMPDYri %ymm0, 7
+ %ymm0 = VPERMPDZ256ri %ymm0, 7
+ ; CHECK: %ymm0 = VPERMPSYrm %ymm0, %rdi, 1, _, 0, _
+ %ymm0 = VPERMPSZ256rm %ymm0, %rdi, 1, _, 0, _
+ ; CHECK: %ymm0 = VPERMPSYrr %ymm1, %ymm0
+ %ymm0 = VPERMPSZ256rr %ymm1, %ymm0
+ ; CHECK: %ymm0 = VPERMQYmi %rdi, 1, _, 0, _, _
+ %ymm0 = VPERMQZ256mi %rdi, 1, _, 0, _, _
+ ; CHECK: %ymm0 = VPERMQYri %ymm0, 7
+ %ymm0 = VPERMQZ256ri %ymm0, 7
+ ; CHECK: %ymm0 = VPSLLDQYri %ymm0, 14
+ %ymm0 = VPSLLDQZ256rr %ymm0, 14
+ ; CHECK: %ymm0 = VPSLLDYri %ymm0, 7
+ %ymm0 = VPSLLDZ256ri %ymm0, 7
+ ; CHECK: %ymm0 = VPSLLDYrm %ymm0, %rip, 1, _, %rax, _
+ %ymm0 = VPSLLDZ256rm %ymm0, %rip, 1, _, %rax, _
+ ; CHECK: %ymm0 = VPSLLDYrr %ymm0, 14
+ %ymm0 = VPSLLDZ256rr %ymm0, 14
+ ; CHECK: %ymm0 = VPSLLQYri %ymm0, 7
+ %ymm0 = VPSLLQZ256ri %ymm0, 7
+ ; CHECK: %ymm0 = VPSLLQYrm %ymm0, %rip, 1, _, %rax, _
+ %ymm0 = VPSLLQZ256rm %ymm0, %rip, 1, _, %rax, _
+ ; CHECK: %ymm0 = VPSLLQYrr %ymm0, 14
+ %ymm0 = VPSLLQZ256rr %ymm0, 14
+ ; CHECK: %ymm0 = VPSLLVDYrm %ymm0, %rip, 1, _, %rax, _
+ %ymm0 = VPSLLVDZ256rm %ymm0, %rip, 1, _, %rax, _
+ ; CHECK: %ymm0 = VPSLLVDYrr %ymm0, 14
+ %ymm0 = VPSLLVDZ256rr %ymm0, 14
+ ; CHECK: %ymm0 = VPSLLVQYrm %ymm0, %rip, 1, _, %rax, _
+ %ymm0 = VPSLLVQZ256rm %ymm0, %rip, 1, _, %rax, _
+ ; CHECK: %ymm0 = VPSLLVQYrr %ymm0, 14
+ %ymm0 = VPSLLVQZ256rr %ymm0, 14
+ ; CHECK: %ymm0 = VPSLLWYri %ymm0, 7
+ %ymm0 = VPSLLWZ256ri %ymm0, 7
+ ; CHECK: %ymm0 = VPSLLWYrm %ymm0, %rip, 1, _, %rax, _
+ %ymm0 = VPSLLWZ256rm %ymm0, %rip, 1, _, %rax, _
+ ; CHECK: %ymm0 = VPSLLWYrr %ymm0, 14
+ %ymm0 = VPSLLWZ256rr %ymm0, 14
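+ ; 256-bit packed conversion forms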
+ ; CHECK: %ymm0 = VCVTDQ2PDYrm %rdi, %ymm0, 1, _, 0
+ %ymm0 = VCVTDQ2PDZ256rm %rdi, %ymm0, 1, _, 0
+ ; CHECK: %ymm0 = VCVTDQ2PDYrr %xmm0
+ %ymm0 = VCVTDQ2PDZ256rr %xmm0
+ ; CHECK: %ymm0 = VCVTDQ2PSYrm %rdi, %ymm0, 1, _, 0
+ %ymm0 = VCVTDQ2PSZ256rm %rdi, %ymm0, 1, _, 0
+ ; CHECK: %ymm0 = VCVTDQ2PSYrr %ymm0
+ %ymm0 = VCVTDQ2PSZ256rr %ymm0
+ ; CHECK: %xmm0 = VCVTPD2DQYrm %rdi, %ymm0, 1, _, 0
+ %xmm0 = VCVTPD2DQZ256rm %rdi, %ymm0, 1, _, 0
+ ; CHECK: %xmm0 = VCVTPD2DQYrr %ymm0
+ %xmm0 = VCVTPD2DQZ256rr %ymm0
+ ; CHECK: %xmm0 = VCVTPD2PSYrm %rdi, %ymm0, 1, _, 0
+ %xmm0 = VCVTPD2PSZ256rm %rdi, %ymm0, 1, _, 0
+ ; CHECK: %xmm0 = VCVTPD2PSYrr %ymm0
+ %xmm0 = VCVTPD2PSZ256rr %ymm0
+ ; CHECK: %ymm0 = VCVTPS2DQYrm %rdi, %ymm0, 1, _, 0
+ %ymm0 = VCVTPS2DQZ256rm %rdi, %ymm0, 1, _, 0
+ ; CHECK: %ymm0 = VCVTPS2DQYrr %ymm0
+ %ymm0 = VCVTPS2DQZ256rr %ymm0
+ ; CHECK: %ymm0 = VCVTPS2PDYrm %rdi, %ymm0, 1, _, 0
+ %ymm0 = VCVTPS2PDZ256rm %rdi, %ymm0, 1, _, 0
+ ; CHECK: %ymm0 = VCVTPS2PDYrr %xmm0
+ %ymm0 = VCVTPS2PDZ256rr %xmm0
+ ; CHECK: VCVTPS2PHYmr %rdi, %ymm0, 1, _, 0, _, _
+ VCVTPS2PHZ256mr %rdi, %ymm0, 1, _, 0, _, _
+ ; CHECK: %xmm0 = VCVTPS2PHYrr %ymm0, _
+ %xmm0 = VCVTPS2PHZ256rr %ymm0, _
+ ; CHECK: %ymm0 = VCVTPH2PSYrm %rdi, %ymm0, 1, _, 0
+ %ymm0 = VCVTPH2PSZ256rm %rdi, %ymm0, 1, _, 0
+ ; CHECK: %ymm0 = VCVTPH2PSYrr %xmm0
+ %ymm0 = VCVTPH2PSZ256rr %xmm0
+ ; CHECK: %xmm0 = VCVTTPD2DQYrm %rdi, %ymm0, 1, _, 0
+ %xmm0 = VCVTTPD2DQZ256rm %rdi, %ymm0, 1, _, 0
+ ; CHECK: %xmm0 = VCVTTPD2DQYrr %ymm0
+ %xmm0 = VCVTTPD2DQZ256rr %ymm0
+ ; CHECK: %ymm0 = VCVTTPS2DQYrm %rdi, %ymm0, 1, _, 0
+ %ymm0 = VCVTTPS2DQZ256rm %rdi, %ymm0, 1, _, 0
+ ; CHECK: %ymm0 = VCVTTPS2DQYrr %ymm0
+ %ymm0 = VCVTTPS2DQZ256rr %ymm0
+ ; CHECK: %ymm0 = VSQRTPDYm %rdi, _, _, _, _
+ %ymm0 = VSQRTPDZ256m %rdi, _, _, _, _
+ ; CHECK: %ymm0 = VSQRTPDYr %ymm0
+ %ymm0 = VSQRTPDZ256r %ymm0
+ ; CHECK: %ymm0 = VSQRTPSYm %rdi, _, _, _, _
+ %ymm0 = VSQRTPSZ256m %rdi, _, _, _, _
+ ; CHECK: %ymm0 = VSQRTPSYr %ymm0
+ %ymm0 = VSQRTPSZ256r %ymm0
+ ; CHECK: %ymm0 = VPALIGNRYrmi %ymm0, %rdi, _, _, _, _, _
+ %ymm0 = VPALIGNRZ256rmi %ymm0, %rdi, _, _, _, _, _
+ ; CHECK: %ymm0 = VPALIGNRYrri %ymm0, %ymm1, _
+ %ymm0 = VPALIGNRZ256rri %ymm0, %ymm1, _
+ ; CHECK: %ymm0 = VMOVUPSYrm %rdi, 1, _, 0, _
+ %ymm0 = VMOVUPSZ256rm %rdi, 1, _, 0, _
+ ; CHECK: %ymm0 = VMOVUPSYrr %ymm0
+ %ymm0 = VMOVUPSZ256rr %ymm0
+ ; CHECK: %ymm0 = VMOVUPSYrr_REV %ymm0
+ %ymm0 = VMOVUPSZ256rr_REV %ymm0
+ ; CHECK: %ymm0 = VPSHUFBYrm %ymm0, _, _, _, _, _
+ %ymm0 = VPSHUFBZ256rm %ymm0, _, _, _, _, _
+ ; CHECK: %ymm0 = VPSHUFBYrr %ymm0, %ymm1
+ %ymm0 = VPSHUFBZ256rr %ymm0, %ymm1
+ ; CHECK: %ymm0 = VPSHUFDYmi %rdi, 1, _, 0, _, _
+ %ymm0 = VPSHUFDZ256mi %rdi, 1, _, 0, _, _
+ ; CHECK: %ymm0 = VPSHUFDYri %ymm0, -24
+ %ymm0 = VPSHUFDZ256ri %ymm0, -24
+ ; CHECK: %ymm0 = VPSHUFHWYmi %rdi, 1, _, 0, _, _
+ %ymm0 = VPSHUFHWZ256mi %rdi, 1, _, 0, _, _
+ ; CHECK: %ymm0 = VPSHUFHWYri %ymm0, -24
+ %ymm0 = VPSHUFHWZ256ri %ymm0, -24
+ ; CHECK: %ymm0 = VPSHUFLWYmi %rdi, 1, _, 0, _, _
+ %ymm0 = VPSHUFLWZ256mi %rdi, 1, _, 0, _, _
+ ; CHECK: %ymm0 = VPSHUFLWYri %ymm0, -24
+ %ymm0 = VPSHUFLWZ256ri %ymm0, -24
+ ; CHECK: %ymm0 = VSHUFPDYrmi %ymm0, _, _, _, _, _, _
+ %ymm0 = VSHUFPDZ256rmi %ymm0, _, _, _, _, _, _
+ ; CHECK: %ymm0 = VSHUFPDYrri %ymm0, _, _
+ %ymm0 = VSHUFPDZ256rri %ymm0, _, _
+ ; CHECK: %ymm0 = VSHUFPSYrmi %ymm0, _, _, _, _, _, _
+ %ymm0 = VSHUFPSZ256rmi %ymm0, _, _, _, _, _, _
+ ; CHECK: %ymm0 = VSHUFPSYrri %ymm0, _, _
+ %ymm0 = VSHUFPSZ256rri %ymm0, _, _
+
+ RET 0, %zmm0, %zmm1
+...
+---
+ # CHECK-LABEL: name: evex_z128_to_vex_test
+ # CHECK: bb.0:
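+ # As in the 256-bit block above, each 128-bit (Z128) EVEX opcode below is
+ # expected to be rewritten to its VEX xmm counterpart.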
+
+name: evex_z128_to_vex_test
+body: |
+ bb.0:
+ ; CHECK: VMOVAPDmr %rdi, 1, _, 0, _, %xmm0
+ VMOVAPDZ128mr %rdi, 1, _, 0, _, %xmm0
+ ; CHECK: %xmm0 = VMOVAPDrm %rip, 1, _, %rax, _
+ %xmm0 = VMOVAPDZ128rm %rip, 1, _, %rax, _
+ ; CHECK: %xmm0 = VMOVAPDrr %xmm0
+ %xmm0 = VMOVAPDZ128rr %xmm0
+ ; CHECK: VMOVAPSmr %rdi, 1, _, 0, _, %xmm0
+ VMOVAPSZ128mr %rdi, 1, _, 0, _, %xmm0
+ ; CHECK: %xmm0 = VMOVAPSrm %rip, 1, _, %rax, _
+ %xmm0 = VMOVAPSZ128rm %rip, 1, _, %rax, _
+ ; CHECK: %xmm0 = VMOVAPSrr %xmm0
+ %xmm0 = VMOVAPSZ128rr %xmm0
+ ; CHECK: VMOVDQAmr %rdi, 1, _, 0, _, %xmm0
+ VMOVDQA32Z128mr %rdi, 1, _, 0, _, %xmm0
+ ; CHECK: %xmm0 = VMOVDQArm %rip, 1, _, %rax, _
+ %xmm0 = VMOVDQA32Z128rm %rip, 1, _, %rax, _
+ ; CHECK: %xmm0 = VMOVDQArr %xmm0
+ %xmm0 = VMOVDQA32Z128rr %xmm0
+ ; CHECK: VMOVDQAmr %rdi, 1, _, 0, _, %xmm0
+ VMOVDQA64Z128mr %rdi, 1, _, 0, _, %xmm0
+ ; CHECK: %xmm0 = VMOVDQArm %rip, 1, _, %rax, _
+ %xmm0 = VMOVDQA64Z128rm %rip, 1, _, %rax, _
+ ; CHECK: %xmm0 = VMOVDQArr %xmm0
+ %xmm0 = VMOVDQA64Z128rr %xmm0
+ ; CHECK: VMOVDQUmr %rdi, 1, _, 0, _, %xmm0
+ VMOVDQU16Z128mr %rdi, 1, _, 0, _, %xmm0
+ ; CHECK: %xmm0 = VMOVDQUrm %rip, 1, _, %rax, _
+ %xmm0 = VMOVDQU16Z128rm %rip, 1, _, %rax, _
+ ; CHECK: %xmm0 = VMOVDQUrr %xmm0
+ %xmm0 = VMOVDQU16Z128rr %xmm0
+ ; CHECK: VMOVDQUmr %rdi, 1, _, 0, _, %xmm0
+ VMOVDQU32Z128mr %rdi, 1, _, 0, _, %xmm0
+ ; CHECK: %xmm0 = VMOVDQUrm %rip, 1, _, %rax, _
+ %xmm0 = VMOVDQU32Z128rm %rip, 1, _, %rax, _
+ ; CHECK: %xmm0 = VMOVDQUrr %xmm0
+ %xmm0 = VMOVDQU32Z128rr %xmm0
+ ; CHECK: VMOVDQUmr %rdi, 1, _, 0, _, %xmm0
+ VMOVDQU64Z128mr %rdi, 1, _, 0, _, %xmm0
+ ; CHECK: %xmm0 = VMOVDQUrm %rip, 1, _, %rax, _
+ %xmm0 = VMOVDQU64Z128rm %rip, 1, _, %rax, _
+ ; CHECK: %xmm0 = VMOVDQUrr %xmm0
+ %xmm0 = VMOVDQU64Z128rr %xmm0
+ ; CHECK: VMOVDQUmr %rdi, 1, _, 0, _, %xmm0
+ VMOVDQU8Z128mr %rdi, 1, _, 0, _, %xmm0
+ ; CHECK: %xmm0 = VMOVDQUrm %rip, 1, _, %rax, _
+ %xmm0 = VMOVDQU8Z128rm %rip, 1, _, %rax, _
+ ; CHECK: %xmm0 = VMOVDQUrr %xmm0
+ %xmm0 = VMOVDQU8Z128rr %xmm0
+ ; CHECK: %xmm0 = VMOVDQUrr_REV %xmm0
+ %xmm0 = VMOVDQU8Z128rr_REV %xmm0
+ ; CHECK: %xmm0 = VMOVNTDQArm %rip, 1, _, %rax, _
+ %xmm0 = VMOVNTDQAZ128rm %rip, 1, _, %rax, _
+ ; CHECK: VMOVUPDmr %rdi, 1, _, 0, _, %xmm0
+ VMOVUPDZ128mr %rdi, 1, _, 0, _, %xmm0
+ ; CHECK: %xmm0 = VMOVUPDrm %rip, 1, _, %rax, _
+ %xmm0 = VMOVUPDZ128rm %rip, 1, _, %rax, _
+ ; CHECK: %xmm0 = VMOVUPDrr %xmm0
+ %xmm0 = VMOVUPDZ128rr %xmm0
+ ; CHECK: %xmm0 = VMOVUPDrr_REV %xmm0
+ %xmm0 = VMOVUPDZ128rr_REV %xmm0
+ ; CHECK: VMOVUPSmr %rdi, 1, _, 0, _, %xmm0
+ VMOVUPSZ128mr %rdi, 1, _, 0, _, %xmm0
+ ; CHECK: %xmm0 = VMOVUPSrm %rip, 1, _, %rax, _
+ %xmm0 = VMOVUPSZ128rm %rip, 1, _, %rax, _
+ ; CHECK: %xmm0 = VMOVUPSrr %xmm0
+ %xmm0 = VMOVUPSZ128rr %xmm0
+ ; CHECK: %xmm0 = VMOVUPSrr_REV %xmm0
+ %xmm0 = VMOVUPSZ128rr_REV %xmm0
+ ; CHECK: VMOVNTDQmr %rdi, 1, _, 0, _, %xmm0
+ VMOVNTDQZ128mr %rdi, 1, _, 0, _, %xmm0
+ ; CHECK: VMOVNTPDmr %rdi, 1, _, 0, _, %xmm0
+ VMOVNTPDZ128mr %rdi, 1, _, 0, _, %xmm0
+ ; CHECK: VMOVNTPSmr %rdi, 1, _, 0, _, %xmm0
+ VMOVNTPSZ128mr %rdi, 1, _, 0, _, %xmm0
+ ; CHECK: %xmm0 = VMOVAPDrr_REV %xmm0
+ %xmm0 = VMOVAPDZ128rr_REV %xmm0
+ ; CHECK: %xmm0 = VMOVAPSrr_REV %xmm0
+ %xmm0 = VMOVAPSZ128rr_REV %xmm0
+ ; CHECK: %xmm0 = VMOVDQArr_REV %xmm0
+ %xmm0 = VMOVDQA32Z128rr_REV %xmm0
+ ; CHECK: %xmm0 = VMOVDQArr_REV %xmm0
+ %xmm0 = VMOVDQA64Z128rr_REV %xmm0
+ ; CHECK: %xmm0 = VMOVDQUrr_REV %xmm0
+ %xmm0 = VMOVDQU16Z128rr_REV %xmm0
+ ; CHECK: %xmm0 = VMOVDQUrr_REV %xmm0
+ %xmm0 = VMOVDQU32Z128rr_REV %xmm0
+ ; CHECK: %xmm0 = VMOVDQUrr_REV %xmm0
+ %xmm0 = VMOVDQU64Z128rr_REV %xmm0
+ ; CHECK: %xmm0 = VPMOVSXBDrm %rip, 1, _, %rax, _
+ %xmm0 = VPMOVSXBDZ128rm %rip, 1, _, %rax, _
+ ; CHECK: %xmm0 = VPMOVSXBDrr %xmm0
+ %xmm0 = VPMOVSXBDZ128rr %xmm0
+ ; CHECK: %xmm0 = VPMOVSXBQrm %rip, 1, _, %rax, _
+ %xmm0 = VPMOVSXBQZ128rm %rip, 1, _, %rax, _
+ ; CHECK: %xmm0 = VPMOVSXBQrr %xmm0
+ %xmm0 = VPMOVSXBQZ128rr %xmm0
+ ; CHECK: %xmm0 = VPMOVSXBWrm %rip, 1, _, %rax, _
+ %xmm0 = VPMOVSXBWZ128rm %rip, 1, _, %rax, _
+ ; CHECK: %xmm0 = VPMOVSXBWrr %xmm0
+ %xmm0 = VPMOVSXBWZ128rr %xmm0
+ ; CHECK: %xmm0 = VPMOVSXDQrm %rip, 1, _, %rax, _
+ %xmm0 = VPMOVSXDQZ128rm %rip, 1, _, %rax, _
+ ; CHECK: %xmm0 = VPMOVSXDQrr %xmm0
+ %xmm0 = VPMOVSXDQZ128rr %xmm0
+ ; CHECK: %xmm0 = VPMOVSXWDrm %rip, 1, _, %rax, _
+ %xmm0 = VPMOVSXWDZ128rm %rip, 1, _, %rax, _
+ ; CHECK: %xmm0 = VPMOVSXWDrr %xmm0
+ %xmm0 = VPMOVSXWDZ128rr %xmm0
+ ; CHECK: %xmm0 = VPMOVSXWQrm %rip, 1, _, %rax, _
+ %xmm0 = VPMOVSXWQZ128rm %rip, 1, _, %rax, _
+ ; CHECK: %xmm0 = VPMOVSXWQrr %xmm0
+ %xmm0 = VPMOVSXWQZ128rr %xmm0
+ ; CHECK: %xmm0 = VPMOVZXBDrm %rip, 1, _, %rax, _
+ %xmm0 = VPMOVZXBDZ128rm %rip, 1, _, %rax, _
+ ; CHECK: %xmm0 = VPMOVZXBDrr %xmm0
+ %xmm0 = VPMOVZXBDZ128rr %xmm0
+ ; CHECK: %xmm0 = VPMOVZXBQrm %rip, 1, _, %rax, _
+ %xmm0 = VPMOVZXBQZ128rm %rip, 1, _, %rax, _
+ ; CHECK: %xmm0 = VPMOVZXBQrr %xmm0
+ %xmm0 = VPMOVZXBQZ128rr %xmm0
+ ; CHECK: %xmm0 = VPMOVZXBWrm %rip, 1, _, %rax, _
+ %xmm0 = VPMOVZXBWZ128rm %rip, 1, _, %rax, _
+ ; CHECK: %xmm0 = VPMOVZXBWrr %xmm0
+ %xmm0 = VPMOVZXBWZ128rr %xmm0
+ ; CHECK: %xmm0 = VPMOVZXDQrm %rip, 1, _, %rax, _
+ %xmm0 = VPMOVZXDQZ128rm %rip, 1, _, %rax, _
+ ; CHECK: %xmm0 = VPMOVZXDQrr %xmm0
+ %xmm0 = VPMOVZXDQZ128rr %xmm0
+ ; CHECK: %xmm0 = VPMOVZXWDrm %rip, 1, _, %rax, _
+ %xmm0 = VPMOVZXWDZ128rm %rip, 1, _, %rax, _
+ ; CHECK: %xmm0 = VPMOVZXWDrr %xmm0
+ %xmm0 = VPMOVZXWDZ128rr %xmm0
+ ; CHECK: %xmm0 = VPMOVZXWQrm %rip, 1, _, %rax, _
+ %xmm0 = VPMOVZXWQZ128rm %rip, 1, _, %rax, _
+ ; CHECK: %xmm0 = VPMOVZXWQrr %xmm0
+ %xmm0 = VPMOVZXWQZ128rr %xmm0
+ ; CHECK: VMOVHPDmr %rdi, 1, _, 0, _, %xmm0
+ VMOVHPDZ128mr %rdi, 1, _, 0, _, %xmm0
+ ; CHECK: %xmm0 = VMOVHPDrm %xmm0, %rdi, 1, _, 0, _
+ %xmm0 = VMOVHPDZ128rm %xmm0, %rdi, 1, _, 0, _
+ ; CHECK: VMOVHPSmr %rdi, 1, _, 0, _, %xmm0
+ VMOVHPSZ128mr %rdi, 1, _, 0, _, %xmm0
+ ; CHECK: %xmm0 = VMOVHPSrm %xmm0, %rdi, 1, _, 0, _
+ %xmm0 = VMOVHPSZ128rm %xmm0, %rdi, 1, _, 0, _
+ ; CHECK: VMOVLPDmr %rdi, 1, _, 0, _, %xmm0
+ VMOVLPDZ128mr %rdi, 1, _, 0, _, %xmm0
+ ; CHECK: %xmm0 = VMOVLPDrm %xmm0, %rdi, 1, _, 0, _
+ %xmm0 = VMOVLPDZ128rm %xmm0, %rdi, 1, _, 0, _
+ ; CHECK: VMOVLPSmr %rdi, 1, _, 0, _, %xmm0
+ VMOVLPSZ128mr %rdi, 1, _, 0, _, %xmm0
+ ; CHECK: %xmm0 = VMOVLPSrm %xmm0, %rdi, 1, _, 0, _
+ %xmm0 = VMOVLPSZ128rm %xmm0, %rdi, 1, _, 0, _
+ ; CHECK: %xmm0 = VMAXCPDrm %xmm0, %rip, 1, _, %rax, _
+ %xmm0 = VMAXCPDZ128rm %xmm0, %rip, 1, _, %rax, _
+ ; CHECK: %xmm0 = VMAXCPDrr %xmm0, %xmm1
+ %xmm0 = VMAXCPDZ128rr %xmm0, %xmm1
+ ; CHECK: %xmm0 = VMAXCPSrm %xmm0, %rip, 1, _, %rax, _
+ %xmm0 = VMAXCPSZ128rm %xmm0, %rip, 1, _, %rax, _
+ ; CHECK: %xmm0 = VMAXCPSrr %xmm0, %xmm1
+ %xmm0 = VMAXCPSZ128rr %xmm0, %xmm1
+ ; CHECK: %xmm0 = VMAXPDrm %xmm0, %rip, 1, _, %rax, _
+ %xmm0 = VMAXPDZ128rm %xmm0, %rip, 1, _, %rax, _
+ ; CHECK: %xmm0 = VMAXPDrr %xmm0, %xmm1
+ %xmm0 = VMAXPDZ128rr %xmm0, %xmm1
+ ; CHECK: %xmm0 = VMAXPSrm %xmm0, %rip, 1, _, %rax, _
+ %xmm0 = VMAXPSZ128rm %xmm0, %rip, 1, _, %rax, _
+ ; CHECK: %xmm0 = VMAXPSrr %xmm0, %xmm1
+ %xmm0 = VMAXPSZ128rr %xmm0, %xmm1
+ ; CHECK: %xmm0 = VMINCPDrm %xmm0, %rip, 1, _, %rax, _
+ %xmm0 = VMINCPDZ128rm %xmm0, %rip, 1, _, %rax, _
+ ; CHECK: %xmm0 = VMINCPDrr %xmm0, %xmm1
+ %xmm0 = VMINCPDZ128rr %xmm0, %xmm1
+ ; CHECK: %xmm0 = VMINCPSrm %xmm0, %rip, 1, _, %rax, _
+ %xmm0 = VMINCPSZ128rm %xmm0, %rip, 1, _, %rax, _
+ ; CHECK: %xmm0 = VMINCPSrr %xmm0, %xmm1
+ %xmm0 = VMINCPSZ128rr %xmm0, %xmm1
+ ; CHECK: %xmm0 = VMINPDrm %xmm0, %rip, 1, _, %rax, _
+ %xmm0 = VMINPDZ128rm %xmm0, %rip, 1, _, %rax, _
+ ; CHECK: %xmm0 = VMINPDrr %xmm0, %xmm1
+ %xmm0 = VMINPDZ128rr %xmm0, %xmm1
+ ; CHECK: %xmm0 = VMINPSrm %xmm0, %rip, 1, _, %rax, _
+ %xmm0 = VMINPSZ128rm %xmm0, %rip, 1, _, %rax, _
+ ; CHECK: %xmm0 = VMINPSrr %xmm0, %xmm1
+ %xmm0 = VMINPSZ128rr %xmm0, %xmm1
+ ; CHECK: %xmm0 = VMULPDrm %xmm0, %rip, 1, _, %rax, _
+ %xmm0 = VMULPDZ128rm %xmm0, %rip, 1, _, %rax, _
+ ; CHECK: %xmm0 = VMULPDrr %xmm0, %xmm1
+ %xmm0 = VMULPDZ128rr %xmm0, %xmm1
+ ; CHECK: %xmm0 = VMULPSrm %xmm0, %rip, 1, _, %rax, _
+ %xmm0 = VMULPSZ128rm %xmm0, %rip, 1, _, %rax, _
+ ; CHECK: %xmm0 = VMULPSrr %xmm0, %xmm1
+ %xmm0 = VMULPSZ128rr %xmm0, %xmm1
+ ; CHECK: %xmm0 = VORPDrm %xmm0, %rip, 1, _, %rax, _
+ %xmm0 = VORPDZ128rm %xmm0, %rip, 1, _, %rax, _
+ ; CHECK: %xmm0 = VORPDrr %xmm0, %xmm1
+ %xmm0 = VORPDZ128rr %xmm0, %xmm1
+ ; CHECK: %xmm0 = VORPSrm %xmm0, %rip, 1, _, %rax, _
+ %xmm0 = VORPSZ128rm %xmm0, %rip, 1, _, %rax, _
+ ; CHECK: %xmm0 = VORPSrr %xmm0, %xmm1
+ %xmm0 = VORPSZ128rr %xmm0, %xmm1
+ ; CHECK: %xmm0 = VPADDBrm %xmm0, %rip, 1, _, %rax, _
+ %xmm0 = VPADDBZ128rm %xmm0, %rip, 1, _, %rax, _
+ ; CHECK: %xmm0 = VPADDBrr %xmm0, %xmm1
+ %xmm0 = VPADDBZ128rr %xmm0, %xmm1
+ ; CHECK: %xmm0 = VPADDDrm %xmm0, %rip, 1, _, %rax, _
+ %xmm0 = VPADDDZ128rm %xmm0, %rip, 1, _, %rax, _
+ ; CHECK: %xmm0 = VPADDDrr %xmm0, %xmm1
+ %xmm0 = VPADDDZ128rr %xmm0, %xmm1
+ ; CHECK: %xmm0 = VPADDQrm %xmm0, %rip, 1, _, %rax, _
+ %xmm0 = VPADDQZ128rm %xmm0, %rip, 1, _, %rax, _
+ ; CHECK: %xmm0 = VPADDQrr %xmm0, %xmm1
+ %xmm0 = VPADDQZ128rr %xmm0, %xmm1
+ ; CHECK: %xmm0 = VPADDSBrm %xmm0, %rip, 1, _, %rax, _
+ %xmm0 = VPADDSBZ128rm %xmm0, %rip, 1, _, %rax, _
+ ; CHECK: %xmm0 = VPADDSBrr %xmm0, %xmm1
+ %xmm0 = VPADDSBZ128rr %xmm0, %xmm1
+ ; CHECK: %xmm0 = VPADDSWrm %xmm0, %rip, 1, _, %rax, _
+ %xmm0 = VPADDSWZ128rm %xmm0, %rip, 1, _, %rax, _
+ ; CHECK: %xmm0 = VPADDSWrr %xmm0, %xmm1
+ %xmm0 = VPADDSWZ128rr %xmm0, %xmm1
+ ; CHECK: %xmm0 = VPADDUSBrm %xmm0, %rip, 1, _, %rax, _
+ %xmm0 = VPADDUSBZ128rm %xmm0, %rip, 1, _, %rax, _
+ ; CHECK: %xmm0 = VPADDUSBrr %xmm0, %xmm1
+ %xmm0 = VPADDUSBZ128rr %xmm0, %xmm1
+ ; CHECK: %xmm0 = VPADDUSWrm %xmm0, %rip, 1, _, %rax, _
+ %xmm0 = VPADDUSWZ128rm %xmm0, %rip, 1, _, %rax, _
+ ; CHECK: %xmm0 = VPADDUSWrr %xmm0, %xmm1
+ %xmm0 = VPADDUSWZ128rr %xmm0, %xmm1
+ ; CHECK: %xmm0 = VPADDWrm %xmm0, %rip, 1, _, %rax, _
+ %xmm0 = VPADDWZ128rm %xmm0, %rip, 1, _, %rax, _
+ ; CHECK: %xmm0 = VPADDWrr %xmm0, %xmm1
+ %xmm0 = VPADDWZ128rr %xmm0, %xmm1
+ ; CHECK: %xmm0 = VPANDrm %xmm0, %rip, 1, _, %rax, _
+ %xmm0 = VPANDDZ128rm %xmm0, %rip, 1, _, %rax, _
+ ; CHECK: %xmm0 = VPANDrr %xmm0, %xmm1
+ %xmm0 = VPANDDZ128rr %xmm0, %xmm1
+ ; CHECK: %xmm0 = VPANDrm %xmm0, %rip, 1, _, %rax, _
+ %xmm0 = VPANDQZ128rm %xmm0, %rip, 1, _, %rax, _
+ ; CHECK: %xmm0 = VPANDrr %xmm0, %xmm1
+ %xmm0 = VPANDQZ128rr %xmm0, %xmm1
+ ; CHECK: %xmm0 = VPAVGBrm %xmm0, %rip, 1, _, %rax, _
+ %xmm0 = VPAVGBZ128rm %xmm0, %rip, 1, _, %rax, _
+ ; CHECK: %xmm0 = VPAVGBrr %xmm0, %xmm1
+ %xmm0 = VPAVGBZ128rr %xmm0, %xmm1
+ ; CHECK: %xmm0 = VPAVGWrm %xmm0, %rip, 1, _, %rax, _
+ %xmm0 = VPAVGWZ128rm %xmm0, %rip, 1, _, %rax, _
+ ; CHECK: %xmm0 = VPAVGWrr %xmm0, %xmm1
+ %xmm0 = VPAVGWZ128rr %xmm0, %xmm1
+ ; CHECK: %xmm0 = VPMAXSBrm %xmm0, %rip, 1, _, %rax, _
+ %xmm0 = VPMAXSBZ128rm %xmm0, %rip, 1, _, %rax, _
+ ; CHECK: %xmm0 = VPMAXSBrr %xmm0, %xmm1
+ %xmm0 = VPMAXSBZ128rr %xmm0, %xmm1
+ ; CHECK: %xmm0 = VPMAXSDrm %xmm0, %rip, 1, _, %rax, _
+ %xmm0 = VPMAXSDZ128rm %xmm0, %rip, 1, _, %rax, _
+ ; CHECK: %xmm0 = VPMAXSDrr %xmm0, %xmm1
+ %xmm0 = VPMAXSDZ128rr %xmm0, %xmm1
+ ; CHECK: %xmm0 = VPMAXSWrm %xmm0, %rip, 1, _, %rax, _
+ %xmm0 = VPMAXSWZ128rm %xmm0, %rip, 1, _, %rax, _
+ ; CHECK: %xmm0 = VPMAXSWrr %xmm0, %xmm1
+ %xmm0 = VPMAXSWZ128rr %xmm0, %xmm1
+ ; CHECK: %xmm0 = VPMAXUBrm %xmm0, %rip, 1, _, %rax, _
+ %xmm0 = VPMAXUBZ128rm %xmm0, %rip, 1, _, %rax, _
+ ; CHECK: %xmm0 = VPMAXUBrr %xmm0, %xmm1
+ %xmm0 = VPMAXUBZ128rr %xmm0, %xmm1
+ ; CHECK: %xmm0 = VPMAXUDrm %xmm0, %rip, 1, _, %rax, _
+ %xmm0 = VPMAXUDZ128rm %xmm0, %rip, 1, _, %rax, _
+ ; CHECK: %xmm0 = VPMAXUDrr %xmm0, %xmm1
+ %xmm0 = VPMAXUDZ128rr %xmm0, %xmm1
+ ; CHECK: %xmm0 = VPMAXUWrm %xmm0, %rip, 1, _, %rax, _
+ %xmm0 = VPMAXUWZ128rm %xmm0, %rip, 1, _, %rax, _
+ ; CHECK: %xmm0 = VPMAXUWrr %xmm0, %xmm1
+ %xmm0 = VPMAXUWZ128rr %xmm0, %xmm1
+ ; CHECK: %xmm0 = VPMINSBrm %xmm0, %rip, 1, _, %rax, _
+ %xmm0 = VPMINSBZ128rm %xmm0, %rip, 1, _, %rax, _
+ ; CHECK: %xmm0 = VPMINSBrr %xmm0, %xmm1
+ %xmm0 = VPMINSBZ128rr %xmm0, %xmm1
+ ; CHECK: %xmm0 = VPMINSDrm %xmm0, %rip, 1, _, %rax, _
+ %xmm0 = VPMINSDZ128rm %xmm0, %rip, 1, _, %rax, _
+ ; CHECK: %xmm0 = VPMINSDrr %xmm0, %xmm1
+ %xmm0 = VPMINSDZ128rr %xmm0, %xmm1
+ ; CHECK: %xmm0 = VPMINSWrm %xmm0, %rip, 1, _, %rax, _
+ %xmm0 = VPMINSWZ128rm %xmm0, %rip, 1, _, %rax, _
+ ; CHECK: %xmm0 = VPMINSWrr %xmm0, %xmm1
+ %xmm0 = VPMINSWZ128rr %xmm0, %xmm1
+ ; CHECK: %xmm0 = VPMINUBrm %xmm0, %rip, 1, _, %rax, _
+ %xmm0 = VPMINUBZ128rm %xmm0, %rip, 1, _, %rax, _
+ ; CHECK: %xmm0 = VPMINUBrr %xmm0, %xmm1
+ %xmm0 = VPMINUBZ128rr %xmm0, %xmm1
+ ; CHECK: %xmm0 = VPMINUDrm %xmm0, %rip, 1, _, %rax, _
+ %xmm0 = VPMINUDZ128rm %xmm0, %rip, 1, _, %rax, _
+ ; CHECK: %xmm0 = VPMINUDrr %xmm0, %xmm1
+ %xmm0 = VPMINUDZ128rr %xmm0, %xmm1
+ ; CHECK: %xmm0 = VPMINUWrm %xmm0, %rip, 1, _, %rax, _
+ %xmm0 = VPMINUWZ128rm %xmm0, %rip, 1, _, %rax, _
+ ; CHECK: %xmm0 = VPMINUWrr %xmm0, %xmm1
+ %xmm0 = VPMINUWZ128rr %xmm0, %xmm1
+ ; CHECK: %xmm0 = VPMULDQrm %xmm0, %rip, 1, _, %rax, _
+ %xmm0 = VPMULDQZ128rm %xmm0, %rip, 1, _, %rax, _
+ ; CHECK: %xmm0 = VPMULDQrr %xmm0, %xmm1
+ %xmm0 = VPMULDQZ128rr %xmm0, %xmm1
+ ; CHECK: %xmm0 = VPMULHRSWrm %xmm0, %rip, 1, _, %rax, _
+ %xmm0 = VPMULHRSWZ128rm %xmm0, %rip, 1, _, %rax, _
+ ; CHECK: %xmm0 = VPMULHRSWrr %xmm0, %xmm1
+ %xmm0 = VPMULHRSWZ128rr %xmm0, %xmm1
+ ; CHECK: %xmm0 = VPMULHUWrm %xmm0, %rip, 1, _, %rax, _
+ %xmm0 = VPMULHUWZ128rm %xmm0, %rip, 1, _, %rax, _
+ ; CHECK: %xmm0 = VPMULHUWrr %xmm0, %xmm1
+ %xmm0 = VPMULHUWZ128rr %xmm0, %xmm1
+ ; CHECK: %xmm0 = VPMULHWrm %xmm0, %rip, 1, _, %rax, _
+ %xmm0 = VPMULHWZ128rm %xmm0, %rip, 1, _, %rax, _
+ ; CHECK: %xmm0 = VPMULHWrr %xmm0, %xmm1
+ %xmm0 = VPMULHWZ128rr %xmm0, %xmm1
+ ; CHECK: %xmm0 = VPMULLDrm %xmm0, %rip, 1, _, %rax, _
+ %xmm0 = VPMULLDZ128rm %xmm0, %rip, 1, _, %rax, _
+ ; CHECK: %xmm0 = VPMULLDrr %xmm0, %xmm1
+ %xmm0 = VPMULLDZ128rr %xmm0, %xmm1
+ ; CHECK: %xmm0 = VPMULLWrm %xmm0, %rip, 1, _, %rax, _
+ %xmm0 = VPMULLWZ128rm %xmm0, %rip, 1, _, %rax, _
+ ; CHECK: %xmm0 = VPMULLWrr %xmm0, %xmm1
+ %xmm0 = VPMULLWZ128rr %xmm0, %xmm1
+ ; CHECK: %xmm0 = VPMULUDQrm %xmm0, %rip, 1, _, %rax, _
+ %xmm0 = VPMULUDQZ128rm %xmm0, %rip, 1, _, %rax, _
+ ; CHECK: %xmm0 = VPMULUDQrr %xmm0, %xmm1
+ %xmm0 = VPMULUDQZ128rr %xmm0, %xmm1
+ ; CHECK: %xmm0 = VPORrm %xmm0, %rip, 1, _, %rax, _
+ %xmm0 = VPORDZ128rm %xmm0, %rip, 1, _, %rax, _
+ ; CHECK: %xmm0 = VPORrr %xmm0, %xmm1
+ %xmm0 = VPORDZ128rr %xmm0, %xmm1
+ ; CHECK: %xmm0 = VPORrm %xmm0, %rip, 1, _, %rax, _
+ %xmm0 = VPORQZ128rm %xmm0, %rip, 1, _, %rax, _
+ ; CHECK: %xmm0 = VPORrr %xmm0, %xmm1
+ %xmm0 = VPORQZ128rr %xmm0, %xmm1
+ ; CHECK: %xmm0 = VPSUBBrm %xmm0, %rip, 1, _, %rax, _
+ %xmm0 = VPSUBBZ128rm %xmm0, %rip, 1, _, %rax, _
+ ; CHECK: %xmm0 = VPSUBBrr %xmm0, %xmm1
+ %xmm0 = VPSUBBZ128rr %xmm0, %xmm1
+ ; CHECK: %xmm0 = VPSUBDrm %xmm0, %rip, 1, _, %rax, _
+ %xmm0 = VPSUBDZ128rm %xmm0, %rip, 1, _, %rax, _
+ ; CHECK: %xmm0 = VPSUBDrr %xmm0, %xmm1
+ %xmm0 = VPSUBDZ128rr %xmm0, %xmm1
+ ; CHECK: %xmm0 = VPSUBQrm %xmm0, %rip, 1, _, %rax, _
+ %xmm0 = VPSUBQZ128rm %xmm0, %rip, 1, _, %rax, _
+ ; CHECK: %xmm0 = VPSUBQrr %xmm0, %xmm1
+ %xmm0 = VPSUBQZ128rr %xmm0, %xmm1
+ ; CHECK: %xmm0 = VPSUBSBrm %xmm0, %rip, 1, _, %rax, _
+ %xmm0 = VPSUBSBZ128rm %xmm0, %rip, 1, _, %rax, _
+ ; CHECK: %xmm0 = VPSUBSBrr %xmm0, %xmm1
+ %xmm0 = VPSUBSBZ128rr %xmm0, %xmm1
+ ; CHECK: %xmm0 = VPSUBSWrm %xmm0, %rip, 1, _, %rax, _
+ %xmm0 = VPSUBSWZ128rm %xmm0, %rip, 1, _, %rax, _
+ ; CHECK: %xmm0 = VPSUBSWrr %xmm0, %xmm1
+ %xmm0 = VPSUBSWZ128rr %xmm0, %xmm1
+ ; CHECK: %xmm0 = VPSUBUSBrm %xmm0, %rip, 1, _, %rax, _
+ %xmm0 = VPSUBUSBZ128rm %xmm0, %rip, 1, _, %rax, _
+ ; CHECK: %xmm0 = VPSUBUSBrr %xmm0, %xmm1
+ %xmm0 = VPSUBUSBZ128rr %xmm0, %xmm1
+ ; CHECK: %xmm0 = VPSUBUSWrm %xmm0, %rip, 1, _, %rax, _
+ %xmm0 = VPSUBUSWZ128rm %xmm0, %rip, 1, _, %rax, _
+ ; CHECK: %xmm0 = VPSUBUSWrr %xmm0, %xmm1
+ %xmm0 = VPSUBUSWZ128rr %xmm0, %xmm1
+ ; CHECK: %xmm0 = VPSUBWrm %xmm0, %rip, 1, _, %rax, _
+ %xmm0 = VPSUBWZ128rm %xmm0, %rip, 1, _, %rax, _
+ ; CHECK: %xmm0 = VPSUBWrr %xmm0, %xmm1
+ %xmm0 = VPSUBWZ128rr %xmm0, %xmm1
+ ; CHECK: %xmm0 = VADDPDrm %xmm0, %rip, 1, _, %rax, _
+ %xmm0 = VADDPDZ128rm %xmm0, %rip, 1, _, %rax, _
+ ; CHECK: %xmm0 = VADDPDrr %xmm0, %xmm1
+ %xmm0 = VADDPDZ128rr %xmm0, %xmm1
+ ; CHECK: %xmm0 = VADDPSrm %xmm0, %rip, 1, _, %rax, _
+ %xmm0 = VADDPSZ128rm %xmm0, %rip, 1, _, %rax, _
+ ; CHECK: %xmm0 = VADDPSrr %xmm0, %xmm1
+ %xmm0 = VADDPSZ128rr %xmm0, %xmm1
+ ; CHECK: %xmm0 = VANDNPDrm %xmm0, %rip, 1, _, %rax, _
+ %xmm0 = VANDNPDZ128rm %xmm0, %rip, 1, _, %rax, _
+ ; CHECK: %xmm0 = VANDNPDrr %xmm0, %xmm1
+ %xmm0 = VANDNPDZ128rr %xmm0, %xmm1
+ ; CHECK: %xmm0 = VANDNPSrm %xmm0, %rip, 1, _, %rax, _
+ %xmm0 = VANDNPSZ128rm %xmm0, %rip, 1, _, %rax, _
+ ; CHECK: %xmm0 = VANDNPSrr %xmm0, %xmm1
+ %xmm0 = VANDNPSZ128rr %xmm0, %xmm1
+ ; CHECK: %xmm0 = VANDPDrm %xmm0, %rip, 1, _, %rax, _
+ %xmm0 = VANDPDZ128rm %xmm0, %rip, 1, _, %rax, _
+ ; CHECK: %xmm0 = VANDPDrr %xmm0, %xmm1
+ %xmm0 = VANDPDZ128rr %xmm0, %xmm1
+ ; CHECK: %xmm0 = VANDPSrm %xmm0, %rip, 1, _, %rax, _
+ %xmm0 = VANDPSZ128rm %xmm0, %rip, 1, _, %rax, _
+ ; CHECK: %xmm0 = VANDPSrr %xmm0, %xmm1
+ %xmm0 = VANDPSZ128rr %xmm0, %xmm1
+ ; CHECK: %xmm0 = VDIVPDrm %xmm0, %rip, 1, _, %rax, _
+ %xmm0 = VDIVPDZ128rm %xmm0, %rip, 1, _, %rax, _
+ ; CHECK: %xmm0 = VDIVPDrr %xmm0, %xmm1
+ %xmm0 = VDIVPDZ128rr %xmm0, %xmm1
+ ; CHECK: %xmm0 = VDIVPSrm %xmm0, %rip, 1, _, %rax, _
+ %xmm0 = VDIVPSZ128rm %xmm0, %rip, 1, _, %rax, _
+ ; CHECK: %xmm0 = VDIVPSrr %xmm0, %xmm1
+ %xmm0 = VDIVPSZ128rr %xmm0, %xmm1
+ ; CHECK: %xmm0 = VPXORrm %xmm0, %rip, 1, _, %rax, _
+ %xmm0 = VPXORDZ128rm %xmm0, %rip, 1, _, %rax, _
+ ; CHECK: %xmm0 = VPXORrr %xmm0, %xmm1
+ %xmm0 = VPXORDZ128rr %xmm0, %xmm1
+ ; CHECK: %xmm0 = VPXORrm %xmm0, %rip, 1, _, %rax, _
+ %xmm0 = VPXORQZ128rm %xmm0, %rip, 1, _, %rax, _
+ ; CHECK: %xmm0 = VPXORrr %xmm0, %xmm1
+ %xmm0 = VPXORQZ128rr %xmm0, %xmm1
+ ; CHECK: %xmm0 = VSUBPDrm %xmm0, %rip, 1, _, %rax, _
+ %xmm0 = VSUBPDZ128rm %xmm0, %rip, 1, _, %rax, _
+ ; CHECK: %xmm0 = VSUBPDrr %xmm0, %xmm1
+ %xmm0 = VSUBPDZ128rr %xmm0, %xmm1
+ ; CHECK: %xmm0 = VSUBPSrm %xmm0, %rip, 1, _, %rax, _
+ %xmm0 = VSUBPSZ128rm %xmm0, %rip, 1, _, %rax, _
+ ; CHECK: %xmm0 = VSUBPSrr %xmm0, %xmm1
+ %xmm0 = VSUBPSZ128rr %xmm0, %xmm1
+ ; CHECK: %xmm0 = VXORPDrm %xmm0, %rip, 1, _, %rax, _
+ %xmm0 = VXORPDZ128rm %xmm0, %rip, 1, _, %rax, _
+ ; CHECK: %xmm0 = VXORPDrr %xmm0, %xmm1
+ %xmm0 = VXORPDZ128rr %xmm0, %xmm1
+ ; CHECK: %xmm0 = VXORPSrm %xmm0, %rip, 1, _, %rax, _
+ %xmm0 = VXORPSZ128rm %xmm0, %rip, 1, _, %rax, _
+ ; CHECK: %xmm0 = VXORPSrr %xmm0, %xmm1
+ %xmm0 = VXORPSZ128rr %xmm0, %xmm1
+ ; CHECK: %xmm0 = VPMADDUBSWrm %xmm0, %rip, 1, _, %rax, _
+ %xmm0 = VPMADDUBSWZ128rm %xmm0, %rip, 1, _, %rax, _
+ ; CHECK: %xmm0 = VPMADDUBSWrr %xmm0, %xmm1
+ %xmm0 = VPMADDUBSWZ128rr %xmm0, %xmm1
+ ; CHECK: %xmm0 = VPMADDWDrm %xmm0, %rip, 1, _, %rax, _
+ %xmm0 = VPMADDWDZ128rm %xmm0, %rip, 1, _, %rax, _
+ ; CHECK: %xmm0 = VPMADDWDrr %xmm0, %xmm1
+ %xmm0 = VPMADDWDZ128rr %xmm0, %xmm1
+ ; CHECK: %xmm0 = VPACKSSDWrm %xmm0, %rip, 1, _, %rax, _
+ %xmm0 = VPACKSSDWZ128rm %xmm0, %rip, 1, _, %rax, _
+ ; CHECK: %xmm0 = VPACKSSDWrr %xmm0, %xmm1
+ %xmm0 = VPACKSSDWZ128rr %xmm0, %xmm1
+ ; CHECK: %xmm0 = VPACKSSWBrm %xmm0, %rip, 1, _, %rax, _
+ %xmm0 = VPACKSSWBZ128rm %xmm0, %rip, 1, _, %rax, _
+ ; CHECK: %xmm0 = VPACKSSWBrr %xmm0, %xmm1
+ %xmm0 = VPACKSSWBZ128rr %xmm0, %xmm1
+ ; CHECK: %xmm0 = VPACKUSDWrm %xmm0, %rip, 1, _, %rax, _
+ %xmm0 = VPACKUSDWZ128rm %xmm0, %rip, 1, _, %rax, _
+ ; CHECK: %xmm0 = VPACKUSDWrr %xmm0, %xmm1
+ %xmm0 = VPACKUSDWZ128rr %xmm0, %xmm1
+ ; CHECK: %xmm0 = VPACKUSWBrm %xmm0, %rip, 1, _, %rax, _
+ %xmm0 = VPACKUSWBZ128rm %xmm0, %rip, 1, _, %rax, _
+ ; CHECK: %xmm0 = VPACKUSWBrr %xmm0, %xmm1
+ %xmm0 = VPACKUSWBZ128rr %xmm0, %xmm1
+ ; CHECK: %xmm0 = VPUNPCKHBWrm %xmm0, %rip, 1, _, %rax, _
+ %xmm0 = VPUNPCKHBWZ128rm %xmm0, %rip, 1, _, %rax, _
+ ; CHECK: %xmm0 = VPUNPCKHBWrr %xmm0, %xmm1
+ %xmm0 = VPUNPCKHBWZ128rr %xmm0, %xmm1
+ ; CHECK: %xmm0 = VPUNPCKHDQrm %xmm0, %rip, 1, _, %rax, _
+ %xmm0 = VPUNPCKHDQZ128rm %xmm0, %rip, 1, _, %rax, _
+ ; CHECK: %xmm0 = VPUNPCKHDQrr %xmm0, %xmm1
+ %xmm0 = VPUNPCKHDQZ128rr %xmm0, %xmm1
+ ; CHECK: %xmm0 = VPUNPCKHQDQrm %xmm0, %rip, 1, _, %rax, _
+ %xmm0 = VPUNPCKHQDQZ128rm %xmm0, %rip, 1, _, %rax, _
+ ; CHECK: %xmm0 = VPUNPCKHQDQrr %xmm0, %xmm1
+ %xmm0 = VPUNPCKHQDQZ128rr %xmm0, %xmm1
+ ; CHECK: %xmm0 = VPUNPCKHWDrm %xmm0, %rip, 1, _, %rax, _
+ %xmm0 = VPUNPCKHWDZ128rm %xmm0, %rip, 1, _, %rax, _
+ ; CHECK: %xmm0 = VPUNPCKHWDrr %xmm0, %xmm1
+ %xmm0 = VPUNPCKHWDZ128rr %xmm0, %xmm1
+ ; CHECK: %xmm0 = VPUNPCKLBWrm %xmm0, %rip, 1, _, %rax, _
+ %xmm0 = VPUNPCKLBWZ128rm %xmm0, %rip, 1, _, %rax, _
+ ; CHECK: %xmm0 = VPUNPCKLBWrr %xmm0, %xmm1
+ %xmm0 = VPUNPCKLBWZ128rr %xmm0, %xmm1
+ ; CHECK: %xmm0 = VPUNPCKLDQrm %xmm0, %rip, 1, _, %rax, _
+ %xmm0 = VPUNPCKLDQZ128rm %xmm0, %rip, 1, _, %rax, _
+ ; CHECK: %xmm0 = VPUNPCKLDQrr %xmm0, %xmm1
+ %xmm0 = VPUNPCKLDQZ128rr %xmm0, %xmm1
+ ; CHECK: %xmm0 = VPUNPCKLQDQrm %xmm0, %rip, 1, _, %rax, _
+ %xmm0 = VPUNPCKLQDQZ128rm %xmm0, %rip, 1, _, %rax, _
+ ; CHECK: %xmm0 = VPUNPCKLQDQrr %xmm0, %xmm1
+ %xmm0 = VPUNPCKLQDQZ128rr %xmm0, %xmm1
+ ; CHECK: %xmm0 = VPUNPCKLWDrm %xmm0, %rip, 1, _, %rax, _
+ %xmm0 = VPUNPCKLWDZ128rm %xmm0, %rip, 1, _, %rax, _
+ ; CHECK: %xmm0 = VPUNPCKLWDrr %xmm0, %xmm1
+ %xmm0 = VPUNPCKLWDZ128rr %xmm0, %xmm1
+ ; CHECK: %xmm0 = VUNPCKHPDrm %xmm0, %rip, 1, _, %rax, _
+ %xmm0 = VUNPCKHPDZ128rm %xmm0, %rip, 1, _, %rax, _
+ ; CHECK: %xmm0 = VUNPCKHPDrr %xmm0, %xmm1
+ %xmm0 = VUNPCKHPDZ128rr %xmm0, %xmm1
+ ; CHECK: %xmm0 = VUNPCKHPSrm %xmm0, %rip, 1, _, %rax, _
+ %xmm0 = VUNPCKHPSZ128rm %xmm0, %rip, 1, _, %rax, _
+ ; CHECK: %xmm0 = VUNPCKHPSrr %xmm0, %xmm1
+ %xmm0 = VUNPCKHPSZ128rr %xmm0, %xmm1
+ ; CHECK: %xmm0 = VUNPCKLPDrm %xmm0, %rip, 1, _, %rax, _
+ %xmm0 = VUNPCKLPDZ128rm %xmm0, %rip, 1, _, %rax, _
+ ; CHECK: %xmm0 = VUNPCKLPDrr %xmm0, %xmm1
+ %xmm0 = VUNPCKLPDZ128rr %xmm0, %xmm1
+ ; CHECK: %xmm0 = VUNPCKLPSrm %xmm0, %rip, 1, _, %rax, _
+ %xmm0 = VUNPCKLPSZ128rm %xmm0, %rip, 1, _, %rax, _
+ ; CHECK: %xmm0 = VUNPCKLPSrr %xmm0, %xmm1
+ %xmm0 = VUNPCKLPSZ128rr %xmm0, %xmm1
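+ ; 128-bit FMA-family forms (VFMADD*, VFMADDSUB*, VFMSUB*, VFMSUBADD*, VFNMADD*, VFNMSUB*)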
+ ; CHECK: %xmm0 = VFMADD132PDm %xmm0, %xmm0, %rsi, 1, _, 0, _
+ %xmm0 = VFMADD132PDZ128m %xmm0, %xmm0, %rsi, 1, _, 0, _
+ ; CHECK: %xmm0 = VFMADD132PDr %xmm0, %xmm1, %xmm2
+ %xmm0 = VFMADD132PDZ128r %xmm0, %xmm1, %xmm2
+ ; CHECK: %xmm0 = VFMADD132PSm %xmm0, %xmm0, %rsi, 1, _, 0, _
+ %xmm0 = VFMADD132PSZ128m %xmm0, %xmm0, %rsi, 1, _, 0, _
+ ; CHECK: %xmm0 = VFMADD132PSr %xmm0, %xmm1, %xmm2
+ %xmm0 = VFMADD132PSZ128r %xmm0, %xmm1, %xmm2
+ ; CHECK: %xmm0 = VFMADD213PDm %xmm0, %xmm0, %rsi, 1, _, 0, _
+ %xmm0 = VFMADD213PDZ128m %xmm0, %xmm0, %rsi, 1, _, 0, _
+ ; CHECK: %xmm0 = VFMADD213PDr %xmm0, %xmm1, %xmm2
+ %xmm0 = VFMADD213PDZ128r %xmm0, %xmm1, %xmm2
+ ; CHECK: %xmm0 = VFMADD213PSm %xmm0, %xmm0, %rsi, 1, _, 0, _
+ %xmm0 = VFMADD213PSZ128m %xmm0, %xmm0, %rsi, 1, _, 0, _
+ ; CHECK: %xmm0 = VFMADD213PSr %xmm0, %xmm1, %xmm2
+ %xmm0 = VFMADD213PSZ128r %xmm0, %xmm1, %xmm2
+ ; CHECK: %xmm0 = VFMADD231PDm %xmm0, %xmm0, %rsi, 1, _, 0, _
+ %xmm0 = VFMADD231PDZ128m %xmm0, %xmm0, %rsi, 1, _, 0, _
+ ; CHECK: %xmm0 = VFMADD231PDr %xmm0, %xmm1, %xmm2
+ %xmm0 = VFMADD231PDZ128r %xmm0, %xmm1, %xmm2
+ ; CHECK: %xmm0 = VFMADD231PSm %xmm0, %xmm0, %rsi, 1, _, 0, _
+ %xmm0 = VFMADD231PSZ128m %xmm0, %xmm0, %rsi, 1, _, 0, _
+ ; CHECK: %xmm0 = VFMADD231PSr %xmm0, %xmm1, %xmm2
+ %xmm0 = VFMADD231PSZ128r %xmm0, %xmm1, %xmm2
+ ; CHECK: %xmm0 = VFMADDSUB132PDm %xmm0, %xmm0, %rsi, 1, _, 0, _
+ %xmm0 = VFMADDSUB132PDZ128m %xmm0, %xmm0, %rsi, 1, _, 0, _
+ ; CHECK: %xmm0 = VFMADDSUB132PDr %xmm0, %xmm1, %xmm2
+ %xmm0 = VFMADDSUB132PDZ128r %xmm0, %xmm1, %xmm2
+ ; CHECK: %xmm0 = VFMADDSUB132PSm %xmm0, %xmm0, %rsi, 1, _, 0, _
+ %xmm0 = VFMADDSUB132PSZ128m %xmm0, %xmm0, %rsi, 1, _, 0, _
+ ; CHECK: %xmm0 = VFMADDSUB132PSr %xmm0, %xmm1, %xmm2
+ %xmm0 = VFMADDSUB132PSZ128r %xmm0, %xmm1, %xmm2
+ ; CHECK: %xmm0 = VFMADDSUB213PDm %xmm0, %xmm0, %rsi, 1, _, 0, _
+ %xmm0 = VFMADDSUB213PDZ128m %xmm0, %xmm0, %rsi, 1, _, 0, _
+ ; CHECK: %xmm0 = VFMADDSUB213PDr %xmm0, %xmm1, %xmm2
+ %xmm0 = VFMADDSUB213PDZ128r %xmm0, %xmm1, %xmm2
+ ; CHECK: %xmm0 = VFMADDSUB213PSm %xmm0, %xmm0, %rsi, 1, _, 0, _
+ %xmm0 = VFMADDSUB213PSZ128m %xmm0, %xmm0, %rsi, 1, _, 0, _
+ ; CHECK: %xmm0 = VFMADDSUB213PSr %xmm0, %xmm1, %xmm2
+ %xmm0 = VFMADDSUB213PSZ128r %xmm0, %xmm1, %xmm2
+ ; CHECK: %xmm0 = VFMADDSUB231PDm %xmm0, %xmm0, %rsi, 1, _, 0, _
+ %xmm0 = VFMADDSUB231PDZ128m %xmm0, %xmm0, %rsi, 1, _, 0, _
+ ; CHECK: %xmm0 = VFMADDSUB231PDr %xmm0, %xmm1, %xmm2
+ %xmm0 = VFMADDSUB231PDZ128r %xmm0, %xmm1, %xmm2
+ ; CHECK: %xmm0 = VFMADDSUB231PSm %xmm0, %xmm0, %rsi, 1, _, 0, _
+ %xmm0 = VFMADDSUB231PSZ128m %xmm0, %xmm0, %rsi, 1, _, 0, _
+ ; CHECK: %xmm0 = VFMADDSUB231PSr %xmm0, %xmm1, %xmm2
+ %xmm0 = VFMADDSUB231PSZ128r %xmm0, %xmm1, %xmm2
+ ; CHECK: %xmm0 = VFMSUB132PDm %xmm0, %xmm0, %rsi, 1, _, 0, _
+ %xmm0 = VFMSUB132PDZ128m %xmm0, %xmm0, %rsi, 1, _, 0, _
+ ; CHECK: %xmm0 = VFMSUB132PDr %xmm0, %xmm1, %xmm2
+ %xmm0 = VFMSUB132PDZ128r %xmm0, %xmm1, %xmm2
+ ; CHECK: %xmm0 = VFMSUB132PSm %xmm0, %xmm0, %rsi, 1, _, 0, _
+ %xmm0 = VFMSUB132PSZ128m %xmm0, %xmm0, %rsi, 1, _, 0, _
+ ; CHECK: %xmm0 = VFMSUB132PSr %xmm0, %xmm1, %xmm2
+ %xmm0 = VFMSUB132PSZ128r %xmm0, %xmm1, %xmm2
+ ; CHECK: %xmm0 = VFMSUB213PDm %xmm0, %xmm0, %rsi, 1, _, 0, _
+ %xmm0 = VFMSUB213PDZ128m %xmm0, %xmm0, %rsi, 1, _, 0, _
+ ; CHECK: %xmm0 = VFMSUB213PDr %xmm0, %xmm1, %xmm2
+ %xmm0 = VFMSUB213PDZ128r %xmm0, %xmm1, %xmm2
+ ; CHECK: %xmm0 = VFMSUB213PSm %xmm0, %xmm0, %rsi, 1, _, 0, _
+ %xmm0 = VFMSUB213PSZ128m %xmm0, %xmm0, %rsi, 1, _, 0, _
+ ; CHECK: %xmm0 = VFMSUB213PSr %xmm0, %xmm1, %xmm2
+ %xmm0 = VFMSUB213PSZ128r %xmm0, %xmm1, %xmm2
+ ; CHECK: %xmm0 = VFMSUB231PDm %xmm0, %xmm0, %rsi, 1, _, 0, _
+ %xmm0 = VFMSUB231PDZ128m %xmm0, %xmm0, %rsi, 1, _, 0, _
+ ; CHECK: %xmm0 = VFMSUB231PDr %xmm0, %xmm1, %xmm2
+ %xmm0 = VFMSUB231PDZ128r %xmm0, %xmm1, %xmm2
+ ; CHECK: %xmm0 = VFMSUB231PSm %xmm0, %xmm0, %rsi, 1, _, 0, _
+ %xmm0 = VFMSUB231PSZ128m %xmm0, %xmm0, %rsi, 1, _, 0, _
+ ; CHECK: %xmm0 = VFMSUB231PSr %xmm0, %xmm1, %xmm2
+ %xmm0 = VFMSUB231PSZ128r %xmm0, %xmm1, %xmm2
+ ; CHECK: %xmm0 = VFMSUBADD132PDm %xmm0, %xmm0, %rsi, 1, _, 0, _
+ %xmm0 = VFMSUBADD132PDZ128m %xmm0, %xmm0, %rsi, 1, _, 0, _
+ ; CHECK: %xmm0 = VFMSUBADD132PDr %xmm0, %xmm1, %xmm2
+ %xmm0 = VFMSUBADD132PDZ128r %xmm0, %xmm1, %xmm2
+ ; CHECK: %xmm0 = VFMSUBADD132PSm %xmm0, %xmm0, %rsi, 1, _, 0, _
+ %xmm0 = VFMSUBADD132PSZ128m %xmm0, %xmm0, %rsi, 1, _, 0, _
+ ; CHECK: %xmm0 = VFMSUBADD132PSr %xmm0, %xmm1, %xmm2
+ %xmm0 = VFMSUBADD132PSZ128r %xmm0, %xmm1, %xmm2
+ ; CHECK: %xmm0 = VFMSUBADD213PDm %xmm0, %xmm0, %rsi, 1, _, 0, _
+ %xmm0 = VFMSUBADD213PDZ128m %xmm0, %xmm0, %rsi, 1, _, 0, _
+ ; CHECK: %xmm0 = VFMSUBADD213PDr %xmm0, %xmm1, %xmm2
+ %xmm0 = VFMSUBADD213PDZ128r %xmm0, %xmm1, %xmm2
+ ; CHECK: %xmm0 = VFMSUBADD213PSm %xmm0, %xmm0, %rsi, 1, _, 0, _
+ %xmm0 = VFMSUBADD213PSZ128m %xmm0, %xmm0, %rsi, 1, _, 0, _
+ ; CHECK: %xmm0 = VFMSUBADD213PSr %xmm0, %xmm1, %xmm2
+ %xmm0 = VFMSUBADD213PSZ128r %xmm0, %xmm1, %xmm2
+ ; CHECK: %xmm0 = VFMSUBADD231PDm %xmm0, %xmm0, %rsi, 1, _, 0, _
+ %xmm0 = VFMSUBADD231PDZ128m %xmm0, %xmm0, %rsi, 1, _, 0, _
+ ; CHECK: %xmm0 = VFMSUBADD231PDr %xmm0, %xmm1, %xmm2
+ %xmm0 = VFMSUBADD231PDZ128r %xmm0, %xmm1, %xmm2
+ ; CHECK: %xmm0 = VFMSUBADD231PSm %xmm0, %xmm0, %rsi, 1, _, 0, _
+ %xmm0 = VFMSUBADD231PSZ128m %xmm0, %xmm0, %rsi, 1, _, 0, _
+ ; CHECK: %xmm0 = VFMSUBADD231PSr %xmm0, %xmm1, %xmm2
+ %xmm0 = VFMSUBADD231PSZ128r %xmm0, %xmm1, %xmm2
+ ; CHECK: %xmm0 = VFNMADD132PDm %xmm0, %xmm0, %rsi, 1, _, 0, _
+ %xmm0 = VFNMADD132PDZ128m %xmm0, %xmm0, %rsi, 1, _, 0, _
+ ; CHECK: %xmm0 = VFNMADD132PDr %xmm0, %xmm1, %xmm2
+ %xmm0 = VFNMADD132PDZ128r %xmm0, %xmm1, %xmm2
+ ; CHECK: %xmm0 = VFNMADD132PSm %xmm0, %xmm0, %rsi, 1, _, 0, _
+ %xmm0 = VFNMADD132PSZ128m %xmm0, %xmm0, %rsi, 1, _, 0, _
+ ; CHECK: %xmm0 = VFNMADD132PSr %xmm0, %xmm1, %xmm2
+ %xmm0 = VFNMADD132PSZ128r %xmm0, %xmm1, %xmm2
+ ; CHECK: %xmm0 = VFNMADD213PDm %xmm0, %xmm0, %rsi, 1, _, 0, _
+ %xmm0 = VFNMADD213PDZ128m %xmm0, %xmm0, %rsi, 1, _, 0, _
+ ; CHECK: %xmm0 = VFNMADD213PDr %xmm0, %xmm1, %xmm2
+ %xmm0 = VFNMADD213PDZ128r %xmm0, %xmm1, %xmm2
+ ; CHECK: %xmm0 = VFNMADD213PSm %xmm0, %xmm0, %rsi, 1, _, 0, _
+ %xmm0 = VFNMADD213PSZ128m %xmm0, %xmm0, %rsi, 1, _, 0, _
+ ; CHECK: %xmm0 = VFNMADD213PSr %xmm0, %xmm1, %xmm2
+ %xmm0 = VFNMADD213PSZ128r %xmm0, %xmm1, %xmm2
+ ; CHECK: %xmm0 = VFNMADD231PDm %xmm0, %xmm0, %rsi, 1, _, 0, _
+ %xmm0 = VFNMADD231PDZ128m %xmm0, %xmm0, %rsi, 1, _, 0, _
+ ; CHECK: %xmm0 = VFNMADD231PDr %xmm0, %xmm1, %xmm2
+ %xmm0 = VFNMADD231PDZ128r %xmm0, %xmm1, %xmm2
+ ; CHECK: %xmm0 = VFNMADD231PSm %xmm0, %xmm0, %rsi, 1, _, 0, _
+ %xmm0 = VFNMADD231PSZ128m %xmm0, %xmm0, %rsi, 1, _, 0, _
+ ; CHECK: %xmm0 = VFNMADD231PSr %xmm0, %xmm1, %xmm2
+ %xmm0 = VFNMADD231PSZ128r %xmm0, %xmm1, %xmm2
+ ; CHECK: %xmm0 = VFNMSUB132PDm %xmm0, %xmm0, %rsi, 1, _, 0, _
+ %xmm0 = VFNMSUB132PDZ128m %xmm0, %xmm0, %rsi, 1, _, 0, _
+ ; CHECK: %xmm0 = VFNMSUB132PDr %xmm0, %xmm1, %xmm2
+ %xmm0 = VFNMSUB132PDZ128r %xmm0, %xmm1, %xmm2
+ ; CHECK: %xmm0 = VFNMSUB132PSm %xmm0, %xmm0, %rsi, 1, _, 0, _
+ %xmm0 = VFNMSUB132PSZ128m %xmm0, %xmm0, %rsi, 1, _, 0, _
+ ; CHECK: %xmm0 = VFNMSUB132PSr %xmm0, %xmm1, %xmm2
+ %xmm0 = VFNMSUB132PSZ128r %xmm0, %xmm1, %xmm2
+ ; CHECK: %xmm0 = VFNMSUB213PDm %xmm0, %xmm0, %rsi, 1, _, 0, _
+ %xmm0 = VFNMSUB213PDZ128m %xmm0, %xmm0, %rsi, 1, _, 0, _
+ ; CHECK: %xmm0 = VFNMSUB213PDr %xmm0, %xmm1, %xmm2
+ %xmm0 = VFNMSUB213PDZ128r %xmm0, %xmm1, %xmm2
+ ; CHECK: %xmm0 = VFNMSUB213PSm %xmm0, %xmm0, %rsi, 1, _, 0, _
+ %xmm0 = VFNMSUB213PSZ128m %xmm0, %xmm0, %rsi, 1, _, 0, _
+ ; CHECK: %xmm0 = VFNMSUB213PSr %xmm0, %xmm1, %xmm2
+ %xmm0 = VFNMSUB213PSZ128r %xmm0, %xmm1, %xmm2
+ ; CHECK: %xmm0 = VFNMSUB231PDm %xmm0, %xmm0, %rsi, 1, _, 0, _
+ %xmm0 = VFNMSUB231PDZ128m %xmm0, %xmm0, %rsi, 1, _, 0, _
+ ; CHECK: %xmm0 = VFNMSUB231PDr %xmm0, %xmm1, %xmm2
+ %xmm0 = VFNMSUB231PDZ128r %xmm0, %xmm1, %xmm2
+ ; CHECK: %xmm0 = VFNMSUB231PSm %xmm0, %xmm0, %rsi, 1, _, 0, _
+ %xmm0 = VFNMSUB231PSZ128m %xmm0, %xmm0, %rsi, 1, _, 0, _
+ ; CHECK: %xmm0 = VFNMSUB231PSr %xmm0, %xmm1, %xmm2
+ %xmm0 = VFNMSUB231PSZ128r %xmm0, %xmm1, %xmm2
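+ ; 128-bit packed shift forms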
+ ; CHECK: %xmm0 = VPSLLDri %xmm0, 7
+ %xmm0 = VPSLLDZ128ri %xmm0, 7
+ ; CHECK: %xmm0 = VPSLLDrm %xmm0, %rip, 1, _, %rax, _
+ %xmm0 = VPSLLDZ128rm %xmm0, %rip, 1, _, %rax, _
+ ; CHECK: %xmm0 = VPSLLDrr %xmm0, 14
+ %xmm0 = VPSLLDZ128rr %xmm0, 14
+ ; CHECK: %xmm0 = VPSLLQri %xmm0, 7
+ %xmm0 = VPSLLQZ128ri %xmm0, 7
+ ; CHECK: %xmm0 = VPSLLQrm %xmm0, %rip, 1, _, %rax, _
+ %xmm0 = VPSLLQZ128rm %xmm0, %rip, 1, _, %rax, _
+ ; CHECK: %xmm0 = VPSLLQrr %xmm0, 14
+ %xmm0 = VPSLLQZ128rr %xmm0, 14
+ ; CHECK: %xmm0 = VPSLLVDrm %xmm0, %rip, 1, _, %rax, _
+ %xmm0 = VPSLLVDZ128rm %xmm0, %rip, 1, _, %rax, _
+ ; CHECK: %xmm0 = VPSLLVDrr %xmm0, 14
+ %xmm0 = VPSLLVDZ128rr %xmm0, 14
+ ; CHECK: %xmm0 = VPSLLVQrm %xmm0, %rip, 1, _, %rax, _
+ %xmm0 = VPSLLVQZ128rm %xmm0, %rip, 1, _, %rax, _
+ ; CHECK: %xmm0 = VPSLLVQrr %xmm0, 14
+ %xmm0 = VPSLLVQZ128rr %xmm0, 14
+ ; CHECK: %xmm0 = VPSLLWri %xmm0, 7
+ %xmm0 = VPSLLWZ128ri %xmm0, 7
+ ; CHECK: %xmm0 = VPSLLWrm %xmm0, %rip, 1, _, %rax, _
+ %xmm0 = VPSLLWZ128rm %xmm0, %rip, 1, _, %rax, _
+ ; CHECK: %xmm0 = VPSLLWrr %xmm0, 14
+ %xmm0 = VPSLLWZ128rr %xmm0, 14
+ ; CHECK: %xmm0 = VPSRADri %xmm0, 7
+ %xmm0 = VPSRADZ128ri %xmm0, 7
+ ; CHECK: %xmm0 = VPSRADrm %xmm0, %rip, 1, _, %rax, _
+ %xmm0 = VPSRADZ128rm %xmm0, %rip, 1, _, %rax, _
+ ; CHECK: %xmm0 = VPSRADrr %xmm0, 14
+ %xmm0 = VPSRADZ128rr %xmm0, 14
+ ; CHECK: %xmm0 = VPSRAVDrm %xmm0, %rip, 1, _, %rax, _
+ %xmm0 = VPSRAVDZ128rm %xmm0, %rip, 1, _, %rax, _
+ ; CHECK: %xmm0 = VPSRAVDrr %xmm0, 14
+ %xmm0 = VPSRAVDZ128rr %xmm0, 14
+ ; CHECK: %xmm0 = VPSRAWri %xmm0, 7
+ %xmm0 = VPSRAWZ128ri %xmm0, 7
+ ; CHECK: %xmm0 = VPSRAWrm %xmm0, %rip, 1, _, %rax, _
+ %xmm0 = VPSRAWZ128rm %xmm0, %rip, 1, _, %rax, _
+ ; CHECK: %xmm0 = VPSRAWrr %xmm0, 14
+ %xmm0 = VPSRAWZ128rr %xmm0, 14
+ ; CHECK: %xmm0 = VPSRLDQri %xmm0, 14
+ %xmm0 = VPSRLDQZ128rr %xmm0, 14
+ ; CHECK: %xmm0 = VPSRLDri %xmm0, 7
+ %xmm0 = VPSRLDZ128ri %xmm0, 7
+ ; CHECK: %xmm0 = VPSRLDrm %xmm0, %rip, 1, _, %rax, _
+ %xmm0 = VPSRLDZ128rm %xmm0, %rip, 1, _, %rax, _
+ ; CHECK: %xmm0 = VPSRLDrr %xmm0, 14
+ %xmm0 = VPSRLDZ128rr %xmm0, 14
+ ; CHECK: %xmm0 = VPSRLQri %xmm0, 7
+ %xmm0 = VPSRLQZ128ri %xmm0, 7
+ ; CHECK: %xmm0 = VPSRLQrm %xmm0, %rip, 1, _, %rax, _
+ %xmm0 = VPSRLQZ128rm %xmm0, %rip, 1, _, %rax, _
+ ; CHECK: %xmm0 = VPSRLQrr %xmm0, 14
+ %xmm0 = VPSRLQZ128rr %xmm0, 14
+ ; CHECK: %xmm0 = VPSRLVDrm %xmm0, %rip, 1, _, %rax, _
+ %xmm0 = VPSRLVDZ128rm %xmm0, %rip, 1, _, %rax, _
+ ; CHECK: %xmm0 = VPSRLVDrr %xmm0, 14
+ %xmm0 = VPSRLVDZ128rr %xmm0, 14
+ ; CHECK: %xmm0 = VPSRLVQrm %xmm0, %rip, 1, _, %rax, _
+ %xmm0 = VPSRLVQZ128rm %xmm0, %rip, 1, _, %rax, _
+ ; CHECK: %xmm0 = VPSRLVQrr %xmm0, 14
+ %xmm0 = VPSRLVQZ128rr %xmm0, 14
+ ; CHECK: %xmm0 = VPSRLWri %xmm0, 7
+ %xmm0 = VPSRLWZ128ri %xmm0, 7
+ ; CHECK: %xmm0 = VPSRLWrm %xmm0, %rip, 1, _, %rax, _
+ %xmm0 = VPSRLWZ128rm %xmm0, %rip, 1, _, %rax, _
+ ; CHECK: %xmm0 = VPSRLWrr %xmm0, 14
+ %xmm0 = VPSRLWZ128rr %xmm0, 14
+ ; CHECK: %xmm0 = VPERMILPDmi %rdi, 1, _, 0, _, _
+ %xmm0 = VPERMILPDZ128mi %rdi, 1, _, 0, _, _
+ ; CHECK: %xmm0 = VPERMILPDri %xmm0, 9
+ %xmm0 = VPERMILPDZ128ri %xmm0, 9
+ ; CHECK: %xmm0 = VPERMILPDrm %xmm0, %rdi, 1, _, 0, _
+ %xmm0 = VPERMILPDZ128rm %xmm0, %rdi, 1, _, 0, _
+ ; CHECK: %xmm0 = VPERMILPDrr %xmm0, %xmm1
+ %xmm0 = VPERMILPDZ128rr %xmm0, %xmm1
+ ; CHECK: %xmm0 = VPERMILPSmi %rdi, 1, _, 0, _, _
+ %xmm0 = VPERMILPSZ128mi %rdi, 1, _, 0, _, _
+ ; CHECK: %xmm0 = VPERMILPSri %xmm0, 9
+ %xmm0 = VPERMILPSZ128ri %xmm0, 9
+ ; CHECK: %xmm0 = VPERMILPSrm %xmm0, %rdi, 1, _, 0, _
+ %xmm0 = VPERMILPSZ128rm %xmm0, %rdi, 1, _, 0, _
+ ; CHECK: %xmm0 = VPERMILPSrr %xmm0, %xmm1
+ %xmm0 = VPERMILPSZ128rr %xmm0, %xmm1
+ ; CHECK: %xmm0 = VCVTPH2PSrm %rdi, %xmm0, 1, _, 0
+ %xmm0 = VCVTPH2PSZ128rm %rdi, %xmm0, 1, _, 0
+ ; CHECK: %xmm0 = VCVTPH2PSrr %xmm0
+ %xmm0 = VCVTPH2PSZ128rr %xmm0
+ ; CHECK: %xmm0 = VCVTDQ2PDrm %rdi, %xmm0, 1, _, 0
+ %xmm0 = VCVTDQ2PDZ128rm %rdi, %xmm0, 1, _, 0
+ ; CHECK: %xmm0 = VCVTDQ2PDrr %xmm0
+ %xmm0 = VCVTDQ2PDZ128rr %xmm0
+ ; CHECK: %xmm0 = VCVTDQ2PSrm %rdi, %xmm0, 1, _, 0
+ %xmm0 = VCVTDQ2PSZ128rm %rdi, %xmm0, 1, _, 0
+ ; CHECK: %xmm0 = VCVTDQ2PSrr %xmm0
+ %xmm0 = VCVTDQ2PSZ128rr %xmm0
+ ; CHECK: %xmm0 = VCVTPD2DQrm %rdi, %xmm0, 1, _, 0
+ %xmm0 = VCVTPD2DQZ128rm %rdi, %xmm0, 1, _, 0
+ ; CHECK: %xmm0 = VCVTPD2DQrr %xmm0
+ %xmm0 = VCVTPD2DQZ128rr %xmm0
+ ; CHECK: %xmm0 = VCVTPD2PSrm %rdi, %xmm0, 1, _, 0
+ %xmm0 = VCVTPD2PSZ128rm %rdi, %xmm0, 1, _, 0
+ ; CHECK: %xmm0 = VCVTPD2PSrr %xmm0
+ %xmm0 = VCVTPD2PSZ128rr %xmm0
+ ; CHECK: %xmm0 = VCVTPS2DQrm %rdi, %xmm0, 1, _, 0
+ %xmm0 = VCVTPS2DQZ128rm %rdi, %xmm0, 1, _, 0
+ ; CHECK: %xmm0 = VCVTPS2DQrr %xmm0
+ %xmm0 = VCVTPS2DQZ128rr %xmm0
+ ; CHECK: %xmm0 = VCVTPS2PDrm %rdi, %xmm0, 1, _, 0
+ %xmm0 = VCVTPS2PDZ128rm %rdi, %xmm0, 1, _, 0
+ ; CHECK: %xmm0 = VCVTPS2PDrr %xmm0
+ %xmm0 = VCVTPS2PDZ128rr %xmm0
+ ; CHECK: %xmm0 = VCVTTPD2DQrm %rdi, %xmm0, 1, _, 0
+ %xmm0 = VCVTTPD2DQZ128rm %rdi, %xmm0, 1, _, 0
+ ; CHECK: %xmm0 = VCVTTPD2DQrr %xmm0
+ %xmm0 = VCVTTPD2DQZ128rr %xmm0
+ ; CHECK: %xmm0 = VCVTTPS2DQrm %rdi, %xmm0, 1, _, 0
+ %xmm0 = VCVTTPS2DQZ128rm %rdi, %xmm0, 1, _, 0
+ ; CHECK: %xmm0 = VCVTTPS2DQrr %xmm0
+ %xmm0 = VCVTTPS2DQZ128rr %xmm0
+ ; CHECK: %xmm0 = VSQRTPDm %rdi, _, _, _, _
+ %xmm0 = VSQRTPDZ128m %rdi, _, _, _, _
+ ; CHECK: %xmm0 = VSQRTPDr %xmm0
+ %xmm0 = VSQRTPDZ128r %xmm0
+ ; CHECK: %xmm0 = VSQRTPSm %rdi, _, _, _, _
+ %xmm0 = VSQRTPSZ128m %rdi, _, _, _, _
+ ; CHECK: %xmm0 = VSQRTPSr %xmm0
+ %xmm0 = VSQRTPSZ128r %xmm0
+ ; CHECK: %xmm0 = VMOVDDUPrm %rdi, 1, _, 0, _
+ %xmm0 = VMOVDDUPZ128rm %rdi, 1, _, 0, _
+ ; CHECK: %xmm0 = VMOVDDUPrr %xmm0
+ %xmm0 = VMOVDDUPZ128rr %xmm0
+ ; CHECK: %xmm0 = VMOVSHDUPrm %rdi, 1, _, 0, _
+ %xmm0 = VMOVSHDUPZ128rm %rdi, 1, _, 0, _
+ ; CHECK: %xmm0 = VMOVSHDUPrr %xmm0
+ %xmm0 = VMOVSHDUPZ128rr %xmm0
+ ; CHECK: %xmm0 = VMOVSLDUPrm %rdi, 1, _, 0, _
+ %xmm0 = VMOVSLDUPZ128rm %rdi, 1, _, 0, _
+ ; CHECK: %xmm0 = VMOVSLDUPrr %xmm0
+ %xmm0 = VMOVSLDUPZ128rr %xmm0
+ ; CHECK: %xmm0 = VPSHUFBrm %xmm0, _, _, _, _, _
+ %xmm0 = VPSHUFBZ128rm %xmm0, _, _, _, _, _
+ ; CHECK: %xmm0 = VPSHUFBrr %xmm0, %xmm1
+ %xmm0 = VPSHUFBZ128rr %xmm0, %xmm1
+ ; CHECK: %xmm0 = VPSHUFDmi %rdi, 1, _, 0, _, _
+ %xmm0 = VPSHUFDZ128mi %rdi, 1, _, 0, _, _
+ ; CHECK: %xmm0 = VPSHUFDri %xmm0, -24
+ %xmm0 = VPSHUFDZ128ri %xmm0, -24
+ ; CHECK: %xmm0 = VPSHUFHWmi %rdi, 1, _, 0, _, _
+ %xmm0 = VPSHUFHWZ128mi %rdi, 1, _, 0, _, _
+ ; CHECK: %xmm0 = VPSHUFHWri %xmm0, -24
+ %xmm0 = VPSHUFHWZ128ri %xmm0, -24
+ ; CHECK: %xmm0 = VPSHUFLWmi %rdi, 1, _, 0, _, _
+ %xmm0 = VPSHUFLWZ128mi %rdi, 1, _, 0, _, _
+ ; CHECK: %xmm0 = VPSHUFLWri %xmm0, -24
+ %xmm0 = VPSHUFLWZ128ri %xmm0, -24
+ ; CHECK: %xmm0 = VPSLLDQri %xmm0, %xmm1
+ %xmm0 = VPSLLDQZ128rr %xmm0, %xmm1
+ ; CHECK: %xmm0 = VSHUFPDrmi %xmm0, _, _, _, _, _, _
+ %xmm0 = VSHUFPDZ128rmi %xmm0, _, _, _, _, _, _
+ ; CHECK: %xmm0 = VSHUFPDrri %xmm0, _, _
+ %xmm0 = VSHUFPDZ128rri %xmm0, _, _
+ ; CHECK: %xmm0 = VSHUFPSrmi %xmm0, _, _, _, _, _, _
+ %xmm0 = VSHUFPSZ128rmi %xmm0, _, _, _, _, _, _
+ ; CHECK: %xmm0 = VSHUFPSrri %xmm0, _, _
+ %xmm0 = VSHUFPSZ128rri %xmm0, _, _
+ ; CHECK: %xmm0 = VPSADBWrm %xmm0, 1, _, %rax, _, _
+ %xmm0 = VPSADBWZ128rm %xmm0, 1, _, %rax, _, _
+ ; CHECK: %xmm0 = VPSADBWrr %xmm0, %xmm1
+ %xmm0 = VPSADBWZ128rr %xmm0, %xmm1
+ ; CHECK: %xmm0 = VBROADCASTSSrm %rip, _, _, _, _
+ %xmm0 = VBROADCASTSSZ128m %rip, _, _, _, _
+ ; CHECK: %xmm0 = VBROADCASTSSrr %xmm0
+ %xmm0 = VBROADCASTSSZ128r %xmm0
+ ; CHECK: %xmm0 = VBROADCASTSSrr %xmm0
+ %xmm0 = VBROADCASTSSZ128r_s %xmm0
+ ; CHECK: %xmm0 = VPBROADCASTBrm %rip, _, _, _, _
+ %xmm0 = VPBROADCASTBZ128m %rip, _, _, _, _
+ ; CHECK: %xmm0 = VPBROADCASTBrr %xmm0
+ %xmm0 = VPBROADCASTBZ128r %xmm0
+ ; CHECK: %xmm0 = VPBROADCASTDrm %rip, _, _, _, _
+ %xmm0 = VPBROADCASTDZ128m %rip, _, _, _, _
+ ; CHECK: %xmm0 = VPBROADCASTDrr %xmm0
+ %xmm0 = VPBROADCASTDZ128r %xmm0
+ ; CHECK: %xmm0 = VPBROADCASTQrm %rip, _, _, _, _
+ %xmm0 = VPBROADCASTQZ128m %rip, _, _, _, _
+ ; CHECK: %xmm0 = VPBROADCASTQrr %xmm0
+ %xmm0 = VPBROADCASTQZ128r %xmm0
+ ; CHECK: %xmm0 = VPBROADCASTWrm %rip, _, _, _, _
+ %xmm0 = VPBROADCASTWZ128m %rip, _, _, _, _
+ ; CHECK: %xmm0 = VPBROADCASTWrr %xmm0
+ %xmm0 = VPBROADCASTWZ128r %xmm0
+ ; CHECK: %xmm0 = VCVTPS2PHrr %xmm0, 2
+ %xmm0 = VCVTPS2PHZ128rr %xmm0, 2
+ ; CHECK: VCVTPS2PHmr %rdi, %xmm0, 1, _, 0, _, _
+ VCVTPS2PHZ128mr %rdi, %xmm0, 1, _, 0, _, _
+ ; CHECK: %xmm0 = VPABSBrm %rip, 1, _, %rax, _
+ %xmm0 = VPABSBZ128rm %rip, 1, _, %rax, _
+ ; CHECK: %xmm0 = VPABSBrr %xmm0
+ %xmm0 = VPABSBZ128rr %xmm0
+ ; CHECK: %xmm0 = VPABSDrm %rip, 1, _, %rax, _
+ %xmm0 = VPABSDZ128rm %rip, 1, _, %rax, _
+ ; CHECK: %xmm0 = VPABSDrr %xmm0
+ %xmm0 = VPABSDZ128rr %xmm0
+ ; CHECK: %xmm0 = VPABSWrm %rip, 1, _, %rax, _
+ %xmm0 = VPABSWZ128rm %rip, 1, _, %rax, _
+ ; CHECK: %xmm0 = VPABSWrr %xmm0
+ %xmm0 = VPABSWZ128rr %xmm0
+ ; CHECK: %xmm0 = VPALIGNRrmi %xmm0, _, _, _, _, _, _
+ %xmm0 = VPALIGNRZ128rmi %xmm0, _, _, _, _, _, _
+ ; CHECK: %xmm0 = VPALIGNRrri %xmm0, %xmm1, 15
+ %xmm0 = VPALIGNRZ128rri %xmm0, %xmm1, 15
+
+ RET 0, %zmm0, %zmm1
+...
+---
+ # CHECK-LABEL: name: evex_scalar_to_vex_test
+ # CHECK: bb.0:
+
+name: evex_scalar_to_vex_test
+body: |
+ bb.0:
+
+ ; CHECK: %xmm0 = VADDSDrm %xmm0, %rip, 1, _, %rax, _
+ %xmm0 = VADDSDZrm %xmm0, %rip, 1, _, %rax, _
+ ; CHECK: %xmm0 = VADDSDrm_Int %xmm0, %rip, 1, _, %rax, _
+ %xmm0 = VADDSDZrm_Int %xmm0, %rip, 1, _, %rax, _
+ ; CHECK: %xmm0 = VADDSDrr %xmm0, %xmm1
+ %xmm0 = VADDSDZrr %xmm0, %xmm1
+ ; CHECK: %xmm0 = VADDSDrr_Int %xmm0, %xmm1
+ %xmm0 = VADDSDZrr_Int %xmm0, %xmm1
+ ; CHECK: %xmm0 = VADDSSrm %xmm0, %rip, 1, _, %rax, _
+ %xmm0 = VADDSSZrm %xmm0, %rip, 1, _, %rax, _
+ ; CHECK: %xmm0 = VADDSSrm_Int %xmm0, %rip, 1, _, %rax, _
+ %xmm0 = VADDSSZrm_Int %xmm0, %rip, 1, _, %rax, _
+ ; CHECK: %xmm0 = VADDSSrr %xmm0, %xmm1
+ %xmm0 = VADDSSZrr %xmm0, %xmm1
+ ; CHECK: %xmm0 = VADDSSrr_Int %xmm0, %xmm1
+ %xmm0 = VADDSSZrr_Int %xmm0, %xmm1
+ ; CHECK: %xmm0 = VDIVSDrm %xmm0, %rip, 1, _, %rax, _
+ %xmm0 = VDIVSDZrm %xmm0, %rip, 1, _, %rax, _
+ ; CHECK: %xmm0 = VDIVSDrm_Int %xmm0, %rip, 1, _, %rax, _
+ %xmm0 = VDIVSDZrm_Int %xmm0, %rip, 1, _, %rax, _
+ ; CHECK: %xmm0 = VDIVSDrr %xmm0, %xmm1
+ %xmm0 = VDIVSDZrr %xmm0, %xmm1
+ ; CHECK: %xmm0 = VDIVSDrr_Int %xmm0, %xmm1
+ %xmm0 = VDIVSDZrr_Int %xmm0, %xmm1
+ ; CHECK: %xmm0 = VDIVSSrm %xmm0, %rip, 1, _, %rax, _
+ %xmm0 = VDIVSSZrm %xmm0, %rip, 1, _, %rax, _
+ ; CHECK: %xmm0 = VDIVSSrm_Int %xmm0, %rip, 1, _, %rax, _
+ %xmm0 = VDIVSSZrm_Int %xmm0, %rip, 1, _, %rax, _
+ ; CHECK: %xmm0 = VDIVSSrr %xmm0, %xmm1
+ %xmm0 = VDIVSSZrr %xmm0, %xmm1
+ ; CHECK: %xmm0 = VDIVSSrr_Int %xmm0, %xmm1
+ %xmm0 = VDIVSSZrr_Int %xmm0, %xmm1
+ ; CHECK: %xmm0 = VMAXCSDrm %xmm0, %rip, 1, _, %rax, _
+ %xmm0 = VMAXCSDZrm %xmm0, %rip, 1, _, %rax, _
+ ; CHECK: %xmm0 = VMAXCSDrr %xmm0, %xmm1
+ %xmm0 = VMAXCSDZrr %xmm0, %xmm1
+ ; CHECK: %xmm0 = VMAXCSSrm %xmm0, %rip, 1, _, %rax, _
+ %xmm0 = VMAXCSSZrm %xmm0, %rip, 1, _, %rax, _
+ ; CHECK: %xmm0 = VMAXCSSrr %xmm0, %xmm1
+ %xmm0 = VMAXCSSZrr %xmm0, %xmm1
+ ; CHECK: %xmm0 = VMAXSDrm %xmm0, %rip, 1, _, %rax, _
+ %xmm0 = VMAXSDZrm %xmm0, %rip, 1, _, %rax, _
+ ; CHECK: %xmm0 = VMAXSDrm_Int %xmm0, %rip, 1, _, %rax, _
+ %xmm0 = VMAXSDZrm_Int %xmm0, %rip, 1, _, %rax, _
+ ; CHECK: %xmm0 = VMAXSDrr %xmm0, %xmm1
+ %xmm0 = VMAXSDZrr %xmm0, %xmm1
+ ; CHECK: %xmm0 = VMAXSDrr_Int %xmm0, %xmm1
+ %xmm0 = VMAXSDZrr_Int %xmm0, %xmm1
+ ; CHECK: %xmm0 = VMAXSSrm %xmm0, %rip, 1, _, %rax, _
+ %xmm0 = VMAXSSZrm %xmm0, %rip, 1, _, %rax, _
+ ; CHECK: %xmm0 = VMAXSSrm_Int %xmm0, %rip, 1, _, %rax, _
+ %xmm0 = VMAXSSZrm_Int %xmm0, %rip, 1, _, %rax, _
+ ; CHECK: %xmm0 = VMAXSSrr %xmm0, %xmm1
+ %xmm0 = VMAXSSZrr %xmm0, %xmm1
+ ; CHECK: %xmm0 = VMAXSSrr_Int %xmm0, %xmm1
+ %xmm0 = VMAXSSZrr_Int %xmm0, %xmm1
+ ; CHECK: %xmm0 = VMINCSDrm %xmm0, %rip, 1, _, %rax, _
+ %xmm0 = VMINCSDZrm %xmm0, %rip, 1, _, %rax, _
+ ; CHECK: %xmm0 = VMINCSDrr %xmm0, %xmm1
+ %xmm0 = VMINCSDZrr %xmm0, %xmm1
+ ; CHECK: %xmm0 = VMINCSSrm %xmm0, %rip, 1, _, %rax, _
+ %xmm0 = VMINCSSZrm %xmm0, %rip, 1, _, %rax, _
+ ; CHECK: %xmm0 = VMINCSSrr %xmm0, %xmm1
+ %xmm0 = VMINCSSZrr %xmm0, %xmm1
+ ; CHECK: %xmm0 = VMINSDrm %xmm0, %rip, 1, _, %rax, _
+ %xmm0 = VMINSDZrm %xmm0, %rip, 1, _, %rax, _
+ ; CHECK: %xmm0 = VMINSDrm_Int %xmm0, %rip, 1, _, %rax, _
+ %xmm0 = VMINSDZrm_Int %xmm0, %rip, 1, _, %rax, _
+ ; CHECK: %xmm0 = VMINSDrr %xmm0, %xmm1
+ %xmm0 = VMINSDZrr %xmm0, %xmm1
+ ; CHECK: %xmm0 = VMINSDrr_Int %xmm0, %xmm1
+ %xmm0 = VMINSDZrr_Int %xmm0, %xmm1
+ ; CHECK: %xmm0 = VMINSSrm %xmm0, %rip, 1, _, %rax, _
+ %xmm0 = VMINSSZrm %xmm0, %rip, 1, _, %rax, _
+ ; CHECK: %xmm0 = VMINSSrm_Int %xmm0, %rip, 1, _, %rax, _
+ %xmm0 = VMINSSZrm_Int %xmm0, %rip, 1, _, %rax, _
+ ; CHECK: %xmm0 = VMINSSrr %xmm0, %xmm1
+ %xmm0 = VMINSSZrr %xmm0, %xmm1
+ ; CHECK: %xmm0 = VMINSSrr_Int %xmm0, %xmm1
+ %xmm0 = VMINSSZrr_Int %xmm0, %xmm1
+ ; CHECK: %xmm0 = VMULSDrm %xmm0, %rip, 1, _, %rax, _
+ %xmm0 = VMULSDZrm %xmm0, %rip, 1, _, %rax, _
+ ; CHECK: %xmm0 = VMULSDrm_Int %xmm0, %rip, 1, _, %rax, _
+ %xmm0 = VMULSDZrm_Int %xmm0, %rip, 1, _, %rax, _
+ ; CHECK: %xmm0 = VMULSDrr %xmm0, %xmm1
+ %xmm0 = VMULSDZrr %xmm0, %xmm1
+ ; CHECK: %xmm0 = VMULSDrr_Int %xmm0, %xmm1
+ %xmm0 = VMULSDZrr_Int %xmm0, %xmm1
+ ; CHECK: %xmm0 = VMULSSrm %xmm0, %rip, 1, _, %rax, _
+ %xmm0 = VMULSSZrm %xmm0, %rip, 1, _, %rax, _
+ ; CHECK: %xmm0 = VMULSSrm_Int %xmm0, %rip, 1, _, %rax, _
+ %xmm0 = VMULSSZrm_Int %xmm0, %rip, 1, _, %rax, _
+ ; CHECK: %xmm0 = VMULSSrr %xmm0, %xmm1
+ %xmm0 = VMULSSZrr %xmm0, %xmm1
+ ; CHECK: %xmm0 = VMULSSrr_Int %xmm0, %xmm1
+ %xmm0 = VMULSSZrr_Int %xmm0, %xmm1
+ ; CHECK: %xmm0 = VSUBSDrm %xmm0, %rip, 1, _, %rax, _
+ %xmm0 = VSUBSDZrm %xmm0, %rip, 1, _, %rax, _
+ ; CHECK: %xmm0 = VSUBSDrm_Int %xmm0, %rip, 1, _, %rax, _
+ %xmm0 = VSUBSDZrm_Int %xmm0, %rip, 1, _, %rax, _
+ ; CHECK: %xmm0 = VSUBSDrr %xmm0, %xmm1
+ %xmm0 = VSUBSDZrr %xmm0, %xmm1
+ ; CHECK: %xmm0 = VSUBSDrr_Int %xmm0, %xmm1
+ %xmm0 = VSUBSDZrr_Int %xmm0, %xmm1
+ ; CHECK: %xmm0 = VSUBSSrm %xmm0, %rip, 1, _, %rax, _
+ %xmm0 = VSUBSSZrm %xmm0, %rip, 1, _, %rax, _
+ ; CHECK: %xmm0 = VSUBSSrm_Int %xmm0, %rip, 1, _, %rax, _
+ %xmm0 = VSUBSSZrm_Int %xmm0, %rip, 1, _, %rax, _
+ ; CHECK: %xmm0 = VSUBSSrr %xmm0, %xmm1
+ %xmm0 = VSUBSSZrr %xmm0, %xmm1
+ ; CHECK: %xmm0 = VSUBSSrr_Int %xmm0, %xmm1
+ %xmm0 = VSUBSSZrr_Int %xmm0, %xmm1
+ ; CHECK: %xmm0 = VFMADD132SDm %xmm0, %xmm0, %rsi, 1, _, 0, _
+ %xmm0 = VFMADD132SDZm %xmm0, %xmm0, %rsi, 1, _, 0, _
+ ; CHECK: %xmm0 = VFMADD132SDm_Int %xmm0, %xmm0, %rsi, 1, _, 0, _
+ %xmm0 = VFMADD132SDZm_Int %xmm0, %xmm0, %rsi, 1, _, 0, _
+ ; CHECK: %xmm0 = VFMADD132SDr %xmm0, %xmm1, %xmm2
+ %xmm0 = VFMADD132SDZr %xmm0, %xmm1, %xmm2
+ ; CHECK: %xmm0 = VFMADD132SDr_Int %xmm0, %xmm1, %xmm2
+ %xmm0 = VFMADD132SDZr_Int %xmm0, %xmm1, %xmm2
+ ; CHECK: %xmm0 = VFMADD132SSm %xmm0, %xmm0, %rsi, 1, _, 0, _
+ %xmm0 = VFMADD132SSZm %xmm0, %xmm0, %rsi, 1, _, 0, _
+ ; CHECK: %xmm0 = VFMADD132SSm_Int %xmm0, %xmm0, %rsi, 1, _, 0, _
+ %xmm0 = VFMADD132SSZm_Int %xmm0, %xmm0, %rsi, 1, _, 0, _
+ ; CHECK: %xmm0 = VFMADD132SSr %xmm0, %xmm1, %xmm2
+ %xmm0 = VFMADD132SSZr %xmm0, %xmm1, %xmm2
+ ; CHECK: %xmm0 = VFMADD132SSr_Int %xmm0, %xmm1, %xmm2
+ %xmm0 = VFMADD132SSZr_Int %xmm0, %xmm1, %xmm2
+ ; CHECK: %xmm0 = VFMADD213SDm %xmm0, %xmm0, %rsi, 1, _, 0, _
+ %xmm0 = VFMADD213SDZm %xmm0, %xmm0, %rsi, 1, _, 0, _
+ ; CHECK: %xmm0 = VFMADD213SDm_Int %xmm0, %xmm0, %rsi, 1, _, 0, _
+ %xmm0 = VFMADD213SDZm_Int %xmm0, %xmm0, %rsi, 1, _, 0, _
+ ; CHECK: %xmm0 = VFMADD213SDr %xmm0, %xmm1, %xmm2
+ %xmm0 = VFMADD213SDZr %xmm0, %xmm1, %xmm2
+ ; CHECK: %xmm0 = VFMADD213SDr_Int %xmm0, %xmm1, %xmm2
+ %xmm0 = VFMADD213SDZr_Int %xmm0, %xmm1, %xmm2
+ ; CHECK: %xmm0 = VFMADD213SSm %xmm0, %xmm0, %rsi, 1, _, 0, _
+ %xmm0 = VFMADD213SSZm %xmm0, %xmm0, %rsi, 1, _, 0, _
+ ; CHECK: %xmm0 = VFMADD213SSm_Int %xmm0, %xmm0, %rsi, 1, _, 0, _
+ %xmm0 = VFMADD213SSZm_Int %xmm0, %xmm0, %rsi, 1, _, 0, _
+ ; CHECK: %xmm0 = VFMADD213SSr %xmm0, %xmm1, %xmm2
+ %xmm0 = VFMADD213SSZr %xmm0, %xmm1, %xmm2
+ ; CHECK: %xmm0 = VFMADD213SSr_Int %xmm0, %xmm1, %xmm2
+ %xmm0 = VFMADD213SSZr_Int %xmm0, %xmm1, %xmm2
+ ; CHECK: %xmm0 = VFMADD231SDm %xmm0, %xmm0, %rsi, 1, _, 0, _
+ %xmm0 = VFMADD231SDZm %xmm0, %xmm0, %rsi, 1, _, 0, _
+ ; CHECK: %xmm0 = VFMADD231SDm_Int %xmm0, %xmm0, %rsi, 1, _, 0, _
+ %xmm0 = VFMADD231SDZm_Int %xmm0, %xmm0, %rsi, 1, _, 0, _
+ ; CHECK: %xmm0 = VFMADD231SDr %xmm0, %xmm1, %xmm2
+ %xmm0 = VFMADD231SDZr %xmm0, %xmm1, %xmm2
+ ; CHECK: %xmm0 = VFMADD231SDr_Int %xmm0, %xmm1, %xmm2
+ %xmm0 = VFMADD231SDZr_Int %xmm0, %xmm1, %xmm2
+ ; CHECK: %xmm0 = VFMADD231SSm %xmm0, %xmm0, %rsi, 1, _, 0, _
+ %xmm0 = VFMADD231SSZm %xmm0, %xmm0, %rsi, 1, _, 0, _
+ ; CHECK: %xmm0 = VFMADD231SSm_Int %xmm0, %xmm0, %rsi, 1, _, 0, _
+ %xmm0 = VFMADD231SSZm_Int %xmm0, %xmm0, %rsi, 1, _, 0, _
+ ; CHECK: %xmm0 = VFMADD231SSr %xmm0, %xmm1, %xmm2
+ %xmm0 = VFMADD231SSZr %xmm0, %xmm1, %xmm2
+ ; CHECK: %xmm0 = VFMADD231SSr_Int %xmm0, %xmm1, %xmm2
+ %xmm0 = VFMADD231SSZr_Int %xmm0, %xmm1, %xmm2
+ ; CHECK: %xmm0 = VFMSUB132SDm %xmm0, %xmm0, %rsi, 1, _, 0, _
+ %xmm0 = VFMSUB132SDZm %xmm0, %xmm0, %rsi, 1, _, 0, _
+ ; CHECK: %xmm0 = VFMSUB132SDm_Int %xmm0, %xmm0, %rsi, 1, _, 0, _
+ %xmm0 = VFMSUB132SDZm_Int %xmm0, %xmm0, %rsi, 1, _, 0, _
+ ; CHECK: %xmm0 = VFMSUB132SDr %xmm0, %xmm1, %xmm2
+ %xmm0 = VFMSUB132SDZr %xmm0, %xmm1, %xmm2
+ ; CHECK: %xmm0 = VFMSUB132SDr_Int %xmm0, %xmm1, %xmm2
+ %xmm0 = VFMSUB132SDZr_Int %xmm0, %xmm1, %xmm2
+ ; CHECK: %xmm0 = VFMSUB132SSm %xmm0, %xmm0, %rsi, 1, _, 0, _
+ %xmm0 = VFMSUB132SSZm %xmm0, %xmm0, %rsi, 1, _, 0, _
+ ; CHECK: %xmm0 = VFMSUB132SSm_Int %xmm0, %xmm0, %rsi, 1, _, 0, _
+ %xmm0 = VFMSUB132SSZm_Int %xmm0, %xmm0, %rsi, 1, _, 0, _
+ ; CHECK: %xmm0 = VFMSUB132SSr %xmm0, %xmm1, %xmm2
+ %xmm0 = VFMSUB132SSZr %xmm0, %xmm1, %xmm2
+ ; CHECK: %xmm0 = VFMSUB132SSr_Int %xmm0, %xmm1, %xmm2
+ %xmm0 = VFMSUB132SSZr_Int %xmm0, %xmm1, %xmm2
+ ; CHECK: %xmm0 = VFMSUB213SDm %xmm0, %xmm0, %rsi, 1, _, 0, _
+ %xmm0 = VFMSUB213SDZm %xmm0, %xmm0, %rsi, 1, _, 0, _
+ ; CHECK: %xmm0 = VFMSUB213SDm_Int %xmm0, %xmm0, %rsi, 1, _, 0, _
+ %xmm0 = VFMSUB213SDZm_Int %xmm0, %xmm0, %rsi, 1, _, 0, _
+ ; CHECK: %xmm0 = VFMSUB213SDr %xmm0, %xmm1, %xmm2
+ %xmm0 = VFMSUB213SDZr %xmm0, %xmm1, %xmm2
+ ; CHECK: %xmm0 = VFMSUB213SDr_Int %xmm0, %xmm1, %xmm2
+ %xmm0 = VFMSUB213SDZr_Int %xmm0, %xmm1, %xmm2
+ ; CHECK: %xmm0 = VFMSUB213SSm %xmm0, %xmm0, %rsi, 1, _, 0, _
+ %xmm0 = VFMSUB213SSZm %xmm0, %xmm0, %rsi, 1, _, 0, _
+ ; CHECK: %xmm0 = VFMSUB213SSm_Int %xmm0, %xmm0, %rsi, 1, _, 0, _
+ %xmm0 = VFMSUB213SSZm_Int %xmm0, %xmm0, %rsi, 1, _, 0, _
+ ; CHECK: %xmm0 = VFMSUB213SSr %xmm0, %xmm1, %xmm2
+ %xmm0 = VFMSUB213SSZr %xmm0, %xmm1, %xmm2
+ ; CHECK: %xmm0 = VFMSUB213SSr_Int %xmm0, %xmm1, %xmm2
+ %xmm0 = VFMSUB213SSZr_Int %xmm0, %xmm1, %xmm2
+ ; CHECK: %xmm0 = VFMSUB231SDm %xmm0, %xmm0, %rsi, 1, _, 0, _
+ %xmm0 = VFMSUB231SDZm %xmm0, %xmm0, %rsi, 1, _, 0, _
+ ; CHECK: %xmm0 = VFMSUB231SDm_Int %xmm0, %xmm0, %rsi, 1, _, 0, _
+ %xmm0 = VFMSUB231SDZm_Int %xmm0, %xmm0, %rsi, 1, _, 0, _
+ ; CHECK: %xmm0 = VFMSUB231SDr %xmm0, %xmm1, %xmm2
+ %xmm0 = VFMSUB231SDZr %xmm0, %xmm1, %xmm2
+ ; CHECK: %xmm0 = VFMSUB231SDr_Int %xmm0, %xmm1, %xmm2
+ %xmm0 = VFMSUB231SDZr_Int %xmm0, %xmm1, %xmm2
+ ; CHECK: %xmm0 = VFMSUB231SSm %xmm0, %xmm0, %rsi, 1, _, 0, _
+ %xmm0 = VFMSUB231SSZm %xmm0, %xmm0, %rsi, 1, _, 0, _
+ ; CHECK: %xmm0 = VFMSUB231SSm_Int %xmm0, %xmm0, %rsi, 1, _, 0, _
+ %xmm0 = VFMSUB231SSZm_Int %xmm0, %xmm0, %rsi, 1, _, 0, _
+ ; CHECK: %xmm0 = VFMSUB231SSr %xmm0, %xmm1, %xmm2
+ %xmm0 = VFMSUB231SSZr %xmm0, %xmm1, %xmm2
+ ; CHECK: %xmm0 = VFMSUB231SSr_Int %xmm0, %xmm1, %xmm2
+ %xmm0 = VFMSUB231SSZr_Int %xmm0, %xmm1, %xmm2
+ ; CHECK: %xmm0 = VFNMADD132SDm %xmm0, %xmm0, %rsi, 1, _, 0, _
+ %xmm0 = VFNMADD132SDZm %xmm0, %xmm0, %rsi, 1, _, 0, _
+ ; CHECK: %xmm0 = VFNMADD132SDm_Int %xmm0, %xmm0, %rsi, 1, _, 0, _
+ %xmm0 = VFNMADD132SDZm_Int %xmm0, %xmm0, %rsi, 1, _, 0, _
+ ; CHECK: %xmm0 = VFNMADD132SDr %xmm0, %xmm1, %xmm2
+ %xmm0 = VFNMADD132SDZr %xmm0, %xmm1, %xmm2
+ ; CHECK: %xmm0 = VFNMADD132SDr_Int %xmm0, %xmm1, %xmm2
+ %xmm0 = VFNMADD132SDZr_Int %xmm0, %xmm1, %xmm2
+ ; CHECK: %xmm0 = VFNMADD132SSm %xmm0, %xmm0, %rsi, 1, _, 0, _
+ %xmm0 = VFNMADD132SSZm %xmm0, %xmm0, %rsi, 1, _, 0, _
+ ; CHECK: %xmm0 = VFNMADD132SSm_Int %xmm0, %xmm0, %rsi, 1, _, 0, _
+ %xmm0 = VFNMADD132SSZm_Int %xmm0, %xmm0, %rsi, 1, _, 0, _
+ ; CHECK: %xmm0 = VFNMADD132SSr %xmm0, %xmm1, %xmm2
+ %xmm0 = VFNMADD132SSZr %xmm0, %xmm1, %xmm2
+ ; CHECK: %xmm0 = VFNMADD132SSr_Int %xmm0, %xmm1, %xmm2
+ %xmm0 = VFNMADD132SSZr_Int %xmm0, %xmm1, %xmm2
+ ; CHECK: %xmm0 = VFNMADD213SDm %xmm0, %xmm0, %rsi, 1, _, 0, _
+ %xmm0 = VFNMADD213SDZm %xmm0, %xmm0, %rsi, 1, _, 0, _
+ ; CHECK: %xmm0 = VFNMADD213SDm_Int %xmm0, %xmm0, %rsi, 1, _, 0, _
+ %xmm0 = VFNMADD213SDZm_Int %xmm0, %xmm0, %rsi, 1, _, 0, _
+ ; CHECK: %xmm0 = VFNMADD213SDr %xmm0, %xmm1, %xmm2
+ %xmm0 = VFNMADD213SDZr %xmm0, %xmm1, %xmm2
+ ; CHECK: %xmm0 = VFNMADD213SDr_Int %xmm0, %xmm1, %xmm2
+ %xmm0 = VFNMADD213SDZr_Int %xmm0, %xmm1, %xmm2
+ ; CHECK: %xmm0 = VFNMADD213SSm %xmm0, %xmm0, %rsi, 1, _, 0, _
+ %xmm0 = VFNMADD213SSZm %xmm0, %xmm0, %rsi, 1, _, 0, _
+ ; CHECK: %xmm0 = VFNMADD213SSm_Int %xmm0, %xmm0, %rsi, 1, _, 0, _
+ %xmm0 = VFNMADD213SSZm_Int %xmm0, %xmm0, %rsi, 1, _, 0, _
+ ; CHECK: %xmm0 = VFNMADD213SSr %xmm0, %xmm1, %xmm2
+ %xmm0 = VFNMADD213SSZr %xmm0, %xmm1, %xmm2
+ ; CHECK: %xmm0 = VFNMADD213SSr_Int %xmm0, %xmm1, %xmm2
+ %xmm0 = VFNMADD213SSZr_Int %xmm0, %xmm1, %xmm2
+ ; CHECK: %xmm0 = VFNMADD231SDm %xmm0, %xmm0, %rsi, 1, _, 0, _
+ %xmm0 = VFNMADD231SDZm %xmm0, %xmm0, %rsi, 1, _, 0, _
+ ; CHECK: %xmm0 = VFNMADD231SDm_Int %xmm0, %xmm0, %rsi, 1, _, 0, _
+ %xmm0 = VFNMADD231SDZm_Int %xmm0, %xmm0, %rsi, 1, _, 0, _
+ ; CHECK: %xmm0 = VFNMADD231SDr %xmm0, %xmm1, %xmm2
+ %xmm0 = VFNMADD231SDZr %xmm0, %xmm1, %xmm2
+ ; CHECK: %xmm0 = VFNMADD231SDr_Int %xmm0, %xmm1, %xmm2
+ %xmm0 = VFNMADD231SDZr_Int %xmm0, %xmm1, %xmm2
+ ; CHECK: %xmm0 = VFNMADD231SSm %xmm0, %xmm0, %rsi, 1, _, 0, _
+ %xmm0 = VFNMADD231SSZm %xmm0, %xmm0, %rsi, 1, _, 0, _
+ ; CHECK: %xmm0 = VFNMADD231SSm_Int %xmm0, %xmm0, %rsi, 1, _, 0, _
+ %xmm0 = VFNMADD231SSZm_Int %xmm0, %xmm0, %rsi, 1, _, 0, _
+ ; CHECK: %xmm0 = VFNMADD231SSr %xmm0, %xmm1, %xmm2
+ %xmm0 = VFNMADD231SSZr %xmm0, %xmm1, %xmm2
+ ; CHECK: %xmm0 = VFNMADD231SSr_Int %xmm0, %xmm1, %xmm2
+ %xmm0 = VFNMADD231SSZr_Int %xmm0, %xmm1, %xmm2
+ ; CHECK: %xmm0 = VFNMSUB132SDm %xmm0, %xmm0, %rsi, 1, _, 0, _
+ %xmm0 = VFNMSUB132SDZm %xmm0, %xmm0, %rsi, 1, _, 0, _
+ ; CHECK: %xmm0 = VFNMSUB132SDm_Int %xmm0, %xmm0, %rsi, 1, _, 0, _
+ %xmm0 = VFNMSUB132SDZm_Int %xmm0, %xmm0, %rsi, 1, _, 0, _
+ ; CHECK: %xmm0 = VFNMSUB132SDr %xmm0, %xmm1, %xmm2
+ %xmm0 = VFNMSUB132SDZr %xmm0, %xmm1, %xmm2
+ ; CHECK: %xmm0 = VFNMSUB132SDr_Int %xmm0, %xmm1, %xmm2
+ %xmm0 = VFNMSUB132SDZr_Int %xmm0, %xmm1, %xmm2
+ ; CHECK: %xmm0 = VFNMSUB132SSm %xmm0, %xmm0, %rsi, 1, _, 0, _
+ %xmm0 = VFNMSUB132SSZm %xmm0, %xmm0, %rsi, 1, _, 0, _
+ ; CHECK: %xmm0 = VFNMSUB132SSm_Int %xmm0, %xmm0, %rsi, 1, _, 0, _
+ %xmm0 = VFNMSUB132SSZm_Int %xmm0, %xmm0, %rsi, 1, _, 0, _
+ ; CHECK: %xmm0 = VFNMSUB132SSr %xmm0, %xmm1, %xmm2
+ %xmm0 = VFNMSUB132SSZr %xmm0, %xmm1, %xmm2
+ ; CHECK: %xmm0 = VFNMSUB132SSr_Int %xmm0, %xmm1, %xmm2
+ %xmm0 = VFNMSUB132SSZr_Int %xmm0, %xmm1, %xmm2
+ ; CHECK: %xmm0 = VFNMSUB213SDm %xmm0, %xmm0, %rsi, 1, _, 0, _
+ %xmm0 = VFNMSUB213SDZm %xmm0, %xmm0, %rsi, 1, _, 0, _
+ ; CHECK: %xmm0 = VFNMSUB213SDm_Int %xmm0, %xmm0, %rsi, 1, _, 0, _
+ %xmm0 = VFNMSUB213SDZm_Int %xmm0, %xmm0, %rsi, 1, _, 0, _
+ ; CHECK: %xmm0 = VFNMSUB213SDr %xmm0, %xmm1, %xmm2
+ %xmm0 = VFNMSUB213SDZr %xmm0, %xmm1, %xmm2
+ ; CHECK: %xmm0 = VFNMSUB213SDr_Int %xmm0, %xmm1, %xmm2
+ %xmm0 = VFNMSUB213SDZr_Int %xmm0, %xmm1, %xmm2
+ ; CHECK: %xmm0 = VFNMSUB213SSm %xmm0, %xmm0, %rsi, 1, _, 0, _
+ %xmm0 = VFNMSUB213SSZm %xmm0, %xmm0, %rsi, 1, _, 0, _
+ ; CHECK: %xmm0 = VFNMSUB213SSm_Int %xmm0, %xmm0, %rsi, 1, _, 0, _
+ %xmm0 = VFNMSUB213SSZm_Int %xmm0, %xmm0, %rsi, 1, _, 0, _
+ ; CHECK: %xmm0 = VFNMSUB213SSr %xmm0, %xmm1, %xmm2
+ %xmm0 = VFNMSUB213SSZr %xmm0, %xmm1, %xmm2
+ ; CHECK: %xmm0 = VFNMSUB213SSr_Int %xmm0, %xmm1, %xmm2
+ %xmm0 = VFNMSUB213SSZr_Int %xmm0, %xmm1, %xmm2
+ ; CHECK: %xmm0 = VFNMSUB231SDm %xmm0, %xmm0, %rsi, 1, _, 0, _
+ %xmm0 = VFNMSUB231SDZm %xmm0, %xmm0, %rsi, 1, _, 0, _
+ ; CHECK: %xmm0 = VFNMSUB231SDm_Int %xmm0, %xmm0, %rsi, 1, _, 0, _
+ %xmm0 = VFNMSUB231SDZm_Int %xmm0, %xmm0, %rsi, 1, _, 0, _
+ ; CHECK: %xmm0 = VFNMSUB231SDr %xmm0, %xmm1, %xmm2
+ %xmm0 = VFNMSUB231SDZr %xmm0, %xmm1, %xmm2
+ ; CHECK: %xmm0 = VFNMSUB231SDr_Int %xmm0, %xmm1, %xmm2
+ %xmm0 = VFNMSUB231SDZr_Int %xmm0, %xmm1, %xmm2
+ ; CHECK: %xmm0 = VFNMSUB231SSm %xmm0, %xmm0, %rsi, 1, _, 0, _
+ %xmm0 = VFNMSUB231SSZm %xmm0, %xmm0, %rsi, 1, _, 0, _
+ ; CHECK: %xmm0 = VFNMSUB231SSm_Int %xmm0, %xmm0, %rsi, 1, _, 0, _
+ %xmm0 = VFNMSUB231SSZm_Int %xmm0, %xmm0, %rsi, 1, _, 0, _
+ ; CHECK: %xmm0 = VFNMSUB231SSr %xmm0, %xmm1, %xmm2
+ %xmm0 = VFNMSUB231SSZr %xmm0, %xmm1, %xmm2
+ ; CHECK: %xmm0 = VFNMSUB231SSr_Int %xmm0, %xmm1, %xmm2
+ %xmm0 = VFNMSUB231SSZr_Int %xmm0, %xmm1, %xmm2
+ ; CHECK: VPEXTRBmr %rdi, 1, _, 0, _, %xmm0, 3
+ VPEXTRBZmr %rdi, 1, _, 0, _, %xmm0, 3
+ ; CHECK: %eax = VPEXTRBrr %xmm0, 1
+ %eax = VPEXTRBZrr %xmm0, 1
+ ; CHECK: VPEXTRDmr %rdi, 1, _, 0, _, %xmm0, 3
+ VPEXTRDZmr %rdi, 1, _, 0, _, %xmm0, 3
+ ; CHECK: %eax = VPEXTRDrr %xmm0, 1
+ %eax = VPEXTRDZrr %xmm0, 1
+ ; CHECK: VPEXTRQmr %rdi, 1, _, 0, _, %xmm0, 3
+ VPEXTRQZmr %rdi, 1, _, 0, _, %xmm0, 3
+ ; CHECK: %rax = VPEXTRQrr %xmm0, 1
+ %rax = VPEXTRQZrr %xmm0, 1
+ ; CHECK: VPEXTRWmr %rdi, 1, _, 0, _, %xmm0, 3
+ VPEXTRWZmr %rdi, 1, _, 0, _, %xmm0, 3
+ ; CHECK: %eax = VPEXTRWri %xmm0, 1
+ %eax = VPEXTRWZrr %xmm0, 1
+ ; CHECK: %xmm0 = VPINSRBrm %xmm0, %rsi, 1, _, 0, _, 3
+ %xmm0 = VPINSRBZrm %xmm0, %rsi, 1, _, 0, _, 3
+ ; CHECK: %xmm0 = VPINSRBrr %xmm0, %edi, 5
+ %xmm0 = VPINSRBZrr %xmm0, %edi, 5
+ ; CHECK: %xmm0 = VPINSRDrm %xmm0, %rsi, 1, _, 0, _, 3
+ %xmm0 = VPINSRDZrm %xmm0, %rsi, 1, _, 0, _, 3
+ ; CHECK: %xmm0 = VPINSRDrr %xmm0, %edi, 5
+ %xmm0 = VPINSRDZrr %xmm0, %edi, 5
+ ; CHECK: %xmm0 = VPINSRQrm %xmm0, %rsi, 1, _, 0, _, 3
+ %xmm0 = VPINSRQZrm %xmm0, %rsi, 1, _, 0, _, 3
+ ; CHECK: %xmm0 = VPINSRQrr %xmm0, %rdi, 5
+ %xmm0 = VPINSRQZrr %xmm0, %rdi, 5
+ ; CHECK: %xmm0 = VPINSRWrmi %xmm0, %rsi, 1, _, 0, _, 3
+ %xmm0 = VPINSRWZrm %xmm0, %rsi, 1, _, 0, _, 3
+ ; CHECK: %xmm0 = VPINSRWrri %xmm0, %edi, 5
+ %xmm0 = VPINSRWZrr %xmm0, %edi, 5
+ ; CHECK: %xmm0 = VSQRTSDm %xmm0, _, _, _, _, _
+ %xmm0 = VSQRTSDZm %xmm0, _, _, _, _, _
+ ; CHECK: %xmm0 = VSQRTSDm_Int %xmm0, _, _, _, _, _
+ %xmm0 = VSQRTSDZm_Int %xmm0, _, _, _, _, _
+ ; CHECK: %xmm0 = VSQRTSDr %xmm0, _
+ %xmm0 = VSQRTSDZr %xmm0, _
+ ; CHECK: %xmm0 = VSQRTSDr_Int %xmm0, _
+ %xmm0 = VSQRTSDZr_Int %xmm0, _
+ ; CHECK: %xmm0 = VSQRTSSm %xmm0, _, _, _, _, _
+ %xmm0 = VSQRTSSZm %xmm0, _, _, _, _, _
+ ; CHECK: %xmm0 = VSQRTSSm_Int %xmm0, _, _, _, _, _
+ %xmm0 = VSQRTSSZm_Int %xmm0, _, _, _, _, _
+ ; CHECK: %xmm0 = VSQRTSSr %xmm0, _
+ %xmm0 = VSQRTSSZr %xmm0, _
+ ; CHECK: %xmm0 = VSQRTSSr_Int %xmm0, _
+ %xmm0 = VSQRTSSZr_Int %xmm0, _
+ ; CHECK: %rdi = VCVTSD2SI64rm %rdi, %xmm0, 1, _, 0
+ %rdi = VCVTSD2SI64Zrm %rdi, %xmm0, 1, _, 0
+ ; CHECK: %rdi = VCVTSD2SI64rr %xmm0
+ %rdi = VCVTSD2SI64Zrr %xmm0
+ ; CHECK: %edi = VCVTSD2SIrm %rdi, %xmm0, 1, _, 0
+ %edi = VCVTSD2SIZrm %rdi, %xmm0, 1, _, 0
+ ; CHECK: %edi = VCVTSD2SIrr %xmm0
+ %edi = VCVTSD2SIZrr %xmm0
+ ; CHECK: %xmm0 = VCVTSD2SSrm %xmm0, %rdi, 1, _, 0, _
+ %xmm0 = VCVTSD2SSZrm %xmm0, %rdi, 1, _, 0, _
+ ; CHECK: %xmm0 = VCVTSD2SSrr %xmm0, _
+ %xmm0 = VCVTSD2SSZrr %xmm0, _
+ ; CHECK: %xmm0 = VCVTSI2SDrm %xmm0, %rdi, 1, _, 0, _
+ %xmm0 = VCVTSI2SDZrm %xmm0, %rdi, 1, _, 0, _
+ ; CHECK: %xmm0 = Int_VCVTSI2SDrm %xmm0, %rdi, 1, _, 0, _
+ %xmm0 = VCVTSI2SDZrm_Int %xmm0, %rdi, 1, _, 0, _
+ ; CHECK: %xmm0 = VCVTSI2SDrr %xmm0, _
+ %xmm0 = VCVTSI2SDZrr %xmm0, _
+ ; CHECK: %xmm0 = Int_VCVTSI2SDrr %xmm0, _
+ %xmm0 = VCVTSI2SDZrr_Int %xmm0, _
+ ; CHECK: %xmm0 = VCVTSI2SSrm %xmm0, %rdi, 1, _, 0, _
+ %xmm0 = VCVTSI2SSZrm %xmm0, %rdi, 1, _, 0, _
+ ; CHECK: %xmm0 = Int_VCVTSI2SSrm %xmm0, %rdi, 1, _, 0, _
+ %xmm0 = VCVTSI2SSZrm_Int %xmm0, %rdi, 1, _, 0, _
+ ; CHECK: %xmm0 = VCVTSI2SSrr %xmm0, _
+ %xmm0 = VCVTSI2SSZrr %xmm0, _
+ ; CHECK: %xmm0 = Int_VCVTSI2SSrr %xmm0, _
+ %xmm0 = VCVTSI2SSZrr_Int %xmm0, _
+ ; CHECK: %xmm0 = VCVTSS2SDrm %xmm0, %rdi, 1, _, 0, _
+ %xmm0 = VCVTSS2SDZrm %xmm0, %rdi, 1, _, 0, _
+ ; CHECK: %xmm0 = VCVTSS2SDrr %xmm0, _
+ %xmm0 = VCVTSS2SDZrr %xmm0, _
+ ; CHECK: %rdi = VCVTSS2SI64rm %rdi, %xmm0, 1, _, 0
+ %rdi = VCVTSS2SI64Zrm %rdi, %xmm0, 1, _, 0
+ ; CHECK: %rdi = VCVTSS2SI64rr %xmm0
+ %rdi = VCVTSS2SI64Zrr %xmm0
+ ; CHECK: %edi = VCVTSS2SIrm %rdi, %xmm0, 1, _, 0
+ %edi = VCVTSS2SIZrm %rdi, %xmm0, 1, _, 0
+ ; CHECK: %edi = VCVTSS2SIrr %xmm0
+ %edi = VCVTSS2SIZrr %xmm0
+ ; CHECK: %rdi = VCVTTSD2SI64rm %rdi, %xmm0, 1, _, 0
+ %rdi = VCVTTSD2SI64Zrm %rdi, %xmm0, 1, _, 0
+ ; CHECK: %rdi = Int_VCVTTSD2SI64rm %rdi, %xmm0, 1, _, 0
+ %rdi = VCVTTSD2SI64Zrm_Int %rdi, %xmm0, 1, _, 0
+ ; CHECK: %rdi = VCVTTSD2SI64rr %xmm0
+ %rdi = VCVTTSD2SI64Zrr %xmm0
+ ; CHECK: %rdi = Int_VCVTTSD2SI64rr %xmm0
+ %rdi = VCVTTSD2SI64Zrr_Int %xmm0
+ ; CHECK: %edi = VCVTTSD2SIrm %rdi, %xmm0, 1, _, 0
+ %edi = VCVTTSD2SIZrm %rdi, %xmm0, 1, _, 0
+ ; CHECK: %edi = Int_VCVTTSD2SIrm %rdi, %xmm0, 1, _, 0
+ %edi = VCVTTSD2SIZrm_Int %rdi, %xmm0, 1, _, 0
+ ; CHECK: %edi = VCVTTSD2SIrr %xmm0
+ %edi = VCVTTSD2SIZrr %xmm0
+ ; CHECK: %edi = Int_VCVTTSD2SIrr %xmm0
+ %edi = VCVTTSD2SIZrr_Int %xmm0
+ ; CHECK: %rdi = VCVTTSS2SI64rm %rdi, %xmm0, 1, _, 0
+ %rdi = VCVTTSS2SI64Zrm %rdi, %xmm0, 1, _, 0
+ ; CHECK: %rdi = Int_VCVTTSS2SI64rm %rdi, %xmm0, 1, _, 0
+ %rdi = VCVTTSS2SI64Zrm_Int %rdi, %xmm0, 1, _, 0
+ ; CHECK: %rdi = VCVTTSS2SI64rr %xmm0
+ %rdi = VCVTTSS2SI64Zrr %xmm0
+ ; CHECK: %rdi = Int_VCVTTSS2SI64rr %xmm0
+ %rdi = VCVTTSS2SI64Zrr_Int %xmm0
+ ; CHECK: %edi = VCVTTSS2SIrm %rdi, %xmm0, 1, _, 0
+ %edi = VCVTTSS2SIZrm %rdi, %xmm0, 1, _, 0
+ ; CHECK: %edi = Int_VCVTTSS2SIrm %rdi, %xmm0, 1, _, 0
+ %edi = VCVTTSS2SIZrm_Int %rdi, %xmm0, 1, _, 0
+ ; CHECK: %edi = VCVTTSS2SIrr %xmm0
+ %edi = VCVTTSS2SIZrr %xmm0
+ ; CHECK: %edi = Int_VCVTTSS2SIrr %xmm0
+ %edi = VCVTTSS2SIZrr_Int %xmm0
+ ; CHECK: %xmm0 = VMOV64toSDrr %rdi
+ %xmm0 = VMOV64toSDZrr %rdi
+ ; CHECK: %xmm0 = VMOVDI2SSrm %rip, _, _, _, _
+ %xmm0 = VMOVDI2SSZrm %rip, _, _, _, _
+ ; CHECK: %xmm0 = VMOVDI2SSrr %eax
+ %xmm0 = VMOVDI2SSZrr %eax
+ ; CHECK: VMOVSDmr %rdi, %xmm0, _, _, _, _
+ VMOVSDZmr %rdi, %xmm0, _, _, _, _
+ ; CHECK: %xmm0 = VMOVSDrm %rip, _, _, _, _
+ %xmm0 = VMOVSDZrm %rip, _, _, _, _
+ ; CHECK: %xmm0 = VMOVSDrr %xmm0, _
+ %xmm0 = VMOVSDZrr %xmm0, _
+ ; CHECK: VMOVSSmr %rdi, %xmm0, _, _, _, _
+ VMOVSSZmr %rdi, %xmm0, _, _, _, _
+ ; CHECK: %xmm0 = VMOVSSrm %rip, _, _, _, _
+ %xmm0 = VMOVSSZrm %rip, _, _, _, _
+ ; CHECK: %xmm0 = VMOVSSrr %xmm0, _
+ %xmm0 = VMOVSSZrr %xmm0, _
+ ; CHECK: %xmm0 = VMOVSSrr_REV %xmm0, _
+ %xmm0 = VMOVSSZrr_REV %xmm0, _
+ ; CHECK: %xmm0 = VMOV64toPQIrr %rdi
+ %xmm0 = VMOV64toPQIZrr %rdi
+ ; CHECK: %xmm0 = VMOV64toSDrr %rdi
+ %xmm0 = VMOV64toSDZrr %rdi
+ ; CHECK: %xmm0 = VMOVDI2PDIrm %rip, _, _, _, _
+ %xmm0 = VMOVDI2PDIZrm %rip, _, _, _, _
+ ; CHECK: %xmm0 = VMOVDI2PDIrr %edi
+ %xmm0 = VMOVDI2PDIZrr %edi
+ ; CHECK: %xmm0 = VMOVLHPSrr %xmm0, _
+ %xmm0 = VMOVLHPSZrr %xmm0, _
+ ; CHECK: %xmm0 = VMOVHLPSrr %xmm0, _
+ %xmm0 = VMOVHLPSZrr %xmm0, _
+ ; CHECK: VMOVPDI2DImr %rdi, %xmm0, _, _, _, _
+ VMOVPDI2DIZmr %rdi, %xmm0, _, _, _, _
+ ; CHECK: %edi = VMOVPDI2DIrr %xmm0
+ %edi = VMOVPDI2DIZrr %xmm0
+ ; CHECK: VMOVPQI2QImr %rdi, %xmm0, _, _, _, _
+ VMOVPQI2QIZmr %rdi, %xmm0, _, _, _, _
+ ; CHECK: %rdi = VMOVPQIto64rr %xmm0
+ %rdi = VMOVPQIto64Zrr %xmm0
+ ; CHECK: %xmm0 = VMOVQI2PQIrm %rip, _, _, _, _
+ %xmm0 = VMOVQI2PQIZrm %rip, _, _, _, _
+ ; CHECK: %xmm0 = VMOVZPQILo2PQIrr %xmm0
+ %xmm0 = VMOVZPQILo2PQIZrr %xmm0
+ ; CHECK: Int_VCOMISDrm %xmm0, %rdi, _, _, _, _, implicit-def %eflags
+ Int_VCOMISDZrm %xmm0, %rdi, _, _, _, _, implicit-def %eflags
+ ; CHECK: Int_VCOMISDrr %xmm0, %xmm1, implicit-def %eflags
+ Int_VCOMISDZrr %xmm0, %xmm1, implicit-def %eflags
+ ; CHECK: Int_VCOMISSrm %xmm0, %rdi, _, _, _, _, implicit-def %eflags
+ Int_VCOMISSZrm %xmm0, %rdi, _, _, _, _, implicit-def %eflags
+ ; CHECK: Int_VCOMISSrr %xmm0, %xmm1, implicit-def %eflags
+ Int_VCOMISSZrr %xmm0, %xmm1, implicit-def %eflags
+ ; CHECK: Int_VUCOMISDrm %xmm0, %rdi, _, _, _, _, implicit-def %eflags
+ Int_VUCOMISDZrm %xmm0, %rdi, _, _, _, _, implicit-def %eflags
+ ; CHECK: Int_VUCOMISDrr %xmm0, %xmm1, implicit-def %eflags
+ Int_VUCOMISDZrr %xmm0, %xmm1, implicit-def %eflags
+ ; CHECK: Int_VUCOMISSrm %xmm0, %rdi, _, _, _, _, implicit-def %eflags
+ Int_VUCOMISSZrm %xmm0, %rdi, _, _, _, _, implicit-def %eflags
+ ; CHECK: Int_VUCOMISSrr %xmm0, %xmm1, implicit-def %eflags
+ Int_VUCOMISSZrr %xmm0, %xmm1, implicit-def %eflags
+ ; CHECK: VCOMISDrm %xmm0, %rdi, _, _, _, _, implicit-def %eflags
+ VCOMISDZrm %xmm0, %rdi, _, _, _, _, implicit-def %eflags
+ ; CHECK: VCOMISDrr %xmm0, %xmm1, implicit-def %eflags
+ VCOMISDZrr %xmm0, %xmm1, implicit-def %eflags
+ ; CHECK: VCOMISSrm %xmm0, %rdi, _, _, _, _, implicit-def %eflags
+ VCOMISSZrm %xmm0, %rdi, _, _, _, _, implicit-def %eflags
+ ; CHECK: VCOMISSrr %xmm0, %xmm1, implicit-def %eflags
+ VCOMISSZrr %xmm0, %xmm1, implicit-def %eflags
+ ; CHECK: VUCOMISDrm %xmm0, %rdi, _, _, _, _, implicit-def %eflags
+ VUCOMISDZrm %xmm0, %rdi, _, _, _, _, implicit-def %eflags
+ ; CHECK: VUCOMISDrr %xmm0, %xmm1, implicit-def %eflags
+ VUCOMISDZrr %xmm0, %xmm1, implicit-def %eflags
+ ; CHECK: VUCOMISSrm %xmm0, %rdi, _, _, _, _, implicit-def %eflags
+ VUCOMISSZrm %xmm0, %rdi, _, _, _, _, implicit-def %eflags
+ ; CHECK: VUCOMISSrr %xmm0, %xmm1, implicit-def %eflags
+ VUCOMISSZrr %xmm0, %xmm1, implicit-def %eflags
+
+ RET 0, %zmm0, %zmm1
+...
+---
+ # CHECK-LABEL: name: evex_z256_to_evex_test
+ # CHECK: bb.0:
+
+name: evex_z256_to_evex_test
+body: |
+ bb.0:
+ ; CHECK: VMOVAPDZ256mr %rdi, 1, _, 0, _, %ymm16
+ VMOVAPDZ256mr %rdi, 1, _, 0, _, %ymm16
+ ; CHECK: %ymm16 = VMOVAPDZ256rm %rip, 1, _, %rax, _
+ %ymm16 = VMOVAPDZ256rm %rip, 1, _, %rax, _
+ ; CHECK: %ymm16 = VMOVAPDZ256rr %ymm16
+ %ymm16 = VMOVAPDZ256rr %ymm16
+ ; CHECK: %ymm16 = VMOVAPDZ256rr_REV %ymm16
+ %ymm16 = VMOVAPDZ256rr_REV %ymm16
+ ; CHECK: VMOVAPSZ256mr %rdi, 1, _, 0, _, %ymm16
+ VMOVAPSZ256mr %rdi, 1, _, 0, _, %ymm16
+ ; CHECK: %ymm16 = VMOVAPSZ256rm %rip, 1, _, %rax, _
+ %ymm16 = VMOVAPSZ256rm %rip, 1, _, %rax, _
+ ; CHECK: %ymm16 = VMOVAPSZ256rr %ymm16
+ %ymm16 = VMOVAPSZ256rr %ymm16
+ ; CHECK: %ymm16 = VMOVAPSZ256rr_REV %ymm16
+ %ymm16 = VMOVAPSZ256rr_REV %ymm16
+ ; CHECK: %ymm16 = VMOVDDUPZ256rm %rip, 1, _, %rax, _
+ %ymm16 = VMOVDDUPZ256rm %rip, 1, _, %rax, _
+ ; CHECK: %ymm16 = VMOVDDUPZ256rr %ymm16
+ %ymm16 = VMOVDDUPZ256rr %ymm16
+ ; CHECK: VMOVDQA32Z256mr %rdi, 1, _, 0, _, %ymm16
+ VMOVDQA32Z256mr %rdi, 1, _, 0, _, %ymm16
+ ; CHECK: %ymm16 = VMOVDQA32Z256rm %rip, 1, _, %rax, _
+ %ymm16 = VMOVDQA32Z256rm %rip, 1, _, %rax, _
+ ; CHECK: %ymm16 = VMOVDQA32Z256rr %ymm16
+ %ymm16 = VMOVDQA32Z256rr %ymm16
+ ; CHECK: %ymm16 = VMOVDQA32Z256rr_REV %ymm16
+ %ymm16 = VMOVDQA32Z256rr_REV %ymm16
+ ; CHECK: VMOVDQA64Z256mr %rdi, 1, _, 0, _, %ymm16
+ VMOVDQA64Z256mr %rdi, 1, _, 0, _, %ymm16
+ ; CHECK: %ymm16 = VMOVDQA64Z256rm %rip, 1, _, %rax, _
+ %ymm16 = VMOVDQA64Z256rm %rip, 1, _, %rax, _
+ ; CHECK: %ymm16 = VMOVDQA64Z256rr %ymm16
+ %ymm16 = VMOVDQA64Z256rr %ymm16
+ ; CHECK: %ymm16 = VMOVDQA64Z256rr_REV %ymm16
+ %ymm16 = VMOVDQA64Z256rr_REV %ymm16
+ ; CHECK: VMOVDQU16Z256mr %rdi, 1, _, 0, _, %ymm16
+ VMOVDQU16Z256mr %rdi, 1, _, 0, _, %ymm16
+ ; CHECK: %ymm16 = VMOVDQU16Z256rm %rip, 1, _, %rax, _
+ %ymm16 = VMOVDQU16Z256rm %rip, 1, _, %rax, _
+ ; CHECK: %ymm16 = VMOVDQU16Z256rr %ymm16
+ %ymm16 = VMOVDQU16Z256rr %ymm16
+ ; CHECK: %ymm16 = VMOVDQU16Z256rr_REV %ymm16
+ %ymm16 = VMOVDQU16Z256rr_REV %ymm16
+ ; CHECK: VMOVDQU32Z256mr %rdi, 1, _, 0, _, %ymm16
+ VMOVDQU32Z256mr %rdi, 1, _, 0, _, %ymm16
+ ; CHECK: %ymm16 = VMOVDQU32Z256rm %rip, 1, _, %rax, _
+ %ymm16 = VMOVDQU32Z256rm %rip, 1, _, %rax, _
+ ; CHECK: %ymm16 = VMOVDQU32Z256rr %ymm16
+ %ymm16 = VMOVDQU32Z256rr %ymm16
+ ; CHECK: %ymm16 = VMOVDQU32Z256rr_REV %ymm16
+ %ymm16 = VMOVDQU32Z256rr_REV %ymm16
+ ; CHECK: VMOVDQU64Z256mr %rdi, 1, _, 0, _, %ymm16
+ VMOVDQU64Z256mr %rdi, 1, _, 0, _, %ymm16
+ ; CHECK: %ymm16 = VMOVDQU64Z256rm %rip, 1, _, %rax, _
+ %ymm16 = VMOVDQU64Z256rm %rip, 1, _, %rax, _
+ ; CHECK: %ymm16 = VMOVDQU64Z256rr %ymm16
+ %ymm16 = VMOVDQU64Z256rr %ymm16
+ ; CHECK: %ymm16 = VMOVDQU64Z256rr_REV %ymm16
+ %ymm16 = VMOVDQU64Z256rr_REV %ymm16
+ ; CHECK: VMOVDQU8Z256mr %rdi, 1, _, 0, _, %ymm16
+ VMOVDQU8Z256mr %rdi, 1, _, 0, _, %ymm16
+ ; CHECK: %ymm16 = VMOVDQU8Z256rm %rip, 1, _, %rax, _
+ %ymm16 = VMOVDQU8Z256rm %rip, 1, _, %rax, _
+ ; CHECK: %ymm16 = VMOVDQU8Z256rr %ymm16
+ %ymm16 = VMOVDQU8Z256rr %ymm16
+ ; CHECK: %ymm16 = VMOVDQU8Z256rr_REV %ymm16
+ %ymm16 = VMOVDQU8Z256rr_REV %ymm16
+ ; CHECK: %ymm16 = VMOVNTDQAZ256rm %rip, 1, _, %rax, _
+ %ymm16 = VMOVNTDQAZ256rm %rip, 1, _, %rax, _
+ ; CHECK: VMOVNTDQZ256mr %rdi, 1, _, 0, _, %ymm16
+ VMOVNTDQZ256mr %rdi, 1, _, 0, _, %ymm16
+ ; CHECK: VMOVNTPDZ256mr %rdi, 1, _, 0, _, %ymm16
+ VMOVNTPDZ256mr %rdi, 1, _, 0, _, %ymm16
+ ; CHECK: VMOVNTPSZ256mr %rdi, 1, _, 0, _, %ymm16
+ VMOVNTPSZ256mr %rdi, 1, _, 0, _, %ymm16
+ ; CHECK: %ymm16 = VMOVSHDUPZ256rm %rip, 1, _, %rax, _
+ %ymm16 = VMOVSHDUPZ256rm %rip, 1, _, %rax, _
+ ; CHECK: %ymm16 = VMOVSHDUPZ256rr %ymm16
+ %ymm16 = VMOVSHDUPZ256rr %ymm16
+ ; CHECK: %ymm16 = VMOVSLDUPZ256rm %rip, 1, _, %rax, _
+ %ymm16 = VMOVSLDUPZ256rm %rip, 1, _, %rax, _
+ ; CHECK: %ymm16 = VMOVSLDUPZ256rr %ymm16
+ %ymm16 = VMOVSLDUPZ256rr %ymm16
+ ; CHECK: VMOVUPDZ256mr %rdi, 1, _, 0, _, %ymm16
+ VMOVUPDZ256mr %rdi, 1, _, 0, _, %ymm16
+ ; CHECK: %ymm16 = VMOVUPDZ256rm %rip, 1, _, %rax, _
+ %ymm16 = VMOVUPDZ256rm %rip, 1, _, %rax, _
+ ; CHECK: %ymm16 = VMOVUPDZ256rr %ymm16
+ %ymm16 = VMOVUPDZ256rr %ymm16
+ ; CHECK: %ymm16 = VMOVUPDZ256rr_REV %ymm16
+ %ymm16 = VMOVUPDZ256rr_REV %ymm16
+ ; CHECK: VMOVUPSZ256mr %rdi, 1, _, 0, _, %ymm16
+ VMOVUPSZ256mr %rdi, 1, _, 0, _, %ymm16
+ ; CHECK: %ymm16 = VPANDDZ256rm %ymm16, %rip, 1, _, %rax, _
+ %ymm16 = VPANDDZ256rm %ymm16, %rip, 1, _, %rax, _
+ ; CHECK: %ymm16 = VPANDDZ256rr %ymm16, %ymm1
+ %ymm16 = VPANDDZ256rr %ymm16, %ymm1
+ ; CHECK: %ymm16 = VPANDQZ256rm %ymm16, %rip, 1, _, %rax, _
+ %ymm16 = VPANDQZ256rm %ymm16, %rip, 1, _, %rax, _
+ ; CHECK: %ymm16 = VPANDQZ256rr %ymm16, %ymm1
+ %ymm16 = VPANDQZ256rr %ymm16, %ymm1
+ ; CHECK: %ymm16 = VPAVGBZ256rm %ymm16, %rip, 1, _, %rax, _
+ %ymm16 = VPAVGBZ256rm %ymm16, %rip, 1, _, %rax, _
+ ; CHECK: %ymm16 = VPAVGBZ256rr %ymm16, %ymm1
+ %ymm16 = VPAVGBZ256rr %ymm16, %ymm1
+ ; CHECK: %ymm16 = VPAVGWZ256rm %ymm16, %rip, 1, _, %rax, _
+ %ymm16 = VPAVGWZ256rm %ymm16, %rip, 1, _, %rax, _
+ ; CHECK: %ymm16 = VPAVGWZ256rr %ymm16, %ymm1
+ %ymm16 = VPAVGWZ256rr %ymm16, %ymm1
+ ; CHECK: %ymm16 = VPADDBZ256rm %ymm16, %rip, 1, _, %rax, _
+ %ymm16 = VPADDBZ256rm %ymm16, %rip, 1, _, %rax, _
+ ; CHECK: %ymm16 = VPADDBZ256rr %ymm16, %ymm1
+ %ymm16 = VPADDBZ256rr %ymm16, %ymm1
+ ; CHECK: %ymm16 = VPADDDZ256rm %ymm16, %rip, 1, _, %rax, _
+ %ymm16 = VPADDDZ256rm %ymm16, %rip, 1, _, %rax, _
+ ; CHECK: %ymm16 = VPADDDZ256rr %ymm16, %ymm1
+ %ymm16 = VPADDDZ256rr %ymm16, %ymm1
+ ; CHECK: %ymm16 = VPADDQZ256rm %ymm16, %rip, 1, _, %rax, _
+ %ymm16 = VPADDQZ256rm %ymm16, %rip, 1, _, %rax, _
+ ; CHECK: %ymm16 = VPADDQZ256rr %ymm16, %ymm1
+ %ymm16 = VPADDQZ256rr %ymm16, %ymm1
+ ; CHECK: %ymm16 = VPADDSBZ256rm %ymm16, %rip, 1, _, %rax, _
+ %ymm16 = VPADDSBZ256rm %ymm16, %rip, 1, _, %rax, _
+ ; CHECK: %ymm16 = VPADDSBZ256rr %ymm16, %ymm1
+ %ymm16 = VPADDSBZ256rr %ymm16, %ymm1
+ ; CHECK: %ymm16 = VPADDSWZ256rm %ymm16, %rip, 1, _, %rax, _
+ %ymm16 = VPADDSWZ256rm %ymm16, %rip, 1, _, %rax, _
+ ; CHECK: %ymm16 = VPADDSWZ256rr %ymm16, %ymm1
+ %ymm16 = VPADDSWZ256rr %ymm16, %ymm1
+ ; CHECK: %ymm16 = VPADDUSBZ256rm %ymm16, %rip, 1, _, %rax, _
+ %ymm16 = VPADDUSBZ256rm %ymm16, %rip, 1, _, %rax, _
+ ; CHECK: %ymm16 = VPADDUSBZ256rr %ymm16, %ymm1
+ %ymm16 = VPADDUSBZ256rr %ymm16, %ymm1
+ ; CHECK: %ymm16 = VPADDUSWZ256rm %ymm16, %rip, 1, _, %rax, _
+ %ymm16 = VPADDUSWZ256rm %ymm16, %rip, 1, _, %rax, _
+ ; CHECK: %ymm16 = VPADDUSWZ256rr %ymm16, %ymm1
+ %ymm16 = VPADDUSWZ256rr %ymm16, %ymm1
+ ; CHECK: %ymm16 = VPADDWZ256rm %ymm16, %rip, 1, _, %rax, _
+ %ymm16 = VPADDWZ256rm %ymm16, %rip, 1, _, %rax, _
+ ; CHECK: %ymm16 = VPADDWZ256rr %ymm16, %ymm1
+ %ymm16 = VPADDWZ256rr %ymm16, %ymm1
+ ; CHECK: %ymm16 = VMULPDZ256rm %ymm16, %rip, 1, _, %rax, _
+ %ymm16 = VMULPDZ256rm %ymm16, %rip, 1, _, %rax, _
+ ; CHECK: %ymm16 = VMULPDZ256rr %ymm16, %ymm1
+ %ymm16 = VMULPDZ256rr %ymm16, %ymm1
+ ; CHECK: %ymm16 = VMULPSZ256rm %ymm16, %rip, 1, _, %rax, _
+ %ymm16 = VMULPSZ256rm %ymm16, %rip, 1, _, %rax, _
+ ; CHECK: %ymm16 = VMULPSZ256rr %ymm16, %ymm1
+ %ymm16 = VMULPSZ256rr %ymm16, %ymm1
+ ; CHECK: %ymm16 = VORPDZ256rm %ymm16, %rip, 1, _, %rax, _
+ %ymm16 = VORPDZ256rm %ymm16, %rip, 1, _, %rax, _
+ ; CHECK: %ymm16 = VORPDZ256rr %ymm16, %ymm1
+ %ymm16 = VORPDZ256rr %ymm16, %ymm1
+ ; CHECK: %ymm16 = VORPSZ256rm %ymm16, %rip, 1, _, %rax, _
+ %ymm16 = VORPSZ256rm %ymm16, %rip, 1, _, %rax, _
+ ; CHECK: %ymm16 = VORPSZ256rr %ymm16, %ymm1
+ %ymm16 = VORPSZ256rr %ymm16, %ymm1
+ ; CHECK: %ymm16 = VPMADDUBSWZ256rm %ymm16, %rip, 1, _, %rax, _
+ %ymm16 = VPMADDUBSWZ256rm %ymm16, %rip, 1, _, %rax, _
+ ; CHECK: %ymm16 = VPMADDUBSWZ256rr %ymm16, %ymm1
+ %ymm16 = VPMADDUBSWZ256rr %ymm16, %ymm1
+ ; CHECK: %ymm16 = VPMADDWDZ256rm %ymm16, %rip, 1, _, %rax, _
+ %ymm16 = VPMADDWDZ256rm %ymm16, %rip, 1, _, %rax, _
+ ; CHECK: %ymm16 = VPMADDWDZ256rr %ymm16, %ymm1
+ %ymm16 = VPMADDWDZ256rr %ymm16, %ymm1
+ ; CHECK: %ymm16 = VPMAXSBZ256rm %ymm16, %rip, 1, _, %rax, _
+ %ymm16 = VPMAXSBZ256rm %ymm16, %rip, 1, _, %rax, _
+ ; CHECK: %ymm16 = VPMAXSBZ256rr %ymm16, %ymm1
+ %ymm16 = VPMAXSBZ256rr %ymm16, %ymm1
+ ; CHECK: %ymm16 = VPMAXSDZ256rm %ymm16, %rip, 1, _, %rax, _
+ %ymm16 = VPMAXSDZ256rm %ymm16, %rip, 1, _, %rax, _
+ ; CHECK: %ymm16 = VPMAXSDZ256rr %ymm16, %ymm1
+ %ymm16 = VPMAXSDZ256rr %ymm16, %ymm1
+ ; CHECK: %ymm16 = VPMAXSWZ256rm %ymm16, %rip, 1, _, %rax, _
+ %ymm16 = VPMAXSWZ256rm %ymm16, %rip, 1, _, %rax, _
+ ; CHECK: %ymm16 = VPMAXSWZ256rr %ymm16, %ymm1
+ %ymm16 = VPMAXSWZ256rr %ymm16, %ymm1
+ ; CHECK: %ymm16 = VPMAXUBZ256rm %ymm16, %rip, 1, _, %rax, _
+ %ymm16 = VPMAXUBZ256rm %ymm16, %rip, 1, _, %rax, _
+ ; CHECK: %ymm16 = VPMAXUBZ256rr %ymm16, %ymm1
+ %ymm16 = VPMAXUBZ256rr %ymm16, %ymm1
+ ; CHECK: %ymm16 = VPMAXUDZ256rm %ymm16, %rip, 1, _, %rax, _
+ %ymm16 = VPMAXUDZ256rm %ymm16, %rip, 1, _, %rax, _
+ ; CHECK: %ymm16 = VPMAXUDZ256rr %ymm16, %ymm1
+ %ymm16 = VPMAXUDZ256rr %ymm16, %ymm1
+ ; CHECK: %ymm16 = VPMAXUWZ256rm %ymm16, %rip, 1, _, %rax, _
+ %ymm16 = VPMAXUWZ256rm %ymm16, %rip, 1, _, %rax, _
+ ; CHECK: %ymm16 = VPMAXUWZ256rr %ymm16, %ymm1
+ %ymm16 = VPMAXUWZ256rr %ymm16, %ymm1
+ ; CHECK: %ymm16 = VPMINSBZ256rm %ymm16, %rip, 1, _, %rax, _
+ %ymm16 = VPMINSBZ256rm %ymm16, %rip, 1, _, %rax, _
+ ; CHECK: %ymm16 = VPMINSBZ256rr %ymm16, %ymm1
+ %ymm16 = VPMINSBZ256rr %ymm16, %ymm1
+ ; CHECK: %ymm16 = VPMINSDZ256rm %ymm16, %rip, 1, _, %rax, _
+ %ymm16 = VPMINSDZ256rm %ymm16, %rip, 1, _, %rax, _
+ ; CHECK: %ymm16 = VPMINSDZ256rr %ymm16, %ymm1
+ %ymm16 = VPMINSDZ256rr %ymm16, %ymm1
+ ; CHECK: %ymm16 = VPMINSWZ256rm %ymm16, %rip, 1, _, %rax, _
+ %ymm16 = VPMINSWZ256rm %ymm16, %rip, 1, _, %rax, _
+ ; CHECK: %ymm16 = VPMINSWZ256rr %ymm16, %ymm1
+ %ymm16 = VPMINSWZ256rr %ymm16, %ymm1
+ ; CHECK: %ymm16 = VPMINUBZ256rm %ymm16, %rip, 1, _, %rax, _
+ %ymm16 = VPMINUBZ256rm %ymm16, %rip, 1, _, %rax, _
+ ; CHECK: %ymm16 = VPMINUBZ256rr %ymm16, %ymm1
+ %ymm16 = VPMINUBZ256rr %ymm16, %ymm1
+ ; CHECK: %ymm16 = VPMINUDZ256rm %ymm16, %rip, 1, _, %rax, _
+ %ymm16 = VPMINUDZ256rm %ymm16, %rip, 1, _, %rax, _
+ ; CHECK: %ymm16 = VPMINUDZ256rr %ymm16, %ymm1
+ %ymm16 = VPMINUDZ256rr %ymm16, %ymm1
+ ; CHECK: %ymm16 = VPMINUWZ256rm %ymm16, %rip, 1, _, %rax, _
+ %ymm16 = VPMINUWZ256rm %ymm16, %rip, 1, _, %rax, _
+ ; CHECK: %ymm16 = VPMINUWZ256rr %ymm16, %ymm1
+ %ymm16 = VPMINUWZ256rr %ymm16, %ymm1
+ ; CHECK: %ymm16 = VPMULDQZ256rm %ymm16, %rip, 1, _, %rax, _
+ %ymm16 = VPMULDQZ256rm %ymm16, %rip, 1, _, %rax, _
+ ; CHECK: %ymm16 = VPMULDQZ256rr %ymm16, %ymm1
+ %ymm16 = VPMULDQZ256rr %ymm16, %ymm1
+ ; CHECK: %ymm16 = VPMULHRSWZ256rm %ymm16, %rip, 1, _, %rax, _
+ %ymm16 = VPMULHRSWZ256rm %ymm16, %rip, 1, _, %rax, _
+ ; CHECK: %ymm16 = VPMULHRSWZ256rr %ymm16, %ymm1
+ %ymm16 = VPMULHRSWZ256rr %ymm16, %ymm1
+ ; CHECK: %ymm16 = VPMULHUWZ256rm %ymm16, %rip, 1, _, %rax, _
+ %ymm16 = VPMULHUWZ256rm %ymm16, %rip, 1, _, %rax, _
+ ; CHECK: %ymm16 = VPMULHUWZ256rr %ymm16, %ymm1
+ %ymm16 = VPMULHUWZ256rr %ymm16, %ymm1
+ ; CHECK: %ymm16 = VPMULHWZ256rm %ymm16, %rip, 1, _, %rax, _
+ %ymm16 = VPMULHWZ256rm %ymm16, %rip, 1, _, %rax, _
+ ; CHECK: %ymm16 = VPMULHWZ256rr %ymm16, %ymm1
+ %ymm16 = VPMULHWZ256rr %ymm16, %ymm1
+ ; CHECK: %ymm16 = VPMULLDZ256rm %ymm16, %rip, 1, _, %rax, _
+ %ymm16 = VPMULLDZ256rm %ymm16, %rip, 1, _, %rax, _
+ ; CHECK: %ymm16 = VPMULLDZ256rr %ymm16, %ymm1
+ %ymm16 = VPMULLDZ256rr %ymm16, %ymm1
+ ; CHECK: %ymm16 = VPMULLWZ256rm %ymm16, %rip, 1, _, %rax, _
+ %ymm16 = VPMULLWZ256rm %ymm16, %rip, 1, _, %rax, _
+ ; CHECK: %ymm16 = VPMULLWZ256rr %ymm16, %ymm1
+ %ymm16 = VPMULLWZ256rr %ymm16, %ymm1
+ ; CHECK: %ymm16 = VPMULUDQZ256rm %ymm16, %rip, 1, _, %rax, _
+ %ymm16 = VPMULUDQZ256rm %ymm16, %rip, 1, _, %rax, _
+ ; CHECK: %ymm16 = VPMULUDQZ256rr %ymm16, %ymm1
+ %ymm16 = VPMULUDQZ256rr %ymm16, %ymm1
+ ; CHECK: %ymm16 = VPORDZ256rm %ymm16, %rip, 1, _, %rax, _
+ %ymm16 = VPORDZ256rm %ymm16, %rip, 1, _, %rax, _
+ ; CHECK: %ymm16 = VPORDZ256rr %ymm16, %ymm1
+ %ymm16 = VPORDZ256rr %ymm16, %ymm1
+ ; CHECK: %ymm16 = VPORQZ256rm %ymm16, %rip, 1, _, %rax, _
+ %ymm16 = VPORQZ256rm %ymm16, %rip, 1, _, %rax, _
+ ; CHECK: %ymm16 = VPORQZ256rr %ymm16, %ymm1
+ %ymm16 = VPORQZ256rr %ymm16, %ymm1
+ ; CHECK: %ymm16 = VPSUBBZ256rm %ymm16, %rip, 1, _, %rax, _
+ %ymm16 = VPSUBBZ256rm %ymm16, %rip, 1, _, %rax, _
+ ; CHECK: %ymm16 = VPSUBBZ256rr %ymm16, %ymm1
+ %ymm16 = VPSUBBZ256rr %ymm16, %ymm1
+ ; CHECK: %ymm16 = VPSUBDZ256rm %ymm16, %rip, 1, _, %rax, _
+ %ymm16 = VPSUBDZ256rm %ymm16, %rip, 1, _, %rax, _
+ ; CHECK: %ymm16 = VPSUBDZ256rr %ymm16, %ymm1
+ %ymm16 = VPSUBDZ256rr %ymm16, %ymm1
+ ; CHECK: %ymm16 = VPSUBQZ256rm %ymm16, %rip, 1, _, %rax, _
+ %ymm16 = VPSUBQZ256rm %ymm16, %rip, 1, _, %rax, _
+ ; CHECK: %ymm16 = VPSUBQZ256rr %ymm16, %ymm1
+ %ymm16 = VPSUBQZ256rr %ymm16, %ymm1
+ ; CHECK: %ymm16 = VPSUBSBZ256rm %ymm16, %rip, 1, _, %rax, _
+ %ymm16 = VPSUBSBZ256rm %ymm16, %rip, 1, _, %rax, _
+ ; CHECK: %ymm16 = VPSUBSBZ256rr %ymm16, %ymm1
+ %ymm16 = VPSUBSBZ256rr %ymm16, %ymm1
+ ; CHECK: %ymm16 = VPSUBSWZ256rm %ymm16, %rip, 1, _, %rax, _
+ %ymm16 = VPSUBSWZ256rm %ymm16, %rip, 1, _, %rax, _
+ ; CHECK: %ymm16 = VPSUBSWZ256rr %ymm16, %ymm1
+ %ymm16 = VPSUBSWZ256rr %ymm16, %ymm1
+ ; CHECK: %ymm16 = VPSUBUSBZ256rm %ymm16, %rip, 1, _, %rax, _
+ %ymm16 = VPSUBUSBZ256rm %ymm16, %rip, 1, _, %rax, _
+ ; CHECK: %ymm16 = VPSUBUSBZ256rr %ymm16, %ymm1
+ %ymm16 = VPSUBUSBZ256rr %ymm16, %ymm1
+ ; CHECK: %ymm16 = VPSUBUSWZ256rm %ymm16, %rip, 1, _, %rax, _
+ %ymm16 = VPSUBUSWZ256rm %ymm16, %rip, 1, _, %rax, _
+ ; CHECK: %ymm16 = VPSUBUSWZ256rr %ymm16, %ymm1
+ %ymm16 = VPSUBUSWZ256rr %ymm16, %ymm1
+ ; CHECK: %ymm16 = VPSUBWZ256rm %ymm16, %rip, 1, _, %rax, _
+ %ymm16 = VPSUBWZ256rm %ymm16, %rip, 1, _, %rax, _
+ ; CHECK: %ymm16 = VPSUBWZ256rr %ymm16, %ymm1
+ %ymm16 = VPSUBWZ256rr %ymm16, %ymm1
+ ; CHECK: %ymm16 = VPXORDZ256rm %ymm16, %rip, 1, _, %rax, _
+ %ymm16 = VPXORDZ256rm %ymm16, %rip, 1, _, %rax, _
+ ; CHECK: %ymm16 = VPXORDZ256rr %ymm16, %ymm1
+ %ymm16 = VPXORDZ256rr %ymm16, %ymm1
+ ; CHECK: %ymm16 = VPXORQZ256rm %ymm16, %rip, 1, _, %rax, _
+ %ymm16 = VPXORQZ256rm %ymm16, %rip, 1, _, %rax, _
+ ; CHECK: %ymm16 = VPXORQZ256rr %ymm16, %ymm1
+ %ymm16 = VPXORQZ256rr %ymm16, %ymm1
+ ; CHECK: %ymm16 = VADDPDZ256rm %ymm16, %rip, 1, _, %rax, _
+ %ymm16 = VADDPDZ256rm %ymm16, %rip, 1, _, %rax, _
+ ; CHECK: %ymm16 = VADDPDZ256rr %ymm16, %ymm1
+ %ymm16 = VADDPDZ256rr %ymm16, %ymm1
+ ; CHECK: %ymm16 = VADDPSZ256rm %ymm16, %rip, 1, _, %rax, _
+ %ymm16 = VADDPSZ256rm %ymm16, %rip, 1, _, %rax, _
+ ; CHECK: %ymm16 = VADDPSZ256rr %ymm16, %ymm1
+ %ymm16 = VADDPSZ256rr %ymm16, %ymm1
+ ; CHECK: %ymm16 = VANDNPDZ256rm %ymm16, %rip, 1, _, %rax, _
+ %ymm16 = VANDNPDZ256rm %ymm16, %rip, 1, _, %rax, _
+ ; CHECK: %ymm16 = VANDNPDZ256rr %ymm16, %ymm1
+ %ymm16 = VANDNPDZ256rr %ymm16, %ymm1
+ ; CHECK: %ymm16 = VANDNPSZ256rm %ymm16, %rip, 1, _, %rax, _
+ %ymm16 = VANDNPSZ256rm %ymm16, %rip, 1, _, %rax, _
+ ; CHECK: %ymm16 = VANDNPSZ256rr %ymm16, %ymm1
+ %ymm16 = VANDNPSZ256rr %ymm16, %ymm1
+ ; CHECK: %ymm16 = VANDPDZ256rm %ymm16, %rip, 1, _, %rax, _
+ %ymm16 = VANDPDZ256rm %ymm16, %rip, 1, _, %rax, _
+ ; CHECK: %ymm16 = VANDPDZ256rr %ymm16, %ymm1
+ %ymm16 = VANDPDZ256rr %ymm16, %ymm1
+ ; CHECK: %ymm16 = VANDPSZ256rm %ymm16, %rip, 1, _, %rax, _
+ %ymm16 = VANDPSZ256rm %ymm16, %rip, 1, _, %rax, _
+ ; CHECK: %ymm16 = VANDPSZ256rr %ymm16, %ymm1
+ %ymm16 = VANDPSZ256rr %ymm16, %ymm1
+ ; CHECK: %ymm16 = VDIVPDZ256rm %ymm16, %rip, 1, _, %rax, _
+ %ymm16 = VDIVPDZ256rm %ymm16, %rip, 1, _, %rax, _
+ ; CHECK: %ymm16 = VDIVPDZ256rr %ymm16, %ymm1
+ %ymm16 = VDIVPDZ256rr %ymm16, %ymm1
+ ; CHECK: %ymm16 = VDIVPSZ256rm %ymm16, %rip, 1, _, %rax, _
+ %ymm16 = VDIVPSZ256rm %ymm16, %rip, 1, _, %rax, _
+ ; CHECK: %ymm16 = VDIVPSZ256rr %ymm16, %ymm1
+ %ymm16 = VDIVPSZ256rr %ymm16, %ymm1
+ ; CHECK: %ymm16 = VMAXCPDZ256rm %ymm16, %rip, 1, _, %rax, _
+ %ymm16 = VMAXCPDZ256rm %ymm16, %rip, 1, _, %rax, _
+ ; CHECK: %ymm16 = VMAXCPDZ256rr %ymm16, %ymm1
+ %ymm16 = VMAXCPDZ256rr %ymm16, %ymm1
+ ; CHECK: %ymm16 = VMAXCPSZ256rm %ymm16, %rip, 1, _, %rax, _
+ %ymm16 = VMAXCPSZ256rm %ymm16, %rip, 1, _, %rax, _
+ ; CHECK: %ymm16 = VMAXCPSZ256rr %ymm16, %ymm1
+ %ymm16 = VMAXCPSZ256rr %ymm16, %ymm1
+ ; CHECK: %ymm16 = VMAXPDZ256rm %ymm16, %rip, 1, _, %rax, _
+ %ymm16 = VMAXPDZ256rm %ymm16, %rip, 1, _, %rax, _
+ ; CHECK: %ymm16 = VMAXPDZ256rr %ymm16, %ymm1
+ %ymm16 = VMAXPDZ256rr %ymm16, %ymm1
+ ; CHECK: %ymm16 = VMAXPSZ256rm %ymm16, %rip, 1, _, %rax, _
+ %ymm16 = VMAXPSZ256rm %ymm16, %rip, 1, _, %rax, _
+ ; CHECK: %ymm16 = VMAXPSZ256rr %ymm16, %ymm1
+ %ymm16 = VMAXPSZ256rr %ymm16, %ymm1
+ ; CHECK: %ymm16 = VMINCPDZ256rm %ymm16, %rip, 1, _, %rax, _
+ %ymm16 = VMINCPDZ256rm %ymm16, %rip, 1, _, %rax, _
+ ; CHECK: %ymm16 = VMINCPDZ256rr %ymm16, %ymm1
+ %ymm16 = VMINCPDZ256rr %ymm16, %ymm1
+ ; CHECK: %ymm16 = VMINCPSZ256rm %ymm16, %rip, 1, _, %rax, _
+ %ymm16 = VMINCPSZ256rm %ymm16, %rip, 1, _, %rax, _
+ ; CHECK: %ymm16 = VMINCPSZ256rr %ymm16, %ymm1
+ %ymm16 = VMINCPSZ256rr %ymm16, %ymm1
+ ; CHECK: %ymm16 = VMINPDZ256rm %ymm16, %rip, 1, _, %rax, _
+ %ymm16 = VMINPDZ256rm %ymm16, %rip, 1, _, %rax, _
+ ; CHECK: %ymm16 = VMINPDZ256rr %ymm16, %ymm1
+ %ymm16 = VMINPDZ256rr %ymm16, %ymm1
+ ; CHECK: %ymm16 = VMINPSZ256rm %ymm16, %rip, 1, _, %rax, _
+ %ymm16 = VMINPSZ256rm %ymm16, %rip, 1, _, %rax, _
+ ; CHECK: %ymm16 = VMINPSZ256rr %ymm16, %ymm1
+ %ymm16 = VMINPSZ256rr %ymm16, %ymm1
+ ; CHECK: %ymm16 = VXORPDZ256rm %ymm16, %rip, 1, _, %rax, _
+ %ymm16 = VXORPDZ256rm %ymm16, %rip, 1, _, %rax, _
+ ; CHECK: %ymm16 = VXORPDZ256rr %ymm16, %ymm1
+ %ymm16 = VXORPDZ256rr %ymm16, %ymm1
+ ; CHECK: %ymm16 = VXORPSZ256rm %ymm16, %rip, 1, _, %rax, _
+ %ymm16 = VXORPSZ256rm %ymm16, %rip, 1, _, %rax, _
+ ; CHECK: %ymm16 = VXORPSZ256rr %ymm16, %ymm1
+ %ymm16 = VXORPSZ256rr %ymm16, %ymm1
+ ; CHECK: %ymm16 = VPACKSSDWZ256rm %ymm16, %rip, 1, _, %rax, _
+ %ymm16 = VPACKSSDWZ256rm %ymm16, %rip, 1, _, %rax, _
+ ; CHECK: %ymm16 = VPACKSSDWZ256rr %ymm16, %ymm1
+ %ymm16 = VPACKSSDWZ256rr %ymm16, %ymm1
+ ; CHECK: %ymm16 = VPACKSSWBZ256rm %ymm16, %rip, 1, _, %rax, _
+ %ymm16 = VPACKSSWBZ256rm %ymm16, %rip, 1, _, %rax, _
+ ; CHECK: %ymm16 = VPACKSSWBZ256rr %ymm16, %ymm1
+ %ymm16 = VPACKSSWBZ256rr %ymm16, %ymm1
+ ; CHECK: %ymm16 = VPACKUSDWZ256rm %ymm16, %rip, 1, _, %rax, _
+ %ymm16 = VPACKUSDWZ256rm %ymm16, %rip, 1, _, %rax, _
+ ; CHECK: %ymm16 = VPACKUSDWZ256rr %ymm16, %ymm1
+ %ymm16 = VPACKUSDWZ256rr %ymm16, %ymm1
+ ; CHECK: %ymm16 = VPACKUSWBZ256rm %ymm16, %rip, 1, _, %rax, _
+ %ymm16 = VPACKUSWBZ256rm %ymm16, %rip, 1, _, %rax, _
+ ; CHECK: %ymm16 = VPACKUSWBZ256rr %ymm16, %ymm1
+ %ymm16 = VPACKUSWBZ256rr %ymm16, %ymm1
+ ; CHECK: %ymm16 = VUNPCKHPDZ256rm %ymm16, %rip, 1, _, %rax, _
+ %ymm16 = VUNPCKHPDZ256rm %ymm16, %rip, 1, _, %rax, _
+ ; CHECK: %ymm16 = VUNPCKHPDZ256rr %ymm16, %ymm1
+ %ymm16 = VUNPCKHPDZ256rr %ymm16, %ymm1
+ ; CHECK: %ymm16 = VUNPCKHPSZ256rm %ymm16, %rip, 1, _, %rax, _
+ %ymm16 = VUNPCKHPSZ256rm %ymm16, %rip, 1, _, %rax, _
+ ; CHECK: %ymm16 = VUNPCKHPSZ256rr %ymm16, %ymm1
+ %ymm16 = VUNPCKHPSZ256rr %ymm16, %ymm1
+ ; CHECK: %ymm16 = VUNPCKLPDZ256rm %ymm16, %rip, 1, _, %rax, _
+ %ymm16 = VUNPCKLPDZ256rm %ymm16, %rip, 1, _, %rax, _
+ ; CHECK: %ymm16 = VUNPCKLPDZ256rr %ymm16, %ymm1
+ %ymm16 = VUNPCKLPDZ256rr %ymm16, %ymm1
+ ; CHECK: %ymm16 = VUNPCKLPSZ256rm %ymm16, %rip, 1, _, %rax, _
+ %ymm16 = VUNPCKLPSZ256rm %ymm16, %rip, 1, _, %rax, _
+ ; CHECK: %ymm16 = VUNPCKLPSZ256rr %ymm16, %ymm1
+ %ymm16 = VUNPCKLPSZ256rr %ymm16, %ymm1
+ ; CHECK: %ymm16 = VSUBPDZ256rm %ymm16, %rip, 1, _, %rax, _
+ %ymm16 = VSUBPDZ256rm %ymm16, %rip, 1, _, %rax, _
+ ; CHECK: %ymm16 = VSUBPDZ256rr %ymm16, %ymm1
+ %ymm16 = VSUBPDZ256rr %ymm16, %ymm1
+ ; CHECK: %ymm16 = VSUBPSZ256rm %ymm16, %rip, 1, _, %rax, _
+ %ymm16 = VSUBPSZ256rm %ymm16, %rip, 1, _, %rax, _
+ ; CHECK: %ymm16 = VSUBPSZ256rr %ymm16, %ymm1
+ %ymm16 = VSUBPSZ256rr %ymm16, %ymm1
+ ; CHECK: %ymm16 = VPUNPCKHBWZ256rm %ymm16, %rip, 1, _, %rax, _
+ %ymm16 = VPUNPCKHBWZ256rm %ymm16, %rip, 1, _, %rax, _
+ ; CHECK: %ymm16 = VPUNPCKHBWZ256rr %ymm16, %ymm1
+ %ymm16 = VPUNPCKHBWZ256rr %ymm16, %ymm1
+ ; CHECK: %ymm16 = VPUNPCKHDQZ256rm %ymm16, %rip, 1, _, %rax, _
+ %ymm16 = VPUNPCKHDQZ256rm %ymm16, %rip, 1, _, %rax, _
+ ; CHECK: %ymm16 = VPUNPCKHDQZ256rr %ymm16, %ymm1
+ %ymm16 = VPUNPCKHDQZ256rr %ymm16, %ymm1
+ ; CHECK: %ymm16 = VPUNPCKHQDQZ256rm %ymm16, %rip, 1, _, %rax, _
+ %ymm16 = VPUNPCKHQDQZ256rm %ymm16, %rip, 1, _, %rax, _
+ ; CHECK: %ymm16 = VPUNPCKHQDQZ256rr %ymm16, %ymm1
+ %ymm16 = VPUNPCKHQDQZ256rr %ymm16, %ymm1
+ ; CHECK: %ymm16 = VPUNPCKHWDZ256rm %ymm16, %rip, 1, _, %rax, _
+ %ymm16 = VPUNPCKHWDZ256rm %ymm16, %rip, 1, _, %rax, _
+ ; CHECK: %ymm16 = VPUNPCKHWDZ256rr %ymm16, %ymm1
+ %ymm16 = VPUNPCKHWDZ256rr %ymm16, %ymm1
+ ; CHECK: %ymm16 = VPUNPCKLBWZ256rm %ymm16, %rip, 1, _, %rax, _
+ %ymm16 = VPUNPCKLBWZ256rm %ymm16, %rip, 1, _, %rax, _
+ ; CHECK: %ymm16 = VPUNPCKLBWZ256rr %ymm16, %ymm1
+ %ymm16 = VPUNPCKLBWZ256rr %ymm16, %ymm1
+ ; CHECK: %ymm16 = VPUNPCKLDQZ256rm %ymm16, %rip, 1, _, %rax, _
+ %ymm16 = VPUNPCKLDQZ256rm %ymm16, %rip, 1, _, %rax, _
+ ; CHECK: %ymm16 = VPUNPCKLDQZ256rr %ymm16, %ymm1
+ %ymm16 = VPUNPCKLDQZ256rr %ymm16, %ymm1
+ ; CHECK: %ymm16 = VPUNPCKLQDQZ256rm %ymm16, %rip, 1, _, %rax, _
+ %ymm16 = VPUNPCKLQDQZ256rm %ymm16, %rip, 1, _, %rax, _
+ ; CHECK: %ymm16 = VPUNPCKLQDQZ256rr %ymm16, %ymm1
+ %ymm16 = VPUNPCKLQDQZ256rr %ymm16, %ymm1
+ ; CHECK: %ymm16 = VPUNPCKLWDZ256rm %ymm16, %rip, 1, _, %rax, _
+ %ymm16 = VPUNPCKLWDZ256rm %ymm16, %rip, 1, _, %rax, _
+ ; CHECK: %ymm16 = VPUNPCKLWDZ256rr %ymm16, %ymm1
+ %ymm16 = VPUNPCKLWDZ256rr %ymm16, %ymm1
+ ; CHECK: %ymm16 = VFMADD132PDZ256m %ymm16, %ymm16, %rsi, 1, _, 0, _
+ %ymm16 = VFMADD132PDZ256m %ymm16, %ymm16, %rsi, 1, _, 0, _
+ ; CHECK: %ymm16 = VFMADD132PDZ256r %ymm16, %ymm1, %ymm2
+ %ymm16 = VFMADD132PDZ256r %ymm16, %ymm1, %ymm2
+ ; CHECK: %ymm16 = VFMADD132PSZ256m %ymm16, %ymm16, %rsi, 1, _, 0, _
+ %ymm16 = VFMADD132PSZ256m %ymm16, %ymm16, %rsi, 1, _, 0, _
+ ; CHECK: %ymm16 = VFMADD132PSZ256r %ymm16, %ymm1, %ymm2
+ %ymm16 = VFMADD132PSZ256r %ymm16, %ymm1, %ymm2
+ ; CHECK: %ymm16 = VFMADD213PDZ256m %ymm16, %ymm16, %rsi, 1, _, 0, _
+ %ymm16 = VFMADD213PDZ256m %ymm16, %ymm16, %rsi, 1, _, 0, _
+ ; CHECK: %ymm16 = VFMADD213PDZ256r %ymm16, %ymm1, %ymm2
+ %ymm16 = VFMADD213PDZ256r %ymm16, %ymm1, %ymm2
+ ; CHECK: %ymm16 = VFMADD213PSZ256m %ymm16, %ymm16, %rsi, 1, _, 0, _
+ %ymm16 = VFMADD213PSZ256m %ymm16, %ymm16, %rsi, 1, _, 0, _
+ ; CHECK: %ymm16 = VFMADD213PSZ256r %ymm16, %ymm1, %ymm2
+ %ymm16 = VFMADD213PSZ256r %ymm16, %ymm1, %ymm2
+ ; CHECK: %ymm16 = VFMADD231PDZ256m %ymm16, %ymm16, %rsi, 1, _, 0, _
+ %ymm16 = VFMADD231PDZ256m %ymm16, %ymm16, %rsi, 1, _, 0, _
+ ; CHECK: %ymm16 = VFMADD231PDZ256r %ymm16, %ymm1, %ymm2
+ %ymm16 = VFMADD231PDZ256r %ymm16, %ymm1, %ymm2
+ ; CHECK: %ymm16 = VFMADD231PSZ256m %ymm16, %ymm16, %rsi, 1, _, 0, _
+ %ymm16 = VFMADD231PSZ256m %ymm16, %ymm16, %rsi, 1, _, 0, _
+ ; CHECK: %ymm16 = VFMADD231PSZ256r %ymm16, %ymm1, %ymm2
+ %ymm16 = VFMADD231PSZ256r %ymm16, %ymm1, %ymm2
+ ; CHECK: %ymm16 = VFMADDSUB132PDZ256m %ymm16, %ymm16, %rsi, 1, _, 0, _
+ %ymm16 = VFMADDSUB132PDZ256m %ymm16, %ymm16, %rsi, 1, _, 0, _
+ ; CHECK: %ymm16 = VFMADDSUB132PDZ256r %ymm16, %ymm1, %ymm2
+ %ymm16 = VFMADDSUB132PDZ256r %ymm16, %ymm1, %ymm2
+ ; CHECK: %ymm16 = VFMADDSUB132PSZ256m %ymm16, %ymm16, %rsi, 1, _, 0, _
+ %ymm16 = VFMADDSUB132PSZ256m %ymm16, %ymm16, %rsi, 1, _, 0, _
+ ; CHECK: %ymm16 = VFMADDSUB132PSZ256r %ymm16, %ymm1, %ymm2
+ %ymm16 = VFMADDSUB132PSZ256r %ymm16, %ymm1, %ymm2
+ ; CHECK: %ymm16 = VFMADDSUB213PDZ256m %ymm16, %ymm16, %rsi, 1, _, 0, _
+ %ymm16 = VFMADDSUB213PDZ256m %ymm16, %ymm16, %rsi, 1, _, 0, _
+ ; CHECK: %ymm16 = VFMADDSUB213PDZ256r %ymm16, %ymm1, %ymm2
+ %ymm16 = VFMADDSUB213PDZ256r %ymm16, %ymm1, %ymm2
+ ; CHECK: %ymm16 = VFMADDSUB213PSZ256m %ymm16, %ymm16, %rsi, 1, _, 0, _
+ %ymm16 = VFMADDSUB213PSZ256m %ymm16, %ymm16, %rsi, 1, _, 0, _
+ ; CHECK: %ymm16 = VFMADDSUB213PSZ256r %ymm16, %ymm1, %ymm2
+ %ymm16 = VFMADDSUB213PSZ256r %ymm16, %ymm1, %ymm2
+ ; CHECK: %ymm16 = VFMADDSUB231PDZ256m %ymm16, %ymm16, %rsi, 1, _, 0, _
+ %ymm16 = VFMADDSUB231PDZ256m %ymm16, %ymm16, %rsi, 1, _, 0, _
+ ; CHECK: %ymm16 = VFMADDSUB231PDZ256r %ymm16, %ymm1, %ymm2
+ %ymm16 = VFMADDSUB231PDZ256r %ymm16, %ymm1, %ymm2
+ ; CHECK: %ymm16 = VFMADDSUB231PSZ256m %ymm16, %ymm16, %rsi, 1, _, 0, _
+ %ymm16 = VFMADDSUB231PSZ256m %ymm16, %ymm16, %rsi, 1, _, 0, _
+ ; CHECK: %ymm16 = VFMADDSUB231PSZ256r %ymm16, %ymm1, %ymm2
+ %ymm16 = VFMADDSUB231PSZ256r %ymm16, %ymm1, %ymm2
+ ; CHECK: %ymm16 = VFMSUB132PDZ256m %ymm16, %ymm16, %rsi, 1, _, 0, _
+ %ymm16 = VFMSUB132PDZ256m %ymm16, %ymm16, %rsi, 1, _, 0, _
+ ; CHECK: %ymm16 = VFMSUB132PDZ256r %ymm16, %ymm1, %ymm2
+ %ymm16 = VFMSUB132PDZ256r %ymm16, %ymm1, %ymm2
+ ; CHECK: %ymm16 = VFMSUB132PSZ256m %ymm16, %ymm16, %rsi, 1, _, 0, _
+ %ymm16 = VFMSUB132PSZ256m %ymm16, %ymm16, %rsi, 1, _, 0, _
+ ; CHECK: %ymm16 = VFMSUB132PSZ256r %ymm16, %ymm1, %ymm2
+ %ymm16 = VFMSUB132PSZ256r %ymm16, %ymm1, %ymm2
+ ; CHECK: %ymm16 = VFMSUB213PDZ256m %ymm16, %ymm16, %rsi, 1, _, 0, _
+ %ymm16 = VFMSUB213PDZ256m %ymm16, %ymm16, %rsi, 1, _, 0, _
+ ; CHECK: %ymm16 = VFMSUB213PDZ256r %ymm16, %ymm1, %ymm2
+ %ymm16 = VFMSUB213PDZ256r %ymm16, %ymm1, %ymm2
+ ; CHECK: %ymm16 = VFMSUB213PSZ256m %ymm16, %ymm16, %rsi, 1, _, 0, _
+ %ymm16 = VFMSUB213PSZ256m %ymm16, %ymm16, %rsi, 1, _, 0, _
+ ; CHECK: %ymm16 = VFMSUB213PSZ256r %ymm16, %ymm1, %ymm2
+ %ymm16 = VFMSUB213PSZ256r %ymm16, %ymm1, %ymm2
+ ; CHECK: %ymm16 = VFMSUB231PDZ256m %ymm16, %ymm16, %rsi, 1, _, 0, _
+ %ymm16 = VFMSUB231PDZ256m %ymm16, %ymm16, %rsi, 1, _, 0, _
+ ; CHECK: %ymm16 = VFMSUB231PDZ256r %ymm16, %ymm1, %ymm2
+ %ymm16 = VFMSUB231PDZ256r %ymm16, %ymm1, %ymm2
+ ; CHECK: %ymm16 = VFMSUB231PSZ256m %ymm16, %ymm16, %rsi, 1, _, 0, _
+ %ymm16 = VFMSUB231PSZ256m %ymm16, %ymm16, %rsi, 1, _, 0, _
+ ; CHECK: %ymm16 = VFMSUB231PSZ256r %ymm16, %ymm1, %ymm2
+ %ymm16 = VFMSUB231PSZ256r %ymm16, %ymm1, %ymm2
+ ; CHECK: %ymm16 = VFMSUBADD132PDZ256m %ymm16, %ymm16, %rsi, 1, _, 0, _
+ %ymm16 = VFMSUBADD132PDZ256m %ymm16, %ymm16, %rsi, 1, _, 0, _
+ ; CHECK: %ymm16 = VFMSUBADD132PDZ256r %ymm16, %ymm1, %ymm2
+ %ymm16 = VFMSUBADD132PDZ256r %ymm16, %ymm1, %ymm2
+ ; CHECK: %ymm16 = VFMSUBADD132PSZ256m %ymm16, %ymm16, %rsi, 1, _, 0, _
+ %ymm16 = VFMSUBADD132PSZ256m %ymm16, %ymm16, %rsi, 1, _, 0, _
+ ; CHECK: %ymm16 = VFMSUBADD132PSZ256r %ymm16, %ymm1, %ymm2
+ %ymm16 = VFMSUBADD132PSZ256r %ymm16, %ymm1, %ymm2
+ ; CHECK: %ymm16 = VFMSUBADD213PDZ256m %ymm16, %ymm16, %rsi, 1, _, 0, _
+ %ymm16 = VFMSUBADD213PDZ256m %ymm16, %ymm16, %rsi, 1, _, 0, _
+ ; CHECK: %ymm16 = VFMSUBADD213PDZ256r %ymm16, %ymm1, %ymm2
+ %ymm16 = VFMSUBADD213PDZ256r %ymm16, %ymm1, %ymm2
+ ; CHECK: %ymm16 = VFMSUBADD213PSZ256m %ymm16, %ymm16, %rsi, 1, _, 0, _
+ %ymm16 = VFMSUBADD213PSZ256m %ymm16, %ymm16, %rsi, 1, _, 0, _
+ ; CHECK: %ymm16 = VFMSUBADD213PSZ256r %ymm16, %ymm1, %ymm2
+ %ymm16 = VFMSUBADD213PSZ256r %ymm16, %ymm1, %ymm2
+ ; CHECK: %ymm16 = VFMSUBADD231PDZ256m %ymm16, %ymm16, %rsi, 1, _, 0, _
+ %ymm16 = VFMSUBADD231PDZ256m %ymm16, %ymm16, %rsi, 1, _, 0, _
+ ; CHECK: %ymm16 = VFMSUBADD231PDZ256r %ymm16, %ymm1, %ymm2
+ %ymm16 = VFMSUBADD231PDZ256r %ymm16, %ymm1, %ymm2
+ ; CHECK: %ymm16 = VFMSUBADD231PSZ256m %ymm16, %ymm16, %rsi, 1, _, 0, _
+ %ymm16 = VFMSUBADD231PSZ256m %ymm16, %ymm16, %rsi, 1, _, 0, _
+ ; CHECK: %ymm16 = VFMSUBADD231PSZ256r %ymm16, %ymm1, %ymm2
+ %ymm16 = VFMSUBADD231PSZ256r %ymm16, %ymm1, %ymm2
+ ; CHECK: %ymm16 = VFNMADD132PDZ256m %ymm16, %ymm16, %rsi, 1, _, 0, _
+ %ymm16 = VFNMADD132PDZ256m %ymm16, %ymm16, %rsi, 1, _, 0, _
+ ; CHECK: %ymm16 = VFNMADD132PDZ256r %ymm16, %ymm1, %ymm2
+ %ymm16 = VFNMADD132PDZ256r %ymm16, %ymm1, %ymm2
+ ; CHECK: %ymm16 = VFNMADD132PSZ256m %ymm16, %ymm16, %rsi, 1, _, 0, _
+ %ymm16 = VFNMADD132PSZ256m %ymm16, %ymm16, %rsi, 1, _, 0, _
+ ; CHECK: %ymm16 = VFNMADD132PSZ256r %ymm16, %ymm1, %ymm2
+ %ymm16 = VFNMADD132PSZ256r %ymm16, %ymm1, %ymm2
+ ; CHECK: %ymm16 = VFNMADD213PDZ256m %ymm16, %ymm16, %rsi, 1, _, 0, _
+ %ymm16 = VFNMADD213PDZ256m %ymm16, %ymm16, %rsi, 1, _, 0, _
+ ; CHECK: %ymm16 = VFNMADD213PDZ256r %ymm16, %ymm1, %ymm2
+ %ymm16 = VFNMADD213PDZ256r %ymm16, %ymm1, %ymm2
+ ; CHECK: %ymm16 = VFNMADD213PSZ256m %ymm16, %ymm16, %rsi, 1, _, 0, _
+ %ymm16 = VFNMADD213PSZ256m %ymm16, %ymm16, %rsi, 1, _, 0, _
+ ; CHECK: %ymm16 = VFNMADD213PSZ256r %ymm16, %ymm1, %ymm2
+ %ymm16 = VFNMADD213PSZ256r %ymm16, %ymm1, %ymm2
+ ; CHECK: %ymm16 = VFNMADD231PDZ256m %ymm16, %ymm16, %rsi, 1, _, 0, _
+ %ymm16 = VFNMADD231PDZ256m %ymm16, %ymm16, %rsi, 1, _, 0, _
+ ; CHECK: %ymm16 = VFNMADD231PDZ256r %ymm16, %ymm1, %ymm2
+ %ymm16 = VFNMADD231PDZ256r %ymm16, %ymm1, %ymm2
+ ; CHECK: %ymm16 = VFNMADD231PSZ256m %ymm16, %ymm16, %rsi, 1, _, 0, _
+ %ymm16 = VFNMADD231PSZ256m %ymm16, %ymm16, %rsi, 1, _, 0, _
+ ; CHECK: %ymm16 = VFNMADD231PSZ256r %ymm16, %ymm1, %ymm2
+ %ymm16 = VFNMADD231PSZ256r %ymm16, %ymm1, %ymm2
+ ; CHECK: %ymm16 = VFNMSUB132PDZ256m %ymm16, %ymm16, %rsi, 1, _, 0, _
+ %ymm16 = VFNMSUB132PDZ256m %ymm16, %ymm16, %rsi, 1, _, 0, _
+ ; CHECK: %ymm16 = VFNMSUB132PDZ256r %ymm16, %ymm1, %ymm2
+ %ymm16 = VFNMSUB132PDZ256r %ymm16, %ymm1, %ymm2
+ ; CHECK: %ymm16 = VFNMSUB132PSZ256m %ymm16, %ymm16, %rsi, 1, _, 0, _
+ %ymm16 = VFNMSUB132PSZ256m %ymm16, %ymm16, %rsi, 1, _, 0, _
+ ; CHECK: %ymm16 = VFNMSUB132PSZ256r %ymm16, %ymm1, %ymm2
+ %ymm16 = VFNMSUB132PSZ256r %ymm16, %ymm1, %ymm2
+ ; CHECK: %ymm16 = VFNMSUB213PDZ256m %ymm16, %ymm16, %rsi, 1, _, 0, _
+ %ymm16 = VFNMSUB213PDZ256m %ymm16, %ymm16, %rsi, 1, _, 0, _
+ ; CHECK: %ymm16 = VFNMSUB213PDZ256r %ymm16, %ymm1, %ymm2
+ %ymm16 = VFNMSUB213PDZ256r %ymm16, %ymm1, %ymm2
+ ; CHECK: %ymm16 = VFNMSUB213PSZ256m %ymm16, %ymm16, %rsi, 1, _, 0, _
+ %ymm16 = VFNMSUB213PSZ256m %ymm16, %ymm16, %rsi, 1, _, 0, _
+ ; CHECK: %ymm16 = VFNMSUB213PSZ256r %ymm16, %ymm1, %ymm2
+ %ymm16 = VFNMSUB213PSZ256r %ymm16, %ymm1, %ymm2
+ ; CHECK: %ymm16 = VFNMSUB231PDZ256m %ymm16, %ymm16, %rsi, 1, _, 0, _
+ %ymm16 = VFNMSUB231PDZ256m %ymm16, %ymm16, %rsi, 1, _, 0, _
+ ; CHECK: %ymm16 = VFNMSUB231PDZ256r %ymm16, %ymm1, %ymm2
+ %ymm16 = VFNMSUB231PDZ256r %ymm16, %ymm1, %ymm2
+ ; CHECK: %ymm16 = VFNMSUB231PSZ256m %ymm16, %ymm16, %rsi, 1, _, 0, _
+ %ymm16 = VFNMSUB231PSZ256m %ymm16, %ymm16, %rsi, 1, _, 0, _
+ ; CHECK: %ymm16 = VFNMSUB231PSZ256r %ymm16, %ymm1, %ymm2
+ %ymm16 = VFNMSUB231PSZ256r %ymm16, %ymm1, %ymm2
+ ; CHECK: %ymm16 = VPSRADZ256ri %ymm16, 7
+ %ymm16 = VPSRADZ256ri %ymm16, 7
+ ; CHECK: %ymm16 = VPSRADZ256rm %ymm16, %rip, 1, _, %rax, _
+ %ymm16 = VPSRADZ256rm %ymm16, %rip, 1, _, %rax, _
+ ; CHECK: %ymm16 = VPSRADZ256rr %ymm16, %xmm1
+ %ymm16 = VPSRADZ256rr %ymm16, %xmm1
+ ; CHECK: %ymm16 = VPSRAVDZ256rm %ymm16, %rip, 1, _, %rax, _
+ %ymm16 = VPSRAVDZ256rm %ymm16, %rip, 1, _, %rax, _
+ ; CHECK: %ymm16 = VPSRAVDZ256rr %ymm16, %ymm1
+ %ymm16 = VPSRAVDZ256rr %ymm16, %ymm1
+ ; CHECK: %ymm16 = VPSRAWZ256ri %ymm16, 7
+ %ymm16 = VPSRAWZ256ri %ymm16, 7
+ ; CHECK: %ymm16 = VPSRAWZ256rm %ymm16, %rip, 1, _, %rax, _
+ %ymm16 = VPSRAWZ256rm %ymm16, %rip, 1, _, %rax, _
+ ; CHECK: %ymm16 = VPSRAWZ256rr %ymm16, %xmm1
+ %ymm16 = VPSRAWZ256rr %ymm16, %xmm1
+ ; CHECK: %ymm16 = VPSRLDQZ256rr %ymm16, %ymm1
+ %ymm16 = VPSRLDQZ256rr %ymm16, %ymm1
+ ; CHECK: %ymm16 = VPSRLDZ256ri %ymm16, 7
+ %ymm16 = VPSRLDZ256ri %ymm16, 7
+ ; CHECK: %ymm16 = VPSRLDZ256rm %ymm16, %rip, 1, _, %rax, _
+ %ymm16 = VPSRLDZ256rm %ymm16, %rip, 1, _, %rax, _
+ ; CHECK: %ymm16 = VPSRLDZ256rr %ymm16, %xmm1
+ %ymm16 = VPSRLDZ256rr %ymm16, %xmm1
+ ; CHECK: %ymm16 = VPSRLQZ256ri %ymm16, 7
+ %ymm16 = VPSRLQZ256ri %ymm16, 7
+ ; CHECK: %ymm16 = VPSRLQZ256rm %ymm16, %rip, 1, _, %rax, _
+ %ymm16 = VPSRLQZ256rm %ymm16, %rip, 1, _, %rax, _
+ ; CHECK: %ymm16 = VPSRLQZ256rr %ymm16, %xmm1
+ %ymm16 = VPSRLQZ256rr %ymm16, %xmm1
+ ; CHECK: %ymm16 = VPSRLVDZ256rm %ymm16, %rip, 1, _, %rax, _
+ %ymm16 = VPSRLVDZ256rm %ymm16, %rip, 1, _, %rax, _
+ ; CHECK: %ymm16 = VPSRLVDZ256rr %ymm16, %ymm1
+ %ymm16 = VPSRLVDZ256rr %ymm16, %ymm1
+ ; CHECK: %ymm16 = VPSRLVQZ256rm %ymm16, %rip, 1, _, %rax, _
+ %ymm16 = VPSRLVQZ256rm %ymm16, %rip, 1, _, %rax, _
+ ; CHECK: %ymm16 = VPSRLVQZ256rr %ymm16, %ymm1
+ %ymm16 = VPSRLVQZ256rr %ymm16, %ymm1
+ ; CHECK: %ymm16 = VPSRLWZ256ri %ymm16, 7
+ %ymm16 = VPSRLWZ256ri %ymm16, 7
+ ; CHECK: %ymm16 = VPSRLWZ256rm %ymm16, %rip, 1, _, %rax, _
+ %ymm16 = VPSRLWZ256rm %ymm16, %rip, 1, _, %rax, _
+ ; CHECK: %ymm16 = VPSRLWZ256rr %ymm16, %xmm1
+ %ymm16 = VPSRLWZ256rr %ymm16, %xmm1
+ ; CHECK: %ymm16 = VPMOVSXBDZ256rm %rip, 1, _, %rax, _
+ %ymm16 = VPMOVSXBDZ256rm %rip, 1, _, %rax, _
+ ; CHECK: %ymm16 = VPMOVSXBDZ256rr %xmm0
+ %ymm16 = VPMOVSXBDZ256rr %xmm0
+ ; CHECK: %ymm16 = VPMOVSXBQZ256rm %rip, 1, _, %rax, _
+ %ymm16 = VPMOVSXBQZ256rm %rip, 1, _, %rax, _
+ ; CHECK: %ymm16 = VPMOVSXBQZ256rr %xmm0
+ %ymm16 = VPMOVSXBQZ256rr %xmm0
+ ; CHECK: %ymm16 = VPMOVSXBWZ256rm %rip, 1, _, %rax, _
+ %ymm16 = VPMOVSXBWZ256rm %rip, 1, _, %rax, _
+ ; CHECK: %ymm16 = VPMOVSXBWZ256rr %xmm0
+ %ymm16 = VPMOVSXBWZ256rr %xmm0
+ ; CHECK: %ymm16 = VPMOVSXDQZ256rm %rip, 1, _, %rax, _
+ %ymm16 = VPMOVSXDQZ256rm %rip, 1, _, %rax, _
+ ; CHECK: %ymm16 = VPMOVSXDQZ256rr %xmm0
+ %ymm16 = VPMOVSXDQZ256rr %xmm0
+ ; CHECK: %ymm16 = VPMOVSXWDZ256rm %rip, 1, _, %rax, _
+ %ymm16 = VPMOVSXWDZ256rm %rip, 1, _, %rax, _
+ ; CHECK: %ymm16 = VPMOVSXWDZ256rr %xmm0
+ %ymm16 = VPMOVSXWDZ256rr %xmm0
+ ; CHECK: %ymm16 = VPMOVSXWQZ256rm %rip, 1, _, %rax, _
+ %ymm16 = VPMOVSXWQZ256rm %rip, 1, _, %rax, _
+ ; CHECK: %ymm16 = VPMOVSXWQZ256rr %xmm0
+ %ymm16 = VPMOVSXWQZ256rr %xmm0
+ ; CHECK: %ymm16 = VPMOVZXBDZ256rm %rip, 1, _, %rax, _
+ %ymm16 = VPMOVZXBDZ256rm %rip, 1, _, %rax, _
+ ; CHECK: %ymm16 = VPMOVZXBDZ256rr %xmm0
+ %ymm16 = VPMOVZXBDZ256rr %xmm0
+ ; CHECK: %ymm16 = VPMOVZXBQZ256rm %rip, 1, _, %rax, _
+ %ymm16 = VPMOVZXBQZ256rm %rip, 1, _, %rax, _
+ ; CHECK: %ymm16 = VPMOVZXBQZ256rr %xmm0
+ %ymm16 = VPMOVZXBQZ256rr %xmm0
+ ; CHECK: %ymm16 = VPMOVZXBWZ256rm %rip, 1, _, %rax, _
+ %ymm16 = VPMOVZXBWZ256rm %rip, 1, _, %rax, _
+ ; CHECK: %ymm16 = VPMOVZXBWZ256rr %xmm0
+ %ymm16 = VPMOVZXBWZ256rr %xmm0
+ ; CHECK: %ymm16 = VPMOVZXDQZ256rm %rip, 1, _, %rax, _
+ %ymm16 = VPMOVZXDQZ256rm %rip, 1, _, %rax, _
+ ; CHECK: %ymm16 = VPMOVZXDQZ256rr %xmm0
+ %ymm16 = VPMOVZXDQZ256rr %xmm0
+ ; CHECK: %ymm16 = VPMOVZXWDZ256rm %rip, 1, _, %rax, _
+ %ymm16 = VPMOVZXWDZ256rm %rip, 1, _, %rax, _
+ ; CHECK: %ymm16 = VPMOVZXWDZ256rr %xmm0
+ %ymm16 = VPMOVZXWDZ256rr %xmm0
+ ; CHECK: %ymm16 = VPMOVZXWQZ256rm %rip, 1, _, %rax, _
+ %ymm16 = VPMOVZXWQZ256rm %rip, 1, _, %rax, _
+ ; CHECK: %ymm16 = VPMOVZXWQZ256rr %xmm0
+ %ymm16 = VPMOVZXWQZ256rr %xmm0
+ ; CHECK: %ymm16 = VBROADCASTSDZ256m %rip, 1, _, %rax, _
+ %ymm16 = VBROADCASTSDZ256m %rip, 1, _, %rax, _
+ ; CHECK: %ymm16 = VBROADCASTSDZ256r %xmm0
+ %ymm16 = VBROADCASTSDZ256r %xmm0
+ ; CHECK: %ymm16 = VBROADCASTSDZ256r_s %xmm0
+ %ymm16 = VBROADCASTSDZ256r_s %xmm0
+ ; CHECK: %ymm16 = VBROADCASTSSZ256m %rip, 1, _, %rax, _
+ %ymm16 = VBROADCASTSSZ256m %rip, 1, _, %rax, _
+ ; CHECK: %ymm16 = VBROADCASTSSZ256r %xmm0
+ %ymm16 = VBROADCASTSSZ256r %xmm0
+ ; CHECK: %ymm16 = VBROADCASTSSZ256r_s %xmm0
+ %ymm16 = VBROADCASTSSZ256r_s %xmm0
+ ; CHECK: %ymm16 = VPBROADCASTBZ256m %rip, 1, _, %rax, _
+ %ymm16 = VPBROADCASTBZ256m %rip, 1, _, %rax, _
+ ; CHECK: %ymm16 = VPBROADCASTBZ256r %xmm0
+ %ymm16 = VPBROADCASTBZ256r %xmm0
+ ; CHECK: %ymm16 = VPBROADCASTDZ256m %rip, 1, _, %rax, _
+ %ymm16 = VPBROADCASTDZ256m %rip, 1, _, %rax, _
+ ; CHECK: %ymm16 = VPBROADCASTDZ256r %xmm0
+ %ymm16 = VPBROADCASTDZ256r %xmm0
+ ; CHECK: %ymm16 = VPBROADCASTWZ256m %rip, 1, _, %rax, _
+ %ymm16 = VPBROADCASTWZ256m %rip, 1, _, %rax, _
+ ; CHECK: %ymm16 = VPBROADCASTWZ256r %xmm0
+ %ymm16 = VPBROADCASTWZ256r %xmm0
+ ; CHECK: %ymm16 = VPBROADCASTQZ256m %rip, 1, _, %rax, _
+ %ymm16 = VPBROADCASTQZ256m %rip, 1, _, %rax, _
+ ; CHECK: %ymm16 = VPBROADCASTQZ256r %xmm0
+ %ymm16 = VPBROADCASTQZ256r %xmm0
+ ; CHECK: %ymm16 = VPABSBZ256rm %rip, 1, _, %rax, _
+ %ymm16 = VPABSBZ256rm %rip, 1, _, %rax, _
+ ; CHECK: %ymm16 = VPABSBZ256rr %ymm16
+ %ymm16 = VPABSBZ256rr %ymm16
+ ; CHECK: %ymm16 = VPABSDZ256rm %rip, 1, _, %rax, _
+ %ymm16 = VPABSDZ256rm %rip, 1, _, %rax, _
+ ; CHECK: %ymm16 = VPABSDZ256rr %ymm16
+ %ymm16 = VPABSDZ256rr %ymm16
+ ; CHECK: %ymm16 = VPABSWZ256rm %rip, 1, _, %rax, _
+ %ymm16 = VPABSWZ256rm %rip, 1, _, %rax, _
+ ; CHECK: %ymm16 = VPABSWZ256rr %ymm16
+ %ymm16 = VPABSWZ256rr %ymm16
+ ; CHECK: %ymm16 = VPSADBWZ256rm %ymm16, 1, _, %rax, _, _
+ %ymm16 = VPSADBWZ256rm %ymm16, 1, _, %rax, _, _
+ ; CHECK: %ymm16 = VPSADBWZ256rr %ymm16, %ymm1
+ %ymm16 = VPSADBWZ256rr %ymm16, %ymm1
+ ; CHECK: %ymm16 = VPERMDZ256rm %ymm16, %rdi, 1, _, 0, _
+ %ymm16 = VPERMDZ256rm %ymm16, %rdi, 1, _, 0, _
+ ; CHECK: %ymm16 = VPERMDZ256rr %ymm1, %ymm16
+ %ymm16 = VPERMDZ256rr %ymm1, %ymm16
+ ; CHECK: %ymm16 = VPERMILPDZ256mi %rdi, 1, _, 0, _, _
+ %ymm16 = VPERMILPDZ256mi %rdi, 1, _, 0, _, _
+ ; CHECK: %ymm16 = VPERMILPDZ256ri %ymm16, 7
+ %ymm16 = VPERMILPDZ256ri %ymm16, 7
+ ; CHECK: %ymm16 = VPERMILPDZ256rm %ymm16, %rdi, 1, _, 0, _
+ %ymm16 = VPERMILPDZ256rm %ymm16, %rdi, 1, _, 0, _
+ ; CHECK: %ymm16 = VPERMILPDZ256rr %ymm1, %ymm16
+ %ymm16 = VPERMILPDZ256rr %ymm1, %ymm16
+ ; CHECK: %ymm16 = VPERMILPSZ256mi %rdi, 1, _, 0, _, _
+ %ymm16 = VPERMILPSZ256mi %rdi, 1, _, 0, _, _
+ ; CHECK: %ymm16 = VPERMILPSZ256ri %ymm16, 7
+ %ymm16 = VPERMILPSZ256ri %ymm16, 7
+ ; CHECK: %ymm16 = VPERMILPSZ256rm %ymm16, %rdi, 1, _, 0, _
+ %ymm16 = VPERMILPSZ256rm %ymm16, %rdi, 1, _, 0, _
+ ; CHECK: %ymm16 = VPERMILPSZ256rr %ymm1, %ymm16
+ %ymm16 = VPERMILPSZ256rr %ymm1, %ymm16
+ ; CHECK: %ymm16 = VPERMPDZ256mi %rdi, 1, _, 0, _, _
+ %ymm16 = VPERMPDZ256mi %rdi, 1, _, 0, _, _
+ ; CHECK: %ymm16 = VPERMPDZ256ri %ymm16, 7
+ %ymm16 = VPERMPDZ256ri %ymm16, 7
+ ; CHECK: %ymm16 = VPERMPSZ256rm %ymm16, %rdi, 1, _, 0, _
+ %ymm16 = VPERMPSZ256rm %ymm16, %rdi, 1, _, 0, _
+ ; CHECK: %ymm16 = VPERMPSZ256rr %ymm1, %ymm16
+ %ymm16 = VPERMPSZ256rr %ymm1, %ymm16
+ ; CHECK: %ymm16 = VPERMQZ256mi %rdi, 1, _, 0, _, _
+ %ymm16 = VPERMQZ256mi %rdi, 1, _, 0, _, _
+ ; CHECK: %ymm16 = VPERMQZ256ri %ymm16, 7
+ %ymm16 = VPERMQZ256ri %ymm16, 7
+ ; CHECK: %ymm16 = VPSLLDQZ256rr %ymm16, 14
+ %ymm16 = VPSLLDQZ256rr %ymm16, 14
+ ; CHECK: %ymm16 = VPSLLDZ256ri %ymm16, 7
+ %ymm16 = VPSLLDZ256ri %ymm16, 7
+ ; CHECK: %ymm16 = VPSLLDZ256rm %ymm16, %rip, 1, _, %rax, _
+ %ymm16 = VPSLLDZ256rm %ymm16, %rip, 1, _, %rax, _
+ ; CHECK: %ymm16 = VPSLLDZ256rr %ymm16, 14
+ %ymm16 = VPSLLDZ256rr %ymm16, 14
+ ; CHECK: %ymm16 = VPSLLQZ256ri %ymm16, 7
+ %ymm16 = VPSLLQZ256ri %ymm16, 7
+ ; CHECK: %ymm16 = VPSLLQZ256rm %ymm16, %rip, 1, _, %rax, _
+ %ymm16 = VPSLLQZ256rm %ymm16, %rip, 1, _, %rax, _
+ ; CHECK: %ymm16 = VPSLLQZ256rr %ymm16, 14
+ %ymm16 = VPSLLQZ256rr %ymm16, 14
+ ; CHECK: %ymm16 = VPSLLVDZ256rm %ymm16, %rip, 1, _, %rax, _
+ %ymm16 = VPSLLVDZ256rm %ymm16, %rip, 1, _, %rax, _
+ ; CHECK: %ymm16 = VPSLLVDZ256rr %ymm16, 14
+ %ymm16 = VPSLLVDZ256rr %ymm16, 14
+ ; CHECK: %ymm16 = VPSLLVQZ256rm %ymm16, %rip, 1, _, %rax, _
+ %ymm16 = VPSLLVQZ256rm %ymm16, %rip, 1, _, %rax, _
+ ; CHECK: %ymm16 = VPSLLVQZ256rr %ymm16, 14
+ %ymm16 = VPSLLVQZ256rr %ymm16, 14
+ ; CHECK: %ymm16 = VPSLLWZ256ri %ymm16, 7
+ %ymm16 = VPSLLWZ256ri %ymm16, 7
+ ; CHECK: %ymm16 = VPSLLWZ256rm %ymm16, %rip, 1, _, %rax, _
+ %ymm16 = VPSLLWZ256rm %ymm16, %rip, 1, _, %rax, _
+ ; CHECK: %ymm16 = VPSLLWZ256rr %ymm16, 14
+ %ymm16 = VPSLLWZ256rr %ymm16, 14
+ ; CHECK: %ymm16 = VCVTDQ2PDZ256rm %rdi, %ymm16, 1, _, 0
+ %ymm16 = VCVTDQ2PDZ256rm %rdi, %ymm16, 1, _, 0
+ ; CHECK: %ymm16 = VCVTDQ2PDZ256rr %xmm0
+ %ymm16 = VCVTDQ2PDZ256rr %xmm0
+ ; CHECK: %ymm16 = VCVTDQ2PSZ256rm %rdi, %ymm16, 1, _, 0
+ %ymm16 = VCVTDQ2PSZ256rm %rdi, %ymm16, 1, _, 0
+ ; CHECK: %ymm16 = VCVTDQ2PSZ256rr %ymm16
+ %ymm16 = VCVTDQ2PSZ256rr %ymm16
+ ; CHECK: %xmm0 = VCVTPD2DQZ256rm %rdi, %ymm16, 1, _, 0
+ %xmm0 = VCVTPD2DQZ256rm %rdi, %ymm16, 1, _, 0
+ ; CHECK: %xmm0 = VCVTPD2DQZ256rr %ymm16
+ %xmm0 = VCVTPD2DQZ256rr %ymm16
+ ; CHECK: %xmm0 = VCVTPD2PSZ256rm %rdi, %ymm16, 1, _, 0
+ %xmm0 = VCVTPD2PSZ256rm %rdi, %ymm16, 1, _, 0
+ ; CHECK: %xmm0 = VCVTPD2PSZ256rr %ymm16
+ %xmm0 = VCVTPD2PSZ256rr %ymm16
+ ; CHECK: %ymm16 = VCVTPS2DQZ256rm %rdi, %ymm16, 1, _, 0
+ %ymm16 = VCVTPS2DQZ256rm %rdi, %ymm16, 1, _, 0
+ ; CHECK: %ymm16 = VCVTPS2DQZ256rr %ymm16
+ %ymm16 = VCVTPS2DQZ256rr %ymm16
+ ; CHECK: %ymm16 = VCVTPS2PDZ256rm %rdi, %ymm16, 1, _, 0
+ %ymm16 = VCVTPS2PDZ256rm %rdi, %ymm16, 1, _, 0
+ ; CHECK: %ymm16 = VCVTPS2PDZ256rr %xmm0
+ %ymm16 = VCVTPS2PDZ256rr %xmm0
+ ; CHECK: VCVTPS2PHZ256mr %rdi, %ymm16, 1, _, 0, _, _
+ VCVTPS2PHZ256mr %rdi, %ymm16, 1, _, 0, _, _
+ ; CHECK: %xmm0 = VCVTPS2PHZ256rr %ymm16, _
+ %xmm0 = VCVTPS2PHZ256rr %ymm16, _
+ ; CHECK: %ymm16 = VCVTPH2PSZ256rm %rdi, %ymm16, 1, _, 0
+ %ymm16 = VCVTPH2PSZ256rm %rdi, %ymm16, 1, _, 0
+ ; CHECK: %ymm16 = VCVTPH2PSZ256rr %xmm0
+ %ymm16 = VCVTPH2PSZ256rr %xmm0
+ ; CHECK: %xmm0 = VCVTTPD2DQZ256rm %rdi, %ymm16, 1, _, 0
+ %xmm0 = VCVTTPD2DQZ256rm %rdi, %ymm16, 1, _, 0
+ ; CHECK: %xmm0 = VCVTTPD2DQZ256rr %ymm16
+ %xmm0 = VCVTTPD2DQZ256rr %ymm16
+ ; CHECK: %ymm16 = VCVTTPS2DQZ256rm %rdi, %ymm16, 1, _, 0
+ %ymm16 = VCVTTPS2DQZ256rm %rdi, %ymm16, 1, _, 0
+ ; CHECK: %ymm16 = VCVTTPS2DQZ256rr %ymm16
+ %ymm16 = VCVTTPS2DQZ256rr %ymm16
+ ; CHECK: %ymm16 = VSQRTPDZ256m %rdi, _, _, _, _
+ %ymm16 = VSQRTPDZ256m %rdi, _, _, _, _
+ ; CHECK: %ymm16 = VSQRTPDZ256r %ymm16
+ %ymm16 = VSQRTPDZ256r %ymm16
+ ; CHECK: %ymm16 = VSQRTPSZ256m %rdi, _, _, _, _
+ %ymm16 = VSQRTPSZ256m %rdi, _, _, _, _
+ ; CHECK: %ymm16 = VSQRTPSZ256r %ymm16
+ %ymm16 = VSQRTPSZ256r %ymm16
+ ; CHECK: %ymm16 = VPALIGNRZ256rmi %ymm16, %rdi, _, _, _, _, _
+ %ymm16 = VPALIGNRZ256rmi %ymm16, %rdi, _, _, _, _, _
+ ; CHECK: %ymm16 = VPALIGNRZ256rri %ymm16, %ymm1, _
+ %ymm16 = VPALIGNRZ256rri %ymm16, %ymm1, _
+ ; CHECK: %ymm16 = VMOVUPSZ256rm %rdi, 1, _, 0, _
+ %ymm16 = VMOVUPSZ256rm %rdi, 1, _, 0, _
+ ; CHECK: %ymm16 = VMOVUPSZ256rr %ymm16
+ %ymm16 = VMOVUPSZ256rr %ymm16
+ ; CHECK: %ymm16 = VMOVUPSZ256rr_REV %ymm16
+ %ymm16 = VMOVUPSZ256rr_REV %ymm16
+ ; CHECK: %ymm16 = VPSHUFBZ256rm %ymm16, _, _, _, _, _
+ %ymm16 = VPSHUFBZ256rm %ymm16, _, _, _, _, _
+ ; CHECK: %ymm16 = VPSHUFBZ256rr %ymm16, %ymm1
+ %ymm16 = VPSHUFBZ256rr %ymm16, %ymm1
+ ; CHECK: %ymm16 = VPSHUFDZ256mi %rdi, 1, _, 0, _, _
+ %ymm16 = VPSHUFDZ256mi %rdi, 1, _, 0, _, _
+ ; CHECK: %ymm16 = VPSHUFDZ256ri %ymm16, -24
+ %ymm16 = VPSHUFDZ256ri %ymm16, -24
+ ; CHECK: %ymm16 = VPSHUFHWZ256mi %rdi, 1, _, 0, _, _
+ %ymm16 = VPSHUFHWZ256mi %rdi, 1, _, 0, _, _
+ ; CHECK: %ymm16 = VPSHUFHWZ256ri %ymm16, -24
+ %ymm16 = VPSHUFHWZ256ri %ymm16, -24
+ ; CHECK: %ymm16 = VPSHUFLWZ256mi %rdi, 1, _, 0, _, _
+ %ymm16 = VPSHUFLWZ256mi %rdi, 1, _, 0, _, _
+ ; CHECK: %ymm16 = VPSHUFLWZ256ri %ymm16, -24
+ %ymm16 = VPSHUFLWZ256ri %ymm16, -24
+ ; CHECK: %ymm16 = VSHUFPDZ256rmi %ymm16, _, _, _, _, _, _
+ %ymm16 = VSHUFPDZ256rmi %ymm16, _, _, _, _, _, _
+ ; CHECK: %ymm16 = VSHUFPDZ256rri %ymm16, _, _
+ %ymm16 = VSHUFPDZ256rri %ymm16, _, _
+ ; CHECK: %ymm16 = VSHUFPSZ256rmi %ymm16, _, _, _, _, _, _
+ %ymm16 = VSHUFPSZ256rmi %ymm16, _, _, _, _, _, _
+ ; CHECK: %ymm16 = VSHUFPSZ256rri %ymm16, _, _
+ %ymm16 = VSHUFPSZ256rri %ymm16, _, _
+
+ RET 0, %zmm0, %zmm1
+...
+---
+ # CHECK-LABEL: name: evex_z128_to_evex_test
+ # CHECK: bb.0:
+
+name: evex_z128_to_evex_test
+body: |
+ bb.0:
+ ; CHECK: VMOVAPDZ128mr %rdi, 1, _, 0, _, %xmm16
+ VMOVAPDZ128mr %rdi, 1, _, 0, _, %xmm16
+ ; CHECK: %xmm16 = VMOVAPDZ128rm %rip, 1, _, %rax, _
+ %xmm16 = VMOVAPDZ128rm %rip, 1, _, %rax, _
+ ; CHECK: %xmm16 = VMOVAPDZ128rr %xmm16
+ %xmm16 = VMOVAPDZ128rr %xmm16
+ ; CHECK: VMOVAPSZ128mr %rdi, 1, _, 0, _, %xmm16
+ VMOVAPSZ128mr %rdi, 1, _, 0, _, %xmm16
+ ; CHECK: %xmm16 = VMOVAPSZ128rm %rip, 1, _, %rax, _
+ %xmm16 = VMOVAPSZ128rm %rip, 1, _, %rax, _
+ ; CHECK: %xmm16 = VMOVAPSZ128rr %xmm16
+ %xmm16 = VMOVAPSZ128rr %xmm16
+ ; CHECK: VMOVDQA32Z128mr %rdi, 1, _, 0, _, %xmm16
+ VMOVDQA32Z128mr %rdi, 1, _, 0, _, %xmm16
+ ; CHECK: %xmm16 = VMOVDQA32Z128rm %rip, 1, _, %rax, _
+ %xmm16 = VMOVDQA32Z128rm %rip, 1, _, %rax, _
+ ; CHECK: %xmm16 = VMOVDQA32Z128rr %xmm16
+ %xmm16 = VMOVDQA32Z128rr %xmm16
+ ; CHECK: VMOVDQA64Z128mr %rdi, 1, _, 0, _, %xmm16
+ VMOVDQA64Z128mr %rdi, 1, _, 0, _, %xmm16
+ ; CHECK: %xmm16 = VMOVDQA64Z128rm %rip, 1, _, %rax, _
+ %xmm16 = VMOVDQA64Z128rm %rip, 1, _, %rax, _
+ ; CHECK: %xmm16 = VMOVDQA64Z128rr %xmm16
+ %xmm16 = VMOVDQA64Z128rr %xmm16
+ ; CHECK: VMOVDQU16Z128mr %rdi, 1, _, 0, _, %xmm16
+ VMOVDQU16Z128mr %rdi, 1, _, 0, _, %xmm16
+ ; CHECK: %xmm16 = VMOVDQU16Z128rm %rip, 1, _, %rax, _
+ %xmm16 = VMOVDQU16Z128rm %rip, 1, _, %rax, _
+ ; CHECK: %xmm16 = VMOVDQU16Z128rr %xmm16
+ %xmm16 = VMOVDQU16Z128rr %xmm16
+ ; CHECK: VMOVDQU32Z128mr %rdi, 1, _, 0, _, %xmm16
+ VMOVDQU32Z128mr %rdi, 1, _, 0, _, %xmm16
+ ; CHECK: %xmm16 = VMOVDQU32Z128rm %rip, 1, _, %rax, _
+ %xmm16 = VMOVDQU32Z128rm %rip, 1, _, %rax, _
+ ; CHECK: %xmm16 = VMOVDQU32Z128rr %xmm16
+ %xmm16 = VMOVDQU32Z128rr %xmm16
+ ; CHECK: VMOVDQU64Z128mr %rdi, 1, _, 0, _, %xmm16
+ VMOVDQU64Z128mr %rdi, 1, _, 0, _, %xmm16
+ ; CHECK: %xmm16 = VMOVDQU64Z128rm %rip, 1, _, %rax, _
+ %xmm16 = VMOVDQU64Z128rm %rip, 1, _, %rax, _
+ ; CHECK: %xmm16 = VMOVDQU64Z128rr %xmm16
+ %xmm16 = VMOVDQU64Z128rr %xmm16
+ ; CHECK: VMOVDQU8Z128mr %rdi, 1, _, 0, _, %xmm16
+ VMOVDQU8Z128mr %rdi, 1, _, 0, _, %xmm16
+ ; CHECK: %xmm16 = VMOVDQU8Z128rm %rip, 1, _, %rax, _
+ %xmm16 = VMOVDQU8Z128rm %rip, 1, _, %rax, _
+ ; CHECK: %xmm16 = VMOVDQU8Z128rr %xmm16
+ %xmm16 = VMOVDQU8Z128rr %xmm16
+ ; CHECK: %xmm16 = VMOVDQU8Z128rr_REV %xmm16
+ %xmm16 = VMOVDQU8Z128rr_REV %xmm16
+ ; CHECK: %xmm16 = VMOVNTDQAZ128rm %rip, 1, _, %rax, _
+ %xmm16 = VMOVNTDQAZ128rm %rip, 1, _, %rax, _
+ ; CHECK: VMOVUPDZ128mr %rdi, 1, _, 0, _, %xmm16
+ VMOVUPDZ128mr %rdi, 1, _, 0, _, %xmm16
+ ; CHECK: %xmm16 = VMOVUPDZ128rm %rip, 1, _, %rax, _
+ %xmm16 = VMOVUPDZ128rm %rip, 1, _, %rax, _
+ ; CHECK: %xmm16 = VMOVUPDZ128rr %xmm16
+ %xmm16 = VMOVUPDZ128rr %xmm16
+ ; CHECK: %xmm16 = VMOVUPDZ128rr_REV %xmm16
+ %xmm16 = VMOVUPDZ128rr_REV %xmm16
+ ; CHECK: VMOVUPSZ128mr %rdi, 1, _, 0, _, %xmm16
+ VMOVUPSZ128mr %rdi, 1, _, 0, _, %xmm16
+ ; CHECK: %xmm16 = VMOVUPSZ128rm %rip, 1, _, %rax, _
+ %xmm16 = VMOVUPSZ128rm %rip, 1, _, %rax, _
+ ; CHECK: %xmm16 = VMOVUPSZ128rr %xmm16
+ %xmm16 = VMOVUPSZ128rr %xmm16
+ ; CHECK: %xmm16 = VMOVUPSZ128rr_REV %xmm16
+ %xmm16 = VMOVUPSZ128rr_REV %xmm16
+ ; CHECK: VMOVNTDQZ128mr %rdi, 1, _, 0, _, %xmm16
+ VMOVNTDQZ128mr %rdi, 1, _, 0, _, %xmm16
+ ; CHECK: VMOVNTPDZ128mr %rdi, 1, _, 0, _, %xmm16
+ VMOVNTPDZ128mr %rdi, 1, _, 0, _, %xmm16
+ ; CHECK: VMOVNTPSZ128mr %rdi, 1, _, 0, _, %xmm16
+ VMOVNTPSZ128mr %rdi, 1, _, 0, _, %xmm16
+ ; CHECK: %xmm16 = VMOVAPDZ128rr_REV %xmm16
+ %xmm16 = VMOVAPDZ128rr_REV %xmm16
+ ; CHECK: %xmm16 = VMOVAPSZ128rr_REV %xmm16
+ %xmm16 = VMOVAPSZ128rr_REV %xmm16
+ ; CHECK: %xmm16 = VMOVDQA32Z128rr_REV %xmm16
+ %xmm16 = VMOVDQA32Z128rr_REV %xmm16
+ ; CHECK: %xmm16 = VMOVDQA64Z128rr_REV %xmm16
+ %xmm16 = VMOVDQA64Z128rr_REV %xmm16
+ ; CHECK: %xmm16 = VMOVDQU16Z128rr_REV %xmm16
+ %xmm16 = VMOVDQU16Z128rr_REV %xmm16
+ ; CHECK: %xmm16 = VMOVDQU32Z128rr_REV %xmm16
+ %xmm16 = VMOVDQU32Z128rr_REV %xmm16
+ ; CHECK: %xmm16 = VMOVDQU64Z128rr_REV %xmm16
+ %xmm16 = VMOVDQU64Z128rr_REV %xmm16
+ ; CHECK: %xmm16 = VPMOVSXBDZ128rm %rip, 1, _, %rax, _
+ %xmm16 = VPMOVSXBDZ128rm %rip, 1, _, %rax, _
+ ; CHECK: %xmm16 = VPMOVSXBDZ128rr %xmm16
+ %xmm16 = VPMOVSXBDZ128rr %xmm16
+ ; CHECK: %xmm16 = VPMOVSXBQZ128rm %rip, 1, _, %rax, _
+ %xmm16 = VPMOVSXBQZ128rm %rip, 1, _, %rax, _
+ ; CHECK: %xmm16 = VPMOVSXBQZ128rr %xmm16
+ %xmm16 = VPMOVSXBQZ128rr %xmm16
+ ; CHECK: %xmm16 = VPMOVSXBWZ128rm %rip, 1, _, %rax, _
+ %xmm16 = VPMOVSXBWZ128rm %rip, 1, _, %rax, _
+ ; CHECK: %xmm16 = VPMOVSXBWZ128rr %xmm16
+ %xmm16 = VPMOVSXBWZ128rr %xmm16
+ ; CHECK: %xmm16 = VPMOVSXDQZ128rm %rip, 1, _, %rax, _
+ %xmm16 = VPMOVSXDQZ128rm %rip, 1, _, %rax, _
+ ; CHECK: %xmm16 = VPMOVSXDQZ128rr %xmm16
+ %xmm16 = VPMOVSXDQZ128rr %xmm16
+ ; CHECK: %xmm16 = VPMOVSXWDZ128rm %rip, 1, _, %rax, _
+ %xmm16 = VPMOVSXWDZ128rm %rip, 1, _, %rax, _
+ ; CHECK: %xmm16 = VPMOVSXWDZ128rr %xmm16
+ %xmm16 = VPMOVSXWDZ128rr %xmm16
+ ; CHECK: %xmm16 = VPMOVSXWQZ128rm %rip, 1, _, %rax, _
+ %xmm16 = VPMOVSXWQZ128rm %rip, 1, _, %rax, _
+ ; CHECK: %xmm16 = VPMOVSXWQZ128rr %xmm16
+ %xmm16 = VPMOVSXWQZ128rr %xmm16
+ ; CHECK: %xmm16 = VPMOVZXBDZ128rm %rip, 1, _, %rax, _
+ %xmm16 = VPMOVZXBDZ128rm %rip, 1, _, %rax, _
+ ; CHECK: %xmm16 = VPMOVZXBDZ128rr %xmm16
+ %xmm16 = VPMOVZXBDZ128rr %xmm16
+ ; CHECK: %xmm16 = VPMOVZXBQZ128rm %rip, 1, _, %rax, _
+ %xmm16 = VPMOVZXBQZ128rm %rip, 1, _, %rax, _
+ ; CHECK: %xmm16 = VPMOVZXBQZ128rr %xmm16
+ %xmm16 = VPMOVZXBQZ128rr %xmm16
+ ; CHECK: %xmm16 = VPMOVZXBWZ128rm %rip, 1, _, %rax, _
+ %xmm16 = VPMOVZXBWZ128rm %rip, 1, _, %rax, _
+ ; CHECK: %xmm16 = VPMOVZXBWZ128rr %xmm16
+ %xmm16 = VPMOVZXBWZ128rr %xmm16
+ ; CHECK: %xmm16 = VPMOVZXDQZ128rm %rip, 1, _, %rax, _
+ %xmm16 = VPMOVZXDQZ128rm %rip, 1, _, %rax, _
+ ; CHECK: %xmm16 = VPMOVZXDQZ128rr %xmm16
+ %xmm16 = VPMOVZXDQZ128rr %xmm16
+ ; CHECK: %xmm16 = VPMOVZXWDZ128rm %rip, 1, _, %rax, _
+ %xmm16 = VPMOVZXWDZ128rm %rip, 1, _, %rax, _
+ ; CHECK: %xmm16 = VPMOVZXWDZ128rr %xmm16
+ %xmm16 = VPMOVZXWDZ128rr %xmm16
+ ; CHECK: %xmm16 = VPMOVZXWQZ128rm %rip, 1, _, %rax, _
+ %xmm16 = VPMOVZXWQZ128rm %rip, 1, _, %rax, _
+ ; CHECK: %xmm16 = VPMOVZXWQZ128rr %xmm16
+ %xmm16 = VPMOVZXWQZ128rr %xmm16
+ ; CHECK: VMOVHPDZ128mr %rdi, 1, _, 0, _, %xmm16
+ VMOVHPDZ128mr %rdi, 1, _, 0, _, %xmm16
+ ; CHECK: %xmm16 = VMOVHPDZ128rm %xmm16, %rdi, 1, _, 0, _
+ %xmm16 = VMOVHPDZ128rm %xmm16, %rdi, 1, _, 0, _
+ ; CHECK: VMOVHPSZ128mr %rdi, 1, _, 0, _, %xmm16
+ VMOVHPSZ128mr %rdi, 1, _, 0, _, %xmm16
+ ; CHECK: %xmm16 = VMOVHPSZ128rm %xmm16, %rdi, 1, _, 0, _
+ %xmm16 = VMOVHPSZ128rm %xmm16, %rdi, 1, _, 0, _
+ ; CHECK: VMOVLPDZ128mr %rdi, 1, _, 0, _, %xmm16
+ VMOVLPDZ128mr %rdi, 1, _, 0, _, %xmm16
+ ; CHECK: %xmm16 = VMOVLPDZ128rm %xmm16, %rdi, 1, _, 0, _
+ %xmm16 = VMOVLPDZ128rm %xmm16, %rdi, 1, _, 0, _
+ ; CHECK: VMOVLPSZ128mr %rdi, 1, _, 0, _, %xmm16
+ VMOVLPSZ128mr %rdi, 1, _, 0, _, %xmm16
+ ; CHECK: %xmm16 = VMOVLPSZ128rm %xmm16, %rdi, 1, _, 0, _
+ %xmm16 = VMOVLPSZ128rm %xmm16, %rdi, 1, _, 0, _
+ ; CHECK: %xmm16 = VMAXCPDZ128rm %xmm16, %rip, 1, _, %rax, _
+ %xmm16 = VMAXCPDZ128rm %xmm16, %rip, 1, _, %rax, _
+ ; CHECK: %xmm16 = VMAXCPDZ128rr %xmm16, %xmm1
+ %xmm16 = VMAXCPDZ128rr %xmm16, %xmm1
+ ; CHECK: %xmm16 = VMAXCPSZ128rm %xmm16, %rip, 1, _, %rax, _
+ %xmm16 = VMAXCPSZ128rm %xmm16, %rip, 1, _, %rax, _
+ ; CHECK: %xmm16 = VMAXCPSZ128rr %xmm16, %xmm1
+ %xmm16 = VMAXCPSZ128rr %xmm16, %xmm1
+ ; CHECK: %xmm16 = VMAXPDZ128rm %xmm16, %rip, 1, _, %rax, _
+ %xmm16 = VMAXPDZ128rm %xmm16, %rip, 1, _, %rax, _
+ ; CHECK: %xmm16 = VMAXPDZ128rr %xmm16, %xmm1
+ %xmm16 = VMAXPDZ128rr %xmm16, %xmm1
+ ; CHECK: %xmm16 = VMAXPSZ128rm %xmm16, %rip, 1, _, %rax, _
+ %xmm16 = VMAXPSZ128rm %xmm16, %rip, 1, _, %rax, _
+ ; CHECK: %xmm16 = VMAXPSZ128rr %xmm16, %xmm1
+ %xmm16 = VMAXPSZ128rr %xmm16, %xmm1
+ ; CHECK: %xmm16 = VMINCPDZ128rm %xmm16, %rip, 1, _, %rax, _
+ %xmm16 = VMINCPDZ128rm %xmm16, %rip, 1, _, %rax, _
+ ; CHECK: %xmm16 = VMINCPDZ128rr %xmm16, %xmm1
+ %xmm16 = VMINCPDZ128rr %xmm16, %xmm1
+ ; CHECK: %xmm16 = VMINCPSZ128rm %xmm16, %rip, 1, _, %rax, _
+ %xmm16 = VMINCPSZ128rm %xmm16, %rip, 1, _, %rax, _
+ ; CHECK: %xmm16 = VMINCPSZ128rr %xmm16, %xmm1
+ %xmm16 = VMINCPSZ128rr %xmm16, %xmm1
+ ; CHECK: %xmm16 = VMINPDZ128rm %xmm16, %rip, 1, _, %rax, _
+ %xmm16 = VMINPDZ128rm %xmm16, %rip, 1, _, %rax, _
+ ; CHECK: %xmm16 = VMINPDZ128rr %xmm16, %xmm1
+ %xmm16 = VMINPDZ128rr %xmm16, %xmm1
+ ; CHECK: %xmm16 = VMINPSZ128rm %xmm16, %rip, 1, _, %rax, _
+ %xmm16 = VMINPSZ128rm %xmm16, %rip, 1, _, %rax, _
+ ; CHECK: %xmm16 = VMINPSZ128rr %xmm16, %xmm1
+ %xmm16 = VMINPSZ128rr %xmm16, %xmm1
+ ; CHECK: %xmm16 = VMULPDZ128rm %xmm16, %rip, 1, _, %rax, _
+ %xmm16 = VMULPDZ128rm %xmm16, %rip, 1, _, %rax, _
+ ; CHECK: %xmm16 = VMULPDZ128rr %xmm16, %xmm1
+ %xmm16 = VMULPDZ128rr %xmm16, %xmm1
+ ; CHECK: %xmm16 = VMULPSZ128rm %xmm16, %rip, 1, _, %rax, _
+ %xmm16 = VMULPSZ128rm %xmm16, %rip, 1, _, %rax, _
+ ; CHECK: %xmm16 = VMULPSZ128rr %xmm16, %xmm1
+ %xmm16 = VMULPSZ128rr %xmm16, %xmm1
+ ; CHECK: %xmm16 = VORPDZ128rm %xmm16, %rip, 1, _, %rax, _
+ %xmm16 = VORPDZ128rm %xmm16, %rip, 1, _, %rax, _
+ ; CHECK: %xmm16 = VORPDZ128rr %xmm16, %xmm1
+ %xmm16 = VORPDZ128rr %xmm16, %xmm1
+ ; CHECK: %xmm16 = VORPSZ128rm %xmm16, %rip, 1, _, %rax, _
+ %xmm16 = VORPSZ128rm %xmm16, %rip, 1, _, %rax, _
+ ; CHECK: %xmm16 = VORPSZ128rr %xmm16, %xmm1
+ %xmm16 = VORPSZ128rr %xmm16, %xmm1
+ ; CHECK: %xmm16 = VPADDBZ128rm %xmm16, %rip, 1, _, %rax, _
+ %xmm16 = VPADDBZ128rm %xmm16, %rip, 1, _, %rax, _
+ ; CHECK: %xmm16 = VPADDBZ128rr %xmm16, %xmm1
+ %xmm16 = VPADDBZ128rr %xmm16, %xmm1
+ ; CHECK: %xmm16 = VPADDDZ128rm %xmm16, %rip, 1, _, %rax, _
+ %xmm16 = VPADDDZ128rm %xmm16, %rip, 1, _, %rax, _
+ ; CHECK: %xmm16 = VPADDDZ128rr %xmm16, %xmm1
+ %xmm16 = VPADDDZ128rr %xmm16, %xmm1
+ ; CHECK: %xmm16 = VPADDQZ128rm %xmm16, %rip, 1, _, %rax, _
+ %xmm16 = VPADDQZ128rm %xmm16, %rip, 1, _, %rax, _
+ ; CHECK: %xmm16 = VPADDQZ128rr %xmm16, %xmm1
+ %xmm16 = VPADDQZ128rr %xmm16, %xmm1
+ ; CHECK: %xmm16 = VPADDSBZ128rm %xmm16, %rip, 1, _, %rax, _
+ %xmm16 = VPADDSBZ128rm %xmm16, %rip, 1, _, %rax, _
+ ; CHECK: %xmm16 = VPADDSBZ128rr %xmm16, %xmm1
+ %xmm16 = VPADDSBZ128rr %xmm16, %xmm1
+ ; CHECK: %xmm16 = VPADDSWZ128rm %xmm16, %rip, 1, _, %rax, _
+ %xmm16 = VPADDSWZ128rm %xmm16, %rip, 1, _, %rax, _
+ ; CHECK: %xmm16 = VPADDSWZ128rr %xmm16, %xmm1
+ %xmm16 = VPADDSWZ128rr %xmm16, %xmm1
+ ; CHECK: %xmm16 = VPADDUSBZ128rm %xmm16, %rip, 1, _, %rax, _
+ %xmm16 = VPADDUSBZ128rm %xmm16, %rip, 1, _, %rax, _
+ ; CHECK: %xmm16 = VPADDUSBZ128rr %xmm16, %xmm1
+ %xmm16 = VPADDUSBZ128rr %xmm16, %xmm1
+ ; CHECK: %xmm16 = VPADDUSWZ128rm %xmm16, %rip, 1, _, %rax, _
+ %xmm16 = VPADDUSWZ128rm %xmm16, %rip, 1, _, %rax, _
+ ; CHECK: %xmm16 = VPADDUSWZ128rr %xmm16, %xmm1
+ %xmm16 = VPADDUSWZ128rr %xmm16, %xmm1
+ ; CHECK: %xmm16 = VPADDWZ128rm %xmm16, %rip, 1, _, %rax, _
+ %xmm16 = VPADDWZ128rm %xmm16, %rip, 1, _, %rax, _
+ ; CHECK: %xmm16 = VPADDWZ128rr %xmm16, %xmm1
+ %xmm16 = VPADDWZ128rr %xmm16, %xmm1
+ ; CHECK: %xmm16 = VPANDDZ128rm %xmm16, %rip, 1, _, %rax, _
+ %xmm16 = VPANDDZ128rm %xmm16, %rip, 1, _, %rax, _
+ ; CHECK: %xmm16 = VPANDDZ128rr %xmm16, %xmm1
+ %xmm16 = VPANDDZ128rr %xmm16, %xmm1
+ ; CHECK: %xmm16 = VPANDQZ128rm %xmm16, %rip, 1, _, %rax, _
+ %xmm16 = VPANDQZ128rm %xmm16, %rip, 1, _, %rax, _
+ ; CHECK: %xmm16 = VPANDQZ128rr %xmm16, %xmm1
+ %xmm16 = VPANDQZ128rr %xmm16, %xmm1
+ ; CHECK: %xmm16 = VPAVGBZ128rm %xmm16, %rip, 1, _, %rax, _
+ %xmm16 = VPAVGBZ128rm %xmm16, %rip, 1, _, %rax, _
+ ; CHECK: %xmm16 = VPAVGBZ128rr %xmm16, %xmm1
+ %xmm16 = VPAVGBZ128rr %xmm16, %xmm1
+ ; CHECK: %xmm16 = VPAVGWZ128rm %xmm16, %rip, 1, _, %rax, _
+ %xmm16 = VPAVGWZ128rm %xmm16, %rip, 1, _, %rax, _
+ ; CHECK: %xmm16 = VPAVGWZ128rr %xmm16, %xmm1
+ %xmm16 = VPAVGWZ128rr %xmm16, %xmm1
+ ; CHECK: %xmm16 = VPMAXSBZ128rm %xmm16, %rip, 1, _, %rax, _
+ %xmm16 = VPMAXSBZ128rm %xmm16, %rip, 1, _, %rax, _
+ ; CHECK: %xmm16 = VPMAXSBZ128rr %xmm16, %xmm1
+ %xmm16 = VPMAXSBZ128rr %xmm16, %xmm1
+ ; CHECK: %xmm16 = VPMAXSDZ128rm %xmm16, %rip, 1, _, %rax, _
+ %xmm16 = VPMAXSDZ128rm %xmm16, %rip, 1, _, %rax, _
+ ; CHECK: %xmm16 = VPMAXSDZ128rr %xmm16, %xmm1
+ %xmm16 = VPMAXSDZ128rr %xmm16, %xmm1
+ ; CHECK: %xmm16 = VPMAXSWZ128rm %xmm16, %rip, 1, _, %rax, _
+ %xmm16 = VPMAXSWZ128rm %xmm16, %rip, 1, _, %rax, _
+ ; CHECK: %xmm16 = VPMAXSWZ128rr %xmm16, %xmm1
+ %xmm16 = VPMAXSWZ128rr %xmm16, %xmm1
+ ; CHECK: %xmm16 = VPMAXUBZ128rm %xmm16, %rip, 1, _, %rax, _
+ %xmm16 = VPMAXUBZ128rm %xmm16, %rip, 1, _, %rax, _
+ ; CHECK: %xmm16 = VPMAXUBZ128rr %xmm16, %xmm1
+ %xmm16 = VPMAXUBZ128rr %xmm16, %xmm1
+ ; CHECK: %xmm16 = VPMAXUDZ128rm %xmm16, %rip, 1, _, %rax, _
+ %xmm16 = VPMAXUDZ128rm %xmm16, %rip, 1, _, %rax, _
+ ; CHECK: %xmm16 = VPMAXUDZ128rr %xmm16, %xmm1
+ %xmm16 = VPMAXUDZ128rr %xmm16, %xmm1
+ ; CHECK: %xmm16 = VPMAXUWZ128rm %xmm16, %rip, 1, _, %rax, _
+ %xmm16 = VPMAXUWZ128rm %xmm16, %rip, 1, _, %rax, _
+ ; CHECK: %xmm16 = VPMAXUWZ128rr %xmm16, %xmm1
+ %xmm16 = VPMAXUWZ128rr %xmm16, %xmm1
+ ; CHECK: %xmm16 = VPMINSBZ128rm %xmm16, %rip, 1, _, %rax, _
+ %xmm16 = VPMINSBZ128rm %xmm16, %rip, 1, _, %rax, _
+ ; CHECK: %xmm16 = VPMINSBZ128rr %xmm16, %xmm1
+ %xmm16 = VPMINSBZ128rr %xmm16, %xmm1
+ ; CHECK: %xmm16 = VPMINSDZ128rm %xmm16, %rip, 1, _, %rax, _
+ %xmm16 = VPMINSDZ128rm %xmm16, %rip, 1, _, %rax, _
+ ; CHECK: %xmm16 = VPMINSDZ128rr %xmm16, %xmm1
+ %xmm16 = VPMINSDZ128rr %xmm16, %xmm1
+ ; CHECK: %xmm16 = VPMINSWZ128rm %xmm16, %rip, 1, _, %rax, _
+ %xmm16 = VPMINSWZ128rm %xmm16, %rip, 1, _, %rax, _
+ ; CHECK: %xmm16 = VPMINSWZ128rr %xmm16, %xmm1
+ %xmm16 = VPMINSWZ128rr %xmm16, %xmm1
+ ; CHECK: %xmm16 = VPMINUBZ128rm %xmm16, %rip, 1, _, %rax, _
+ %xmm16 = VPMINUBZ128rm %xmm16, %rip, 1, _, %rax, _
+ ; CHECK: %xmm16 = VPMINUBZ128rr %xmm16, %xmm1
+ %xmm16 = VPMINUBZ128rr %xmm16, %xmm1
+ ; CHECK: %xmm16 = VPMINUDZ128rm %xmm16, %rip, 1, _, %rax, _
+ %xmm16 = VPMINUDZ128rm %xmm16, %rip, 1, _, %rax, _
+ ; CHECK: %xmm16 = VPMINUDZ128rr %xmm16, %xmm1
+ %xmm16 = VPMINUDZ128rr %xmm16, %xmm1
+ ; CHECK: %xmm16 = VPMINUWZ128rm %xmm16, %rip, 1, _, %rax, _
+ %xmm16 = VPMINUWZ128rm %xmm16, %rip, 1, _, %rax, _
+ ; CHECK: %xmm16 = VPMINUWZ128rr %xmm16, %xmm1
+ %xmm16 = VPMINUWZ128rr %xmm16, %xmm1
+ ; CHECK: %xmm16 = VPMULDQZ128rm %xmm16, %rip, 1, _, %rax, _
+ %xmm16 = VPMULDQZ128rm %xmm16, %rip, 1, _, %rax, _
+ ; CHECK: %xmm16 = VPMULDQZ128rr %xmm16, %xmm1
+ %xmm16 = VPMULDQZ128rr %xmm16, %xmm1
+ ; CHECK: %xmm16 = VPMULHRSWZ128rm %xmm16, %rip, 1, _, %rax, _
+ %xmm16 = VPMULHRSWZ128rm %xmm16, %rip, 1, _, %rax, _
+ ; CHECK: %xmm16 = VPMULHRSWZ128rr %xmm16, %xmm1
+ %xmm16 = VPMULHRSWZ128rr %xmm16, %xmm1
+ ; CHECK: %xmm16 = VPMULHUWZ128rm %xmm16, %rip, 1, _, %rax, _
+ %xmm16 = VPMULHUWZ128rm %xmm16, %rip, 1, _, %rax, _
+ ; CHECK: %xmm16 = VPMULHUWZ128rr %xmm16, %xmm1
+ %xmm16 = VPMULHUWZ128rr %xmm16, %xmm1
+ ; CHECK: %xmm16 = VPMULHWZ128rm %xmm16, %rip, 1, _, %rax, _
+ %xmm16 = VPMULHWZ128rm %xmm16, %rip, 1, _, %rax, _
+ ; CHECK: %xmm16 = VPMULHWZ128rr %xmm16, %xmm1
+ %xmm16 = VPMULHWZ128rr %xmm16, %xmm1
+ ; CHECK: %xmm16 = VPMULLDZ128rm %xmm16, %rip, 1, _, %rax, _
+ %xmm16 = VPMULLDZ128rm %xmm16, %rip, 1, _, %rax, _
+ ; CHECK: %xmm16 = VPMULLDZ128rr %xmm16, %xmm1
+ %xmm16 = VPMULLDZ128rr %xmm16, %xmm1
+ ; CHECK: %xmm16 = VPMULLWZ128rm %xmm16, %rip, 1, _, %rax, _
+ %xmm16 = VPMULLWZ128rm %xmm16, %rip, 1, _, %rax, _
+ ; CHECK: %xmm16 = VPMULLWZ128rr %xmm16, %xmm1
+ %xmm16 = VPMULLWZ128rr %xmm16, %xmm1
+ ; CHECK: %xmm16 = VPMULUDQZ128rm %xmm16, %rip, 1, _, %rax, _
+ %xmm16 = VPMULUDQZ128rm %xmm16, %rip, 1, _, %rax, _
+ ; CHECK: %xmm16 = VPMULUDQZ128rr %xmm16, %xmm1
+ %xmm16 = VPMULUDQZ128rr %xmm16, %xmm1
+ ; CHECK: %xmm16 = VPORDZ128rm %xmm16, %rip, 1, _, %rax, _
+ %xmm16 = VPORDZ128rm %xmm16, %rip, 1, _, %rax, _
+ ; CHECK: %xmm16 = VPORDZ128rr %xmm16, %xmm1
+ %xmm16 = VPORDZ128rr %xmm16, %xmm1
+ ; CHECK: %xmm16 = VPORQZ128rm %xmm16, %rip, 1, _, %rax, _
+ %xmm16 = VPORQZ128rm %xmm16, %rip, 1, _, %rax, _
+ ; CHECK: %xmm16 = VPORQZ128rr %xmm16, %xmm1
+ %xmm16 = VPORQZ128rr %xmm16, %xmm1
+ ; CHECK: %xmm16 = VPSUBBZ128rm %xmm16, %rip, 1, _, %rax, _
+ %xmm16 = VPSUBBZ128rm %xmm16, %rip, 1, _, %rax, _
+ ; CHECK: %xmm16 = VPSUBBZ128rr %xmm16, %xmm1
+ %xmm16 = VPSUBBZ128rr %xmm16, %xmm1
+ ; CHECK: %xmm16 = VPSUBDZ128rm %xmm16, %rip, 1, _, %rax, _
+ %xmm16 = VPSUBDZ128rm %xmm16, %rip, 1, _, %rax, _
+ ; CHECK: %xmm16 = VPSUBDZ128rr %xmm16, %xmm1
+ %xmm16 = VPSUBDZ128rr %xmm16, %xmm1
+ ; CHECK: %xmm16 = VPSUBQZ128rm %xmm16, %rip, 1, _, %rax, _
+ %xmm16 = VPSUBQZ128rm %xmm16, %rip, 1, _, %rax, _
+ ; CHECK: %xmm16 = VPSUBQZ128rr %xmm16, %xmm1
+ %xmm16 = VPSUBQZ128rr %xmm16, %xmm1
+ ; CHECK: %xmm16 = VPSUBSBZ128rm %xmm16, %rip, 1, _, %rax, _
+ %xmm16 = VPSUBSBZ128rm %xmm16, %rip, 1, _, %rax, _
+ ; CHECK: %xmm16 = VPSUBSBZ128rr %xmm16, %xmm1
+ %xmm16 = VPSUBSBZ128rr %xmm16, %xmm1
+ ; CHECK: %xmm16 = VPSUBSWZ128rm %xmm16, %rip, 1, _, %rax, _
+ %xmm16 = VPSUBSWZ128rm %xmm16, %rip, 1, _, %rax, _
+ ; CHECK: %xmm16 = VPSUBSWZ128rr %xmm16, %xmm1
+ %xmm16 = VPSUBSWZ128rr %xmm16, %xmm1
+ ; CHECK: %xmm16 = VPSUBUSBZ128rm %xmm16, %rip, 1, _, %rax, _
+ %xmm16 = VPSUBUSBZ128rm %xmm16, %rip, 1, _, %rax, _
+ ; CHECK: %xmm16 = VPSUBUSBZ128rr %xmm16, %xmm1
+ %xmm16 = VPSUBUSBZ128rr %xmm16, %xmm1
+ ; CHECK: %xmm16 = VPSUBUSWZ128rm %xmm16, %rip, 1, _, %rax, _
+ %xmm16 = VPSUBUSWZ128rm %xmm16, %rip, 1, _, %rax, _
+ ; CHECK: %xmm16 = VPSUBUSWZ128rr %xmm16, %xmm1
+ %xmm16 = VPSUBUSWZ128rr %xmm16, %xmm1
+ ; CHECK: %xmm16 = VPSUBWZ128rm %xmm16, %rip, 1, _, %rax, _
+ %xmm16 = VPSUBWZ128rm %xmm16, %rip, 1, _, %rax, _
+ ; CHECK: %xmm16 = VPSUBWZ128rr %xmm16, %xmm1
+ %xmm16 = VPSUBWZ128rr %xmm16, %xmm1
+ ; CHECK: %xmm16 = VADDPDZ128rm %xmm16, %rip, 1, _, %rax, _
+ %xmm16 = VADDPDZ128rm %xmm16, %rip, 1, _, %rax, _
+ ; CHECK: %xmm16 = VADDPDZ128rr %xmm16, %xmm1
+ %xmm16 = VADDPDZ128rr %xmm16, %xmm1
+ ; CHECK: %xmm16 = VADDPSZ128rm %xmm16, %rip, 1, _, %rax, _
+ %xmm16 = VADDPSZ128rm %xmm16, %rip, 1, _, %rax, _
+ ; CHECK: %xmm16 = VADDPSZ128rr %xmm16, %xmm1
+ %xmm16 = VADDPSZ128rr %xmm16, %xmm1
+ ; CHECK: %xmm16 = VANDNPDZ128rm %xmm16, %rip, 1, _, %rax, _
+ %xmm16 = VANDNPDZ128rm %xmm16, %rip, 1, _, %rax, _
+ ; CHECK: %xmm16 = VANDNPDZ128rr %xmm16, %xmm1
+ %xmm16 = VANDNPDZ128rr %xmm16, %xmm1
+ ; CHECK: %xmm16 = VANDNPSZ128rm %xmm16, %rip, 1, _, %rax, _
+ %xmm16 = VANDNPSZ128rm %xmm16, %rip, 1, _, %rax, _
+ ; CHECK: %xmm16 = VANDNPSZ128rr %xmm16, %xmm1
+ %xmm16 = VANDNPSZ128rr %xmm16, %xmm1
+ ; CHECK: %xmm16 = VANDPDZ128rm %xmm16, %rip, 1, _, %rax, _
+ %xmm16 = VANDPDZ128rm %xmm16, %rip, 1, _, %rax, _
+ ; CHECK: %xmm16 = VANDPDZ128rr %xmm16, %xmm1
+ %xmm16 = VANDPDZ128rr %xmm16, %xmm1
+ ; CHECK: %xmm16 = VANDPSZ128rm %xmm16, %rip, 1, _, %rax, _
+ %xmm16 = VANDPSZ128rm %xmm16, %rip, 1, _, %rax, _
+ ; CHECK: %xmm16 = VANDPSZ128rr %xmm16, %xmm1
+ %xmm16 = VANDPSZ128rr %xmm16, %xmm1
+ ; CHECK: %xmm16 = VDIVPDZ128rm %xmm16, %rip, 1, _, %rax, _
+ %xmm16 = VDIVPDZ128rm %xmm16, %rip, 1, _, %rax, _
+ ; CHECK: %xmm16 = VDIVPDZ128rr %xmm16, %xmm1
+ %xmm16 = VDIVPDZ128rr %xmm16, %xmm1
+ ; CHECK: %xmm16 = VDIVPSZ128rm %xmm16, %rip, 1, _, %rax, _
+ %xmm16 = VDIVPSZ128rm %xmm16, %rip, 1, _, %rax, _
+ ; CHECK: %xmm16 = VDIVPSZ128rr %xmm16, %xmm1
+ %xmm16 = VDIVPSZ128rr %xmm16, %xmm1
+ ; CHECK: %xmm16 = VPXORDZ128rm %xmm16, %rip, 1, _, %rax, _
+ %xmm16 = VPXORDZ128rm %xmm16, %rip, 1, _, %rax, _
+ ; CHECK: %xmm16 = VPXORDZ128rr %xmm16, %xmm1
+ %xmm16 = VPXORDZ128rr %xmm16, %xmm1
+ ; CHECK: %xmm16 = VPXORQZ128rm %xmm16, %rip, 1, _, %rax, _
+ %xmm16 = VPXORQZ128rm %xmm16, %rip, 1, _, %rax, _
+ ; CHECK: %xmm16 = VPXORQZ128rr %xmm16, %xmm1
+ %xmm16 = VPXORQZ128rr %xmm16, %xmm1
+ ; CHECK: %xmm16 = VSUBPDZ128rm %xmm16, %rip, 1, _, %rax, _
+ %xmm16 = VSUBPDZ128rm %xmm16, %rip, 1, _, %rax, _
+ ; CHECK: %xmm16 = VSUBPDZ128rr %xmm16, %xmm1
+ %xmm16 = VSUBPDZ128rr %xmm16, %xmm1
+ ; CHECK: %xmm16 = VSUBPSZ128rm %xmm16, %rip, 1, _, %rax, _
+ %xmm16 = VSUBPSZ128rm %xmm16, %rip, 1, _, %rax, _
+ ; CHECK: %xmm16 = VSUBPSZ128rr %xmm16, %xmm1
+ %xmm16 = VSUBPSZ128rr %xmm16, %xmm1
+ ; CHECK: %xmm16 = VXORPDZ128rm %xmm16, %rip, 1, _, %rax, _
+ %xmm16 = VXORPDZ128rm %xmm16, %rip, 1, _, %rax, _
+ ; CHECK: %xmm16 = VXORPDZ128rr %xmm16, %xmm1
+ %xmm16 = VXORPDZ128rr %xmm16, %xmm1
+ ; CHECK: %xmm16 = VXORPSZ128rm %xmm16, %rip, 1, _, %rax, _
+ %xmm16 = VXORPSZ128rm %xmm16, %rip, 1, _, %rax, _
+ ; CHECK: %xmm16 = VXORPSZ128rr %xmm16, %xmm1
+ %xmm16 = VXORPSZ128rr %xmm16, %xmm1
+ ; CHECK: %xmm16 = VPMADDUBSWZ128rm %xmm16, %rip, 1, _, %rax, _
+ %xmm16 = VPMADDUBSWZ128rm %xmm16, %rip, 1, _, %rax, _
+ ; CHECK: %xmm16 = VPMADDUBSWZ128rr %xmm16, %xmm1
+ %xmm16 = VPMADDUBSWZ128rr %xmm16, %xmm1
+ ; CHECK: %xmm16 = VPMADDWDZ128rm %xmm16, %rip, 1, _, %rax, _
+ %xmm16 = VPMADDWDZ128rm %xmm16, %rip, 1, _, %rax, _
+ ; CHECK: %xmm16 = VPMADDWDZ128rr %xmm16, %xmm1
+ %xmm16 = VPMADDWDZ128rr %xmm16, %xmm1
+ ; CHECK: %xmm16 = VPACKSSDWZ128rm %xmm16, %rip, 1, _, %rax, _
+ %xmm16 = VPACKSSDWZ128rm %xmm16, %rip, 1, _, %rax, _
+ ; CHECK: %xmm16 = VPACKSSDWZ128rr %xmm16, %xmm1
+ %xmm16 = VPACKSSDWZ128rr %xmm16, %xmm1
+ ; CHECK: %xmm16 = VPACKSSWBZ128rm %xmm16, %rip, 1, _, %rax, _
+ %xmm16 = VPACKSSWBZ128rm %xmm16, %rip, 1, _, %rax, _
+ ; CHECK: %xmm16 = VPACKSSWBZ128rr %xmm16, %xmm1
+ %xmm16 = VPACKSSWBZ128rr %xmm16, %xmm1
+ ; CHECK: %xmm16 = VPACKUSDWZ128rm %xmm16, %rip, 1, _, %rax, _
+ %xmm16 = VPACKUSDWZ128rm %xmm16, %rip, 1, _, %rax, _
+ ; CHECK: %xmm16 = VPACKUSDWZ128rr %xmm16, %xmm1
+ %xmm16 = VPACKUSDWZ128rr %xmm16, %xmm1
+ ; CHECK: %xmm16 = VPACKUSWBZ128rm %xmm16, %rip, 1, _, %rax, _
+ %xmm16 = VPACKUSWBZ128rm %xmm16, %rip, 1, _, %rax, _
+ ; CHECK: %xmm16 = VPACKUSWBZ128rr %xmm16, %xmm1
+ %xmm16 = VPACKUSWBZ128rr %xmm16, %xmm1
+ ; CHECK: %xmm16 = VPUNPCKHBWZ128rm %xmm16, %rip, 1, _, %rax, _
+ %xmm16 = VPUNPCKHBWZ128rm %xmm16, %rip, 1, _, %rax, _
+ ; CHECK: %xmm16 = VPUNPCKHBWZ128rr %xmm16, %xmm1
+ %xmm16 = VPUNPCKHBWZ128rr %xmm16, %xmm1
+ ; CHECK: %xmm16 = VPUNPCKHDQZ128rm %xmm16, %rip, 1, _, %rax, _
+ %xmm16 = VPUNPCKHDQZ128rm %xmm16, %rip, 1, _, %rax, _
+ ; CHECK: %xmm16 = VPUNPCKHDQZ128rr %xmm16, %xmm1
+ %xmm16 = VPUNPCKHDQZ128rr %xmm16, %xmm1
+ ; CHECK: %xmm16 = VPUNPCKHQDQZ128rm %xmm16, %rip, 1, _, %rax, _
+ %xmm16 = VPUNPCKHQDQZ128rm %xmm16, %rip, 1, _, %rax, _
+ ; CHECK: %xmm16 = VPUNPCKHQDQZ128rr %xmm16, %xmm1
+ %xmm16 = VPUNPCKHQDQZ128rr %xmm16, %xmm1
+ ; CHECK: %xmm16 = VPUNPCKHWDZ128rm %xmm16, %rip, 1, _, %rax, _
+ %xmm16 = VPUNPCKHWDZ128rm %xmm16, %rip, 1, _, %rax, _
+ ; CHECK: %xmm16 = VPUNPCKHWDZ128rr %xmm16, %xmm1
+ %xmm16 = VPUNPCKHWDZ128rr %xmm16, %xmm1
+ ; CHECK: %xmm16 = VPUNPCKLBWZ128rm %xmm16, %rip, 1, _, %rax, _
+ %xmm16 = VPUNPCKLBWZ128rm %xmm16, %rip, 1, _, %rax, _
+ ; CHECK: %xmm16 = VPUNPCKLBWZ128rr %xmm16, %xmm1
+ %xmm16 = VPUNPCKLBWZ128rr %xmm16, %xmm1
+ ; CHECK: %xmm16 = VPUNPCKLDQZ128rm %xmm16, %rip, 1, _, %rax, _
+ %xmm16 = VPUNPCKLDQZ128rm %xmm16, %rip, 1, _, %rax, _
+ ; CHECK: %xmm16 = VPUNPCKLDQZ128rr %xmm16, %xmm1
+ %xmm16 = VPUNPCKLDQZ128rr %xmm16, %xmm1
+ ; CHECK: %xmm16 = VPUNPCKLQDQZ128rm %xmm16, %rip, 1, _, %rax, _
+ %xmm16 = VPUNPCKLQDQZ128rm %xmm16, %rip, 1, _, %rax, _
+ ; CHECK: %xmm16 = VPUNPCKLQDQZ128rr %xmm16, %xmm1
+ %xmm16 = VPUNPCKLQDQZ128rr %xmm16, %xmm1
+ ; CHECK: %xmm16 = VPUNPCKLWDZ128rm %xmm16, %rip, 1, _, %rax, _
+ %xmm16 = VPUNPCKLWDZ128rm %xmm16, %rip, 1, _, %rax, _
+ ; CHECK: %xmm16 = VPUNPCKLWDZ128rr %xmm16, %xmm1
+ %xmm16 = VPUNPCKLWDZ128rr %xmm16, %xmm1
+ ; CHECK: %xmm16 = VUNPCKHPDZ128rm %xmm16, %rip, 1, _, %rax, _
+ %xmm16 = VUNPCKHPDZ128rm %xmm16, %rip, 1, _, %rax, _
+ ; CHECK: %xmm16 = VUNPCKHPDZ128rr %xmm16, %xmm1
+ %xmm16 = VUNPCKHPDZ128rr %xmm16, %xmm1
+ ; CHECK: %xmm16 = VUNPCKHPSZ128rm %xmm16, %rip, 1, _, %rax, _
+ %xmm16 = VUNPCKHPSZ128rm %xmm16, %rip, 1, _, %rax, _
+ ; CHECK: %xmm16 = VUNPCKHPSZ128rr %xmm16, %xmm1
+ %xmm16 = VUNPCKHPSZ128rr %xmm16, %xmm1
+ ; CHECK: %xmm16 = VUNPCKLPDZ128rm %xmm16, %rip, 1, _, %rax, _
+ %xmm16 = VUNPCKLPDZ128rm %xmm16, %rip, 1, _, %rax, _
+ ; CHECK: %xmm16 = VUNPCKLPDZ128rr %xmm16, %xmm1
+ %xmm16 = VUNPCKLPDZ128rr %xmm16, %xmm1
+ ; CHECK: %xmm16 = VUNPCKLPSZ128rm %xmm16, %rip, 1, _, %rax, _
+ %xmm16 = VUNPCKLPSZ128rm %xmm16, %rip, 1, _, %rax, _
+ ; CHECK: %xmm16 = VUNPCKLPSZ128rr %xmm16, %xmm1
+ %xmm16 = VUNPCKLPSZ128rr %xmm16, %xmm1
+ ; CHECK: %xmm16 = VFMADD132PDZ128m %xmm16, %xmm16, %rsi, 1, _, 0, _
+ %xmm16 = VFMADD132PDZ128m %xmm16, %xmm16, %rsi, 1, _, 0, _
+ ; CHECK: %xmm16 = VFMADD132PDZ128r %xmm16, %xmm1, %xmm2
+ %xmm16 = VFMADD132PDZ128r %xmm16, %xmm1, %xmm2
+ ; CHECK: %xmm16 = VFMADD132PSZ128m %xmm16, %xmm16, %rsi, 1, _, 0, _
+ %xmm16 = VFMADD132PSZ128m %xmm16, %xmm16, %rsi, 1, _, 0, _
+ ; CHECK: %xmm16 = VFMADD132PSZ128r %xmm16, %xmm1, %xmm2
+ %xmm16 = VFMADD132PSZ128r %xmm16, %xmm1, %xmm2
+ ; CHECK: %xmm16 = VFMADD213PDZ128m %xmm16, %xmm16, %rsi, 1, _, 0, _
+ %xmm16 = VFMADD213PDZ128m %xmm16, %xmm16, %rsi, 1, _, 0, _
+ ; CHECK: %xmm16 = VFMADD213PDZ128r %xmm16, %xmm1, %xmm2
+ %xmm16 = VFMADD213PDZ128r %xmm16, %xmm1, %xmm2
+ ; CHECK: %xmm16 = VFMADD213PSZ128m %xmm16, %xmm16, %rsi, 1, _, 0, _
+ %xmm16 = VFMADD213PSZ128m %xmm16, %xmm16, %rsi, 1, _, 0, _
+ ; CHECK: %xmm16 = VFMADD213PSZ128r %xmm16, %xmm1, %xmm2
+ %xmm16 = VFMADD213PSZ128r %xmm16, %xmm1, %xmm2
+ ; CHECK: %xmm16 = VFMADD231PDZ128m %xmm16, %xmm16, %rsi, 1, _, 0, _
+ %xmm16 = VFMADD231PDZ128m %xmm16, %xmm16, %rsi, 1, _, 0, _
+ ; CHECK: %xmm16 = VFMADD231PDZ128r %xmm16, %xmm1, %xmm2
+ %xmm16 = VFMADD231PDZ128r %xmm16, %xmm1, %xmm2
+ ; CHECK: %xmm16 = VFMADD231PSZ128m %xmm16, %xmm16, %rsi, 1, _, 0, _
+ %xmm16 = VFMADD231PSZ128m %xmm16, %xmm16, %rsi, 1, _, 0, _
+ ; CHECK: %xmm16 = VFMADD231PSZ128r %xmm16, %xmm1, %xmm2
+ %xmm16 = VFMADD231PSZ128r %xmm16, %xmm1, %xmm2
+ ; CHECK: %xmm16 = VFMADDSUB132PDZ128m %xmm16, %xmm16, %rsi, 1, _, 0, _
+ %xmm16 = VFMADDSUB132PDZ128m %xmm16, %xmm16, %rsi, 1, _, 0, _
+ ; CHECK: %xmm16 = VFMADDSUB132PDZ128r %xmm16, %xmm1, %xmm2
+ %xmm16 = VFMADDSUB132PDZ128r %xmm16, %xmm1, %xmm2
+ ; CHECK: %xmm16 = VFMADDSUB132PSZ128m %xmm16, %xmm16, %rsi, 1, _, 0, _
+ %xmm16 = VFMADDSUB132PSZ128m %xmm16, %xmm16, %rsi, 1, _, 0, _
+ ; CHECK: %xmm16 = VFMADDSUB132PSZ128r %xmm16, %xmm1, %xmm2
+ %xmm16 = VFMADDSUB132PSZ128r %xmm16, %xmm1, %xmm2
+ ; CHECK: %xmm16 = VFMADDSUB213PDZ128m %xmm16, %xmm16, %rsi, 1, _, 0, _
+ %xmm16 = VFMADDSUB213PDZ128m %xmm16, %xmm16, %rsi, 1, _, 0, _
+ ; CHECK: %xmm16 = VFMADDSUB213PDZ128r %xmm16, %xmm1, %xmm2
+ %xmm16 = VFMADDSUB213PDZ128r %xmm16, %xmm1, %xmm2
+ ; CHECK: %xmm16 = VFMADDSUB213PSZ128m %xmm16, %xmm16, %rsi, 1, _, 0, _
+ %xmm16 = VFMADDSUB213PSZ128m %xmm16, %xmm16, %rsi, 1, _, 0, _
+ ; CHECK: %xmm16 = VFMADDSUB213PSZ128r %xmm16, %xmm1, %xmm2
+ %xmm16 = VFMADDSUB213PSZ128r %xmm16, %xmm1, %xmm2
+ ; CHECK: %xmm16 = VFMADDSUB231PDZ128m %xmm16, %xmm16, %rsi, 1, _, 0, _
+ %xmm16 = VFMADDSUB231PDZ128m %xmm16, %xmm16, %rsi, 1, _, 0, _
+ ; CHECK: %xmm16 = VFMADDSUB231PDZ128r %xmm16, %xmm1, %xmm2
+ %xmm16 = VFMADDSUB231PDZ128r %xmm16, %xmm1, %xmm2
+ ; CHECK: %xmm16 = VFMADDSUB231PSZ128m %xmm16, %xmm16, %rsi, 1, _, 0, _
+ %xmm16 = VFMADDSUB231PSZ128m %xmm16, %xmm16, %rsi, 1, _, 0, _
+ ; CHECK: %xmm16 = VFMADDSUB231PSZ128r %xmm16, %xmm1, %xmm2
+ %xmm16 = VFMADDSUB231PSZ128r %xmm16, %xmm1, %xmm2
+ ; CHECK: %xmm16 = VFMSUB132PDZ128m %xmm16, %xmm16, %rsi, 1, _, 0, _
+ %xmm16 = VFMSUB132PDZ128m %xmm16, %xmm16, %rsi, 1, _, 0, _
+ ; CHECK: %xmm16 = VFMSUB132PDZ128r %xmm16, %xmm1, %xmm2
+ %xmm16 = VFMSUB132PDZ128r %xmm16, %xmm1, %xmm2
+ ; CHECK: %xmm16 = VFMSUB132PSZ128m %xmm16, %xmm16, %rsi, 1, _, 0, _
+ %xmm16 = VFMSUB132PSZ128m %xmm16, %xmm16, %rsi, 1, _, 0, _
+ ; CHECK: %xmm16 = VFMSUB132PSZ128r %xmm16, %xmm1, %xmm2
+ %xmm16 = VFMSUB132PSZ128r %xmm16, %xmm1, %xmm2
+ ; CHECK: %xmm16 = VFMSUB213PDZ128m %xmm16, %xmm16, %rsi, 1, _, 0, _
+ %xmm16 = VFMSUB213PDZ128m %xmm16, %xmm16, %rsi, 1, _, 0, _
+ ; CHECK: %xmm16 = VFMSUB213PDZ128r %xmm16, %xmm1, %xmm2
+ %xmm16 = VFMSUB213PDZ128r %xmm16, %xmm1, %xmm2
+ ; CHECK: %xmm16 = VFMSUB213PSZ128m %xmm16, %xmm16, %rsi, 1, _, 0, _
+ %xmm16 = VFMSUB213PSZ128m %xmm16, %xmm16, %rsi, 1, _, 0, _
+ ; CHECK: %xmm16 = VFMSUB213PSZ128r %xmm16, %xmm1, %xmm2
+ %xmm16 = VFMSUB213PSZ128r %xmm16, %xmm1, %xmm2
+ ; CHECK: %xmm16 = VFMSUB231PDZ128m %xmm16, %xmm16, %rsi, 1, _, 0, _
+ %xmm16 = VFMSUB231PDZ128m %xmm16, %xmm16, %rsi, 1, _, 0, _
+ ; CHECK: %xmm16 = VFMSUB231PDZ128r %xmm16, %xmm1, %xmm2
+ %xmm16 = VFMSUB231PDZ128r %xmm16, %xmm1, %xmm2
+ ; CHECK: %xmm16 = VFMSUB231PSZ128m %xmm16, %xmm16, %rsi, 1, _, 0, _
+ %xmm16 = VFMSUB231PSZ128m %xmm16, %xmm16, %rsi, 1, _, 0, _
+ ; CHECK: %xmm16 = VFMSUB231PSZ128r %xmm16, %xmm1, %xmm2
+ %xmm16 = VFMSUB231PSZ128r %xmm16, %xmm1, %xmm2
+ ; CHECK: %xmm16 = VFMSUBADD132PDZ128m %xmm16, %xmm16, %rsi, 1, _, 0, _
+ %xmm16 = VFMSUBADD132PDZ128m %xmm16, %xmm16, %rsi, 1, _, 0, _
+ ; CHECK: %xmm16 = VFMSUBADD132PDZ128r %xmm16, %xmm1, %xmm2
+ %xmm16 = VFMSUBADD132PDZ128r %xmm16, %xmm1, %xmm2
+ ; CHECK: %xmm16 = VFMSUBADD132PSZ128m %xmm16, %xmm16, %rsi, 1, _, 0, _
+ %xmm16 = VFMSUBADD132PSZ128m %xmm16, %xmm16, %rsi, 1, _, 0, _
+ ; CHECK: %xmm16 = VFMSUBADD132PSZ128r %xmm16, %xmm1, %xmm2
+ %xmm16 = VFMSUBADD132PSZ128r %xmm16, %xmm1, %xmm2
+ ; CHECK: %xmm16 = VFMSUBADD213PDZ128m %xmm16, %xmm16, %rsi, 1, _, 0, _
+ %xmm16 = VFMSUBADD213PDZ128m %xmm16, %xmm16, %rsi, 1, _, 0, _
+ ; CHECK: %xmm16 = VFMSUBADD213PDZ128r %xmm16, %xmm1, %xmm2
+ %xmm16 = VFMSUBADD213PDZ128r %xmm16, %xmm1, %xmm2
+ ; CHECK: %xmm16 = VFMSUBADD213PSZ128m %xmm16, %xmm16, %rsi, 1, _, 0, _
+ %xmm16 = VFMSUBADD213PSZ128m %xmm16, %xmm16, %rsi, 1, _, 0, _
+ ; CHECK: %xmm16 = VFMSUBADD213PSZ128r %xmm16, %xmm1, %xmm2
+ %xmm16 = VFMSUBADD213PSZ128r %xmm16, %xmm1, %xmm2
+ ; CHECK: %xmm16 = VFMSUBADD231PDZ128m %xmm16, %xmm16, %rsi, 1, _, 0, _
+ %xmm16 = VFMSUBADD231PDZ128m %xmm16, %xmm16, %rsi, 1, _, 0, _
+ ; CHECK: %xmm16 = VFMSUBADD231PDZ128r %xmm16, %xmm1, %xmm2
+ %xmm16 = VFMSUBADD231PDZ128r %xmm16, %xmm1, %xmm2
+ ; CHECK: %xmm16 = VFMSUBADD231PSZ128m %xmm16, %xmm16, %rsi, 1, _, 0, _
+ %xmm16 = VFMSUBADD231PSZ128m %xmm16, %xmm16, %rsi, 1, _, 0, _
+ ; CHECK: %xmm16 = VFMSUBADD231PSZ128r %xmm16, %xmm1, %xmm2
+ %xmm16 = VFMSUBADD231PSZ128r %xmm16, %xmm1, %xmm2
+ ; CHECK: %xmm16 = VFNMADD132PDZ128m %xmm16, %xmm16, %rsi, 1, _, 0, _
+ %xmm16 = VFNMADD132PDZ128m %xmm16, %xmm16, %rsi, 1, _, 0, _
+ ; CHECK: %xmm16 = VFNMADD132PDZ128r %xmm16, %xmm1, %xmm2
+ %xmm16 = VFNMADD132PDZ128r %xmm16, %xmm1, %xmm2
+ ; CHECK: %xmm16 = VFNMADD132PSZ128m %xmm16, %xmm16, %rsi, 1, _, 0, _
+ %xmm16 = VFNMADD132PSZ128m %xmm16, %xmm16, %rsi, 1, _, 0, _
+ ; CHECK: %xmm16 = VFNMADD132PSZ128r %xmm16, %xmm1, %xmm2
+ %xmm16 = VFNMADD132PSZ128r %xmm16, %xmm1, %xmm2
+ ; CHECK: %xmm16 = VFNMADD213PDZ128m %xmm16, %xmm16, %rsi, 1, _, 0, _
+ %xmm16 = VFNMADD213PDZ128m %xmm16, %xmm16, %rsi, 1, _, 0, _
+ ; CHECK: %xmm16 = VFNMADD213PDZ128r %xmm16, %xmm1, %xmm2
+ %xmm16 = VFNMADD213PDZ128r %xmm16, %xmm1, %xmm2
+ ; CHECK: %xmm16 = VFNMADD213PSZ128m %xmm16, %xmm16, %rsi, 1, _, 0, _
+ %xmm16 = VFNMADD213PSZ128m %xmm16, %xmm16, %rsi, 1, _, 0, _
+ ; CHECK: %xmm16 = VFNMADD213PSZ128r %xmm16, %xmm1, %xmm2
+ %xmm16 = VFNMADD213PSZ128r %xmm16, %xmm1, %xmm2
+ ; CHECK: %xmm16 = VFNMADD231PDZ128m %xmm16, %xmm16, %rsi, 1, _, 0, _
+ %xmm16 = VFNMADD231PDZ128m %xmm16, %xmm16, %rsi, 1, _, 0, _
+ ; CHECK: %xmm16 = VFNMADD231PDZ128r %xmm16, %xmm1, %xmm2
+ %xmm16 = VFNMADD231PDZ128r %xmm16, %xmm1, %xmm2
+ ; CHECK: %xmm16 = VFNMADD231PSZ128m %xmm16, %xmm16, %rsi, 1, _, 0, _
+ %xmm16 = VFNMADD231PSZ128m %xmm16, %xmm16, %rsi, 1, _, 0, _
+ ; CHECK: %xmm16 = VFNMADD231PSZ128r %xmm16, %xmm1, %xmm2
+ %xmm16 = VFNMADD231PSZ128r %xmm16, %xmm1, %xmm2
+ ; CHECK: %xmm16 = VFNMSUB132PDZ128m %xmm16, %xmm16, %rsi, 1, _, 0, _
+ %xmm16 = VFNMSUB132PDZ128m %xmm16, %xmm16, %rsi, 1, _, 0, _
+ ; CHECK: %xmm16 = VFNMSUB132PDZ128r %xmm16, %xmm1, %xmm2
+ %xmm16 = VFNMSUB132PDZ128r %xmm16, %xmm1, %xmm2
+ ; CHECK: %xmm16 = VFNMSUB132PSZ128m %xmm16, %xmm16, %rsi, 1, _, 0, _
+ %xmm16 = VFNMSUB132PSZ128m %xmm16, %xmm16, %rsi, 1, _, 0, _
+ ; CHECK: %xmm16 = VFNMSUB132PSZ128r %xmm16, %xmm1, %xmm2
+ %xmm16 = VFNMSUB132PSZ128r %xmm16, %xmm1, %xmm2
+ ; CHECK: %xmm16 = VFNMSUB213PDZ128m %xmm16, %xmm16, %rsi, 1, _, 0, _
+ %xmm16 = VFNMSUB213PDZ128m %xmm16, %xmm16, %rsi, 1, _, 0, _
+ ; CHECK: %xmm16 = VFNMSUB213PDZ128r %xmm16, %xmm1, %xmm2
+ %xmm16 = VFNMSUB213PDZ128r %xmm16, %xmm1, %xmm2
+ ; CHECK: %xmm16 = VFNMSUB213PSZ128m %xmm16, %xmm16, %rsi, 1, _, 0, _
+ %xmm16 = VFNMSUB213PSZ128m %xmm16, %xmm16, %rsi, 1, _, 0, _
+ ; CHECK: %xmm16 = VFNMSUB213PSZ128r %xmm16, %xmm1, %xmm2
+ %xmm16 = VFNMSUB213PSZ128r %xmm16, %xmm1, %xmm2
+ ; CHECK: %xmm16 = VFNMSUB231PDZ128m %xmm16, %xmm16, %rsi, 1, _, 0, _
+ %xmm16 = VFNMSUB231PDZ128m %xmm16, %xmm16, %rsi, 1, _, 0, _
+ ; CHECK: %xmm16 = VFNMSUB231PDZ128r %xmm16, %xmm1, %xmm2
+ %xmm16 = VFNMSUB231PDZ128r %xmm16, %xmm1, %xmm2
+ ; CHECK: %xmm16 = VFNMSUB231PSZ128m %xmm16, %xmm16, %rsi, 1, _, 0, _
+ %xmm16 = VFNMSUB231PSZ128m %xmm16, %xmm16, %rsi, 1, _, 0, _
+ ; CHECK: %xmm16 = VFNMSUB231PSZ128r %xmm16, %xmm1, %xmm2
+ %xmm16 = VFNMSUB231PSZ128r %xmm16, %xmm1, %xmm2
+ ; CHECK: %xmm16 = VPSLLDZ128ri %xmm16, 7
+ %xmm16 = VPSLLDZ128ri %xmm16, 7
+ ; CHECK: %xmm16 = VPSLLDZ128rm %xmm16, %rip, 1, _, %rax, _
+ %xmm16 = VPSLLDZ128rm %xmm16, %rip, 1, _, %rax, _
+ ; CHECK: %xmm16 = VPSLLDZ128rr %xmm16, 14
+ %xmm16 = VPSLLDZ128rr %xmm16, 14
+ ; CHECK: %xmm16 = VPSLLQZ128ri %xmm16, 7
+ %xmm16 = VPSLLQZ128ri %xmm16, 7
+ ; CHECK: %xmm16 = VPSLLQZ128rm %xmm16, %rip, 1, _, %rax, _
+ %xmm16 = VPSLLQZ128rm %xmm16, %rip, 1, _, %rax, _
+ ; CHECK: %xmm16 = VPSLLQZ128rr %xmm16, 14
+ %xmm16 = VPSLLQZ128rr %xmm16, 14
+ ; CHECK: %xmm16 = VPSLLVDZ128rm %xmm16, %rip, 1, _, %rax, _
+ %xmm16 = VPSLLVDZ128rm %xmm16, %rip, 1, _, %rax, _
+ ; CHECK: %xmm16 = VPSLLVDZ128rr %xmm16, 14
+ %xmm16 = VPSLLVDZ128rr %xmm16, 14
+ ; CHECK: %xmm16 = VPSLLVQZ128rm %xmm16, %rip, 1, _, %rax, _
+ %xmm16 = VPSLLVQZ128rm %xmm16, %rip, 1, _, %rax, _
+ ; CHECK: %xmm16 = VPSLLVQZ128rr %xmm16, 14
+ %xmm16 = VPSLLVQZ128rr %xmm16, 14
+ ; CHECK: %xmm16 = VPSLLWZ128ri %xmm16, 7
+ %xmm16 = VPSLLWZ128ri %xmm16, 7
+ ; CHECK: %xmm16 = VPSLLWZ128rm %xmm16, %rip, 1, _, %rax, _
+ %xmm16 = VPSLLWZ128rm %xmm16, %rip, 1, _, %rax, _
+ ; CHECK: %xmm16 = VPSLLWZ128rr %xmm16, 14
+ %xmm16 = VPSLLWZ128rr %xmm16, 14
+ ; CHECK: %xmm16 = VPSRADZ128ri %xmm16, 7
+ %xmm16 = VPSRADZ128ri %xmm16, 7
+ ; CHECK: %xmm16 = VPSRADZ128rm %xmm16, %rip, 1, _, %rax, _
+ %xmm16 = VPSRADZ128rm %xmm16, %rip, 1, _, %rax, _
+ ; CHECK: %xmm16 = VPSRADZ128rr %xmm16, 14
+ %xmm16 = VPSRADZ128rr %xmm16, 14
+ ; CHECK: %xmm16 = VPSRAVDZ128rm %xmm16, %rip, 1, _, %rax, _
+ %xmm16 = VPSRAVDZ128rm %xmm16, %rip, 1, _, %rax, _
+ ; CHECK: %xmm16 = VPSRAVDZ128rr %xmm16, 14
+ %xmm16 = VPSRAVDZ128rr %xmm16, 14
+ ; CHECK: %xmm16 = VPSRAWZ128ri %xmm16, 7
+ %xmm16 = VPSRAWZ128ri %xmm16, 7
+ ; CHECK: %xmm16 = VPSRAWZ128rm %xmm16, %rip, 1, _, %rax, _
+ %xmm16 = VPSRAWZ128rm %xmm16, %rip, 1, _, %rax, _
+ ; CHECK: %xmm16 = VPSRAWZ128rr %xmm16, 14
+ %xmm16 = VPSRAWZ128rr %xmm16, 14
+ ; CHECK: %xmm16 = VPSRLDQZ128rr %xmm16, 14
+ %xmm16 = VPSRLDQZ128rr %xmm16, 14
+ ; CHECK: %xmm16 = VPSRLDZ128ri %xmm16, 7
+ %xmm16 = VPSRLDZ128ri %xmm16, 7
+ ; CHECK: %xmm16 = VPSRLDZ128rm %xmm16, %rip, 1, _, %rax, _
+ %xmm16 = VPSRLDZ128rm %xmm16, %rip, 1, _, %rax, _
+ ; CHECK: %xmm16 = VPSRLDZ128rr %xmm16, 14
+ %xmm16 = VPSRLDZ128rr %xmm16, 14
+ ; CHECK: %xmm16 = VPSRLQZ128ri %xmm16, 7
+ %xmm16 = VPSRLQZ128ri %xmm16, 7
+ ; CHECK: %xmm16 = VPSRLQZ128rm %xmm16, %rip, 1, _, %rax, _
+ %xmm16 = VPSRLQZ128rm %xmm16, %rip, 1, _, %rax, _
+ ; CHECK: %xmm16 = VPSRLQZ128rr %xmm16, 14
+ %xmm16 = VPSRLQZ128rr %xmm16, 14
+ ; CHECK: %xmm16 = VPSRLVDZ128rm %xmm16, %rip, 1, _, %rax, _
+ %xmm16 = VPSRLVDZ128rm %xmm16, %rip, 1, _, %rax, _
+ ; CHECK: %xmm16 = VPSRLVDZ128rr %xmm16, 14
+ %xmm16 = VPSRLVDZ128rr %xmm16, 14
+ ; CHECK: %xmm16 = VPSRLVQZ128rm %xmm16, %rip, 1, _, %rax, _
+ %xmm16 = VPSRLVQZ128rm %xmm16, %rip, 1, _, %rax, _
+ ; CHECK: %xmm16 = VPSRLVQZ128rr %xmm16, 14
+ %xmm16 = VPSRLVQZ128rr %xmm16, 14
+ ; CHECK: %xmm16 = VPSRLWZ128ri %xmm16, 7
+ %xmm16 = VPSRLWZ128ri %xmm16, 7
+ ; CHECK: %xmm16 = VPSRLWZ128rm %xmm16, %rip, 1, _, %rax, _
+ %xmm16 = VPSRLWZ128rm %xmm16, %rip, 1, _, %rax, _
+ ; CHECK: %xmm16 = VPSRLWZ128rr %xmm16, 14
+ %xmm16 = VPSRLWZ128rr %xmm16, 14
+ ; CHECK: %xmm16 = VPERMILPDZ128mi %rdi, 1, _, 0, _, _
+ %xmm16 = VPERMILPDZ128mi %rdi, 1, _, 0, _, _
+ ; CHECK: %xmm16 = VPERMILPDZ128ri %xmm16, 9
+ %xmm16 = VPERMILPDZ128ri %xmm16, 9
+ ; CHECK: %xmm16 = VPERMILPDZ128rm %xmm16, %rdi, 1, _, 0, _
+ %xmm16 = VPERMILPDZ128rm %xmm16, %rdi, 1, _, 0, _
+ ; CHECK: %xmm16 = VPERMILPDZ128rr %xmm16, %xmm1
+ %xmm16 = VPERMILPDZ128rr %xmm16, %xmm1
+ ; CHECK: %xmm16 = VPERMILPSZ128mi %rdi, 1, _, 0, _, _
+ %xmm16 = VPERMILPSZ128mi %rdi, 1, _, 0, _, _
+ ; CHECK: %xmm16 = VPERMILPSZ128ri %xmm16, 9
+ %xmm16 = VPERMILPSZ128ri %xmm16, 9
+ ; CHECK: %xmm16 = VPERMILPSZ128rm %xmm16, %rdi, 1, _, 0, _
+ %xmm16 = VPERMILPSZ128rm %xmm16, %rdi, 1, _, 0, _
+ ; CHECK: %xmm16 = VPERMILPSZ128rr %xmm16, %xmm1
+ %xmm16 = VPERMILPSZ128rr %xmm16, %xmm1
+ ; CHECK: %xmm16 = VCVTPH2PSZ128rm %rdi, %xmm16, 1, _, 0
+ %xmm16 = VCVTPH2PSZ128rm %rdi, %xmm16, 1, _, 0
+ ; CHECK: %xmm16 = VCVTPH2PSZ128rr %xmm16
+ %xmm16 = VCVTPH2PSZ128rr %xmm16
+ ; CHECK: %xmm16 = VCVTDQ2PDZ128rm %rdi, %xmm16, 1, _, 0
+ %xmm16 = VCVTDQ2PDZ128rm %rdi, %xmm16, 1, _, 0
+ ; CHECK: %xmm16 = VCVTDQ2PDZ128rr %xmm16
+ %xmm16 = VCVTDQ2PDZ128rr %xmm16
+ ; CHECK: %xmm16 = VCVTDQ2PSZ128rm %rdi, %xmm16, 1, _, 0
+ %xmm16 = VCVTDQ2PSZ128rm %rdi, %xmm16, 1, _, 0
+ ; CHECK: %xmm16 = VCVTDQ2PSZ128rr %xmm16
+ %xmm16 = VCVTDQ2PSZ128rr %xmm16
+ ; CHECK: %xmm16 = VCVTPD2DQZ128rm %rdi, %xmm16, 1, _, 0
+ %xmm16 = VCVTPD2DQZ128rm %rdi, %xmm16, 1, _, 0
+ ; CHECK: %xmm16 = VCVTPD2DQZ128rr %xmm16
+ %xmm16 = VCVTPD2DQZ128rr %xmm16
+ ; CHECK: %xmm16 = VCVTPD2PSZ128rm %rdi, %xmm16, 1, _, 0
+ %xmm16 = VCVTPD2PSZ128rm %rdi, %xmm16, 1, _, 0
+ ; CHECK: %xmm16 = VCVTPD2PSZ128rr %xmm16
+ %xmm16 = VCVTPD2PSZ128rr %xmm16
+ ; CHECK: %xmm16 = VCVTPS2DQZ128rm %rdi, %xmm16, 1, _, 0
+ %xmm16 = VCVTPS2DQZ128rm %rdi, %xmm16, 1, _, 0
+ ; CHECK: %xmm16 = VCVTPS2DQZ128rr %xmm16
+ %xmm16 = VCVTPS2DQZ128rr %xmm16
+ ; CHECK: %xmm16 = VCVTPS2PDZ128rm %rdi, %xmm16, 1, _, 0
+ %xmm16 = VCVTPS2PDZ128rm %rdi, %xmm16, 1, _, 0
+ ; CHECK: %xmm16 = VCVTPS2PDZ128rr %xmm16
+ %xmm16 = VCVTPS2PDZ128rr %xmm16
+ ; CHECK: %xmm16 = VCVTTPD2DQZ128rm %rdi, %xmm16, 1, _, 0
+ %xmm16 = VCVTTPD2DQZ128rm %rdi, %xmm16, 1, _, 0
+ ; CHECK: %xmm16 = VCVTTPD2DQZ128rr %xmm16
+ %xmm16 = VCVTTPD2DQZ128rr %xmm16
+ ; CHECK: %xmm16 = VCVTTPS2DQZ128rm %rdi, %xmm16, 1, _, 0
+ %xmm16 = VCVTTPS2DQZ128rm %rdi, %xmm16, 1, _, 0
+ ; CHECK: %xmm16 = VCVTTPS2DQZ128rr %xmm16
+ %xmm16 = VCVTTPS2DQZ128rr %xmm16
+ ; CHECK: %xmm16 = VSQRTPDZ128m %rdi, _, _, _, _
+ %xmm16 = VSQRTPDZ128m %rdi, _, _, _, _
+ ; CHECK: %xmm16 = VSQRTPDZ128r %xmm16
+ %xmm16 = VSQRTPDZ128r %xmm16
+ ; CHECK: %xmm16 = VSQRTPSZ128m %rdi, _, _, _, _
+ %xmm16 = VSQRTPSZ128m %rdi, _, _, _, _
+ ; CHECK: %xmm16 = VSQRTPSZ128r %xmm16
+ %xmm16 = VSQRTPSZ128r %xmm16
+ ; CHECK: %xmm16 = VMOVDDUPZ128rm %rdi, 1, _, 0, _
+ %xmm16 = VMOVDDUPZ128rm %rdi, 1, _, 0, _
+ ; CHECK: %xmm16 = VMOVDDUPZ128rr %xmm16
+ %xmm16 = VMOVDDUPZ128rr %xmm16
+ ; CHECK: %xmm16 = VMOVSHDUPZ128rm %rdi, 1, _, 0, _
+ %xmm16 = VMOVSHDUPZ128rm %rdi, 1, _, 0, _
+ ; CHECK: %xmm16 = VMOVSHDUPZ128rr %xmm16
+ %xmm16 = VMOVSHDUPZ128rr %xmm16
+ ; CHECK: %xmm16 = VMOVSLDUPZ128rm %rdi, 1, _, 0, _
+ %xmm16 = VMOVSLDUPZ128rm %rdi, 1, _, 0, _
+ ; CHECK: %xmm16 = VMOVSLDUPZ128rr %xmm16
+ %xmm16 = VMOVSLDUPZ128rr %xmm16
+ ; CHECK: %xmm16 = VPSHUFBZ128rm %xmm16, _, _, _, _, _
+ %xmm16 = VPSHUFBZ128rm %xmm16, _, _, _, _, _
+ ; CHECK: %xmm16 = VPSHUFBZ128rr %xmm16, %xmm1
+ %xmm16 = VPSHUFBZ128rr %xmm16, %xmm1
+ ; CHECK: %xmm16 = VPSHUFDZ128mi %rdi, 1, _, 0, _, _
+ %xmm16 = VPSHUFDZ128mi %rdi, 1, _, 0, _, _
+ ; CHECK: %xmm16 = VPSHUFDZ128ri %xmm16, -24
+ %xmm16 = VPSHUFDZ128ri %xmm16, -24
+ ; CHECK: %xmm16 = VPSHUFHWZ128mi %rdi, 1, _, 0, _, _
+ %xmm16 = VPSHUFHWZ128mi %rdi, 1, _, 0, _, _
+ ; CHECK: %xmm16 = VPSHUFHWZ128ri %xmm16, -24
+ %xmm16 = VPSHUFHWZ128ri %xmm16, -24
+ ; CHECK: %xmm16 = VPSHUFLWZ128mi %rdi, 1, _, 0, _, _
+ %xmm16 = VPSHUFLWZ128mi %rdi, 1, _, 0, _, _
+ ; CHECK: %xmm16 = VPSHUFLWZ128ri %xmm16, -24
+ %xmm16 = VPSHUFLWZ128ri %xmm16, -24
+ ; CHECK: %xmm16 = VPSLLDQZ128rr %xmm16, %xmm1
+ %xmm16 = VPSLLDQZ128rr %xmm16, %xmm1
+ ; CHECK: %xmm16 = VSHUFPDZ128rmi %xmm16, _, _, _, _, _, _
+ %xmm16 = VSHUFPDZ128rmi %xmm16, _, _, _, _, _, _
+ ; CHECK: %xmm16 = VSHUFPDZ128rri %xmm16, _, _
+ %xmm16 = VSHUFPDZ128rri %xmm16, _, _
+ ; CHECK: %xmm16 = VSHUFPSZ128rmi %xmm16, _, _, _, _, _, _
+ %xmm16 = VSHUFPSZ128rmi %xmm16, _, _, _, _, _, _
+ ; CHECK: %xmm16 = VSHUFPSZ128rri %xmm16, _, _
+ %xmm16 = VSHUFPSZ128rri %xmm16, _, _
+ ; CHECK: %xmm16 = VPSADBWZ128rm %xmm16, 1, _, %rax, _, _
+ %xmm16 = VPSADBWZ128rm %xmm16, 1, _, %rax, _, _
+ ; CHECK: %xmm16 = VPSADBWZ128rr %xmm16, %xmm1
+ %xmm16 = VPSADBWZ128rr %xmm16, %xmm1
+ ; CHECK: %xmm16 = VBROADCASTSSZ128m %rip, _, _, _, _
+ %xmm16 = VBROADCASTSSZ128m %rip, _, _, _, _
+ ; CHECK: %xmm16 = VBROADCASTSSZ128r %xmm16
+ %xmm16 = VBROADCASTSSZ128r %xmm16
+ ; CHECK: %xmm16 = VBROADCASTSSZ128r_s %xmm16
+ %xmm16 = VBROADCASTSSZ128r_s %xmm16
+ ; CHECK: %xmm16 = VPBROADCASTBZ128m %rip, _, _, _, _
+ %xmm16 = VPBROADCASTBZ128m %rip, _, _, _, _
+ ; CHECK: %xmm16 = VPBROADCASTBZ128r %xmm16
+ %xmm16 = VPBROADCASTBZ128r %xmm16
+ ; CHECK: %xmm16 = VPBROADCASTDZ128m %rip, _, _, _, _
+ %xmm16 = VPBROADCASTDZ128m %rip, _, _, _, _
+ ; CHECK: %xmm16 = VPBROADCASTDZ128r %xmm16
+ %xmm16 = VPBROADCASTDZ128r %xmm16
+ ; CHECK: %xmm16 = VPBROADCASTQZ128m %rip, _, _, _, _
+ %xmm16 = VPBROADCASTQZ128m %rip, _, _, _, _
+ ; CHECK: %xmm16 = VPBROADCASTQZ128r %xmm16
+ %xmm16 = VPBROADCASTQZ128r %xmm16
+ ; CHECK: %xmm16 = VPBROADCASTWZ128m %rip, _, _, _, _
+ %xmm16 = VPBROADCASTWZ128m %rip, _, _, _, _
+ ; CHECK: %xmm16 = VPBROADCASTWZ128r %xmm16
+ %xmm16 = VPBROADCASTWZ128r %xmm16
+ ; CHECK: %xmm16 = VCVTPS2PHZ128rr %xmm16, 2
+ %xmm16 = VCVTPS2PHZ128rr %xmm16, 2
+ ; CHECK: VCVTPS2PHZ128mr %rdi, %xmm16, 1, _, 0, _, _
+ VCVTPS2PHZ128mr %rdi, %xmm16, 1, _, 0, _, _
+ ; CHECK: %xmm16 = VPABSBZ128rm %rip, 1, _, %rax, _
+ %xmm16 = VPABSBZ128rm %rip, 1, _, %rax, _
+ ; CHECK: %xmm16 = VPABSBZ128rr %xmm16
+ %xmm16 = VPABSBZ128rr %xmm16
+ ; CHECK: %xmm16 = VPABSDZ128rm %rip, 1, _, %rax, _
+ %xmm16 = VPABSDZ128rm %rip, 1, _, %rax, _
+ ; CHECK: %xmm16 = VPABSDZ128rr %xmm16
+ %xmm16 = VPABSDZ128rr %xmm16
+ ; CHECK: %xmm16 = VPABSWZ128rm %rip, 1, _, %rax, _
+ %xmm16 = VPABSWZ128rm %rip, 1, _, %rax, _
+ ; CHECK: %xmm16 = VPABSWZ128rr %xmm16
+ %xmm16 = VPABSWZ128rr %xmm16
+ ; CHECK: %xmm16 = VPALIGNRZ128rmi %xmm16, _, _, _, _, _, _
+ %xmm16 = VPALIGNRZ128rmi %xmm16, _, _, _, _, _, _
+ ; CHECK: %xmm16 = VPALIGNRZ128rri %xmm16, %xmm1, 15
+ %xmm16 = VPALIGNRZ128rri %xmm16, %xmm1, 15
+
+ RET 0, %zmm0, %zmm1
+...
+---
+ # CHECK-LABEL: name: evex_scalar_to_evex_test
+ # CHECK: bb.0:
+
+name: evex_scalar_to_evex_test
+body: |
+ bb.0:
+ ; CHECK: %xmm16 = VADDSDZrm %xmm16, %rip, 1, _, %rax, _
+ %xmm16 = VADDSDZrm %xmm16, %rip, 1, _, %rax, _
+ ; CHECK: %xmm16 = VADDSDZrm_Int %xmm16, %rip, 1, _, %rax, _
+ %xmm16 = VADDSDZrm_Int %xmm16, %rip, 1, _, %rax, _
+ ; CHECK: %xmm16 = VADDSDZrr %xmm16, %xmm1
+ %xmm16 = VADDSDZrr %xmm16, %xmm1
+ ; CHECK: %xmm16 = VADDSDZrr_Int %xmm16, %xmm1
+ %xmm16 = VADDSDZrr_Int %xmm16, %xmm1
+ ; CHECK: %xmm16 = VADDSSZrm %xmm16, %rip, 1, _, %rax, _
+ %xmm16 = VADDSSZrm %xmm16, %rip, 1, _, %rax, _
+ ; CHECK: %xmm16 = VADDSSZrm_Int %xmm16, %rip, 1, _, %rax, _
+ %xmm16 = VADDSSZrm_Int %xmm16, %rip, 1, _, %rax, _
+ ; CHECK: %xmm16 = VADDSSZrr %xmm16, %xmm1
+ %xmm16 = VADDSSZrr %xmm16, %xmm1
+ ; CHECK: %xmm16 = VADDSSZrr_Int %xmm16, %xmm1
+ %xmm16 = VADDSSZrr_Int %xmm16, %xmm1
+ ; CHECK: %xmm16 = VDIVSDZrm %xmm16, %rip, 1, _, %rax, _
+ %xmm16 = VDIVSDZrm %xmm16, %rip, 1, _, %rax, _
+ ; CHECK: %xmm16 = VDIVSDZrm_Int %xmm16, %rip, 1, _, %rax, _
+ %xmm16 = VDIVSDZrm_Int %xmm16, %rip, 1, _, %rax, _
+ ; CHECK: %xmm16 = VDIVSDZrr %xmm16, %xmm1
+ %xmm16 = VDIVSDZrr %xmm16, %xmm1
+ ; CHECK: %xmm16 = VDIVSDZrr_Int %xmm16, %xmm1
+ %xmm16 = VDIVSDZrr_Int %xmm16, %xmm1
+ ; CHECK: %xmm16 = VDIVSSZrm %xmm16, %rip, 1, _, %rax, _
+ %xmm16 = VDIVSSZrm %xmm16, %rip, 1, _, %rax, _
+ ; CHECK: %xmm16 = VDIVSSZrm_Int %xmm16, %rip, 1, _, %rax, _
+ %xmm16 = VDIVSSZrm_Int %xmm16, %rip, 1, _, %rax, _
+ ; CHECK: %xmm16 = VDIVSSZrr %xmm16, %xmm1
+ %xmm16 = VDIVSSZrr %xmm16, %xmm1
+ ; CHECK: %xmm16 = VDIVSSZrr_Int %xmm16, %xmm1
+ %xmm16 = VDIVSSZrr_Int %xmm16, %xmm1
+ ; CHECK: %xmm16 = VMAXCSDZrm %xmm16, %rip, 1, _, %rax, _
+ %xmm16 = VMAXCSDZrm %xmm16, %rip, 1, _, %rax, _
+ ; CHECK: %xmm16 = VMAXCSDZrr %xmm16, %xmm1
+ %xmm16 = VMAXCSDZrr %xmm16, %xmm1
+ ; CHECK: %xmm16 = VMAXCSSZrm %xmm16, %rip, 1, _, %rax, _
+ %xmm16 = VMAXCSSZrm %xmm16, %rip, 1, _, %rax, _
+ ; CHECK: %xmm16 = VMAXCSSZrr %xmm16, %xmm1
+ %xmm16 = VMAXCSSZrr %xmm16, %xmm1
+ ; CHECK: %xmm16 = VMAXSDZrm %xmm16, %rip, 1, _, %rax, _
+ %xmm16 = VMAXSDZrm %xmm16, %rip, 1, _, %rax, _
+ ; CHECK: %xmm16 = VMAXSDZrm_Int %xmm16, %rip, 1, _, %rax, _
+ %xmm16 = VMAXSDZrm_Int %xmm16, %rip, 1, _, %rax, _
+ ; CHECK: %xmm16 = VMAXSDZrr %xmm16, %xmm1
+ %xmm16 = VMAXSDZrr %xmm16, %xmm1
+ ; CHECK: %xmm16 = VMAXSDZrr_Int %xmm16, %xmm1
+ %xmm16 = VMAXSDZrr_Int %xmm16, %xmm1
+ ; CHECK: %xmm16 = VMAXSSZrm %xmm16, %rip, 1, _, %rax, _
+ %xmm16 = VMAXSSZrm %xmm16, %rip, 1, _, %rax, _
+ ; CHECK: %xmm16 = VMAXSSZrm_Int %xmm16, %rip, 1, _, %rax, _
+ %xmm16 = VMAXSSZrm_Int %xmm16, %rip, 1, _, %rax, _
+ ; CHECK: %xmm16 = VMAXSSZrr %xmm16, %xmm1
+ %xmm16 = VMAXSSZrr %xmm16, %xmm1
+ ; CHECK: %xmm16 = VMAXSSZrr_Int %xmm16, %xmm1
+ %xmm16 = VMAXSSZrr_Int %xmm16, %xmm1
+ ; CHECK: %xmm16 = VMINCSDZrm %xmm16, %rip, 1, _, %rax, _
+ %xmm16 = VMINCSDZrm %xmm16, %rip, 1, _, %rax, _
+ ; CHECK: %xmm16 = VMINCSDZrr %xmm16, %xmm1
+ %xmm16 = VMINCSDZrr %xmm16, %xmm1
+ ; CHECK: %xmm16 = VMINCSSZrm %xmm16, %rip, 1, _, %rax, _
+ %xmm16 = VMINCSSZrm %xmm16, %rip, 1, _, %rax, _
+ ; CHECK: %xmm16 = VMINCSSZrr %xmm16, %xmm1
+ %xmm16 = VMINCSSZrr %xmm16, %xmm1
+ ; CHECK: %xmm16 = VMINSDZrm %xmm16, %rip, 1, _, %rax, _
+ %xmm16 = VMINSDZrm %xmm16, %rip, 1, _, %rax, _
+ ; CHECK: %xmm16 = VMINSDZrm_Int %xmm16, %rip, 1, _, %rax, _
+ %xmm16 = VMINSDZrm_Int %xmm16, %rip, 1, _, %rax, _
+ ; CHECK: %xmm16 = VMINSDZrr %xmm16, %xmm1
+ %xmm16 = VMINSDZrr %xmm16, %xmm1
+ ; CHECK: %xmm16 = VMINSDZrr_Int %xmm16, %xmm1
+ %xmm16 = VMINSDZrr_Int %xmm16, %xmm1
+ ; CHECK: %xmm16 = VMINSSZrm %xmm16, %rip, 1, _, %rax, _
+ %xmm16 = VMINSSZrm %xmm16, %rip, 1, _, %rax, _
+ ; CHECK: %xmm16 = VMINSSZrm_Int %xmm16, %rip, 1, _, %rax, _
+ %xmm16 = VMINSSZrm_Int %xmm16, %rip, 1, _, %rax, _
+ ; CHECK: %xmm16 = VMINSSZrr %xmm16, %xmm1
+ %xmm16 = VMINSSZrr %xmm16, %xmm1
+ ; CHECK: %xmm16 = VMINSSZrr_Int %xmm16, %xmm1
+ %xmm16 = VMINSSZrr_Int %xmm16, %xmm1
+ ; CHECK: %xmm16 = VMULSDZrm %xmm16, %rip, 1, _, %rax, _
+ %xmm16 = VMULSDZrm %xmm16, %rip, 1, _, %rax, _
+ ; CHECK: %xmm16 = VMULSDZrm_Int %xmm16, %rip, 1, _, %rax, _
+ %xmm16 = VMULSDZrm_Int %xmm16, %rip, 1, _, %rax, _
+ ; CHECK: %xmm16 = VMULSDZrr %xmm16, %xmm1
+ %xmm16 = VMULSDZrr %xmm16, %xmm1
+ ; CHECK: %xmm16 = VMULSDZrr_Int %xmm16, %xmm1
+ %xmm16 = VMULSDZrr_Int %xmm16, %xmm1
+ ; CHECK: %xmm16 = VMULSSZrm %xmm16, %rip, 1, _, %rax, _
+ %xmm16 = VMULSSZrm %xmm16, %rip, 1, _, %rax, _
+ ; CHECK: %xmm16 = VMULSSZrm_Int %xmm16, %rip, 1, _, %rax, _
+ %xmm16 = VMULSSZrm_Int %xmm16, %rip, 1, _, %rax, _
+ ; CHECK: %xmm16 = VMULSSZrr %xmm16, %xmm1
+ %xmm16 = VMULSSZrr %xmm16, %xmm1
+ ; CHECK: %xmm16 = VMULSSZrr_Int %xmm16, %xmm1
+ %xmm16 = VMULSSZrr_Int %xmm16, %xmm1
+ ; CHECK: %xmm16 = VSUBSDZrm %xmm16, %rip, 1, _, %rax, _
+ %xmm16 = VSUBSDZrm %xmm16, %rip, 1, _, %rax, _
+ ; CHECK: %xmm16 = VSUBSDZrm_Int %xmm16, %rip, 1, _, %rax, _
+ %xmm16 = VSUBSDZrm_Int %xmm16, %rip, 1, _, %rax, _
+ ; CHECK: %xmm16 = VSUBSDZrr %xmm16, %xmm1
+ %xmm16 = VSUBSDZrr %xmm16, %xmm1
+ ; CHECK: %xmm16 = VSUBSDZrr_Int %xmm16, %xmm1
+ %xmm16 = VSUBSDZrr_Int %xmm16, %xmm1
+ ; CHECK: %xmm16 = VSUBSSZrm %xmm16, %rip, 1, _, %rax, _
+ %xmm16 = VSUBSSZrm %xmm16, %rip, 1, _, %rax, _
+ ; CHECK: %xmm16 = VSUBSSZrm_Int %xmm16, %rip, 1, _, %rax, _
+ %xmm16 = VSUBSSZrm_Int %xmm16, %rip, 1, _, %rax, _
+ ; CHECK: %xmm16 = VSUBSSZrr %xmm16, %xmm1
+ %xmm16 = VSUBSSZrr %xmm16, %xmm1
+ ; CHECK: %xmm16 = VSUBSSZrr_Int %xmm16, %xmm1
+ %xmm16 = VSUBSSZrr_Int %xmm16, %xmm1
+ ; CHECK: %xmm16 = VFMADD132SDZm %xmm16, %xmm16, %rsi, 1, _, 0, _
+ %xmm16 = VFMADD132SDZm %xmm16, %xmm16, %rsi, 1, _, 0, _
+ ; CHECK: %xmm16 = VFMADD132SDZm_Int %xmm16, %xmm16, %rsi, 1, _, 0, _
+ %xmm16 = VFMADD132SDZm_Int %xmm16, %xmm16, %rsi, 1, _, 0, _
+ ; CHECK: %xmm16 = VFMADD132SDZr %xmm16, %xmm1, %xmm2
+ %xmm16 = VFMADD132SDZr %xmm16, %xmm1, %xmm2
+ ; CHECK: %xmm16 = VFMADD132SDZr_Int %xmm16, %xmm1, %xmm2
+ %xmm16 = VFMADD132SDZr_Int %xmm16, %xmm1, %xmm2
+ ; CHECK: %xmm16 = VFMADD132SSZm %xmm16, %xmm16, %rsi, 1, _, 0, _
+ %xmm16 = VFMADD132SSZm %xmm16, %xmm16, %rsi, 1, _, 0, _
+ ; CHECK: %xmm16 = VFMADD132SSZm_Int %xmm16, %xmm16, %rsi, 1, _, 0, _
+ %xmm16 = VFMADD132SSZm_Int %xmm16, %xmm16, %rsi, 1, _, 0, _
+ ; CHECK: %xmm16 = VFMADD132SSZr %xmm16, %xmm1, %xmm2
+ %xmm16 = VFMADD132SSZr %xmm16, %xmm1, %xmm2
+ ; CHECK: %xmm16 = VFMADD132SSZr_Int %xmm16, %xmm1, %xmm2
+ %xmm16 = VFMADD132SSZr_Int %xmm16, %xmm1, %xmm2
+ ; CHECK: %xmm16 = VFMADD213SDZm %xmm16, %xmm16, %rsi, 1, _, 0, _
+ %xmm16 = VFMADD213SDZm %xmm16, %xmm16, %rsi, 1, _, 0, _
+ ; CHECK: %xmm16 = VFMADD213SDZm_Int %xmm16, %xmm16, %rsi, 1, _, 0, _
+ %xmm16 = VFMADD213SDZm_Int %xmm16, %xmm16, %rsi, 1, _, 0, _
+ ; CHECK: %xmm16 = VFMADD213SDZr %xmm16, %xmm1, %xmm2
+ %xmm16 = VFMADD213SDZr %xmm16, %xmm1, %xmm2
+ ; CHECK: %xmm16 = VFMADD213SDZr_Int %xmm16, %xmm1, %xmm2
+ %xmm16 = VFMADD213SDZr_Int %xmm16, %xmm1, %xmm2
+ ; CHECK: %xmm16 = VFMADD213SSZm %xmm16, %xmm16, %rsi, 1, _, 0, _
+ %xmm16 = VFMADD213SSZm %xmm16, %xmm16, %rsi, 1, _, 0, _
+ ; CHECK: %xmm16 = VFMADD213SSZm_Int %xmm16, %xmm16, %rsi, 1, _, 0, _
+ %xmm16 = VFMADD213SSZm_Int %xmm16, %xmm16, %rsi, 1, _, 0, _
+ ; CHECK: %xmm16 = VFMADD213SSZr %xmm16, %xmm1, %xmm2
+ %xmm16 = VFMADD213SSZr %xmm16, %xmm1, %xmm2
+ ; CHECK: %xmm16 = VFMADD213SSZr_Int %xmm16, %xmm1, %xmm2
+ %xmm16 = VFMADD213SSZr_Int %xmm16, %xmm1, %xmm2
+ ; CHECK: %xmm16 = VFMADD231SDZm %xmm16, %xmm16, %rsi, 1, _, 0, _
+ %xmm16 = VFMADD231SDZm %xmm16, %xmm16, %rsi, 1, _, 0, _
+ ; CHECK: %xmm16 = VFMADD231SDZm_Int %xmm16, %xmm16, %rsi, 1, _, 0, _
+ %xmm16 = VFMADD231SDZm_Int %xmm16, %xmm16, %rsi, 1, _, 0, _
+ ; CHECK: %xmm16 = VFMADD231SDZr %xmm16, %xmm1, %xmm2
+ %xmm16 = VFMADD231SDZr %xmm16, %xmm1, %xmm2
+ ; CHECK: %xmm16 = VFMADD231SDZr_Int %xmm16, %xmm1, %xmm2
+ %xmm16 = VFMADD231SDZr_Int %xmm16, %xmm1, %xmm2
+ ; CHECK: %xmm16 = VFMADD231SSZm %xmm16, %xmm16, %rsi, 1, _, 0, _
+ %xmm16 = VFMADD231SSZm %xmm16, %xmm16, %rsi, 1, _, 0, _
+ ; CHECK: %xmm16 = VFMADD231SSZm_Int %xmm16, %xmm16, %rsi, 1, _, 0, _
+ %xmm16 = VFMADD231SSZm_Int %xmm16, %xmm16, %rsi, 1, _, 0, _
+ ; CHECK: %xmm16 = VFMADD231SSZr %xmm16, %xmm1, %xmm2
+ %xmm16 = VFMADD231SSZr %xmm16, %xmm1, %xmm2
+ ; CHECK: %xmm16 = VFMADD231SSZr_Int %xmm16, %xmm1, %xmm2
+ %xmm16 = VFMADD231SSZr_Int %xmm16, %xmm1, %xmm2
+ ; CHECK: %xmm16 = VFMSUB132SDZm %xmm16, %xmm16, %rsi, 1, _, 0, _
+ %xmm16 = VFMSUB132SDZm %xmm16, %xmm16, %rsi, 1, _, 0, _
+ ; CHECK: %xmm16 = VFMSUB132SDZm_Int %xmm16, %xmm16, %rsi, 1, _, 0, _
+ %xmm16 = VFMSUB132SDZm_Int %xmm16, %xmm16, %rsi, 1, _, 0, _
+ ; CHECK: %xmm16 = VFMSUB132SDZr %xmm16, %xmm1, %xmm2
+ %xmm16 = VFMSUB132SDZr %xmm16, %xmm1, %xmm2
+ ; CHECK: %xmm16 = VFMSUB132SDZr_Int %xmm16, %xmm1, %xmm2
+ %xmm16 = VFMSUB132SDZr_Int %xmm16, %xmm1, %xmm2
+ ; CHECK: %xmm16 = VFMSUB132SSZm %xmm16, %xmm16, %rsi, 1, _, 0, _
+ %xmm16 = VFMSUB132SSZm %xmm16, %xmm16, %rsi, 1, _, 0, _
+ ; CHECK: %xmm16 = VFMSUB132SSZm_Int %xmm16, %xmm16, %rsi, 1, _, 0, _
+ %xmm16 = VFMSUB132SSZm_Int %xmm16, %xmm16, %rsi, 1, _, 0, _
+ ; CHECK: %xmm16 = VFMSUB132SSZr %xmm16, %xmm1, %xmm2
+ %xmm16 = VFMSUB132SSZr %xmm16, %xmm1, %xmm2
+ ; CHECK: %xmm16 = VFMSUB132SSZr_Int %xmm16, %xmm1, %xmm2
+ %xmm16 = VFMSUB132SSZr_Int %xmm16, %xmm1, %xmm2
+ ; CHECK: %xmm16 = VFMSUB213SDZm %xmm16, %xmm16, %rsi, 1, _, 0, _
+ %xmm16 = VFMSUB213SDZm %xmm16, %xmm16, %rsi, 1, _, 0, _
+ ; CHECK: %xmm16 = VFMSUB213SDZm_Int %xmm16, %xmm16, %rsi, 1, _, 0, _
+ %xmm16 = VFMSUB213SDZm_Int %xmm16, %xmm16, %rsi, 1, _, 0, _
+ ; CHECK: %xmm16 = VFMSUB213SDZr %xmm16, %xmm1, %xmm2
+ %xmm16 = VFMSUB213SDZr %xmm16, %xmm1, %xmm2
+ ; CHECK: %xmm16 = VFMSUB213SDZr_Int %xmm16, %xmm1, %xmm2
+ %xmm16 = VFMSUB213SDZr_Int %xmm16, %xmm1, %xmm2
+ ; CHECK: %xmm16 = VFMSUB213SSZm %xmm16, %xmm16, %rsi, 1, _, 0, _
+ %xmm16 = VFMSUB213SSZm %xmm16, %xmm16, %rsi, 1, _, 0, _
+ ; CHECK: %xmm16 = VFMSUB213SSZm_Int %xmm16, %xmm16, %rsi, 1, _, 0, _
+ %xmm16 = VFMSUB213SSZm_Int %xmm16, %xmm16, %rsi, 1, _, 0, _
+ ; CHECK: %xmm16 = VFMSUB213SSZr %xmm16, %xmm1, %xmm2
+ %xmm16 = VFMSUB213SSZr %xmm16, %xmm1, %xmm2
+ ; CHECK: %xmm16 = VFMSUB213SSZr_Int %xmm16, %xmm1, %xmm2
+ %xmm16 = VFMSUB213SSZr_Int %xmm16, %xmm1, %xmm2
+ ; CHECK: %xmm16 = VFMSUB231SDZm %xmm16, %xmm16, %rsi, 1, _, 0, _
+ %xmm16 = VFMSUB231SDZm %xmm16, %xmm16, %rsi, 1, _, 0, _
+ ; CHECK: %xmm16 = VFMSUB231SDZm_Int %xmm16, %xmm16, %rsi, 1, _, 0, _
+ %xmm16 = VFMSUB231SDZm_Int %xmm16, %xmm16, %rsi, 1, _, 0, _
+ ; CHECK: %xmm16 = VFMSUB231SDZr %xmm16, %xmm1, %xmm2
+ %xmm16 = VFMSUB231SDZr %xmm16, %xmm1, %xmm2
+ ; CHECK: %xmm16 = VFMSUB231SDZr_Int %xmm16, %xmm1, %xmm2
+ %xmm16 = VFMSUB231SDZr_Int %xmm16, %xmm1, %xmm2
+ ; CHECK: %xmm16 = VFMSUB231SSZm %xmm16, %xmm16, %rsi, 1, _, 0, _
+ %xmm16 = VFMSUB231SSZm %xmm16, %xmm16, %rsi, 1, _, 0, _
+ ; CHECK: %xmm16 = VFMSUB231SSZm_Int %xmm16, %xmm16, %rsi, 1, _, 0, _
+ %xmm16 = VFMSUB231SSZm_Int %xmm16, %xmm16, %rsi, 1, _, 0, _
+ ; CHECK: %xmm16 = VFMSUB231SSZr %xmm16, %xmm1, %xmm2
+ %xmm16 = VFMSUB231SSZr %xmm16, %xmm1, %xmm2
+ ; CHECK: %xmm16 = VFMSUB231SSZr_Int %xmm16, %xmm1, %xmm2
+ %xmm16 = VFMSUB231SSZr_Int %xmm16, %xmm1, %xmm2
+ ; CHECK: %xmm16 = VFNMADD132SDZm %xmm16, %xmm16, %rsi, 1, _, 0, _
+ %xmm16 = VFNMADD132SDZm %xmm16, %xmm16, %rsi, 1, _, 0, _
+ ; CHECK: %xmm16 = VFNMADD132SDZm_Int %xmm16, %xmm16, %rsi, 1, _, 0, _
+ %xmm16 = VFNMADD132SDZm_Int %xmm16, %xmm16, %rsi, 1, _, 0, _
+ ; CHECK: %xmm16 = VFNMADD132SDZr %xmm16, %xmm1, %xmm2
+ %xmm16 = VFNMADD132SDZr %xmm16, %xmm1, %xmm2
+ ; CHECK: %xmm16 = VFNMADD132SDZr_Int %xmm16, %xmm1, %xmm2
+ %xmm16 = VFNMADD132SDZr_Int %xmm16, %xmm1, %xmm2
+ ; CHECK: %xmm16 = VFNMADD132SSZm %xmm16, %xmm16, %rsi, 1, _, 0, _
+ %xmm16 = VFNMADD132SSZm %xmm16, %xmm16, %rsi, 1, _, 0, _
+ ; CHECK: %xmm16 = VFNMADD132SSZm_Int %xmm16, %xmm16, %rsi, 1, _, 0, _
+ %xmm16 = VFNMADD132SSZm_Int %xmm16, %xmm16, %rsi, 1, _, 0, _
+ ; CHECK: %xmm16 = VFNMADD132SSZr %xmm16, %xmm1, %xmm2
+ %xmm16 = VFNMADD132SSZr %xmm16, %xmm1, %xmm2
+ ; CHECK: %xmm16 = VFNMADD132SSZr_Int %xmm16, %xmm1, %xmm2
+ %xmm16 = VFNMADD132SSZr_Int %xmm16, %xmm1, %xmm2
+ ; CHECK: %xmm16 = VFNMADD213SDZm %xmm16, %xmm16, %rsi, 1, _, 0, _
+ %xmm16 = VFNMADD213SDZm %xmm16, %xmm16, %rsi, 1, _, 0, _
+ ; CHECK: %xmm16 = VFNMADD213SDZm_Int %xmm16, %xmm16, %rsi, 1, _, 0, _
+ %xmm16 = VFNMADD213SDZm_Int %xmm16, %xmm16, %rsi, 1, _, 0, _
+ ; CHECK: %xmm16 = VFNMADD213SDZr %xmm16, %xmm1, %xmm2
+ %xmm16 = VFNMADD213SDZr %xmm16, %xmm1, %xmm2
+ ; CHECK: %xmm16 = VFNMADD213SDZr_Int %xmm16, %xmm1, %xmm2
+ %xmm16 = VFNMADD213SDZr_Int %xmm16, %xmm1, %xmm2
+ ; CHECK: %xmm16 = VFNMADD213SSZm %xmm16, %xmm16, %rsi, 1, _, 0, _
+ %xmm16 = VFNMADD213SSZm %xmm16, %xmm16, %rsi, 1, _, 0, _
+ ; CHECK: %xmm16 = VFNMADD213SSZm_Int %xmm16, %xmm16, %rsi, 1, _, 0, _
+ %xmm16 = VFNMADD213SSZm_Int %xmm16, %xmm16, %rsi, 1, _, 0, _
+ ; CHECK: %xmm16 = VFNMADD213SSZr %xmm16, %xmm1, %xmm2
+ %xmm16 = VFNMADD213SSZr %xmm16, %xmm1, %xmm2
+ ; CHECK: %xmm16 = VFNMADD213SSZr_Int %xmm16, %xmm1, %xmm2
+ %xmm16 = VFNMADD213SSZr_Int %xmm16, %xmm1, %xmm2
+ ; CHECK: %xmm16 = VFNMADD231SDZm %xmm16, %xmm16, %rsi, 1, _, 0, _
+ %xmm16 = VFNMADD231SDZm %xmm16, %xmm16, %rsi, 1, _, 0, _
+ ; CHECK: %xmm16 = VFNMADD231SDZm_Int %xmm16, %xmm16, %rsi, 1, _, 0, _
+ %xmm16 = VFNMADD231SDZm_Int %xmm16, %xmm16, %rsi, 1, _, 0, _
+ ; CHECK: %xmm16 = VFNMADD231SDZr %xmm16, %xmm1, %xmm2
+ %xmm16 = VFNMADD231SDZr %xmm16, %xmm1, %xmm2
+ ; CHECK: %xmm16 = VFNMADD231SDZr_Int %xmm16, %xmm1, %xmm2
+ %xmm16 = VFNMADD231SDZr_Int %xmm16, %xmm1, %xmm2
+ ; CHECK: %xmm16 = VFNMADD231SSZm %xmm16, %xmm16, %rsi, 1, _, 0, _
+ %xmm16 = VFNMADD231SSZm %xmm16, %xmm16, %rsi, 1, _, 0, _
+ ; CHECK: %xmm16 = VFNMADD231SSZm_Int %xmm16, %xmm16, %rsi, 1, _, 0, _
+ %xmm16 = VFNMADD231SSZm_Int %xmm16, %xmm16, %rsi, 1, _, 0, _
+ ; CHECK: %xmm16 = VFNMADD231SSZr %xmm16, %xmm1, %xmm2
+ %xmm16 = VFNMADD231SSZr %xmm16, %xmm1, %xmm2
+ ; CHECK: %xmm16 = VFNMADD231SSZr_Int %xmm16, %xmm1, %xmm2
+ %xmm16 = VFNMADD231SSZr_Int %xmm16, %xmm1, %xmm2
+ ; CHECK: %xmm16 = VFNMSUB132SDZm %xmm16, %xmm16, %rsi, 1, _, 0, _
+ %xmm16 = VFNMSUB132SDZm %xmm16, %xmm16, %rsi, 1, _, 0, _
+ ; CHECK: %xmm16 = VFNMSUB132SDZm_Int %xmm16, %xmm16, %rsi, 1, _, 0, _
+ %xmm16 = VFNMSUB132SDZm_Int %xmm16, %xmm16, %rsi, 1, _, 0, _
+ ; CHECK: %xmm16 = VFNMSUB132SDZr %xmm16, %xmm1, %xmm2
+ %xmm16 = VFNMSUB132SDZr %xmm16, %xmm1, %xmm2
+ ; CHECK: %xmm16 = VFNMSUB132SDZr_Int %xmm16, %xmm1, %xmm2
+ %xmm16 = VFNMSUB132SDZr_Int %xmm16, %xmm1, %xmm2
+ ; CHECK: %xmm16 = VFNMSUB132SSZm %xmm16, %xmm16, %rsi, 1, _, 0, _
+ %xmm16 = VFNMSUB132SSZm %xmm16, %xmm16, %rsi, 1, _, 0, _
+ ; CHECK: %xmm16 = VFNMSUB132SSZm_Int %xmm16, %xmm16, %rsi, 1, _, 0, _
+ %xmm16 = VFNMSUB132SSZm_Int %xmm16, %xmm16, %rsi, 1, _, 0, _
+ ; CHECK: %xmm16 = VFNMSUB132SSZr %xmm16, %xmm1, %xmm2
+ %xmm16 = VFNMSUB132SSZr %xmm16, %xmm1, %xmm2
+ ; CHECK: %xmm16 = VFNMSUB132SSZr_Int %xmm16, %xmm1, %xmm2
+ %xmm16 = VFNMSUB132SSZr_Int %xmm16, %xmm1, %xmm2
+ ; CHECK: %xmm16 = VFNMSUB213SDZm %xmm16, %xmm16, %rsi, 1, _, 0, _
+ %xmm16 = VFNMSUB213SDZm %xmm16, %xmm16, %rsi, 1, _, 0, _
+ ; CHECK: %xmm16 = VFNMSUB213SDZm_Int %xmm16, %xmm16, %rsi, 1, _, 0, _
+ %xmm16 = VFNMSUB213SDZm_Int %xmm16, %xmm16, %rsi, 1, _, 0, _
+ ; CHECK: %xmm16 = VFNMSUB213SDZr %xmm16, %xmm1, %xmm2
+ %xmm16 = VFNMSUB213SDZr %xmm16, %xmm1, %xmm2
+ ; CHECK: %xmm16 = VFNMSUB213SDZr_Int %xmm16, %xmm1, %xmm2
+ %xmm16 = VFNMSUB213SDZr_Int %xmm16, %xmm1, %xmm2
+ ; CHECK: %xmm16 = VFNMSUB213SSZm %xmm16, %xmm16, %rsi, 1, _, 0, _
+ %xmm16 = VFNMSUB213SSZm %xmm16, %xmm16, %rsi, 1, _, 0, _
+ ; CHECK: %xmm16 = VFNMSUB213SSZm_Int %xmm16, %xmm16, %rsi, 1, _, 0, _
+ %xmm16 = VFNMSUB213SSZm_Int %xmm16, %xmm16, %rsi, 1, _, 0, _
+ ; CHECK: %xmm16 = VFNMSUB213SSZr %xmm16, %xmm1, %xmm2
+ %xmm16 = VFNMSUB213SSZr %xmm16, %xmm1, %xmm2
+ ; CHECK: %xmm16 = VFNMSUB213SSZr_Int %xmm16, %xmm1, %xmm2
+ %xmm16 = VFNMSUB213SSZr_Int %xmm16, %xmm1, %xmm2
+ ; CHECK: %xmm16 = VFNMSUB231SDZm %xmm16, %xmm16, %rsi, 1, _, 0, _
+ %xmm16 = VFNMSUB231SDZm %xmm16, %xmm16, %rsi, 1, _, 0, _
+ ; CHECK: %xmm16 = VFNMSUB231SDZm_Int %xmm16, %xmm16, %rsi, 1, _, 0, _
+ %xmm16 = VFNMSUB231SDZm_Int %xmm16, %xmm16, %rsi, 1, _, 0, _
+ ; CHECK: %xmm16 = VFNMSUB231SDZr %xmm16, %xmm1, %xmm2
+ %xmm16 = VFNMSUB231SDZr %xmm16, %xmm1, %xmm2
+ ; CHECK: %xmm16 = VFNMSUB231SDZr_Int %xmm16, %xmm1, %xmm2
+ %xmm16 = VFNMSUB231SDZr_Int %xmm16, %xmm1, %xmm2
+ ; CHECK: %xmm16 = VFNMSUB231SSZm %xmm16, %xmm16, %rsi, 1, _, 0, _
+ %xmm16 = VFNMSUB231SSZm %xmm16, %xmm16, %rsi, 1, _, 0, _
+ ; CHECK: %xmm16 = VFNMSUB231SSZm_Int %xmm16, %xmm16, %rsi, 1, _, 0, _
+ %xmm16 = VFNMSUB231SSZm_Int %xmm16, %xmm16, %rsi, 1, _, 0, _
+ ; CHECK: %xmm16 = VFNMSUB231SSZr %xmm16, %xmm1, %xmm2
+ %xmm16 = VFNMSUB231SSZr %xmm16, %xmm1, %xmm2
+ ; CHECK: %xmm16 = VFNMSUB231SSZr_Int %xmm16, %xmm1, %xmm2
+ %xmm16 = VFNMSUB231SSZr_Int %xmm16, %xmm1, %xmm2
+ ; CHECK: VPEXTRBZmr %rdi, 1, _, 0, _, %xmm16, 3
+ VPEXTRBZmr %rdi, 1, _, 0, _, %xmm16, 3
+ ; CHECK: %eax = VPEXTRBZrr %xmm16, 1
+ %eax = VPEXTRBZrr %xmm16, 1
+ ; CHECK: VPEXTRDZmr %rdi, 1, _, 0, _, %xmm16, 3
+ VPEXTRDZmr %rdi, 1, _, 0, _, %xmm16, 3
+ ; CHECK: %eax = VPEXTRDZrr %xmm16, 1
+ %eax = VPEXTRDZrr %xmm16, 1
+ ; CHECK: VPEXTRQZmr %rdi, 1, _, 0, _, %xmm16, 3
+ VPEXTRQZmr %rdi, 1, _, 0, _, %xmm16, 3
+ ; CHECK: %rax = VPEXTRQZrr %xmm16, 1
+ %rax = VPEXTRQZrr %xmm16, 1
+ ; CHECK: VPEXTRWZmr %rdi, 1, _, 0, _, %xmm16, 3
+ VPEXTRWZmr %rdi, 1, _, 0, _, %xmm16, 3
+ ; CHECK: %eax = VPEXTRWZrr %xmm16, 1
+ %eax = VPEXTRWZrr %xmm16, 1
+ ; CHECK: %xmm16 = VPINSRBZrm %xmm16, %rsi, 1, _, 0, _, 3
+ %xmm16 = VPINSRBZrm %xmm16, %rsi, 1, _, 0, _, 3
+ ; CHECK: %xmm16 = VPINSRBZrr %xmm16, %edi, 5
+ %xmm16 = VPINSRBZrr %xmm16, %edi, 5
+ ; CHECK: %xmm16 = VPINSRDZrm %xmm16, %rsi, 1, _, 0, _, 3
+ %xmm16 = VPINSRDZrm %xmm16, %rsi, 1, _, 0, _, 3
+ ; CHECK: %xmm16 = VPINSRDZrr %xmm16, %edi, 5
+ %xmm16 = VPINSRDZrr %xmm16, %edi, 5
+ ; CHECK: %xmm16 = VPINSRQZrm %xmm16, %rsi, 1, _, 0, _, 3
+ %xmm16 = VPINSRQZrm %xmm16, %rsi, 1, _, 0, _, 3
+ ; CHECK: %xmm16 = VPINSRQZrr %xmm16, %rdi, 5
+ %xmm16 = VPINSRQZrr %xmm16, %rdi, 5
+ ; CHECK: %xmm16 = VPINSRWZrm %xmm16, %rsi, 1, _, 0, _, 3
+ %xmm16 = VPINSRWZrm %xmm16, %rsi, 1, _, 0, _, 3
+ ; CHECK: %xmm16 = VPINSRWZrr %xmm16, %edi, 5
+ %xmm16 = VPINSRWZrr %xmm16, %edi, 5
+ ; CHECK: %xmm16 = VSQRTSDZm %xmm16, _, _, _, _, _
+ %xmm16 = VSQRTSDZm %xmm16, _, _, _, _, _
+ ; CHECK: %xmm16 = VSQRTSDZm_Int %xmm16, _, _, _, _, _
+ %xmm16 = VSQRTSDZm_Int %xmm16, _, _, _, _, _
+ ; CHECK: %xmm16 = VSQRTSDZr %xmm16, _
+ %xmm16 = VSQRTSDZr %xmm16, _
+ ; CHECK: %xmm16 = VSQRTSDZr_Int %xmm16, _
+ %xmm16 = VSQRTSDZr_Int %xmm16, _
+ ; CHECK: %xmm16 = VSQRTSSZm %xmm16, _, _, _, _, _
+ %xmm16 = VSQRTSSZm %xmm16, _, _, _, _, _
+ ; CHECK: %xmm16 = VSQRTSSZm_Int %xmm16, _, _, _, _, _
+ %xmm16 = VSQRTSSZm_Int %xmm16, _, _, _, _, _
+ ; CHECK: %xmm16 = VSQRTSSZr %xmm16, _
+ %xmm16 = VSQRTSSZr %xmm16, _
+ ; CHECK: %xmm16 = VSQRTSSZr_Int %xmm16, _
+ %xmm16 = VSQRTSSZr_Int %xmm16, _
+ ; CHECK: %rdi = VCVTSD2SI64Zrm %rdi, %xmm16, 1, _, 0
+ %rdi = VCVTSD2SI64Zrm %rdi, %xmm16, 1, _, 0
+ ; CHECK: %rdi = VCVTSD2SI64Zrr %xmm16
+ %rdi = VCVTSD2SI64Zrr %xmm16
+ ; CHECK: %edi = VCVTSD2SIZrm %rdi, %xmm16, 1, _, 0
+ %edi = VCVTSD2SIZrm %rdi, %xmm16, 1, _, 0
+ ; CHECK: %edi = VCVTSD2SIZrr %xmm16
+ %edi = VCVTSD2SIZrr %xmm16
+ ; CHECK: %xmm16 = VCVTSD2SSZrm %xmm16, %rdi, 1, _, 0, _
+ %xmm16 = VCVTSD2SSZrm %xmm16, %rdi, 1, _, 0, _
+ ; CHECK: %xmm16 = VCVTSD2SSZrr %xmm16, _
+ %xmm16 = VCVTSD2SSZrr %xmm16, _
+ ; CHECK: %xmm16 = VCVTSI2SDZrm %xmm16, %rdi, 1, _, 0, _
+ %xmm16 = VCVTSI2SDZrm %xmm16, %rdi, 1, _, 0, _
+ ; CHECK: %xmm16 = VCVTSI2SDZrm_Int %xmm16, %rdi, 1, _, 0, _
+ %xmm16 = VCVTSI2SDZrm_Int %xmm16, %rdi, 1, _, 0, _
+ ; CHECK: %xmm16 = VCVTSI2SDZrr %xmm16, _
+ %xmm16 = VCVTSI2SDZrr %xmm16, _
+ ; CHECK: %xmm16 = VCVTSI2SDZrr_Int %xmm16, _
+ %xmm16 = VCVTSI2SDZrr_Int %xmm16, _
+ ; CHECK: %xmm16 = VCVTSI2SSZrm %xmm16, %rdi, 1, _, 0, _
+ %xmm16 = VCVTSI2SSZrm %xmm16, %rdi, 1, _, 0, _
+ ; CHECK: %xmm16 = VCVTSI2SSZrm_Int %xmm16, %rdi, 1, _, 0, _
+ %xmm16 = VCVTSI2SSZrm_Int %xmm16, %rdi, 1, _, 0, _
+ ; CHECK: %xmm16 = VCVTSI2SSZrr %xmm16, _
+ %xmm16 = VCVTSI2SSZrr %xmm16, _
+ ; CHECK: %xmm16 = VCVTSI2SSZrr_Int %xmm16, _
+ %xmm16 = VCVTSI2SSZrr_Int %xmm16, _
+ ; CHECK: %xmm16 = VCVTSS2SDZrm %xmm16, %rdi, 1, _, 0, _
+ %xmm16 = VCVTSS2SDZrm %xmm16, %rdi, 1, _, 0, _
+ ; CHECK: %xmm16 = VCVTSS2SDZrr %xmm16, _
+ %xmm16 = VCVTSS2SDZrr %xmm16, _
+ ; CHECK: %rdi = VCVTSS2SI64Zrm %rdi, %xmm16, 1, _, 0
+ %rdi = VCVTSS2SI64Zrm %rdi, %xmm16, 1, _, 0
+ ; CHECK: %rdi = VCVTSS2SI64Zrr %xmm16
+ %rdi = VCVTSS2SI64Zrr %xmm16
+ ; CHECK: %edi = VCVTSS2SIZrm %rdi, %xmm16, 1, _, 0
+ %edi = VCVTSS2SIZrm %rdi, %xmm16, 1, _, 0
+ ; CHECK: %edi = VCVTSS2SIZrr %xmm16
+ %edi = VCVTSS2SIZrr %xmm16
+ ; CHECK: %rdi = VCVTTSD2SI64Zrm %rdi, %xmm16, 1, _, 0
+ %rdi = VCVTTSD2SI64Zrm %rdi, %xmm16, 1, _, 0
+ ; CHECK: %rdi = VCVTTSD2SI64Zrm_Int %rdi, %xmm16, 1, _, 0
+ %rdi = VCVTTSD2SI64Zrm_Int %rdi, %xmm16, 1, _, 0
+ ; CHECK: %rdi = VCVTTSD2SI64Zrr %xmm16
+ %rdi = VCVTTSD2SI64Zrr %xmm16
+ ; CHECK: %rdi = VCVTTSD2SI64Zrr_Int %xmm16
+ %rdi = VCVTTSD2SI64Zrr_Int %xmm16
+ ; CHECK: %edi = VCVTTSD2SIZrm %rdi, %xmm16, 1, _, 0
+ %edi = VCVTTSD2SIZrm %rdi, %xmm16, 1, _, 0
+ ; CHECK: %edi = VCVTTSD2SIZrm_Int %rdi, %xmm16, 1, _, 0
+ %edi = VCVTTSD2SIZrm_Int %rdi, %xmm16, 1, _, 0
+ ; CHECK: %edi = VCVTTSD2SIZrr %xmm16
+ %edi = VCVTTSD2SIZrr %xmm16
+ ; CHECK: %edi = VCVTTSD2SIZrr_Int %xmm16
+ %edi = VCVTTSD2SIZrr_Int %xmm16
+ ; CHECK: %rdi = VCVTTSS2SI64Zrm %rdi, %xmm16, 1, _, 0
+ %rdi = VCVTTSS2SI64Zrm %rdi, %xmm16, 1, _, 0
+ ; CHECK: %rdi = VCVTTSS2SI64Zrm_Int %rdi, %xmm16, 1, _, 0
+ %rdi = VCVTTSS2SI64Zrm_Int %rdi, %xmm16, 1, _, 0
+ ; CHECK: %rdi = VCVTTSS2SI64Zrr %xmm16
+ %rdi = VCVTTSS2SI64Zrr %xmm16
+ ; CHECK: %rdi = VCVTTSS2SI64Zrr_Int %xmm16
+ %rdi = VCVTTSS2SI64Zrr_Int %xmm16
+ ; CHECK: %edi = VCVTTSS2SIZrm %rdi, %xmm16, 1, _, 0
+ %edi = VCVTTSS2SIZrm %rdi, %xmm16, 1, _, 0
+ ; CHECK: %edi = VCVTTSS2SIZrm_Int %rdi, %xmm16, 1, _, 0
+ %edi = VCVTTSS2SIZrm_Int %rdi, %xmm16, 1, _, 0
+ ; CHECK: %edi = VCVTTSS2SIZrr %xmm16
+ %edi = VCVTTSS2SIZrr %xmm16
+ ; CHECK: %edi = VCVTTSS2SIZrr_Int %xmm16
+ %edi = VCVTTSS2SIZrr_Int %xmm16
+ ; CHECK: %xmm16 = VMOV64toSDZrr %rdi
+ %xmm16 = VMOV64toSDZrr %rdi
+ ; CHECK: %xmm16 = VMOVDI2SSZrm %rip, _, _, _, _
+ %xmm16 = VMOVDI2SSZrm %rip, _, _, _, _
+ ; CHECK: %xmm16 = VMOVDI2SSZrr %eax
+ %xmm16 = VMOVDI2SSZrr %eax
+ ; CHECK: VMOVSDZmr %rdi, %xmm16, _, _, _, _
+ VMOVSDZmr %rdi, %xmm16, _, _, _, _
+ ; CHECK: %xmm16 = VMOVSDZrm %rip, _, _, _, _
+ %xmm16 = VMOVSDZrm %rip, _, _, _, _
+ ; CHECK: %xmm16 = VMOVSDZrr %xmm16, _
+ %xmm16 = VMOVSDZrr %xmm16, _
+ ; CHECK: VMOVSSZmr %rdi, %xmm16, _, _, _, _
+ VMOVSSZmr %rdi, %xmm16, _, _, _, _
+ ; CHECK: %xmm16 = VMOVSSZrm %rip, _, _, _, _
+ %xmm16 = VMOVSSZrm %rip, _, _, _, _
+ ; CHECK: %xmm16 = VMOVSSZrr %xmm16, _
+ %xmm16 = VMOVSSZrr %xmm16, _
+ ; CHECK: %xmm16 = VMOVSSZrr_REV %xmm16, _
+ %xmm16 = VMOVSSZrr_REV %xmm16, _
+ ; CHECK: %xmm16 = VMOV64toPQIZrr %rdi
+ %xmm16 = VMOV64toPQIZrr %rdi
+ ; CHECK: %xmm16 = VMOV64toSDZrr %rdi
+ %xmm16 = VMOV64toSDZrr %rdi
+ ; CHECK: %xmm16 = VMOVDI2PDIZrm %rip, _, _, _, _
+ %xmm16 = VMOVDI2PDIZrm %rip, _, _, _, _
+ ; CHECK: %xmm16 = VMOVDI2PDIZrr %edi
+ %xmm16 = VMOVDI2PDIZrr %edi
+ ; CHECK: %xmm16 = VMOVLHPSZrr %xmm16, _
+ %xmm16 = VMOVLHPSZrr %xmm16, _
+ ; CHECK: %xmm16 = VMOVHLPSZrr %xmm16, _
+ %xmm16 = VMOVHLPSZrr %xmm16, _
+ ; CHECK: VMOVPDI2DIZmr %rdi, %xmm16, _, _, _, _
+ VMOVPDI2DIZmr %rdi, %xmm16, _, _, _, _
+ ; CHECK: %edi = VMOVPDI2DIZrr %xmm16
+ %edi = VMOVPDI2DIZrr %xmm16
+ ; CHECK: VMOVPQI2QIZmr %rdi, %xmm16, _, _, _, _
+ VMOVPQI2QIZmr %rdi, %xmm16, _, _, _, _
+ ; CHECK: %rdi = VMOVPQIto64Zrr %xmm16
+ %rdi = VMOVPQIto64Zrr %xmm16
+ ; CHECK: %xmm16 = VMOVQI2PQIZrm %rip, _, _, _, _
+ %xmm16 = VMOVQI2PQIZrm %rip, _, _, _, _
+ ; CHECK: %xmm16 = VMOVZPQILo2PQIZrr %xmm16
+ %xmm16 = VMOVZPQILo2PQIZrr %xmm16
+ ; CHECK: Int_VCOMISDZrm %xmm16, %rdi, _, _, _, _, implicit-def %eflags
+ Int_VCOMISDZrm %xmm16, %rdi, _, _, _, _, implicit-def %eflags
+ ; CHECK: Int_VCOMISDZrr %xmm16, %xmm1, implicit-def %eflags
+ Int_VCOMISDZrr %xmm16, %xmm1, implicit-def %eflags
+ ; CHECK: Int_VCOMISSZrm %xmm16, %rdi, _, _, _, _, implicit-def %eflags
+ Int_VCOMISSZrm %xmm16, %rdi, _, _, _, _, implicit-def %eflags
+ ; CHECK: Int_VCOMISSZrr %xmm16, %xmm1, implicit-def %eflags
+ Int_VCOMISSZrr %xmm16, %xmm1, implicit-def %eflags
+ ; CHECK: Int_VUCOMISDZrm %xmm16, %rdi, _, _, _, _, implicit-def %eflags
+ Int_VUCOMISDZrm %xmm16, %rdi, _, _, _, _, implicit-def %eflags
+ ; CHECK: Int_VUCOMISDZrr %xmm16, %xmm1, implicit-def %eflags
+ Int_VUCOMISDZrr %xmm16, %xmm1, implicit-def %eflags
+ ; CHECK: Int_VUCOMISSZrm %xmm16, %rdi, _, _, _, _, implicit-def %eflags
+ Int_VUCOMISSZrm %xmm16, %rdi, _, _, _, _, implicit-def %eflags
+ ; CHECK: Int_VUCOMISSZrr %xmm16, %xmm1, implicit-def %eflags
+ Int_VUCOMISSZrr %xmm16, %xmm1, implicit-def %eflags
+ ; CHECK: VCOMISDZrm %xmm16, %rdi, _, _, _, _, implicit-def %eflags
+ VCOMISDZrm %xmm16, %rdi, _, _, _, _, implicit-def %eflags
+ ; CHECK: VCOMISDZrr %xmm16, %xmm1, implicit-def %eflags
+ VCOMISDZrr %xmm16, %xmm1, implicit-def %eflags
+ ; CHECK: VCOMISSZrm %xmm16, %rdi, _, _, _, _, implicit-def %eflags
+ VCOMISSZrm %xmm16, %rdi, _, _, _, _, implicit-def %eflags
+ ; CHECK: VCOMISSZrr %xmm16, %xmm1, implicit-def %eflags
+ VCOMISSZrr %xmm16, %xmm1, implicit-def %eflags
+ ; CHECK: VUCOMISDZrm %xmm16, %rdi, _, _, _, _, implicit-def %eflags
+ VUCOMISDZrm %xmm16, %rdi, _, _, _, _, implicit-def %eflags
+ ; CHECK: VUCOMISDZrr %xmm16, %xmm1, implicit-def %eflags
+ VUCOMISDZrr %xmm16, %xmm1, implicit-def %eflags
+ ; CHECK: VUCOMISSZrm %xmm16, %rdi, _, _, _, _, implicit-def %eflags
+ VUCOMISSZrm %xmm16, %rdi, _, _, _, _, implicit-def %eflags
+ ; CHECK: VUCOMISSZrr %xmm16, %xmm1, implicit-def %eflags
+ VUCOMISSZrr %xmm16, %xmm1, implicit-def %eflags
+
+ RET 0, %zmm0, %zmm1
+...
Propchange: llvm/trunk/test/CodeGen/X86/evex-to-vex-compress.mir
------------------------------------------------------------------------------
svn:executable = *
Modified: llvm/trunk/test/CodeGen/X86/fast-isel-store.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/fast-isel-store.ll?rev=290663&r1=290662&r2=290663&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/fast-isel-store.ll (original)
+++ llvm/trunk/test/CodeGen/X86/fast-isel-store.ll Wed Dec 28 04:12:48 2016
@@ -80,7 +80,7 @@ define <4 x i32> @test_store_4xi32(<4 x
; SKX32-LABEL: test_store_4xi32:
; SKX32: # BB#0:
; SKX32-NEXT: vpaddd %xmm1, %xmm0, %xmm0
-; SKX32-NEXT: vmovdqu64 %xmm0, (%rdi)
+; SKX32-NEXT: vmovdqu %xmm0, (%rdi)
; SKX32-NEXT: retq
%foo = add <4 x i32> %value, %value2 ; to force integer type on store
store <4 x i32> %foo, <4 x i32>* %addr, align 1
@@ -123,7 +123,7 @@ define <4 x i32> @test_store_4xi32_align
; SKX32-LABEL: test_store_4xi32_aligned:
; SKX32: # BB#0:
; SKX32-NEXT: vpaddd %xmm1, %xmm0, %xmm0
-; SKX32-NEXT: vmovdqa64 %xmm0, (%rdi)
+; SKX32-NEXT: vmovdqa %xmm0, (%rdi)
; SKX32-NEXT: retq
%foo = add <4 x i32> %value, %value2 ; to force integer type on store
store <4 x i32> %foo, <4 x i32>* %addr, align 16
Modified: llvm/trunk/test/CodeGen/X86/fp-logic-replace.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/fp-logic-replace.ll?rev=290663&r1=290662&r2=290663&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/fp-logic-replace.ll (original)
+++ llvm/trunk/test/CodeGen/X86/fp-logic-replace.ll Wed Dec 28 04:12:48 2016
@@ -22,9 +22,8 @@ define double @FsANDPSrr(double %x, doub
;
; AVX512DQ-LABEL: FsANDPSrr:
; AVX512DQ: # BB#0:
-; AVX512DQ-NEXT: vandps %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7c,0x08,0x54,0xc1]
+; AVX512DQ-NEXT: vandps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x54,0xc1]
; AVX512DQ-NEXT: retq # encoding: [0xc3]
-;
%bc1 = bitcast double %x to i64
%bc2 = bitcast double %y to i64
%and = and i64 %bc1, %bc2
@@ -46,9 +45,8 @@ define double @FsANDNPSrr(double %x, dou
;
; AVX512DQ-LABEL: FsANDNPSrr:
; AVX512DQ: # BB#0:
-; AVX512DQ-NEXT: vandnps %xmm0, %xmm1, %xmm0 # encoding: [0x62,0xf1,0x74,0x08,0x55,0xc0]
+; AVX512DQ-NEXT: vandnps %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x55,0xc0]
; AVX512DQ-NEXT: retq # encoding: [0xc3]
-;
%bc1 = bitcast double %x to i64
%bc2 = bitcast double %y to i64
%not = xor i64 %bc2, -1
@@ -70,9 +68,8 @@ define double @FsORPSrr(double %x, doubl
;
; AVX512DQ-LABEL: FsORPSrr:
; AVX512DQ: # BB#0:
-; AVX512DQ-NEXT: vorps %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7c,0x08,0x56,0xc1]
+; AVX512DQ-NEXT: vorps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x56,0xc1]
; AVX512DQ-NEXT: retq # encoding: [0xc3]
-;
%bc1 = bitcast double %x to i64
%bc2 = bitcast double %y to i64
%or = or i64 %bc1, %bc2
@@ -93,9 +90,8 @@ define double @FsXORPSrr(double %x, doub
;
; AVX512DQ-LABEL: FsXORPSrr:
; AVX512DQ: # BB#0:
-; AVX512DQ-NEXT: vxorps %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7c,0x08,0x57,0xc1]
+; AVX512DQ-NEXT: vxorps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x57,0xc1]
; AVX512DQ-NEXT: retq # encoding: [0xc3]
-;
%bc1 = bitcast double %x to i64
%bc2 = bitcast double %y to i64
%xor = xor i64 %bc1, %bc2
Modified: llvm/trunk/test/CodeGen/X86/masked_gather_scatter.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/masked_gather_scatter.ll?rev=290663&r1=290662&r2=290663&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/masked_gather_scatter.ll (original)
+++ llvm/trunk/test/CodeGen/X86/masked_gather_scatter.ll Wed Dec 28 04:12:48 2016
@@ -310,7 +310,7 @@ define <8 x i32> @test6(<8 x i32>%a1, <8
; SKX-NEXT: kxnorw %k0, %k0, %k2
; SKX-NEXT: vpgatherqd (,%zmm1), %ymm2 {%k2}
; SKX-NEXT: vpscatterqd %ymm0, (,%zmm1) {%k1}
-; SKX-NEXT: vmovdqa64 %ymm2, %ymm0
+; SKX-NEXT: vmovdqa %ymm2, %ymm0
; SKX-NEXT: retq
;
; SKX_32-LABEL: test6:
@@ -319,7 +319,7 @@ define <8 x i32> @test6(<8 x i32>%a1, <8
; SKX_32-NEXT: kxnorw %k0, %k0, %k2
; SKX_32-NEXT: vpgatherdd (,%ymm1), %ymm2 {%k2}
; SKX_32-NEXT: vpscatterdd %ymm0, (,%ymm1) {%k1}
-; SKX_32-NEXT: vmovdqa64 %ymm2, %ymm0
+; SKX_32-NEXT: vmovdqa %ymm2, %ymm0
; SKX_32-NEXT: retl
%a = call <8 x i32> @llvm.masked.gather.v8i32(<8 x i32*> %ptr, i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> undef)
@@ -359,7 +359,7 @@ define <8 x i32> @test7(i32* %base, <8 x
; SKX-NEXT: kmovb %esi, %k1
; SKX-NEXT: kmovw %k1, %k2
; SKX-NEXT: vpgatherdd (%rdi,%ymm0,4), %ymm1 {%k2}
-; SKX-NEXT: vmovdqa64 %ymm1, %ymm2
+; SKX-NEXT: vmovdqa %ymm1, %ymm2
; SKX-NEXT: vpgatherdd (%rdi,%ymm0,4), %ymm2 {%k1}
; SKX-NEXT: vpaddd %ymm2, %ymm1, %ymm0
; SKX-NEXT: retq
@@ -370,7 +370,7 @@ define <8 x i32> @test7(i32* %base, <8 x
; SKX_32-NEXT: kmovb {{[0-9]+}}(%esp), %k1
; SKX_32-NEXT: kmovw %k1, %k2
; SKX_32-NEXT: vpgatherdd (%eax,%ymm0,4), %ymm1 {%k2}
-; SKX_32-NEXT: vmovdqa64 %ymm1, %ymm2
+; SKX_32-NEXT: vmovdqa %ymm1, %ymm2
; SKX_32-NEXT: vpgatherdd (%eax,%ymm0,4), %ymm2 {%k1}
; SKX_32-NEXT: vpaddd %ymm2, %ymm1, %ymm0
; SKX_32-NEXT: retl
@@ -1233,7 +1233,7 @@ define <2 x i32> @test23(i32* %base, <2
; SKX-NEXT: vpsllq $63, %xmm1, %xmm1
; SKX-NEXT: vptestmq %xmm1, %xmm1, %k1
; SKX-NEXT: vpgatherqq (%rdi,%xmm0,8), %xmm2 {%k1}
-; SKX-NEXT: vmovdqa64 %xmm2, %xmm0
+; SKX-NEXT: vmovdqa %xmm2, %xmm0
; SKX-NEXT: retq
;
; SKX_32-LABEL: test23:
@@ -1242,7 +1242,7 @@ define <2 x i32> @test23(i32* %base, <2
; SKX_32-NEXT: vptestmq %xmm1, %xmm1, %k1
; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax
; SKX_32-NEXT: vpgatherqq (%eax,%xmm0,8), %xmm2 {%k1}
-; SKX_32-NEXT: vmovdqa64 %xmm2, %xmm0
+; SKX_32-NEXT: vmovdqa %xmm2, %xmm0
; SKX_32-NEXT: retl
%sext_ind = sext <2 x i32> %ind to <2 x i64>
%gep.random = getelementptr i32, i32* %base, <2 x i64> %sext_ind
@@ -1276,7 +1276,7 @@ define <2 x i32> @test24(i32* %base, <2
; SKX: # BB#0:
; SKX-NEXT: kxnorw %k0, %k0, %k1
; SKX-NEXT: vpgatherqq (%rdi,%xmm0,8), %xmm1 {%k1}
-; SKX-NEXT: vmovdqa64 %xmm1, %xmm0
+; SKX-NEXT: vmovdqa %xmm1, %xmm0
; SKX-NEXT: retq
;
; SKX_32-LABEL: test24:
@@ -1284,7 +1284,7 @@ define <2 x i32> @test24(i32* %base, <2
; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax
; SKX_32-NEXT: kxnorw %k0, %k0, %k1
; SKX_32-NEXT: vpgatherqq (%eax,%xmm0,8), %xmm1 {%k1}
-; SKX_32-NEXT: vmovdqa64 %xmm1, %xmm0
+; SKX_32-NEXT: vmovdqa %xmm1, %xmm0
; SKX_32-NEXT: retl
%sext_ind = sext <2 x i32> %ind to <2 x i64>
%gep.random = getelementptr i32, i32* %base, <2 x i64> %sext_ind
@@ -1324,7 +1324,7 @@ define <2 x i64> @test25(i64* %base, <2
; SKX-NEXT: vpsllq $63, %xmm1, %xmm1
; SKX-NEXT: vptestmq %xmm1, %xmm1, %k1
; SKX-NEXT: vpgatherqq (%rdi,%xmm0,8), %xmm2 {%k1}
-; SKX-NEXT: vmovdqa64 %xmm2, %xmm0
+; SKX-NEXT: vmovdqa %xmm2, %xmm0
; SKX-NEXT: retq
;
; SKX_32-LABEL: test25:
@@ -1333,7 +1333,7 @@ define <2 x i64> @test25(i64* %base, <2
; SKX_32-NEXT: vptestmq %xmm1, %xmm1, %k1
; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax
; SKX_32-NEXT: vpgatherqq (%eax,%xmm0,8), %xmm2 {%k1}
-; SKX_32-NEXT: vmovdqa64 %xmm2, %xmm0
+; SKX_32-NEXT: vmovdqa %xmm2, %xmm0
; SKX_32-NEXT: retl
%sext_ind = sext <2 x i32> %ind to <2 x i64>
%gep.random = getelementptr i64, i64* %base, <2 x i64> %sext_ind
@@ -1370,7 +1370,7 @@ define <2 x i64> @test26(i64* %base, <2
; SKX: # BB#0:
; SKX-NEXT: kxnorw %k0, %k0, %k1
; SKX-NEXT: vpgatherqq (%rdi,%xmm0,8), %xmm1 {%k1}
-; SKX-NEXT: vmovdqa64 %xmm1, %xmm0
+; SKX-NEXT: vmovdqa %xmm1, %xmm0
; SKX-NEXT: retq
;
; SKX_32-LABEL: test26:
@@ -1378,7 +1378,7 @@ define <2 x i64> @test26(i64* %base, <2
; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax
; SKX_32-NEXT: kxnorw %k0, %k0, %k1
; SKX_32-NEXT: vpgatherqq (%eax,%xmm0,8), %xmm1 {%k1}
-; SKX_32-NEXT: vmovdqa64 %xmm1, %xmm0
+; SKX_32-NEXT: vmovdqa %xmm1, %xmm0
; SKX_32-NEXT: retl
%sext_ind = sext <2 x i32> %ind to <2 x i64>
%gep.random = getelementptr i64, i64* %base, <2 x i64> %sext_ind
Modified: llvm/trunk/test/CodeGen/X86/masked_memop.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/masked_memop.ll?rev=290663&r1=290662&r2=290663&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/masked_memop.ll (original)
+++ llvm/trunk/test/CodeGen/X86/masked_memop.ll Wed Dec 28 04:12:48 2016
@@ -27,7 +27,7 @@ define <2 x double> @test6(<2 x i64> %tr
;
; SKX-LABEL: test6:
; SKX: ## BB#0:
-; SKX-NEXT: vpxord %xmm2, %xmm2, %xmm2
+; SKX-NEXT: vpxor %xmm2, %xmm2, %xmm2
; SKX-NEXT: vpcmpeqq %xmm2, %xmm0, %k1
; SKX-NEXT: vmovupd (%rdi), %xmm1 {%k1}
; SKX-NEXT: vmovapd %xmm1, %xmm0
@@ -56,7 +56,7 @@ define <4 x float> @test7(<4 x i32> %tri
;
; SKX-LABEL: test7:
; SKX: ## BB#0:
-; SKX-NEXT: vpxord %xmm2, %xmm2, %xmm2
+; SKX-NEXT: vpxor %xmm2, %xmm2, %xmm2
; SKX-NEXT: vpcmpeqd %xmm2, %xmm0, %k1
; SKX-NEXT: vmovups (%rdi), %xmm1 {%k1}
; SKX-NEXT: vmovaps %xmm1, %xmm0
@@ -93,10 +93,10 @@ define <4 x i32> @test8(<4 x i32> %trigg
;
; SKX-LABEL: test8:
; SKX: ## BB#0:
-; SKX-NEXT: vpxord %xmm2, %xmm2, %xmm2
+; SKX-NEXT: vpxor %xmm2, %xmm2, %xmm2
; SKX-NEXT: vpcmpeqd %xmm2, %xmm0, %k1
; SKX-NEXT: vmovdqu32 (%rdi), %xmm1 {%k1}
-; SKX-NEXT: vmovdqa64 %xmm1, %xmm0
+; SKX-NEXT: vmovdqa %xmm1, %xmm0
; SKX-NEXT: retq
%mask = icmp eq <4 x i32> %trigger, zeroinitializer
%res = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %addr, i32 4, <4 x i1>%mask, <4 x i32>%dst)
@@ -127,7 +127,7 @@ define void @test9(<4 x i32> %trigger, <
;
; SKX-LABEL: test9:
; SKX: ## BB#0:
-; SKX-NEXT: vpxord %xmm2, %xmm2, %xmm2
+; SKX-NEXT: vpxor %xmm2, %xmm2, %xmm2
; SKX-NEXT: vpcmpeqd %xmm2, %xmm0, %k1
; SKX-NEXT: vmovdqu32 %xmm1, (%rdi) {%k1}
; SKX-NEXT: retq
@@ -169,7 +169,7 @@ define <4 x double> @test10(<4 x i32> %t
;
; SKX-LABEL: test10:
; SKX: ## BB#0:
-; SKX-NEXT: vpxord %xmm2, %xmm2, %xmm2
+; SKX-NEXT: vpxor %xmm2, %xmm2, %xmm2
; SKX-NEXT: vpcmpeqd %xmm2, %xmm0, %k1
; SKX-NEXT: vmovapd (%rdi), %ymm1 {%k1}
; SKX-NEXT: vmovapd %ymm1, %ymm0
@@ -209,7 +209,7 @@ define <4 x double> @test10b(<4 x i32> %
;
; SKX-LABEL: test10b:
; SKX: ## BB#0:
-; SKX-NEXT: vpxord %xmm1, %xmm1, %xmm1
+; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1
; SKX-NEXT: vpcmpeqd %xmm1, %xmm0, %k1
; SKX-NEXT: vmovapd (%rdi), %ymm0 {%k1} {z}
; SKX-NEXT: retq
@@ -252,7 +252,7 @@ define <8 x float> @test11a(<8 x i32> %t
;
; SKX-LABEL: test11a:
; SKX: ## BB#0:
-; SKX-NEXT: vpxord %ymm2, %ymm2, %ymm2
+; SKX-NEXT: vpxor %ymm2, %ymm2, %ymm2
; SKX-NEXT: vpcmpeqd %ymm2, %ymm0, %k1
; SKX-NEXT: vmovaps (%rdi), %ymm1 {%k1}
; SKX-NEXT: vmovaps %ymm1, %ymm0
@@ -302,7 +302,7 @@ define <8 x i32> @test11b(<8 x i1> %mask
; SKX-NEXT: vpsllw $15, %xmm0, %xmm0
; SKX-NEXT: vpmovw2m %xmm0, %k1
; SKX-NEXT: vmovdqu32 (%rdi), %ymm1 {%k1}
-; SKX-NEXT: vmovdqa64 %ymm1, %ymm0
+; SKX-NEXT: vmovdqa %ymm1, %ymm0
; SKX-NEXT: retq
%res = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* %addr, i32 4, <8 x i1>%mask, <8 x i32>%dst)
ret <8 x i32> %res
@@ -425,7 +425,7 @@ define void @test12(<8 x i32> %trigger,
;
; SKX-LABEL: test12:
; SKX: ## BB#0:
-; SKX-NEXT: vpxord %ymm2, %ymm2, %ymm2
+; SKX-NEXT: vpxor %ymm2, %ymm2, %ymm2
; SKX-NEXT: vpcmpeqd %ymm2, %ymm0, %k1
; SKX-NEXT: vmovdqu32 %ymm1, (%rdi) {%k1}
; SKX-NEXT: retq
@@ -464,7 +464,7 @@ define void @test14(<2 x i32> %trigger,
;
; SKX-LABEL: test14:
; SKX: ## BB#0:
-; SKX-NEXT: vpxord %xmm2, %xmm2, %xmm2
+; SKX-NEXT: vpxor %xmm2, %xmm2, %xmm2
; SKX-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
; SKX-NEXT: vpcmpeqq %xmm2, %xmm0, %k0
; SKX-NEXT: kshiftlw $14, %k0, %k0
@@ -509,7 +509,7 @@ define void @test15(<2 x i32> %trigger,
;
; SKX-LABEL: test15:
; SKX: ## BB#0:
-; SKX-NEXT: vpxord %xmm2, %xmm2, %xmm2
+; SKX-NEXT: vpxor %xmm2, %xmm2, %xmm2
; SKX-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
; SKX-NEXT: vpcmpeqq %xmm2, %xmm0, %k1
; SKX-NEXT: vpmovqd %xmm1, (%rdi) {%k1}
@@ -552,7 +552,7 @@ define <2 x float> @test16(<2 x i32> %tr
;
; SKX-LABEL: test16:
; SKX: ## BB#0:
-; SKX-NEXT: vpxord %xmm2, %xmm2, %xmm2
+; SKX-NEXT: vpxor %xmm2, %xmm2, %xmm2
; SKX-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
; SKX-NEXT: vpcmpeqq %xmm2, %xmm0, %k0
; SKX-NEXT: kshiftlw $14, %k0, %k0
@@ -604,7 +604,7 @@ define <2 x i32> @test17(<2 x i32> %trig
;
; SKX-LABEL: test17:
; SKX: ## BB#0:
-; SKX-NEXT: vpxord %xmm2, %xmm2, %xmm2
+; SKX-NEXT: vpxor %xmm2, %xmm2, %xmm2
; SKX-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
; SKX-NEXT: vpcmpeqq %xmm2, %xmm0, %k0
; SKX-NEXT: kshiftlw $14, %k0, %k0
@@ -648,7 +648,7 @@ define <2 x float> @test18(<2 x i32> %tr
;
; SKX-LABEL: test18:
; SKX: ## BB#0:
-; SKX-NEXT: vpxord %xmm1, %xmm1, %xmm1
+; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1
; SKX-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
; SKX-NEXT: vpcmpeqq %xmm1, %xmm0, %k0
; SKX-NEXT: kshiftlw $14, %k0, %k0
Modified: llvm/trunk/test/CodeGen/X86/nontemporal-2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/nontemporal-2.ll?rev=290663&r1=290662&r2=290663&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/nontemporal-2.ll (original)
+++ llvm/trunk/test/CodeGen/X86/nontemporal-2.ll Wed Dec 28 04:12:48 2016
@@ -116,7 +116,7 @@ define void @test_zero_v4f32(<4 x float>
;
; VLX-LABEL: test_zero_v4f32:
; VLX: # BB#0:
-; VLX-NEXT: vpxord %xmm0, %xmm0, %xmm0
+; VLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; VLX-NEXT: vmovntdq %xmm0, (%rdi)
; VLX-NEXT: retq
store <4 x float> zeroinitializer, <4 x float>* %dst, align 16, !nontemporal !1
@@ -138,7 +138,7 @@ define void @test_zero_v4i32(<4 x i32>*
;
; VLX-LABEL: test_zero_v4i32:
; VLX: # BB#0:
-; VLX-NEXT: vpxord %xmm0, %xmm0, %xmm0
+; VLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; VLX-NEXT: vmovntdq %xmm0, (%rdi)
; VLX-NEXT: retq
store <4 x i32> zeroinitializer, <4 x i32>* %dst, align 16, !nontemporal !1
@@ -161,7 +161,7 @@ define void @test_zero_v2f64(<2 x double
;
; VLX-LABEL: test_zero_v2f64:
; VLX: # BB#0:
-; VLX-NEXT: vpxord %xmm0, %xmm0, %xmm0
+; VLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; VLX-NEXT: vmovntdq %xmm0, (%rdi)
; VLX-NEXT: retq
store <2 x double> zeroinitializer, <2 x double>* %dst, align 16, !nontemporal !1
@@ -183,7 +183,7 @@ define void @test_zero_v2i64(<2 x i64>*
;
; VLX-LABEL: test_zero_v2i64:
; VLX: # BB#0:
-; VLX-NEXT: vpxord %xmm0, %xmm0, %xmm0
+; VLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; VLX-NEXT: vmovntdq %xmm0, (%rdi)
; VLX-NEXT: retq
store <2 x i64> zeroinitializer, <2 x i64>* %dst, align 16, !nontemporal !1
@@ -205,7 +205,7 @@ define void @test_zero_v8i16(<8 x i16>*
;
; VLX-LABEL: test_zero_v8i16:
; VLX: # BB#0:
-; VLX-NEXT: vpxord %xmm0, %xmm0, %xmm0
+; VLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; VLX-NEXT: vmovntdq %xmm0, (%rdi)
; VLX-NEXT: retq
store <8 x i16> zeroinitializer, <8 x i16>* %dst, align 16, !nontemporal !1
@@ -227,7 +227,7 @@ define void @test_zero_v16i8(<16 x i8>*
;
; VLX-LABEL: test_zero_v16i8:
; VLX: # BB#0:
-; VLX-NEXT: vpxord %xmm0, %xmm0, %xmm0
+; VLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; VLX-NEXT: vmovntdq %xmm0, (%rdi)
; VLX-NEXT: retq
store <16 x i8> zeroinitializer, <16 x i8>* %dst, align 16, !nontemporal !1
@@ -253,7 +253,7 @@ define void @test_zero_v8f32(<8 x float>
;
; VLX-LABEL: test_zero_v8f32:
; VLX: # BB#0:
-; VLX-NEXT: vpxord %ymm0, %ymm0, %ymm0
+; VLX-NEXT: vpxor %ymm0, %ymm0, %ymm0
; VLX-NEXT: vmovntdq %ymm0, (%rdi)
; VLX-NEXT: retq
store <8 x float> zeroinitializer, <8 x float>* %dst, align 32, !nontemporal !1
@@ -277,7 +277,7 @@ define void @test_zero_v8i32(<8 x i32>*
;
; VLX-LABEL: test_zero_v8i32:
; VLX: # BB#0:
-; VLX-NEXT: vpxord %ymm0, %ymm0, %ymm0
+; VLX-NEXT: vpxor %ymm0, %ymm0, %ymm0
; VLX-NEXT: vmovntdq %ymm0, (%rdi)
; VLX-NEXT: retq
store <8 x i32> zeroinitializer, <8 x i32>* %dst, align 32, !nontemporal !1
@@ -301,7 +301,7 @@ define void @test_zero_v4f64(<4 x double
;
; VLX-LABEL: test_zero_v4f64:
; VLX: # BB#0:
-; VLX-NEXT: vpxord %ymm0, %ymm0, %ymm0
+; VLX-NEXT: vpxor %ymm0, %ymm0, %ymm0
; VLX-NEXT: vmovntdq %ymm0, (%rdi)
; VLX-NEXT: retq
store <4 x double> zeroinitializer, <4 x double>* %dst, align 32, !nontemporal !1
@@ -325,7 +325,7 @@ define void @test_zero_v4i64(<4 x i64>*
;
; VLX-LABEL: test_zero_v4i64:
; VLX: # BB#0:
-; VLX-NEXT: vpxord %ymm0, %ymm0, %ymm0
+; VLX-NEXT: vpxor %ymm0, %ymm0, %ymm0
; VLX-NEXT: vmovntdq %ymm0, (%rdi)
; VLX-NEXT: retq
store <4 x i64> zeroinitializer, <4 x i64>* %dst, align 32, !nontemporal !1
@@ -349,7 +349,7 @@ define void @test_zero_v16i16(<16 x i16>
;
; VLX-LABEL: test_zero_v16i16:
; VLX: # BB#0:
-; VLX-NEXT: vpxord %ymm0, %ymm0, %ymm0
+; VLX-NEXT: vpxor %ymm0, %ymm0, %ymm0
; VLX-NEXT: vmovntdq %ymm0, (%rdi)
; VLX-NEXT: retq
store <16 x i16> zeroinitializer, <16 x i16>* %dst, align 32, !nontemporal !1
@@ -373,7 +373,7 @@ define void @test_zero_v32i8(<32 x i8>*
;
; VLX-LABEL: test_zero_v32i8:
; VLX: # BB#0:
-; VLX-NEXT: vpxord %ymm0, %ymm0, %ymm0
+; VLX-NEXT: vpxor %ymm0, %ymm0, %ymm0
; VLX-NEXT: vmovntdq %ymm0, (%rdi)
; VLX-NEXT: retq
store <32 x i8> zeroinitializer, <32 x i8>* %dst, align 32, !nontemporal !1
Modified: llvm/trunk/test/CodeGen/X86/sse-intrinsics-x86.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse-intrinsics-x86.ll?rev=290663&r1=290662&r2=290663&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sse-intrinsics-x86.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sse-intrinsics-x86.ll Wed Dec 28 04:12:48 2016
@@ -57,7 +57,7 @@ define i32 @test_x86_sse_comieq_ss(<4 x
;
; SKX-LABEL: test_x86_sse_comieq_ss:
; SKX: ## BB#0:
-; SKX-NEXT: vcomiss %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x2f,0xc1]
+; SKX-NEXT: vcomiss %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2f,0xc1]
; SKX-NEXT: setnp %al ## encoding: [0x0f,0x9b,0xc0]
; SKX-NEXT: sete %cl ## encoding: [0x0f,0x94,0xc1]
; SKX-NEXT: andb %al, %cl ## encoding: [0x20,0xc1]
@@ -87,7 +87,7 @@ define i32 @test_x86_sse_comige_ss(<4 x
; SKX-LABEL: test_x86_sse_comige_ss:
; SKX: ## BB#0:
; SKX-NEXT: xorl %eax, %eax ## encoding: [0x31,0xc0]
-; SKX-NEXT: vcomiss %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x2f,0xc1]
+; SKX-NEXT: vcomiss %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2f,0xc1]
; SKX-NEXT: setae %al ## encoding: [0x0f,0x93,0xc0]
; SKX-NEXT: retl ## encoding: [0xc3]
%res = call i32 @llvm.x86.sse.comige.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
@@ -114,7 +114,7 @@ define i32 @test_x86_sse_comigt_ss(<4 x
; SKX-LABEL: test_x86_sse_comigt_ss:
; SKX: ## BB#0:
; SKX-NEXT: xorl %eax, %eax ## encoding: [0x31,0xc0]
-; SKX-NEXT: vcomiss %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x2f,0xc1]
+; SKX-NEXT: vcomiss %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2f,0xc1]
; SKX-NEXT: seta %al ## encoding: [0x0f,0x97,0xc0]
; SKX-NEXT: retl ## encoding: [0xc3]
%res = call i32 @llvm.x86.sse.comigt.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
@@ -141,7 +141,7 @@ define i32 @test_x86_sse_comile_ss(<4 x
; SKX-LABEL: test_x86_sse_comile_ss:
; SKX: ## BB#0:
; SKX-NEXT: xorl %eax, %eax ## encoding: [0x31,0xc0]
-; SKX-NEXT: vcomiss %xmm0, %xmm1 ## encoding: [0x62,0xf1,0x7c,0x08,0x2f,0xc8]
+; SKX-NEXT: vcomiss %xmm0, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2f,0xc8]
; SKX-NEXT: setae %al ## encoding: [0x0f,0x93,0xc0]
; SKX-NEXT: retl ## encoding: [0xc3]
%res = call i32 @llvm.x86.sse.comile.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
@@ -168,7 +168,7 @@ define i32 @test_x86_sse_comilt_ss(<4 x
; SKX-LABEL: test_x86_sse_comilt_ss:
; SKX: ## BB#0:
; SKX-NEXT: xorl %eax, %eax ## encoding: [0x31,0xc0]
-; SKX-NEXT: vcomiss %xmm0, %xmm1 ## encoding: [0x62,0xf1,0x7c,0x08,0x2f,0xc8]
+; SKX-NEXT: vcomiss %xmm0, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2f,0xc8]
; SKX-NEXT: seta %al ## encoding: [0x0f,0x97,0xc0]
; SKX-NEXT: retl ## encoding: [0xc3]
%res = call i32 @llvm.x86.sse.comilt.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
@@ -198,7 +198,7 @@ define i32 @test_x86_sse_comineq_ss(<4 x
;
; SKX-LABEL: test_x86_sse_comineq_ss:
; SKX: ## BB#0:
-; SKX-NEXT: vcomiss %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x2f,0xc1]
+; SKX-NEXT: vcomiss %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2f,0xc1]
; SKX-NEXT: setp %al ## encoding: [0x0f,0x9a,0xc0]
; SKX-NEXT: setne %cl ## encoding: [0x0f,0x95,0xc1]
; SKX-NEXT: orb %al, %cl ## encoding: [0x08,0xc1]
@@ -226,7 +226,7 @@ define <4 x float> @test_x86_sse_cvtsi2s
; SKX-LABEL: test_x86_sse_cvtsi2ss:
; SKX: ## BB#0:
; SKX-NEXT: movl $7, %eax ## encoding: [0xb8,0x07,0x00,0x00,0x00]
-; SKX-NEXT: vcvtsi2ssl %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7e,0x08,0x2a,0xc0]
+; SKX-NEXT: vcvtsi2ssl %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x2a,0xc0]
; SKX-NEXT: retl ## encoding: [0xc3]
%res = call <4 x float> @llvm.x86.sse.cvtsi2ss(<4 x float> %a0, i32 7) ; <<4 x float>> [#uses=1]
ret <4 x float> %res
@@ -247,7 +247,7 @@ define i32 @test_x86_sse_cvtss2si(<4 x f
;
; SKX-LABEL: test_x86_sse_cvtss2si:
; SKX: ## BB#0:
-; SKX-NEXT: vcvtss2si %xmm0, %eax ## encoding: [0x62,0xf1,0x7e,0x08,0x2d,0xc0]
+; SKX-NEXT: vcvtss2si %xmm0, %eax ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x2d,0xc0]
; SKX-NEXT: retl ## encoding: [0xc3]
%res = call i32 @llvm.x86.sse.cvtss2si(<4 x float> %a0) ; <i32> [#uses=1]
ret i32 %res
@@ -268,7 +268,7 @@ define i32 @test_x86_sse_cvttss2si(<4 x
;
; SKX-LABEL: test_x86_sse_cvttss2si:
; SKX: ## BB#0:
-; SKX-NEXT: vcvttss2si %xmm0, %eax ## encoding: [0x62,0xf1,0x7e,0x08,0x2c,0xc0]
+; SKX-NEXT: vcvttss2si %xmm0, %eax ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x2c,0xc0]
; SKX-NEXT: retl ## encoding: [0xc3]
%res = call i32 @llvm.x86.sse.cvttss2si(<4 x float> %a0) ; <i32> [#uses=1]
ret i32 %res
@@ -308,7 +308,7 @@ define <4 x float> @test_x86_sse_max_ps(
;
; SKX-LABEL: test_x86_sse_max_ps:
; SKX: ## BB#0:
-; SKX-NEXT: vmaxps %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x5f,0xc1]
+; SKX-NEXT: vmaxps %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x5f,0xc1]
; SKX-NEXT: retl ## encoding: [0xc3]
%res = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
ret <4 x float> %res
@@ -345,7 +345,7 @@ define <4 x float> @test_x86_sse_min_ps(
;
; SKX-LABEL: test_x86_sse_min_ps:
; SKX: ## BB#0:
-; SKX-NEXT: vminps %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x5d,0xc1]
+; SKX-NEXT: vminps %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x5d,0xc1]
; SKX-NEXT: retl ## encoding: [0xc3]
%res = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
ret <4 x float> %res
@@ -531,7 +531,7 @@ define i32 @test_x86_sse_ucomieq_ss(<4 x
;
; SKX-LABEL: test_x86_sse_ucomieq_ss:
; SKX: ## BB#0:
-; SKX-NEXT: vucomiss %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x2e,0xc1]
+; SKX-NEXT: vucomiss %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2e,0xc1]
; SKX-NEXT: setnp %al ## encoding: [0x0f,0x9b,0xc0]
; SKX-NEXT: sete %cl ## encoding: [0x0f,0x94,0xc1]
; SKX-NEXT: andb %al, %cl ## encoding: [0x20,0xc1]
@@ -561,7 +561,7 @@ define i32 @test_x86_sse_ucomige_ss(<4 x
; SKX-LABEL: test_x86_sse_ucomige_ss:
; SKX: ## BB#0:
; SKX-NEXT: xorl %eax, %eax ## encoding: [0x31,0xc0]
-; SKX-NEXT: vucomiss %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x2e,0xc1]
+; SKX-NEXT: vucomiss %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2e,0xc1]
; SKX-NEXT: setae %al ## encoding: [0x0f,0x93,0xc0]
; SKX-NEXT: retl ## encoding: [0xc3]
%res = call i32 @llvm.x86.sse.ucomige.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
@@ -588,7 +588,7 @@ define i32 @test_x86_sse_ucomigt_ss(<4 x
; SKX-LABEL: test_x86_sse_ucomigt_ss:
; SKX: ## BB#0:
; SKX-NEXT: xorl %eax, %eax ## encoding: [0x31,0xc0]
-; SKX-NEXT: vucomiss %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x2e,0xc1]
+; SKX-NEXT: vucomiss %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2e,0xc1]
; SKX-NEXT: seta %al ## encoding: [0x0f,0x97,0xc0]
; SKX-NEXT: retl ## encoding: [0xc3]
%res = call i32 @llvm.x86.sse.ucomigt.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
@@ -615,7 +615,7 @@ define i32 @test_x86_sse_ucomile_ss(<4 x
; SKX-LABEL: test_x86_sse_ucomile_ss:
; SKX: ## BB#0:
; SKX-NEXT: xorl %eax, %eax ## encoding: [0x31,0xc0]
-; SKX-NEXT: vucomiss %xmm0, %xmm1 ## encoding: [0x62,0xf1,0x7c,0x08,0x2e,0xc8]
+; SKX-NEXT: vucomiss %xmm0, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2e,0xc8]
; SKX-NEXT: setae %al ## encoding: [0x0f,0x93,0xc0]
; SKX-NEXT: retl ## encoding: [0xc3]
%res = call i32 @llvm.x86.sse.ucomile.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
@@ -642,7 +642,7 @@ define i32 @test_x86_sse_ucomilt_ss(<4 x
; SKX-LABEL: test_x86_sse_ucomilt_ss:
; SKX: ## BB#0:
; SKX-NEXT: xorl %eax, %eax ## encoding: [0x31,0xc0]
-; SKX-NEXT: vucomiss %xmm0, %xmm1 ## encoding: [0x62,0xf1,0x7c,0x08,0x2e,0xc8]
+; SKX-NEXT: vucomiss %xmm0, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2e,0xc8]
; SKX-NEXT: seta %al ## encoding: [0x0f,0x97,0xc0]
; SKX-NEXT: retl ## encoding: [0xc3]
%res = call i32 @llvm.x86.sse.ucomilt.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
@@ -672,7 +672,7 @@ define i32 @test_x86_sse_ucomineq_ss(<4
;
; SKX-LABEL: test_x86_sse_ucomineq_ss:
; SKX: ## BB#0:
-; SKX-NEXT: vucomiss %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x2e,0xc1]
+; SKX-NEXT: vucomiss %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2e,0xc1]
; SKX-NEXT: setp %al ## encoding: [0x0f,0x9a,0xc0]
; SKX-NEXT: setne %cl ## encoding: [0x0f,0x95,0xc1]
; SKX-NEXT: orb %al, %cl ## encoding: [0x08,0xc1]
Modified: llvm/trunk/test/CodeGen/X86/sse2-intrinsics-x86.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse2-intrinsics-x86.ll?rev=290663&r1=290662&r2=290663&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sse2-intrinsics-x86.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sse2-intrinsics-x86.ll Wed Dec 28 04:12:48 2016
@@ -56,7 +56,7 @@ define i32 @test_x86_sse2_comieq_sd(<2 x
;
; SKX-LABEL: test_x86_sse2_comieq_sd:
; SKX: ## BB#0:
-; SKX-NEXT: vcomisd %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x2f,0xc1]
+; SKX-NEXT: vcomisd %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2f,0xc1]
; SKX-NEXT: setnp %al ## encoding: [0x0f,0x9b,0xc0]
; SKX-NEXT: sete %cl ## encoding: [0x0f,0x94,0xc1]
; SKX-NEXT: andb %al, %cl ## encoding: [0x20,0xc1]
@@ -86,7 +86,7 @@ define i32 @test_x86_sse2_comige_sd(<2 x
; SKX-LABEL: test_x86_sse2_comige_sd:
; SKX: ## BB#0:
; SKX-NEXT: xorl %eax, %eax ## encoding: [0x31,0xc0]
-; SKX-NEXT: vcomisd %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x2f,0xc1]
+; SKX-NEXT: vcomisd %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2f,0xc1]
; SKX-NEXT: setae %al ## encoding: [0x0f,0x93,0xc0]
; SKX-NEXT: retl ## encoding: [0xc3]
%res = call i32 @llvm.x86.sse2.comige.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
@@ -113,7 +113,7 @@ define i32 @test_x86_sse2_comigt_sd(<2 x
; SKX-LABEL: test_x86_sse2_comigt_sd:
; SKX: ## BB#0:
; SKX-NEXT: xorl %eax, %eax ## encoding: [0x31,0xc0]
-; SKX-NEXT: vcomisd %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x2f,0xc1]
+; SKX-NEXT: vcomisd %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2f,0xc1]
; SKX-NEXT: seta %al ## encoding: [0x0f,0x97,0xc0]
; SKX-NEXT: retl ## encoding: [0xc3]
%res = call i32 @llvm.x86.sse2.comigt.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
@@ -140,7 +140,7 @@ define i32 @test_x86_sse2_comile_sd(<2 x
; SKX-LABEL: test_x86_sse2_comile_sd:
; SKX: ## BB#0:
; SKX-NEXT: xorl %eax, %eax ## encoding: [0x31,0xc0]
-; SKX-NEXT: vcomisd %xmm0, %xmm1 ## encoding: [0x62,0xf1,0xfd,0x08,0x2f,0xc8]
+; SKX-NEXT: vcomisd %xmm0, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2f,0xc8]
; SKX-NEXT: setae %al ## encoding: [0x0f,0x93,0xc0]
; SKX-NEXT: retl ## encoding: [0xc3]
%res = call i32 @llvm.x86.sse2.comile.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
@@ -167,7 +167,7 @@ define i32 @test_x86_sse2_comilt_sd(<2 x
; SKX-LABEL: test_x86_sse2_comilt_sd:
; SKX: ## BB#0:
; SKX-NEXT: xorl %eax, %eax ## encoding: [0x31,0xc0]
-; SKX-NEXT: vcomisd %xmm0, %xmm1 ## encoding: [0x62,0xf1,0xfd,0x08,0x2f,0xc8]
+; SKX-NEXT: vcomisd %xmm0, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2f,0xc8]
; SKX-NEXT: seta %al ## encoding: [0x0f,0x97,0xc0]
; SKX-NEXT: retl ## encoding: [0xc3]
%res = call i32 @llvm.x86.sse2.comilt.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
@@ -197,7 +197,7 @@ define i32 @test_x86_sse2_comineq_sd(<2
;
; SKX-LABEL: test_x86_sse2_comineq_sd:
; SKX: ## BB#0:
-; SKX-NEXT: vcomisd %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x2f,0xc1]
+; SKX-NEXT: vcomisd %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2f,0xc1]
; SKX-NEXT: setp %al ## encoding: [0x0f,0x9a,0xc0]
; SKX-NEXT: setne %cl ## encoding: [0x0f,0x95,0xc1]
; SKX-NEXT: orb %al, %cl ## encoding: [0x08,0xc1]
@@ -222,7 +222,7 @@ define <4 x float> @test_x86_sse2_cvtdq2
;
; SKX-LABEL: test_x86_sse2_cvtdq2ps:
; SKX: ## BB#0:
-; SKX-NEXT: vcvtdq2ps %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x5b,0xc0]
+; SKX-NEXT: vcvtdq2ps %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x5b,0xc0]
; SKX-NEXT: retl ## encoding: [0xc3]
%res = call <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32> %a0) ; <<4 x float>> [#uses=1]
ret <4 x float> %res
@@ -243,7 +243,7 @@ define <4 x i32> @test_x86_sse2_cvtpd2dq
;
; SKX-LABEL: test_x86_sse2_cvtpd2dq:
; SKX: ## BB#0:
-; SKX-NEXT: vcvtpd2dq %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xff,0x08,0xe6,0xc0]
+; SKX-NEXT: vcvtpd2dq %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0xe6,0xc0]
; SKX-NEXT: retl ## encoding: [0xc3]
%res = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> %a0) ; <<4 x i32>> [#uses=1]
ret <4 x i32> %res
@@ -264,7 +264,7 @@ define <2 x i64> @test_mm_cvtpd_epi32_ze
;
; SKX-LABEL: test_mm_cvtpd_epi32_zext:
; SKX: ## BB#0:
-; SKX-NEXT: vcvtpd2dq %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xff,0x08,0xe6,0xc0]
+; SKX-NEXT: vcvtpd2dq %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0xe6,0xc0]
; SKX-NEXT: retl ## encoding: [0xc3]
%cvt = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> %a0)
%res = shufflevector <4 x i32> %cvt, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
@@ -286,7 +286,7 @@ define <4 x float> @test_x86_sse2_cvtpd2
;
; SKX-LABEL: test_x86_sse2_cvtpd2ps:
; SKX: ## BB#0:
-; SKX-NEXT: vcvtpd2ps %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x5a,0xc0]
+; SKX-NEXT: vcvtpd2ps %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x5a,0xc0]
; SKX-NEXT: retl ## encoding: [0xc3]
%res = call <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double> %a0) ; <<4 x float>> [#uses=1]
ret <4 x float> %res
@@ -306,7 +306,7 @@ define <4 x float> @test_x86_sse2_cvtpd2
;
; SKX-LABEL: test_x86_sse2_cvtpd2ps_zext:
; SKX: ## BB#0:
-; SKX-NEXT: vcvtpd2ps %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x5a,0xc0]
+; SKX-NEXT: vcvtpd2ps %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x5a,0xc0]
; SKX-NEXT: retl ## encoding: [0xc3]
%cvt = call <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double> %a0)
%res = shufflevector <4 x float> %cvt, <4 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
@@ -342,7 +342,7 @@ define i32 @test_x86_sse2_cvtsd2si(<2 x
;
; SKX-LABEL: test_x86_sse2_cvtsd2si:
; SKX: ## BB#0:
-; SKX-NEXT: vcvtsd2si %xmm0, %eax ## encoding: [0x62,0xf1,0x7f,0x08,0x2d,0xc0]
+; SKX-NEXT: vcvtsd2si %xmm0, %eax ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x2d,0xc0]
; SKX-NEXT: retl ## encoding: [0xc3]
%res = call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> %a0) ; <i32> [#uses=1]
ret i32 %res
@@ -415,7 +415,7 @@ define <2 x double> @test_x86_sse2_cvtsi
;
; SKX-LABEL: test_x86_sse2_cvtsi2sd:
; SKX: ## BB#0:
-; SKX-NEXT: vcvtsi2sdl {{[0-9]+}}(%esp), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7f,0x08,0x2a,0x44,0x24,0x01]
+; SKX-NEXT: vcvtsi2sdl {{[0-9]+}}(%esp), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x2a,0x44,0x24,0x04]
; SKX-NEXT: retl ## encoding: [0xc3]
%res = call <2 x double> @llvm.x86.sse2.cvtsi2sd(<2 x double> %a0, i32 %a1) ; <<2 x double>> [#uses=1]
ret <2 x double> %res
@@ -488,7 +488,7 @@ define <4 x i32> @test_x86_sse2_cvttpd2d
;
; SKX-LABEL: test_x86_sse2_cvttpd2dq:
; SKX: ## BB#0:
-; SKX-NEXT: vcvttpd2dq %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0xe6,0xc0]
+; SKX-NEXT: vcvttpd2dq %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe6,0xc0]
; SKX-NEXT: retl ## encoding: [0xc3]
%res = call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> %a0) ; <<4 x i32>> [#uses=1]
ret <4 x i32> %res
@@ -509,7 +509,7 @@ define <2 x i64> @test_mm_cvttpd_epi32_z
;
; SKX-LABEL: test_mm_cvttpd_epi32_zext:
; SKX: ## BB#0:
-; SKX-NEXT: vcvttpd2dq %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0xe6,0xc0]
+; SKX-NEXT: vcvttpd2dq %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe6,0xc0]
; SKX-NEXT: retl ## encoding: [0xc3]
%cvt = call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> %a0)
%res = shufflevector <4 x i32> %cvt, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
@@ -531,7 +531,7 @@ define <4 x i32> @test_x86_sse2_cvttps2d
;
; SKX-LABEL: test_x86_sse2_cvttps2dq:
; SKX: ## BB#0:
-; SKX-NEXT: vcvttps2dq %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7e,0x08,0x5b,0xc0]
+; SKX-NEXT: vcvttps2dq %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x5b,0xc0]
; SKX-NEXT: retl ## encoding: [0xc3]
%res = call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %a0) ; <<4 x i32>> [#uses=1]
ret <4 x i32> %res
@@ -552,7 +552,7 @@ define i32 @test_x86_sse2_cvttsd2si(<2 x
;
; SKX-LABEL: test_x86_sse2_cvttsd2si:
; SKX: ## BB#0:
-; SKX-NEXT: vcvttsd2si %xmm0, %eax ## encoding: [0x62,0xf1,0x7f,0x08,0x2c,0xc0]
+; SKX-NEXT: vcvttsd2si %xmm0, %eax ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x2c,0xc0]
; SKX-NEXT: retl ## encoding: [0xc3]
%res = call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> %a0) ; <i32> [#uses=1]
ret i32 %res
@@ -573,7 +573,7 @@ define <2 x double> @test_x86_sse2_max_p
;
; SKX-LABEL: test_x86_sse2_max_pd:
; SKX: ## BB#0:
-; SKX-NEXT: vmaxpd %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x5f,0xc1]
+; SKX-NEXT: vmaxpd %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x5f,0xc1]
; SKX-NEXT: retl ## encoding: [0xc3]
%res = call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
ret <2 x double> %res
@@ -610,7 +610,7 @@ define <2 x double> @test_x86_sse2_min_p
;
; SKX-LABEL: test_x86_sse2_min_pd:
; SKX: ## BB#0:
-; SKX-NEXT: vminpd %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x5d,0xc1]
+; SKX-NEXT: vminpd %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x5d,0xc1]
; SKX-NEXT: retl ## encoding: [0xc3]
%res = call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
ret <2 x double> %res
@@ -665,7 +665,7 @@ define <8 x i16> @test_x86_sse2_packssdw
;
; SKX-LABEL: test_x86_sse2_packssdw_128:
; SKX: ## BB#0:
-; SKX-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x6b,0xc1]
+; SKX-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6b,0xc1]
; SKX-NEXT: retl ## encoding: [0xc3]
%res = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %a0, <4 x i32> %a1) ; <<8 x i16>> [#uses=1]
ret <8 x i16> %res
@@ -686,7 +686,7 @@ define <16 x i8> @test_x86_sse2_packsswb
;
; SKX-LABEL: test_x86_sse2_packsswb_128:
; SKX: ## BB#0:
-; SKX-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x63,0xc1]
+; SKX-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x63,0xc1]
; SKX-NEXT: retl ## encoding: [0xc3]
%res = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %a0, <8 x i16> %a1) ; <<16 x i8>> [#uses=1]
ret <16 x i8> %res
@@ -707,7 +707,7 @@ define <16 x i8> @test_x86_sse2_packuswb
;
; SKX-LABEL: test_x86_sse2_packuswb_128:
; SKX: ## BB#0:
-; SKX-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x67,0xc1]
+; SKX-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x67,0xc1]
; SKX-NEXT: retl ## encoding: [0xc3]
%res = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %a0, <8 x i16> %a1) ; <<16 x i8>> [#uses=1]
ret <16 x i8> %res
@@ -728,7 +728,7 @@ define <16 x i8> @test_x86_sse2_padds_b(
;
; SKX-LABEL: test_x86_sse2_padds_b:
; SKX: ## BB#0:
-; SKX-NEXT: vpaddsb %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xec,0xc1]
+; SKX-NEXT: vpaddsb %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xec,0xc1]
; SKX-NEXT: retl ## encoding: [0xc3]
%res = call <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
ret <16 x i8> %res
@@ -749,7 +749,7 @@ define <8 x i16> @test_x86_sse2_padds_w(
;
; SKX-LABEL: test_x86_sse2_padds_w:
; SKX: ## BB#0:
-; SKX-NEXT: vpaddsw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xed,0xc1]
+; SKX-NEXT: vpaddsw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xed,0xc1]
; SKX-NEXT: retl ## encoding: [0xc3]
%res = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
ret <8 x i16> %res
@@ -770,7 +770,7 @@ define <16 x i8> @test_x86_sse2_paddus_b
;
; SKX-LABEL: test_x86_sse2_paddus_b:
; SKX: ## BB#0:
-; SKX-NEXT: vpaddusb %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xdc,0xc1]
+; SKX-NEXT: vpaddusb %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xdc,0xc1]
; SKX-NEXT: retl ## encoding: [0xc3]
%res = call <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
ret <16 x i8> %res
@@ -791,7 +791,7 @@ define <8 x i16> @test_x86_sse2_paddus_w
;
; SKX-LABEL: test_x86_sse2_paddus_w:
; SKX: ## BB#0:
-; SKX-NEXT: vpaddusw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xdd,0xc1]
+; SKX-NEXT: vpaddusw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xdd,0xc1]
; SKX-NEXT: retl ## encoding: [0xc3]
%res = call <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
ret <8 x i16> %res
@@ -812,7 +812,7 @@ define <16 x i8> @test_x86_sse2_pavg_b(<
;
; SKX-LABEL: test_x86_sse2_pavg_b:
; SKX: ## BB#0:
-; SKX-NEXT: vpavgb %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xe0,0xc1]
+; SKX-NEXT: vpavgb %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe0,0xc1]
; SKX-NEXT: retl ## encoding: [0xc3]
%res = call <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
ret <16 x i8> %res
@@ -833,7 +833,7 @@ define <8 x i16> @test_x86_sse2_pavg_w(<
;
; SKX-LABEL: test_x86_sse2_pavg_w:
; SKX: ## BB#0:
-; SKX-NEXT: vpavgw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xe3,0xc1]
+; SKX-NEXT: vpavgw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe3,0xc1]
; SKX-NEXT: retl ## encoding: [0xc3]
%res = call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
ret <8 x i16> %res
@@ -854,7 +854,7 @@ define <4 x i32> @test_x86_sse2_pmadd_wd
;
; SKX-LABEL: test_x86_sse2_pmadd_wd:
; SKX: ## BB#0:
-; SKX-NEXT: vpmaddwd %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xf5,0xc1]
+; SKX-NEXT: vpmaddwd %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf5,0xc1]
; SKX-NEXT: retl ## encoding: [0xc3]
%res = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %a0, <8 x i16> %a1) ; <<4 x i32>> [#uses=1]
ret <4 x i32> %res
@@ -875,7 +875,7 @@ define <8 x i16> @test_x86_sse2_pmaxs_w(
;
; SKX-LABEL: test_x86_sse2_pmaxs_w:
; SKX: ## BB#0:
-; SKX-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xee,0xc1]
+; SKX-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xee,0xc1]
; SKX-NEXT: retl ## encoding: [0xc3]
%res = call <8 x i16> @llvm.x86.sse2.pmaxs.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
ret <8 x i16> %res
@@ -896,7 +896,7 @@ define <16 x i8> @test_x86_sse2_pmaxu_b(
;
; SKX-LABEL: test_x86_sse2_pmaxu_b:
; SKX: ## BB#0:
-; SKX-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xde,0xc1]
+; SKX-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xde,0xc1]
; SKX-NEXT: retl ## encoding: [0xc3]
%res = call <16 x i8> @llvm.x86.sse2.pmaxu.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
ret <16 x i8> %res
@@ -917,7 +917,7 @@ define <8 x i16> @test_x86_sse2_pmins_w(
;
; SKX-LABEL: test_x86_sse2_pmins_w:
; SKX: ## BB#0:
-; SKX-NEXT: vpminsw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xea,0xc1]
+; SKX-NEXT: vpminsw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xea,0xc1]
; SKX-NEXT: retl ## encoding: [0xc3]
%res = call <8 x i16> @llvm.x86.sse2.pmins.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
ret <8 x i16> %res
@@ -938,7 +938,7 @@ define <16 x i8> @test_x86_sse2_pminu_b(
;
; SKX-LABEL: test_x86_sse2_pminu_b:
; SKX: ## BB#0:
-; SKX-NEXT: vpminub %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xda,0xc1]
+; SKX-NEXT: vpminub %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xda,0xc1]
; SKX-NEXT: retl ## encoding: [0xc3]
%res = call <16 x i8> @llvm.x86.sse2.pminu.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
ret <16 x i8> %res
@@ -975,7 +975,7 @@ define <8 x i16> @test_x86_sse2_pmulh_w(
;
; SKX-LABEL: test_x86_sse2_pmulh_w:
; SKX: ## BB#0:
-; SKX-NEXT: vpmulhw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xe5,0xc1]
+; SKX-NEXT: vpmulhw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe5,0xc1]
; SKX-NEXT: retl ## encoding: [0xc3]
%res = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
ret <8 x i16> %res
@@ -996,7 +996,7 @@ define <8 x i16> @test_x86_sse2_pmulhu_w
;
; SKX-LABEL: test_x86_sse2_pmulhu_w:
; SKX: ## BB#0:
-; SKX-NEXT: vpmulhuw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xe4,0xc1]
+; SKX-NEXT: vpmulhuw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe4,0xc1]
; SKX-NEXT: retl ## encoding: [0xc3]
%res = call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
ret <8 x i16> %res
@@ -1017,7 +1017,7 @@ define <2 x i64> @test_x86_sse2_pmulu_dq
;
; SKX-LABEL: test_x86_sse2_pmulu_dq:
; SKX: ## BB#0:
-; SKX-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0xf4,0xc1]
+; SKX-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf4,0xc1]
; SKX-NEXT: retl ## encoding: [0xc3]
%res = call <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32> %a0, <4 x i32> %a1) ; <<2 x i64>> [#uses=1]
ret <2 x i64> %res
@@ -1038,7 +1038,7 @@ define <2 x i64> @test_x86_sse2_psad_bw(
;
; SKX-LABEL: test_x86_sse2_psad_bw:
; SKX: ## BB#0:
-; SKX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xf6,0xc1]
+; SKX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf6,0xc1]
; SKX-NEXT: retl ## encoding: [0xc3]
%res = call <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8> %a0, <16 x i8> %a1) ; <<2 x i64>> [#uses=1]
ret <2 x i64> %res
@@ -1059,7 +1059,7 @@ define <4 x i32> @test_x86_sse2_psll_d(<
;
; SKX-LABEL: test_x86_sse2_psll_d:
; SKX: ## BB#0:
-; SKX-NEXT: vpslld %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xf2,0xc1]
+; SKX-NEXT: vpslld %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf2,0xc1]
; SKX-NEXT: retl ## encoding: [0xc3]
%res = call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
ret <4 x i32> %res
@@ -1080,7 +1080,7 @@ define <2 x i64> @test_x86_sse2_psll_q(<
;
; SKX-LABEL: test_x86_sse2_psll_q:
; SKX: ## BB#0:
-; SKX-NEXT: vpsllq %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0xf3,0xc1]
+; SKX-NEXT: vpsllq %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf3,0xc1]
; SKX-NEXT: retl ## encoding: [0xc3]
%res = call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
ret <2 x i64> %res
@@ -1101,7 +1101,7 @@ define <8 x i16> @test_x86_sse2_psll_w(<
;
; SKX-LABEL: test_x86_sse2_psll_w:
; SKX: ## BB#0:
-; SKX-NEXT: vpsllw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xf1,0xc1]
+; SKX-NEXT: vpsllw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf1,0xc1]
; SKX-NEXT: retl ## encoding: [0xc3]
%res = call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
ret <8 x i16> %res
@@ -1122,7 +1122,7 @@ define <4 x i32> @test_x86_sse2_pslli_d(
;
; SKX-LABEL: test_x86_sse2_pslli_d:
; SKX: ## BB#0:
-; SKX-NEXT: vpslld $7, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x72,0xf0,0x07]
+; SKX-NEXT: vpslld $7, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x72,0xf0,0x07]
; SKX-NEXT: retl ## encoding: [0xc3]
%res = call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %a0, i32 7) ; <<4 x i32>> [#uses=1]
ret <4 x i32> %res
@@ -1143,7 +1143,7 @@ define <2 x i64> @test_x86_sse2_pslli_q(
;
; SKX-LABEL: test_x86_sse2_pslli_q:
; SKX: ## BB#0:
-; SKX-NEXT: vpsllq $7, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x73,0xf0,0x07]
+; SKX-NEXT: vpsllq $7, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x73,0xf0,0x07]
; SKX-NEXT: retl ## encoding: [0xc3]
%res = call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
ret <2 x i64> %res
@@ -1164,7 +1164,7 @@ define <8 x i16> @test_x86_sse2_pslli_w(
;
; SKX-LABEL: test_x86_sse2_pslli_w:
; SKX: ## BB#0:
-; SKX-NEXT: vpsllw $7, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x71,0xf0,0x07]
+; SKX-NEXT: vpsllw $7, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x71,0xf0,0x07]
; SKX-NEXT: retl ## encoding: [0xc3]
%res = call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %a0, i32 7) ; <<8 x i16>> [#uses=1]
ret <8 x i16> %res
@@ -1185,7 +1185,7 @@ define <4 x i32> @test_x86_sse2_psra_d(<
;
; SKX-LABEL: test_x86_sse2_psra_d:
; SKX: ## BB#0:
-; SKX-NEXT: vpsrad %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xe2,0xc1]
+; SKX-NEXT: vpsrad %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe2,0xc1]
; SKX-NEXT: retl ## encoding: [0xc3]
%res = call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
ret <4 x i32> %res
@@ -1206,7 +1206,7 @@ define <8 x i16> @test_x86_sse2_psra_w(<
;
; SKX-LABEL: test_x86_sse2_psra_w:
; SKX: ## BB#0:
-; SKX-NEXT: vpsraw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xe1,0xc1]
+; SKX-NEXT: vpsraw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe1,0xc1]
; SKX-NEXT: retl ## encoding: [0xc3]
%res = call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
ret <8 x i16> %res
@@ -1227,7 +1227,7 @@ define <4 x i32> @test_x86_sse2_psrai_d(
;
; SKX-LABEL: test_x86_sse2_psrai_d:
; SKX: ## BB#0:
-; SKX-NEXT: vpsrad $7, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x72,0xe0,0x07]
+; SKX-NEXT: vpsrad $7, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x72,0xe0,0x07]
; SKX-NEXT: retl ## encoding: [0xc3]
%res = call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %a0, i32 7) ; <<4 x i32>> [#uses=1]
ret <4 x i32> %res
@@ -1248,7 +1248,7 @@ define <8 x i16> @test_x86_sse2_psrai_w(
;
; SKX-LABEL: test_x86_sse2_psrai_w:
; SKX: ## BB#0:
-; SKX-NEXT: vpsraw $7, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x71,0xe0,0x07]
+; SKX-NEXT: vpsraw $7, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x71,0xe0,0x07]
; SKX-NEXT: retl ## encoding: [0xc3]
%res = call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %a0, i32 7) ; <<8 x i16>> [#uses=1]
ret <8 x i16> %res
@@ -1269,7 +1269,7 @@ define <4 x i32> @test_x86_sse2_psrl_d(<
;
; SKX-LABEL: test_x86_sse2_psrl_d:
; SKX: ## BB#0:
-; SKX-NEXT: vpsrld %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xd2,0xc1]
+; SKX-NEXT: vpsrld %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd2,0xc1]
; SKX-NEXT: retl ## encoding: [0xc3]
%res = call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
ret <4 x i32> %res
@@ -1290,7 +1290,7 @@ define <2 x i64> @test_x86_sse2_psrl_q(<
;
; SKX-LABEL: test_x86_sse2_psrl_q:
; SKX: ## BB#0:
-; SKX-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0xd3,0xc1]
+; SKX-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd3,0xc1]
; SKX-NEXT: retl ## encoding: [0xc3]
%res = call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
ret <2 x i64> %res
@@ -1311,7 +1311,7 @@ define <8 x i16> @test_x86_sse2_psrl_w(<
;
; SKX-LABEL: test_x86_sse2_psrl_w:
; SKX: ## BB#0:
-; SKX-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xd1,0xc1]
+; SKX-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd1,0xc1]
; SKX-NEXT: retl ## encoding: [0xc3]
%res = call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
ret <8 x i16> %res
@@ -1332,7 +1332,7 @@ define <4 x i32> @test_x86_sse2_psrli_d(
;
; SKX-LABEL: test_x86_sse2_psrli_d:
; SKX: ## BB#0:
-; SKX-NEXT: vpsrld $7, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x72,0xd0,0x07]
+; SKX-NEXT: vpsrld $7, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x72,0xd0,0x07]
; SKX-NEXT: retl ## encoding: [0xc3]
%res = call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %a0, i32 7) ; <<4 x i32>> [#uses=1]
ret <4 x i32> %res
@@ -1353,7 +1353,7 @@ define <2 x i64> @test_x86_sse2_psrli_q(
;
; SKX-LABEL: test_x86_sse2_psrli_q:
; SKX: ## BB#0:
-; SKX-NEXT: vpsrlq $7, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x73,0xd0,0x07]
+; SKX-NEXT: vpsrlq $7, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x73,0xd0,0x07]
; SKX-NEXT: retl ## encoding: [0xc3]
%res = call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
ret <2 x i64> %res
@@ -1374,7 +1374,7 @@ define <8 x i16> @test_x86_sse2_psrli_w(
;
; SKX-LABEL: test_x86_sse2_psrli_w:
; SKX: ## BB#0:
-; SKX-NEXT: vpsrlw $7, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x71,0xd0,0x07]
+; SKX-NEXT: vpsrlw $7, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x71,0xd0,0x07]
; SKX-NEXT: retl ## encoding: [0xc3]
%res = call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %a0, i32 7) ; <<8 x i16>> [#uses=1]
ret <8 x i16> %res
@@ -1395,7 +1395,7 @@ define <16 x i8> @test_x86_sse2_psubs_b(
;
; SKX-LABEL: test_x86_sse2_psubs_b:
; SKX: ## BB#0:
-; SKX-NEXT: vpsubsb %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xe8,0xc1]
+; SKX-NEXT: vpsubsb %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe8,0xc1]
; SKX-NEXT: retl ## encoding: [0xc3]
%res = call <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
ret <16 x i8> %res
@@ -1416,7 +1416,7 @@ define <8 x i16> @test_x86_sse2_psubs_w(
;
; SKX-LABEL: test_x86_sse2_psubs_w:
; SKX: ## BB#0:
-; SKX-NEXT: vpsubsw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xe9,0xc1]
+; SKX-NEXT: vpsubsw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe9,0xc1]
; SKX-NEXT: retl ## encoding: [0xc3]
%res = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
ret <8 x i16> %res
@@ -1437,7 +1437,7 @@ define <16 x i8> @test_x86_sse2_psubus_b
;
; SKX-LABEL: test_x86_sse2_psubus_b:
; SKX: ## BB#0:
-; SKX-NEXT: vpsubusb %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xd8,0xc1]
+; SKX-NEXT: vpsubusb %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd8,0xc1]
; SKX-NEXT: retl ## encoding: [0xc3]
%res = call <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
ret <16 x i8> %res
@@ -1458,7 +1458,7 @@ define <8 x i16> @test_x86_sse2_psubus_w
;
; SKX-LABEL: test_x86_sse2_psubus_w:
; SKX: ## BB#0:
-; SKX-NEXT: vpsubusw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xd9,0xc1]
+; SKX-NEXT: vpsubusw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd9,0xc1]
; SKX-NEXT: retl ## encoding: [0xc3]
%res = call <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
ret <8 x i16> %res
@@ -1516,7 +1516,7 @@ define <2 x double> @test_x86_sse2_sqrt_
; SKX-LABEL: test_x86_sse2_sqrt_sd_vec_load:
; SKX: ## BB#0:
; SKX-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
-; SKX-NEXT: vmovaps (%eax), %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0x00]
+; SKX-NEXT: vmovaps (%eax), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x00]
; SKX-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x51,0xc0]
; SKX-NEXT: retl ## encoding: [0xc3]
%a1 = load <2 x double>, <2 x double>* %a0, align 16
@@ -1546,7 +1546,7 @@ define i32 @test_x86_sse2_ucomieq_sd(<2
;
; SKX-LABEL: test_x86_sse2_ucomieq_sd:
; SKX: ## BB#0:
-; SKX-NEXT: vucomisd %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x2e,0xc1]
+; SKX-NEXT: vucomisd %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2e,0xc1]
; SKX-NEXT: setnp %al ## encoding: [0x0f,0x9b,0xc0]
; SKX-NEXT: sete %cl ## encoding: [0x0f,0x94,0xc1]
; SKX-NEXT: andb %al, %cl ## encoding: [0x20,0xc1]
@@ -1576,7 +1576,7 @@ define i32 @test_x86_sse2_ucomige_sd(<2
; SKX-LABEL: test_x86_sse2_ucomige_sd:
; SKX: ## BB#0:
; SKX-NEXT: xorl %eax, %eax ## encoding: [0x31,0xc0]
-; SKX-NEXT: vucomisd %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x2e,0xc1]
+; SKX-NEXT: vucomisd %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2e,0xc1]
; SKX-NEXT: setae %al ## encoding: [0x0f,0x93,0xc0]
; SKX-NEXT: retl ## encoding: [0xc3]
%res = call i32 @llvm.x86.sse2.ucomige.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
@@ -1603,7 +1603,7 @@ define i32 @test_x86_sse2_ucomigt_sd(<2
; SKX-LABEL: test_x86_sse2_ucomigt_sd:
; SKX: ## BB#0:
; SKX-NEXT: xorl %eax, %eax ## encoding: [0x31,0xc0]
-; SKX-NEXT: vucomisd %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x2e,0xc1]
+; SKX-NEXT: vucomisd %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2e,0xc1]
; SKX-NEXT: seta %al ## encoding: [0x0f,0x97,0xc0]
; SKX-NEXT: retl ## encoding: [0xc3]
%res = call i32 @llvm.x86.sse2.ucomigt.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
@@ -1630,7 +1630,7 @@ define i32 @test_x86_sse2_ucomile_sd(<2
; SKX-LABEL: test_x86_sse2_ucomile_sd:
; SKX: ## BB#0:
; SKX-NEXT: xorl %eax, %eax ## encoding: [0x31,0xc0]
-; SKX-NEXT: vucomisd %xmm0, %xmm1 ## encoding: [0x62,0xf1,0xfd,0x08,0x2e,0xc8]
+; SKX-NEXT: vucomisd %xmm0, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2e,0xc8]
; SKX-NEXT: setae %al ## encoding: [0x0f,0x93,0xc0]
; SKX-NEXT: retl ## encoding: [0xc3]
%res = call i32 @llvm.x86.sse2.ucomile.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
@@ -1657,7 +1657,7 @@ define i32 @test_x86_sse2_ucomilt_sd(<2
; SKX-LABEL: test_x86_sse2_ucomilt_sd:
; SKX: ## BB#0:
; SKX-NEXT: xorl %eax, %eax ## encoding: [0x31,0xc0]
-; SKX-NEXT: vucomisd %xmm0, %xmm1 ## encoding: [0x62,0xf1,0xfd,0x08,0x2e,0xc8]
+; SKX-NEXT: vucomisd %xmm0, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2e,0xc8]
; SKX-NEXT: seta %al ## encoding: [0x0f,0x97,0xc0]
; SKX-NEXT: retl ## encoding: [0xc3]
%res = call i32 @llvm.x86.sse2.ucomilt.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
@@ -1687,7 +1687,7 @@ define i32 @test_x86_sse2_ucomineq_sd(<2
;
; SKX-LABEL: test_x86_sse2_ucomineq_sd:
; SKX: ## BB#0:
-; SKX-NEXT: vucomisd %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x2e,0xc1]
+; SKX-NEXT: vucomisd %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2e,0xc1]
; SKX-NEXT: setp %al ## encoding: [0x0f,0x9a,0xc0]
; SKX-NEXT: setne %cl ## encoding: [0x0f,0x95,0xc1]
; SKX-NEXT: orb %al, %cl ## encoding: [0x08,0xc1]
Modified: llvm/trunk/test/CodeGen/X86/sse41-intrinsics-x86.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse41-intrinsics-x86.ll?rev=290663&r1=290662&r2=290663&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sse41-intrinsics-x86.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sse41-intrinsics-x86.ll Wed Dec 28 04:12:48 2016
@@ -127,7 +127,7 @@ define <8 x i16> @test_x86_sse41_packusd
;
; SKX-LABEL: test_x86_sse41_packusdw:
; SKX: ## BB#0:
-; SKX-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7d,0x08,0x2b,0xc1]
+; SKX-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x2b,0xc1]
; SKX-NEXT: retl ## encoding: [0xc3]
%res = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %a0, <4 x i32> %a1) ; <<8 x i16>> [#uses=1]
ret <8 x i16> %res
@@ -183,7 +183,7 @@ define <16 x i8> @test_x86_sse41_pmaxsb(
;
; SKX-LABEL: test_x86_sse41_pmaxsb:
; SKX: ## BB#0:
-; SKX-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7d,0x08,0x3c,0xc1]
+; SKX-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x3c,0xc1]
; SKX-NEXT: retl ## encoding: [0xc3]
%res = call <16 x i8> @llvm.x86.sse41.pmaxsb(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
ret <16 x i8> %res
@@ -204,7 +204,7 @@ define <4 x i32> @test_x86_sse41_pmaxsd(
;
; SKX-LABEL: test_x86_sse41_pmaxsd:
; SKX: ## BB#0:
-; SKX-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7d,0x08,0x3d,0xc1]
+; SKX-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x3d,0xc1]
; SKX-NEXT: retl ## encoding: [0xc3]
%res = call <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
ret <4 x i32> %res
@@ -225,7 +225,7 @@ define <4 x i32> @test_x86_sse41_pmaxud(
;
; SKX-LABEL: test_x86_sse41_pmaxud:
; SKX: ## BB#0:
-; SKX-NEXT: vpmaxud %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7d,0x08,0x3f,0xc1]
+; SKX-NEXT: vpmaxud %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x3f,0xc1]
; SKX-NEXT: retl ## encoding: [0xc3]
%res = call <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
ret <4 x i32> %res
@@ -246,7 +246,7 @@ define <8 x i16> @test_x86_sse41_pmaxuw(
;
; SKX-LABEL: test_x86_sse41_pmaxuw:
; SKX: ## BB#0:
-; SKX-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7d,0x08,0x3e,0xc1]
+; SKX-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x3e,0xc1]
; SKX-NEXT: retl ## encoding: [0xc3]
%res = call <8 x i16> @llvm.x86.sse41.pmaxuw(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
ret <8 x i16> %res
@@ -267,7 +267,7 @@ define <16 x i8> @test_x86_sse41_pminsb(
;
; SKX-LABEL: test_x86_sse41_pminsb:
; SKX: ## BB#0:
-; SKX-NEXT: vpminsb %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7d,0x08,0x38,0xc1]
+; SKX-NEXT: vpminsb %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x38,0xc1]
; SKX-NEXT: retl ## encoding: [0xc3]
%res = call <16 x i8> @llvm.x86.sse41.pminsb(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
ret <16 x i8> %res
@@ -288,7 +288,7 @@ define <4 x i32> @test_x86_sse41_pminsd(
;
; SKX-LABEL: test_x86_sse41_pminsd:
; SKX: ## BB#0:
-; SKX-NEXT: vpminsd %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7d,0x08,0x39,0xc1]
+; SKX-NEXT: vpminsd %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x39,0xc1]
; SKX-NEXT: retl ## encoding: [0xc3]
%res = call <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
ret <4 x i32> %res
@@ -309,7 +309,7 @@ define <4 x i32> @test_x86_sse41_pminud(
;
; SKX-LABEL: test_x86_sse41_pminud:
; SKX: ## BB#0:
-; SKX-NEXT: vpminud %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7d,0x08,0x3b,0xc1]
+; SKX-NEXT: vpminud %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x3b,0xc1]
; SKX-NEXT: retl ## encoding: [0xc3]
%res = call <4 x i32> @llvm.x86.sse41.pminud(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
ret <4 x i32> %res
@@ -330,7 +330,7 @@ define <8 x i16> @test_x86_sse41_pminuw(
;
; SKX-LABEL: test_x86_sse41_pminuw:
; SKX: ## BB#0:
-; SKX-NEXT: vpminuw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7d,0x08,0x3a,0xc1]
+; SKX-NEXT: vpminuw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x3a,0xc1]
; SKX-NEXT: retl ## encoding: [0xc3]
%res = call <8 x i16> @llvm.x86.sse41.pminuw(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
ret <8 x i16> %res
@@ -351,7 +351,7 @@ define <2 x i64> @test_x86_sse41_pmuldq(
;
; SKX-LABEL: test_x86_sse41_pmuldq:
; SKX: ## BB#0:
-; SKX-NEXT: vpmuldq %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf2,0xfd,0x08,0x28,0xc1]
+; SKX-NEXT: vpmuldq %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x28,0xc1]
; SKX-NEXT: retl ## encoding: [0xc3]
%res = call <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32> %a0, <4 x i32> %a1) ; <<2 x i64>> [#uses=1]
ret <2 x i64> %res
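
[Aside, not part of the patch] For the SSE4.1 intrinsics above, the compressed form uses the 3-byte VEX prefix (leading byte 0xc4), since these opcodes live in the 0F38 map that the 2-byte prefix cannot express, so the saving is one byte per instruction rather than two. Another purely illustrative Python sketch, with the bytes taken from the vpackusdw hunk above:

    # Illustrative only: byte lists copied from the vpackusdw check lines above.
    evex_vpackusdw = [0x62, 0xF2, 0x7D, 0x08, 0x2B, 0xC1]  # old EVEX form, 6 bytes
    vex_vpackusdw  = [0xC4, 0xE2, 0x79, 0x2B, 0xC1]        # new 3-byte-prefix VEX form, 5 bytes

    print(len(evex_vpackusdw) - len(vex_vpackusdw))  # -> 1 byte saved
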
Modified: llvm/trunk/test/CodeGen/X86/sse42-intrinsics-x86.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse42-intrinsics-x86.ll?rev=290663&r1=290662&r2=290663&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sse42-intrinsics-x86.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sse42-intrinsics-x86.ll Wed Dec 28 04:12:48 2016
@@ -52,7 +52,7 @@ define i32 @test_x86_sse42_pcmpestri128_
; SKX: ## BB#0:
; SKX-NEXT: movl {{[0-9]+}}(%esp), %ecx ## encoding: [0x8b,0x4c,0x24,0x08]
; SKX-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
-; SKX-NEXT: vmovdqu8 (%eax), %xmm0 ## encoding: [0x62,0xf1,0x7f,0x08,0x6f,0x00]
+; SKX-NEXT: vmovdqu (%eax), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x6f,0x00]
; SKX-NEXT: movl $7, %eax ## encoding: [0xb8,0x07,0x00,0x00,0x00]
; SKX-NEXT: movl $7, %edx ## encoding: [0xba,0x07,0x00,0x00,0x00]
; SKX-NEXT: vpcmpestri $7, (%ecx), %xmm0 ## encoding: [0xc4,0xe3,0x79,0x61,0x01,0x07]
@@ -292,7 +292,7 @@ define i32 @test_x86_sse42_pcmpistri128_
; SKX: ## BB#0:
; SKX-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x08]
; SKX-NEXT: movl {{[0-9]+}}(%esp), %ecx ## encoding: [0x8b,0x4c,0x24,0x04]
-; SKX-NEXT: vmovdqu8 (%ecx), %xmm0 ## encoding: [0x62,0xf1,0x7f,0x08,0x6f,0x01]
+; SKX-NEXT: vmovdqu (%ecx), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x6f,0x01]
; SKX-NEXT: vpcmpistri $7, (%eax), %xmm0 ## encoding: [0xc4,0xe3,0x79,0x63,0x00,0x07]
; SKX-NEXT: movl %ecx, %eax ## encoding: [0x89,0xc8]
; SKX-NEXT: retl ## encoding: [0xc3]
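
[Aside, not part of the patch] In the SSE4.2 tests the mnemonic changes as well: the unmasked vmovdqu8 load is rewritten to plain vmovdqu, whose VEX spelling exists only under the base mnemonic, and the encoding comment again drops from six bytes to four. A small illustrative Python helper (not part of the patch) that tells the two check lines apart by their leading prefix byte:

    # Minimal sketch: classify an encoding from a check line by its first prefix byte.
    def prefix_kind(enc):
        return {0x62: "EVEX (4-byte prefix)",
                0xC4: "VEX (3-byte prefix)",
                0xC5: "VEX (2-byte prefix)"}.get(enc[0], "legacy/other")

    # Byte lists copied from the vmovdqu8 / vmovdqu hunk above.
    print(prefix_kind([0x62, 0xF1, 0x7F, 0x08, 0x6F, 0x00]))  # old line -> EVEX (4-byte prefix)
    print(prefix_kind([0xC5, 0xFA, 0x6F, 0x00]))              # new line -> VEX (2-byte prefix)
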
Modified: llvm/trunk/test/CodeGen/X86/ssse3-intrinsics-x86.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/ssse3-intrinsics-x86.ll?rev=290663&r1=290662&r2=290663&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/ssse3-intrinsics-x86.ll (original)
+++ llvm/trunk/test/CodeGen/X86/ssse3-intrinsics-x86.ll Wed Dec 28 04:12:48 2016
@@ -16,7 +16,7 @@ define <16 x i8> @test_x86_ssse3_pabs_b_
;
; SKX-LABEL: test_x86_ssse3_pabs_b_128:
; SKX: ## BB#0:
-; SKX-NEXT: vpabsb %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7d,0x08,0x1c,0xc0]
+; SKX-NEXT: vpabsb %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x1c,0xc0]
; SKX-NEXT: retl ## encoding: [0xc3]
%res = call <16 x i8> @llvm.x86.ssse3.pabs.b.128(<16 x i8> %a0) ; <<16 x i8>> [#uses=1]
ret <16 x i8> %res
@@ -37,7 +37,7 @@ define <4 x i32> @test_x86_ssse3_pabs_d_
;
; SKX-LABEL: test_x86_ssse3_pabs_d_128:
; SKX: ## BB#0:
-; SKX-NEXT: vpabsd %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7d,0x08,0x1e,0xc0]
+; SKX-NEXT: vpabsd %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x1e,0xc0]
; SKX-NEXT: retl ## encoding: [0xc3]
%res = call <4 x i32> @llvm.x86.ssse3.pabs.d.128(<4 x i32> %a0) ; <<4 x i32>> [#uses=1]
ret <4 x i32> %res
@@ -58,7 +58,7 @@ define <8 x i16> @test_x86_ssse3_pabs_w_
;
; SKX-LABEL: test_x86_ssse3_pabs_w_128:
; SKX: ## BB#0:
-; SKX-NEXT: vpabsw %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7d,0x08,0x1d,0xc0]
+; SKX-NEXT: vpabsw %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x1d,0xc0]
; SKX-NEXT: retl ## encoding: [0xc3]
%res = call <8 x i16> @llvm.x86.ssse3.pabs.w.128(<8 x i16> %a0) ; <<8 x i16>> [#uses=1]
ret <8 x i16> %res
@@ -175,7 +175,7 @@ define <8 x i16> @test_x86_ssse3_pmadd_u
;
; SKX-LABEL: test_x86_ssse3_pmadd_ub_sw_128:
; SKX: ## BB#0:
-; SKX-NEXT: vpmaddubsw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7d,0x08,0x04,0xc1]
+; SKX-NEXT: vpmaddubsw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x04,0xc1]
; SKX-NEXT: retl ## encoding: [0xc3]
%res = call <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8> %a0, <16 x i8> %a1) ; <<8 x i16>> [#uses=1]
ret <8 x i16> %res
@@ -196,7 +196,7 @@ define <8 x i16> @test_x86_ssse3_pmul_hr
;
; SKX-LABEL: test_x86_ssse3_pmul_hr_sw_128:
; SKX: ## BB#0:
-; SKX-NEXT: vpmulhrsw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7d,0x08,0x0b,0xc1]
+; SKX-NEXT: vpmulhrsw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x0b,0xc1]
; SKX-NEXT: retl ## encoding: [0xc3]
%res = call <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
ret <8 x i16> %res
@@ -217,7 +217,7 @@ define <16 x i8> @test_x86_ssse3_pshuf_b
;
; SKX-LABEL: test_x86_ssse3_pshuf_b_128:
; SKX: ## BB#0:
-; SKX-NEXT: vpshufb %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7d,0x08,0x00,0xc1]
+; SKX-NEXT: vpshufb %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x00,0xc1]
; SKX-NEXT: retl ## encoding: [0xc3]
%res = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
ret <16 x i8> %res
Modified: llvm/trunk/test/CodeGen/X86/subvector-broadcast.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/subvector-broadcast.ll?rev=290663&r1=290662&r2=290663&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/subvector-broadcast.ll (original)
+++ llvm/trunk/test/CodeGen/X86/subvector-broadcast.ll Wed Dec 28 04:12:48 2016
@@ -558,7 +558,7 @@ define <32 x i16> @test_broadcast_8i16_3
; X32-AVX512F: ## BB#0:
; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-AVX512F-NEXT: vbroadcasti32x4 {{.*#+}} ymm0 = mem[0,1,2,3,0,1,2,3]
-; X32-AVX512F-NEXT: vmovdqa64 %ymm0, %ymm1
+; X32-AVX512F-NEXT: vmovdqa %ymm0, %ymm1
; X32-AVX512F-NEXT: retl
;
; X32-AVX512BW-LABEL: test_broadcast_8i16_32i16:
@@ -571,7 +571,7 @@ define <32 x i16> @test_broadcast_8i16_3
; X32-AVX512DQ: ## BB#0:
; X32-AVX512DQ-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-AVX512DQ-NEXT: vbroadcasti32x4 {{.*#+}} ymm0 = mem[0,1,2,3,0,1,2,3]
-; X32-AVX512DQ-NEXT: vmovdqa64 %ymm0, %ymm1
+; X32-AVX512DQ-NEXT: vmovdqa %ymm0, %ymm1
; X32-AVX512DQ-NEXT: retl
;
; X64-AVX1-LABEL: test_broadcast_8i16_32i16:
@@ -589,7 +589,7 @@ define <32 x i16> @test_broadcast_8i16_3
; X64-AVX512F-LABEL: test_broadcast_8i16_32i16:
; X64-AVX512F: ## BB#0:
; X64-AVX512F-NEXT: vbroadcasti32x4 {{.*#+}} ymm0 = mem[0,1,2,3,0,1,2,3]
-; X64-AVX512F-NEXT: vmovdqa64 %ymm0, %ymm1
+; X64-AVX512F-NEXT: vmovdqa %ymm0, %ymm1
; X64-AVX512F-NEXT: retq
;
; X64-AVX512BW-LABEL: test_broadcast_8i16_32i16:
@@ -600,7 +600,7 @@ define <32 x i16> @test_broadcast_8i16_3
; X64-AVX512DQ-LABEL: test_broadcast_8i16_32i16:
; X64-AVX512DQ: ## BB#0:
; X64-AVX512DQ-NEXT: vbroadcasti32x4 {{.*#+}} ymm0 = mem[0,1,2,3,0,1,2,3]
-; X64-AVX512DQ-NEXT: vmovdqa64 %ymm0, %ymm1
+; X64-AVX512DQ-NEXT: vmovdqa %ymm0, %ymm1
; X64-AVX512DQ-NEXT: retq
%1 = load <8 x i16>, <8 x i16> *%p
%2 = shufflevector <8 x i16> %1, <8 x i16> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
@@ -708,7 +708,7 @@ define <64 x i8> @test_broadcast_16i8_64
; X32-AVX512F: ## BB#0:
; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-AVX512F-NEXT: vbroadcasti32x4 {{.*#+}} ymm0 = mem[0,1,2,3,0,1,2,3]
-; X32-AVX512F-NEXT: vmovdqa64 %ymm0, %ymm1
+; X32-AVX512F-NEXT: vmovdqa %ymm0, %ymm1
; X32-AVX512F-NEXT: retl
;
; X32-AVX512BW-LABEL: test_broadcast_16i8_64i8:
@@ -721,7 +721,7 @@ define <64 x i8> @test_broadcast_16i8_64
; X32-AVX512DQ: ## BB#0:
; X32-AVX512DQ-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-AVX512DQ-NEXT: vbroadcasti32x4 {{.*#+}} ymm0 = mem[0,1,2,3,0,1,2,3]
-; X32-AVX512DQ-NEXT: vmovdqa64 %ymm0, %ymm1
+; X32-AVX512DQ-NEXT: vmovdqa %ymm0, %ymm1
; X32-AVX512DQ-NEXT: retl
;
; X64-AVX1-LABEL: test_broadcast_16i8_64i8:
@@ -739,7 +739,7 @@ define <64 x i8> @test_broadcast_16i8_64
; X64-AVX512F-LABEL: test_broadcast_16i8_64i8:
; X64-AVX512F: ## BB#0:
; X64-AVX512F-NEXT: vbroadcasti32x4 {{.*#+}} ymm0 = mem[0,1,2,3,0,1,2,3]
-; X64-AVX512F-NEXT: vmovdqa64 %ymm0, %ymm1
+; X64-AVX512F-NEXT: vmovdqa %ymm0, %ymm1
; X64-AVX512F-NEXT: retq
;
; X64-AVX512BW-LABEL: test_broadcast_16i8_64i8:
@@ -750,7 +750,7 @@ define <64 x i8> @test_broadcast_16i8_64
; X64-AVX512DQ-LABEL: test_broadcast_16i8_64i8:
; X64-AVX512DQ: ## BB#0:
; X64-AVX512DQ-NEXT: vbroadcasti32x4 {{.*#+}} ymm0 = mem[0,1,2,3,0,1,2,3]
-; X64-AVX512DQ-NEXT: vmovdqa64 %ymm0, %ymm1
+; X64-AVX512DQ-NEXT: vmovdqa %ymm0, %ymm1
; X64-AVX512DQ-NEXT: retq
%1 = load <16 x i8>, <16 x i8> *%p
%2 = shufflevector <16 x i8> %1, <16 x i8> undef, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
@@ -900,8 +900,8 @@ define <4 x i64> @test_broadcast_2i64_4i
; X32-AVX512F: ## BB#0:
; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X32-AVX512F-NEXT: vmovdqa64 (%ecx), %xmm0
-; X32-AVX512F-NEXT: vmovdqa64 %xmm0, (%eax)
+; X32-AVX512F-NEXT: vmovdqa (%ecx), %xmm0
+; X32-AVX512F-NEXT: vmovdqa %xmm0, (%eax)
; X32-AVX512F-NEXT: vinserti32x4 $1, %xmm0, %ymm0, %ymm0
; X32-AVX512F-NEXT: retl
;
@@ -909,8 +909,8 @@ define <4 x i64> @test_broadcast_2i64_4i
; X32-AVX512BW: ## BB#0:
; X32-AVX512BW-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-AVX512BW-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X32-AVX512BW-NEXT: vmovdqa64 (%ecx), %xmm0
-; X32-AVX512BW-NEXT: vmovdqa64 %xmm0, (%eax)
+; X32-AVX512BW-NEXT: vmovdqa (%ecx), %xmm0
+; X32-AVX512BW-NEXT: vmovdqa %xmm0, (%eax)
; X32-AVX512BW-NEXT: vinserti32x4 $1, %xmm0, %ymm0, %ymm0
; X32-AVX512BW-NEXT: retl
;
@@ -918,8 +918,8 @@ define <4 x i64> @test_broadcast_2i64_4i
; X32-AVX512DQ: ## BB#0:
; X32-AVX512DQ-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-AVX512DQ-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X32-AVX512DQ-NEXT: vmovdqa64 (%ecx), %xmm0
-; X32-AVX512DQ-NEXT: vmovdqa64 %xmm0, (%eax)
+; X32-AVX512DQ-NEXT: vmovdqa (%ecx), %xmm0
+; X32-AVX512DQ-NEXT: vmovdqa %xmm0, (%eax)
; X32-AVX512DQ-NEXT: vinserti64x2 $1, %xmm0, %ymm0, %ymm0
; X32-AVX512DQ-NEXT: retl
;
@@ -932,22 +932,22 @@ define <4 x i64> @test_broadcast_2i64_4i
;
; X64-AVX512F-LABEL: test_broadcast_2i64_4i64_reuse:
; X64-AVX512F: ## BB#0:
-; X64-AVX512F-NEXT: vmovdqa64 (%rdi), %xmm0
-; X64-AVX512F-NEXT: vmovdqa64 %xmm0, (%rsi)
+; X64-AVX512F-NEXT: vmovdqa (%rdi), %xmm0
+; X64-AVX512F-NEXT: vmovdqa %xmm0, (%rsi)
; X64-AVX512F-NEXT: vinserti32x4 $1, %xmm0, %ymm0, %ymm0
; X64-AVX512F-NEXT: retq
;
; X64-AVX512BW-LABEL: test_broadcast_2i64_4i64_reuse:
; X64-AVX512BW: ## BB#0:
-; X64-AVX512BW-NEXT: vmovdqa64 (%rdi), %xmm0
-; X64-AVX512BW-NEXT: vmovdqa64 %xmm0, (%rsi)
+; X64-AVX512BW-NEXT: vmovdqa (%rdi), %xmm0
+; X64-AVX512BW-NEXT: vmovdqa %xmm0, (%rsi)
; X64-AVX512BW-NEXT: vinserti32x4 $1, %xmm0, %ymm0, %ymm0
; X64-AVX512BW-NEXT: retq
;
; X64-AVX512DQ-LABEL: test_broadcast_2i64_4i64_reuse:
; X64-AVX512DQ: ## BB#0:
-; X64-AVX512DQ-NEXT: vmovdqa64 (%rdi), %xmm0
-; X64-AVX512DQ-NEXT: vmovdqa64 %xmm0, (%rsi)
+; X64-AVX512DQ-NEXT: vmovdqa (%rdi), %xmm0
+; X64-AVX512DQ-NEXT: vmovdqa %xmm0, (%rsi)
; X64-AVX512DQ-NEXT: vinserti64x2 $1, %xmm0, %ymm0, %ymm0
; X64-AVX512DQ-NEXT: retq
%1 = load <2 x i64>, <2 x i64>* %p0
@@ -1008,8 +1008,8 @@ define <8 x i32> @test_broadcast_4i32_8i
; X32-AVX512: ## BB#0:
; X32-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-AVX512-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X32-AVX512-NEXT: vmovdqa32 (%ecx), %xmm0
-; X32-AVX512-NEXT: vmovdqa32 %xmm0, (%eax)
+; X32-AVX512-NEXT: vmovdqa (%ecx), %xmm0
+; X32-AVX512-NEXT: vmovdqa %xmm0, (%eax)
; X32-AVX512-NEXT: vinserti32x4 $1, %xmm0, %ymm0, %ymm0
; X32-AVX512-NEXT: retl
;
@@ -1022,8 +1022,8 @@ define <8 x i32> @test_broadcast_4i32_8i
;
; X64-AVX512-LABEL: test_broadcast_4i32_8i32_reuse:
; X64-AVX512: ## BB#0:
-; X64-AVX512-NEXT: vmovdqa32 (%rdi), %xmm0
-; X64-AVX512-NEXT: vmovdqa32 %xmm0, (%rsi)
+; X64-AVX512-NEXT: vmovdqa (%rdi), %xmm0
+; X64-AVX512-NEXT: vmovdqa %xmm0, (%rsi)
; X64-AVX512-NEXT: vinserti32x4 $1, %xmm0, %ymm0, %ymm0
; X64-AVX512-NEXT: retq
%1 = load <4 x i32>, <4 x i32>* %p0
@@ -1046,8 +1046,8 @@ define <16 x i16> @test_broadcast_8i16_1
; X32-AVX512F: ## BB#0:
; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X32-AVX512F-NEXT: vmovdqa64 (%ecx), %xmm0
-; X32-AVX512F-NEXT: vmovdqa32 %xmm0, (%eax)
+; X32-AVX512F-NEXT: vmovdqa (%ecx), %xmm0
+; X32-AVX512F-NEXT: vmovdqa %xmm0, (%eax)
; X32-AVX512F-NEXT: vinserti32x4 $1, %xmm0, %ymm0, %ymm0
; X32-AVX512F-NEXT: retl
;
@@ -1055,8 +1055,8 @@ define <16 x i16> @test_broadcast_8i16_1
; X32-AVX512BW: ## BB#0:
; X32-AVX512BW-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-AVX512BW-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X32-AVX512BW-NEXT: vmovdqu16 (%ecx), %xmm0
-; X32-AVX512BW-NEXT: vmovdqu16 %xmm0, (%eax)
+; X32-AVX512BW-NEXT: vmovdqu (%ecx), %xmm0
+; X32-AVX512BW-NEXT: vmovdqu %xmm0, (%eax)
; X32-AVX512BW-NEXT: vinserti32x4 $1, %xmm0, %ymm0, %ymm0
; X32-AVX512BW-NEXT: retl
;
@@ -1064,8 +1064,8 @@ define <16 x i16> @test_broadcast_8i16_1
; X32-AVX512DQ: ## BB#0:
; X32-AVX512DQ-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-AVX512DQ-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X32-AVX512DQ-NEXT: vmovdqa64 (%ecx), %xmm0
-; X32-AVX512DQ-NEXT: vmovdqa32 %xmm0, (%eax)
+; X32-AVX512DQ-NEXT: vmovdqa (%ecx), %xmm0
+; X32-AVX512DQ-NEXT: vmovdqa %xmm0, (%eax)
; X32-AVX512DQ-NEXT: vinserti32x4 $1, %xmm0, %ymm0, %ymm0
; X32-AVX512DQ-NEXT: retl
;
@@ -1078,22 +1078,22 @@ define <16 x i16> @test_broadcast_8i16_1
;
; X64-AVX512F-LABEL: test_broadcast_8i16_16i16_reuse:
; X64-AVX512F: ## BB#0:
-; X64-AVX512F-NEXT: vmovdqa64 (%rdi), %xmm0
-; X64-AVX512F-NEXT: vmovdqa32 %xmm0, (%rsi)
+; X64-AVX512F-NEXT: vmovdqa (%rdi), %xmm0
+; X64-AVX512F-NEXT: vmovdqa %xmm0, (%rsi)
; X64-AVX512F-NEXT: vinserti32x4 $1, %xmm0, %ymm0, %ymm0
; X64-AVX512F-NEXT: retq
;
; X64-AVX512BW-LABEL: test_broadcast_8i16_16i16_reuse:
; X64-AVX512BW: ## BB#0:
-; X64-AVX512BW-NEXT: vmovdqu16 (%rdi), %xmm0
-; X64-AVX512BW-NEXT: vmovdqu16 %xmm0, (%rsi)
+; X64-AVX512BW-NEXT: vmovdqu (%rdi), %xmm0
+; X64-AVX512BW-NEXT: vmovdqu %xmm0, (%rsi)
; X64-AVX512BW-NEXT: vinserti32x4 $1, %xmm0, %ymm0, %ymm0
; X64-AVX512BW-NEXT: retq
;
; X64-AVX512DQ-LABEL: test_broadcast_8i16_16i16_reuse:
; X64-AVX512DQ: ## BB#0:
-; X64-AVX512DQ-NEXT: vmovdqa64 (%rdi), %xmm0
-; X64-AVX512DQ-NEXT: vmovdqa32 %xmm0, (%rsi)
+; X64-AVX512DQ-NEXT: vmovdqa (%rdi), %xmm0
+; X64-AVX512DQ-NEXT: vmovdqa %xmm0, (%rsi)
; X64-AVX512DQ-NEXT: vinserti32x4 $1, %xmm0, %ymm0, %ymm0
; X64-AVX512DQ-NEXT: retq
%1 = load <8 x i16>, <8 x i16> *%p0
@@ -1116,8 +1116,8 @@ define <32 x i8> @test_broadcast_16i8_32
; X32-AVX512F: ## BB#0:
; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X32-AVX512F-NEXT: vmovdqa64 (%ecx), %xmm0
-; X32-AVX512F-NEXT: vmovdqa32 %xmm0, (%eax)
+; X32-AVX512F-NEXT: vmovdqa (%ecx), %xmm0
+; X32-AVX512F-NEXT: vmovdqa %xmm0, (%eax)
; X32-AVX512F-NEXT: vinserti32x4 $1, %xmm0, %ymm0, %ymm0
; X32-AVX512F-NEXT: retl
;
@@ -1125,8 +1125,8 @@ define <32 x i8> @test_broadcast_16i8_32
; X32-AVX512BW: ## BB#0:
; X32-AVX512BW-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-AVX512BW-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X32-AVX512BW-NEXT: vmovdqu8 (%ecx), %xmm0
-; X32-AVX512BW-NEXT: vmovdqu8 %xmm0, (%eax)
+; X32-AVX512BW-NEXT: vmovdqu (%ecx), %xmm0
+; X32-AVX512BW-NEXT: vmovdqu %xmm0, (%eax)
; X32-AVX512BW-NEXT: vinserti32x4 $1, %xmm0, %ymm0, %ymm0
; X32-AVX512BW-NEXT: retl
;
@@ -1134,8 +1134,8 @@ define <32 x i8> @test_broadcast_16i8_32
; X32-AVX512DQ: ## BB#0:
; X32-AVX512DQ-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-AVX512DQ-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X32-AVX512DQ-NEXT: vmovdqa64 (%ecx), %xmm0
-; X32-AVX512DQ-NEXT: vmovdqa32 %xmm0, (%eax)
+; X32-AVX512DQ-NEXT: vmovdqa (%ecx), %xmm0
+; X32-AVX512DQ-NEXT: vmovdqa %xmm0, (%eax)
; X32-AVX512DQ-NEXT: vinserti32x4 $1, %xmm0, %ymm0, %ymm0
; X32-AVX512DQ-NEXT: retl
;
@@ -1148,22 +1148,22 @@ define <32 x i8> @test_broadcast_16i8_32
;
; X64-AVX512F-LABEL: test_broadcast_16i8_32i8_reuse:
; X64-AVX512F: ## BB#0:
-; X64-AVX512F-NEXT: vmovdqa64 (%rdi), %xmm0
-; X64-AVX512F-NEXT: vmovdqa32 %xmm0, (%rsi)
+; X64-AVX512F-NEXT: vmovdqa (%rdi), %xmm0
+; X64-AVX512F-NEXT: vmovdqa %xmm0, (%rsi)
; X64-AVX512F-NEXT: vinserti32x4 $1, %xmm0, %ymm0, %ymm0
; X64-AVX512F-NEXT: retq
;
; X64-AVX512BW-LABEL: test_broadcast_16i8_32i8_reuse:
; X64-AVX512BW: ## BB#0:
-; X64-AVX512BW-NEXT: vmovdqu8 (%rdi), %xmm0
-; X64-AVX512BW-NEXT: vmovdqu8 %xmm0, (%rsi)
+; X64-AVX512BW-NEXT: vmovdqu (%rdi), %xmm0
+; X64-AVX512BW-NEXT: vmovdqu %xmm0, (%rsi)
; X64-AVX512BW-NEXT: vinserti32x4 $1, %xmm0, %ymm0, %ymm0
; X64-AVX512BW-NEXT: retq
;
; X64-AVX512DQ-LABEL: test_broadcast_16i8_32i8_reuse:
; X64-AVX512DQ: ## BB#0:
-; X64-AVX512DQ-NEXT: vmovdqa64 (%rdi), %xmm0
-; X64-AVX512DQ-NEXT: vmovdqa32 %xmm0, (%rsi)
+; X64-AVX512DQ-NEXT: vmovdqa (%rdi), %xmm0
+; X64-AVX512DQ-NEXT: vmovdqa %xmm0, (%rsi)
; X64-AVX512DQ-NEXT: vinserti32x4 $1, %xmm0, %ymm0, %ymm0
; X64-AVX512DQ-NEXT: retq
%1 = load <16 x i8>, <16 x i8> *%p0
@@ -1191,9 +1191,9 @@ define <8 x i32> @test_broadcast_4i32_8i
; X32-AVX512F: ## BB#0:
; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X32-AVX512F-NEXT: vmovdqa64 (%ecx), %xmm0
-; X32-AVX512F-NEXT: vpxord %xmm1, %xmm1, %xmm1
-; X32-AVX512F-NEXT: vmovdqa32 %xmm1, (%eax)
+; X32-AVX512F-NEXT: vmovdqa (%ecx), %xmm0
+; X32-AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; X32-AVX512F-NEXT: vmovdqa %xmm1, (%eax)
; X32-AVX512F-NEXT: vinserti32x4 $1, %xmm0, %ymm0, %ymm0
; X32-AVX512F-NEXT: retl
;
@@ -1201,9 +1201,9 @@ define <8 x i32> @test_broadcast_4i32_8i
; X32-AVX512BW: ## BB#0:
; X32-AVX512BW-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-AVX512BW-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X32-AVX512BW-NEXT: vmovdqa64 (%ecx), %xmm0
-; X32-AVX512BW-NEXT: vpxord %xmm1, %xmm1, %xmm1
-; X32-AVX512BW-NEXT: vmovdqa32 %xmm1, (%eax)
+; X32-AVX512BW-NEXT: vmovdqa (%ecx), %xmm0
+; X32-AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; X32-AVX512BW-NEXT: vmovdqa %xmm1, (%eax)
; X32-AVX512BW-NEXT: vinserti32x4 $1, %xmm0, %ymm0, %ymm0
; X32-AVX512BW-NEXT: retl
;
@@ -1211,7 +1211,7 @@ define <8 x i32> @test_broadcast_4i32_8i
; X32-AVX512DQ: ## BB#0:
; X32-AVX512DQ-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-AVX512DQ-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X32-AVX512DQ-NEXT: vmovdqa64 (%ecx), %xmm0
+; X32-AVX512DQ-NEXT: vmovdqa (%ecx), %xmm0
; X32-AVX512DQ-NEXT: vxorps %xmm1, %xmm1, %xmm1
; X32-AVX512DQ-NEXT: vmovaps %xmm1, (%eax)
; X32-AVX512DQ-NEXT: vinserti32x4 $1, %xmm0, %ymm0, %ymm0
@@ -1227,23 +1227,23 @@ define <8 x i32> @test_broadcast_4i32_8i
;
; X64-AVX512F-LABEL: test_broadcast_4i32_8i32_chain:
; X64-AVX512F: ## BB#0:
-; X64-AVX512F-NEXT: vmovdqa64 (%rdi), %xmm0
-; X64-AVX512F-NEXT: vpxord %xmm1, %xmm1, %xmm1
-; X64-AVX512F-NEXT: vmovdqa32 %xmm1, (%rsi)
+; X64-AVX512F-NEXT: vmovdqa (%rdi), %xmm0
+; X64-AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; X64-AVX512F-NEXT: vmovdqa %xmm1, (%rsi)
; X64-AVX512F-NEXT: vinserti32x4 $1, %xmm0, %ymm0, %ymm0
; X64-AVX512F-NEXT: retq
;
; X64-AVX512BW-LABEL: test_broadcast_4i32_8i32_chain:
; X64-AVX512BW: ## BB#0:
-; X64-AVX512BW-NEXT: vmovdqa64 (%rdi), %xmm0
-; X64-AVX512BW-NEXT: vpxord %xmm1, %xmm1, %xmm1
-; X64-AVX512BW-NEXT: vmovdqa32 %xmm1, (%rsi)
+; X64-AVX512BW-NEXT: vmovdqa (%rdi), %xmm0
+; X64-AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; X64-AVX512BW-NEXT: vmovdqa %xmm1, (%rsi)
; X64-AVX512BW-NEXT: vinserti32x4 $1, %xmm0, %ymm0, %ymm0
; X64-AVX512BW-NEXT: retq
;
; X64-AVX512DQ-LABEL: test_broadcast_4i32_8i32_chain:
; X64-AVX512DQ: ## BB#0:
-; X64-AVX512DQ-NEXT: vmovdqa64 (%rdi), %xmm0
+; X64-AVX512DQ-NEXT: vmovdqa (%rdi), %xmm0
; X64-AVX512DQ-NEXT: vxorps %xmm1, %xmm1, %xmm1
; X64-AVX512DQ-NEXT: vmovaps %xmm1, (%rsi)
; X64-AVX512DQ-NEXT: vinserti32x4 $1, %xmm0, %ymm0, %ymm0
@@ -1270,9 +1270,9 @@ define <16 x i32> @test_broadcast_4i32_1
; X32-AVX512F: ## BB#0:
; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X32-AVX512F-NEXT: vmovdqa64 (%ecx), %xmm0
-; X32-AVX512F-NEXT: vpxord %xmm1, %xmm1, %xmm1
-; X32-AVX512F-NEXT: vmovdqa32 %xmm1, (%eax)
+; X32-AVX512F-NEXT: vmovdqa (%ecx), %xmm0
+; X32-AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; X32-AVX512F-NEXT: vmovdqa %xmm1, (%eax)
; X32-AVX512F-NEXT: vinserti32x4 $1, %xmm0, %zmm0, %zmm0
; X32-AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
; X32-AVX512F-NEXT: retl
@@ -1281,9 +1281,9 @@ define <16 x i32> @test_broadcast_4i32_1
; X32-AVX512BW: ## BB#0:
; X32-AVX512BW-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-AVX512BW-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X32-AVX512BW-NEXT: vmovdqa64 (%ecx), %xmm0
-; X32-AVX512BW-NEXT: vpxord %xmm1, %xmm1, %xmm1
-; X32-AVX512BW-NEXT: vmovdqa32 %xmm1, (%eax)
+; X32-AVX512BW-NEXT: vmovdqa (%ecx), %xmm0
+; X32-AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; X32-AVX512BW-NEXT: vmovdqa %xmm1, (%eax)
; X32-AVX512BW-NEXT: vinserti32x4 $1, %xmm0, %zmm0, %zmm0
; X32-AVX512BW-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
; X32-AVX512BW-NEXT: retl
@@ -1292,7 +1292,7 @@ define <16 x i32> @test_broadcast_4i32_1
; X32-AVX512DQ: ## BB#0:
; X32-AVX512DQ-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-AVX512DQ-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X32-AVX512DQ-NEXT: vmovdqa64 (%ecx), %xmm0
+; X32-AVX512DQ-NEXT: vmovdqa (%ecx), %xmm0
; X32-AVX512DQ-NEXT: vxorps %xmm1, %xmm1, %xmm1
; X32-AVX512DQ-NEXT: vmovaps %xmm1, (%eax)
; X32-AVX512DQ-NEXT: vinserti32x4 $1, %xmm0, %zmm0, %zmm0
@@ -1310,25 +1310,25 @@ define <16 x i32> @test_broadcast_4i32_1
;
; X64-AVX512F-LABEL: test_broadcast_4i32_16i32_chain:
; X64-AVX512F: ## BB#0:
-; X64-AVX512F-NEXT: vmovdqa64 (%rdi), %xmm0
-; X64-AVX512F-NEXT: vpxord %xmm1, %xmm1, %xmm1
-; X64-AVX512F-NEXT: vmovdqa32 %xmm1, (%rsi)
+; X64-AVX512F-NEXT: vmovdqa (%rdi), %xmm0
+; X64-AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; X64-AVX512F-NEXT: vmovdqa %xmm1, (%rsi)
; X64-AVX512F-NEXT: vinserti32x4 $1, %xmm0, %zmm0, %zmm0
; X64-AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
; X64-AVX512F-NEXT: retq
;
; X64-AVX512BW-LABEL: test_broadcast_4i32_16i32_chain:
; X64-AVX512BW: ## BB#0:
-; X64-AVX512BW-NEXT: vmovdqa64 (%rdi), %xmm0
-; X64-AVX512BW-NEXT: vpxord %xmm1, %xmm1, %xmm1
-; X64-AVX512BW-NEXT: vmovdqa32 %xmm1, (%rsi)
+; X64-AVX512BW-NEXT: vmovdqa (%rdi), %xmm0
+; X64-AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; X64-AVX512BW-NEXT: vmovdqa %xmm1, (%rsi)
; X64-AVX512BW-NEXT: vinserti32x4 $1, %xmm0, %zmm0, %zmm0
; X64-AVX512BW-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
; X64-AVX512BW-NEXT: retq
;
; X64-AVX512DQ-LABEL: test_broadcast_4i32_16i32_chain:
; X64-AVX512DQ: ## BB#0:
-; X64-AVX512DQ-NEXT: vmovdqa64 (%rdi), %xmm0
+; X64-AVX512DQ-NEXT: vmovdqa (%rdi), %xmm0
; X64-AVX512DQ-NEXT: vxorps %xmm1, %xmm1, %xmm1
; X64-AVX512DQ-NEXT: vmovaps %xmm1, (%rsi)
; X64-AVX512DQ-NEXT: vinserti32x4 $1, %xmm0, %zmm0, %zmm0
@@ -1355,18 +1355,18 @@ define void @fallback_broadcast_v4i64_to
; X32-AVX512-NEXT: vmovdqa64 {{.*#+}} zmm2 = [1,0,2,0,3,0,4,0,1,0,2,0,3,0,4,0]
; X32-AVX512-NEXT: vpaddq %zmm2, %zmm1, %zmm1
; X32-AVX512-NEXT: vpandq %zmm2, %zmm1, %zmm1
-; X32-AVX512-NEXT: vmovdqu64 %ymm0, _ga4
+; X32-AVX512-NEXT: vmovdqu %ymm0, _ga4
; X32-AVX512-NEXT: vmovdqu64 %zmm1, _gb4
; X32-AVX512-NEXT: retl
;
; X64-AVX512-LABEL: fallback_broadcast_v4i64_to_v8i64:
; X64-AVX512: ## BB#0: ## %entry
-; X64-AVX512-NEXT: vmovdqa64 {{.*#+}} ymm2 = [1,2,3,4]
+; X64-AVX512-NEXT: vmovdqa {{.*#+}} ymm2 = [1,2,3,4]
; X64-AVX512-NEXT: vpaddq %ymm2, %ymm0, %ymm0
; X64-AVX512-NEXT: vinserti64x4 $1, %ymm2, %zmm2, %zmm2
; X64-AVX512-NEXT: vpaddq %zmm2, %zmm1, %zmm1
; X64-AVX512-NEXT: vpandq %zmm2, %zmm1, %zmm1
-; X64-AVX512-NEXT: vmovdqu64 %ymm0, {{.*}}(%rip)
+; X64-AVX512-NEXT: vmovdqu %ymm0, {{.*}}(%rip)
; X64-AVX512-NEXT: vmovdqu64 %zmm1, {{.*}}(%rip)
; X64-AVX512-NEXT: retq
entry:
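
The substitutions above all follow the same rule: an EVEX-only spelling (vmovdqa64/vmovdqa32, vmovdqu64, vpxord, vporq, vpandq, ...) is rewritten to its plain VEX form when the instruction only touches xmm/ymm registers and uses none of the EVEX-only features, while anything operating on zmm registers -- vmovdqu64 %zmm1, vpaddq %zmm2, %zmm1, %zmm1 -- has no VEX encoding and keeps its EVEX spelling. Below is a minimal Python sketch of that selection rule as it shows up in these CHECK lines; the mnemonic table and the compress_mnemonic helper are made up for illustration and are not the tables or logic the pass itself uses.

# Illustrative model (not LLVM's implementation) of when an EVEX-only
# mnemonic in the CHECK lines above may be rewritten to its VEX form.

# Hypothetical mapping, taken from the substitutions visible in this diff.
EVEX_TO_VEX_MNEMONIC = {
    "vmovdqa64": "vmovdqa",
    "vmovdqa32": "vmovdqa",
    "vmovdqu64": "vmovdqu",
    "vmovdqu32": "vmovdqu",
    "vpxord":    "vpxor",
    "vpxorq":    "vpxor",
    "vporq":     "vpor",
    "vpandd":    "vpand",
    "vpandq":    "vpand",
}

def compress_mnemonic(mnemonic, operands):
    """Return the VEX spelling if compression looks legal, else the input.

    operands is the operand string from the assembly line; the checks below
    mirror the cases seen in this patch: zmm registers, opmask registers
    ({k1} etc.) and embedded broadcasts ({1toN}) have no VEX encoding, so
    those instructions must keep their EVEX spelling.
    """
    uses_evex_only_feature = (
        "%zmm" in operands or "{k" in operands or "{1to" in operands
    )
    if uses_evex_only_feature:
        return mnemonic
    return EVEX_TO_VEX_MNEMONIC.get(mnemonic, mnemonic)

if __name__ == "__main__":
    # Modelled on the fallback_broadcast_v4i64_to_v8i64 checks above.
    print(compress_mnemonic("vmovdqu64", "%ymm0, _ga4"))  # -> vmovdqu
    print(compress_mnemonic("vmovdqu64", "%zmm1, _gb4"))  # stays vmovdqu64
    # Embedded broadcast (as in the vec-copysign checks further down)
    # also blocks the rewrite.
    print(compress_mnemonic("vpandd", "(%rip){1to4}, %xmm1, %xmm1"))
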
Modified: llvm/trunk/test/CodeGen/X86/vec-copysign-avx512.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec-copysign-avx512.ll?rev=290663&r1=290662&r2=290663&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vec-copysign-avx512.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vec-copysign-avx512.ll Wed Dec 28 04:12:48 2016
@@ -7,7 +7,7 @@ define <4 x float> @v4f32(<4 x float> %a
; AVX512VL: ## BB#0:
; AVX512VL-NEXT: vpandd {{.*}}(%rip){1to4}, %xmm1, %xmm1
; AVX512VL-NEXT: vpandd {{.*}}(%rip){1to4}, %xmm0, %xmm0
-; AVX512VL-NEXT: vporq %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT: retq
;
; AVX512VLDQ-LABEL: v4f32:
@@ -25,7 +25,7 @@ define <8 x float> @v8f32(<8 x float> %a
; AVX512VL: ## BB#0:
; AVX512VL-NEXT: vpandd {{.*}}(%rip){1to8}, %ymm1, %ymm1
; AVX512VL-NEXT: vpandd {{.*}}(%rip){1to8}, %ymm0, %ymm0
-; AVX512VL-NEXT: vporq %ymm1, %ymm0, %ymm0
+; AVX512VL-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX512VL-NEXT: retq
;
; AVX512VLDQ-LABEL: v8f32:
@@ -59,9 +59,9 @@ define <16 x float> @v16f32(<16 x float>
define <2 x double> @v2f64(<2 x double> %a, <2 x double> %b) nounwind {
; AVX512VL-LABEL: v2f64:
; AVX512VL: ## BB#0:
-; AVX512VL-NEXT: vpandq {{.*}}(%rip), %xmm1, %xmm1
-; AVX512VL-NEXT: vpandq {{.*}}(%rip), %xmm0, %xmm0
-; AVX512VL-NEXT: vporq %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
+; AVX512VL-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
+; AVX512VL-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT: retq
;
; AVX512VLDQ-LABEL: v2f64:
@@ -79,7 +79,7 @@ define <4 x double> @v4f64(<4 x double>
; AVX512VL: ## BB#0:
; AVX512VL-NEXT: vpandq {{.*}}(%rip){1to4}, %ymm1, %ymm1
; AVX512VL-NEXT: vpandq {{.*}}(%rip){1to4}, %ymm0, %ymm0
-; AVX512VL-NEXT: vporq %ymm1, %ymm0, %ymm0
+; AVX512VL-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX512VL-NEXT: retq
;
; AVX512VLDQ-LABEL: v4f64:
Modified: llvm/trunk/test/CodeGen/X86/vec_fabs.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_fabs.ll?rev=290663&r1=290662&r2=290663&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vec_fabs.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vec_fabs.ll Wed Dec 28 04:12:48 2016
@@ -17,7 +17,7 @@ define <2 x double> @fabs_v2f64(<2 x dou
;
; X32_AVX512VL-LABEL: fabs_v2f64:
; X32_AVX512VL: # BB#0:
-; X32_AVX512VL-NEXT: vpandq {{\.LCPI.*}}, %xmm0, %xmm0
+; X32_AVX512VL-NEXT: vpand {{\.LCPI.*}}, %xmm0, %xmm0
; X32_AVX512VL-NEXT: retl
;
; X32_AVX512VLDQ-LABEL: fabs_v2f64:
@@ -32,7 +32,7 @@ define <2 x double> @fabs_v2f64(<2 x dou
;
; X64_AVX512VL-LABEL: fabs_v2f64:
; X64_AVX512VL: # BB#0:
-; X64_AVX512VL-NEXT: vpandq {{.*}}(%rip), %xmm0, %xmm0
+; X64_AVX512VL-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
; X64_AVX512VL-NEXT: retq
;
; X64_AVX512VLDQ-LABEL: fabs_v2f64:
Modified: llvm/trunk/test/CodeGen/X86/vec_fp_to_int.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_fp_to_int.ll?rev=290663&r1=290662&r2=290663&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vec_fp_to_int.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vec_fp_to_int.ll Wed Dec 28 04:12:48 2016
@@ -2468,7 +2468,7 @@ define <4 x i32> @fptosi_2f128_to_4i32(<
; AVX512VL-NEXT: movq %rcx, %rsi
; AVX512VL-NEXT: callq __fixtfdi
; AVX512VL-NEXT: vmovq %rax, %xmm0
-; AVX512VL-NEXT: vmovdqa64 %xmm0, (%rsp) # 16-byte Spill
+; AVX512VL-NEXT: vmovdqa %xmm0, (%rsp) # 16-byte Spill
; AVX512VL-NEXT: movq %rbx, %rdi
; AVX512VL-NEXT: movq %r14, %rsi
; AVX512VL-NEXT: callq __fixtfdi
@@ -2516,7 +2516,7 @@ define <4 x i32> @fptosi_2f128_to_4i32(<
; AVX512VLDQ-NEXT: movq %rcx, %rsi
; AVX512VLDQ-NEXT: callq __fixtfdi
; AVX512VLDQ-NEXT: vmovq %rax, %xmm0
-; AVX512VLDQ-NEXT: vmovdqa64 %xmm0, (%rsp) # 16-byte Spill
+; AVX512VLDQ-NEXT: vmovdqa %xmm0, (%rsp) # 16-byte Spill
; AVX512VLDQ-NEXT: movq %rbx, %rdi
; AVX512VLDQ-NEXT: movq %r14, %rsi
; AVX512VLDQ-NEXT: callq __fixtfdi
Modified: llvm/trunk/test/CodeGen/X86/vec_fpext.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_fpext.ll?rev=290663&r1=290662&r2=290663&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vec_fpext.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vec_fpext.ll Wed Dec 28 04:12:48 2016
@@ -28,8 +28,8 @@ define void @fpext_frommem(<2 x float>*
; X32-AVX512VL: # BB#0: # %entry
; X32-AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
; X32-AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
-; X32-AVX512VL-NEXT: vcvtps2pd (%ecx), %xmm0 # encoding: [0x62,0xf1,0x7c,0x08,0x5a,0x01]
-; X32-AVX512VL-NEXT: vmovups %xmm0, (%eax) # encoding: [0x62,0xf1,0x7c,0x08,0x11,0x00]
+; X32-AVX512VL-NEXT: vcvtps2pd (%ecx), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x5a,0x01]
+; X32-AVX512VL-NEXT: vmovups %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x11,0x00]
; X32-AVX512VL-NEXT: retl # encoding: [0xc3]
;
; X64-SSE-LABEL: fpext_frommem:
@@ -46,8 +46,8 @@ define void @fpext_frommem(<2 x float>*
;
; X64-AVX512VL-LABEL: fpext_frommem:
; X64-AVX512VL: # BB#0: # %entry
-; X64-AVX512VL-NEXT: vcvtps2pd (%rdi), %xmm0 # encoding: [0x62,0xf1,0x7c,0x08,0x5a,0x07]
-; X64-AVX512VL-NEXT: vmovups %xmm0, (%rsi) # encoding: [0x62,0xf1,0x7c,0x08,0x11,0x06]
+; X64-AVX512VL-NEXT: vcvtps2pd (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x5a,0x07]
+; X64-AVX512VL-NEXT: vmovups %xmm0, (%rsi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x11,0x06]
; X64-AVX512VL-NEXT: retq # encoding: [0xc3]
entry:
%0 = load <2 x float>, <2 x float>* %in, align 8
@@ -80,8 +80,8 @@ define void @fpext_frommem4(<4 x float>*
; X32-AVX512VL: # BB#0: # %entry
; X32-AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
; X32-AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
-; X32-AVX512VL-NEXT: vcvtps2pd (%ecx), %ymm0 # encoding: [0x62,0xf1,0x7c,0x28,0x5a,0x01]
-; X32-AVX512VL-NEXT: vmovups %ymm0, (%eax) # encoding: [0x62,0xf1,0x7c,0x28,0x11,0x00]
+; X32-AVX512VL-NEXT: vcvtps2pd (%ecx), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x5a,0x01]
+; X32-AVX512VL-NEXT: vmovups %ymm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x11,0x00]
; X32-AVX512VL-NEXT: retl # encoding: [0xc3]
;
; X64-SSE-LABEL: fpext_frommem4:
@@ -101,8 +101,8 @@ define void @fpext_frommem4(<4 x float>*
;
; X64-AVX512VL-LABEL: fpext_frommem4:
; X64-AVX512VL: # BB#0: # %entry
-; X64-AVX512VL-NEXT: vcvtps2pd (%rdi), %ymm0 # encoding: [0x62,0xf1,0x7c,0x28,0x5a,0x07]
-; X64-AVX512VL-NEXT: vmovups %ymm0, (%rsi) # encoding: [0x62,0xf1,0x7c,0x28,0x11,0x06]
+; X64-AVX512VL-NEXT: vcvtps2pd (%rdi), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x5a,0x07]
+; X64-AVX512VL-NEXT: vmovups %ymm0, (%rsi) # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x11,0x06]
; X64-AVX512VL-NEXT: retq # encoding: [0xc3]
entry:
%0 = load <4 x float>, <4 x float>* %in
@@ -195,9 +195,9 @@ define <2 x double> @fpext_fromconst() {
;
; X32-AVX512VL-LABEL: fpext_fromconst:
; X32-AVX512VL: # BB#0: # %entry
-; X32-AVX512VL-NEXT: vmovaps {{.*#+}} xmm0 = [1.000000e+00,-2.000000e+00]
-; X32-AVX512VL-NEXT: # encoding: [0x62,0xf1,0x7c,0x08,0x28,0x05,A,A,A,A]
-; X32-AVX512VL-NEXT: # fixup A - offset: 6, value: {{\.LCPI.*}}, kind: FK_Data_4
+; X32-AVX512VL-NEXT: vmovaps {{\.LCPI.*}}, %xmm0 # EVEX TO VEX Compression xmm0 = [1.000000e+00,-2.000000e+00]
+; X32-AVX512VL-NEXT: # encoding: [0xc5,0xf8,0x28,0x05,A,A,A,A]
+; X32-AVX512VL-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}, kind: FK_Data_4
; X32-AVX512VL-NEXT: retl # encoding: [0xc3]
;
; X64-SSE-LABEL: fpext_fromconst:
@@ -216,9 +216,9 @@ define <2 x double> @fpext_fromconst() {
;
; X64-AVX512VL-LABEL: fpext_fromconst:
; X64-AVX512VL: # BB#0: # %entry
-; X64-AVX512VL-NEXT: vmovaps {{.*#+}} xmm0 = [1.000000e+00,-2.000000e+00]
-; X64-AVX512VL-NEXT: # encoding: [0x62,0xf1,0x7c,0x08,0x28,0x05,A,A,A,A]
-; X64-AVX512VL-NEXT: # fixup A - offset: 6, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
+; X64-AVX512VL-NEXT: vmovaps {{.*}}(%rip), %xmm0 # EVEX TO VEX Compression xmm0 = [1.000000e+00,-2.000000e+00]
+; X64-AVX512VL-NEXT: # encoding: [0xc5,0xf8,0x28,0x05,A,A,A,A]
+; X64-AVX512VL-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-AVX512VL-NEXT: retq # encoding: [0xc3]
entry:
%0 = insertelement <2 x float> undef, float 1.0, i32 0
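
The vec_fpext.ll checks make the code-size win explicit because they spell out the encodings: vcvtps2pd (%ecx), %xmm0 shrinks from the 6-byte EVEX form [0x62,0xf1,0x7c,0x08,0x5a,0x01] to the 4-byte VEX form [0xc5,0xf8,0x5a,0x01], and the constant-pool fixup for the vmovaps load moves from offset 6 to offset 4 because the 4-byte EVEX prefix (0x62 plus three payload bytes) is replaced by the 2-byte VEX2 prefix (0xc5 plus one byte). A small sketch of that arithmetic, using only the byte sequences quoted in the hunks above (the variable names are illustrative):

# Encodings quoted in the vec_fpext.ll CHECK lines, before and after the
# patch, for "vcvtps2pd (%ecx), %xmm0" and for the vmovaps constant load.
evex_cvt = [0x62, 0xF1, 0x7C, 0x08, 0x5A, 0x01]
vex_cvt  = [0xC5, 0xF8, 0x5A, 0x01]

evex_mov = [0x62, 0xF1, 0x7C, 0x08, 0x28, 0x05]  # + 4 fixup bytes (A,A,A,A)
vex_mov  = [0xC5, 0xF8, 0x28, 0x04 + 0x01]       # + 4 fixup bytes

# Each compressed instruction drops the 4-byte EVEX prefix in favour of the
# 2-byte VEX2 prefix, so the saving per instruction here is 2 bytes ...
assert len(evex_cvt) - len(vex_cvt) == 2

# ... and the displacement fixup, which starts right after the opcode and
# ModRM bytes, moves from offset 6 to offset 4, exactly as the updated
# "fixup A - offset:" comments above say.
assert len(evex_mov) == 6 and len(vex_mov) == 4
print("bytes saved per compressed instruction:", len(evex_cvt) - len(vex_cvt))
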
Modified: llvm/trunk/test/CodeGen/X86/vec_int_to_fp.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_int_to_fp.ll?rev=290663&r1=290662&r2=290663&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vec_int_to_fp.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vec_int_to_fp.ll Wed Dec 28 04:12:48 2016
@@ -2594,7 +2594,7 @@ define <2 x double> @sitofp_load_2i64_to
;
; AVX512VL-LABEL: sitofp_load_2i64_to_2f64:
; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vmovdqa64 (%rdi), %xmm0
+; AVX512VL-NEXT: vmovdqa (%rdi), %xmm0
; AVX512VL-NEXT: vpextrq $1, %xmm0, %rax
; AVX512VL-NEXT: vcvtsi2sdq %rax, %xmm1, %xmm1
; AVX512VL-NEXT: vmovq %xmm0, %rax
@@ -2774,7 +2774,7 @@ define <4 x double> @sitofp_load_4i64_to
;
; AVX512VL-LABEL: sitofp_load_4i64_to_4f64:
; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vmovdqa64 (%rdi), %ymm0
+; AVX512VL-NEXT: vmovdqa (%rdi), %ymm0
; AVX512VL-NEXT: vextracti32x4 $1, %ymm0, %xmm1
; AVX512VL-NEXT: vpextrq $1, %xmm1, %rax
; AVX512VL-NEXT: vcvtsi2sdq %rax, %xmm2, %xmm2
@@ -2913,7 +2913,7 @@ define <2 x double> @uitofp_load_2i64_to
;
; AVX512VL-LABEL: uitofp_load_2i64_to_2f64:
; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vmovdqa64 (%rdi), %xmm0
+; AVX512VL-NEXT: vmovdqa (%rdi), %xmm0
; AVX512VL-NEXT: vpextrq $1, %xmm0, %rax
; AVX512VL-NEXT: vcvtusi2sdq %rax, %xmm1, %xmm1
; AVX512VL-NEXT: vmovq %xmm0, %rax
@@ -3021,7 +3021,7 @@ define <2 x double> @uitofp_load_2i16_to
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
; AVX512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; AVX512VL-NEXT: vpxord %xmm1, %xmm1, %xmm1
+; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512VL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3,4,5,6,7]
; AVX512VL-NEXT: vcvtdq2pd %xmm0, %xmm0
; AVX512VL-NEXT: retq
@@ -3037,7 +3037,7 @@ define <2 x double> @uitofp_load_2i16_to
; AVX512VLDQ: # BB#0:
; AVX512VLDQ-NEXT: vpmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
; AVX512VLDQ-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; AVX512VLDQ-NEXT: vpxord %xmm1, %xmm1, %xmm1
+; AVX512VLDQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512VLDQ-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3,4,5,6,7]
; AVX512VLDQ-NEXT: vcvtdq2pd %xmm0, %xmm0
; AVX512VLDQ-NEXT: retq
@@ -3189,7 +3189,7 @@ define <4 x double> @uitofp_load_4i64_to
;
; AVX512VL-LABEL: uitofp_load_4i64_to_4f64:
; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vmovdqa64 (%rdi), %ymm0
+; AVX512VL-NEXT: vmovdqa (%rdi), %ymm0
; AVX512VL-NEXT: vextracti32x4 $1, %ymm0, %xmm1
; AVX512VL-NEXT: vpextrq $1, %xmm1, %rax
; AVX512VL-NEXT: vcvtusi2sdq %rax, %xmm2, %xmm2
@@ -3420,7 +3420,7 @@ define <4 x float> @sitofp_load_4i64_to_
;
; AVX512VL-LABEL: sitofp_load_4i64_to_4f32:
; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vmovdqa64 (%rdi), %ymm0
+; AVX512VL-NEXT: vmovdqa (%rdi), %ymm0
; AVX512VL-NEXT: vpextrq $1, %xmm0, %rax
; AVX512VL-NEXT: vcvtsi2ssq %rax, %xmm1, %xmm1
; AVX512VL-NEXT: vmovq %xmm0, %rax
@@ -4007,7 +4007,7 @@ define <4 x float> @uitofp_load_4i64_to_
;
; AVX512VL-LABEL: uitofp_load_4i64_to_4f32:
; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vmovdqa64 (%rdi), %ymm0
+; AVX512VL-NEXT: vmovdqa (%rdi), %ymm0
; AVX512VL-NEXT: vpextrq $1, %xmm0, %rax
; AVX512VL-NEXT: vcvtusi2ssq %rax, %xmm1, %xmm1
; AVX512VL-NEXT: vmovq %xmm0, %rax
Modified: llvm/trunk/test/CodeGen/X86/vector-half-conversions.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-half-conversions.ll?rev=290663&r1=290662&r2=290663&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-half-conversions.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-half-conversions.ll Wed Dec 28 04:12:48 2016
@@ -3010,7 +3010,7 @@ define <8 x i16> @cvt_4f32_to_8i16_zero(
; AVX512VL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; AVX512VL-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
; AVX512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,2]
-; AVX512VL-NEXT: vpxord %xmm1, %xmm1, %xmm1
+; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512VL-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; AVX512VL-NEXT: retq
%1 = fptrunc <4 x float> %a0 to <4 x half>
@@ -3713,7 +3713,7 @@ define void @store_cvt_4f32_to_8i16_unde
; AVX512VL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; AVX512VL-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
; AVX512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; AVX512VL-NEXT: vmovdqa32 %xmm0, (%rdi)
+; AVX512VL-NEXT: vmovdqa %xmm0, (%rdi)
; AVX512VL-NEXT: retq
%1 = fptrunc <4 x float> %a0 to <4 x half>
%2 = bitcast <4 x half> %1 to <4 x i16>
@@ -3827,9 +3827,9 @@ define void @store_cvt_4f32_to_8i16_zero
; AVX512VL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; AVX512VL-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
; AVX512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,2]
-; AVX512VL-NEXT: vpxord %xmm1, %xmm1, %xmm1
+; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512VL-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
-; AVX512VL-NEXT: vmovdqa32 %xmm0, (%rdi)
+; AVX512VL-NEXT: vmovdqa %xmm0, (%rdi)
; AVX512VL-NEXT: retq
%1 = fptrunc <4 x float> %a0 to <4 x half>
%2 = bitcast <4 x half> %1 to <4 x i16>
@@ -4742,7 +4742,7 @@ define <8 x i16> @cvt_4f64_to_8i16_zero(
; AVX512VL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; AVX512VL-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
; AVX512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,2]
-; AVX512VL-NEXT: vpxord %xmm1, %xmm1, %xmm1
+; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512VL-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; AVX512VL-NEXT: addq $40, %rsp
; AVX512VL-NEXT: popq %rbx
@@ -5373,7 +5373,7 @@ define void @store_cvt_4f64_to_8i16_unde
; AVX512VL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; AVX512VL-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
; AVX512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; AVX512VL-NEXT: vmovdqa32 %xmm0, (%r14)
+; AVX512VL-NEXT: vmovdqa %xmm0, (%r14)
; AVX512VL-NEXT: addq $32, %rsp
; AVX512VL-NEXT: popq %rbx
; AVX512VL-NEXT: popq %r14
@@ -5544,9 +5544,9 @@ define void @store_cvt_4f64_to_8i16_zero
; AVX512VL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; AVX512VL-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
; AVX512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,2]
-; AVX512VL-NEXT: vpxord %xmm1, %xmm1, %xmm1
+; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512VL-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
-; AVX512VL-NEXT: vmovdqa32 %xmm0, (%r14)
+; AVX512VL-NEXT: vmovdqa %xmm0, (%r14)
; AVX512VL-NEXT: addq $32, %rsp
; AVX512VL-NEXT: popq %rbx
; AVX512VL-NEXT: popq %r14
Modified: llvm/trunk/test/CodeGen/X86/vector-lzcnt-256.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-lzcnt-256.ll?rev=290663&r1=290662&r2=290663&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-lzcnt-256.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-lzcnt-256.ll Wed Dec 28 04:12:48 2016
@@ -716,7 +716,7 @@ define <32 x i8> @testv32i8(<32 x i8> %i
; AVX512VLCD-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
; AVX512VLCD-NEXT: vplzcntd %zmm1, %zmm1
; AVX512VLCD-NEXT: vpmovdb %zmm1, %xmm1
-; AVX512VLCD-NEXT: vmovdqa64 {{.*#+}} xmm2 = [24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24]
+; AVX512VLCD-NEXT: vmovdqa {{.*#+}} xmm2 = [24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24]
; AVX512VLCD-NEXT: vpsubb %xmm2, %xmm1, %xmm1
; AVX512VLCD-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; AVX512VLCD-NEXT: vplzcntd %zmm0, %zmm0
@@ -805,7 +805,7 @@ define <32 x i8> @testv32i8u(<32 x i8> %
; AVX512VLCD-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
; AVX512VLCD-NEXT: vplzcntd %zmm1, %zmm1
; AVX512VLCD-NEXT: vpmovdb %zmm1, %xmm1
-; AVX512VLCD-NEXT: vmovdqa64 {{.*#+}} xmm2 = [24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24]
+; AVX512VLCD-NEXT: vmovdqa {{.*#+}} xmm2 = [24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24]
; AVX512VLCD-NEXT: vpsubb %xmm2, %xmm1, %xmm1
; AVX512VLCD-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; AVX512VLCD-NEXT: vplzcntd %zmm0, %zmm0
Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v16.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v16.ll?rev=290663&r1=290662&r2=290663&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v16.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v16.ll Wed Dec 28 04:12:48 2016
@@ -423,7 +423,7 @@ define <16 x i8> @shuffle_v16i8_00_17_02
;
; AVX512VL-LABEL: shuffle_v16i8_00_17_02_19_04_21_06_23_08_25_10_27_12_29_14_31:
; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vmovdqu8 {{.*#+}} xmm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
+; AVX512VL-NEXT: vmovdqu {{.*#+}} xmm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
; AVX512VL-NEXT: vpblendvb %xmm2, %xmm0, %xmm1, %xmm0
; AVX512VL-NEXT: retq
%shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31>
@@ -462,7 +462,7 @@ define <16 x i8> @shuffle_v16i8_00_01_02
;
; AVX512VL-LABEL: shuffle_v16i8_00_01_02_19_04_05_06_23_08_09_10_27_12_13_14_31:
; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vmovdqu8 {{.*#+}} xmm2 = [255,255,255,0,255,255,255,0,255,255,255,0,255,255,255,0]
+; AVX512VL-NEXT: vmovdqu {{.*#+}} xmm2 = [255,255,255,0,255,255,255,0,255,255,255,0,255,255,255,0]
; AVX512VL-NEXT: vpblendvb %xmm2, %xmm0, %xmm1, %xmm0
; AVX512VL-NEXT: retq
%shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 19, i32 4, i32 5, i32 6, i32 23, i32 8, i32 9, i32 10, i32 27, i32 12, i32 13, i32 14, i32 31>
@@ -482,7 +482,7 @@ define <16 x i8> @shuffle_v16i8_00_01_02
;
; AVX512VL-LABEL: shuffle_v16i8_00_01_02_zz_04_05_06_zz_08_09_10_zz_12_13_14_zz:
; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vpandq {{.*}}(%rip), %xmm0, %xmm0
+; AVX512VL-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
; AVX512VL-NEXT: retq
%shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 19, i32 4, i32 5, i32 6, i32 23, i32 8, i32 9, i32 10, i32 27, i32 12, i32 13, i32 14, i32 31>
ret <16 x i8> %shuffle
@@ -520,7 +520,7 @@ define <16 x i8> @shuffle_v16i8_00_01_02
;
; AVX512VL-LABEL: shuffle_v16i8_00_01_02_03_20_05_06_23_08_09_10_11_28_13_14_31:
; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vmovdqu8 {{.*#+}} xmm2 = [255,255,255,255,0,255,255,0,255,255,255,255,0,255,255,0]
+; AVX512VL-NEXT: vmovdqu {{.*#+}} xmm2 = [255,255,255,255,0,255,255,0,255,255,255,255,0,255,255,0]
; AVX512VL-NEXT: vpblendvb %xmm2, %xmm0, %xmm1, %xmm0
; AVX512VL-NEXT: retq
%shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 20, i32 5, i32 6, i32 23, i32 8, i32 9, i32 10, i32 11, i32 28, i32 13, i32 14, i32 31>
@@ -560,7 +560,7 @@ define <16 x i8> @shuffle_v16i8_16_17_18
;
; AVX512VL-LABEL: shuffle_v16i8_16_17_18_19_04_05_06_07_24_25_10_11_28_13_30_15:
; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vmovdqu8 {{.*#+}} xmm2 = [255,255,255,255,0,0,0,0,255,255,0,0,255,0,255,0]
+; AVX512VL-NEXT: vmovdqu {{.*#+}} xmm2 = [255,255,255,255,0,0,0,0,255,255,0,0,255,0,255,0]
; AVX512VL-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT: retq
%shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 4, i32 5, i32 6, i32 7, i32 24, i32 25, i32 10, i32 11, i32 28, i32 13, i32 30, i32 15>
@@ -710,7 +710,7 @@ define <16 x i8> @shuffle_v16i8_zz_zz_zz
;
; AVX512VL-LABEL: shuffle_v16i8_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vpxord %xmm0, %xmm0, %xmm0
+; AVX512VL-NEXT: vpxor %xmm0, %xmm0, %xmm0
; AVX512VL-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0
; AVX512VL-NEXT: retq
%a = insertelement <16 x i8> undef, i8 %i, i32 0
@@ -747,7 +747,7 @@ define <16 x i8> @shuffle_v16i8_zz_uu_uu
;
; AVX512VL-LABEL: shuffle_v16i8_zz_uu_uu_zz_uu_uu_zz_zz_zz_zz_zz_zz_zz_zz_zz_16:
; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vpxord %xmm0, %xmm0, %xmm0
+; AVX512VL-NEXT: vpxor %xmm0, %xmm0, %xmm0
; AVX512VL-NEXT: vpinsrb $15, %edi, %xmm0, %xmm0
; AVX512VL-NEXT: retq
%a = insertelement <16 x i8> undef, i8 %i, i32 0
@@ -784,7 +784,7 @@ define <16 x i8> @shuffle_v16i8_zz_zz_19
;
; AVX512VL-LABEL: shuffle_v16i8_zz_zz_19_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vpxord %xmm0, %xmm0, %xmm0
+; AVX512VL-NEXT: vpxor %xmm0, %xmm0, %xmm0
; AVX512VL-NEXT: vpinsrb $2, %edi, %xmm0, %xmm0
; AVX512VL-NEXT: retq
%a = insertelement <16 x i8> undef, i8 %i, i32 3
@@ -1233,7 +1233,7 @@ define <16 x i8> @shuffle_v16i8_uu_10_02
; AVX512VL: # BB#0: # %entry
; AVX512VL-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[u],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[2],zero,zero,zero
; AVX512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[u,10,2,7],zero,xmm0[14,7,2],zero,xmm0[3,1,14],zero,xmm0[9,11,0]
-; AVX512VL-NEXT: vporq %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT: retq
entry:
%shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 undef, i32 10, i32 2, i32 7, i32 22, i32 14, i32 7, i32 2, i32 18, i32 3, i32 1, i32 14, i32 18, i32 9, i32 11, i32 0>
@@ -1270,9 +1270,9 @@ define void @constant_gets_selected(<4 x
;
; AVX512VL-LABEL: constant_gets_selected:
; AVX512VL: # BB#0: # %entry
-; AVX512VL-NEXT: vpxord %xmm0, %xmm0, %xmm0
-; AVX512VL-NEXT: vmovdqa32 %xmm0, (%rdi)
-; AVX512VL-NEXT: vmovdqa32 %xmm0, (%rsi)
+; AVX512VL-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; AVX512VL-NEXT: vmovdqa %xmm0, (%rdi)
+; AVX512VL-NEXT: vmovdqa %xmm0, (%rsi)
; AVX512VL-NEXT: retq
entry:
%weird_zero = bitcast <4 x i32> zeroinitializer to <16 x i8>
@@ -1420,7 +1420,7 @@ define <16 x i8> @PR12412(<16 x i8> %inv
;
; AVX512VL-LABEL: PR12412:
; AVX512VL: # BB#0: # %entry
-; AVX512VL-NEXT: vmovdqu8 {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
+; AVX512VL-NEXT: vmovdqu {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; AVX512VL-NEXT: vpshufb %xmm2, %xmm1, %xmm1
; AVX512VL-NEXT: vpshufb %xmm2, %xmm0, %xmm0
; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
@@ -1781,7 +1781,7 @@ define <16 x i8> @PR31364(i8* nocapture
;
; AVX512VL-LABEL: PR31364:
; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vpxord %xmm0, %xmm0, %xmm0
+; AVX512VL-NEXT: vpxor %xmm0, %xmm0, %xmm0
; AVX512VL-NEXT: vpinsrb $0, (%rdi), %xmm0, %xmm0
; AVX512VL-NEXT: vpinsrb $1, (%rsi), %xmm0, %xmm0
; AVX512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[1,1,1,1,1,1,1],zero,xmm0[1,1,1,1,1,0,0,0]
Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v2.ll?rev=290663&r1=290662&r2=290663&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v2.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v2.ll Wed Dec 28 04:12:48 2016
@@ -782,7 +782,7 @@ define <2 x i64> @shuffle_v2i64_z1(<2 x
;
; AVX512VL-LABEL: shuffle_v2i64_z1:
; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vpxord %xmm1, %xmm1, %xmm1
+; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; AVX512VL-NEXT: retq
%shuffle = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> <i32 2, i32 1>
@@ -824,7 +824,7 @@ define <2 x double> @shuffle_v2f64_1z(<2
;
; AVX512VL-LABEL: shuffle_v2f64_1z:
; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vpxord %xmm1, %xmm1, %xmm1
+; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512VL-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; AVX512VL-NEXT: retq
%shuffle = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 1, i32 3>
@@ -853,7 +853,7 @@ define <2 x double> @shuffle_v2f64_z0(<2
;
; AVX512VL-LABEL: shuffle_v2f64_z0:
; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vpxord %xmm1, %xmm1, %xmm1
+; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX512VL-NEXT: retq
%shuffle = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 2, i32 0>
@@ -899,7 +899,7 @@ define <2 x double> @shuffle_v2f64_z1(<2
;
; AVX512VL-LABEL: shuffle_v2f64_z1:
; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vpxord %xmm1, %xmm1, %xmm1
+; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512VL-NEXT: vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; AVX512VL-NEXT: retq
%shuffle = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 2, i32 1>
@@ -927,7 +927,7 @@ define <2 x double> @shuffle_v2f64_bitca
;
; AVX512VL-LABEL: shuffle_v2f64_bitcast_1z:
; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vpxord %xmm1, %xmm1, %xmm1
+; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512VL-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
; AVX512VL-NEXT: retq
%shuffle64 = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 2, i32 1>
@@ -973,7 +973,7 @@ define <2 x i64> @shuffle_v2i64_bitcast_
;
; AVX512VL-LABEL: shuffle_v2i64_bitcast_z123:
; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vpxord %xmm1, %xmm1, %xmm1
+; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; AVX512VL-NEXT: retq
%bitcast32 = bitcast <2 x i64> %x to <4 x float>
Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v4.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v4.ll?rev=290663&r1=290662&r2=290663&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v4.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v4.ll Wed Dec 28 04:12:48 2016
@@ -1364,7 +1364,7 @@ define <4 x i32> @shuffle_v4i32_z6zz(<4
; AVX512VL-LABEL: shuffle_v4i32_z6zz:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
-; AVX512VL-NEXT: vpxord %xmm1, %xmm1, %xmm1
+; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2,3]
; AVX512VL-NEXT: retq
%shuffle = shufflevector <4 x i32> zeroinitializer, <4 x i32> %a, <4 x i32> <i32 0, i32 6, i32 2, i32 3>
@@ -1691,7 +1691,7 @@ define <4 x i32> @shuffle_v4i32_0z23(<4
;
; AVX512VL-LABEL: shuffle_v4i32_0z23:
; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vpxord %xmm1, %xmm1, %xmm1
+; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3]
; AVX512VL-NEXT: retq
%shuffle = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 4, i32 2, i32 3>
@@ -1734,7 +1734,7 @@ define <4 x i32> @shuffle_v4i32_01z3(<4
;
; AVX512VL-LABEL: shuffle_v4i32_01z3:
; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vpxord %xmm1, %xmm1, %xmm1
+; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3]
; AVX512VL-NEXT: retq
%shuffle = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 3>
@@ -1777,7 +1777,7 @@ define <4 x i32> @shuffle_v4i32_012z(<4
;
; AVX512VL-LABEL: shuffle_v4i32_012z:
; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vpxord %xmm1, %xmm1, %xmm1
+; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3]
; AVX512VL-NEXT: retq
%shuffle = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
@@ -1820,7 +1820,7 @@ define <4 x i32> @shuffle_v4i32_0zz3(<4
;
; AVX512VL-LABEL: shuffle_v4i32_0zz3:
; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vpxord %xmm1, %xmm1, %xmm1
+; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3]
; AVX512VL-NEXT: retq
%shuffle = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 4, i32 4, i32 3>
Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v8.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v8.ll?rev=290663&r1=290662&r2=290663&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v8.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v8.ll Wed Dec 28 04:12:48 2016
@@ -1423,7 +1423,7 @@ define <8 x i16> @shuffle_v8i16_z8zzzzzz
;
; AVX512VL-LABEL: shuffle_v8i16_z8zzzzzz:
; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vpxord %xmm0, %xmm0, %xmm0
+; AVX512VL-NEXT: vpxor %xmm0, %xmm0, %xmm0
; AVX512VL-NEXT: vpinsrw $1, %edi, %xmm0, %xmm0
; AVX512VL-NEXT: retq
%a = insertelement <8 x i16> undef, i16 %i, i32 0
@@ -1446,7 +1446,7 @@ define <8 x i16> @shuffle_v8i16_zzzzz8zz
;
; AVX512VL-LABEL: shuffle_v8i16_zzzzz8zz:
; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vpxord %xmm0, %xmm0, %xmm0
+; AVX512VL-NEXT: vpxor %xmm0, %xmm0, %xmm0
; AVX512VL-NEXT: vpinsrw $5, %edi, %xmm0, %xmm0
; AVX512VL-NEXT: retq
%a = insertelement <8 x i16> undef, i16 %i, i32 0
@@ -1469,7 +1469,7 @@ define <8 x i16> @shuffle_v8i16_zuuzuuz8
;
; AVX512VL-LABEL: shuffle_v8i16_zuuzuuz8:
; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vpxord %xmm0, %xmm0, %xmm0
+; AVX512VL-NEXT: vpxor %xmm0, %xmm0, %xmm0
; AVX512VL-NEXT: vpinsrw $7, %edi, %xmm0, %xmm0
; AVX512VL-NEXT: retq
%a = insertelement <8 x i16> undef, i16 %i, i32 0
@@ -1492,7 +1492,7 @@ define <8 x i16> @shuffle_v8i16_zzBzzzzz
;
; AVX512VL-LABEL: shuffle_v8i16_zzBzzzzz:
; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vpxord %xmm0, %xmm0, %xmm0
+; AVX512VL-NEXT: vpxor %xmm0, %xmm0, %xmm0
; AVX512VL-NEXT: vpinsrw $2, %edi, %xmm0, %xmm0
; AVX512VL-NEXT: retq
%a = insertelement <8 x i16> undef, i16 %i, i32 3
@@ -2110,7 +2110,7 @@ define <8 x i16> @shuffle_v8i16_0z234567
;
; AVX512VL-LABEL: shuffle_v8i16_0z234567:
; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vpxord %xmm1, %xmm1, %xmm1
+; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512VL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3,4,5,6,7]
; AVX512VL-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 9, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
@@ -2142,7 +2142,7 @@ define <8 x i16> @shuffle_v8i16_0zzzz5z7
;
; AVX512VL-LABEL: shuffle_v8i16_0zzzz5z7:
; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vpxord %xmm1, %xmm1, %xmm1
+; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512VL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4],xmm0[5],xmm1[6],xmm0[7]
; AVX512VL-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 8, i32 8, i32 8, i32 8, i32 5, i32 8, i32 7>
@@ -2174,7 +2174,7 @@ define <8 x i16> @shuffle_v8i16_0123456z
;
; AVX512VL-LABEL: shuffle_v8i16_0123456z:
; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vpxord %xmm1, %xmm1, %xmm1
+; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512VL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3,4,5,6],xmm1[7]
; AVX512VL-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 9, i32 2, i32 3, i32 4, i32 5, i32 6, i32 15>
Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v16.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v16.ll?rev=290663&r1=290662&r2=290663&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v16.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v16.ll Wed Dec 28 04:12:48 2016
@@ -170,7 +170,7 @@ define <16 x i16> @shuffle_v16i16_00_00_
;
; AVX512VL-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00:
; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vmovdqu16 {{.*#+}} ymm1 = [0,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0]
+; AVX512VL-NEXT: vmovdqu {{.*#+}} ymm1 = [0,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0]
; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0
; AVX512VL-NEXT: retq
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
@@ -198,7 +198,7 @@ define <16 x i16> @shuffle_v16i16_00_00_
;
; AVX512VL-LABEL: shuffle_v16i16_00_00_00_00_00_00_09_00_00_00_00_00_00_00_00_00:
; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vmovdqu16 {{.*#+}} ymm1 = [0,0,0,0,0,0,9,0,0,0,0,0,0,0,0,0]
+; AVX512VL-NEXT: vmovdqu {{.*#+}} ymm1 = [0,0,0,0,0,0,9,0,0,0,0,0,0,0,0,0]
; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0
; AVX512VL-NEXT: retq
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 9, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
@@ -225,7 +225,7 @@ define <16 x i16> @shuffle_v16i16_00_00_
;
; AVX512VL-LABEL: shuffle_v16i16_00_00_00_00_00_10_00_00_00_00_00_00_00_00_00_00:
; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vmovdqu16 {{.*#+}} ymm1 = [0,0,0,0,0,10,0,0,0,0,0,0,0,0,0,0]
+; AVX512VL-NEXT: vmovdqu {{.*#+}} ymm1 = [0,0,0,0,0,10,0,0,0,0,0,0,0,0,0,0]
; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0
; AVX512VL-NEXT: retq
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 10, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
@@ -252,7 +252,7 @@ define <16 x i16> @shuffle_v16i16_00_00_
;
; AVX512VL-LABEL: shuffle_v16i16_00_00_00_00_11_00_00_00_00_00_00_00_00_00_00_00:
; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vmovdqu16 {{.*#+}} ymm1 = [0,0,0,0,11,0,0,0,0,0,0,0,0,0,0,0]
+; AVX512VL-NEXT: vmovdqu {{.*#+}} ymm1 = [0,0,0,0,11,0,0,0,0,0,0,0,0,0,0,0]
; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0
; AVX512VL-NEXT: retq
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 11, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
@@ -278,7 +278,7 @@ define <16 x i16> @shuffle_v16i16_00_00_
;
; AVX512VL-LABEL: shuffle_v16i16_00_00_00_12_00_00_00_00_00_00_00_00_00_00_00_00:
; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vmovdqu16 {{.*#+}} ymm1 = [0,0,0,12,0,0,0,0,0,0,0,0,0,0,0,0]
+; AVX512VL-NEXT: vmovdqu {{.*#+}} ymm1 = [0,0,0,12,0,0,0,0,0,0,0,0,0,0,0,0]
; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0
; AVX512VL-NEXT: retq
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 12, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
@@ -304,7 +304,7 @@ define <16 x i16> @shuffle_v16i16_00_00_
;
; AVX512VL-LABEL: shuffle_v16i16_00_00_13_00_00_00_00_00_00_00_00_00_00_00_00_00:
; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vmovdqu16 {{.*#+}} ymm1 = [0,0,13,0,0,0,0,0,0,0,0,0,0,0,0,0]
+; AVX512VL-NEXT: vmovdqu {{.*#+}} ymm1 = [0,0,13,0,0,0,0,0,0,0,0,0,0,0,0,0]
; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0
; AVX512VL-NEXT: retq
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 13, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
@@ -330,7 +330,7 @@ define <16 x i16> @shuffle_v16i16_00_14_
;
; AVX512VL-LABEL: shuffle_v16i16_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vmovdqu16 {{.*#+}} ymm1 = [0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
+; AVX512VL-NEXT: vmovdqu {{.*#+}} ymm1 = [0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0
; AVX512VL-NEXT: retq
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 14, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
@@ -717,7 +717,7 @@ define <16 x i16> @shuffle_v16i16_00_01_
;
; AVX512VL-LABEL: shuffle_v16i16_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_31:
; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vmovdqu8 {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,0]
+; AVX512VL-NEXT: vmovdqu {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,0]
; AVX512VL-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
; AVX512VL-NEXT: retq
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 31>
@@ -741,7 +741,7 @@ define <16 x i16> @shuffle_v16i16_16_01_
;
; AVX512VL-LABEL: shuffle_v16i16_16_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15:
; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vmovdqu8 {{.*#+}} ymm2 = [0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
+; AVX512VL-NEXT: vmovdqu {{.*#+}} ymm2 = [0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; AVX512VL-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
; AVX512VL-NEXT: retq
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 16, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
@@ -765,7 +765,7 @@ define <16 x i16> @shuffle_v16i16_00_17_
;
; AVX512VL-LABEL: shuffle_v16i16_00_17_02_19_04_21_06_23_24_09_26_11_28_13_30_15:
; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vmovdqu8 {{.*#+}} ymm2 = [255,255,0,0,255,255,0,0,255,255,0,0,255,255,0,0,0,0,255,255,0,0,255,255,0,0,255,255,0,0,255,255]
+; AVX512VL-NEXT: vmovdqu {{.*#+}} ymm2 = [255,255,0,0,255,255,0,0,255,255,0,0,255,255,0,0,0,0,255,255,0,0,255,255,0,0,255,255,0,0,255,255]
; AVX512VL-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
; AVX512VL-NEXT: retq
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 24, i32 9, i32 26, i32 11, i32 28, i32 13, i32 30, i32 15>
@@ -789,7 +789,7 @@ define <16 x i16> @shuffle_v16i16_16_01_
;
; AVX512VL-LABEL: shuffle_v16i16_16_01_18_03_20_05_22_07_08_25_10_27_12_29_14_31:
; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vmovdqu8 {{.*#+}} ymm2 = [0,0,255,255,0,0,255,255,0,0,255,255,0,0,255,255,255,255,0,0,255,255,0,0,255,255,0,0,255,255,0,0]
+; AVX512VL-NEXT: vmovdqu {{.*#+}} ymm2 = [0,0,255,255,0,0,255,255,0,0,255,255,0,0,255,255,255,255,0,0,255,255,0,0,255,255,0,0,255,255,0,0]
; AVX512VL-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
; AVX512VL-NEXT: retq
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 16, i32 1, i32 18, i32 3, i32 20, i32 5, i32 22, i32 7, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31>
@@ -849,7 +849,7 @@ define <16 x i16> @shuffle_v16i16_00_16_
;
; AVX512VL-LABEL: shuffle_v16i16_00_16_00_16_00_16_00_16_08_24_08_24_08_24_08_24:
; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vmovdqu16 {{.*#+}} ymm2 = [0,16,0,16,0,16,0,16,8,24,8,24,8,24,8,24]
+; AVX512VL-NEXT: vmovdqu {{.*#+}} ymm2 = [0,16,0,16,0,16,0,16,8,24,8,24,8,24,8,24]
; AVX512VL-NEXT: vpermt2w %ymm1, %ymm2, %ymm0
; AVX512VL-NEXT: retq
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 16, i32 0, i32 16, i32 0, i32 16, i32 0, i32 16, i32 8, i32 24, i32 8, i32 24, i32 8, i32 24, i32 8, i32 24>
@@ -876,9 +876,9 @@ define <16 x i16> @shuffle_v16i16_16_16_
;
; AVX512VL-LABEL: shuffle_v16i16_16_16_16_16_04_05_06_07_24_24_24_24_12_13_14_15:
; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vmovdqu16 {{.*#+}} ymm2 = [0,0,0,0,20,21,22,23,8,8,8,8,28,29,30,31]
+; AVX512VL-NEXT: vmovdqu {{.*#+}} ymm2 = [0,0,0,0,20,21,22,23,8,8,8,8,28,29,30,31]
; AVX512VL-NEXT: vpermi2w %ymm0, %ymm1, %ymm2
-; AVX512VL-NEXT: vmovdqa64 %ymm2, %ymm0
+; AVX512VL-NEXT: vmovdqa %ymm2, %ymm0
; AVX512VL-NEXT: retq
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 16, i32 16, i32 16, i32 16, i32 4, i32 5, i32 6, i32 7, i32 24, i32 24, i32 24, i32 24, i32 12, i32 13, i32 14, i32 15>
ret <16 x i16> %shuffle
@@ -909,9 +909,9 @@ define <16 x i16> @shuffle_v16i16_19_18_
;
; AVX512VL-LABEL: shuffle_v16i16_19_18_17_16_07_06_05_04_27_26_25_24_15_14_13_12:
; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vmovdqu16 {{.*#+}} ymm2 = [3,2,1,0,23,22,21,20,11,10,9,8,31,30,29,28]
+; AVX512VL-NEXT: vmovdqu {{.*#+}} ymm2 = [3,2,1,0,23,22,21,20,11,10,9,8,31,30,29,28]
; AVX512VL-NEXT: vpermi2w %ymm0, %ymm1, %ymm2
-; AVX512VL-NEXT: vmovdqa64 %ymm2, %ymm0
+; AVX512VL-NEXT: vmovdqa %ymm2, %ymm0
; AVX512VL-NEXT: retq
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 19, i32 18, i32 17, i32 16, i32 7, i32 6, i32 5, i32 4, i32 27, i32 26, i32 25, i32 24, i32 15, i32 14, i32 13, i32 12>
ret <16 x i16> %shuffle
@@ -940,9 +940,9 @@ define <16 x i16> @shuffle_v16i16_19_18_
;
; AVX512VL-LABEL: shuffle_v16i16_19_18_17_16_03_02_01_00_27_26_25_24_11_10_09_08:
; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vmovdqu16 {{.*#+}} ymm2 = [3,2,1,0,19,18,17,16,11,10,9,8,27,26,25,24]
+; AVX512VL-NEXT: vmovdqu {{.*#+}} ymm2 = [3,2,1,0,19,18,17,16,11,10,9,8,27,26,25,24]
; AVX512VL-NEXT: vpermi2w %ymm0, %ymm1, %ymm2
-; AVX512VL-NEXT: vmovdqa64 %ymm2, %ymm0
+; AVX512VL-NEXT: vmovdqa %ymm2, %ymm0
; AVX512VL-NEXT: retq
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 19, i32 18, i32 17, i32 16, i32 3, i32 2, i32 1, i32 0, i32 27, i32 26, i32 25, i32 24, i32 11, i32 10, i32 9, i32 8>
ret <16 x i16> %shuffle
@@ -1129,7 +1129,7 @@ define <16 x i16> @shuffle_v16i16_00_16_
;
; AVX512VL-LABEL: shuffle_v16i16_00_16_01_17_02_18_03_19_12_28_13_29_14_30_15_31:
; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vmovdqu16 {{.*#+}} ymm2 = [0,16,1,17,2,18,3,19,12,28,13,29,14,30,15,31]
+; AVX512VL-NEXT: vmovdqu {{.*#+}} ymm2 = [0,16,1,17,2,18,3,19,12,28,13,29,14,30,15,31]
; AVX512VL-NEXT: vpermt2w %ymm1, %ymm2, %ymm0
; AVX512VL-NEXT: retq
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
@@ -1155,7 +1155,7 @@ define <16 x i16> @shuffle_v16i16_04_20_
;
; AVX512VL-LABEL: shuffle_v16i16_04_20_05_21_06_22_07_23_08_24_09_25_10_26_11_27:
; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vmovdqu16 {{.*#+}} ymm2 = [4,20,5,21,6,22,7,23,8,24,9,25,10,26,11,27]
+; AVX512VL-NEXT: vmovdqu {{.*#+}} ymm2 = [4,20,5,21,6,22,7,23,8,24,9,25,10,26,11,27]
; AVX512VL-NEXT: vpermt2w %ymm1, %ymm2, %ymm0
; AVX512VL-NEXT: retq
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23, i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27>
@@ -1408,7 +1408,7 @@ define <16 x i16> @shuffle_v16i16_00_00_
;
; AVX512VL-LABEL: shuffle_v16i16_00_00_00_00_04_04_04_04_16_16_16_16_20_20_20_20:
; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vmovdqu16 {{.*#+}} ymm2 = [0,0,0,0,4,4,4,4,16,16,16,16,20,20,20,20]
+; AVX512VL-NEXT: vmovdqu {{.*#+}} ymm2 = [0,0,0,0,4,4,4,4,16,16,16,16,20,20,20,20]
; AVX512VL-NEXT: vpermt2w %ymm1, %ymm2, %ymm0
; AVX512VL-NEXT: retq
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4, i32 16, i32 16, i32 16, i32 16, i32 20, i32 20, i32 20, i32 20>
@@ -1435,7 +1435,7 @@ define <16 x i16> @shuffle_v16i16_08_08_
;
; AVX512VL-LABEL: shuffle_v16i16_08_08_08_08_12_12_12_12_16_16_16_16_20_20_20_20:
; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vmovdqu16 {{.*#+}} ymm2 = [8,8,8,8,12,12,12,12,16,16,16,16,20,20,20,20]
+; AVX512VL-NEXT: vmovdqu {{.*#+}} ymm2 = [8,8,8,8,12,12,12,12,16,16,16,16,20,20,20,20]
; AVX512VL-NEXT: vpermt2w %ymm1, %ymm2, %ymm0
; AVX512VL-NEXT: retq
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 8, i32 8, i32 8, i32 8, i32 12, i32 12, i32 12, i32 12, i32 16, i32 16, i32 16, i32 16, i32 20, i32 20, i32 20, i32 20>
@@ -1463,7 +1463,7 @@ define <16 x i16> @shuffle_v16i16_08_08_
;
; AVX512VL-LABEL: shuffle_v16i16_08_08_08_08_12_12_12_12_24_24_24_24_28_28_28_28:
; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vmovdqu16 {{.*#+}} ymm2 = [8,8,8,8,12,12,12,12,24,24,24,24,28,28,28,28]
+; AVX512VL-NEXT: vmovdqu {{.*#+}} ymm2 = [8,8,8,8,12,12,12,12,24,24,24,24,28,28,28,28]
; AVX512VL-NEXT: vpermt2w %ymm1, %ymm2, %ymm0
; AVX512VL-NEXT: retq
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 8, i32 8, i32 8, i32 8, i32 12, i32 12, i32 12, i32 12, i32 24, i32 24, i32 24, i32 24, i32 28, i32 28, i32 28, i32 28>
@@ -1490,7 +1490,7 @@ define <16 x i16> @shuffle_v16i16_00_00_
;
; AVX512VL-LABEL: shuffle_v16i16_00_00_00_00_04_04_04_04_24_24_24_24_28_28_28_28:
; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vmovdqu16 {{.*#+}} ymm2 = [0,0,0,0,4,4,4,4,24,24,24,24,28,28,28,28]
+; AVX512VL-NEXT: vmovdqu {{.*#+}} ymm2 = [0,0,0,0,4,4,4,4,24,24,24,24,28,28,28,28]
; AVX512VL-NEXT: vpermt2w %ymm1, %ymm2, %ymm0
; AVX512VL-NEXT: retq
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4, i32 24, i32 24, i32 24, i32 24, i32 28, i32 28, i32 28, i32 28>
@@ -1514,7 +1514,7 @@ define <16 x i16> @shuffle_v16i16_00_16_
;
; AVX512VL-LABEL: shuffle_v16i16_00_16_01_17_02_18_03_19_04_20_05_21_06_22_07_23:
; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vmovdqu16 {{.*#+}} ymm2 = [0,16,1,17,2,18,3,19,4,20,5,21,6,22,7,23]
+; AVX512VL-NEXT: vmovdqu {{.*#+}} ymm2 = [0,16,1,17,2,18,3,19,4,20,5,21,6,22,7,23]
; AVX512VL-NEXT: vpermt2w %ymm1, %ymm2, %ymm0
; AVX512VL-NEXT: retq
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
@@ -1748,7 +1748,7 @@ define <16 x i16> @shuffle_v16i16_01_02_
;
; AVX512VL-LABEL: shuffle_v16i16_01_02_03_04_05_06_07_00_17_18_19_20_21_22_23_16:
; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vmovdqu16 {{.*#+}} ymm2 = [1,2,3,4,5,6,7,0,17,18,19,20,21,22,23,16]
+; AVX512VL-NEXT: vmovdqu {{.*#+}} ymm2 = [1,2,3,4,5,6,7,0,17,18,19,20,21,22,23,16]
; AVX512VL-NEXT: vpermt2w %ymm1, %ymm2, %ymm0
; AVX512VL-NEXT: retq
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 16>
@@ -1771,7 +1771,7 @@ define <16 x i16> @shuffle_v16i16_07_00_
;
; AVX512VL-LABEL: shuffle_v16i16_07_00_01_02_03_04_05_06_23_16_17_18_19_20_21_22:
; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vmovdqu16 {{.*#+}} ymm2 = [7,0,1,2,3,4,5,6,23,16,17,18,19,20,21,22]
+; AVX512VL-NEXT: vmovdqu {{.*#+}} ymm2 = [7,0,1,2,3,4,5,6,23,16,17,18,19,20,21,22]
; AVX512VL-NEXT: vpermt2w %ymm1, %ymm2, %ymm0
; AVX512VL-NEXT: retq
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 23, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22>
@@ -1867,7 +1867,7 @@ define <16 x i16> @shuffle_v16i16_04_05_
;
; AVX512VL-LABEL: shuffle_v16i16_04_05_06_07_16_17_18_27_12_13_14_15_24_25_26_27:
; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vmovdqu16 {{.*#+}} ymm2 = [4,5,6,7,16,17,18,27,12,13,14,15,24,25,26,27]
+; AVX512VL-NEXT: vmovdqu {{.*#+}} ymm2 = [4,5,6,7,16,17,18,27,12,13,14,15,24,25,26,27]
; AVX512VL-NEXT: vpermt2w %ymm1, %ymm2, %ymm0
; AVX512VL-NEXT: retq
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 27, i32 12, i32 13, i32 14, i32 15, i32 24, i32 25, i32 26, i32 27>
@@ -2211,7 +2211,7 @@ define <16 x i16> @shuffle_v16i16_07_05_
; AVX512VL-LABEL: shuffle_v16i16_07_05_06_04_03_01_02_08_15_13_14_12_11_09_10_08:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vextracti32x4 $1, %ymm0, %xmm1
-; AVX512VL-NEXT: vmovdqu8 {{.*#+}} xmm2 = [14,15,10,11,12,13,8,9,6,7,2,3,4,5,0,1]
+; AVX512VL-NEXT: vmovdqu {{.*#+}} xmm2 = [14,15,10,11,12,13,8,9,6,7,2,3,4,5,0,1]
; AVX512VL-NEXT: vpshufb %xmm2, %xmm1, %xmm3
; AVX512VL-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3,4,5,6,7]
; AVX512VL-NEXT: vpshufb %xmm2, %xmm0, %xmm0
@@ -2415,7 +2415,7 @@ define <16 x i16> @shuffle_v16i16_02_06_
; AVX512VL-LABEL: shuffle_v16i16_02_06_04_00_05_01_07_11_10_14_12_08_13_09_15_11:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vextracti32x4 $1, %ymm0, %xmm1
-; AVX512VL-NEXT: vmovdqu8 {{.*#+}} xmm2 = [4,5,12,13,8,9,0,1,10,11,2,3,14,15,6,7]
+; AVX512VL-NEXT: vmovdqu {{.*#+}} xmm2 = [4,5,12,13,8,9,0,1,10,11,2,3,14,15,6,7]
; AVX512VL-NEXT: vpshufb %xmm2, %xmm1, %xmm3
; AVX512VL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7]
; AVX512VL-NEXT: vpshufb %xmm2, %xmm0, %xmm0
@@ -2449,7 +2449,7 @@ define <16 x i16> @shuffle_v16i16_02_00_
; AVX512VL-LABEL: shuffle_v16i16_02_00_06_04_05_01_07_11_10_08_14_12_13_09_15_11:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vextracti32x4 $1, %ymm0, %xmm1
-; AVX512VL-NEXT: vmovdqu8 {{.*#+}} xmm2 = [4,5,0,1,12,13,8,9,10,11,2,3,14,15,6,7]
+; AVX512VL-NEXT: vmovdqu {{.*#+}} xmm2 = [4,5,0,1,12,13,8,9,10,11,2,3,14,15,6,7]
; AVX512VL-NEXT: vpshufb %xmm2, %xmm1, %xmm3
; AVX512VL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7]
; AVX512VL-NEXT: vpshufb %xmm2, %xmm0, %xmm0
@@ -2483,7 +2483,7 @@ define <16 x i16> @shuffle_v16i16_02_06_
; AVX512VL-LABEL: shuffle_v16i16_02_06_04_00_01_03_07_13_10_14_12_08_09_11_15_13:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vextracti32x4 $1, %ymm0, %xmm1
-; AVX512VL-NEXT: vmovdqu8 {{.*#+}} xmm2 = [4,5,12,13,8,9,0,1,2,3,6,7,14,15,10,11]
+; AVX512VL-NEXT: vmovdqu {{.*#+}} xmm2 = [4,5,12,13,8,9,0,1,2,3,6,7,14,15,10,11]
; AVX512VL-NEXT: vpshufb %xmm2, %xmm1, %xmm3
; AVX512VL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4],xmm1[5],xmm0[6,7]
; AVX512VL-NEXT: vpshufb %xmm2, %xmm0, %xmm0
@@ -2517,7 +2517,7 @@ define <16 x i16> @shuffle_v16i16_06_06_
; AVX512VL-LABEL: shuffle_v16i16_06_06_07_05_01_06_04_11_14_14_15_13_09_14_12_11:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vextracti32x4 $1, %ymm0, %xmm1
-; AVX512VL-NEXT: vmovdqu8 {{.*#+}} xmm2 = [12,13,12,13,14,15,10,11,2,3,12,13,8,9,6,7]
+; AVX512VL-NEXT: vmovdqu {{.*#+}} xmm2 = [12,13,12,13,14,15,10,11,2,3,12,13,8,9,6,7]
; AVX512VL-NEXT: vpshufb %xmm2, %xmm1, %xmm3
; AVX512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3]
; AVX512VL-NEXT: vpshufb %xmm2, %xmm0, %xmm0
@@ -2997,7 +2997,7 @@ define <16 x i16> @shuffle_v16i16_03_07_
; AVX512VL-LABEL: shuffle_v16i16_03_07_01_00_02_07_03_13_11_15_09_08_10_15_11_13:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vextracti32x4 $1, %ymm0, %xmm1
-; AVX512VL-NEXT: vmovdqu8 {{.*#+}} xmm2 = [6,7,14,15,2,3,0,1,4,5,14,15,6,7,10,11]
+; AVX512VL-NEXT: vmovdqu {{.*#+}} xmm2 = [6,7,14,15,2,3,0,1,4,5,14,15,6,7,10,11]
; AVX512VL-NEXT: vpshufb %xmm2, %xmm1, %xmm3
; AVX512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3]
; AVX512VL-NEXT: vpshufb %xmm2, %xmm0, %xmm0
@@ -3033,7 +3033,7 @@ define <16 x i16> @shuffle_v16i16_00_16_
;
; AVX512VL-LABEL: shuffle_v16i16_00_16_01_17_02_18_03_27_08_24_09_25_10_26_11_27:
; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vmovdqu16 {{.*#+}} ymm2 = [0,16,1,17,2,18,3,27,8,24,9,25,10,26,11,27]
+; AVX512VL-NEXT: vmovdqu {{.*#+}} ymm2 = [0,16,1,17,2,18,3,27,8,24,9,25,10,26,11,27]
; AVX512VL-NEXT: vpermt2w %ymm1, %ymm2, %ymm0
; AVX512VL-NEXT: retq
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 27, i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27>
@@ -3066,7 +3066,7 @@ define <16 x i16> @shuffle_v16i16_00_20_
;
; AVX512VL-LABEL: shuffle_v16i16_00_20_01_21_02_22_03_31_08_28_09_29_10_30_11_31:
; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vmovdqu16 {{.*#+}} ymm2 = [0,20,1,21,2,22,3,31,8,28,9,29,10,30,11,31]
+; AVX512VL-NEXT: vmovdqu {{.*#+}} ymm2 = [0,20,1,21,2,22,3,31,8,28,9,29,10,30,11,31]
; AVX512VL-NEXT: vpermt2w %ymm1, %ymm2, %ymm0
; AVX512VL-NEXT: retq
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 20, i32 1, i32 21, i32 2, i32 22, i32 3, i32 31, i32 8, i32 28, i32 9, i32 29, i32 10, i32 30, i32 11, i32 31>
@@ -3099,7 +3099,7 @@ define <16 x i16> @shuffle_v16i16_04_20_
;
; AVX512VL-LABEL: shuffle_v16i16_04_20_05_21_06_22_07_31_12_28_13_29_14_30_15_31:
; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vmovdqu16 {{.*#+}} ymm2 = [4,20,5,21,6,22,7,31,12,28,13,29,14,30,15,31]
+; AVX512VL-NEXT: vmovdqu {{.*#+}} ymm2 = [4,20,5,21,6,22,7,31,12,28,13,29,14,30,15,31]
; AVX512VL-NEXT: vpermt2w %ymm1, %ymm2, %ymm0
; AVX512VL-NEXT: retq
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 31, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
@@ -3132,7 +3132,7 @@ define <16 x i16> @shuffle_v16i16_04_16_
;
; AVX512VL-LABEL: shuffle_v16i16_04_16_05_17_06_18_07_27_12_24_13_25_14_26_15_27:
; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vmovdqu16 {{.*#+}} ymm2 = [4,16,5,17,6,18,7,27,12,24,13,25,14,26,15,27]
+; AVX512VL-NEXT: vmovdqu {{.*#+}} ymm2 = [4,16,5,17,6,18,7,27,12,24,13,25,14,26,15,27]
; AVX512VL-NEXT: vpermt2w %ymm1, %ymm2, %ymm0
; AVX512VL-NEXT: retq
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 16, i32 5, i32 17, i32 6, i32 18, i32 7, i32 27, i32 12, i32 24, i32 13, i32 25, i32 14, i32 26, i32 15, i32 27>
@@ -3172,7 +3172,7 @@ define <16 x i16> @shuffle_v16i16_00_16_
;
; AVX512VL-LABEL: shuffle_v16i16_00_16_01_17_06_22_07_31_08_24_09_25_14_30_15_31:
; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vmovdqu16 {{.*#+}} ymm2 = [0,16,1,17,6,22,7,31,8,24,9,25,14,30,15,31]
+; AVX512VL-NEXT: vmovdqu {{.*#+}} ymm2 = [0,16,1,17,6,22,7,31,8,24,9,25,14,30,15,31]
; AVX512VL-NEXT: vpermt2w %ymm1, %ymm2, %ymm0
; AVX512VL-NEXT: retq
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 6, i32 22, i32 7, i32 31, i32 8, i32 24, i32 9, i32 25, i32 14, i32 30, i32 15, i32 31>
@@ -3209,7 +3209,7 @@ define <16 x i16> @shuffle_v16i16_00_20_
;
; AVX512VL-LABEL: shuffle_v16i16_00_20_01_21_06_16_07_25_08_28_09_29_14_24_15_25:
; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vmovdqu16 {{.*#+}} ymm2 = [0,20,1,21,6,16,7,25,8,28,9,29,14,24,15,25]
+; AVX512VL-NEXT: vmovdqu {{.*#+}} ymm2 = [0,20,1,21,6,16,7,25,8,28,9,29,14,24,15,25]
; AVX512VL-NEXT: vpermt2w %ymm1, %ymm2, %ymm0
; AVX512VL-NEXT: retq
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 20, i32 1, i32 21, i32 6, i32 16, i32 7, i32 25, i32 8, i32 28, i32 9, i32 29, i32 14, i32 24, i32 15, i32 25>
@@ -3245,7 +3245,7 @@ define <16 x i16> @shuffle_v16i16_01_00_
;
; AVX512VL-LABEL: shuffle_v16i16_01_00_17_16_03_02_19_26_09_08_25_24_11_10_27_26:
; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vmovdqu16 {{.*#+}} ymm2 = [1,0,17,16,3,2,19,26,9,8,25,24,11,10,27,26]
+; AVX512VL-NEXT: vmovdqu {{.*#+}} ymm2 = [1,0,17,16,3,2,19,26,9,8,25,24,11,10,27,26]
; AVX512VL-NEXT: vpermt2w %ymm1, %ymm2, %ymm0
; AVX512VL-NEXT: retq
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 1, i32 0, i32 17, i32 16, i32 3, i32 2, i32 19, i32 26, i32 9, i32 8, i32 25, i32 24, i32 11, i32 10, i32 27, i32 26>
@@ -3278,9 +3278,9 @@ define <16 x i16> @shuffle_v16i16_16_00_
;
; AVX512VL-LABEL: shuffle_v16i16_16_00_17_01_18_02_19_11_24_08_25_09_26_10_27_11:
; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vmovdqu16 {{.*#+}} ymm2 = [0,16,1,17,2,18,3,27,8,24,9,25,10,26,11,27]
+; AVX512VL-NEXT: vmovdqu {{.*#+}} ymm2 = [0,16,1,17,2,18,3,27,8,24,9,25,10,26,11,27]
; AVX512VL-NEXT: vpermi2w %ymm0, %ymm1, %ymm2
-; AVX512VL-NEXT: vmovdqa64 %ymm2, %ymm0
+; AVX512VL-NEXT: vmovdqa %ymm2, %ymm0
; AVX512VL-NEXT: retq
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 16, i32 0, i32 17, i32 1, i32 18, i32 2, i32 19, i32 11, i32 24, i32 8, i32 25, i32 9, i32 26, i32 10, i32 27, i32 11>
ret <16 x i16> %shuffle
@@ -3312,9 +3312,9 @@ define <16 x i16> @shuffle_v16i16_20_04_
;
; AVX512VL-LABEL: shuffle_v16i16_20_04_21_05_22_06_23_15_28_12_29_13_30_14_31_15:
; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vmovdqu16 {{.*#+}} ymm2 = [4,20,5,21,6,22,7,31,12,28,13,29,14,30,15,31]
+; AVX512VL-NEXT: vmovdqu {{.*#+}} ymm2 = [4,20,5,21,6,22,7,31,12,28,13,29,14,30,15,31]
; AVX512VL-NEXT: vpermi2w %ymm0, %ymm1, %ymm2
-; AVX512VL-NEXT: vmovdqa64 %ymm2, %ymm0
+; AVX512VL-NEXT: vmovdqa %ymm2, %ymm0
; AVX512VL-NEXT: retq
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 20, i32 4, i32 21, i32 5, i32 22, i32 6, i32 23, i32 15, i32 28, i32 12, i32 29, i32 13, i32 30, i32 14, i32 31, i32 15>
ret <16 x i16> %shuffle
@@ -3350,7 +3350,7 @@ define <16 x i16> @shuffle_v16i16_00_02_
;
; AVX512VL-LABEL: shuffle_v16i16_00_02_01_03_20_22_21_31_08_10_09_11_28_30_29_31:
; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vmovdqu16 {{.*#+}} ymm2 = [0,2,1,3,20,22,21,31,8,10,9,11,28,30,29,31]
+; AVX512VL-NEXT: vmovdqu {{.*#+}} ymm2 = [0,2,1,3,20,22,21,31,8,10,9,11,28,30,29,31]
; AVX512VL-NEXT: vpermt2w %ymm1, %ymm2, %ymm0
; AVX512VL-NEXT: retq
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 2, i32 1, i32 3, i32 20, i32 22, i32 21, i32 31, i32 8, i32 10, i32 9, i32 11, i32 28, i32 30, i32 29, i32 31>
@@ -3380,7 +3380,7 @@ define <16 x i16> @shuffle_v16i16_04_04_
;
; AVX512VL-LABEL: shuffle_v16i16_04_04_03_18_uu_uu_uu_uu_12_12_11_26_uu_uu_uu_uu:
; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vmovdqu16 {{.*#+}} ymm2 = <4,4,3,18,u,u,u,u,12,12,11,26,u,u,u,u>
+; AVX512VL-NEXT: vmovdqu {{.*#+}} ymm2 = <4,4,3,18,u,u,u,u,12,12,11,26,u,u,u,u>
; AVX512VL-NEXT: vpermt2w %ymm1, %ymm2, %ymm0
; AVX512VL-NEXT: retq
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 4, i32 3, i32 18, i32 undef, i32 undef, i32 undef, i32 undef, i32 12, i32 12, i32 11, i32 26, i32 undef, i32 undef, i32 undef, i32 undef>
@@ -3408,7 +3408,7 @@ define <16 x i16> @shuffle_v16i16_00_03_
;
; AVX512VL-LABEL: shuffle_v16i16_00_03_02_21_uu_uu_uu_uu_08_11_10_29_uu_uu_uu_uu:
; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vmovdqu16 {{.*#+}} ymm2 = <0,3,2,21,u,u,u,u,8,11,10,29,u,u,u,u>
+; AVX512VL-NEXT: vmovdqu {{.*#+}} ymm2 = <0,3,2,21,u,u,u,u,8,11,10,29,u,u,u,u>
; AVX512VL-NEXT: vpermt2w %ymm1, %ymm2, %ymm0
; AVX512VL-NEXT: retq
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 3, i32 2, i32 21, i32 undef, i32 undef, i32 undef, i32 undef, i32 8, i32 11, i32 10, i32 29, i32 undef, i32 undef, i32 undef, i32 undef>
@@ -3449,7 +3449,7 @@ define <16 x i16> @shuffle_v16i16_00_01_
;
; AVX512VL-LABEL: shuffle_v16i16_00_01_02_21_uu_uu_uu_uu_08_09_10_29_uu_uu_uu_uu:
; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vmovdqu16 {{.*#+}} ymm2 = <0,1,2,21,u,u,u,u,8,9,10,29,u,u,u,u>
+; AVX512VL-NEXT: vmovdqu {{.*#+}} ymm2 = <0,1,2,21,u,u,u,u,8,9,10,29,u,u,u,u>
; AVX512VL-NEXT: vpermt2w %ymm1, %ymm2, %ymm0
; AVX512VL-NEXT: retq
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 21, i32 undef, i32 undef, i32 undef, i32 undef, i32 8, i32 9, i32 10, i32 29, i32 undef, i32 undef, i32 undef, i32 undef>
@@ -3475,9 +3475,9 @@ define <16 x i16> @shuffle_v16i16_uu_uu_
;
; AVX512VL-LABEL: shuffle_v16i16_uu_uu_uu_uu_20_21_22_11_uu_uu_uu_uu_28_29_30_11:
; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vmovdqu16 {{.*#+}} ymm2 = <u,u,u,u,4,5,6,27,u,u,u,u,12,13,14,27>
+; AVX512VL-NEXT: vmovdqu {{.*#+}} ymm2 = <u,u,u,u,4,5,6,27,u,u,u,u,12,13,14,27>
; AVX512VL-NEXT: vpermi2w %ymm0, %ymm1, %ymm2
-; AVX512VL-NEXT: vmovdqa64 %ymm2, %ymm0
+; AVX512VL-NEXT: vmovdqa %ymm2, %ymm0
; AVX512VL-NEXT: retq
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 20, i32 21, i32 22, i32 11, i32 undef, i32 undef, i32 undef, i32 undef, i32 28, i32 29, i32 30, i32 11>
ret <16 x i16> %shuffle
@@ -3503,9 +3503,9 @@ define <16 x i16> @shuffle_v16i16_20_21_
;
; AVX512VL-LABEL: shuffle_v16i16_20_21_22_03_uu_uu_uu_uu_28_29_30_11_uu_uu_uu_uu:
; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vmovdqu16 {{.*#+}} ymm2 = <4,5,6,19,u,u,u,u,12,13,14,27,u,u,u,u>
+; AVX512VL-NEXT: vmovdqu {{.*#+}} ymm2 = <4,5,6,19,u,u,u,u,12,13,14,27,u,u,u,u>
; AVX512VL-NEXT: vpermi2w %ymm0, %ymm1, %ymm2
-; AVX512VL-NEXT: vmovdqa64 %ymm2, %ymm0
+; AVX512VL-NEXT: vmovdqa %ymm2, %ymm0
; AVX512VL-NEXT: retq
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 20, i32 21, i32 22, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 28, i32 29, i32 30, i32 11, i32 undef, i32 undef, i32 undef, i32 undef>
ret <16 x i16> %shuffle
@@ -3537,7 +3537,7 @@ define <16 x i16> @shuffle_v16i16_00_01_
;
; AVX512VL-LABEL: shuffle_v16i16_00_01_02_21_20_21_22_11_08_09_10_29_28_29_30_11:
; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vmovdqu16 {{.*#+}} ymm2 = [0,1,2,21,20,21,22,11,8,9,10,29,28,29,30,11]
+; AVX512VL-NEXT: vmovdqu {{.*#+}} ymm2 = [0,1,2,21,20,21,22,11,8,9,10,29,28,29,30,11]
; AVX512VL-NEXT: vpermt2w %ymm1, %ymm2, %ymm0
; AVX512VL-NEXT: retq
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 21, i32 20, i32 21, i32 22, i32 11, i32 8, i32 9, i32 10, i32 29, i32 28, i32 29, i32 30, i32 11>
@@ -3563,7 +3563,7 @@ define <16 x i16> @shuffle_v16i16_00_17_
;
; AVX512VL-LABEL: shuffle_v16i16_00_17_02_03_20_21_22_15_08_25_10_11_28_29_30_15:
; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vmovdqu16 {{.*#+}} ymm2 = [0,17,2,3,20,21,22,15,8,25,10,11,28,29,30,15]
+; AVX512VL-NEXT: vmovdqu {{.*#+}} ymm2 = [0,17,2,3,20,21,22,15,8,25,10,11,28,29,30,15]
; AVX512VL-NEXT: vpermt2w %ymm1, %ymm2, %ymm0
; AVX512VL-NEXT: retq
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 17, i32 2, i32 3, i32 20, i32 21, i32 22, i32 15, i32 8, i32 25, i32 10, i32 11, i32 28, i32 29, i32 30, i32 15>
@@ -3596,7 +3596,7 @@ define <16 x i16> @shuffle_v16i16_uu_uu_
;
; AVX512VL-LABEL: shuffle_v16i16_uu_uu_uu_01_uu_05_07_25_uu_uu_uu_09_uu_13_15_25:
; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vmovdqu16 {{.*#+}} ymm2 = <u,u,u,1,u,5,7,25,u,u,u,9,u,13,15,25>
+; AVX512VL-NEXT: vmovdqu {{.*#+}} ymm2 = <u,u,u,1,u,5,7,25,u,u,u,9,u,13,15,25>
; AVX512VL-NEXT: vpermt2w %ymm1, %ymm2, %ymm0
; AVX512VL-NEXT: retq
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 1, i32 undef, i32 5, i32 7, i32 25, i32 undef, i32 undef, i32 undef, i32 9, i32 undef, i32 13, i32 15, i32 25>
@@ -3627,9 +3627,9 @@ define <16 x i16> @shuffle_v16i16_uu_uu_
;
; AVX512VL-LABEL: shuffle_v16i16_uu_uu_04_uu_16_18_20_uu_uu_uu_12_uu_24_26_28_uu:
; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vmovdqu16 {{.*#+}} ymm2 = <u,u,20,u,0,2,4,u,u,u,28,u,8,10,12,u>
+; AVX512VL-NEXT: vmovdqu {{.*#+}} ymm2 = <u,u,20,u,0,2,4,u,u,u,28,u,8,10,12,u>
; AVX512VL-NEXT: vpermi2w %ymm0, %ymm1, %ymm2
-; AVX512VL-NEXT: vmovdqa64 %ymm2, %ymm0
+; AVX512VL-NEXT: vmovdqa %ymm2, %ymm0
; AVX512VL-NEXT: retq
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 undef, i32 4, i32 undef, i32 16, i32 18, i32 20, i32 undef, i32 undef, i32 undef, i32 12, i32 undef, i32 24, i32 26, i32 28, i32 undef>
ret <16 x i16> %shuffle
@@ -3658,7 +3658,7 @@ define <16 x i16> @shuffle_v16i16_21_22_
;
; AVX512VL-LABEL: shuffle_v16i16_21_22_23_00_01_02_03_12_29_30_31_08_09_10_11_12:
; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vmovdqu16 {{.*#+}} ymm2 = [21,22,23,0,1,2,3,12,29,30,31,8,9,10,11,12]
+; AVX512VL-NEXT: vmovdqu {{.*#+}} ymm2 = [21,22,23,0,1,2,3,12,29,30,31,8,9,10,11,12]
; AVX512VL-NEXT: vpermt2w %ymm1, %ymm2, %ymm0
; AVX512VL-NEXT: retq
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 21, i32 22, i32 23, i32 0, i32 1, i32 2, i32 3, i32 12, i32 29, i32 30, i32 31, i32 8, i32 9, i32 10, i32 11, i32 12>
@@ -3773,9 +3773,9 @@ define <16 x i16> @shuffle_v16i16_19_20_
;
; AVX512VL-LABEL: shuffle_v16i16_19_20_21_22_23_00_01_10_27_28_29_30_31_08_09_10:
; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vmovdqu16 {{.*#+}} ymm2 = [3,4,5,6,7,16,17,26,11,12,13,14,15,24,25,26]
+; AVX512VL-NEXT: vmovdqu {{.*#+}} ymm2 = [3,4,5,6,7,16,17,26,11,12,13,14,15,24,25,26]
; AVX512VL-NEXT: vpermi2w %ymm0, %ymm1, %ymm2
-; AVX512VL-NEXT: vmovdqa64 %ymm2, %ymm0
+; AVX512VL-NEXT: vmovdqa %ymm2, %ymm0
; AVX512VL-NEXT: retq
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 19, i32 20, i32 21, i32 22, i32 23, i32 0, i32 1, i32 10, i32 27, i32 28, i32 29, i32 30, i32 31, i32 8, i32 9, i32 10>
ret <16 x i16> %shuffle
@@ -3889,7 +3889,7 @@ define <16 x i16> @shuffle_v16i16_03_04_
;
; AVX512VL-LABEL: shuffle_v16i16_03_04_05_06_07_16_17_26_11_12_13_14_15_24_25_26:
; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vmovdqu16 {{.*#+}} ymm2 = [3,4,5,6,7,16,17,26,11,12,13,14,15,24,25,26]
+; AVX512VL-NEXT: vmovdqu {{.*#+}} ymm2 = [3,4,5,6,7,16,17,26,11,12,13,14,15,24,25,26]
; AVX512VL-NEXT: vpermt2w %ymm1, %ymm2, %ymm0
; AVX512VL-NEXT: retq
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 26, i32 11, i32 12, i32 13, i32 14, i32 15, i32 24, i32 25, i32 26>
@@ -3937,9 +3937,9 @@ define <16 x i16> @shuffle_v16i16_05_06_
;
; AVX512VL-LABEL: shuffle_v16i16_05_06_07_16_17_18_19_28_13_14_15_24_25_26_27_28:
; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vmovdqu16 {{.*#+}} ymm2 = [21,22,23,0,1,2,3,12,29,30,31,8,9,10,11,12]
+; AVX512VL-NEXT: vmovdqu {{.*#+}} ymm2 = [21,22,23,0,1,2,3,12,29,30,31,8,9,10,11,12]
; AVX512VL-NEXT: vpermi2w %ymm0, %ymm1, %ymm2
-; AVX512VL-NEXT: vmovdqa64 %ymm2, %ymm0
+; AVX512VL-NEXT: vmovdqa %ymm2, %ymm0
; AVX512VL-NEXT: retq
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 28, i32 13, i32 14, i32 15, i32 24, i32 25, i32 26, i32 27, i32 28>
ret <16 x i16> %shuffle
@@ -3985,9 +3985,9 @@ define <16 x i16> @shuffle_v16i16_23_uu_
;
; AVX512VL-LABEL: shuffle_v16i16_23_uu_03_uu_20_20_05_uu_31_uu_11_uu_28_28_13_uu:
; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vmovdqu16 {{.*#+}} ymm2 = <7,u,19,u,4,4,21,u,15,u,27,u,12,12,29,u>
+; AVX512VL-NEXT: vmovdqu {{.*#+}} ymm2 = <7,u,19,u,4,4,21,u,15,u,27,u,12,12,29,u>
; AVX512VL-NEXT: vpermi2w %ymm0, %ymm1, %ymm2
-; AVX512VL-NEXT: vmovdqa64 %ymm2, %ymm0
+; AVX512VL-NEXT: vmovdqa %ymm2, %ymm0
; AVX512VL-NEXT: retq
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 23, i32 undef, i32 3, i32 undef, i32 20, i32 20, i32 5, i32 undef, i32 31, i32 undef, i32 11, i32 undef, i32 28, i32 28, i32 13, i32 undef>
ret <16 x i16> %shuffle
@@ -4129,7 +4129,7 @@ define <16 x i16> @shuffle_v16i16_02_18_
;
; AVX512VL-LABEL: shuffle_v16i16_02_18_03_19_00_16_01_17_10_26_11_27_08_24_09_25:
; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vmovdqu16 {{.*#+}} ymm2 = [2,18,3,19,0,16,1,17,10,26,11,27,8,24,9,25]
+; AVX512VL-NEXT: vmovdqu {{.*#+}} ymm2 = [2,18,3,19,0,16,1,17,10,26,11,27,8,24,9,25]
; AVX512VL-NEXT: vpermt2w %ymm1, %ymm2, %ymm0
; AVX512VL-NEXT: retq
%1 = shufflevector <16 x i16> %a0, <16 x i16> %a1, <16 x i32> <i32 2, i32 18, i32 3, i32 19, i32 0, i32 16, i32 1, i32 17, i32 10, i32 26, i32 11, i32 27, i32 8, i32 24, i32 9, i32 25>
@@ -4166,7 +4166,7 @@ define <16 x i16> @shuffle_v16i16_02_18_
;
; AVX512VL-LABEL: shuffle_v16i16_02_18_03_19_10_26_11_27_00_16_01_17_08_24_09_25:
; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vmovdqu16 {{.*#+}} ymm2 = [2,18,3,19,0,16,1,17,10,26,11,27,8,24,9,25]
+; AVX512VL-NEXT: vmovdqu {{.*#+}} ymm2 = [2,18,3,19,0,16,1,17,10,26,11,27,8,24,9,25]
; AVX512VL-NEXT: vpermi2w %ymm1, %ymm0, %ymm2
; AVX512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm2[0,2,1,3]
; AVX512VL-NEXT: retq
@@ -4256,9 +4256,9 @@ define <16 x i16> @PR24935(<16 x i16> %a
;
; AVX512VL-LABEL: PR24935:
; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vmovdqu16 {{.*#+}} ymm2 = [11,10,17,13,10,7,27,0,17,25,0,12,29,20,16,8]
+; AVX512VL-NEXT: vmovdqu {{.*#+}} ymm2 = [11,10,17,13,10,7,27,0,17,25,0,12,29,20,16,8]
; AVX512VL-NEXT: vpermi2w %ymm0, %ymm1, %ymm2
-; AVX512VL-NEXT: vmovdqa64 %ymm2, %ymm0
+; AVX512VL-NEXT: vmovdqa %ymm2, %ymm0
; AVX512VL-NEXT: retq
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 27, i32 26, i32 1, i32 29, i32 26, i32 23, i32 11, i32 16, i32 1, i32 9, i32 16, i32 28, i32 13, i32 4, i32 0, i32 24>
ret <16 x i16> %shuffle
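
For reference, here is one of the v16i16 tests above reassembled in its post-patch form (only the AVX512VL check block is shown; the parameter list and trailing ret are inferred from the IR body and the file's other tests, so treat them as a sketch rather than an exact copy of the file). The only change relative to the pre-patch version is the vmovdqu16 -> vmovdqu spelling of the unmasked index-constant load; the vpermt2w and the shuffle mask are untouched:

define <16 x i16> @shuffle_v16i16_00_16_01_17_02_18_03_27_08_24_09_25_10_26_11_27(<16 x i16> %a, <16 x i16> %b) {
; AVX512VL-LABEL: shuffle_v16i16_00_16_01_17_02_18_03_27_08_24_09_25_10_26_11_27:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vmovdqu {{.*#+}} ymm2 = [0,16,1,17,2,18,3,27,8,24,9,25,10,26,11,27]
; AVX512VL-NEXT:    vpermt2w %ymm1, %ymm2, %ymm0
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 27, i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27>
  ret <16 x i16> %shuffle
}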
Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v32.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v32.ll?rev=290663&r1=290662&r2=290663&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v32.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v32.ll Wed Dec 28 04:12:48 2016
@@ -317,10 +317,10 @@ define <32 x i8> @shuffle_v32i8_00_00_00
; AVX512VL-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
-; AVX512VL-NEXT: vpxord %ymm2, %ymm2, %ymm2
+; AVX512VL-NEXT: vpxor %ymm2, %ymm2, %ymm2
; AVX512VL-NEXT: vpshufb %ymm2, %ymm1, %ymm1
; AVX512VL-NEXT: vpbroadcastb %xmm0, %xmm0
-; AVX512VL-NEXT: vmovdqu8 {{.*#+}} ymm2 = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
+; AVX512VL-NEXT: vmovdqu {{.*#+}} ymm2 = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; AVX512VL-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
; AVX512VL-NEXT: retq
%shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 16, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
@@ -349,7 +349,7 @@ define <32 x i8> @shuffle_v32i8_00_00_00
; AVX512VL-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_17_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
-; AVX512VL-NEXT: vmovdqu8 {{.*#+}} ymm2 = <0,255,u,u,u,u,u,u,u,u,u,u,u,u,u,u,255,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u>
+; AVX512VL-NEXT: vmovdqu {{.*#+}} ymm2 = <0,255,u,u,u,u,u,u,u,u,u,u,u,u,u,u,255,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u>
; AVX512VL-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
; AVX512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
; AVX512VL-NEXT: retq
@@ -379,7 +379,7 @@ define <32 x i8> @shuffle_v32i8_00_00_00
; AVX512VL-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_18_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
-; AVX512VL-NEXT: vmovdqu8 {{.*#+}} ymm2 = <0,0,255,255,u,u,u,u,u,u,u,u,u,u,u,u,255,255,u,u,u,u,u,u,u,u,u,u,u,u,u,u>
+; AVX512VL-NEXT: vmovdqu {{.*#+}} ymm2 = <0,0,255,255,u,u,u,u,u,u,u,u,u,u,u,u,255,255,u,u,u,u,u,u,u,u,u,u,u,u,u,u>
; AVX512VL-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
; AVX512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
; AVX512VL-NEXT: retq
@@ -409,7 +409,7 @@ define <32 x i8> @shuffle_v32i8_00_00_00
; AVX512VL-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_19_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
-; AVX512VL-NEXT: vmovdqu8 {{.*#+}} ymm2 = <0,0,255,255,u,u,u,u,u,u,u,u,u,u,u,u,255,255,u,u,u,u,u,u,u,u,u,u,u,u,u,u>
+; AVX512VL-NEXT: vmovdqu {{.*#+}} ymm2 = <0,0,255,255,u,u,u,u,u,u,u,u,u,u,u,u,255,255,u,u,u,u,u,u,u,u,u,u,u,u,u,u>
; AVX512VL-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
; AVX512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
; AVX512VL-NEXT: retq
@@ -749,7 +749,7 @@ define <32 x i8> @shuffle_v32i8_00_00_00
;
; AVX512VL-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16:
; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vpxord %ymm1, %ymm1, %ymm1
+; AVX512VL-NEXT: vpxor %ymm1, %ymm1, %ymm1
; AVX512VL-NEXT: vpshufb %ymm1, %ymm0, %ymm0
; AVX512VL-NEXT: retq
%shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
@@ -1037,7 +1037,7 @@ define <32 x i8> @shuffle_v32i8_00_33_02
;
; AVX512VL-LABEL: shuffle_v32i8_00_33_02_35_04_37_06_39_08_41_10_43_12_45_14_47_16_49_18_51_20_53_22_55_24_57_26_59_28_61_30_63:
; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vmovdqu8 {{.*#+}} ymm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
+; AVX512VL-NEXT: vmovdqu {{.*#+}} ymm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
; AVX512VL-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
; AVX512VL-NEXT: retq
%shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 33, i32 2, i32 35, i32 4, i32 37, i32 6, i32 39, i32 8, i32 41, i32 10, i32 43, i32 12, i32 45, i32 14, i32 47, i32 16, i32 49, i32 18, i32 51, i32 20, i32 53, i32 22, i32 55, i32 24, i32 57, i32 26, i32 59, i32 28, i32 61, i32 30, i32 63>
@@ -1061,7 +1061,7 @@ define <32 x i8> @shuffle_v32i8_32_01_34
;
; AVX512VL-LABEL: shuffle_v32i8_32_01_34_03_36_05_38_07_40_09_42_11_44_13_46_15_48_17_50_19_52_21_54_23_56_25_58_27_60_29_62_31:
; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vmovdqu8 {{.*#+}} ymm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
+; AVX512VL-NEXT: vmovdqu {{.*#+}} ymm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
; AVX512VL-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
; AVX512VL-NEXT: retq
%shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 32, i32 1, i32 34, i32 3, i32 36, i32 5, i32 38, i32 7, i32 40, i32 9, i32 42, i32 11, i32 44, i32 13, i32 46, i32 15, i32 48, i32 17, i32 50, i32 19, i32 52, i32 21, i32 54, i32 23, i32 56, i32 25, i32 58, i32 27, i32 60, i32 29, i32 62, i32 31>
@@ -1076,7 +1076,7 @@ define <32 x i8> @shuffle_v32i8_zz_01_zz
;
; AVX512VL-LABEL: shuffle_v32i8_zz_01_zz_03_zz_05_zz_07_zz_09_zz_11_zz_13_zz_15_zz_17_zz_19_zz_21_zz_23_zz_25_zz_27_zz_29_zz_31:
; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vpandq {{.*}}(%rip), %ymm0, %ymm0
+; AVX512VL-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
; AVX512VL-NEXT: retq
%shuffle = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> <i32 32, i32 1, i32 34, i32 3, i32 36, i32 5, i32 38, i32 7, i32 40, i32 9, i32 42, i32 11, i32 44, i32 13, i32 46, i32 15, i32 48, i32 17, i32 50, i32 19, i32 52, i32 21, i32 54, i32 23, i32 56, i32 25, i32 58, i32 27, i32 60, i32 29, i32 62, i32 31>
ret <32 x i8> %shuffle
@@ -1142,11 +1142,11 @@ define <32 x i8> @shuffle_v32i8_00_32_00
;
; AVX512VL-LABEL: shuffle_v32i8_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_16_48_16_48_16_48_16_48_16_48_16_48_16_48_16_48:
; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vpxord %ymm2, %ymm2, %ymm2
+; AVX512VL-NEXT: vpxor %ymm2, %ymm2, %ymm2
; AVX512VL-NEXT: vpshufb %ymm2, %ymm1, %ymm1
; AVX512VL-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,0,0,0,4,5,6,7,8,8,8,8,12,13,14,15]
; AVX512VL-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,0,1,1,4,4,5,5]
-; AVX512VL-NEXT: vmovdqu8 {{.*#+}} ymm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
+; AVX512VL-NEXT: vmovdqu {{.*#+}} ymm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
; AVX512VL-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
; AVX512VL-NEXT: retq
%shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 32, i32 0, i32 32, i32 0, i32 32, i32 0, i32 32, i32 0, i32 32, i32 0, i32 32, i32 0, i32 32, i32 0, i32 32, i32 16, i32 48, i32 16, i32 48, i32 16, i32 48, i32 16, i32 48, i32 16, i32 48, i32 16, i32 48, i32 16, i32 48, i32 16, i32 48>
@@ -1176,7 +1176,7 @@ define <32 x i8> @shuffle_v32i8_32_32_32
;
; AVX512VL-LABEL: shuffle_v32i8_32_32_32_32_32_32_32_32_08_09_10_11_12_13_14_15_48_48_48_48_48_48_48_48_24_25_26_27_28_29_30_31:
; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vpxord %ymm2, %ymm2, %ymm2
+; AVX512VL-NEXT: vpxor %ymm2, %ymm2, %ymm2
; AVX512VL-NEXT: vpshufb %ymm2, %ymm1, %ymm1
; AVX512VL-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
; AVX512VL-NEXT: retq
@@ -1399,7 +1399,7 @@ define <32 x i8> @shuffle_v32i8_00_32_01
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,u,1,u,2,u,3,u,4,u,5,u,6,u,7,u,24,u,25,u,26,u,27,u,28,u,29,u,30,u,31,u]
; AVX512VL-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[u,0,u,1,u,2,u,3,u,4,u,5,u,6,u,7,u,24,u,25,u,26,u,27,u,28,u,29,u,30,u,31]
-; AVX512VL-NEXT: vmovdqu8 {{.*#+}} ymm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
+; AVX512VL-NEXT: vmovdqu {{.*#+}} ymm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
; AVX512VL-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
; AVX512VL-NEXT: retq
%shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 32, i32 1, i32 33, i32 2, i32 34, i32 3, i32 35, i32 4, i32 36, i32 5, i32 37, i32 6, i32 38, i32 7, i32 39, i32 24, i32 56, i32 25, i32 57, i32 26, i32 58, i32 27, i32 59, i32 28, i32 60, i32 29, i32 61, i32 30, i32 62, i32 31, i32 63>
@@ -1428,7 +1428,7 @@ define <32 x i8> @shuffle_v32i8_08_40_09
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[8,u,9,u,10,u,11,u,12,u,13,u,14,u,15,u,16,u,17,u,18,u,19,u,20,u,21,u,22,u,23,u]
; AVX512VL-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[u,8,u,9,u,10,u,11,u,12,u,13,u,14,u,15,u,16,u,17,u,18,u,19,u,20,u,21,u,22,u,23]
-; AVX512VL-NEXT: vmovdqu8 {{.*#+}} ymm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
+; AVX512VL-NEXT: vmovdqu {{.*#+}} ymm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
; AVX512VL-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
; AVX512VL-NEXT: retq
%shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 8, i32 40, i32 9, i32 41, i32 10, i32 42, i32 11, i32 43, i32 12, i32 44, i32 13, i32 45, i32 14, i32 46, i32 15, i32 47, i32 16, i32 48, i32 17, i32 49, i32 18, i32 50, i32 19, i32 51, i32 20, i32 52, i32 21, i32 53, i32 22, i32 54, i32 23, i32 55>
@@ -1684,13 +1684,13 @@ define <32 x i8> @shuffle_v32i8_42_45_12
; AVX512VL-NEXT: vperm2i128 {{.*#+}} ymm2 = ymm1[2,3,0,1]
; AVX512VL-NEXT: vpshufb {{.*#+}} ymm2 = ymm2[u,u,u,u,u,u,12,u,u,u,u,u,u,u,0,3,u,u,u,u,u,u,21,16,u,26,u,u,20,18,20,23]
; AVX512VL-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[10,13,u,u,3,3,u,8,u,u,u,12,1,u,u,u,u,u,20,u,17,22,u,u,16,u,27,u,u,u,u,u]
-; AVX512VL-NEXT: vmovdqu8 {{.*#+}} ymm3 = <255,255,u,u,255,255,0,255,u,u,u,255,255,u,0,0,u,u,255,u,255,255,0,0,255,0,255,u,0,0,0,0>
+; AVX512VL-NEXT: vmovdqu {{.*#+}} ymm3 = <255,255,u,u,255,255,0,255,u,u,u,255,255,u,0,0,u,u,255,u,255,255,0,0,255,0,255,u,0,0,0,0>
; AVX512VL-NEXT: vpblendvb %ymm3, %ymm1, %ymm2, %ymm1
; AVX512VL-NEXT: vperm2i128 {{.*#+}} ymm2 = ymm0[2,3,0,1]
; AVX512VL-NEXT: vpshufb {{.*#+}} ymm2 = ymm2[u,u,u,u,u,u,u,u,1,6,13,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,23,u,u,u,u]
; AVX512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,u,12,13,u,u,u,u,u,u,u,u,u,12,u,u,20,19,u,19,u,u,u,u,u,u,u,u,u,u,u,u]
; AVX512VL-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm2[2],ymm0[3,4,5],ymm2[6],ymm0[7]
-; AVX512VL-NEXT: vmovdqu8 {{.*#+}} ymm2 = [255,255,0,0,255,255,255,255,0,0,0,255,255,0,255,255,0,0,255,0,255,255,255,255,255,255,255,0,255,255,255,255]
+; AVX512VL-NEXT: vmovdqu {{.*#+}} ymm2 = [255,255,0,0,255,255,255,255,0,0,0,255,255,0,255,255,0,0,255,0,255,255,255,255,255,255,255,0,255,255,255,255]
; AVX512VL-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
; AVX512VL-NEXT: retq
%shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 42, i32 45, i32 12, i32 13, i32 35, i32 35, i32 60, i32 40, i32 17, i32 22, i32 29, i32 44, i32 33, i32 12, i32 48, i32 51, i32 20, i32 19, i32 52, i32 19, i32 49, i32 54, i32 37, i32 32, i32 48, i32 42, i32 59, i32 7, i32 36, i32 34, i32 36, i32 39>
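
The v32i8 hunks follow the same pattern: the zeroing idiom vpxord becomes vpxor and the unmasked byte-blend constants now load with vmovdqu instead of vmovdqu8, while the surrounding shuffle lowering is unchanged. Reassembled from the pieces above (AVX512VL block only; signature and ret inferred, so this is a sketch of the test, not a verbatim extract), the simplest blend test now reads:

define <32 x i8> @shuffle_v32i8_00_33_02_35_04_37_06_39_08_41_10_43_12_45_14_47_16_49_18_51_20_53_22_55_24_57_26_59_28_61_30_63(<32 x i8> %a, <32 x i8> %b) {
; AVX512VL-LABEL: shuffle_v32i8_00_33_02_35_04_37_06_39_08_41_10_43_12_45_14_47_16_49_18_51_20_53_22_55_24_57_26_59_28_61_30_63:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vmovdqu {{.*#+}} ymm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
; AVX512VL-NEXT:    vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 33, i32 2, i32 35, i32 4, i32 37, i32 6, i32 39, i32 8, i32 41, i32 10, i32 43, i32 12, i32 45, i32 14, i32 47, i32 16, i32 49, i32 18, i32 51, i32 20, i32 53, i32 22, i32 55, i32 24, i32 57, i32 26, i32 59, i32 28, i32 61, i32 30, i32 63>
  ret <32 x i8> %shuffle
}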
Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v8.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v8.ll?rev=290663&r1=290662&r2=290663&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v8.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v8.ll Wed Dec 28 04:12:48 2016
@@ -1049,7 +1049,7 @@ define <8 x i32> @shuffle_v8i32_00040000
;
; AVX512VL-LABEL: shuffle_v8i32_00040000:
; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vmovdqa32 {{.*#+}} ymm1 = [0,0,0,4,0,0,0,0]
+; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,4,0,0,0,0]
; AVX512VL-NEXT: vpermd %ymm0, %ymm1, %ymm0
; AVX512VL-NEXT: retq
%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 4, i32 0, i32 0, i32 0, i32 0>
@@ -1072,7 +1072,7 @@ define <8 x i32> @shuffle_v8i32_00500000
;
; AVX512VL-LABEL: shuffle_v8i32_00500000:
; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vmovdqa32 {{.*#+}} ymm1 = [0,0,5,0,0,0,0,0]
+; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,5,0,0,0,0,0]
; AVX512VL-NEXT: vpermd %ymm0, %ymm1, %ymm0
; AVX512VL-NEXT: retq
%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0>
@@ -1095,7 +1095,7 @@ define <8 x i32> @shuffle_v8i32_06000000
;
; AVX512VL-LABEL: shuffle_v8i32_06000000:
; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vmovdqa32 {{.*#+}} ymm1 = [0,6,0,0,0,0,0,0]
+; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = [0,6,0,0,0,0,0,0]
; AVX512VL-NEXT: vpermd %ymm0, %ymm1, %ymm0
; AVX512VL-NEXT: retq
%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 6, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
@@ -1150,7 +1150,7 @@ define <8 x i32> @shuffle_v8i32_00112233
;
; AVX512VL-LABEL: shuffle_v8i32_00112233:
; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vmovdqa32 {{.*#+}} ymm1 = [0,0,1,1,2,2,3,3]
+; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,1,1,2,2,3,3]
; AVX512VL-NEXT: vpermd %ymm0, %ymm1, %ymm0
; AVX512VL-NEXT: retq
%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3>
@@ -1296,7 +1296,7 @@ define <8 x i32> @shuffle_v8i32_08192a3b
; AVX512VL-LABEL: shuffle_v8i32_08192a3b:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vpmovzxdq {{.*#+}} ymm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
-; AVX512VL-NEXT: vmovdqa32 {{.*#+}} ymm0 = [0,8,2,9,4,10,6,11]
+; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm0 = [0,8,2,9,4,10,6,11]
; AVX512VL-NEXT: vpermi2d %ymm1, %ymm2, %ymm0
; AVX512VL-NEXT: retq
%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
@@ -1325,7 +1325,7 @@ define <8 x i32> @shuffle_v8i32_08991abb
; AVX512VL-LABEL: shuffle_v8i32_08991abb:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero
-; AVX512VL-NEXT: vmovdqa32 {{.*#+}} ymm0 = [8,0,1,1,10,2,3,3]
+; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm0 = [8,0,1,1,10,2,3,3]
; AVX512VL-NEXT: vpermi2d %ymm2, %ymm1, %ymm0
; AVX512VL-NEXT: retq
%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 8, i32 9, i32 9, i32 1, i32 10, i32 11, i32 11>
@@ -1564,7 +1564,7 @@ define <8 x i32> @shuffle_v8i32_00015444
;
; AVX512VL-LABEL: shuffle_v8i32_00015444:
; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vmovdqa32 {{.*#+}} ymm1 = [0,0,0,1,5,4,4,4]
+; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,1,5,4,4,4]
; AVX512VL-NEXT: vpermd %ymm0, %ymm1, %ymm0
; AVX512VL-NEXT: retq
%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 1, i32 5, i32 4, i32 4, i32 4>
@@ -1585,7 +1585,7 @@ define <8 x i32> @shuffle_v8i32_00204644
;
; AVX512VL-LABEL: shuffle_v8i32_00204644:
; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vmovdqa32 {{.*#+}} ymm1 = [0,0,2,0,4,6,4,4]
+; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,2,0,4,6,4,4]
; AVX512VL-NEXT: vpermd %ymm0, %ymm1, %ymm0
; AVX512VL-NEXT: retq
%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 0, i32 4, i32 6, i32 4, i32 4>
@@ -1606,7 +1606,7 @@ define <8 x i32> @shuffle_v8i32_03004474
;
; AVX512VL-LABEL: shuffle_v8i32_03004474:
; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vmovdqa32 {{.*#+}} ymm1 = [0,3,0,0,4,4,7,4]
+; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = [0,3,0,0,4,4,7,4]
; AVX512VL-NEXT: vpermd %ymm0, %ymm1, %ymm0
; AVX512VL-NEXT: retq
%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 3, i32 0, i32 0, i32 4, i32 4, i32 7, i32 4>
@@ -1627,7 +1627,7 @@ define <8 x i32> @shuffle_v8i32_10004444
;
; AVX512VL-LABEL: shuffle_v8i32_10004444:
; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vmovdqa32 {{.*#+}} ymm1 = [1,0,0,0,4,4,4,4]
+; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = [1,0,0,0,4,4,4,4]
; AVX512VL-NEXT: vpermd %ymm0, %ymm1, %ymm0
; AVX512VL-NEXT: retq
%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
@@ -1648,7 +1648,7 @@ define <8 x i32> @shuffle_v8i32_22006446
;
; AVX512VL-LABEL: shuffle_v8i32_22006446:
; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vmovdqa32 {{.*#+}} ymm1 = [2,2,0,0,6,4,4,6]
+; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = [2,2,0,0,6,4,4,6]
; AVX512VL-NEXT: vpermd %ymm0, %ymm1, %ymm0
; AVX512VL-NEXT: retq
%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 2, i32 2, i32 0, i32 0, i32 6, i32 4, i32 4, i32 6>
@@ -1669,7 +1669,7 @@ define <8 x i32> @shuffle_v8i32_33307474
;
; AVX512VL-LABEL: shuffle_v8i32_33307474:
; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vmovdqa32 {{.*#+}} ymm1 = [3,3,3,0,7,4,7,4]
+; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = [3,3,3,0,7,4,7,4]
; AVX512VL-NEXT: vpermd %ymm0, %ymm1, %ymm0
; AVX512VL-NEXT: retq
%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 3, i32 3, i32 0, i32 7, i32 4, i32 7, i32 4>
@@ -1690,7 +1690,7 @@ define <8 x i32> @shuffle_v8i32_32104567
;
; AVX512VL-LABEL: shuffle_v8i32_32104567:
; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vmovdqa32 {{.*#+}} ymm1 = [3,2,1,0,4,5,6,7]
+; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = [3,2,1,0,4,5,6,7]
; AVX512VL-NEXT: vpermd %ymm0, %ymm1, %ymm0
; AVX512VL-NEXT: retq
%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 4, i32 5, i32 6, i32 7>
@@ -1711,7 +1711,7 @@ define <8 x i32> @shuffle_v8i32_00236744
;
; AVX512VL-LABEL: shuffle_v8i32_00236744:
; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vmovdqa32 {{.*#+}} ymm1 = [0,0,2,3,6,7,4,4]
+; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,2,3,6,7,4,4]
; AVX512VL-NEXT: vpermd %ymm0, %ymm1, %ymm0
; AVX512VL-NEXT: retq
%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 3, i32 6, i32 7, i32 4, i32 4>
@@ -1732,7 +1732,7 @@ define <8 x i32> @shuffle_v8i32_00226644
;
; AVX512VL-LABEL: shuffle_v8i32_00226644:
; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vmovdqa32 {{.*#+}} ymm1 = [0,0,2,2,6,6,4,4]
+; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,2,2,6,6,4,4]
; AVX512VL-NEXT: vpermd %ymm0, %ymm1, %ymm0
; AVX512VL-NEXT: retq
%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 6, i32 6, i32 4, i32 4>
@@ -1753,7 +1753,7 @@ define <8 x i32> @shuffle_v8i32_10324567
;
; AVX512VL-LABEL: shuffle_v8i32_10324567:
; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vmovdqa32 {{.*#+}} ymm1 = [1,0,3,2,4,5,6,7]
+; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = [1,0,3,2,4,5,6,7]
; AVX512VL-NEXT: vpermd %ymm0, %ymm1, %ymm0
; AVX512VL-NEXT: retq
%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7>
@@ -1774,7 +1774,7 @@ define <8 x i32> @shuffle_v8i32_11334567
;
; AVX512VL-LABEL: shuffle_v8i32_11334567:
; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vmovdqa32 {{.*#+}} ymm1 = [1,1,3,3,4,5,6,7]
+; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = [1,1,3,3,4,5,6,7]
; AVX512VL-NEXT: vpermd %ymm0, %ymm1, %ymm0
; AVX512VL-NEXT: retq
%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 4, i32 5, i32 6, i32 7>
@@ -1795,7 +1795,7 @@ define <8 x i32> @shuffle_v8i32_01235467
;
; AVX512VL-LABEL: shuffle_v8i32_01235467:
; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vmovdqa32 {{.*#+}} ymm1 = [0,1,2,3,5,4,6,7]
+; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = [0,1,2,3,5,4,6,7]
; AVX512VL-NEXT: vpermd %ymm0, %ymm1, %ymm0
; AVX512VL-NEXT: retq
%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7>
@@ -1816,7 +1816,7 @@ define <8 x i32> @shuffle_v8i32_01235466
;
; AVX512VL-LABEL: shuffle_v8i32_01235466:
; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vmovdqa32 {{.*#+}} ymm1 = [0,1,2,3,5,4,6,6]
+; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = [0,1,2,3,5,4,6,6]
; AVX512VL-NEXT: vpermd %ymm0, %ymm1, %ymm0
; AVX512VL-NEXT: retq
%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 6, i32 6>
@@ -1837,7 +1837,7 @@ define <8 x i32> @shuffle_v8i32_002u6u44
;
; AVX512VL-LABEL: shuffle_v8i32_002u6u44:
; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vmovdqa32 {{.*#+}} ymm1 = <0,0,2,u,6,u,4,4>
+; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = <0,0,2,u,6,u,4,4>
; AVX512VL-NEXT: vpermd %ymm0, %ymm1, %ymm0
; AVX512VL-NEXT: retq
%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 undef, i32 6, i32 undef, i32 4, i32 4>
@@ -1858,7 +1858,7 @@ define <8 x i32> @shuffle_v8i32_00uu66uu
;
; AVX512VL-LABEL: shuffle_v8i32_00uu66uu:
; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vmovdqa32 {{.*#+}} ymm1 = <0,0,u,u,6,6,u,u>
+; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = <0,0,u,u,6,6,u,u>
; AVX512VL-NEXT: vpermd %ymm0, %ymm1, %ymm0
; AVX512VL-NEXT: retq
%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 undef, i32 undef, i32 6, i32 6, i32 undef, i32 undef>
@@ -1879,7 +1879,7 @@ define <8 x i32> @shuffle_v8i32_103245uu
;
; AVX512VL-LABEL: shuffle_v8i32_103245uu:
; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vmovdqa32 {{.*#+}} ymm1 = <1,0,3,2,4,5,u,u>
+; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = <1,0,3,2,4,5,u,u>
; AVX512VL-NEXT: vpermd %ymm0, %ymm1, %ymm0
; AVX512VL-NEXT: retq
%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 4, i32 5, i32 undef, i32 undef>
@@ -1900,7 +1900,7 @@ define <8 x i32> @shuffle_v8i32_1133uu67
;
; AVX512VL-LABEL: shuffle_v8i32_1133uu67:
; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vmovdqa32 {{.*#+}} ymm1 = <1,1,3,3,u,u,6,7>
+; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = <1,1,3,3,u,u,6,7>
; AVX512VL-NEXT: vpermd %ymm0, %ymm1, %ymm0
; AVX512VL-NEXT: retq
%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 undef, i32 undef, i32 6, i32 7>
@@ -1921,7 +1921,7 @@ define <8 x i32> @shuffle_v8i32_0uu354uu
;
; AVX512VL-LABEL: shuffle_v8i32_0uu354uu:
; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vmovdqa32 {{.*#+}} ymm1 = <0,u,u,3,5,4,u,u>
+; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = <0,u,u,3,5,4,u,u>
; AVX512VL-NEXT: vpermd %ymm0, %ymm1, %ymm0
; AVX512VL-NEXT: retq
%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 undef, i32 undef, i32 3, i32 5, i32 4, i32 undef, i32 undef>
@@ -1942,7 +1942,7 @@ define <8 x i32> @shuffle_v8i32_uuu3uu66
;
; AVX512VL-LABEL: shuffle_v8i32_uuu3uu66:
; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vmovdqa32 {{.*#+}} ymm1 = <u,u,u,3,u,u,6,6>
+; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = <u,u,u,3,u,u,6,6>
; AVX512VL-NEXT: vpermd %ymm0, %ymm1, %ymm0
; AVX512VL-NEXT: retq
%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 6, i32 6>
@@ -1969,9 +1969,9 @@ define <8 x i32> @shuffle_v8i32_6caa87e5
;
; AVX512VL-LABEL: shuffle_v8i32_6caa87e5:
; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vmovdqa32 {{.*#+}} ymm2 = [14,4,2,2,0,15,6,13]
+; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [14,4,2,2,0,15,6,13]
; AVX512VL-NEXT: vpermi2d %ymm0, %ymm1, %ymm2
-; AVX512VL-NEXT: vmovdqa64 %ymm2, %ymm0
+; AVX512VL-NEXT: vmovdqa %ymm2, %ymm0
; AVX512VL-NEXT: retq
%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 6, i32 12, i32 10, i32 10, i32 8, i32 7, i32 14, i32 5>
ret <8 x i32> %shuffle
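
In the v8i32 file the affected loads are aligned constant-pool loads, so the respelling is vmovdqa32 -> vmovdqa; the vpermd lowering itself is unchanged. One of those tests, reassembled from the hunks above (AVX512VL block only; signature and ret inferred from the IR body, so again a sketch rather than a verbatim extract):

define <8 x i32> @shuffle_v8i32_00040000(<8 x i32> %a, <8 x i32> %b) {
; AVX512VL-LABEL: shuffle_v8i32_00040000:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm1 = [0,0,0,4,0,0,0,0]
; AVX512VL-NEXT:    vpermd %ymm0, %ymm1, %ymm0
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 4, i32 0, i32 0, i32 0, i32 0>
  ret <8 x i32> %shuffle
}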
Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx512bwvl.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx512bwvl.ll?rev=290663&r1=290662&r2=290663&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx512bwvl.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx512bwvl.ll Wed Dec 28 04:12:48 2016
@@ -22,18 +22,18 @@ define <16 x i16> @combine_vpermt2var_16
; X32-LABEL: combine_vpermt2var_16i16_identity_mask:
; X32: # BB#0:
; X32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
-; X32-NEXT: vmovdqu16 {{.*#+}} ymm2 = [15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0]
+; X32-NEXT: vmovdqu {{.*#+}} ymm2 = [15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0]
; X32-NEXT: vpermi2w %ymm1, %ymm0, %ymm2 {%k1} {z}
-; X32-NEXT: vmovdqu16 {{.*#+}} ymm0 = [15,30,13,28,11,26,9,24,7,22,5,20,3,18,1,16]
+; X32-NEXT: vmovdqu {{.*#+}} ymm0 = [15,30,13,28,11,26,9,24,7,22,5,20,3,18,1,16]
; X32-NEXT: vpermi2w %ymm2, %ymm2, %ymm0 {%k1} {z}
; X32-NEXT: retl
;
; X64-LABEL: combine_vpermt2var_16i16_identity_mask:
; X64: # BB#0:
; X64-NEXT: kmovw %edi, %k1
-; X64-NEXT: vmovdqu16 {{.*#+}} ymm2 = [15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0]
+; X64-NEXT: vmovdqu {{.*#+}} ymm2 = [15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0]
; X64-NEXT: vpermi2w %ymm1, %ymm0, %ymm2 {%k1} {z}
-; X64-NEXT: vmovdqu16 {{.*#+}} ymm0 = [15,30,13,28,11,26,9,24,7,22,5,20,3,18,1,16]
+; X64-NEXT: vmovdqu {{.*#+}} ymm0 = [15,30,13,28,11,26,9,24,7,22,5,20,3,18,1,16]
; X64-NEXT: vpermi2w %ymm2, %ymm2, %ymm0 {%k1} {z}
; X64-NEXT: retq
%res0 = call <16 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.256(<16 x i16> <i16 15, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>, <16 x i16> %x0, <16 x i16> %x1, i16 %m)
@@ -44,13 +44,13 @@ define <16 x i16> @combine_vpermt2var_16
define <16 x i16> @combine_vpermi2var_16i16_as_permw(<16 x i16> %x0, <16 x i16> %x1) {
; X32-LABEL: combine_vpermi2var_16i16_as_permw:
; X32: # BB#0:
-; X32-NEXT: vmovdqu16 {{.*#+}} ymm1 = [15,0,14,1,13,2,12,3,11,4,10,5,9,6,8,7]
+; X32-NEXT: vmovdqu {{.*#+}} ymm1 = [15,0,14,1,13,2,12,3,11,4,10,5,9,6,8,7]
; X32-NEXT: vpermw %ymm0, %ymm1, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: combine_vpermi2var_16i16_as_permw:
; X64: # BB#0:
-; X64-NEXT: vmovdqu16 {{.*#+}} ymm1 = [15,0,14,1,13,2,12,3,11,4,10,5,9,6,8,7]
+; X64-NEXT: vmovdqu {{.*#+}} ymm1 = [15,0,14,1,13,2,12,3,11,4,10,5,9,6,8,7]
; X64-NEXT: vpermw %ymm0, %ymm1, %ymm0
; X64-NEXT: retq
%res0 = call <16 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.256(<16 x i16> %x0, <16 x i16> <i16 15, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>, <16 x i16> %x1, i16 -1)
@@ -61,13 +61,13 @@ define <16 x i16> @combine_vpermi2var_16
define <16 x i16> @combine_vpermt2var_vpermi2var_16i16_as_vperm2(<16 x i16> %x0, <16 x i16> %x1) {
; X32-LABEL: combine_vpermt2var_vpermi2var_16i16_as_vperm2:
; X32: # BB#0:
-; X32-NEXT: vmovdqu16 {{.*#+}} ymm2 = [0,31,2,2,4,29,6,27,8,25,10,23,12,21,14,19]
+; X32-NEXT: vmovdqu {{.*#+}} ymm2 = [0,31,2,2,4,29,6,27,8,25,10,23,12,21,14,19]
; X32-NEXT: vpermt2w %ymm1, %ymm2, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: combine_vpermt2var_vpermi2var_16i16_as_vperm2:
; X64: # BB#0:
-; X64-NEXT: vmovdqu16 {{.*#+}} ymm2 = [0,31,2,2,4,29,6,27,8,25,10,23,12,21,14,19]
+; X64-NEXT: vmovdqu {{.*#+}} ymm2 = [0,31,2,2,4,29,6,27,8,25,10,23,12,21,14,19]
; X64-NEXT: vpermt2w %ymm1, %ymm2, %ymm0
; X64-NEXT: retq
%res0 = call <16 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.256(<16 x i16> %x0, <16 x i16> <i16 0, i16 31, i16 2, i16 29, i16 4, i16 27, i16 6, i16 25, i16 8, i16 23, i16 10, i16 21, i16 12, i16 19, i16 14, i16 17>, <16 x i16> %x1, i16 -1)
Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx512vbmi.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx512vbmi.ll?rev=290663&r1=290662&r2=290663&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx512vbmi.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx512vbmi.ll Wed Dec 28 04:12:48 2016
@@ -37,18 +37,18 @@ define <16 x i8> @combine_vpermt2var_16i
; X32-LABEL: combine_vpermt2var_16i8_identity_mask:
; X32: # BB#0:
; X32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
-; X32-NEXT: vmovdqu8 {{.*#+}} xmm2 = [15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0]
+; X32-NEXT: vmovdqu {{.*#+}} xmm2 = [15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0]
; X32-NEXT: vpermi2b %xmm1, %xmm0, %xmm2 {%k1} {z}
-; X32-NEXT: vmovdqu8 {{.*#+}} xmm0 = [15,30,13,28,11,26,9,24,7,22,5,20,3,18,1,16]
+; X32-NEXT: vmovdqu {{.*#+}} xmm0 = [15,30,13,28,11,26,9,24,7,22,5,20,3,18,1,16]
; X32-NEXT: vpermi2b %xmm2, %xmm2, %xmm0 {%k1} {z}
; X32-NEXT: retl
;
; X64-LABEL: combine_vpermt2var_16i8_identity_mask:
; X64: # BB#0:
; X64-NEXT: kmovw %edi, %k1
-; X64-NEXT: vmovdqu8 {{.*#+}} xmm2 = [15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0]
+; X64-NEXT: vmovdqu {{.*#+}} xmm2 = [15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0]
; X64-NEXT: vpermi2b %xmm1, %xmm0, %xmm2 {%k1} {z}
-; X64-NEXT: vmovdqu8 {{.*#+}} xmm0 = [15,30,13,28,11,26,9,24,7,22,5,20,3,18,1,16]
+; X64-NEXT: vmovdqu {{.*#+}} xmm0 = [15,30,13,28,11,26,9,24,7,22,5,20,3,18,1,16]
; X64-NEXT: vpermi2b %xmm2, %xmm2, %xmm0 {%k1} {z}
; X64-NEXT: retq
%res0 = call <16 x i8> @llvm.x86.avx512.maskz.vpermt2var.qi.128(<16 x i8> <i8 15, i8 14, i8 13, i8 12, i8 11, i8 10, i8 9, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>, <16 x i8> %x0, <16 x i8> %x1, i16 %m)
@@ -73,13 +73,13 @@ define <16 x i8> @combine_vpermi2var_16i
define <32 x i8> @combine_vpermi2var_32i8_as_vpermb(<32 x i8> %x0, <32 x i8> %x1) {
; X32-LABEL: combine_vpermi2var_32i8_as_vpermb:
; X32: # BB#0:
-; X32-NEXT: vmovdqu8 {{.*#+}} ymm1 = [0,0,1,23,2,22,3,21,4,22,5,21,6,20,7,19,0,0,1,23,2,22,3,21,4,22,5,21,6,20,7,19]
+; X32-NEXT: vmovdqu {{.*#+}} ymm1 = [0,0,1,23,2,22,3,21,4,22,5,21,6,20,7,19,0,0,1,23,2,22,3,21,4,22,5,21,6,20,7,19]
; X32-NEXT: vpermb %ymm0, %ymm1, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: combine_vpermi2var_32i8_as_vpermb:
; X64: # BB#0:
-; X64-NEXT: vmovdqu8 {{.*#+}} ymm1 = [0,0,1,23,2,22,3,21,4,22,5,21,6,20,7,19,0,0,1,23,2,22,3,21,4,22,5,21,6,20,7,19]
+; X64-NEXT: vmovdqu {{.*#+}} ymm1 = [0,0,1,23,2,22,3,21,4,22,5,21,6,20,7,19,0,0,1,23,2,22,3,21,4,22,5,21,6,20,7,19]
; X64-NEXT: vpermb %ymm0, %ymm1, %ymm0
; X64-NEXT: retq
%res0 = shufflevector <32 x i8> %x0, <32 x i8> %x1, <32 x i32> <i32 0, i32 32, i32 1, i32 33, i32 2, i32 34, i32 3, i32 35, i32 4, i32 36, i32 5, i32 37, i32 6, i32 38, i32 7, i32 39, i32 16, i32 48, i32 17, i32 49, i32 18, i32 50, i32 19, i32 51, i32 20, i32 52, i32 21, i32 53, i32 22, i32 54, i32 23, i32 55>
@@ -106,17 +106,17 @@ define <64 x i8> @combine_vpermi2var_64i
define <16 x i8> @combine_vpermt2var_vpermi2var_16i8_as_vperm2(<16 x i8> %x0, <16 x i8> %x1) {
; X32-LABEL: combine_vpermt2var_vpermi2var_16i8_as_vperm2:
; X32: # BB#0:
-; X32-NEXT: vmovdqu8 {{.*#+}} xmm2 = [0,31,2,29,4,27,6,25,8,23,10,21,12,19,14,17]
+; X32-NEXT: vmovdqu {{.*#+}} xmm2 = [0,31,2,29,4,27,6,25,8,23,10,21,12,19,14,17]
; X32-NEXT: vpermi2b %xmm1, %xmm0, %xmm2
-; X32-NEXT: vmovdqu8 {{.*#+}} xmm0 = [0,17,2,18,4,19,6,21,8,23,10,25,12,27,14,29]
+; X32-NEXT: vmovdqu {{.*#+}} xmm0 = [0,17,2,18,4,19,6,21,8,23,10,25,12,27,14,29]
; X32-NEXT: vpermi2b %xmm2, %xmm2, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: combine_vpermt2var_vpermi2var_16i8_as_vperm2:
; X64: # BB#0:
-; X64-NEXT: vmovdqu8 {{.*#+}} xmm2 = [0,31,2,29,4,27,6,25,8,23,10,21,12,19,14,17]
+; X64-NEXT: vmovdqu {{.*#+}} xmm2 = [0,31,2,29,4,27,6,25,8,23,10,21,12,19,14,17]
; X64-NEXT: vpermi2b %xmm1, %xmm0, %xmm2
-; X64-NEXT: vmovdqu8 {{.*#+}} xmm0 = [0,17,2,18,4,19,6,21,8,23,10,25,12,27,14,29]
+; X64-NEXT: vmovdqu {{.*#+}} xmm0 = [0,17,2,18,4,19,6,21,8,23,10,25,12,27,14,29]
; X64-NEXT: vpermi2b %xmm2, %xmm2, %xmm0
; X64-NEXT: retq
%res0 = call <16 x i8> @llvm.x86.avx512.mask.vpermi2var.qi.128(<16 x i8> %x0, <16 x i8> <i8 0, i8 31, i8 2, i8 29, i8 4, i8 27, i8 6, i8 25, i8 8, i8 23, i8 10, i8 21, i8 12, i8 19, i8 14, i8 17>, <16 x i8> %x1, i16 -1)
@@ -126,13 +126,13 @@ define <16 x i8> @combine_vpermt2var_vpe
define <32 x i8> @combine_vpermi2var_32i8_as_vperm2(<32 x i8> %x0, <32 x i8> %x1) {
; X32-LABEL: combine_vpermi2var_32i8_as_vperm2:
; X32: # BB#0:
-; X32-NEXT: vmovdqu8 {{.*#+}} ymm2 = [0,32,1,23,2,22,3,21,4,22,5,21,6,20,7,19,0,32,1,23,2,22,3,21,4,22,5,21,6,20,7,19]
+; X32-NEXT: vmovdqu {{.*#+}} ymm2 = [0,32,1,23,2,22,3,21,4,22,5,21,6,20,7,19,0,32,1,23,2,22,3,21,4,22,5,21,6,20,7,19]
; X32-NEXT: vpermt2b %ymm1, %ymm2, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: combine_vpermi2var_32i8_as_vperm2:
; X64: # BB#0:
-; X64-NEXT: vmovdqu8 {{.*#+}} ymm2 = [0,32,1,23,2,22,3,21,4,22,5,21,6,20,7,19,0,32,1,23,2,22,3,21,4,22,5,21,6,20,7,19]
+; X64-NEXT: vmovdqu {{.*#+}} ymm2 = [0,32,1,23,2,22,3,21,4,22,5,21,6,20,7,19,0,32,1,23,2,22,3,21,4,22,5,21,6,20,7,19]
; X64-NEXT: vpermt2b %ymm1, %ymm2, %ymm0
; X64-NEXT: retq
%res0 = shufflevector <32 x i8> %x0, <32 x i8> %x1, <32 x i32> <i32 0, i32 32, i32 1, i32 33, i32 2, i32 34, i32 3, i32 35, i32 4, i32 36, i32 5, i32 37, i32 6, i32 38, i32 7, i32 39, i32 16, i32 48, i32 17, i32 49, i32 18, i32 50, i32 19, i32 51, i32 20, i32 52, i32 21, i32 53, i32 22, i32 54, i32 23, i32 55>
Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-masked.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-masked.ll?rev=290663&r1=290662&r2=290663&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-masked.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-masked.ll Wed Dec 28 04:12:48 2016
@@ -6,7 +6,7 @@ define <4 x i32> @mask_shuffle_v4i32_123
; CHECK: # BB#0:
; CHECK-NEXT: kmovb %edi, %k1
; CHECK-NEXT: valignd {{.*#+}} xmm2 {%k1} = xmm0[1,2,3],xmm1[0]
-; CHECK-NEXT: vmovdqa64 %xmm2, %xmm0
+; CHECK-NEXT: vmovdqa %xmm2, %xmm0
; CHECK-NEXT: retq
%shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
%mask.cast = bitcast i8 %mask to <8 x i1>
@@ -33,7 +33,7 @@ define <4 x i32> @mask_shuffle_v4i32_234
; CHECK: # BB#0:
; CHECK-NEXT: kmovb %edi, %k1
; CHECK-NEXT: valignd {{.*#+}} xmm2 {%k1} = xmm0[2,3],xmm1[0,1]
-; CHECK-NEXT: vmovdqa64 %xmm2, %xmm0
+; CHECK-NEXT: vmovdqa %xmm2, %xmm0
; CHECK-NEXT: retq
%shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
%mask.cast = bitcast i8 %mask to <8 x i1>
@@ -60,7 +60,7 @@ define <2 x i64> @mask_shuffle_v2i64_12(
; CHECK: # BB#0:
; CHECK-NEXT: kmovb %edi, %k1
; CHECK-NEXT: valignq {{.*#+}} xmm2 {%k1} = xmm0[1],xmm1[0]
-; CHECK-NEXT: vmovdqa64 %xmm2, %xmm0
+; CHECK-NEXT: vmovdqa %xmm2, %xmm0
; CHECK-NEXT: retq
%shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 2>
%mask.cast = bitcast i8 %mask to <8 x i1>
@@ -87,7 +87,7 @@ define <4 x i64> @mask_shuffle_v4i64_123
; CHECK: # BB#0:
; CHECK-NEXT: kmovb %edi, %k1
; CHECK-NEXT: valignq {{.*#+}} ymm2 {%k1} = ymm0[1,2,3],ymm1[0]
-; CHECK-NEXT: vmovdqa64 %ymm2, %ymm0
+; CHECK-NEXT: vmovdqa %ymm2, %ymm0
; CHECK-NEXT: retq
%shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
%mask.cast = bitcast i8 %mask to <8 x i1>
@@ -114,7 +114,7 @@ define <4 x i64> @mask_shuffle_v4i64_123
; CHECK: # BB#0:
; CHECK-NEXT: kmovb %edi, %k1
; CHECK-NEXT: vpermq {{.*#+}} ymm1 {%k1} = ymm0[1,2,3,0]
-; CHECK-NEXT: vmovdqa64 %ymm1, %ymm0
+; CHECK-NEXT: vmovdqa %ymm1, %ymm0
; CHECK-NEXT: retq
%shuffle = shufflevector <4 x i64> %a, <4 x i64> undef, <4 x i32> <i32 1, i32 2, i32 3, i32 0>
%mask.cast = bitcast i8 %mask to <8 x i1>
@@ -141,7 +141,7 @@ define <8 x i32> @mask_shuffle_v8i32_123
; CHECK: # BB#0:
; CHECK-NEXT: kmovb %edi, %k1
; CHECK-NEXT: valignd {{.*#+}} ymm2 {%k1} = ymm0[1,2,3,4,5,6,7],ymm1[0]
-; CHECK-NEXT: vmovdqa64 %ymm2, %ymm0
+; CHECK-NEXT: vmovdqa %ymm2, %ymm0
; CHECK-NEXT: retq
%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>
%mask.cast = bitcast i8 %mask to <8 x i1>
@@ -166,7 +166,7 @@ define <8 x i32> @mask_shuffle_v8i32_234
; CHECK: # BB#0:
; CHECK-NEXT: kmovb %edi, %k1
; CHECK-NEXT: valignd {{.*#+}} ymm2 {%k1} = ymm0[2,3,4,5,6,7],ymm1[0,1]
-; CHECK-NEXT: vmovdqa64 %ymm2, %ymm0
+; CHECK-NEXT: vmovdqa %ymm2, %ymm0
; CHECK-NEXT: retq
%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9>
%mask.cast = bitcast i8 %mask to <8 x i1>
@@ -191,7 +191,7 @@ define <8 x i32> @mask_shuffle_v8i32_123
; CHECK: # BB#0:
; CHECK-NEXT: kmovb %edi, %k1
; CHECK-NEXT: valignd {{.*#+}} ymm1 {%k1} = ymm0[1,2,3,4,5,6,7,0]
-; CHECK-NEXT: vmovdqa64 %ymm1, %ymm0
+; CHECK-NEXT: vmovdqa %ymm1, %ymm0
; CHECK-NEXT: retq
%shuffle = shufflevector <8 x i32> %a, <8 x i32> undef, <8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0>
%mask.cast = bitcast i8 %mask to <8 x i1>
Modified: llvm/trunk/test/CodeGen/X86/vector-trunc.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-trunc.ll?rev=290663&r1=290662&r2=290663&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-trunc.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-trunc.ll Wed Dec 28 04:12:48 2016
@@ -535,7 +535,7 @@ define void @trunc16i16_16i8(<16 x i16>
; AVX512VL: # BB#0: # %entry
; AVX512VL-NEXT: vpmovsxwd %ymm0, %zmm0
; AVX512VL-NEXT: vpmovdb %zmm0, %xmm0
-; AVX512VL-NEXT: vmovdqu32 %xmm0, (%rax)
+; AVX512VL-NEXT: vmovdqu %xmm0, (%rax)
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: trunc16i16_16i8:
@@ -644,7 +644,7 @@ define void @trunc32i16_32i8(<32 x i16>
; AVX512VL-NEXT: vpmovsxwd %ymm1, %zmm1
; AVX512VL-NEXT: vpmovdb %zmm1, %xmm1
; AVX512VL-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm0
-; AVX512VL-NEXT: vmovdqu32 %ymm0, (%rax)
+; AVX512VL-NEXT: vmovdqu %ymm0, (%rax)
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: trunc32i16_32i8:
@@ -1100,7 +1100,7 @@ define <16 x i8> @trunc2x8i16_16i8(<8 x
;
; AVX512VL-LABEL: trunc2x8i16_16i8:
; AVX512VL: # BB#0: # %entry
-; AVX512VL-NEXT: vmovdqa64 {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
+; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; AVX512VL-NEXT: vpshufb %xmm2, %xmm1, %xmm1
; AVX512VL-NEXT: vpshufb %xmm2, %xmm0, %xmm0
; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
@@ -1116,7 +1116,7 @@ define <16 x i8> @trunc2x8i16_16i8(<8 x
;
; AVX512BWVL-LABEL: trunc2x8i16_16i8:
; AVX512BWVL: # BB#0: # %entry
-; AVX512BWVL-NEXT: vmovdqu8 {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
+; AVX512BWVL-NEXT: vmovdqu {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; AVX512BWVL-NEXT: vpshufb %xmm2, %xmm1, %xmm1
; AVX512BWVL-NEXT: vpshufb %xmm2, %xmm0, %xmm0
; AVX512BWVL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
@@ -1202,7 +1202,7 @@ define <16 x i8> @trunc16i64_16i8_const(
;
; AVX512VL-LABEL: trunc16i64_16i8_const:
; AVX512VL: # BB#0: # %entry
-; AVX512VL-NEXT: vpxord %xmm0, %xmm0, %xmm0
+; AVX512VL-NEXT: vpxor %xmm0, %xmm0, %xmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: trunc16i64_16i8_const:
@@ -1212,7 +1212,7 @@ define <16 x i8> @trunc16i64_16i8_const(
;
; AVX512BWVL-LABEL: trunc16i64_16i8_const:
; AVX512BWVL: # BB#0: # %entry
-; AVX512BWVL-NEXT: vpxord %xmm0, %xmm0, %xmm0
+; AVX512BWVL-NEXT: vpxor %xmm0, %xmm0, %xmm0
; AVX512BWVL-NEXT: retq
entry:
Modified: llvm/trunk/test/CodeGen/X86/vector-tzcnt-128.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-tzcnt-128.ll?rev=290663&r1=290662&r2=290663&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-tzcnt-128.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-tzcnt-128.ll Wed Dec 28 04:12:48 2016
@@ -136,16 +136,16 @@ define <2 x i64> @testv2i64(<2 x i64> %i
;
; AVX512CDVL-LABEL: testv2i64:
; AVX512CDVL: # BB#0:
-; AVX512CDVL-NEXT: vpxord %xmm1, %xmm1, %xmm1
+; AVX512CDVL-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512CDVL-NEXT: vpsubq %xmm0, %xmm1, %xmm2
-; AVX512CDVL-NEXT: vpandq %xmm2, %xmm0, %xmm0
+; AVX512CDVL-NEXT: vpand %xmm2, %xmm0, %xmm0
; AVX512CDVL-NEXT: vpsubq {{.*}}(%rip), %xmm0, %xmm0
-; AVX512CDVL-NEXT: vmovdqa64 {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; AVX512CDVL-NEXT: vpandq %xmm2, %xmm0, %xmm3
-; AVX512CDVL-NEXT: vmovdqa64 {{.*#+}} xmm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; AVX512CDVL-NEXT: vmovdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX512CDVL-NEXT: vpand %xmm2, %xmm0, %xmm3
+; AVX512CDVL-NEXT: vmovdqa {{.*#+}} xmm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX512CDVL-NEXT: vpshufb %xmm3, %xmm4, %xmm3
; AVX512CDVL-NEXT: vpsrlw $4, %xmm0, %xmm0
-; AVX512CDVL-NEXT: vpandq %xmm2, %xmm0, %xmm0
+; AVX512CDVL-NEXT: vpand %xmm2, %xmm0, %xmm0
; AVX512CDVL-NEXT: vpshufb %xmm0, %xmm4, %xmm0
; AVX512CDVL-NEXT: vpaddb %xmm3, %xmm0, %xmm0
; AVX512CDVL-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
@@ -316,11 +316,11 @@ define <2 x i64> @testv2i64u(<2 x i64> %
;
; AVX512CDVL-LABEL: testv2i64u:
; AVX512CDVL: # BB#0:
-; AVX512CDVL-NEXT: vpxord %xmm1, %xmm1, %xmm1
+; AVX512CDVL-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512CDVL-NEXT: vpsubq %xmm0, %xmm1, %xmm1
-; AVX512CDVL-NEXT: vpandq %xmm1, %xmm0, %xmm0
+; AVX512CDVL-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX512CDVL-NEXT: vplzcntq %xmm0, %xmm0
-; AVX512CDVL-NEXT: vmovdqa64 {{.*#+}} xmm1 = [63,63]
+; AVX512CDVL-NEXT: vmovdqa {{.*#+}} xmm1 = [63,63]
; AVX512CDVL-NEXT: vpsubq %xmm0, %xmm1, %xmm0
; AVX512CDVL-NEXT: retq
;
@@ -510,16 +510,16 @@ define <4 x i32> @testv4i32(<4 x i32> %i
;
; AVX512CDVL-LABEL: testv4i32:
; AVX512CDVL: # BB#0:
-; AVX512CDVL-NEXT: vpxord %xmm1, %xmm1, %xmm1
+; AVX512CDVL-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512CDVL-NEXT: vpsubd %xmm0, %xmm1, %xmm2
-; AVX512CDVL-NEXT: vpandd %xmm2, %xmm0, %xmm0
+; AVX512CDVL-NEXT: vpand %xmm2, %xmm0, %xmm0
; AVX512CDVL-NEXT: vpsubd {{.*}}(%rip){1to4}, %xmm0, %xmm0
-; AVX512CDVL-NEXT: vmovdqa64 {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; AVX512CDVL-NEXT: vpandq %xmm2, %xmm0, %xmm3
-; AVX512CDVL-NEXT: vmovdqa64 {{.*#+}} xmm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; AVX512CDVL-NEXT: vmovdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX512CDVL-NEXT: vpand %xmm2, %xmm0, %xmm3
+; AVX512CDVL-NEXT: vmovdqa {{.*#+}} xmm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX512CDVL-NEXT: vpshufb %xmm3, %xmm4, %xmm3
; AVX512CDVL-NEXT: vpsrlw $4, %xmm0, %xmm0
-; AVX512CDVL-NEXT: vpandq %xmm2, %xmm0, %xmm0
+; AVX512CDVL-NEXT: vpand %xmm2, %xmm0, %xmm0
; AVX512CDVL-NEXT: vpshufb %xmm0, %xmm4, %xmm0
; AVX512CDVL-NEXT: vpaddb %xmm3, %xmm0, %xmm0
; AVX512CDVL-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
@@ -731,9 +731,9 @@ define <4 x i32> @testv4i32u(<4 x i32> %
;
; AVX512CDVL-LABEL: testv4i32u:
; AVX512CDVL: # BB#0:
-; AVX512CDVL-NEXT: vpxord %xmm1, %xmm1, %xmm1
+; AVX512CDVL-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512CDVL-NEXT: vpsubd %xmm0, %xmm1, %xmm1
-; AVX512CDVL-NEXT: vpandd %xmm1, %xmm0, %xmm0
+; AVX512CDVL-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX512CDVL-NEXT: vplzcntd %xmm0, %xmm0
; AVX512CDVL-NEXT: vpbroadcastd {{.*}}(%rip), %xmm1
; AVX512CDVL-NEXT: vpsubd %xmm0, %xmm1, %xmm0
@@ -913,16 +913,16 @@ define <8 x i16> @testv8i16(<8 x i16> %i
;
; AVX512CDVL-LABEL: testv8i16:
; AVX512CDVL: # BB#0:
-; AVX512CDVL-NEXT: vpxord %xmm1, %xmm1, %xmm1
+; AVX512CDVL-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512CDVL-NEXT: vpsubw %xmm0, %xmm1, %xmm1
-; AVX512CDVL-NEXT: vpandq %xmm1, %xmm0, %xmm0
+; AVX512CDVL-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX512CDVL-NEXT: vpsubw {{.*}}(%rip), %xmm0, %xmm0
-; AVX512CDVL-NEXT: vmovdqa64 {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; AVX512CDVL-NEXT: vpandq %xmm1, %xmm0, %xmm2
-; AVX512CDVL-NEXT: vmovdqa64 {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; AVX512CDVL-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX512CDVL-NEXT: vpand %xmm1, %xmm0, %xmm2
+; AVX512CDVL-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX512CDVL-NEXT: vpshufb %xmm2, %xmm3, %xmm2
; AVX512CDVL-NEXT: vpsrlw $4, %xmm0, %xmm0
-; AVX512CDVL-NEXT: vpandq %xmm1, %xmm0, %xmm0
+; AVX512CDVL-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX512CDVL-NEXT: vpshufb %xmm0, %xmm3, %xmm0
; AVX512CDVL-NEXT: vpaddb %xmm2, %xmm0, %xmm0
; AVX512CDVL-NEXT: vpsllw $8, %xmm0, %xmm1
@@ -1111,16 +1111,16 @@ define <8 x i16> @testv8i16u(<8 x i16> %
;
; AVX512CDVL-LABEL: testv8i16u:
; AVX512CDVL: # BB#0:
-; AVX512CDVL-NEXT: vpxord %xmm1, %xmm1, %xmm1
+; AVX512CDVL-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512CDVL-NEXT: vpsubw %xmm0, %xmm1, %xmm1
-; AVX512CDVL-NEXT: vpandq %xmm1, %xmm0, %xmm0
+; AVX512CDVL-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX512CDVL-NEXT: vpsubw {{.*}}(%rip), %xmm0, %xmm0
-; AVX512CDVL-NEXT: vmovdqa64 {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; AVX512CDVL-NEXT: vpandq %xmm1, %xmm0, %xmm2
-; AVX512CDVL-NEXT: vmovdqa64 {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; AVX512CDVL-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX512CDVL-NEXT: vpand %xmm1, %xmm0, %xmm2
+; AVX512CDVL-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX512CDVL-NEXT: vpshufb %xmm2, %xmm3, %xmm2
; AVX512CDVL-NEXT: vpsrlw $4, %xmm0, %xmm0
-; AVX512CDVL-NEXT: vpandq %xmm1, %xmm0, %xmm0
+; AVX512CDVL-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX512CDVL-NEXT: vpshufb %xmm0, %xmm3, %xmm0
; AVX512CDVL-NEXT: vpaddb %xmm2, %xmm0, %xmm0
; AVX512CDVL-NEXT: vpsllw $8, %xmm0, %xmm1
@@ -1287,16 +1287,16 @@ define <16 x i8> @testv16i8(<16 x i8> %i
;
; AVX512CDVL-LABEL: testv16i8:
; AVX512CDVL: # BB#0:
-; AVX512CDVL-NEXT: vpxord %xmm1, %xmm1, %xmm1
+; AVX512CDVL-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512CDVL-NEXT: vpsubb %xmm0, %xmm1, %xmm1
-; AVX512CDVL-NEXT: vpandq %xmm1, %xmm0, %xmm0
+; AVX512CDVL-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX512CDVL-NEXT: vpsubb {{.*}}(%rip), %xmm0, %xmm0
-; AVX512CDVL-NEXT: vmovdqa64 {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; AVX512CDVL-NEXT: vpandq %xmm1, %xmm0, %xmm2
-; AVX512CDVL-NEXT: vmovdqa64 {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; AVX512CDVL-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX512CDVL-NEXT: vpand %xmm1, %xmm0, %xmm2
+; AVX512CDVL-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX512CDVL-NEXT: vpshufb %xmm2, %xmm3, %xmm2
; AVX512CDVL-NEXT: vpsrlw $4, %xmm0, %xmm0
-; AVX512CDVL-NEXT: vpandq %xmm1, %xmm0, %xmm0
+; AVX512CDVL-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX512CDVL-NEXT: vpshufb %xmm0, %xmm3, %xmm0
; AVX512CDVL-NEXT: vpaddb %xmm2, %xmm0, %xmm0
; AVX512CDVL-NEXT: retq
@@ -1453,16 +1453,16 @@ define <16 x i8> @testv16i8u(<16 x i8> %
;
; AVX512CDVL-LABEL: testv16i8u:
; AVX512CDVL: # BB#0:
-; AVX512CDVL-NEXT: vpxord %xmm1, %xmm1, %xmm1
+; AVX512CDVL-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512CDVL-NEXT: vpsubb %xmm0, %xmm1, %xmm1
-; AVX512CDVL-NEXT: vpandq %xmm1, %xmm0, %xmm0
+; AVX512CDVL-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX512CDVL-NEXT: vpsubb {{.*}}(%rip), %xmm0, %xmm0
-; AVX512CDVL-NEXT: vmovdqa64 {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; AVX512CDVL-NEXT: vpandq %xmm1, %xmm0, %xmm2
-; AVX512CDVL-NEXT: vmovdqa64 {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; AVX512CDVL-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX512CDVL-NEXT: vpand %xmm1, %xmm0, %xmm2
+; AVX512CDVL-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX512CDVL-NEXT: vpshufb %xmm2, %xmm3, %xmm2
; AVX512CDVL-NEXT: vpsrlw $4, %xmm0, %xmm0
-; AVX512CDVL-NEXT: vpandq %xmm1, %xmm0, %xmm0
+; AVX512CDVL-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX512CDVL-NEXT: vpshufb %xmm0, %xmm3, %xmm0
; AVX512CDVL-NEXT: vpaddb %xmm2, %xmm0, %xmm0
; AVX512CDVL-NEXT: retq
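
The AVX512CDVL sequences in this file implement vector cttz: the input is reduced with x & -x, a constant is subtracted to expose the trailing bits, and those bits are counted with the nibble-lookup popcount (the two vmovdqa constants plus the vpshufb/vpaddb steps). None of these instructions uses masking, embedded broadcast, zmm registers, or xmm16-31, so each one has a VEX-encodable equivalent and the checks switch from the EVEX spellings (vpxord, vpandq, vmovdqa64) to the plain VEX mnemonics. A hypothetical standalone function exercising the same lowering (illustrative only, not copied from the test file) would be:

declare <2 x i64> @llvm.cttz.v2i64(<2 x i64>, i1)

define <2 x i64> @cttz_sketch_v2i64(<2 x i64> %in) {
  ; i1 false: the result is defined for a zero input.  The diff above
  ; shows this variant lowered through popcount((x & -x) - 1), while the
  ; *u (zero-undef) variant uses vplzcntq instead.
  %out = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %in, i1 false)
  ret <2 x i64> %out
}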
Modified: llvm/trunk/test/CodeGen/X86/vector-tzcnt-256.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-tzcnt-256.ll?rev=290663&r1=290662&r2=290663&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-tzcnt-256.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-tzcnt-256.ll Wed Dec 28 04:12:48 2016
@@ -59,16 +59,16 @@ define <4 x i64> @testv4i64(<4 x i64> %i
;
; AVX512CDVL-LABEL: testv4i64:
; AVX512CDVL: # BB#0:
-; AVX512CDVL-NEXT: vpxord %ymm1, %ymm1, %ymm1
+; AVX512CDVL-NEXT: vpxor %ymm1, %ymm1, %ymm1
; AVX512CDVL-NEXT: vpsubq %ymm0, %ymm1, %ymm2
-; AVX512CDVL-NEXT: vpandq %ymm2, %ymm0, %ymm0
+; AVX512CDVL-NEXT: vpand %ymm2, %ymm0, %ymm0
; AVX512CDVL-NEXT: vpsubq {{.*}}(%rip){1to4}, %ymm0, %ymm0
-; AVX512CDVL-NEXT: vmovdqa64 {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; AVX512CDVL-NEXT: vpandq %ymm2, %ymm0, %ymm3
-; AVX512CDVL-NEXT: vmovdqa64 {{.*#+}} ymm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; AVX512CDVL-NEXT: vmovdqa {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX512CDVL-NEXT: vpand %ymm2, %ymm0, %ymm3
+; AVX512CDVL-NEXT: vmovdqa {{.*#+}} ymm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX512CDVL-NEXT: vpshufb %ymm3, %ymm4, %ymm3
; AVX512CDVL-NEXT: vpsrlw $4, %ymm0, %ymm0
-; AVX512CDVL-NEXT: vpandq %ymm2, %ymm0, %ymm0
+; AVX512CDVL-NEXT: vpand %ymm2, %ymm0, %ymm0
; AVX512CDVL-NEXT: vpshufb %ymm0, %ymm4, %ymm0
; AVX512CDVL-NEXT: vpaddb %ymm3, %ymm0, %ymm0
; AVX512CDVL-NEXT: vpsadbw %ymm1, %ymm0, %ymm0
@@ -164,9 +164,9 @@ define <4 x i64> @testv4i64u(<4 x i64> %
;
; AVX512CDVL-LABEL: testv4i64u:
; AVX512CDVL: # BB#0:
-; AVX512CDVL-NEXT: vpxord %ymm1, %ymm1, %ymm1
+; AVX512CDVL-NEXT: vpxor %ymm1, %ymm1, %ymm1
; AVX512CDVL-NEXT: vpsubq %ymm0, %ymm1, %ymm1
-; AVX512CDVL-NEXT: vpandq %ymm1, %ymm0, %ymm0
+; AVX512CDVL-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX512CDVL-NEXT: vplzcntq %ymm0, %ymm0
; AVX512CDVL-NEXT: vpbroadcastq {{.*}}(%rip), %ymm1
; AVX512CDVL-NEXT: vpsubq %ymm0, %ymm1, %ymm0
@@ -266,16 +266,16 @@ define <8 x i32> @testv8i32(<8 x i32> %i
;
; AVX512CDVL-LABEL: testv8i32:
; AVX512CDVL: # BB#0:
-; AVX512CDVL-NEXT: vpxord %ymm1, %ymm1, %ymm1
+; AVX512CDVL-NEXT: vpxor %ymm1, %ymm1, %ymm1
; AVX512CDVL-NEXT: vpsubd %ymm0, %ymm1, %ymm2
-; AVX512CDVL-NEXT: vpandd %ymm2, %ymm0, %ymm0
+; AVX512CDVL-NEXT: vpand %ymm2, %ymm0, %ymm0
; AVX512CDVL-NEXT: vpsubd {{.*}}(%rip){1to8}, %ymm0, %ymm0
-; AVX512CDVL-NEXT: vmovdqa64 {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; AVX512CDVL-NEXT: vpandq %ymm2, %ymm0, %ymm3
-; AVX512CDVL-NEXT: vmovdqa64 {{.*#+}} ymm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; AVX512CDVL-NEXT: vmovdqa {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX512CDVL-NEXT: vpand %ymm2, %ymm0, %ymm3
+; AVX512CDVL-NEXT: vmovdqa {{.*#+}} ymm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX512CDVL-NEXT: vpshufb %ymm3, %ymm4, %ymm3
; AVX512CDVL-NEXT: vpsrlw $4, %ymm0, %ymm0
-; AVX512CDVL-NEXT: vpandq %ymm2, %ymm0, %ymm0
+; AVX512CDVL-NEXT: vpand %ymm2, %ymm0, %ymm0
; AVX512CDVL-NEXT: vpshufb %ymm0, %ymm4, %ymm0
; AVX512CDVL-NEXT: vpaddb %ymm3, %ymm0, %ymm0
; AVX512CDVL-NEXT: vpunpckhdq {{.*#+}} ymm2 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
@@ -396,9 +396,9 @@ define <8 x i32> @testv8i32u(<8 x i32> %
;
; AVX512CDVL-LABEL: testv8i32u:
; AVX512CDVL: # BB#0:
-; AVX512CDVL-NEXT: vpxord %ymm1, %ymm1, %ymm1
+; AVX512CDVL-NEXT: vpxor %ymm1, %ymm1, %ymm1
; AVX512CDVL-NEXT: vpsubd %ymm0, %ymm1, %ymm1
-; AVX512CDVL-NEXT: vpandd %ymm1, %ymm0, %ymm0
+; AVX512CDVL-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX512CDVL-NEXT: vplzcntd %ymm0, %ymm0
; AVX512CDVL-NEXT: vpbroadcastd {{.*}}(%rip), %ymm1
; AVX512CDVL-NEXT: vpsubd %ymm0, %ymm1, %ymm0
@@ -496,16 +496,16 @@ define <16 x i16> @testv16i16(<16 x i16>
;
; AVX512CDVL-LABEL: testv16i16:
; AVX512CDVL: # BB#0:
-; AVX512CDVL-NEXT: vpxord %ymm1, %ymm1, %ymm1
+; AVX512CDVL-NEXT: vpxor %ymm1, %ymm1, %ymm1
; AVX512CDVL-NEXT: vpsubw %ymm0, %ymm1, %ymm1
-; AVX512CDVL-NEXT: vpandq %ymm1, %ymm0, %ymm0
+; AVX512CDVL-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX512CDVL-NEXT: vpsubw {{.*}}(%rip), %ymm0, %ymm0
-; AVX512CDVL-NEXT: vmovdqa64 {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; AVX512CDVL-NEXT: vpandq %ymm1, %ymm0, %ymm2
-; AVX512CDVL-NEXT: vmovdqa64 {{.*#+}} ymm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; AVX512CDVL-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX512CDVL-NEXT: vpand %ymm1, %ymm0, %ymm2
+; AVX512CDVL-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX512CDVL-NEXT: vpshufb %ymm2, %ymm3, %ymm2
; AVX512CDVL-NEXT: vpsrlw $4, %ymm0, %ymm0
-; AVX512CDVL-NEXT: vpandq %ymm1, %ymm0, %ymm0
+; AVX512CDVL-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX512CDVL-NEXT: vpshufb %ymm0, %ymm3, %ymm0
; AVX512CDVL-NEXT: vpaddb %ymm2, %ymm0, %ymm0
; AVX512CDVL-NEXT: vpsllw $8, %ymm0, %ymm1
@@ -611,16 +611,16 @@ define <16 x i16> @testv16i16u(<16 x i16
;
; AVX512CDVL-LABEL: testv16i16u:
; AVX512CDVL: # BB#0:
-; AVX512CDVL-NEXT: vpxord %ymm1, %ymm1, %ymm1
+; AVX512CDVL-NEXT: vpxor %ymm1, %ymm1, %ymm1
; AVX512CDVL-NEXT: vpsubw %ymm0, %ymm1, %ymm1
-; AVX512CDVL-NEXT: vpandq %ymm1, %ymm0, %ymm0
+; AVX512CDVL-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX512CDVL-NEXT: vpsubw {{.*}}(%rip), %ymm0, %ymm0
-; AVX512CDVL-NEXT: vmovdqa64 {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; AVX512CDVL-NEXT: vpandq %ymm1, %ymm0, %ymm2
-; AVX512CDVL-NEXT: vmovdqa64 {{.*#+}} ymm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; AVX512CDVL-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX512CDVL-NEXT: vpand %ymm1, %ymm0, %ymm2
+; AVX512CDVL-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX512CDVL-NEXT: vpshufb %ymm2, %ymm3, %ymm2
; AVX512CDVL-NEXT: vpsrlw $4, %ymm0, %ymm0
-; AVX512CDVL-NEXT: vpandq %ymm1, %ymm0, %ymm0
+; AVX512CDVL-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX512CDVL-NEXT: vpshufb %ymm0, %ymm3, %ymm0
; AVX512CDVL-NEXT: vpaddb %ymm2, %ymm0, %ymm0
; AVX512CDVL-NEXT: vpsllw $8, %ymm0, %ymm1
@@ -717,16 +717,16 @@ define <32 x i8> @testv32i8(<32 x i8> %i
;
; AVX512CDVL-LABEL: testv32i8:
; AVX512CDVL: # BB#0:
-; AVX512CDVL-NEXT: vpxord %ymm1, %ymm1, %ymm1
+; AVX512CDVL-NEXT: vpxor %ymm1, %ymm1, %ymm1
; AVX512CDVL-NEXT: vpsubb %ymm0, %ymm1, %ymm1
-; AVX512CDVL-NEXT: vpandq %ymm1, %ymm0, %ymm0
+; AVX512CDVL-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX512CDVL-NEXT: vpsubb {{.*}}(%rip), %ymm0, %ymm0
-; AVX512CDVL-NEXT: vmovdqa64 {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; AVX512CDVL-NEXT: vpandq %ymm1, %ymm0, %ymm2
-; AVX512CDVL-NEXT: vmovdqa64 {{.*#+}} ymm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; AVX512CDVL-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX512CDVL-NEXT: vpand %ymm1, %ymm0, %ymm2
+; AVX512CDVL-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX512CDVL-NEXT: vpshufb %ymm2, %ymm3, %ymm2
; AVX512CDVL-NEXT: vpsrlw $4, %ymm0, %ymm0
-; AVX512CDVL-NEXT: vpandq %ymm1, %ymm0, %ymm0
+; AVX512CDVL-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX512CDVL-NEXT: vpshufb %ymm0, %ymm3, %ymm0
; AVX512CDVL-NEXT: vpaddb %ymm2, %ymm0, %ymm0
; AVX512CDVL-NEXT: retq
@@ -814,16 +814,16 @@ define <32 x i8> @testv32i8u(<32 x i8> %
;
; AVX512CDVL-LABEL: testv32i8u:
; AVX512CDVL: # BB#0:
-; AVX512CDVL-NEXT: vpxord %ymm1, %ymm1, %ymm1
+; AVX512CDVL-NEXT: vpxor %ymm1, %ymm1, %ymm1
; AVX512CDVL-NEXT: vpsubb %ymm0, %ymm1, %ymm1
-; AVX512CDVL-NEXT: vpandq %ymm1, %ymm0, %ymm0
+; AVX512CDVL-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX512CDVL-NEXT: vpsubb {{.*}}(%rip), %ymm0, %ymm0
-; AVX512CDVL-NEXT: vmovdqa64 {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; AVX512CDVL-NEXT: vpandq %ymm1, %ymm0, %ymm2
-; AVX512CDVL-NEXT: vmovdqa64 {{.*#+}} ymm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; AVX512CDVL-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX512CDVL-NEXT: vpand %ymm1, %ymm0, %ymm2
+; AVX512CDVL-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX512CDVL-NEXT: vpshufb %ymm2, %ymm3, %ymm2
; AVX512CDVL-NEXT: vpsrlw $4, %ymm0, %ymm0
-; AVX512CDVL-NEXT: vpandq %ymm1, %ymm0, %ymm0
+; AVX512CDVL-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX512CDVL-NEXT: vpshufb %ymm0, %ymm3, %ymm0
; AVX512CDVL-NEXT: vpaddb %ymm2, %ymm0, %ymm0
; AVX512CDVL-NEXT: retq
Modified: llvm/trunk/test/CodeGen/X86/viabs.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/viabs.ll?rev=290663&r1=290662&r2=290663&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/viabs.ll (original)
+++ llvm/trunk/test/CodeGen/X86/viabs.ll Wed Dec 28 04:12:48 2016
@@ -452,7 +452,7 @@ define <2 x i64> @test_abs_ge_v2i64(<2 x
; AVX512: # BB#0:
; AVX512-NEXT: vpsraq $63, %xmm0, %xmm1
; AVX512-NEXT: vpaddq %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpxorq %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
%tmp1neg = sub <2 x i64> zeroinitializer, %a
%b = icmp sge <2 x i64> %a, zeroinitializer
@@ -501,7 +501,7 @@ define <4 x i64> @test_abs_gt_v4i64(<4 x
; AVX512: # BB#0:
; AVX512-NEXT: vpsraq $63, %ymm0, %ymm1
; AVX512-NEXT: vpaddq %ymm1, %ymm0, %ymm0
-; AVX512-NEXT: vpxorq %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: vpxor %ymm1, %ymm0, %ymm0
; AVX512-NEXT: retq
%tmp1neg = sub <4 x i64> zeroinitializer, %a
%b = icmp sgt <4 x i64> %a, <i64 -1, i64 -1, i64 -1, i64 -1>
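
These last two hunks cover the signed-abs lowering: vpsraq builds the sign mask, vpaddq adds it, and the closing xor removes the sign, now spelled as VEX vpxor instead of EVEX vpxorq. Only the first two IR instructions of the idiom appear in the hunk context; a hypothetical sketch of the full compare-and-select pattern (the name and the select/ret tail are illustrative, not quoted from viabs.ll) is:

define <2 x i64> @abs_sketch_v2i64(<2 x i64> %a) {
  ; |a| = a >= 0 ? a : 0 - a.  With AVX-512's vpsraq this becomes
  ; m = a >>s 63, then (a + m) ^ m, i.e. the vpsraq/vpaddq/vpxor triple
  ; checked above; only the final xor changes spelling, the computation
  ; is identical.
  %neg = sub <2 x i64> zeroinitializer, %a
  %cmp = icmp sge <2 x i64> %a, zeroinitializer
  %abs = select <2 x i1> %cmp, <2 x i64> %a, <2 x i64> %neg
  ret <2 x i64> %abs
}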