[llvm] r277933 - [AVX-512] Add AVX-512 scalar CVT instructions to hasUndefRegUpdate.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Sat Aug 6 12:31:50 PDT 2016
Author: ctopper
Date: Sat Aug 6 14:31:50 2016
New Revision: 277933
URL: http://llvm.org/viewvc/llvm-project?rev=277933&view=rev
Log:
[AVX-512] Add AVX-512 scalar CVT instructions to hasUndefRegUpdate.
Modified:
llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
llvm/trunk/test/CodeGen/X86/avx512-cvt.ll
llvm/trunk/test/CodeGen/X86/vec_int_to_fp.ll
Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.cpp?rev=277933&r1=277932&r2=277933&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86InstrInfo.cpp Sat Aug 6 14:31:50 2016
@@ -6255,7 +6255,31 @@ static bool hasUndefRegUpdate(unsigned O
case X86::VSQRTSDr_Int:
case X86::VSQRTSDm:
case X86::VSQRTSDm_Int:
- // AVX-512
+ // AVX-512
+ case X86::VCVTSI2SSZrr:
+ case X86::VCVTSI2SSZrm:
+ case X86::Int_VCVTSI2SSZrr:
+ case X86::Int_VCVTSI2SSZrm:
+ case X86::VCVTSI2SSZrr_Int:
+ case X86::VCVTSI2SSZrm_Int:
+ case X86::VCVTSI642SSZrr:
+ case X86::VCVTSI642SSZrm:
+ case X86::Int_VCVTSI2SS64Zrr:
+ case X86::Int_VCVTSI2SS64Zrm:
+ case X86::VCVTSI642SSZrr_Int:
+ case X86::VCVTSI642SSZrm_Int:
+ case X86::VCVTSI2SDZrr:
+ case X86::VCVTSI2SDZrm:
+ case X86::Int_VCVTSI2SDZrr:
+ case X86::Int_VCVTSI2SDZrm:
+ case X86::VCVTSI2SDZrr_Int:
+ case X86::VCVTSI2SDZrm_Int:
+ case X86::VCVTSI642SDZrr:
+ case X86::VCVTSI642SDZrm:
+ case X86::Int_VCVTSI2SD64Zrr:
+ case X86::Int_VCVTSI2SD64Zrm:
+ case X86::VCVTSI642SDZrr_Int:
+ case X86::VCVTSI642SDZrm_Int:
case X86::VCVTSD2SSZrr:
case X86::VCVTSD2SSZrm:
case X86::VCVTSS2SDZrr:
Modified: llvm/trunk/test/CodeGen/X86/avx512-cvt.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-cvt.ll?rev=277933&r1=277932&r2=277933&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-cvt.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-cvt.ll Sat Aug 6 14:31:50 2016
@@ -36,6 +36,7 @@ define <8 x double> @sltof864(<8 x i64>
; KNL-NEXT: vpextrq $1, %xmm0, %rax
; KNL-NEXT: vcvtsi2sdq %rax, %xmm0, %xmm3
; KNL-NEXT: vmovq %xmm0, %rax
+; KNL-NEXT: vxorps %xmm0, %xmm0, %xmm0
; KNL-NEXT: vcvtsi2sdq %rax, %xmm0, %xmm0
; KNL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm3[0]
; KNL-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
@@ -62,6 +63,7 @@ define <4 x double> @sltof464(<4 x i64>
; KNL-NEXT: vpextrq $1, %xmm0, %rax
; KNL-NEXT: vcvtsi2sdq %rax, %xmm0, %xmm2
; KNL-NEXT: vmovq %xmm0, %rax
+; KNL-NEXT: vxorps %xmm0, %xmm0, %xmm0
; KNL-NEXT: vcvtsi2sdq %rax, %xmm0, %xmm0
; KNL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; KNL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
@@ -81,6 +83,7 @@ define <2 x float> @sltof2f32(<2 x i64>
; KNL-NEXT: vpextrq $1, %xmm0, %rax
; KNL-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm1
; KNL-NEXT: vmovq %xmm0, %rax
+; KNL-NEXT: vxorps %xmm0, %xmm0, %xmm0
; KNL-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm0
; KNL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
; KNL-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm1
@@ -111,6 +114,7 @@ define <4 x float> @sltof4f32_mem(<4 x i
; KNL-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm2
; KNL-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
; KNL-NEXT: vpextrq $1, %xmm0, %rax
+; KNL-NEXT: vxorps %xmm0, %xmm0, %xmm0
; KNL-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm0
; KNL-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
; KNL-NEXT: retq
@@ -191,6 +195,7 @@ define <4 x float> @sltof432(<4 x i64> %
; KNL-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm2
; KNL-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
; KNL-NEXT: vpextrq $1, %xmm0, %rax
+; KNL-NEXT: vxorps %xmm0, %xmm0, %xmm0
; KNL-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm0
; KNL-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
; KNL-NEXT: retq
@@ -884,6 +889,7 @@ define <2 x float> @sitofp_2i1_float(<2
; KNL-NEXT: vmovq %xmm0, %rdx
; KNL-NEXT: testb $1, %dl
; KNL-NEXT: cmovnel %eax, %ecx
+; KNL-NEXT: vxorps %xmm0, %xmm0, %xmm0
; KNL-NEXT: vcvtsi2ssl %ecx, %xmm0, %xmm0
; KNL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
; KNL-NEXT: retq
@@ -1089,6 +1095,7 @@ define <2 x float> @uitofp_2i1_float(<2
; KNL-NEXT: vcvtsi2ssl %eax, %xmm0, %xmm1
; KNL-NEXT: vmovq %xmm0, %rax
; KNL-NEXT: andl $1, %eax
+; KNL-NEXT: vxorps %xmm0, %xmm0, %xmm0
; KNL-NEXT: vcvtsi2ssl %eax, %xmm0, %xmm0
; KNL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
; KNL-NEXT: retq
Modified: llvm/trunk/test/CodeGen/X86/vec_int_to_fp.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_int_to_fp.ll?rev=277933&r1=277932&r2=277933&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vec_int_to_fp.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vec_int_to_fp.ll Sat Aug 6 14:31:50 2016
@@ -25,24 +25,15 @@ define <2 x double> @sitofp_2i64_to_2f64
; SSE-NEXT: movapd %xmm1, %xmm0
; SSE-NEXT: retq
;
-; VEX-LABEL: sitofp_2i64_to_2f64:
-; VEX: # BB#0:
-; VEX-NEXT: vpextrq $1, %xmm0, %rax
-; VEX-NEXT: vcvtsi2sdq %rax, %xmm0, %xmm1
-; VEX-NEXT: vmovq %xmm0, %rax
-; VEX-NEXT: vxorps %xmm0, %xmm0, %xmm0
-; VEX-NEXT: vcvtsi2sdq %rax, %xmm0, %xmm0
-; VEX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
-; VEX-NEXT: retq
-;
-; AVX512-LABEL: sitofp_2i64_to_2f64:
-; AVX512: # BB#0:
-; AVX512-NEXT: vpextrq $1, %xmm0, %rax
-; AVX512-NEXT: vcvtsi2sdq %rax, %xmm0, %xmm1
-; AVX512-NEXT: vmovq %xmm0, %rax
-; AVX512-NEXT: vcvtsi2sdq %rax, %xmm0, %xmm0
-; AVX512-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
-; AVX512-NEXT: retq
+; AVX-LABEL: sitofp_2i64_to_2f64:
+; AVX: # BB#0:
+; AVX-NEXT: vpextrq $1, %xmm0, %rax
+; AVX-NEXT: vcvtsi2sdq %rax, %xmm0, %xmm1
+; AVX-NEXT: vmovq %xmm0, %rax
+; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
+; AVX-NEXT: vcvtsi2sdq %rax, %xmm0, %xmm0
+; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX-NEXT: retq
%cvt = sitofp <2 x i64> %a to <2 x double>
ret <2 x double> %cvt
}
@@ -260,6 +251,7 @@ define <4 x double> @sitofp_4i64_to_4f64
; AVX512-NEXT: vpextrq $1, %xmm0, %rax
; AVX512-NEXT: vcvtsi2sdq %rax, %xmm0, %xmm2
; AVX512-NEXT: vmovq %xmm0, %rax
+; AVX512-NEXT: vxorps %xmm0, %xmm0, %xmm0
; AVX512-NEXT: vcvtsi2sdq %rax, %xmm0, %xmm0
; AVX512-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; AVX512-NEXT: vinsertf32x4 $1, %xmm1, %ymm0, %ymm0
@@ -950,30 +942,18 @@ define <4 x float> @sitofp_2i64_to_4f32(
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: retq
;
-; VEX-LABEL: sitofp_2i64_to_4f32:
-; VEX: # BB#0:
-; VEX-NEXT: vpextrq $1, %xmm0, %rax
-; VEX-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm1
-; VEX-NEXT: vmovq %xmm0, %rax
-; VEX-NEXT: vxorps %xmm0, %xmm0, %xmm0
-; VEX-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm0
-; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
-; VEX-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm1
-; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3]
-; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0]
-; VEX-NEXT: retq
-;
-; AVX512-LABEL: sitofp_2i64_to_4f32:
-; AVX512: # BB#0:
-; AVX512-NEXT: vpextrq $1, %xmm0, %rax
-; AVX512-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm1
-; AVX512-NEXT: vmovq %xmm0, %rax
-; AVX512-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm0
-; AVX512-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
-; AVX512-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm1
-; AVX512-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3]
-; AVX512-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0]
-; AVX512-NEXT: retq
+; AVX-LABEL: sitofp_2i64_to_4f32:
+; AVX: # BB#0:
+; AVX-NEXT: vpextrq $1, %xmm0, %rax
+; AVX-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm1
+; AVX-NEXT: vmovq %xmm0, %rax
+; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
+; AVX-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm0
+; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
+; AVX-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm1
+; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3]
+; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0]
+; AVX-NEXT: retq
%cvt = sitofp <2 x i64> %a to <2 x float>
%ext = shufflevector <2 x float> %cvt, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
ret <4 x float> %ext
@@ -995,30 +975,18 @@ define <4 x float> @sitofp_4i64_to_4f32_
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: retq
;
-; VEX-LABEL: sitofp_4i64_to_4f32_undef:
-; VEX: # BB#0:
-; VEX-NEXT: vpextrq $1, %xmm0, %rax
-; VEX-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm1
-; VEX-NEXT: vmovq %xmm0, %rax
-; VEX-NEXT: vxorps %xmm0, %xmm0, %xmm0
-; VEX-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm0
-; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
-; VEX-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm1
-; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3]
-; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0]
-; VEX-NEXT: retq
-;
-; AVX512-LABEL: sitofp_4i64_to_4f32_undef:
-; AVX512: # BB#0:
-; AVX512-NEXT: vpextrq $1, %xmm0, %rax
-; AVX512-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm1
-; AVX512-NEXT: vmovq %xmm0, %rax
-; AVX512-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm0
-; AVX512-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
-; AVX512-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm1
-; AVX512-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3]
-; AVX512-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0]
-; AVX512-NEXT: retq
+; AVX-LABEL: sitofp_4i64_to_4f32_undef:
+; AVX: # BB#0:
+; AVX-NEXT: vpextrq $1, %xmm0, %rax
+; AVX-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm1
+; AVX-NEXT: vmovq %xmm0, %rax
+; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
+; AVX-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm0
+; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
+; AVX-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm1
+; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3]
+; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0]
+; AVX-NEXT: retq
%ext = shufflevector <2 x i64> %a, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
%cvt = sitofp <4 x i64> %ext to <4 x float>
ret <4 x float> %cvt
@@ -1222,6 +1190,7 @@ define <4 x float> @sitofp_4i64_to_4f32(
; AVX512-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm2
; AVX512-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
; AVX512-NEXT: vpextrq $1, %xmm0, %rax
+; AVX512-NEXT: vxorps %xmm0, %xmm0, %xmm0
; AVX512-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm0
; AVX512-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
; AVX512-NEXT: retq
@@ -2170,6 +2139,7 @@ define <2 x double> @sitofp_load_2i64_to
; AVX512-NEXT: vpextrq $1, %xmm0, %rax
; AVX512-NEXT: vcvtsi2sdq %rax, %xmm0, %xmm1
; AVX512-NEXT: vmovq %xmm0, %rax
+; AVX512-NEXT: vxorps %xmm0, %xmm0, %xmm0
; AVX512-NEXT: vcvtsi2sdq %rax, %xmm0, %xmm0
; AVX512-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX512-NEXT: retq
@@ -2312,6 +2282,7 @@ define <4 x double> @sitofp_load_4i64_to
; AVX512-NEXT: vpextrq $1, %xmm0, %rax
; AVX512-NEXT: vcvtsi2sdq %rax, %xmm0, %xmm2
; AVX512-NEXT: vmovq %xmm0, %rax
+; AVX512-NEXT: vxorps %xmm0, %xmm0, %xmm0
; AVX512-NEXT: vcvtsi2sdq %rax, %xmm0, %xmm0
; AVX512-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; AVX512-NEXT: vinsertf32x4 $1, %xmm1, %ymm0, %ymm0
@@ -2840,6 +2811,7 @@ define <4 x float> @sitofp_load_4i64_to_
; AVX512-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm2
; AVX512-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
; AVX512-NEXT: vpextrq $1, %xmm0, %rax
+; AVX512-NEXT: vxorps %xmm0, %xmm0, %xmm0
; AVX512-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm0
; AVX512-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
; AVX512-NEXT: retq
@@ -3033,6 +3005,7 @@ define <8 x float> @sitofp_load_8i64_to_
; AVX512-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm3
; AVX512-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1],xmm3[0],xmm2[3]
; AVX512-NEXT: vpextrq $1, %xmm0, %rax
+; AVX512-NEXT: vxorps %xmm0, %xmm0, %xmm0
; AVX512-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm0
; AVX512-NEXT: vinsertps {{.*#+}} xmm0 = xmm2[0,1,2],xmm0[0]
; AVX512-NEXT: vinsertf32x4 $1, %xmm1, %ymm0, %ymm0
More information about the llvm-commits mailing list