[llvm] r332205 - [X86] Add patterns for combining movss+uint_to_fp into the intrinsic instructions under AVX512.

Sun May 13 16:24:21 PDT 2018

Author: ctopper
Date: Sun May 13 16:24:21 2018
New Revision: 332205

URL: http://llvm.org/viewvc/llvm-project?rev=332205&view=rev
Log:
[X86] Add patterns for combining movss+uint_to_fp into the intrinsic instructions under AVX512.

This matches what we do for sint_to_fp.

Modified:
    llvm/trunk/lib/Target/X86/X86InstrAVX512.td
    llvm/trunk/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll

Modified: llvm/trunk/lib/Target/X86/X86InstrAVX512.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrAVX512.td?rev=332205&r1=332204&r2=332205&view=diff
==============================================================================

--- llvm/trunk/lib/Target/X86/X86InstrAVX512.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td Sun May 13 16:24:21 2018
@@ -6970,6 +6970,46 @@ def : Pat<(v2f64 (X86Movsd
                    (v2f64 VR128X:$dst),
                    (v2f64 (scalar_to_vector (f64 (sint_to_fp (loadi32 addr:$src))))))),
           (VCVTSI2SDZrm_Int VR128X:$dst, addr:$src)>;
+
+def : Pat<(v4f32 (X86Movss
+                   (v4f32 VR128X:$dst),
+                   (v4f32 (scalar_to_vector (f32 (uint_to_fp GR64:$src)))))),
+          (VCVTUSI642SSZrr_Int VR128X:$dst, GR64:$src)>;
+
+def : Pat<(v4f32 (X86Movss
+                   (v4f32 VR128X:$dst),
+                   (v4f32 (scalar_to_vector (f32 (uint_to_fp (loadi64 addr:$src))))))),
+          (VCVTUSI642SSZrm_Int VR128X:$dst, addr:$src)>;
+
+def : Pat<(v4f32 (X86Movss
+                   (v4f32 VR128X:$dst),
+                   (v4f32 (scalar_to_vector (f32 (uint_to_fp GR32:$src)))))),
+          (VCVTUSI2SSZrr_Int VR128X:$dst, GR32:$src)>;
+
+def : Pat<(v4f32 (X86Movss
+                   (v4f32 VR128X:$dst),
+                   (v4f32 (scalar_to_vector (f32 (uint_to_fp (loadi32 addr:$src))))))),
+          (VCVTUSI2SSZrm_Int VR128X:$dst, addr:$src)>;
+
+def : Pat<(v2f64 (X86Movsd
+                   (v2f64 VR128X:$dst),
+                   (v2f64 (scalar_to_vector (f64 (uint_to_fp GR64:$src)))))),
+          (VCVTUSI642SDZrr_Int VR128X:$dst, GR64:$src)>;
+
+def : Pat<(v2f64 (X86Movsd
+                   (v2f64 VR128X:$dst),
+                   (v2f64 (scalar_to_vector (f64 (uint_to_fp (loadi64 addr:$src))))))),
+          (VCVTUSI642SDZrm_Int VR128X:$dst, addr:$src)>;
+
+def : Pat<(v2f64 (X86Movsd
+                   (v2f64 VR128X:$dst),
+                   (v2f64 (scalar_to_vector (f64 (uint_to_fp GR32:$src)))))),
+          (VCVTUSI2SDZrr_Int VR128X:$dst, GR32:$src)>;
+
+def : Pat<(v2f64 (X86Movsd
+                   (v2f64 VR128X:$dst),
+                   (v2f64 (scalar_to_vector (f64 (uint_to_fp (loadi32 addr:$src))))))),
+          (VCVTUSI2SDZrm_Int VR128X:$dst, addr:$src)>;
 } // Predicates = [HasAVX512]
 
 // Convert float/double to signed/unsigned int 32/64 with truncation

Modified: llvm/trunk/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll?rev=332205&r1=332204&r2=332205&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll Sun May 13 16:24:21 2018
@@ -1989,14 +1989,12 @@ define <2 x double> @test_mm_cvtu32_sd(<
 ; X32-LABEL: test_mm_cvtu32_sd:
 ; X32:       # %bb.0: # %entry
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    vcvtusi2sdl %eax, %xmm1, %xmm1
-; X32-NEXT:    vmovsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
+; X32-NEXT:    vcvtusi2sdl %eax, %xmm0, %xmm0
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: test_mm_cvtu32_sd:
 ; X64:       # %bb.0: # %entry
-; X64-NEXT:    vcvtusi2sdl %edi, %xmm1, %xmm1
-; X64-NEXT:    vmovsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
+; X64-NEXT:    vcvtusi2sdl %edi, %xmm0, %xmm0
 ; X64-NEXT:    retq
 entry:
   %conv.i = uitofp i32 %__B to double
@@ -2017,8 +2015,7 @@ define <2 x double> @test_mm_cvtu64_sd(<
 ;
 ; X64-LABEL: test_mm_cvtu64_sd:
 ; X64:       # %bb.0: # %entry
-; X64-NEXT:    vcvtusi2sdq %rdi, %xmm1, %xmm1
-; X64-NEXT:    vmovsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
+; X64-NEXT:    vcvtusi2sdq %rdi, %xmm0, %xmm0
 ; X64-NEXT:    retq
 entry:
   %conv.i = uitofp i64 %__B to double
@@ -2030,14 +2027,12 @@ define <4 x float> @test_mm_cvtu32_ss(<4
 ; X32-LABEL: test_mm_cvtu32_ss:
 ; X32:       # %bb.0: # %entry
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    vcvtusi2ssl %eax, %xmm1, %xmm1
-; X32-NEXT:    vmovss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
+; X32-NEXT:    vcvtusi2ssl %eax, %xmm0, %xmm0
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: test_mm_cvtu32_ss:
 ; X64:       # %bb.0: # %entry
-; X64-NEXT:    vcvtusi2ssl %edi, %xmm1, %xmm1
-; X64-NEXT:    vmovss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
+; X64-NEXT:    vcvtusi2ssl %edi, %xmm0, %xmm0
 ; X64-NEXT:    retq
 entry:
   %conv.i = uitofp i32 %__B to float
@@ -2074,8 +2069,7 @@ define <4 x float> @test_mm_cvtu64_ss(<4
 ;
 ; X64-LABEL: test_mm_cvtu64_ss:
 ; X64:       # %bb.0: # %entry
-; X64-NEXT:    vcvtusi2ssq %rdi, %xmm1, %xmm1
-; X64-NEXT:    vmovss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
+; X64-NEXT:    vcvtusi2ssq %rdi, %xmm0, %xmm0
 ; X64-NEXT:    retq
 entry:
   %conv.i = uitofp i64 %__B to float