r332203 - [X86] Emit better code for _mm_cvtu32_sd, _mm_cvtu64_sd, _mm_cvtu32_ss, and _mm_cvtu64_ss.

Craig Topper via cfe-commits cfe-commits at lists.llvm.org
Sun May 13 16:03:30 PDT 2018


Author: ctopper
Date: Sun May 13 16:03:30 2018
New Revision: 332203

URL: http://llvm.org/viewvc/llvm-project?rev=332203&view=rev
Log:
[X86] Emit better code for _mm_cvtu32_sd, _mm_cvtu64_sd, _mm_cvtu32_ss, and _mm_cvtu64_ss.

We can implement these intrinsics directly in C, which generates generic uitofp and insertelement instructions instead of a call to a target-specific builtin.

We can't do this for the versions that take an explicit rounding mode, so those are left unchanged.

Modified:
    cfe/trunk/include/clang/Basic/BuiltinsX86.def
    cfe/trunk/lib/Headers/avx512fintrin.h
    cfe/trunk/test/CodeGen/avx512f-builtins.c

Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsX86.def?rev=332203&r1=332202&r2=332203&view=diff
==============================================================================
--- cfe/trunk/include/clang/Basic/BuiltinsX86.def (original)
+++ cfe/trunk/include/clang/Basic/BuiltinsX86.def Sun May 13 16:03:30 2018
@@ -1840,7 +1840,6 @@ TARGET_BUILTIN(__builtin_ia32_cvtw2mask2
 TARGET_BUILTIN(__builtin_ia32_cvtsd2ss_round_mask, "V4fV4fV2dV4fUcIi", "nc", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_cvtsi2ss32, "V4fV4fiIi", "nc", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_cvtss2sd_round_mask, "V2dV2dV4fV2dUcIi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_cvtusi2sd32, "V2dV2dUi", "nc", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_cvtusi2ss32, "V4fV4fUiIi", "nc", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_vpmultishiftqb512_mask, "V64cV64cV64cV64cULLi", "nc", "avx512vbmi")
 TARGET_BUILTIN(__builtin_ia32_vpmultishiftqb128_mask, "V16cV16cV16cV16cUs", "nc", "avx512vbmi,avx512vl")

Modified: cfe/trunk/lib/Headers/avx512fintrin.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512fintrin.h?rev=332203&r1=332202&r2=332203&view=diff
==============================================================================
--- cfe/trunk/lib/Headers/avx512fintrin.h (original)
+++ cfe/trunk/lib/Headers/avx512fintrin.h Sun May 13 16:03:30 2018
@@ -9521,7 +9521,8 @@ _mm_maskz_cvtss_sd (__mmask8 __U, __m128
 static __inline__ __m128d __DEFAULT_FN_ATTRS
 _mm_cvtu32_sd (__m128d __A, unsigned __B)
 {
-  return (__m128d) __builtin_ia32_cvtusi2sd32 ((__v2df) __A, __B);
+  __A[0] = __B;
+  return __A;
 }
 
 #ifdef __x86_64__
@@ -9532,8 +9533,8 @@ _mm_cvtu32_sd (__m128d __A, unsigned __B
 static __inline__ __m128d __DEFAULT_FN_ATTRS
 _mm_cvtu64_sd (__m128d __A, unsigned long long __B)
 {
-  return (__m128d) __builtin_ia32_cvtusi2sd64 ((__v2df) __A, __B,
-                 _MM_FROUND_CUR_DIRECTION);
+  __A[0] = __B;
+  return __A;
 }
 #endif
 
@@ -9544,8 +9545,8 @@ _mm_cvtu64_sd (__m128d __A, unsigned lon
 static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_cvtu32_ss (__m128 __A, unsigned __B)
 {
-  return (__m128) __builtin_ia32_cvtusi2ss32 ((__v4sf) __A, __B,
-                _MM_FROUND_CUR_DIRECTION);
+  __A[0] = __B;
+  return __A;
 }
 
 #ifdef __x86_64__
@@ -9556,8 +9557,8 @@ _mm_cvtu32_ss (__m128 __A, unsigned __B)
 static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_cvtu64_ss (__m128 __A, unsigned long long __B)
 {
-  return (__m128) __builtin_ia32_cvtusi2ss64 ((__v4sf) __A, __B,
-                _MM_FROUND_CUR_DIRECTION);
+  __A[0] = __B;
+  return __A;
 }
 #endif
 

Modified: cfe/trunk/test/CodeGen/avx512f-builtins.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/avx512f-builtins.c?rev=332203&r1=332202&r2=332203&view=diff
==============================================================================
--- cfe/trunk/test/CodeGen/avx512f-builtins.c (original)
+++ cfe/trunk/test/CodeGen/avx512f-builtins.c Sun May 13 16:03:30 2018
@@ -7007,7 +7007,8 @@ __m128d test_mm_maskz_cvt_roundss_sd( __
 
 __m128d test_mm_cvtu32_sd(__m128d __A, unsigned __B) {
   // CHECK-LABEL: @test_mm_cvtu32_sd
-  // CHECK: @llvm.x86.avx512.cvtusi2sd
+  // CHECK: uitofp i32 %{{.*}} to double
+  // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0
   return _mm_cvtu32_sd(__A, __B); 
 }
 
@@ -7020,7 +7021,8 @@ __m128d test_mm_cvt_roundu64_sd(__m128d
 
 __m128d test_mm_cvtu64_sd(__m128d __A, unsigned long long __B) {
   // CHECK-LABEL: @test_mm_cvtu64_sd
-  // CHECK: @llvm.x86.avx512.cvtusi642sd
+  // CHECK: uitofp i64 %{{.*}} to double
+  // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0
   return _mm_cvtu64_sd(__A, __B); 
 }
 #endif
@@ -7033,7 +7035,8 @@ __m128 test_mm_cvt_roundu32_ss(__m128 __
 
 __m128 test_mm_cvtu32_ss(__m128 __A, unsigned __B) {
   // CHECK-LABEL: @test_mm_cvtu32_ss
-  // CHECK: @llvm.x86.avx512.cvtusi2ss
+  // CHECK: uitofp i32 %{{.*}} to float
+  // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0
   return _mm_cvtu32_ss(__A, __B); 
 }
 
@@ -7046,7 +7049,8 @@ __m128 test_mm_cvt_roundu64_ss(__m128 __
 
 __m128 test_mm_cvtu64_ss(__m128 __A, unsigned long long __B) {
   // CHECK-LABEL: @test_mm_cvtu64_ss
-  // CHECK: @llvm.x86.avx512.cvtusi642ss
+  // CHECK: uitofp i64 %{{.*}} to float
+  // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0
   return _mm_cvtu64_ss(__A, __B); 
 }
 #endif




More information about the cfe-commits mailing list