r336470 - [X86] Add missing scalar fma intrinsics with rounding, but no mask.

Craig Topper via cfe-commits cfe-commits at lists.llvm.org
Fri Jul 6 15:08:43 PDT 2018


Author: ctopper
Date: Fri Jul  6 15:08:43 2018
New Revision: 336470

URL: http://llvm.org/viewvc/llvm-project?rev=336470&view=rev
Log:
[X86] Add missing scalar fma intrinsics with rounding, but no mask.

We had the mask versions of the rounding intrinsics, but not one without masking.

Also change the rounding tests to not use the CUR_DIRECTION rounding mode.

Modified:
    cfe/trunk/lib/Headers/avx512fintrin.h
    cfe/trunk/test/CodeGen/avx512f-builtins.c

Modified: cfe/trunk/lib/Headers/avx512fintrin.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512fintrin.h?rev=336470&r1=336469&r2=336470&view=diff
==============================================================================
--- cfe/trunk/lib/Headers/avx512fintrin.h (original)
+++ cfe/trunk/lib/Headers/avx512fintrin.h Fri Jul  6 15:08:43 2018
@@ -7824,6 +7824,12 @@ _mm_mask_fmadd_ss (__m128 __W, __mmask8
  return __W;
 }
 
+#define _mm_fmadd_round_ss(A, B, C, R) \
+  (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(A), \
+                                        (__v4sf)(__m128)(B), \
+                                        (__v4sf)(__m128)(C), (__mmask8)-1, \
+                                        (int)(R))
+
 #define _mm_mask_fmadd_round_ss(W, U, A, B, R) \
   (__m128d)__builtin_ia32_vfmaddss3_mask((__v2df)(__m128d)(W), \
                                          (__v2df)(__m128d)(A), \
@@ -7872,6 +7878,12 @@ _mm_mask_fmsub_ss (__m128 __W, __mmask8
  return __W;
 }
 
+#define _mm_fmsub_round_ss(A, B, C, R) \
+  (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(A), \
+                                        (__v4sf)(__m128)(B), \
+                                        -(__v4sf)(__m128)(C), (__mmask8)-1, \
+                                        (int)(R))
+
 #define _mm_mask_fmsub_round_ss(W, U, A, B, R) \
   (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \
                                         (__v4sf)(__m128)(A), \
@@ -7920,6 +7932,12 @@ _mm_mask_fnmadd_ss (__m128 __W, __mmask8
  return __W;
 }
 
+#define _mm_fnmadd_round_ss(A, B, C, R) \
+  (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(A), \
+                                        -(__v4sf)(__m128)(B), \
+                                        (__v4sf)(__m128)(C), (__mmask8)-1, \
+                                        (int)(R))
+
 #define _mm_mask_fnmadd_round_ss(W, U, A, B, R) \
   (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \
                                         -(__v4sf)(__m128)(A), \
@@ -7968,6 +7986,12 @@ _mm_mask_fnmsub_ss (__m128 __W, __mmask8
  return __W;
 }
 
+#define _mm_fnmsub_round_ss(A, B, C, R) \
+  (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(A), \
+                                        -(__v4sf)(__m128)(B), \
+                                        -(__v4sf)(__m128)(C), (__mmask8)-1, \
+                                        (int)(R))
+
 #define _mm_mask_fnmsub_round_ss(W, U, A, B, R) \
   (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \
                                         -(__v4sf)(__m128)(A), \
@@ -8016,6 +8040,12 @@ _mm_mask_fmadd_sd (__m128d __W, __mmask8
  return __W;
 }
 
+#define _mm_fmadd_round_sd(A, B, C, R) \
+  (__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(A), \
+                                         (__v2df)(__m128d)(B), \
+                                         (__v2df)(__m128d)(C), (__mmask8)-1, \
+                                         (int)(R))
+
 #define _mm_mask_fmadd_round_sd(W, U, A, B, R) \
   (__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \
                                          (__v2df)(__m128d)(A), \
@@ -8064,6 +8094,12 @@ _mm_mask_fmsub_sd (__m128d __W, __mmask8
  return __W;
 }
 
+#define _mm_fmsub_round_sd(A, B, C, R) \
+  (__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(A), \
+                                         (__v2df)(__m128d)(B), \
+                                         -(__v2df)(__m128d)(C), (__mmask8)-1, \
+                                         (int)(R))
+
 #define _mm_mask_fmsub_round_sd(W, U, A, B, R) \
   (__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \
                                          (__v2df)(__m128d)(A), \
@@ -8112,6 +8148,12 @@ _mm_mask_fnmadd_sd (__m128d __W, __mmask
  return __W;
 }
 
+#define _mm_fnmadd_round_sd(A, B, C, R) \
+  (__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(A), \
+                                         -(__v2df)(__m128d)(B), \
+                                         (__v2df)(__m128d)(C), (__mmask8)-1, \
+                                         (int)(R))
+
 #define _mm_mask_fnmadd_round_sd(W, U, A, B, R) \
   (__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \
                                          -(__v2df)(__m128d)(A), \
@@ -8160,6 +8202,12 @@ _mm_mask_fnmsub_sd (__m128d __W, __mmask
  return __W;
 }
 
+#define _mm_fnmsub_round_sd(A, B, C, R) \
+  (__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(A), \
+                                         -(__v2df)(__m128d)(B), \
+                                         -(__v2df)(__m128d)(C), (__mmask8)-1, \
+                                         (int)(R))
+
 #define _mm_mask_fnmsub_round_sd(W, U, A, B, R) \
   (__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \
                                          -(__v2df)(__m128d)(A), \

Modified: cfe/trunk/test/CodeGen/avx512f-builtins.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/avx512f-builtins.c?rev=336470&r1=336469&r2=336470&view=diff
==============================================================================
--- cfe/trunk/test/CodeGen/avx512f-builtins.c (original)
+++ cfe/trunk/test/CodeGen/avx512f-builtins.c Fri Jul  6 15:08:43 2018
@@ -7234,10 +7234,16 @@ __m128 test_mm_mask_fmadd_ss(__m128 __W,
   return _mm_mask_fmadd_ss(__W, __U, __A, __B);
 }
 
+__m128 test_mm_fmadd_round_ss(__m128 __A, __m128 __B, __m128 __C){
+  // CHECK-LABEL: @test_mm_fmadd_round_ss
+  // CHECK: @llvm.x86.avx512.mask.vfmadd.ss
+  return _mm_fmadd_round_ss(__A, __B, __C, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+}
+
 __m128 test_mm_mask_fmadd_round_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B){
   // CHECK-LABEL: @test_mm_mask_fmadd_round_ss
   // CHECK: @llvm.x86.avx512.mask.vfmadd.ss
-  return _mm_mask_fmadd_round_ss(__W, __U, __A, __B, _MM_FROUND_CUR_DIRECTION);
+  return _mm_mask_fmadd_round_ss(__W, __U, __A, __B, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
 }
 
 __m128 test_mm_maskz_fmadd_ss(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C){
@@ -7265,7 +7271,7 @@ __m128 test_mm_maskz_fmadd_ss(__mmask8 _
 __m128 test_mm_maskz_fmadd_round_ss(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C){
   // CHECK-LABEL: @test_mm_maskz_fmadd_round_ss
   // CHECK: @llvm.x86.avx512.maskz.vfmadd.ss
-  return _mm_maskz_fmadd_round_ss(__U, __A, __B, __C, _MM_FROUND_CUR_DIRECTION);
+  return _mm_maskz_fmadd_round_ss(__U, __A, __B, __C, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
 }
 
 __m128 test_mm_mask3_fmadd_ss(__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U){
@@ -7294,7 +7300,7 @@ __m128 test_mm_mask3_fmadd_ss(__m128 __W
 __m128 test_mm_mask3_fmadd_round_ss(__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U){
   // CHECK-LABEL: @test_mm_mask3_fmadd_round_ss
   // CHECK: @llvm.x86.avx512.mask3.vfmadd.ss
-  return _mm_mask3_fmadd_round_ss(__W, __X, __Y, __U, _MM_FROUND_CUR_DIRECTION);
+  return _mm_mask3_fmadd_round_ss(__W, __X, __Y, __U, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
 }
 
 __m128 test_mm_mask_fmsub_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B){
@@ -7321,10 +7327,16 @@ __m128 test_mm_mask_fmsub_ss(__m128 __W,
   return _mm_mask_fmsub_ss(__W, __U, __A, __B);
 }
 
+__m128 test_mm_fmsub_round_ss(__m128 __A, __m128 __B, __m128 __C){
+  // CHECK-LABEL: @test_mm_fmsub_round_ss
+  // CHECK: @llvm.x86.avx512.mask.vfmadd.ss
+  return _mm_fmsub_round_ss(__A, __B, __C, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+}
+
 __m128 test_mm_mask_fmsub_round_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B){
   // CHECK-LABEL: @test_mm_mask_fmsub_round_ss
   // CHECK: @llvm.x86.avx512.mask.vfmadd.ss
-  return _mm_mask_fmsub_round_ss(__W, __U, __A, __B, _MM_FROUND_CUR_DIRECTION);
+  return _mm_mask_fmsub_round_ss(__W, __U, __A, __B, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
 }
 
 __m128 test_mm_maskz_fmsub_ss(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C){
@@ -7353,7 +7365,7 @@ __m128 test_mm_maskz_fmsub_ss(__mmask8 _
 __m128 test_mm_maskz_fmsub_round_ss(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C){
   // CHECK-LABEL: @test_mm_maskz_fmsub_round_ss
   // CHECK: @llvm.x86.avx512.maskz.vfmadd.ss
-  return _mm_maskz_fmsub_round_ss(__U, __A, __B, __C, _MM_FROUND_CUR_DIRECTION);
+  return _mm_maskz_fmsub_round_ss(__U, __A, __B, __C, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
 }
 
 __m128 test_mm_mask3_fmsub_ss(__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U){
@@ -7383,7 +7395,7 @@ __m128 test_mm_mask3_fmsub_ss(__m128 __W
 __m128 test_mm_mask3_fmsub_round_ss(__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U){
   // CHECK-LABEL: @test_mm_mask3_fmsub_round_ss
   // CHECK: @llvm.x86.avx512.mask3.vfmsub.ss
-  return _mm_mask3_fmsub_round_ss(__W, __X, __Y, __U, _MM_FROUND_CUR_DIRECTION);
+  return _mm_mask3_fmsub_round_ss(__W, __X, __Y, __U, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
 }
 
 __m128 test_mm_mask_fnmadd_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B){
@@ -7410,10 +7422,16 @@ __m128 test_mm_mask_fnmadd_ss(__m128 __W
   return _mm_mask_fnmadd_ss(__W, __U, __A, __B);
 }
 
+__m128 test_mm_fnmadd_round_ss(__m128 __A, __m128 __B, __m128 __C){
+  // CHECK-LABEL: @test_mm_fnmadd_round_ss
+  // CHECK: @llvm.x86.avx512.mask.vfmadd.ss
+  return _mm_fnmadd_round_ss(__A, __B, __C, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+}
+
 __m128 test_mm_mask_fnmadd_round_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B){
   // CHECK-LABEL: @test_mm_mask_fnmadd_round_ss
   // CHECK: @llvm.x86.avx512.mask.vfmadd.ss
-  return _mm_mask_fnmadd_round_ss(__W, __U, __A, __B, _MM_FROUND_CUR_DIRECTION);
+  return _mm_mask_fnmadd_round_ss(__W, __U, __A, __B, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
 }
 
 __m128 test_mm_maskz_fnmadd_ss(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C){
@@ -7442,7 +7460,7 @@ __m128 test_mm_maskz_fnmadd_ss(__mmask8
 __m128 test_mm_maskz_fnmadd_round_ss(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C){
   // CHECK-LABEL: @test_mm_maskz_fnmadd_round_ss
   // CHECK: @llvm.x86.avx512.maskz.vfmadd.ss
-  return _mm_maskz_fnmadd_round_ss(__U, __A, __B, __C, _MM_FROUND_CUR_DIRECTION);
+  return _mm_maskz_fnmadd_round_ss(__U, __A, __B, __C, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
 }
 
 __m128 test_mm_mask3_fnmadd_ss(__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U){
@@ -7472,7 +7490,7 @@ __m128 test_mm_mask3_fnmadd_ss(__m128 __
 __m128 test_mm_mask3_fnmadd_round_ss(__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U){
   // CHECK-LABEL: @test_mm_mask3_fnmadd_round_ss
   // CHECK: @llvm.x86.avx512.mask3.vfmadd.ss
-  return _mm_mask3_fnmadd_round_ss(__W, __X, __Y, __U, _MM_FROUND_CUR_DIRECTION);
+  return _mm_mask3_fnmadd_round_ss(__W, __X, __Y, __U, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
 }
 
 __m128 test_mm_mask_fnmsub_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B){
@@ -7500,10 +7518,16 @@ __m128 test_mm_mask_fnmsub_ss(__m128 __W
   return _mm_mask_fnmsub_ss(__W, __U, __A, __B);
 }
 
+__m128 test_mm_fnmsub_round_ss(__m128 __A, __m128 __B, __m128 __C){
+  // CHECK-LABEL: @test_mm_fnmsub_round_ss
+  // CHECK: @llvm.x86.avx512.mask.vfmadd.ss
+  return _mm_fnmsub_round_ss(__A, __B, __C, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+}
+
 __m128 test_mm_mask_fnmsub_round_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B){
   // CHECK-LABEL: @test_mm_mask_fnmsub_round_ss
   // CHECK: @llvm.x86.avx512.mask.vfmadd.ss
-  return _mm_mask_fnmsub_round_ss(__W, __U, __A, __B, _MM_FROUND_CUR_DIRECTION);
+  return _mm_mask_fnmsub_round_ss(__W, __U, __A, __B, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
 }
 
 __m128 test_mm_maskz_fnmsub_ss(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C){
@@ -7533,7 +7557,7 @@ __m128 test_mm_maskz_fnmsub_ss(__mmask8
 __m128 test_mm_maskz_fnmsub_round_ss(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C){
   // CHECK-LABEL: @test_mm_maskz_fnmsub_round_ss
   // CHECK: @llvm.x86.avx512.maskz.vfmadd.ss
-  return _mm_maskz_fnmsub_round_ss(__U, __A, __B, __C, _MM_FROUND_CUR_DIRECTION);
+  return _mm_maskz_fnmsub_round_ss(__U, __A, __B, __C, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
 }
 
 __m128 test_mm_mask3_fnmsub_ss(__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U){
@@ -7564,7 +7588,7 @@ __m128 test_mm_mask3_fnmsub_ss(__m128 __
 __m128 test_mm_mask3_fnmsub_round_ss(__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U){
   // CHECK-LABEL: @test_mm_mask3_fnmsub_round_ss
   // CHECK: @llvm.x86.avx512.mask3.vfmsub.ss
-  return _mm_mask3_fnmsub_round_ss(__W, __X, __Y, __U, _MM_FROUND_CUR_DIRECTION);
+  return _mm_mask3_fnmsub_round_ss(__W, __X, __Y, __U, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
 }
 
 __m128d test_mm_mask_fmadd_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B){
@@ -7590,10 +7614,16 @@ __m128d test_mm_mask_fmadd_sd(__m128d __
   return _mm_mask_fmadd_sd(__W, __U, __A, __B);
 }
 
+__m128d test_mm_fmadd_round_sd(__m128d __A, __m128d __B, __m128d __C){
+  // CHECK-LABEL: @test_mm_fmadd_round_sd
+  // CHECK: @llvm.x86.avx512.mask.vfmadd.sd
+  return _mm_fmadd_round_sd(__A, __B, __C, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+}
+
 __m128d test_mm_mask_fmadd_round_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B){
   // CHECK-LABEL: @test_mm_mask_fmadd_round_sd
   // CHECK: @llvm.x86.avx512.mask.vfmadd.sd
-  return _mm_mask_fmadd_round_sd(__W, __U, __A, __B, _MM_FROUND_CUR_DIRECTION);
+  return _mm_mask_fmadd_round_sd(__W, __U, __A, __B, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
 }
 
 __m128d test_mm_maskz_fmadd_sd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C){
@@ -7621,7 +7651,7 @@ __m128d test_mm_maskz_fmadd_sd(__mmask8
 __m128d test_mm_maskz_fmadd_round_sd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C){
   // CHECK-LABEL: @test_mm_maskz_fmadd_round_sd
   // CHECK: @llvm.x86.avx512.maskz.vfmadd.sd
-  return _mm_maskz_fmadd_round_sd(__U, __A, __B, __C, _MM_FROUND_CUR_DIRECTION);
+  return _mm_maskz_fmadd_round_sd(__U, __A, __B, __C, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
 }
 
 __m128d test_mm_mask3_fmadd_sd(__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U){
@@ -7650,7 +7680,7 @@ __m128d test_mm_mask3_fmadd_sd(__m128d _
 __m128d test_mm_mask3_fmadd_round_sd(__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U){
   // CHECK-LABEL: @test_mm_mask3_fmadd_round_sd
   // CHECK: @llvm.x86.avx512.mask3.vfmadd.sd
-  return _mm_mask3_fmadd_round_sd(__W, __X, __Y, __U, _MM_FROUND_CUR_DIRECTION);
+  return _mm_mask3_fmadd_round_sd(__W, __X, __Y, __U, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
 }
 
 __m128d test_mm_mask_fmsub_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B){
@@ -7677,10 +7707,16 @@ __m128d test_mm_mask_fmsub_sd(__m128d __
   return _mm_mask_fmsub_sd(__W, __U, __A, __B);
 }
 
+__m128d test_mm_fmsub_round_sd(__m128d __A, __m128d __B, __m128d __C){
+  // CHECK-LABEL: @test_mm_fmsub_round_sd
+  // CHECK: @llvm.x86.avx512.mask.vfmadd.sd
+  return _mm_fmsub_round_sd(__A, __B, __C, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+}
+
 __m128d test_mm_mask_fmsub_round_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B){
   // CHECK-LABEL: @test_mm_mask_fmsub_round_sd
   // CHECK: @llvm.x86.avx512.mask.vfmadd.sd
-  return _mm_mask_fmsub_round_sd(__W, __U, __A, __B, _MM_FROUND_CUR_DIRECTION);
+  return _mm_mask_fmsub_round_sd(__W, __U, __A, __B, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
 }
 
 __m128d test_mm_maskz_fmsub_sd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C){
@@ -7709,7 +7745,7 @@ __m128d test_mm_maskz_fmsub_sd(__mmask8
 __m128d test_mm_maskz_fmsub_round_sd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C){
   // CHECK-LABEL: @test_mm_maskz_fmsub_round_sd
   // CHECK: @llvm.x86.avx512.maskz.vfmadd.sd
-  return _mm_maskz_fmsub_round_sd(__U, __A, __B, __C, _MM_FROUND_CUR_DIRECTION);
+  return _mm_maskz_fmsub_round_sd(__U, __A, __B, __C, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
 }
 
 __m128d test_mm_mask3_fmsub_sd(__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U){
@@ -7739,7 +7775,7 @@ __m128d test_mm_mask3_fmsub_sd(__m128d _
 __m128d test_mm_mask3_fmsub_round_sd(__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U){
   // CHECK-LABEL: @test_mm_mask3_fmsub_round_sd
   // CHECK: @llvm.x86.avx512.mask3.vfmsub.sd
-  return _mm_mask3_fmsub_round_sd(__W, __X, __Y, __U, _MM_FROUND_CUR_DIRECTION);
+  return _mm_mask3_fmsub_round_sd(__W, __X, __Y, __U, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
 }
 
 __m128d test_mm_mask_fnmadd_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B){
@@ -7766,10 +7802,16 @@ __m128d test_mm_mask_fnmadd_sd(__m128d _
   return _mm_mask_fnmadd_sd(__W, __U, __A, __B);
 }
 
+__m128d test_mm_fnmadd_round_sd(__m128d __A, __m128d __B, __m128d __C){
+  // CHECK-LABEL: @test_mm_fnmadd_round_sd
+  // CHECK: @llvm.x86.avx512.mask.vfmadd.sd
+  return _mm_fnmadd_round_sd(__A, __B, __C, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+}
+
 __m128d test_mm_mask_fnmadd_round_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B){
   // CHECK-LABEL: @test_mm_mask_fnmadd_round_sd
   // CHECK: @llvm.x86.avx512.mask.vfmadd.sd
-  return _mm_mask_fnmadd_round_sd(__W, __U, __A, __B, _MM_FROUND_CUR_DIRECTION);
+  return _mm_mask_fnmadd_round_sd(__W, __U, __A, __B, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
 }
 
 __m128d test_mm_maskz_fnmadd_sd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C){
@@ -7798,7 +7840,7 @@ __m128d test_mm_maskz_fnmadd_sd(__mmask8
 __m128d test_mm_maskz_fnmadd_round_sd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C){
   // CHECK-LABEL: @test_mm_maskz_fnmadd_round_sd
   // CHECK: @llvm.x86.avx512.maskz.vfmadd.sd
-  return _mm_maskz_fnmadd_round_sd(__U, __A, __B, __C, _MM_FROUND_CUR_DIRECTION);
+  return _mm_maskz_fnmadd_round_sd(__U, __A, __B, __C, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
 }
 
 __m128d test_mm_mask3_fnmadd_sd(__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U){
@@ -7828,7 +7870,7 @@ __m128d test_mm_mask3_fnmadd_sd(__m128d
 __m128d test_mm_mask3_fnmadd_round_sd(__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U){
   // CHECK-LABEL: @test_mm_mask3_fnmadd_round_sd
   // CHECK: @llvm.x86.avx512.mask3.vfmadd.sd
-  return _mm_mask3_fnmadd_round_sd(__W, __X, __Y, __U, _MM_FROUND_CUR_DIRECTION);
+  return _mm_mask3_fnmadd_round_sd(__W, __X, __Y, __U, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
 }
 
 __m128d test_mm_mask_fnmsub_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B){
@@ -7856,10 +7898,16 @@ __m128d test_mm_mask_fnmsub_sd(__m128d _
   return _mm_mask_fnmsub_sd(__W, __U, __A, __B);
 }
 
+__m128d test_mm_fnmsub_round_sd(__m128d __A, __m128d __B, __m128d __C){
+  // CHECK-LABEL: @test_mm_fnmsub_round_sd
+  // CHECK: @llvm.x86.avx512.mask.vfmadd.sd
+  return _mm_fnmsub_round_sd(__A, __B, __C, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+}
+
 __m128d test_mm_mask_fnmsub_round_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B){
   // CHECK-LABEL: @test_mm_mask_fnmsub_round_sd
   // CHECK: @llvm.x86.avx512.mask.vfmadd.sd
-  return _mm_mask_fnmsub_round_sd(__W, __U, __A, __B, _MM_FROUND_CUR_DIRECTION);
+  return _mm_mask_fnmsub_round_sd(__W, __U, __A, __B, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
 }
 
 __m128d test_mm_maskz_fnmsub_sd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C){
@@ -7889,7 +7937,7 @@ __m128d test_mm_maskz_fnmsub_sd(__mmask8
 __m128d test_mm_maskz_fnmsub_round_sd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C){
   // CHECK-LABEL: @test_mm_maskz_fnmsub_round_sd
   // CHECK: @llvm.x86.avx512.maskz.vfmadd.sd
-  return _mm_maskz_fnmsub_round_sd(__U, __A, __B, __C, _MM_FROUND_CUR_DIRECTION);
+  return _mm_maskz_fnmsub_round_sd(__U, __A, __B, __C, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
 }
 
 __m128d test_mm_mask3_fnmsub_sd(__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U){
@@ -7920,7 +7968,7 @@ __m128d test_mm_mask3_fnmsub_sd(__m128d
 __m128d test_mm_mask3_fnmsub_round_sd(__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U){
   // CHECK-LABEL: @test_mm_mask3_fnmsub_round_sd
   // CHECK: @llvm.x86.avx512.mask3.vfmsub.sd
-  return _mm_mask3_fnmsub_round_sd(__W, __X, __Y, __U, _MM_FROUND_CUR_DIRECTION);
+  return _mm_mask3_fnmsub_round_sd(__W, __X, __Y, __U, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
 }
 
 __m512d test_mm512_permutex_pd(__m512d __X) {




More information about the cfe-commits mailing list