r318985 - [X86] Use separate builtins for fma4 scalar intrinsics. Use negations to remove some of the scalar fma3 builtins.

Craig Topper via cfe-commits cfe-commits at lists.llvm.org
Sat Nov 25 11:32:12 PST 2017


Author: ctopper
Date: Sat Nov 25 11:32:12 2017
New Revision: 318985

URL: http://llvm.org/viewvc/llvm-project?rev=318985&view=rev
Log:
[X86] Use separate builtins for fma4 scalar intrinsics. Use negations to remove some of the scalar fma3 builtins.

fma4 instructions zero the upper bits of the xmm register. fma3 instructions leave the bits unmodified. This requires separate builtins for the different semantics.

While we're cleaning up the scalar builtins this also removes the fma3 fmsub/fnmadd/fnmsub builtins by using negates in the header file.

Modified:
    cfe/trunk/include/clang/Basic/BuiltinsX86.def
    cfe/trunk/lib/Headers/fma4intrin.h
    cfe/trunk/lib/Headers/fmaintrin.h
    cfe/trunk/test/CodeGen/fma-builtins.c
    cfe/trunk/test/CodeGen/fma4-builtins.c

Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsX86.def?rev=318985&r1=318984&r2=318985&view=diff
==============================================================================
--- cfe/trunk/include/clang/Basic/BuiltinsX86.def (original)
+++ cfe/trunk/include/clang/Basic/BuiltinsX86.def Sat Nov 25 11:32:12 2017
@@ -682,14 +682,10 @@ TARGET_BUILTIN(__builtin_ia32_sha256msg2
 // FMA
 TARGET_BUILTIN(__builtin_ia32_vfmaddps, "V4fV4fV4fV4f", "", "fma|fma4")
 TARGET_BUILTIN(__builtin_ia32_vfmaddpd, "V2dV2dV2dV2d", "", "fma|fma4")
-TARGET_BUILTIN(__builtin_ia32_vfmaddss3, "V4fV4fV4fV4f", "", "fma|fma4")
-TARGET_BUILTIN(__builtin_ia32_vfmaddsd3, "V2dV2dV2dV2d", "", "fma|fma4")
-TARGET_BUILTIN(__builtin_ia32_vfmsubss3, "V4fV4fV4fV4f", "", "fma|fma4")
-TARGET_BUILTIN(__builtin_ia32_vfmsubsd3, "V2dV2dV2dV2d", "", "fma|fma4")
-TARGET_BUILTIN(__builtin_ia32_vfnmaddss3, "V4fV4fV4fV4f", "", "fma|fma4")
-TARGET_BUILTIN(__builtin_ia32_vfnmaddsd3, "V2dV2dV2dV2d", "", "fma|fma4")
-TARGET_BUILTIN(__builtin_ia32_vfnmsubss3, "V4fV4fV4fV4f", "", "fma|fma4")
-TARGET_BUILTIN(__builtin_ia32_vfnmsubsd3, "V2dV2dV2dV2d", "", "fma|fma4")
+TARGET_BUILTIN(__builtin_ia32_vfmaddss3, "V4fV4fV4fV4f", "", "fma")
+TARGET_BUILTIN(__builtin_ia32_vfmaddsd3, "V2dV2dV2dV2d", "", "fma")
+TARGET_BUILTIN(__builtin_ia32_vfmaddss, "V4fV4fV4fV4f", "", "fma4")
+TARGET_BUILTIN(__builtin_ia32_vfmaddsd, "V2dV2dV2dV2d", "", "fma4")
 TARGET_BUILTIN(__builtin_ia32_vfmaddsubps, "V4fV4fV4fV4f", "", "fma|fma4")
 TARGET_BUILTIN(__builtin_ia32_vfmaddsubpd, "V2dV2dV2dV2d", "", "fma|fma4")
 TARGET_BUILTIN(__builtin_ia32_vfmaddps256, "V8fV8fV8fV8f", "", "fma|fma4")

Modified: cfe/trunk/lib/Headers/fma4intrin.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/fma4intrin.h?rev=318985&r1=318984&r2=318985&view=diff
==============================================================================
--- cfe/trunk/lib/Headers/fma4intrin.h (original)
+++ cfe/trunk/lib/Headers/fma4intrin.h Sat Nov 25 11:32:12 2017
@@ -48,13 +48,13 @@ _mm_macc_pd(__m128d __A, __m128d __B, __
 static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_macc_ss(__m128 __A, __m128 __B, __m128 __C)
 {
-  return (__m128)__builtin_ia32_vfmaddss3((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
+  return (__m128)__builtin_ia32_vfmaddss((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
 }
 
 static __inline__ __m128d __DEFAULT_FN_ATTRS
 _mm_macc_sd(__m128d __A, __m128d __B, __m128d __C)
 {
-  return (__m128d)__builtin_ia32_vfmaddsd3((__v2df)__A, (__v2df)__B, (__v2df)__C);
+  return (__m128d)__builtin_ia32_vfmaddsd((__v2df)__A, (__v2df)__B, (__v2df)__C);
 }
 
 static __inline__ __m128 __DEFAULT_FN_ATTRS
@@ -72,13 +72,13 @@ _mm_msub_pd(__m128d __A, __m128d __B, __
 static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_msub_ss(__m128 __A, __m128 __B, __m128 __C)
 {
-  return (__m128)__builtin_ia32_vfmsubss3((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
+  return (__m128)__builtin_ia32_vfmaddss((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
 }
 
 static __inline__ __m128d __DEFAULT_FN_ATTRS
 _mm_msub_sd(__m128d __A, __m128d __B, __m128d __C)
 {
-  return (__m128d)__builtin_ia32_vfmsubsd3((__v2df)__A, (__v2df)__B, (__v2df)__C);
+  return (__m128d)__builtin_ia32_vfmaddsd((__v2df)__A, (__v2df)__B, -(__v2df)__C);
 }
 
 static __inline__ __m128 __DEFAULT_FN_ATTRS
@@ -96,13 +96,13 @@ _mm_nmacc_pd(__m128d __A, __m128d __B, _
 static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_nmacc_ss(__m128 __A, __m128 __B, __m128 __C)
 {
-  return (__m128)__builtin_ia32_vfnmaddss3((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
+  return (__m128)__builtin_ia32_vfmaddss(-(__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
 }
 
 static __inline__ __m128d __DEFAULT_FN_ATTRS
 _mm_nmacc_sd(__m128d __A, __m128d __B, __m128d __C)
 {
-  return (__m128d)__builtin_ia32_vfnmaddsd3((__v2df)__A, (__v2df)__B, (__v2df)__C);
+  return (__m128d)__builtin_ia32_vfmaddsd(-(__v2df)__A, (__v2df)__B, (__v2df)__C);
 }
 
 static __inline__ __m128 __DEFAULT_FN_ATTRS
@@ -120,13 +120,13 @@ _mm_nmsub_pd(__m128d __A, __m128d __B, _
 static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_nmsub_ss(__m128 __A, __m128 __B, __m128 __C)
 {
-  return (__m128)__builtin_ia32_vfnmsubss3((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
+  return (__m128)__builtin_ia32_vfmaddss(-(__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
 }
 
 static __inline__ __m128d __DEFAULT_FN_ATTRS
 _mm_nmsub_sd(__m128d __A, __m128d __B, __m128d __C)
 {
-  return (__m128d)__builtin_ia32_vfnmsubsd3((__v2df)__A, (__v2df)__B, (__v2df)__C);
+  return (__m128d)__builtin_ia32_vfmaddsd(-(__v2df)__A, (__v2df)__B, -(__v2df)__C);
 }
 
 static __inline__ __m128 __DEFAULT_FN_ATTRS

Modified: cfe/trunk/lib/Headers/fmaintrin.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/fmaintrin.h?rev=318985&r1=318984&r2=318985&view=diff
==============================================================================
--- cfe/trunk/lib/Headers/fmaintrin.h (original)
+++ cfe/trunk/lib/Headers/fmaintrin.h Sat Nov 25 11:32:12 2017
@@ -70,13 +70,13 @@ _mm_fmsub_pd(__m128d __A, __m128d __B, _
 static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_fmsub_ss(__m128 __A, __m128 __B, __m128 __C)
 {
-  return (__m128)__builtin_ia32_vfmsubss3((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
+  return (__m128)__builtin_ia32_vfmaddss3((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
 }
 
 static __inline__ __m128d __DEFAULT_FN_ATTRS
 _mm_fmsub_sd(__m128d __A, __m128d __B, __m128d __C)
 {
-  return (__m128d)__builtin_ia32_vfmsubsd3((__v2df)__A, (__v2df)__B, (__v2df)__C);
+  return (__m128d)__builtin_ia32_vfmaddsd3((__v2df)__A, (__v2df)__B, -(__v2df)__C);
 }
 
 static __inline__ __m128 __DEFAULT_FN_ATTRS
@@ -94,13 +94,13 @@ _mm_fnmadd_pd(__m128d __A, __m128d __B,
 static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_fnmadd_ss(__m128 __A, __m128 __B, __m128 __C)
 {
-  return (__m128)__builtin_ia32_vfnmaddss3((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
+  return (__m128)__builtin_ia32_vfmaddss3((__v4sf)__A, -(__v4sf)__B, (__v4sf)__C);
 }
 
 static __inline__ __m128d __DEFAULT_FN_ATTRS
 _mm_fnmadd_sd(__m128d __A, __m128d __B, __m128d __C)
 {
-  return (__m128d)__builtin_ia32_vfnmaddsd3((__v2df)__A, (__v2df)__B, (__v2df)__C);
+  return (__m128d)__builtin_ia32_vfmaddsd3((__v2df)__A, -(__v2df)__B, (__v2df)__C);
 }
 
 static __inline__ __m128 __DEFAULT_FN_ATTRS
@@ -118,13 +118,13 @@ _mm_fnmsub_pd(__m128d __A, __m128d __B,
 static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_fnmsub_ss(__m128 __A, __m128 __B, __m128 __C)
 {
-  return (__m128)__builtin_ia32_vfnmsubss3((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
+  return (__m128)__builtin_ia32_vfmaddss3((__v4sf)__A, -(__v4sf)__B, -(__v4sf)__C);
 }
 
 static __inline__ __m128d __DEFAULT_FN_ATTRS
 _mm_fnmsub_sd(__m128d __A, __m128d __B, __m128d __C)
 {
-  return (__m128d)__builtin_ia32_vfnmsubsd3((__v2df)__A, (__v2df)__B, (__v2df)__C);
+  return (__m128d)__builtin_ia32_vfmaddsd3((__v2df)__A, -(__v2df)__B, -(__v2df)__C);
 }
 
 static __inline__ __m128 __DEFAULT_FN_ATTRS

Modified: cfe/trunk/test/CodeGen/fma-builtins.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/fma-builtins.c?rev=318985&r1=318984&r2=318985&view=diff
==============================================================================
--- cfe/trunk/test/CodeGen/fma-builtins.c (original)
+++ cfe/trunk/test/CodeGen/fma-builtins.c Sat Nov 25 11:32:12 2017
@@ -43,13 +43,15 @@ __m128d test_mm_fmsub_pd(__m128d a, __m1
 
 __m128 test_mm_fmsub_ss(__m128 a, __m128 b, __m128 c) {
   // CHECK-LABEL: test_mm_fmsub_ss
-  // CHECK: @llvm.x86.fma.vfmsub.ss
+  // CHECK: [[NEG:%.+]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{.+}}
+  // CHECK: @llvm.x86.fma.vfmadd.ss(<4 x float> %{{.+}}, <4 x float> %{{.+}}, <4 x float> [[NEG]])
   return _mm_fmsub_ss(a, b, c);
 }
 
 __m128d test_mm_fmsub_sd(__m128d a, __m128d b, __m128d c) {
   // CHECK-LABEL: test_mm_fmsub_sd
-  // CHECK: @llvm.x86.fma.vfmsub.sd
+  // CHECK: [[NEG:%.+]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %{{.+}}
+  // CHECK: @llvm.x86.fma.vfmadd.sd(<2 x double> %{{.+}}, <2 x double> %{{.+}}, <2 x double> [[NEG]])
   return _mm_fmsub_sd(a, b, c);
 }
 
@@ -69,13 +71,15 @@ __m128d test_mm_fnmadd_pd(__m128d a, __m
 
 __m128 test_mm_fnmadd_ss(__m128 a, __m128 b, __m128 c) {
   // CHECK-LABEL: test_mm_fnmadd_ss
-  // CHECK: @llvm.x86.fma.vfnmadd.ss
+  // CHECK: [[NEG:%.+]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{.+}}
+  // CHECK: @llvm.x86.fma.vfmadd.ss(<4 x float> %{{.+}}, <4 x float> [[NEG]], <4 x float> %{{.+}})
   return _mm_fnmadd_ss(a, b, c);
 }
 
 __m128d test_mm_fnmadd_sd(__m128d a, __m128d b, __m128d c) {
   // CHECK-LABEL: test_mm_fnmadd_sd
-  // CHECK: @llvm.x86.fma.vfnmadd.sd
+  // CHECK: [[NEG:%.+]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %{{.+}}
+  // CHECK: @llvm.x86.fma.vfmadd.sd(<2 x double> %{{.+}}, <2 x double> [[NEG]], <2 x double> %{{.+}})
   return _mm_fnmadd_sd(a, b, c);
 }
 
@@ -97,13 +101,17 @@ __m128d test_mm_fnmsub_pd(__m128d a, __m
 
 __m128 test_mm_fnmsub_ss(__m128 a, __m128 b, __m128 c) {
   // CHECK-LABEL: test_mm_fnmsub_ss
-  // CHECK: @llvm.x86.fma.vfnmsub.ss
+  // CHECK: [[NEG:%.+]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{.+}}
+  // CHECK: [[NEG2:%.+]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{.+}}
+  // CHECK: @llvm.x86.fma.vfmadd.ss(<4 x float> %{{.+}}, <4 x float> [[NEG]], <4 x float> [[NEG2]])
   return _mm_fnmsub_ss(a, b, c);
 }
 
 __m128d test_mm_fnmsub_sd(__m128d a, __m128d b, __m128d c) {
   // CHECK-LABEL: test_mm_fnmsub_sd
-  // CHECK: @llvm.x86.fma.vfnmsub.sd
+  // CHECK: [[NEG:%.+]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %{{.+}}
+  // CHECK: [[NEG2:%.+]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %{{.+}}
+  // CHECK: @llvm.x86.fma.vfmadd.sd(<2 x double> %{{.+}}, <2 x double> [[NEG]], <2 x double> [[NEG2]])
   return _mm_fnmsub_sd(a, b, c);
 }
 

Modified: cfe/trunk/test/CodeGen/fma4-builtins.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/fma4-builtins.c?rev=318985&r1=318984&r2=318985&view=diff
==============================================================================
--- cfe/trunk/test/CodeGen/fma4-builtins.c (original)
+++ cfe/trunk/test/CodeGen/fma4-builtins.c Sat Nov 25 11:32:12 2017
@@ -17,13 +17,13 @@ __m128d test_mm_macc_pd(__m128d a, __m12
 
 __m128 test_mm_macc_ss(__m128 a, __m128 b, __m128 c) {
   // CHECK-LABEL: test_mm_macc_ss
-  // CHECK: @llvm.x86.fma.vfmadd.ss
+  // CHECK: @llvm.x86.fma4.vfmadd.ss
   return _mm_macc_ss(a, b, c);
 }
 
 __m128d test_mm_macc_sd(__m128d a, __m128d b, __m128d c) {
   // CHECK-LABEL: test_mm_macc_sd
-  // CHECK: @llvm.x86.fma.vfmadd.sd
+  // CHECK: @llvm.x86.fma4.vfmadd.sd
   return _mm_macc_sd(a, b, c);
 }
 
@@ -43,13 +43,15 @@ __m128d test_mm_msub_pd(__m128d a, __m12
 
 __m128 test_mm_msub_ss(__m128 a, __m128 b, __m128 c) {
   // CHECK-LABEL: test_mm_msub_ss
-  // CHECK: @llvm.x86.fma.vfmsub.ss
+  // CHECK: [[NEG:%.+]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{.+}}
+  // CHECK: @llvm.x86.fma4.vfmadd.ss(<4 x float> %{{.+}}, <4 x float> %{{.+}}, <4 x float> [[NEG]])
   return _mm_msub_ss(a, b, c);
 }
 
 __m128d test_mm_msub_sd(__m128d a, __m128d b, __m128d c) {
   // CHECK-LABEL: test_mm_msub_sd
-  // CHECK: @llvm.x86.fma.vfmsub.sd
+  // CHECK: [[NEG:%.+]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %{{.+}}
+  // CHECK: @llvm.x86.fma4.vfmadd.sd(<2 x double> %{{.+}}, <2 x double> %{{.+}}, <2 x double> [[NEG]])
   return _mm_msub_sd(a, b, c);
 }
 
@@ -69,13 +71,15 @@ __m128d test_mm_nmacc_pd(__m128d a, __m1
 
 __m128 test_mm_nmacc_ss(__m128 a, __m128 b, __m128 c) {
   // CHECK-LABEL: test_mm_nmacc_ss
-  // CHECK: @llvm.x86.fma.vfnmadd.ss
+  // CHECK: [[NEG:%.+]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{.+}}
+  // CHECK: @llvm.x86.fma4.vfmadd.ss(<4 x float> [[NEG]], <4 x float> %{{.+}}, <4 x float> %{{.+}})
   return _mm_nmacc_ss(a, b, c);
 }
 
 __m128d test_mm_nmacc_sd(__m128d a, __m128d b, __m128d c) {
   // CHECK-LABEL: test_mm_nmacc_sd
-  // CHECK: @llvm.x86.fma.vfnmadd.sd
+  // CHECK: [[NEG:%.+]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %{{.+}}
+  // CHECK: @llvm.x86.fma4.vfmadd.sd(<2 x double> [[NEG]], <2 x double> %{{.+}}, <2 x double> %{{.+}})
   return _mm_nmacc_sd(a, b, c);
 }
 
@@ -97,13 +101,17 @@ __m128d test_mm_nmsub_pd(__m128d a, __m1
 
 __m128 test_mm_nmsub_ss(__m128 a, __m128 b, __m128 c) {
   // CHECK-LABEL: test_mm_nmsub_ss
-  // CHECK: @llvm.x86.fma.vfnmsub.ss
+  // CHECK: [[NEG:%.+]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{.+}}
+  // CHECK: [[NEG2:%.+]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %{{.+}}
+  // CHECK: @llvm.x86.fma4.vfmadd.ss(<4 x float> [[NEG]], <4 x float> %{{.+}}, <4 x float> [[NEG2]])
   return _mm_nmsub_ss(a, b, c);
 }
 
 __m128d test_mm_nmsub_sd(__m128d a, __m128d b, __m128d c) {
   // CHECK-LABEL: test_mm_nmsub_sd
-  // CHECK: @llvm.x86.fma.vfnmsub.sd
+  // CHECK: [[NEG:%.+]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %{{.+}}
+  // CHECK: [[NEG2:%.+]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %{{.+}}
+  // CHECK: @llvm.x86.fma4.vfmadd.sd(<2 x double> [[NEG]], <2 x double> %{{.+}}, <2 x double> [[NEG2]])
   return _mm_nmsub_sd(a, b, c);
 }
 




More information about the cfe-commits mailing list