[cfe-commits] r147372 - in /cfe/trunk: include/clang/Basic/BuiltinsX86.def lib/Headers/fma4intrin.h lib/Headers/x86intrin.h test/CodeGen/fma4-builtins.c

Craig Topper craig.topper at gmail.com
Fri Dec 30 01:15:03 PST 2011


Author: ctopper
Date: Fri Dec 30 03:15:03 2011
New Revision: 147372

URL: http://llvm.org/viewvc/llvm-project?rev=147372&view=rev
Log:
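Add FMA4 intrinsics. This declares the 32 __builtin_ia32_vf* FMA4 builtins in BuiltinsX86.def, adds the fma4intrin.h wrapper header (included from x86intrin.h only when __FMA4__ is defined), and adds a CodeGen test that checks each wrapper emits the matching @llvm.x86.fma4.* intrinsic.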

Added:
    cfe/trunk/lib/Headers/fma4intrin.h
    cfe/trunk/test/CodeGen/fma4-builtins.c
Modified:
    cfe/trunk/include/clang/Basic/BuiltinsX86.def
    cfe/trunk/lib/Headers/x86intrin.h

Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsX86.def?rev=147372&r1=147371&r2=147372&view=diff
==============================================================================
--- cfe/trunk/include/clang/Basic/BuiltinsX86.def (original)
+++ cfe/trunk/include/clang/Basic/BuiltinsX86.def Fri Dec 30 03:15:03 2011
@@ -612,4 +612,38 @@
 BUILTIN(__builtin_ia32_pext_si, "UiUiUi", "")
 BUILTIN(__builtin_ia32_pext_di, "ULLiULLiULLi", "")
 
+// FMA4
+BUILTIN(__builtin_ia32_vfmaddps, "V4fV4fV4fV4f", "")
+BUILTIN(__builtin_ia32_vfmaddpd, "V2dV2dV2dV2d", "")
+BUILTIN(__builtin_ia32_vfmaddss, "V4fV4fV4fV4f", "")
+BUILTIN(__builtin_ia32_vfmaddsd, "V2dV2dV2dV2d", "")
+BUILTIN(__builtin_ia32_vfmsubps, "V4fV4fV4fV4f", "")
+BUILTIN(__builtin_ia32_vfmsubpd, "V2dV2dV2dV2d", "")
+BUILTIN(__builtin_ia32_vfmsubss, "V4fV4fV4fV4f", "")
+BUILTIN(__builtin_ia32_vfmsubsd, "V2dV2dV2dV2d", "")
+BUILTIN(__builtin_ia32_vfnmaddps, "V4fV4fV4fV4f", "")
+BUILTIN(__builtin_ia32_vfnmaddpd, "V2dV2dV2dV2d", "")
+BUILTIN(__builtin_ia32_vfnmaddss, "V4fV4fV4fV4f", "")
+BUILTIN(__builtin_ia32_vfnmaddsd, "V2dV2dV2dV2d", "")
+BUILTIN(__builtin_ia32_vfnmsubps, "V4fV4fV4fV4f", "")
+BUILTIN(__builtin_ia32_vfnmsubpd, "V2dV2dV2dV2d", "")
+BUILTIN(__builtin_ia32_vfnmsubss, "V4fV4fV4fV4f", "")
+BUILTIN(__builtin_ia32_vfnmsubsd, "V2dV2dV2dV2d", "")
+BUILTIN(__builtin_ia32_vfmaddsubps, "V4fV4fV4fV4f", "")
+BUILTIN(__builtin_ia32_vfmaddsubpd, "V2dV2dV2dV2d", "")
+BUILTIN(__builtin_ia32_vfmsubaddps, "V4fV4fV4fV4f", "")
+BUILTIN(__builtin_ia32_vfmsubaddpd, "V2dV2dV2dV2d", "")
+BUILTIN(__builtin_ia32_vfmaddps256, "V8fV8fV8fV8f", "")
+BUILTIN(__builtin_ia32_vfmaddpd256, "V4dV4dV4dV4d", "")
+BUILTIN(__builtin_ia32_vfmsubps256, "V8fV8fV8fV8f", "")
+BUILTIN(__builtin_ia32_vfmsubpd256, "V4dV4dV4dV4d", "")
+BUILTIN(__builtin_ia32_vfnmaddps256, "V8fV8fV8fV8f", "")
+BUILTIN(__builtin_ia32_vfnmaddpd256, "V4dV4dV4dV4d", "")
+BUILTIN(__builtin_ia32_vfnmsubps256, "V8fV8fV8fV8f", "")
+BUILTIN(__builtin_ia32_vfnmsubpd256, "V4dV4dV4dV4d", "")
+BUILTIN(__builtin_ia32_vfmaddsubps256, "V8fV8fV8fV8f", "")
+BUILTIN(__builtin_ia32_vfmaddsubpd256, "V4dV4dV4dV4d", "")
+BUILTIN(__builtin_ia32_vfmsubaddps256, "V8fV8fV8fV8f", "")
+BUILTIN(__builtin_ia32_vfmsubaddpd256, "V4dV4dV4dV4d", "")
+
 #undef BUILTIN

Added: cfe/trunk/lib/Headers/fma4intrin.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/fma4intrin.h?rev=147372&view=auto
==============================================================================
--- cfe/trunk/lib/Headers/fma4intrin.h (added)
+++ cfe/trunk/lib/Headers/fma4intrin.h Fri Dec 30 03:15:03 2011
@@ -0,0 +1,231 @@
+/*===---- fma4intrin.h - FMA4 intrinsics -----------------------------------===
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ *
+ *===-----------------------------------------------------------------------===
+ */
+
+#ifndef __X86INTRIN_H
+#error "Never use <fma4intrin.h> directly; include <x86intrin.h> instead."
+#endif
+
+#ifndef __FMA4INTRIN_H
+#define __FMA4INTRIN_H
+
+#ifndef __FMA4__
+# error "FMA4 instruction set is not enabled"
+#else
+
+#include <pmmintrin.h>
+
+static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
+_mm_macc_ps(__m128 __A, __m128 __B, __m128 __C)
+{
+  return (__m128)__builtin_ia32_vfmaddps(__A, __B, __C);
+}
+
+static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
+_mm_macc_pd(__m128d __A, __m128d __B, __m128d __C)
+{
+  return (__m128d)__builtin_ia32_vfmaddpd(__A, __B, __C);
+}
+
+static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
+_mm_macc_ss(__m128 __A, __m128 __B, __m128 __C)
+{
+  return (__m128)__builtin_ia32_vfmaddss(__A, __B, __C);
+}
+
+static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
+_mm_macc_sd(__m128d __A, __m128d __B, __m128d __C)
+{
+  return (__m128d)__builtin_ia32_vfmaddsd(__A, __B, __C);
+}
+
+static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
+_mm_msub_ps(__m128 __A, __m128 __B, __m128 __C)
+{
+  return (__m128)__builtin_ia32_vfmsubps(__A, __B, __C);
+}
+
+static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
+_mm_msub_pd(__m128d __A, __m128d __B, __m128d __C)
+{
+  return (__m128d)__builtin_ia32_vfmsubpd(__A, __B, __C);
+}
+
+static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
+_mm_msub_ss(__m128 __A, __m128 __B, __m128 __C)
+{
+  return (__m128)__builtin_ia32_vfmsubss(__A, __B, __C);
+}
+
+static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
+_mm_msub_sd(__m128d __A, __m128d __B, __m128d __C)
+{
+  return (__m128d)__builtin_ia32_vfmsubsd(__A, __B, __C);
+}
+
+static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
+_mm_nmacc_ps(__m128 __A, __m128 __B, __m128 __C)
+{
+  return (__m128)__builtin_ia32_vfnmaddps(__A, __B, __C);
+}
+
+static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
+_mm_nmacc_pd(__m128d __A, __m128d __B, __m128d __C)
+{
+  return (__m128d)__builtin_ia32_vfnmaddpd(__A, __B, __C);
+}
+
+static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
+_mm_nmacc_ss(__m128 __A, __m128 __B, __m128 __C)
+{
+  return (__m128)__builtin_ia32_vfnmaddss(__A, __B, __C);
+}
+
+static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
+_mm_nmacc_sd(__m128d __A, __m128d __B, __m128d __C)
+{
+  return (__m128d)__builtin_ia32_vfnmaddsd(__A, __B, __C);
+}
+
+static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
+_mm_nmsub_ps(__m128 __A, __m128 __B, __m128 __C)
+{
+  return (__m128)__builtin_ia32_vfnmsubps(__A, __B, __C);
+}
+
+static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
+_mm_nmsub_pd(__m128d __A, __m128d __B, __m128d __C)
+{
+  return (__m128d)__builtin_ia32_vfnmsubpd(__A, __B, __C);
+}
+
+static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
+_mm_nmsub_ss(__m128 __A, __m128 __B, __m128 __C)
+{
+  return (__m128)__builtin_ia32_vfnmsubss(__A, __B, __C);
+}
+
+static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
+_mm_nmsub_sd(__m128d __A, __m128d __B, __m128d __C)
+{
+  return (__m128d)__builtin_ia32_vfnmsubsd(__A, __B, __C);
+}
+
+static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
+_mm_maddsub_ps(__m128 __A, __m128 __B, __m128 __C)
+{
+  return (__m128)__builtin_ia32_vfmaddsubps(__A, __B, __C);
+}
+
+static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
+_mm_maddsub_pd(__m128d __A, __m128d __B, __m128d __C)
+{
+  return (__m128d)__builtin_ia32_vfmaddsubpd(__A, __B, __C);
+}
+
+static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
+_mm_msubadd_ps(__m128 __A, __m128 __B, __m128 __C)
+{
+  return (__m128)__builtin_ia32_vfmsubaddps(__A, __B, __C);
+}
+
+static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
+_mm_msubadd_pd(__m128d __A, __m128d __B, __m128d __C)
+{
+  return (__m128d)__builtin_ia32_vfmsubaddpd(__A, __B, __C);
+}
+
+static __inline__ __m256 __attribute__((__always_inline__, __nodebug__))
+_mm256_macc_ps(__m256 __A, __m256 __B, __m256 __C)
+{
+  return (__m256)__builtin_ia32_vfmaddps256(__A, __B, __C);
+}
+
+static __inline__ __m256d __attribute__((__always_inline__, __nodebug__))
+_mm256_macc_pd(__m256d __A, __m256d __B, __m256d __C)
+{
+  return (__m256d)__builtin_ia32_vfmaddpd256(__A, __B, __C);
+}
+
+static __inline__ __m256 __attribute__((__always_inline__, __nodebug__))
+_mm256_msub_ps(__m256 __A, __m256 __B, __m256 __C)
+{
+  return (__m256)__builtin_ia32_vfmsubps256(__A, __B, __C);
+}
+
+static __inline__ __m256d __attribute__((__always_inline__, __nodebug__))
+_mm256_msub_pd(__m256d __A, __m256d __B, __m256d __C)
+{
+  return (__m256d)__builtin_ia32_vfmsubpd256(__A, __B, __C);
+}
+
+static __inline__ __m256 __attribute__((__always_inline__, __nodebug__))
+_mm256_nmacc_ps(__m256 __A, __m256 __B, __m256 __C)
+{
+  return (__m256)__builtin_ia32_vfnmaddps256(__A, __B, __C);
+}
+
+static __inline__ __m256d __attribute__((__always_inline__, __nodebug__))
+_mm256_nmacc_pd(__m256d __A, __m256d __B, __m256d __C)
+{
+  return (__m256d)__builtin_ia32_vfnmaddpd256(__A, __B, __C);
+}
+
+static __inline__ __m256 __attribute__((__always_inline__, __nodebug__))
+_mm256_nmsub_ps(__m256 __A, __m256 __B, __m256 __C)
+{
+  return (__m256)__builtin_ia32_vfnmsubps256(__A, __B, __C);
+}
+
+static __inline__ __m256d __attribute__((__always_inline__, __nodebug__))
+_mm256_nmsub_pd(__m256d __A, __m256d __B, __m256d __C)
+{
+  return (__m256d)__builtin_ia32_vfnmsubpd256(__A, __B, __C);
+}
+
+static __inline__ __m256 __attribute__((__always_inline__, __nodebug__))
+_mm256_maddsub_ps(__m256 __A, __m256 __B, __m256 __C)
+{
+  return (__m256)__builtin_ia32_vfmaddsubps256(__A, __B, __C);
+}
+
+static __inline__ __m256d __attribute__((__always_inline__, __nodebug__))
+_mm256_maddsub_pd(__m256d __A, __m256d __B, __m256d __C)
+{
+  return (__m256d)__builtin_ia32_vfmaddsubpd256(__A, __B, __C);
+}
+
+static __inline__ __m256 __attribute__((__always_inline__, __nodebug__))
+_mm256_msubadd_ps(__m256 __A, __m256 __B, __m256 __C)
+{
+  return (__m256)__builtin_ia32_vfmsubaddps256(__A, __B, __C);
+}
+
+static __inline__ __m256d __attribute__((__always_inline__, __nodebug__))
+_mm256_msubadd_pd(__m256d __A, __m256d __B, __m256d __C)
+{
+  return (__m256d)__builtin_ia32_vfmsubaddpd256(__A, __B, __C);
+}
+
+#endif /* __FMA4__ */
+
+#endif /* __FMA4INTRIN_H */

Modified: cfe/trunk/lib/Headers/x86intrin.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/x86intrin.h?rev=147372&r1=147371&r2=147372&view=diff
==============================================================================
--- cfe/trunk/lib/Headers/x86intrin.h (original)
+++ cfe/trunk/lib/Headers/x86intrin.h Fri Dec 30 03:15:03 2011
@@ -42,6 +42,10 @@
 #include <popcntintrin.h>
 #endif
 
+#ifdef __FMA4__
+#include <fma4intrin.h>
+#endif
+
 // FIXME: SSE4A, 3dNOW, XOP, LWP, ABM
 
 #endif /* __X86INTRIN_H */

Added: cfe/trunk/test/CodeGen/fma4-builtins.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/fma4-builtins.c?rev=147372&view=auto
==============================================================================
--- cfe/trunk/test/CodeGen/fma4-builtins.c (added)
+++ cfe/trunk/test/CodeGen/fma4-builtins.c Fri Dec 30 03:15:03 2011
@@ -0,0 +1,166 @@
+// RUN: %clang_cc1 %s -O3 -triple=x86_64-apple-darwin -target-feature +fma4 -emit-llvm -o - | FileCheck %s
+
+// Don't include mm_malloc.h; it's system-specific.
+#define __MM_MALLOC_H
+
+#include <x86intrin.h>
+
+__m128 test_mm_macc_ps(__m128 a, __m128 b, __m128 c) {
+  // CHECK: @llvm.x86.fma4.vfmadd.ps
+  return _mm_macc_ps(a, b, c);
+}
+
+__m128d test_mm_macc_pd(__m128d a, __m128d b, __m128d c) {
+  // CHECK: @llvm.x86.fma4.vfmadd.pd
+  return _mm_macc_pd(a, b, c);
+}
+
+__m128 test_mm_macc_ss(__m128 a, __m128 b, __m128 c) {
+  // CHECK: @llvm.x86.fma4.vfmadd.ss
+  return _mm_macc_ss(a, b, c);
+}
+
+__m128d test_mm_macc_sd(__m128d a, __m128d b, __m128d c) {
+  // CHECK: @llvm.x86.fma4.vfmadd.sd
+  return _mm_macc_sd(a, b, c);
+}
+
+__m128 test_mm_msub_ps(__m128 a, __m128 b, __m128 c) {
+  // CHECK: @llvm.x86.fma4.vfmsub.ps
+  return _mm_msub_ps(a, b, c);
+}
+
+__m128d test_mm_msub_pd(__m128d a, __m128d b, __m128d c) {
+  // CHECK: @llvm.x86.fma4.vfmsub.pd
+  return _mm_msub_pd(a, b, c);
+}
+
+__m128 test_mm_msub_ss(__m128 a, __m128 b, __m128 c) {
+  // CHECK: @llvm.x86.fma4.vfmsub.ss
+  return _mm_msub_ss(a, b, c);
+}
+
+__m128d test_mm_msub_sd(__m128d a, __m128d b, __m128d c) {
+  // CHECK: @llvm.x86.fma4.vfmsub.sd
+  return _mm_msub_sd(a, b, c);
+}
+
+__m128 test_mm_nmacc_ps(__m128 a, __m128 b, __m128 c) {
+  // CHECK: @llvm.x86.fma4.vfnmadd.ps
+  return _mm_nmacc_ps(a, b, c);
+}
+
+__m128d test_mm_nmacc_pd(__m128d a, __m128d b, __m128d c) {
+  // CHECK: @llvm.x86.fma4.vfnmadd.pd
+  return _mm_nmacc_pd(a, b, c);
+}
+
+__m128 test_mm_nmacc_ss(__m128 a, __m128 b, __m128 c) {
+  // CHECK: @llvm.x86.fma4.vfnmadd.ss
+  return _mm_nmacc_ss(a, b, c);
+}
+
+__m128d test_mm_nmacc_sd(__m128d a, __m128d b, __m128d c) {
+  // CHECK: @llvm.x86.fma4.vfnmadd.sd
+  return _mm_nmacc_sd(a, b, c);
+}
+
+__m128 test_mm_nmsub_ps(__m128 a, __m128 b, __m128 c) {
+  // CHECK: @llvm.x86.fma4.vfnmsub.ps
+  return _mm_nmsub_ps(a, b, c);
+}
+
+__m128d test_mm_nmsub_pd(__m128d a, __m128d b, __m128d c) {
+  // CHECK: @llvm.x86.fma4.vfnmsub.pd
+  return _mm_nmsub_pd(a, b, c);
+}
+
+__m128 test_mm_nmsub_ss(__m128 a, __m128 b, __m128 c) {
+  // CHECK: @llvm.x86.fma4.vfnmsub.ss
+  return _mm_nmsub_ss(a, b, c);
+}
+
+__m128d test_mm_nmsub_sd(__m128d a, __m128d b, __m128d c) {
+  // CHECK: @llvm.x86.fma4.vfnmsub.sd
+  return _mm_nmsub_sd(a, b, c);
+}
+
+__m128 test_mm_maddsub_ps(__m128 a, __m128 b, __m128 c) {
+  // CHECK: @llvm.x86.fma4.vfmaddsub.ps
+  return _mm_maddsub_ps(a, b, c);
+}
+
+__m128d test_mm_maddsub_pd(__m128d a, __m128d b, __m128d c) {
+  // CHECK: @llvm.x86.fma4.vfmaddsub.pd
+  return _mm_maddsub_pd(a, b, c);
+}
+
+__m128 test_mm_msubadd_ps(__m128 a, __m128 b, __m128 c) {
+  // CHECK: @llvm.x86.fma4.vfmsubadd.ps
+  return _mm_msubadd_ps(a, b, c);
+}
+
+__m128d test_mm_msubadd_pd(__m128d a, __m128d b, __m128d c) {
+  // CHECK: @llvm.x86.fma4.vfmsubadd.pd
+  return _mm_msubadd_pd(a, b, c);
+}
+
+__m256 test_mm256_macc_ps(__m256 a, __m256 b, __m256 c) {
+  // CHECK: @llvm.x86.fma4.vfmadd.ps.256
+  return _mm256_macc_ps(a, b, c);
+}
+
+__m256d test_mm256_macc_pd(__m256d a, __m256d b, __m256d c) {
+  // CHECK: @llvm.x86.fma4.vfmadd.pd.256
+  return _mm256_macc_pd(a, b, c);
+}
+
+__m256 test_mm256_msub_ps(__m256 a, __m256 b, __m256 c) {
+  // CHECK: @llvm.x86.fma4.vfmsub.ps.256
+  return _mm256_msub_ps(a, b, c);
+}
+
+__m256d test_mm256_msub_pd(__m256d a, __m256d b, __m256d c) {
+  // CHECK: @llvm.x86.fma4.vfmsub.pd.256
+  return _mm256_msub_pd(a, b, c);
+}
+
+__m256 test_mm256_nmacc_ps(__m256 a, __m256 b, __m256 c) {
+  // CHECK: @llvm.x86.fma4.vfnmadd.ps.256
+  return _mm256_nmacc_ps(a, b, c);
+}
+
+__m256d test_mm256_nmacc_pd(__m256d a, __m256d b, __m256d c) {
+  // CHECK: @llvm.x86.fma4.vfnmadd.pd.256
+  return _mm256_nmacc_pd(a, b, c);
+}
+
+__m256 test_mm256_nmsub_ps(__m256 a, __m256 b, __m256 c) {
+  // CHECK: @llvm.x86.fma4.vfnmsub.ps.256
+  return _mm256_nmsub_ps(a, b, c);
+}
+
+__m256d test_mm256_nmsub_pd(__m256d a, __m256d b, __m256d c) {
+  // CHECK: @llvm.x86.fma4.vfnmsub.pd.256
+  return _mm256_nmsub_pd(a, b, c);
+}
+
+__m256 test_mm256_maddsub_ps(__m256 a, __m256 b, __m256 c) {
+  // CHECK: @llvm.x86.fma4.vfmaddsub.ps.256
+  return _mm256_maddsub_ps(a, b, c);
+}
+
+__m256d test_mm256_maddsub_pd(__m256d a, __m256d b, __m256d c) {
+  // CHECK: @llvm.x86.fma4.vfmaddsub.pd.256
+  return _mm256_maddsub_pd(a, b, c);
+}
+
+__m256 test_mm256_msubadd_ps(__m256 a, __m256 b, __m256 c) {
+  // CHECK: @llvm.x86.fma4.vfmsubadd.ps.256
+  return _mm256_msubadd_ps(a, b, c);
+}
+
+__m256d test_mm256_msubadd_pd(__m256d a, __m256d b, __m256d c) {
+  // CHECK: @llvm.x86.fma4.vfmsubadd.pd.256
+  return _mm256_msubadd_pd(a, b, c);
+}





More information about the cfe-commits mailing list