[PATCH] D132342: [X86][AVX512FP16] Relax limitation to AVX512FP16 intrinsics. NFCI

Sun Aug 21 19:39:54 PDT 2022

pengfei updated this revision to Diff 454362.
pengfei added a comment.

I don't know why, but checking `!defined(__SSE2__)` causes compiler_builtins_x86.c to fail with the error shown below. I have moved the check into avx512[vl]fp16intrin.h instead.

  error: 'error' diagnostics seen but not expected:
    File /export/users/pengfeiw/llvm-project/clang/test/Modules/compiler_builtins_x86.c Line 8: could not build module '_Builtin_intrinsics'
  1 error generated.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D132342/new/

https://reviews.llvm.org/D132342

Files:
  clang/include/clang/Basic/BuiltinsX86.def
  clang/lib/Headers/avx512fp16intrin.h
  clang/lib/Headers/avx512vlfp16intrin.h
  clang/lib/Headers/immintrin.h


Index: clang/lib/Headers/immintrin.h
===================================================================

--- clang/lib/Headers/immintrin.h
+++ clang/lib/Headers/immintrin.h
@@ -214,17 +214,13 @@
 #include <avx512pfintrin.h>
 #endif
 
-/*
- * FIXME: _Float16 type is legal only when HW support float16 operation.
- * We use __AVX512FP16__ to identify if float16 is supported or not, so
- * when float16 is not supported, the related header is not included.
- *
- */
-#if defined(__AVX512FP16__)
+#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
+    defined(__AVX512FP16__)
 #include <avx512fp16intrin.h>
 #endif
 
-#if defined(__AVX512FP16__) && defined(__AVX512VL__)
+#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
+    (defined(__AVX512VL__) && defined(__AVX512FP16__))
 #include <avx512vlfp16intrin.h>
 #endif
 
Index: clang/lib/Headers/avx512vlfp16intrin.h
===================================================================
--- clang/lib/Headers/avx512vlfp16intrin.h
+++ clang/lib/Headers/avx512vlfp16intrin.h
@@ -11,6 +11,8 @@
     "Never use <avx512vlfp16intrin.h> directly; include <immintrin.h> instead."
 #endif
 
+#ifdef __SSE2__
+
 #ifndef __AVX512VLFP16INTRIN_H
 #define __AVX512VLFP16INTRIN_H
 
@@ -2066,3 +2068,4 @@
 #undef __DEFAULT_FN_ATTRS256
 
 #endif
+#endif
Index: clang/lib/Headers/avx512fp16intrin.h
===================================================================
--- clang/lib/Headers/avx512fp16intrin.h
+++ clang/lib/Headers/avx512fp16intrin.h
@@ -10,6 +10,8 @@
 #error "Never use <avx512fp16intrin.h> directly; include <immintrin.h> instead."
 #endif
 
+#ifdef __SSE2__
+
 #ifndef __AVX512FP16INTRIN_H
 #define __AVX512FP16INTRIN_H
 
@@ -829,7 +831,7 @@
   struct __mm_load_sh_struct {
     _Float16 __u;
   } __attribute__((__packed__, __may_alias__));
-  _Float16 __u = ((struct __mm_load_sh_struct *)__dp)->__u;
+  _Float16 __u = ((const struct __mm_load_sh_struct *)__dp)->__u;
   return (__m128h){__u, 0, 0, 0, 0, 0, 0, 0};
 }
 
@@ -838,13 +840,13 @@
   __m128h src = (__v8hf)__builtin_shufflevector(
       (__v8hf)__W, (__v8hf)_mm_setzero_ph(), 0, 8, 8, 8, 8, 8, 8, 8);
 
-  return (__m128h)__builtin_ia32_loadsh128_mask((__v8hf *)__A, src, __U & 1);
+  return (__m128h)__builtin_ia32_loadsh128_mask((const __v8hf *)__A, src, __U & 1);
 }
 
 static __inline__ __m128h __DEFAULT_FN_ATTRS128
 _mm_maskz_load_sh(__mmask8 __U, const void *__A) {
   return (__m128h)__builtin_ia32_loadsh128_mask(
-      (__v8hf *)__A, (__v8hf)_mm_setzero_ph(), __U & 1);
+      (const __v8hf *)__A, (__v8hf)_mm_setzero_ph(), __U & 1);
 }
 
 static __inline__ __m512h __DEFAULT_FN_ATTRS512
@@ -3347,3 +3349,4 @@
 #undef __DEFAULT_FN_ATTRS512
 
 #endif
+#endif
Index: clang/include/clang/Basic/BuiltinsX86.def
===================================================================
--- clang/include/clang/Basic/BuiltinsX86.def
+++ clang/include/clang/Basic/BuiltinsX86.def
@@ -1791,7 +1791,7 @@
 TARGET_BUILTIN(__builtin_ia32_cmpph256_mask, "UsV16xV16xIiUs", "ncV:256:", "avx512fp16,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_cmpph128_mask, "UcV8xV8xIiUc", "ncV:128:", "avx512fp16,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_cmpsh_mask, "UcV8xV8xIiUcIi", "ncV:128:", "avx512fp16")
-TARGET_BUILTIN(__builtin_ia32_loadsh128_mask, "V8xV8x*V8xUc", "nV:128:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_loadsh128_mask, "V8xV8xC*V8xUc", "nV:128:", "avx512fp16")
 TARGET_BUILTIN(__builtin_ia32_storesh128_mask, "vV8x*V8xUc", "nV:128:", "avx512fp16")
 
 TARGET_BUILTIN(__builtin_ia32_rcpph128_mask, "V8xV8xV8xUc", "ncV:128:", "avx512fp16,avx512vl")


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D132342.454362.patch
Type: text/x-patch
Size: 3617 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/cfe-commits/attachments/20220822/e90b31d7/attachment.bin>