[clang] [llvm] [X86][AVX10.2] Support AVX10.2-MINMAX new instructions. (PR #101598)
Freddy Ye via llvm-commits
llvm-commits at lists.llvm.org
Sun Aug 4 18:15:12 PDT 2024
================
@@ -0,0 +1,188 @@
+/*===-------- avx10_2minmaxintrin.h - AVX10_2MINMAX intrinsics -------------===
+ *
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ *===-----------------------------------------------------------------------===
+ */
+#ifndef __IMMINTRIN_H
+#error \
+ "Never use <avx10_2minmaxintrin.h> directly; include <immintrin.h> instead."
+#endif // __IMMINTRIN_H
+
+#ifndef __AVX10_2MINMAXINTRIN_H
+#define __AVX10_2MINMAXINTRIN_H
+
+#define _mm_minmaxne_pbh(A, B, C) /* Unmasked 128-bit form: forwards A and B (cast via __v8bf to __m128bh) and C (cast to int) to the builtin. NOTE(review): C is presumably the immediate operation selector - confirm against the ISA spec. */ \
+ ((__m128bh)__builtin_ia32_vminmaxnepbf16128( \
+ (__m128bh)(__v8bf)(A), (__m128bh)(__v8bf)(B), (int)(C)))
+
+#define _mm_mask_minmaxne_pbh(W, U, A, B, C) /* Merge form: __builtin_ia32_selectpbf_128 receives mask U, the vminmaxnepbf16128 result for (A, B, C), and W as the alternative operand. */ \
+ ((__m128bh)__builtin_ia32_selectpbf_128( \
+ (__mmask8)(U), \
+ (__v8bf)__builtin_ia32_vminmaxnepbf16128( \
+ (__m128bh)(__v8bf)(A), (__m128bh)(__v8bf)(B), (int)(C)), \
+ (__v8bf)(W)))
+
+#define _mm_maskz_minmaxne_pbh(U, A, B, C) /* Zeroing form: __builtin_ia32_selectpbf_128 receives mask U, the vminmaxnepbf16128 result for (A, B, C), and _mm_setzero_ps() bit-cast to __m128bh as the alternative operand. */ \
+ ((__m128bh)__builtin_ia32_selectpbf_128( \
+ (__mmask8)(U), \
+ (__v8bf)__builtin_ia32_vminmaxnepbf16128( \
+ (__m128bh)(__v8bf)(A), (__m128bh)(__v8bf)(B), (int)(C)), \
+ (__v8bf) __builtin_bit_cast(__m128bh, _mm_setzero_ps())))
+
+#define _mm256_minmaxne_pbh(A, B, C) /* Unmasked 256-bit form: forwards A and B (cast via __v16bf to __m256bh) and C (cast to int) to the builtin. */ \
+ ((__m256bh)__builtin_ia32_vminmaxnepbf16256( \
+ (__m256bh)(__v16bf)(A), (__m256bh)(__v16bf)(B), (int)(C)))
+
+#define _mm256_mask_minmaxne_pbh(W, U, A, B, C) /* Merge form: __builtin_ia32_selectpbf_256 receives the 16-bit mask U, the vminmaxnepbf16256 result for (A, B, C), and W as the alternative operand. */ \
+ ((__m256bh)__builtin_ia32_selectpbf_256( \
+ (__mmask16)(U), \
+ (__v16bf)__builtin_ia32_vminmaxnepbf16256( \
+ (__m256bh)(__v16bf)(A), (__m256bh)(__v16bf)(B), (int)(C)), \
+ (__v16bf)(W)))
+
+#define _mm256_maskz_minmaxne_pbh(U, A, B, C) /* Zeroing form: __builtin_ia32_selectpbf_256 receives the 16-bit mask U, the vminmaxnepbf16256 result for (A, B, C), and _mm256_setzero_ps() bit-cast to __m256bh as the alternative operand. */ \
+ ((__m256bh)__builtin_ia32_selectpbf_256( \
+ (__mmask16)(U), \
+ (__v16bf)__builtin_ia32_vminmaxnepbf16256( \
+ (__m256bh)(__v16bf)(A), (__m256bh)(__v16bf)(B), (int)(C)), \
+ (__v16bf) __builtin_bit_cast(__m256bh, _mm256_setzero_ps())))
+
+#define _mm_minmax_pd(A, B, C) /* Unmasked form: calls the _mask builtin with _mm_setzero_pd() as its fourth operand and (__mmask8)(-1) as the mask. */ \
+ ((__m128d)__builtin_ia32_vminmaxpd128_mask( \
+ (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), (int)(C), \
+ (__v2df)_mm_setzero_pd(), (__mmask8)(-1)))
+
+#define _mm_mask_minmax_pd(W, U, A, B, C) /* Merge form: W and U are forwarded to the _mask builtin as its fourth operand and its mask, after A, B and C. */ \
+ ((__m128d)__builtin_ia32_vminmaxpd128_mask( \
+ (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), (int)(C), \
+ (__v2df)(__m128d)(W), (__mmask8)(U)))
+
+#define _mm_maskz_minmax_pd(U, A, B, C) /* Zeroing form: the _mask builtin gets _mm_setzero_pd() as its fourth operand and U as the mask. */ \
+ ((__m128d)__builtin_ia32_vminmaxpd128_mask( \
+ (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), (int)(C), \
+ (__v2df)_mm_setzero_pd(), (__mmask8)(U)))
+
+#define _mm256_minmax_pd(A, B, C) /* Unmasked 256-bit form: unlike the 128-bit macro it goes through the _round_mask builtin, passing _mm256_setzero_pd(), mask (__mmask8)(-1) and _MM_FROUND_NO_EXC as the trailing operands. */ \
+ ((__m256d)__builtin_ia32_vminmaxpd256_round_mask( \
+ (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (int)(C), \
+ (__v4df)_mm256_setzero_pd(), (__mmask8)(-1), _MM_FROUND_NO_EXC))
----------------
FreddyLeaf wrote:
[adfe6cd](https://github.com/llvm/llvm-project/pull/101598/commits/adfe6cd518f7f75e8dbbdfcdc6ddadae1a27fc4a)
https://github.com/llvm/llvm-project/pull/101598
More information about the llvm-commits
mailing list