[clang] [Headers][X86] Allow AVX512 masked blend intrinsics to be used in constexpr (PR #156234)
via cfe-commits
cfe-commits at lists.llvm.org
Sun Aug 31 03:17:01 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-clang
Author: Harsh Tiwary (notnotharsh)
<details>
<summary>Changes</summary>
This patch makes the AVX-512 masked blend intrinsics usable in constant expressions (`constexpr`) across the 128-bit, 256-bit, and 512-bit vector widths. It updates the respective Clang headers to use the width-specific `__DEFAULT_FN_ATTRS128_CONSTEXPR`, `__DEFAULT_FN_ATTRS256_CONSTEXPR`, and `__DEFAULT_FN_ATTRS512_CONSTEXPR` attribute macros where applicable, and supplements the change with `TEST_CONSTEXPR` checks in the X86 CodeGen test suite to validate constexpr evaluation.
Fixes #155796.
---
Patch is 25.36 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/156234.diff
10 Files Affected:
- (modified) clang/lib/Headers/avx512bwintrin.h (+4-6)
- (modified) clang/lib/Headers/avx512fintrin.h (+8-12)
- (modified) clang/lib/Headers/avx512fp16intrin.h (+1-1)
- (modified) clang/lib/Headers/avx512vlbwintrin.h (+8-12)
- (modified) clang/lib/Headers/avx512vlfp16intrin.h (+3-4)
- (modified) clang/lib/Headers/avx512vlintrin.h (+16-16)
- (modified) clang/test/CodeGen/X86/avx512bw-builtins.c (+46)
- (modified) clang/test/CodeGen/X86/avx512vl-builtins.c (+99)
- (modified) clang/test/CodeGen/X86/avx512vlbw-builtins.c (+48-2)
- (modified) clang/test/CodeGen/X86/avx512vlfp16-builtins.c (+43)
``````````diff
diff --git a/clang/lib/Headers/avx512bwintrin.h b/clang/lib/Headers/avx512bwintrin.h
index 2d2bf59fc5b76..5ad2856f8b996 100644
--- a/clang/lib/Headers/avx512bwintrin.h
+++ b/clang/lib/Headers/avx512bwintrin.h
@@ -465,17 +465,15 @@ _mm512_maskz_mullo_epi16(__mmask32 __U, __m512i __A, __m512i __B) {
(__v32hi)_mm512_setzero_si512());
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_blend_epi8 (__mmask64 __U, __m512i __A, __m512i __W)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_blend_epi8(__mmask64 __U, __m512i __A, __m512i __W) {
return (__m512i) __builtin_ia32_selectb_512 ((__mmask64) __U,
(__v64qi) __W,
(__v64qi) __A);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_blend_epi16 (__mmask32 __U, __m512i __A, __m512i __W)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_blend_epi16(__mmask32 __U, __m512i __A, __m512i __W) {
return (__m512i) __builtin_ia32_selectw_512 ((__mmask32) __U,
(__v32hi) __W,
(__v32hi) __A);
diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h
index e23b1c0381ab1..1ce468c22bdd6 100644
--- a/clang/lib/Headers/avx512fintrin.h
+++ b/clang/lib/Headers/avx512fintrin.h
@@ -3366,33 +3366,29 @@ _mm512_maskz_permutex2var_epi64(__mmask8 __U, __m512i __A, __m512i __I,
/* Vector Blend */
-static __inline __m512d __DEFAULT_FN_ATTRS512
-_mm512_mask_blend_pd(__mmask8 __U, __m512d __A, __m512d __W)
-{
+static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_blend_pd(__mmask8 __U, __m512d __A, __m512d __W) {
return (__m512d) __builtin_ia32_selectpd_512 ((__mmask8) __U,
(__v8df) __W,
(__v8df) __A);
}
-static __inline __m512 __DEFAULT_FN_ATTRS512
-_mm512_mask_blend_ps(__mmask16 __U, __m512 __A, __m512 __W)
-{
+static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_blend_ps(__mmask16 __U, __m512 __A, __m512 __W) {
return (__m512) __builtin_ia32_selectps_512 ((__mmask16) __U,
(__v16sf) __W,
(__v16sf) __A);
}
-static __inline __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_blend_epi64(__mmask8 __U, __m512i __A, __m512i __W)
-{
+static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_blend_epi64(__mmask8 __U, __m512i __A, __m512i __W) {
return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __U,
(__v8di) __W,
(__v8di) __A);
}
-static __inline __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_blend_epi32(__mmask16 __U, __m512i __A, __m512i __W)
-{
+static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_blend_epi32(__mmask16 __U, __m512i __A, __m512i __W) {
return (__m512i) __builtin_ia32_selectd_512 ((__mmask16) __U,
(__v16si) __W,
(__v16si) __A);
diff --git a/clang/lib/Headers/avx512fp16intrin.h b/clang/lib/Headers/avx512fp16intrin.h
index 6989b86a7b68c..a6b0820de90cf 100644
--- a/clang/lib/Headers/avx512fp16intrin.h
+++ b/clang/lib/Headers/avx512fp16intrin.h
@@ -3306,7 +3306,7 @@ _mm512_reduce_min_ph(__m512h __V) {
return __builtin_ia32_reduce_fmin_ph512(__V);
}
-static __inline__ __m512h __DEFAULT_FN_ATTRS512
+static __inline__ __m512h __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_blend_ph(__mmask32 __U, __m512h __A, __m512h __W) {
return (__m512h)__builtin_ia32_selectph_512((__mmask32)__U, (__v32hf)__W,
(__v32hf)__A);
diff --git a/clang/lib/Headers/avx512vlbwintrin.h b/clang/lib/Headers/avx512vlbwintrin.h
index f96468c56c392..81a77fa573a90 100644
--- a/clang/lib/Headers/avx512vlbwintrin.h
+++ b/clang/lib/Headers/avx512vlbwintrin.h
@@ -454,33 +454,29 @@ _mm_maskz_mullo_epi16(__mmask8 __U, __m128i __A, __m128i __B) {
(__v8hi)_mm_setzero_si128());
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_mask_blend_epi8 (__mmask16 __U, __m128i __A, __m128i __W)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_blend_epi8(__mmask16 __U, __m128i __A, __m128i __W) {
return (__m128i) __builtin_ia32_selectb_128 ((__mmask16) __U,
(__v16qi) __W,
(__v16qi) __A);
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_mask_blend_epi8 (__mmask32 __U, __m256i __A, __m256i __W)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask_blend_epi8(__mmask32 __U, __m256i __A, __m256i __W) {
return (__m256i) __builtin_ia32_selectb_256 ((__mmask32) __U,
(__v32qi) __W,
(__v32qi) __A);
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_mask_blend_epi16 (__mmask8 __U, __m128i __A, __m128i __W)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_blend_epi16(__mmask8 __U, __m128i __A, __m128i __W) {
return (__m128i) __builtin_ia32_selectw_128 ((__mmask8) __U,
(__v8hi) __W,
(__v8hi) __A);
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_mask_blend_epi16 (__mmask16 __U, __m256i __A, __m256i __W)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask_blend_epi16(__mmask16 __U, __m256i __A, __m256i __W) {
return (__m256i) __builtin_ia32_selectw_256 ((__mmask16) __U,
(__v16hi) __W,
(__v16hi) __A);
diff --git a/clang/lib/Headers/avx512vlfp16intrin.h b/clang/lib/Headers/avx512vlfp16intrin.h
index 98ad9b54eef39..6be559d9a1a33 100644
--- a/clang/lib/Headers/avx512vlfp16intrin.h
+++ b/clang/lib/Headers/avx512vlfp16intrin.h
@@ -1984,14 +1984,13 @@ _mm256_maskz_fmadd_pch(__mmask8 __U, __m256h __A, __m256h __B, __m256h __C) {
(__v8sf)__C, (__mmask8)__U);
}
-static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_blend_ph(__mmask8 __U,
- __m128h __A,
- __m128h __W) {
+static __inline__ __m128h __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_blend_ph(__mmask8 __U, __m128h __A, __m128h __W) {
return (__m128h)__builtin_ia32_selectph_128((__mmask8)__U, (__v8hf)__W,
(__v8hf)__A);
}
-static __inline__ __m256h __DEFAULT_FN_ATTRS256
+static __inline__ __m256h __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_mask_blend_ph(__mmask16 __U, __m256h __A, __m256h __W) {
return (__m256h)__builtin_ia32_selectph_256((__mmask16)__U, (__v16hf)__W,
(__v16hf)__A);
diff --git a/clang/lib/Headers/avx512vlintrin.h b/clang/lib/Headers/avx512vlintrin.h
index 5a5b09e274563..f92a01293d1e9 100644
--- a/clang/lib/Headers/avx512vlintrin.h
+++ b/clang/lib/Headers/avx512vlintrin.h
@@ -1631,57 +1631,57 @@ _mm256_maskz_add_ps(__mmask8 __U, __m256 __A, __m256 __B) {
(__v8sf)_mm256_setzero_ps());
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_mask_blend_epi32 (__mmask8 __U, __m128i __A, __m128i __W) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_blend_epi32(__mmask8 __U, __m128i __A, __m128i __W) {
return (__m128i) __builtin_ia32_selectd_128 ((__mmask8) __U,
(__v4si) __W,
(__v4si) __A);
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_mask_blend_epi32 (__mmask8 __U, __m256i __A, __m256i __W) {
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask_blend_epi32(__mmask8 __U, __m256i __A, __m256i __W) {
return (__m256i) __builtin_ia32_selectd_256 ((__mmask8) __U,
(__v8si) __W,
(__v8si) __A);
}
-static __inline__ __m128d __DEFAULT_FN_ATTRS128
-_mm_mask_blend_pd (__mmask8 __U, __m128d __A, __m128d __W) {
+static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_blend_pd(__mmask8 __U, __m128d __A, __m128d __W) {
return (__m128d) __builtin_ia32_selectpd_128 ((__mmask8) __U,
(__v2df) __W,
(__v2df) __A);
}
-static __inline__ __m256d __DEFAULT_FN_ATTRS256
-_mm256_mask_blend_pd (__mmask8 __U, __m256d __A, __m256d __W) {
+static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask_blend_pd(__mmask8 __U, __m256d __A, __m256d __W) {
return (__m256d) __builtin_ia32_selectpd_256 ((__mmask8) __U,
(__v4df) __W,
(__v4df) __A);
}
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
-_mm_mask_blend_ps (__mmask8 __U, __m128 __A, __m128 __W) {
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_blend_ps(__mmask8 __U, __m128 __A, __m128 __W) {
return (__m128) __builtin_ia32_selectps_128 ((__mmask8) __U,
(__v4sf) __W,
(__v4sf) __A);
}
-static __inline__ __m256 __DEFAULT_FN_ATTRS256
-_mm256_mask_blend_ps (__mmask8 __U, __m256 __A, __m256 __W) {
+static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask_blend_ps(__mmask8 __U, __m256 __A, __m256 __W) {
return (__m256) __builtin_ia32_selectps_256 ((__mmask8) __U,
(__v8sf) __W,
(__v8sf) __A);
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_mask_blend_epi64 (__mmask8 __U, __m128i __A, __m128i __W) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_blend_epi64(__mmask8 __U, __m128i __A, __m128i __W) {
return (__m128i) __builtin_ia32_selectq_128 ((__mmask8) __U,
(__v2di) __W,
(__v2di) __A);
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_mask_blend_epi64 (__mmask8 __U, __m256i __A, __m256i __W) {
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask_blend_epi64(__mmask8 __U, __m256i __A, __m256i __W) {
return (__m256i) __builtin_ia32_selectq_256 ((__mmask8) __U,
(__v4di) __W,
(__v4di) __A);
diff --git a/clang/test/CodeGen/X86/avx512bw-builtins.c b/clang/test/CodeGen/X86/avx512bw-builtins.c
index 6e10af2c282e0..0d01a26077bf9 100644
--- a/clang/test/CodeGen/X86/avx512bw-builtins.c
+++ b/clang/test/CodeGen/X86/avx512bw-builtins.c
@@ -847,11 +847,57 @@ __m512i test_mm512_mask_blend_epi8(__mmask64 __U, __m512i __A, __m512i __W) {
// CHECK: select <64 x i1> %{{.*}}, <64 x i8> %{{.*}}, <64 x i8> %{{.*}}
return _mm512_mask_blend_epi8(__U,__A,__W);
}
+TEST_CONSTEXPR(match_v64qi(
+ _mm512_mask_blend_epi8(
+ (__mmask64) 0x00000001,
+ (__m512i)(__v64qi) {2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2},
+ (__m512i)(__v64qi){ 10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25, 10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25}
+ ),
+ 10, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
+));
__m512i test_mm512_mask_blend_epi16(__mmask32 __U, __m512i __A, __m512i __W) {
// CHECK-LABEL: test_mm512_mask_blend_epi16
// CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}
return _mm512_mask_blend_epi16(__U,__A,__W);
}
+TEST_CONSTEXPR(match_v32hi(
+ _mm512_mask_blend_epi16(
+ (__mmask32) 0x00000001,
+ (__m512i)(__v32hi) {2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2},
+ (__m512i)(__v32hi){ 10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25}
+ ),
+ 10, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
+));
+
+__m512i test_mm512_mask_blend_epi32(__mmask16 __U, __m512i __A, __m512i __W) {
+ // CHECK-LABEL: test_mm512_mask_blend_epi32
+ // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
+ return _mm512_mask_blend_epi32(__U, __A, __W);
+}
+TEST_CONSTEXPR(match_v16si(
+ _mm512_mask_blend_epi32(
+ (__mmask16) 0x0001,
+ (__m512i)(__v16si) {2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2},
+ (__m512i)(__v16si){ 10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25}
+ ),
+ 10, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
+));
+
+__m512i test_mm512_mask_blend_epi64(__mmask8 __U, __m512i __A, __m512i __W) {
+ // CHECK-LABEL: test_mm512_mask_blend_epi64
+ // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
+ return _mm512_mask_blend_epi64(__U, __A, __W);
+}
+
+TEST_CONSTEXPR(match_v8di(
+ _mm512_mask_blend_epi64(
+ (__mmask8)0x01,
+ (__m512i)(__v8di){2, 2, 2, 2, 2, 2, 2, 2},
+ (__m512i)(__v8di){10, 11, 12, 13, 14, 15, 16, 17}
+ ),
+ 10, 2, 2, 2, 2, 2, 2, 2
+));
+
__m512i test_mm512_abs_epi8(__m512i __A) {
// CHECK-LABEL: test_mm512_abs_epi8
// CHECK: [[ABS:%.*]] = call <64 x i8> @llvm.abs.v64i8(<64 x i8> %{{.*}}, i1 false)
diff --git a/clang/test/CodeGen/X86/avx512vl-builtins.c b/clang/test/CodeGen/X86/avx512vl-builtins.c
index 6858f114c29c4..55a1d7fc184fe 100644
--- a/clang/test/CodeGen/X86/avx512vl-builtins.c
+++ b/clang/test/CodeGen/X86/avx512vl-builtins.c
@@ -3521,41 +3521,140 @@ __m128i test_mm_mask_blend_epi32(__mmask8 __U, __m128i __A, __m128i __W) {
// CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
return _mm_mask_blend_epi32(__U,__A,__W);
}
+TEST_CONSTEXPR(match_v4si(
+ _mm_mask_blend_epi32(
+ (__mmask8)0x01,
+ (__m128i)(__v4si){2, 2, 2, 2},
+ (__m128i)(__v4si){ 10,11,12,13 }
+ ),
+ 10, 2, 2, 2
+));
__m256i test_mm256_mask_blend_epi32(__mmask8 __U, __m256i __A, __m256i __W) {
// CHECK-LABEL: test_mm256_mask_blend_epi32
// CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}
return _mm256_mask_blend_epi32(__U,__A,__W);
}
+TEST_CONSTEXPR(match_v8si(
+ _mm256_mask_blend_epi32(
+ (__mmask8)0x01,
+ (__m256i)(__v8si){2, 2, 2, 2, 2, 2, 2, 2},
+ (__m256i)(__v8si){ 10,11,12,13,14,15,16,17 }
+ ),
+ 10, 2, 2, 2, 2, 2, 2, 2
+));
__m128d test_mm_mask_blend_pd(__mmask8 __U, __m128d __A, __m128d __W) {
// CHECK-LABEL: test_mm_mask_blend_pd
// CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}
return _mm_mask_blend_pd(__U,__A,__W);
}
+TEST_CONSTEXPR(match_m128d(
+ _mm_mask_blend_pd(
+ (__mmask8)0x01,
+ (__m128d)(__v2df){2.0, 2.0},
+ (__m128d)(__v2df){10.0, 20.0}
+ ),
+ 10.0, 2.0
+));
__m256d test_mm256_mask_blend_pd(__mmask8 __U, __m256d __A, __m256d __W) {
// CHECK-LABEL: test_mm256_mask_blend_pd
// CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}
return _mm256_mask_blend_pd(__U,__A,__W);
}
+TEST_CONSTEXPR(match_m256d(
+ _mm256_mask_blend_pd(
+ (__mmask8)0x01,
+ (__m256d)(__v4df){2.0, 2.0, 2.0, 2.0},
+ (__m256d)(__v4df){10.0, 11.0, 12.0, 13.0}
+ ),
+ 10.0, 2.0, 2.0, 2.0
+));
+
+__m512d test_mm512_mask_blend_pd(__mmask8 __U, __m512d __A, __m512d __W) {
+ // CHECK-LABEL: test_mm512_mask_blend_pd
+ // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
+ return _mm512_mask_blend_pd(__U, __A, __W);
+}
+
+TEST_CONSTEXPR(match_m512d(
+ _mm512_mask_blend_pd(
+ (__mmask8)0x01,
+ (__m512d)(__v8df){2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0},
+ (__m512d)(__v8df){10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0}
+ ),
+ 10.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0
+));
+
__m128 test_mm_mask_blend_ps(__mmask8 __U, __m128 __A, __m128 __W) {
// CHECK-LABEL: test_mm_mask_blend_ps
// CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}
return _mm_mask_blend_ps(__U,__A,__W);
}
+TEST_CONSTEXPR(match_m128(
+ _mm_mask_blend_ps(
+ (__mmask8)0x01,
+ (__m128)(__v4sf){2.0f, 2.0f, 2.0f, 2.0f},
+ (__m128)(__v4sf){10.0f, 11.0f, 12.0f, 13.0f}
+ ),
+ 10.0f, 2.0f, 2.0f, 2.0f
+));
+
__m256 test_mm256_mask_blend_ps(__mmask8 __U, __m256 __A, __m256 __W) {
// CHECK-LABEL: test_mm256_mask_blend_ps
// CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
return _mm256_mask_blend_ps(__U,__A,__W);
}
+TEST_CONSTEXPR(match_m256(
+ _mm256_mask_blend_ps(
+ (__mmask8)0x01,
+ (__m256)(__v8sf){2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f},
+ (__m256)(__v8sf){10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f, 17.0f}
+ ),
+ 10.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f
+));
+
+__m512 test_mm512_mask_blend_ps(__mmask8 __U, __m512 __A, __m512 __W) {
+ // CHECK-LABEL: test_mm512_mask_blend_ps
+ // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
+ return _mm512_mask_blend_ps(__U, __A, __W);
+}
+TEST_CONSTEXPR(match_m512(
+ _mm512_mask_blend_ps(
+ (__mmask16)0x01,
+ (__m512)(__v16sf){2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f,
+ 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f},
+ (__m512)(__v16sf){10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f, 17.0f,
+ 18.0f, 19.0f, 20.0f, 21.0f, 22.0f, 23.0f, 24.0f, 25.0f}
+ ),
+ 10.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f,
+ 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f
+));
+
__m128i test_mm_mask_blend_epi64(__mmask8 __U, __m128i __A, __m128i __W) {
// CHECK-LABEL: test_mm_mask_blend_epi64
// CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}
return _mm_mask_blend_epi64(__U,__A,__W);
}
+TEST_CONSTEXPR(match_v2di(
+ _mm_mask_blend_epi64(
+ (__mmask8)0x01,
+ (__m128i)(__v2di){2, 2},
+ (__m128i)(__v2di){ 10,11 }
+ ),
+ 10, 2
+));
__m256i test_mm256_mask_blend_epi64(__mmask8 __U, __m256i __A, __m256i __W) {
// CHECK-LABEL: test_mm256_mask_blend_epi64
// CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}}
return _mm256_mask_blend_epi64(__U,__A,__W);
}
+TEST_CONSTEXPR(match_v4di(
+ _mm256_mask_blend_epi64(
+ (__mmask8)0x01,
+ (__m256i)(__v4di){2, 2, 2, 2},
+ (__m256i)(__v4di){ 10,11,12,13 }
+ ),
+ 10, 2, 2, 2
+));
__m128d test_mm_mask_compress_pd(__m128d __W, __mmask8 __U, __m128d __A) {
// CHECK-LABEL: test_mm_mask_compress_pd
// CHECK: @llvm.x86.avx512.mask.compress
diff --git a/clang/test/CodeGen/X86/avx512vlbw-builtins.c b/clang/test/CodeGen/X86/avx512vlbw-builtins.c
index 5d697440bfc1b..59e37c47e2eb3 100644
--- a/clang/test/CodeGen/X86/avx512vlbw-builtins.c
+++ b/clang/test/CodeGen/X86/avx512vlbw-builtins.c
@@ -882,23 +882,56 @@ __m128i test_mm_mask_blend_epi8(__mmask16 __U, __m128i __A, __m128i __W) {
// CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}
return _mm_mask_blend_epi8(__U,__A,__W);
}
+TEST_CONSTEXPR(match_v16qi(
+ _mm_mask_blend_epi8(
+ (__mmask16)0x0001,
+ (__m128i)(__v16qi){2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2},
+ (__m128i)(__v16qi){ 10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25 }
+ ),
+ 10, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
+));
+
__m256i test_mm256_mask_blend_epi8(__mmask32 __U, __m256i __A, __m256i __W) {
// CHECK-LABEL: test_mm256_mask_blend_epi8
// CHECK: select <32 x i1> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}}
return _mm256_mask_blend_epi8(__U,__A,__W);
}
+TEST_CONSTEXPR(match_v32qi(
+ _mm256_mask_blend_epi8(
+ (__mmask32) 0x00000001,
+ (__m256i)(__v32qi) {2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2},
+ (__m256i)(__v32qi){ 10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25}
+ ),
+ 10, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
+));
__m128i test_mm_mask_blend_epi16(__mmask8 __U, __m128i __A, __m128i __W) {
// CHECK-LABEL: test_mm_mask_blend_epi16
// CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}}
return _mm_mask_blend_epi16(__U,__A,__W);
}
+TEST_CONSTEXPR(match_v8hi(
+ _mm_mask_blend_epi16(
+ (__mmask8)0x01,
+ (__m128i)(__v8hi){2, 2, 2, 2, 2, 2, 2, 2},
+ (__m128i)(__v8hi){ 10,11,12,13,14,15,16,17 }
+ ),
+ 10, 2, 2, 2, 2, 2, 2, 2
+));
__m256i test_mm256_mask_blend_epi16(__mmask16 __U, __m256i __A, __m256i __W) {
// CHECK-LABEL: test_mm256_mask_blend_epi16
// CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}}
return _mm256_mask_blend_epi16(__U,__A,__W);
}
+TEST_CONSTEXPR(match_v16hi(
+ _mm256_m...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/156234
More information about the cfe-commits mailing list