[clang] [Headers][X86] Allow AVX vector concatenation intrinsics to be used in constexpr (PR #158020)

Thu Sep 11 02:15:09 PDT 2025

llvmbot wrote:




@llvm/pr-subscribers-clang

Author: Shawn (kimsh02)

<details>
<summary>Changes</summary>

Fix #157705 

Vector concatentation intrinsics made constexpr and test coverage added to avx-builtins.c for the following:

```
_mm256_set_m128  _mm256_setr_m128
_mm256_set_m128d _mm256_setr_m128d
_mm256_set_m128i _mm256_setr_m128i
```


---
Full diff: https://github.com/llvm/llvm-project/pull/158020.diff


2 Files Affected:

- (modified) clang/lib/Headers/avxintrin.h (+12-18) 
- (modified) clang/test/CodeGen/X86/avx-builtins.c (+6) 


``````````diff

diff --git a/clang/lib/Headers/avxintrin.h b/clang/lib/Headers/avxintrin.h
index e9d8be320bec9..a7f70994be9db 100644
--- a/clang/lib/Headers/avxintrin.h
+++ b/clang/lib/Headers/avxintrin.h
@@ -4822,9 +4822,8 @@ _mm256_zextsi128_si256(__m128i __a) {
 ///    128 bits of the result.
 /// \returns A 256-bit floating-point vector of [8 x float] containing the
 ///    concatenated result.
-static __inline __m256 __DEFAULT_FN_ATTRS
-_mm256_set_m128 (__m128 __hi, __m128 __lo)
-{
+static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm256_set_m128(__m128 __hi, __m128 __lo) {
   return (__m256) __builtin_shufflevector((__v4sf)__lo, (__v4sf)__hi, 0, 1, 2, 3, 4, 5, 6, 7);
 }
 
@@ -4843,9 +4842,8 @@ _mm256_set_m128 (__m128 __hi, __m128 __lo)
 ///    128 bits of the result.
 /// \returns A 256-bit floating-point vector of [4 x double] containing the
 ///    concatenated result.
-static __inline __m256d __DEFAULT_FN_ATTRS
-_mm256_set_m128d (__m128d __hi, __m128d __lo)
-{
+static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm256_set_m128d(__m128d __hi, __m128d __lo) {
   return (__m256d) __builtin_shufflevector((__v2df)__lo, (__v2df)__hi, 0, 1, 2, 3);
 }
 
@@ -4863,9 +4861,8 @@ _mm256_set_m128d (__m128d __hi, __m128d __lo)
 ///    A 128-bit integer vector to be copied to the lower 128 bits of the
 ///    result.
 /// \returns A 256-bit integer vector containing the concatenated result.
-static __inline __m256i __DEFAULT_FN_ATTRS
-_mm256_set_m128i (__m128i __hi, __m128i __lo)
-{
+static __inline __m256i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm256_set_m128i(__m128i __hi, __m128i __lo) {
   return (__m256i) __builtin_shufflevector((__v2di)__lo, (__v2di)__hi, 0, 1, 2, 3);
 }
 
@@ -4886,9 +4883,8 @@ _mm256_set_m128i (__m128i __hi, __m128i __lo)
 ///    128 bits of the result.
 /// \returns A 256-bit floating-point vector of [8 x float] containing the
 ///    concatenated result.
-static __inline __m256 __DEFAULT_FN_ATTRS
-_mm256_setr_m128 (__m128 __lo, __m128 __hi)
-{
+static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm256_setr_m128(__m128 __lo, __m128 __hi) {
   return _mm256_set_m128(__hi, __lo);
 }
 
@@ -4909,9 +4905,8 @@ _mm256_setr_m128 (__m128 __lo, __m128 __hi)
 ///    128 bits of the result.
 /// \returns A 256-bit floating-point vector of [4 x double] containing the
 ///    concatenated result.
-static __inline __m256d __DEFAULT_FN_ATTRS
-_mm256_setr_m128d (__m128d __lo, __m128d __hi)
-{
+static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm256_setr_m128d(__m128d __lo, __m128d __hi) {
   return (__m256d)_mm256_set_m128d(__hi, __lo);
 }
 
@@ -4930,9 +4925,8 @@ _mm256_setr_m128d (__m128d __lo, __m128d __hi)
 ///    A 128-bit integer vector to be copied to the upper 128 bits of the
 ///    result.
 /// \returns A 256-bit integer vector containing the concatenated result.
-static __inline __m256i __DEFAULT_FN_ATTRS
-_mm256_setr_m128i (__m128i __lo, __m128i __hi)
-{
+static __inline __m256i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm256_setr_m128i(__m128i __lo, __m128i __hi) {
   return (__m256i)_mm256_set_m128i(__hi, __lo);
 }
 
diff --git a/clang/test/CodeGen/X86/avx-builtins.c b/clang/test/CodeGen/X86/avx-builtins.c
index f255dbe1b2adc..8223ab2b52cac 100644
--- a/clang/test/CodeGen/X86/avx-builtins.c
+++ b/clang/test/CodeGen/X86/avx-builtins.c
@@ -1540,18 +1540,21 @@ __m256 test_mm256_set_m128(__m128 A, __m128 B) {
   // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
   return _mm256_set_m128(A, B);
 }
+TEST_CONSTEXPR(match_m256(_mm256_set_m128((__m128){10.0f, 20.0f, 30.0f, 40.0f}, (__m128){1.0f, 2.0f, 3.0f, 4.0f}), 1.0f, 2.0f, 3.0f, 4.0f, 10.0f, 20.0f, 30.0f, 40.0f));
 
 __m256d test_mm256_set_m128d(__m128d A, __m128d B) {
   // CHECK-LABEL: test_mm256_set_m128d
   // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
   return _mm256_set_m128d(A, B);
 }
+TEST_CONSTEXPR(match_m256d(_mm256_set_m128d((__m128d){10.0, 20.0}, (__m128d){1.0, 2.0}), 1.0, 2.0, 10.0, 20.0));
 
 __m256i test_mm256_set_m128i(__m128i A, __m128i B) {
   // CHECK-LABEL: test_mm256_set_m128i
   // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
   return _mm256_set_m128i(A, B);
 }
+TEST_CONSTEXPR(match_m256i(_mm256_set_m128i((__m128i){10LL, 20LL}, (__m128i){1LL, 2LL}), 1LL, 2LL, 10LL, 20LL));
 
 __m256d test_mm256_set_pd(double A0, double A1, double A2, double A3) {
   // CHECK-LABEL: test_mm256_set_pd
@@ -1778,18 +1781,21 @@ __m256 test_mm256_setr_m128(__m128 A, __m128 B) {
   // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
   return _mm256_setr_m128(A, B);
 }
+TEST_CONSTEXPR(match_m256(_mm256_setr_m128((__m128){1.0f, 2.0f, 3.0f, 4.0f}, (__m128){10.0f, 20.0f, 30.0f, 40.0f}), 1.0f, 2.0f, 3.0f, 4.0f, 10.0f, 20.0f, 30.0f, 40.0f));
 
 __m256d test_mm256_setr_m128d(__m128d A, __m128d B) {
   // CHECK-LABEL: test_mm256_setr_m128d
   // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
   return _mm256_setr_m128d(A, B);
 }
+TEST_CONSTEXPR(match_m256d(_mm256_setr_m128d((__m128d){1.0, 2.0}, (__m128d){10.0, 20.0}), 1.0, 2.0, 10.0, 20.0));
 
 __m256i test_mm256_setr_m128i(__m128i A, __m128i B) {
   // CHECK-LABEL: test_mm256_setr_m128i
   // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
   return _mm256_setr_m128i(A, B);
 }
+TEST_CONSTEXPR(match_m256i(_mm256_setr_m128i((__m128i){1LL, 2LL}, (__m128i){10LL, 20LL}), 1LL, 2LL, 10LL, 20LL));
 
 __m256d test_mm256_setr_pd(double A0, double A1, double A2, double A3) {
   // CHECK-LABEL: test_mm256_setr_pd

``````````

</details>


https://github.com/llvm/llvm-project/pull/158020