[clang] [Headers][X86] Allow AVX vector concatenation intrinsics to be used in constexpr (PR #158020)

Thu Sep 11 02:34:58 PDT 2025

https://github.com/RKSimon updated https://github.com/llvm/llvm-project/pull/158020

>From be9cd0cf8feb76b2de4e9ab095580802a5989071 Mon Sep 17 00:00:00 2001
From: kimsh02 <kimshawn02 at icloud.com>
Date: Thu, 11 Sep 2025 01:39:06 -0700
Subject: [PATCH 1/2] [Headers][X86] Allow AVX vector concatenation intrinsics
 to be used in constexpr

---
 clang/lib/Headers/avxintrin.h         | 12 ++++++------
 clang/test/CodeGen/X86/avx-builtins.c |  6 ++++++
 2 files changed, 12 insertions(+), 6 deletions(-)

diff --git a/clang/lib/Headers/avxintrin.h b/clang/lib/Headers/avxintrin.h
index e9d8be320bec9..f510efa3f7543 100644
--- a/clang/lib/Headers/avxintrin.h
+++ b/clang/lib/Headers/avxintrin.h
@@ -4822,7 +4822,7 @@ _mm256_zextsi128_si256(__m128i __a) {
 ///    128 bits of the result.
 /// \returns A 256-bit floating-point vector of [8 x float] containing the
 ///    concatenated result.
-static __inline __m256 __DEFAULT_FN_ATTRS
+static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR
 _mm256_set_m128 (__m128 __hi, __m128 __lo)
 {
   return (__m256) __builtin_shufflevector((__v4sf)__lo, (__v4sf)__hi, 0, 1, 2, 3, 4, 5, 6, 7);
@@ -4843,7 +4843,7 @@ _mm256_set_m128 (__m128 __hi, __m128 __lo)
 ///    128 bits of the result.
 /// \returns A 256-bit floating-point vector of [4 x double] containing the
 ///    concatenated result.
-static __inline __m256d __DEFAULT_FN_ATTRS
+static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR
 _mm256_set_m128d (__m128d __hi, __m128d __lo)
 {
   return (__m256d) __builtin_shufflevector((__v2df)__lo, (__v2df)__hi, 0, 1, 2, 3);
@@ -4863,7 +4863,7 @@ _mm256_set_m128d (__m128d __hi, __m128d __lo)
 ///    A 128-bit integer vector to be copied to the lower 128 bits of the
 ///    result.
 /// \returns A 256-bit integer vector containing the concatenated result.
-static __inline __m256i __DEFAULT_FN_ATTRS
+static __inline __m256i __DEFAULT_FN_ATTRS_CONSTEXPR
 _mm256_set_m128i (__m128i __hi, __m128i __lo)
 {
   return (__m256i) __builtin_shufflevector((__v2di)__lo, (__v2di)__hi, 0, 1, 2, 3);
@@ -4886,7 +4886,7 @@ _mm256_set_m128i (__m128i __hi, __m128i __lo)
 ///    128 bits of the result.
 /// \returns A 256-bit floating-point vector of [8 x float] containing the
 ///    concatenated result.
-static __inline __m256 __DEFAULT_FN_ATTRS
+static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR
 _mm256_setr_m128 (__m128 __lo, __m128 __hi)
 {
   return _mm256_set_m128(__hi, __lo);
@@ -4909,7 +4909,7 @@ _mm256_setr_m128 (__m128 __lo, __m128 __hi)
 ///    128 bits of the result.
 /// \returns A 256-bit floating-point vector of [4 x double] containing the
 ///    concatenated result.
-static __inline __m256d __DEFAULT_FN_ATTRS
+static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR
 _mm256_setr_m128d (__m128d __lo, __m128d __hi)
 {
   return (__m256d)_mm256_set_m128d(__hi, __lo);
@@ -4930,7 +4930,7 @@ _mm256_setr_m128d (__m128d __lo, __m128d __hi)
 ///    A 128-bit integer vector to be copied to the upper 128 bits of the
 ///    result.
 /// \returns A 256-bit integer vector containing the concatenated result.
-static __inline __m256i __DEFAULT_FN_ATTRS
+static __inline __m256i __DEFAULT_FN_ATTRS_CONSTEXPR
 _mm256_setr_m128i (__m128i __lo, __m128i __hi)
 {
   return (__m256i)_mm256_set_m128i(__hi, __lo);
diff --git a/clang/test/CodeGen/X86/avx-builtins.c b/clang/test/CodeGen/X86/avx-builtins.c
index f255dbe1b2adc..8223ab2b52cac 100644
--- a/clang/test/CodeGen/X86/avx-builtins.c
+++ b/clang/test/CodeGen/X86/avx-builtins.c
@@ -1540,18 +1540,21 @@ __m256 test_mm256_set_m128(__m128 A, __m128 B) {
   // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
   return _mm256_set_m128(A, B);
 }
+TEST_CONSTEXPR(match_m256(_mm256_set_m128((__m128){10.0f, 20.0f, 30.0f, 40.0f}, (__m128){1.0f, 2.0f, 3.0f, 4.0f}), 1.0f, 2.0f, 3.0f, 4.0f, 10.0f, 20.0f, 30.0f, 40.0f));
 
 __m256d test_mm256_set_m128d(__m128d A, __m128d B) {
   // CHECK-LABEL: test_mm256_set_m128d
   // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
   return _mm256_set_m128d(A, B);
 }
+TEST_CONSTEXPR(match_m256d(_mm256_set_m128d((__m128d){10.0, 20.0}, (__m128d){1.0, 2.0}), 1.0, 2.0, 10.0, 20.0));
 
 __m256i test_mm256_set_m128i(__m128i A, __m128i B) {
   // CHECK-LABEL: test_mm256_set_m128i
   // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
   return _mm256_set_m128i(A, B);
 }
+TEST_CONSTEXPR(match_m256i(_mm256_set_m128i((__m128i){10LL, 20LL}, (__m128i){1LL, 2LL}), 1LL, 2LL, 10LL, 20LL));
 
 __m256d test_mm256_set_pd(double A0, double A1, double A2, double A3) {
   // CHECK-LABEL: test_mm256_set_pd
@@ -1778,18 +1781,21 @@ __m256 test_mm256_setr_m128(__m128 A, __m128 B) {
   // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
   return _mm256_setr_m128(A, B);
 }
+TEST_CONSTEXPR(match_m256(_mm256_setr_m128((__m128){1.0f, 2.0f, 3.0f, 4.0f}, (__m128){10.0f, 20.0f, 30.0f, 40.0f}), 1.0f, 2.0f, 3.0f, 4.0f, 10.0f, 20.0f, 30.0f, 40.0f));
 
 __m256d test_mm256_setr_m128d(__m128d A, __m128d B) {
   // CHECK-LABEL: test_mm256_setr_m128d
   // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
   return _mm256_setr_m128d(A, B);
 }
+TEST_CONSTEXPR(match_m256d(_mm256_setr_m128d((__m128d){1.0, 2.0}, (__m128d){10.0, 20.0}), 1.0, 2.0, 10.0, 20.0));
 
 __m256i test_mm256_setr_m128i(__m128i A, __m128i B) {
   // CHECK-LABEL: test_mm256_setr_m128i
   // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
   return _mm256_setr_m128i(A, B);
 }
+TEST_CONSTEXPR(match_m256i(_mm256_setr_m128i((__m128i){1LL, 2LL}, (__m128i){10LL, 20LL}), 1LL, 2LL, 10LL, 20LL));
 
 __m256d test_mm256_setr_pd(double A0, double A1, double A2, double A3) {
   // CHECK-LABEL: test_mm256_setr_pd

>From 34fcdf5ef25e5452eca2f441fc39140f5038776c Mon Sep 17 00:00:00 2001
From: kimsh02 <kimshawn02 at icloud.com>
Date: Thu, 11 Sep 2025 02:02:19 -0700
Subject: [PATCH 2/2] Clang-format

---
 clang/lib/Headers/avxintrin.h | 18 ++++++------------
 1 file changed, 6 insertions(+), 12 deletions(-)

diff --git a/clang/lib/Headers/avxintrin.h b/clang/lib/Headers/avxintrin.h
index f510efa3f7543..a7f70994be9db 100644
--- a/clang/lib/Headers/avxintrin.h
+++ b/clang/lib/Headers/avxintrin.h
@@ -4823,8 +4823,7 @@ _mm256_zextsi128_si256(__m128i __a) {
 /// \returns A 256-bit floating-point vector of [8 x float] containing the
 ///    concatenated result.
 static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR
-_mm256_set_m128 (__m128 __hi, __m128 __lo)
-{
+_mm256_set_m128(__m128 __hi, __m128 __lo) {
   return (__m256) __builtin_shufflevector((__v4sf)__lo, (__v4sf)__hi, 0, 1, 2, 3, 4, 5, 6, 7);
 }
 
@@ -4844,8 +4843,7 @@ _mm256_set_m128 (__m128 __hi, __m128 __lo)
 /// \returns A 256-bit floating-point vector of [4 x double] containing the
 ///    concatenated result.
 static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR
-_mm256_set_m128d (__m128d __hi, __m128d __lo)
-{
+_mm256_set_m128d(__m128d __hi, __m128d __lo) {
   return (__m256d) __builtin_shufflevector((__v2df)__lo, (__v2df)__hi, 0, 1, 2, 3);
 }
 
@@ -4864,8 +4862,7 @@ _mm256_set_m128d (__m128d __hi, __m128d __lo)
 ///    result.
 /// \returns A 256-bit integer vector containing the concatenated result.
 static __inline __m256i __DEFAULT_FN_ATTRS_CONSTEXPR
-_mm256_set_m128i (__m128i __hi, __m128i __lo)
-{
+_mm256_set_m128i(__m128i __hi, __m128i __lo) {
   return (__m256i) __builtin_shufflevector((__v2di)__lo, (__v2di)__hi, 0, 1, 2, 3);
 }
 
@@ -4887,8 +4884,7 @@ _mm256_set_m128i (__m128i __hi, __m128i __lo)
 /// \returns A 256-bit floating-point vector of [8 x float] containing the
 ///    concatenated result.
 static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR
-_mm256_setr_m128 (__m128 __lo, __m128 __hi)
-{
+_mm256_setr_m128(__m128 __lo, __m128 __hi) {
   return _mm256_set_m128(__hi, __lo);
 }
 
@@ -4910,8 +4906,7 @@ _mm256_setr_m128 (__m128 __lo, __m128 __hi)
 /// \returns A 256-bit floating-point vector of [4 x double] containing the
 ///    concatenated result.
 static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR
-_mm256_setr_m128d (__m128d __lo, __m128d __hi)
-{
+_mm256_setr_m128d(__m128d __lo, __m128d __hi) {
   return (__m256d)_mm256_set_m128d(__hi, __lo);
 }
 
@@ -4931,8 +4926,7 @@ _mm256_setr_m128d (__m128d __lo, __m128d __hi)
 ///    result.
 /// \returns A 256-bit integer vector containing the concatenated result.
 static __inline __m256i __DEFAULT_FN_ATTRS_CONSTEXPR
-_mm256_setr_m128i (__m128i __lo, __m128i __hi)
-{
+_mm256_setr_m128i(__m128i __lo, __m128i __hi) {
   return (__m256i)_mm256_set_m128i(__hi, __lo);
 }