[clang] b7f7d96 - [Headers][X86] Allow SSE2/AVX2/AVX512F/AVX512BW/AVX512DQ integer multiply intrinsics to be used in constexpr (#156369)
via cfe-commits
cfe-commits at lists.llvm.org
Tue Sep 2 02:38:37 PDT 2025
Author: Koustav Chowdhury
Date: 2025-09-02T10:38:33+01:00
New Revision: b7f7d9601010ae3034f1e1d79f446565e2ecbed2
URL: https://github.com/llvm/llvm-project/commit/b7f7d9601010ae3034f1e1d79f446565e2ecbed2
DIFF: https://github.com/llvm/llvm-project/commit/b7f7d9601010ae3034f1e1d79f446565e2ecbed2.diff
LOG: [Headers][X86] Allow SSE2/AVX2/AVX512F/AVX512BW/AVX512DQ integer multiply intrinsics to be used in constexpr (#156369)
Fixes #155411
Added:
Modified:
clang/lib/Headers/avx2intrin.h
clang/lib/Headers/avx512bwintrin.h
clang/lib/Headers/avx512dqintrin.h
clang/lib/Headers/avx512fintrin.h
clang/lib/Headers/avx512vlbwintrin.h
clang/lib/Headers/avx512vldqintrin.h
clang/lib/Headers/avx512vlintrin.h
clang/lib/Headers/smmintrin.h
clang/test/CodeGen/X86/avx2-builtins.c
clang/test/CodeGen/X86/avx512bw-builtins.c
clang/test/CodeGen/X86/avx512dq-builtins.c
clang/test/CodeGen/X86/avx512f-builtins.c
clang/test/CodeGen/X86/avx512vl-builtins.c
clang/test/CodeGen/X86/avx512vlbw-builtins.c
clang/test/CodeGen/X86/avx512vldq-builtins.c
clang/test/CodeGen/X86/sse41-builtins.c
Removed:
################################################################################
diff --git a/clang/lib/Headers/avx2intrin.h b/clang/lib/Headers/avx2intrin.h
index 384faa35d246f..4d03103ac1e08 100644
--- a/clang/lib/Headers/avx2intrin.h
+++ b/clang/lib/Headers/avx2intrin.h
@@ -1766,9 +1766,8 @@ _mm256_mullo_epi16(__m256i __a, __m256i __b)
/// \param __b
/// A 256-bit vector of [8 x i32] containing one of the source operands.
/// \returns A 256-bit vector of [8 x i32] containing the products.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_mullo_epi32 (__m256i __a, __m256i __b)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mullo_epi32(__m256i __a, __m256i __b) {
return (__m256i)((__v8su)__a * (__v8su)__b);
}
diff --git a/clang/lib/Headers/avx512bwintrin.h b/clang/lib/Headers/avx512bwintrin.h
index eabe215947761..8e9cc395abb2a 100644
--- a/clang/lib/Headers/avx512bwintrin.h
+++ b/clang/lib/Headers/avx512bwintrin.h
@@ -451,14 +451,14 @@ _mm512_mullo_epi16 (__m512i __A, __m512i __B) {
return (__m512i) ((__v32hu) __A * (__v32hu) __B);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_mullo_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) {
return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
(__v32hi)_mm512_mullo_epi16(__A, __B),
(__v32hi)__W);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_mullo_epi16(__mmask32 __U, __m512i __A, __m512i __B) {
return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
(__v32hi)_mm512_mullo_epi16(__A, __B),
diff --git a/clang/lib/Headers/avx512dqintrin.h b/clang/lib/Headers/avx512dqintrin.h
index 87d16b474d466..b0fe903d82431 100644
--- a/clang/lib/Headers/avx512dqintrin.h
+++ b/clang/lib/Headers/avx512dqintrin.h
@@ -156,19 +156,19 @@ _store_mask8(__mmask8 *__A, __mmask8 __B) {
*(__mmask8 *)__A = __builtin_ia32_kmovb((__mmask8)__B);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mullo_epi64 (__m512i __A, __m512i __B) {
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mullo_epi64(__m512i __A, __m512i __B) {
return (__m512i) ((__v8du) __A * (__v8du) __B);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_mullo_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) {
return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
(__v8di)_mm512_mullo_epi64(__A, __B),
(__v8di)__W);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_mullo_epi64(__mmask8 __U, __m512i __A, __m512i __B) {
return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
(__v8di)_mm512_mullo_epi64(__A, __B),
diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h
index 0006e334022b6..1968b2b3d91d2 100644
--- a/clang/lib/Headers/avx512fintrin.h
+++ b/clang/lib/Headers/avx512fintrin.h
@@ -1450,23 +1450,20 @@ _mm512_maskz_mul_epu32(__mmask8 __M, __m512i __X, __m512i __Y)
(__v8di)_mm512_setzero_si512 ());
}
-static __inline __m512i __DEFAULT_FN_ATTRS512
-_mm512_mullo_epi32 (__m512i __A, __m512i __B)
-{
+static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mullo_epi32(__m512i __A, __m512i __B) {
return (__m512i) ((__v16su) __A * (__v16su) __B);
}
-static __inline __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_mullo_epi32(__mmask16 __M, __m512i __A, __m512i __B)
-{
+static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_mullo_epi32(__mmask16 __M, __m512i __A, __m512i __B) {
return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
(__v16si)_mm512_mullo_epi32(__A, __B),
(__v16si)_mm512_setzero_si512());
}
-static __inline __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_mullo_epi32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
-{
+static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_mullo_epi32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B) {
return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
(__v16si)_mm512_mullo_epi32(__A, __B),
(__v16si)__W);
diff --git a/clang/lib/Headers/avx512vlbwintrin.h b/clang/lib/Headers/avx512vlbwintrin.h
index ea6144008200a..888086dc214f1 100644
--- a/clang/lib/Headers/avx512vlbwintrin.h
+++ b/clang/lib/Headers/avx512vlbwintrin.h
@@ -426,28 +426,28 @@ _mm_maskz_sub_epi16(__mmask8 __U, __m128i __A, __m128i __B) {
(__v8hi)_mm_setzero_si128());
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_mask_mullo_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) {
return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
(__v16hi)_mm256_mullo_epi16(__A, __B),
(__v16hi)__W);
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_maskz_mullo_epi16(__mmask16 __U, __m256i __A, __m256i __B) {
return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
(__v16hi)_mm256_mullo_epi16(__A, __B),
(__v16hi)_mm256_setzero_si256());
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask_mullo_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
(__v8hi)_mm_mullo_epi16(__A, __B),
(__v8hi)__W);
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_maskz_mullo_epi16(__mmask8 __U, __m128i __A, __m128i __B) {
return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
(__v8hi)_mm_mullo_epi16(__A, __B),
diff --git a/clang/lib/Headers/avx512vldqintrin.h b/clang/lib/Headers/avx512vldqintrin.h
index ceebd09e1d3af..e93eb10c31ce2 100644
--- a/clang/lib/Headers/avx512vldqintrin.h
+++ b/clang/lib/Headers/avx512vldqintrin.h
@@ -32,38 +32,38 @@
#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128
#endif
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_mullo_epi64 (__m256i __A, __m256i __B) {
+static __inline__ __m256i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm256_mullo_epi64(__m256i __A, __m256i __B) {
return (__m256i) ((__v4du) __A * (__v4du) __B);
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm256_mask_mullo_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) {
return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
(__v4di)_mm256_mullo_epi64(__A, __B),
(__v4di)__W);
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm256_maskz_mullo_epi64(__mmask8 __U, __m256i __A, __m256i __B) {
return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
(__v4di)_mm256_mullo_epi64(__A, __B),
(__v4di)_mm256_setzero_si256());
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_mullo_epi64 (__m128i __A, __m128i __B) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mullo_epi64(__m128i __A, __m128i __B) {
return (__m128i) ((__v2du) __A * (__v2du) __B);
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask_mullo_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
(__v2di)_mm_mullo_epi64(__A, __B),
(__v2di)__W);
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_maskz_mullo_epi64(__mmask8 __U, __m128i __A, __m128i __B) {
return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
(__v2di)_mm_mullo_epi64(__A, __B),
diff --git a/clang/lib/Headers/avx512vlintrin.h b/clang/lib/Headers/avx512vlintrin.h
index 6e16d2d4a0620..41f11757abbea 100644
--- a/clang/lib/Headers/avx512vlintrin.h
+++ b/clang/lib/Headers/avx512vlintrin.h
@@ -429,17 +429,15 @@ _mm_maskz_mul_epu32(__mmask8 __M, __m128i __X, __m128i __Y)
(__v2di)_mm_setzero_si128());
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_maskz_mullo_epi32(__mmask8 __M, __m256i __A, __m256i __B)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm256_maskz_mullo_epi32(__mmask8 __M, __m256i __A, __m256i __B) {
return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
(__v8si)_mm256_mullo_epi32(__A, __B),
(__v8si)_mm256_setzero_si256());
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_mask_mullo_epi32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm256_mask_mullo_epi32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
(__v8si)_mm256_mullo_epi32(__A, __B),
(__v8si)__W);
diff --git a/clang/lib/Headers/smmintrin.h b/clang/lib/Headers/smmintrin.h
index f68dd7ed2bcc9..e8f1f57c97c08 100644
--- a/clang/lib/Headers/smmintrin.h
+++ b/clang/lib/Headers/smmintrin.h
@@ -548,8 +548,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_blendv_epi8(__m128i __V1,
/// \param __V2
/// A 128-bit integer vector.
/// \returns A 128-bit integer vector containing the products of both operands.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mullo_epi32(__m128i __V1,
- __m128i __V2) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_mullo_epi32(__m128i __V1, __m128i __V2) {
return (__m128i)((__v4su)__V1 * (__v4su)__V2);
}
diff --git a/clang/test/CodeGen/X86/avx2-builtins.c b/clang/test/CodeGen/X86/avx2-builtins.c
index a39ce513837ea..c9b6f69684c9e 100644
--- a/clang/test/CodeGen/X86/avx2-builtins.c
+++ b/clang/test/CodeGen/X86/avx2-builtins.c
@@ -976,6 +976,7 @@ __m256i test_mm256_mullo_epi32(__m256i a, __m256i b) {
// CHECK: mul <8 x i32>
return _mm256_mullo_epi32(a, b);
}
+TEST_CONSTEXPR(match_v8si(_mm256_mullo_epi32((__m256i)(__v8si){+1, -2, +3, -4, +5, -6, +7, -8}, (__m256i)(__v8si){-16, -14, +12, +10, -8, +6, -4, +2}), -16, 28, 36, -40, -40, -36, -28, -16));
__m256i test_mm256_or_si256(__m256i a, __m256i b) {
// CHECK-LABEL: test_mm256_or_si256
diff --git a/clang/test/CodeGen/X86/avx512bw-builtins.c b/clang/test/CodeGen/X86/avx512bw-builtins.c
index 264a4578c2cd2..5f8497cdef37f 100644
--- a/clang/test/CodeGen/X86/avx512bw-builtins.c
+++ b/clang/test/CodeGen/X86/avx512bw-builtins.c
@@ -839,6 +839,7 @@ __m512i test_mm512_mask_mullo_epi16 (__m512i __W, __mmask32 __U, __m512i __A, __
//CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}
return _mm512_mask_mullo_epi16(__W, __U, __A, __B);
}
+TEST_CONSTEXPR(match_v32hi(_mm512_mask_mullo_epi16((__m512i)(__v32hi){-1, +2, -3, +4, -5, +6, -7, +8, -9, +10, -11, +12, -13, +14, -15, +16, -17, +18, -19, +20, -21, +22, -23, +24, -25, +26, -27, +28, -29, +30, -31, +32}, 0x0000FFFF, (__m512i)(__v32hi){+2, -3, +4, -5, +6, -7, +8, -9, +10, -11, +12, -13, +14, -15, +16, -17, +18, -19, +20, -21, +22, -23, +24, -25, +26, -27, +28, -29, +30, -31, +32, -33}, (__m512i)(__v32hi){-3, +4, -5, +6, -7, +8, -9, +10, -11, +12, -13, +14, -15, +16, -17, +18, -19, +20, -21, +22, -23, +24, -25, +26, -27, +28, -29, +30, -31, +32, -33, +34}), -6, -12, -20, -30, -42, -56, -72, -90, -110, -132, -156, -182, -210, -240, -272, -306, -17, +18, -19, +20, -21, +22, -23, +24, -25, +26, -27, +28, -29, +30, -31, +32));
__m512i test_mm512_maskz_mullo_epi16 (__mmask32 __U, __m512i __A, __m512i __B) {
//CHECK-LABEL: test_mm512_maskz_mullo_epi16
@@ -846,6 +847,7 @@ __m512i test_mm512_maskz_mullo_epi16 (__mmask32 __U, __m512i __A, __m512i __B) {
//CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}
return _mm512_maskz_mullo_epi16(__U, __A, __B);
}
+TEST_CONSTEXPR(match_v32hi(_mm512_maskz_mullo_epi16(0x0000FFFF, (__m512i)(__v32hi){+2, -3, +4, -5, +6, -7, +8, -9, +10, -11, +12, -13, +14, -15, +16, -17, +18, -19, +20, -21, +22, -23, +24, -25, +26, -27, +28, -29, +30, -31, +32, -33}, (__m512i)(__v32hi){-3, +4, -5, +6, -7, +8, -9, +10, -11, +12, -13, +14, -15, +16, -17, +18, -19, +20, -21, +22, -23, +24, -25, +26, -27, +28, -29, +30, -31, +32, -33, +34}), -6, -12, -20, -30, -42, -56, -72, -90, -110, -132, -156, -182, -210, -240, -272, -306, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0));
__m512i test_mm512_mask_blend_epi8(__mmask64 __U, __m512i __A, __m512i __W) {
// CHECK-LABEL: test_mm512_mask_blend_epi8
diff --git a/clang/test/CodeGen/X86/avx512dq-builtins.c b/clang/test/CodeGen/X86/avx512dq-builtins.c
index d2bd7808ca91d..88f160a6deda5 100644
--- a/clang/test/CodeGen/X86/avx512dq-builtins.c
+++ b/clang/test/CodeGen/X86/avx512dq-builtins.c
@@ -248,6 +248,7 @@ __m512i test_mm512_mullo_epi64 (__m512i __A, __m512i __B) {
// CHECK: mul <8 x i64>
return (__m512i) _mm512_mullo_epi64(__A, __B);
}
+TEST_CONSTEXPR(match_v8di(_mm512_mullo_epi64((__m512i)(__v8di){+1, -2, +3, -4, +5, -6, +7, -8}, (__m512i)(__v8di){-2, +3, +4, +5, -6, +7, +8, +9}), -2, -6, +12, -20, -30, -42, +56, -72));
__m512i test_mm512_mask_mullo_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) {
// CHECK-LABEL: test_mm512_mask_mullo_epi64
@@ -255,6 +256,7 @@ __m512i test_mm512_mask_mullo_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m
// CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
return (__m512i) _mm512_mask_mullo_epi64(__W, __U, __A, __B);
}
+TEST_CONSTEXPR(match_v8di(_mm512_mask_mullo_epi64((__m512i)(__v8di){-100, +200, -300, +400, -500, +600, -700, +800}, 0x0F, (__m512i)(__v8di){+1, -2, +3, -4, +5, -6, +7, -8}, (__m512i)(__v8di){-2, +3, -4, +5, -6, +7, -8, +9}), -2, -6, -12, -20, -500, +600, -700, +800));
__m512i test_mm512_maskz_mullo_epi64 (__mmask8 __U, __m512i __A, __m512i __B) {
// CHECK-LABEL: test_mm512_maskz_mullo_epi64
@@ -262,6 +264,7 @@ __m512i test_mm512_maskz_mullo_epi64 (__mmask8 __U, __m512i __A, __m512i __B) {
// CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
return (__m512i) _mm512_maskz_mullo_epi64(__U, __A, __B);
}
+TEST_CONSTEXPR(match_v8di(_mm512_maskz_mullo_epi64(0x0F, (__m512i)(__v8di){+1, -2, +3, -4, +5, -6, +7, -8}, (__m512i)(__v8di){-2, +3, +4, +5, -6, +7, +8, +9}), -2, -6, +12, -20, 0, 0, 0, 0));
__m512d test_mm512_xor_pd (__m512d __A, __m512d __B) {
// CHECK-LABEL: test_mm512_xor_pd
diff --git a/clang/test/CodeGen/X86/avx512f-builtins.c b/clang/test/CodeGen/X86/avx512f-builtins.c
index 123b8de8396a6..a60bb83113087 100644
--- a/clang/test/CodeGen/X86/avx512f-builtins.c
+++ b/clang/test/CodeGen/X86/avx512f-builtins.c
@@ -3093,6 +3093,7 @@ __m512i test_mm512_maskz_mullo_epi32 (__mmask16 __k,__m512i __A, __m512i __B) {
//CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
return _mm512_maskz_mullo_epi32(__k,__A,__B);
}
+TEST_CONSTEXPR(match_v16si(_mm512_maskz_mullo_epi32(0x00FF, (__m512i)(__v16si){+1, -2, +3, -4, +5, -6, +7, -8, +9, -10, +11, -12, +13, -14, +15, -16}, (__m512i)(__v16si){-2, +3, +4, +5, -6, +7, +8, +9, -10, +11, +12, +13, -14, +15, +16, +17}), -2, -6, +12, -20, -30, -42, +56, -72, 0, 0, 0, 0, 0, 0, 0, 0));
__m512i test_mm512_mask_mullo_epi32 (__mmask16 __k,__m512i __A, __m512i __B, __m512i __src) {
//CHECK-LABEL: test_mm512_mask_mullo_epi32
@@ -3100,12 +3101,14 @@ __m512i test_mm512_mask_mullo_epi32 (__mmask16 __k,__m512i __A, __m512i __B, __m
//CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
return _mm512_mask_mullo_epi32(__src,__k,__A,__B);
}
+TEST_CONSTEXPR(match_v16si(_mm512_mask_mullo_epi32((__m512i)(__v16si){-100, +200, -300, +400, -500, +600, -700, +800, -900, +1000, -1100, +1200, -1300, +1400, -1500, +1600}, 0x00FF, (__m512i)(__v16si){+1, -2, +3, -4, +5, -6, +7, -8, +9, -10, +11, -12, +13, -14, +15, -16}, (__m512i)(__v16si){-2, +3, +4, +5, -6, +7, +8, +9, -10, +11, +12, +13, -14, +15, +16, +17}), -2, -6, +12, -20, -30, -42, +56, -72, -900, +1000, -1100, +1200, -1300, +1400, -1500, +1600));
__m512i test_mm512_mullo_epi32(__m512i __A, __m512i __B) {
//CHECK-LABEL: test_mm512_mullo_epi32
//CHECK: mul <16 x i32>
return _mm512_mullo_epi32(__A,__B);
}
+TEST_CONSTEXPR(match_v16si(_mm512_mullo_epi32((__m512i)(__v16si){+1, -2, +3, -4, +5, -6, +7, -8, +9, -10, +11, -12, +13, -14, +15, -16}, (__m512i)(__v16si){-2, +3, +4, +5, -6, +7, +8, +9, -10, +11, +12, +13, -14, +15, +16, +17}), -2, -6, +12, -20, -30, -42, +56, -72, -90, -110, +132, -156, -182, -210, +240, -272));
__m512i test_mm512_mullox_epi64 (__m512i __A, __m512i __B) {
// CHECK-LABEL: test_mm512_mullox_epi64
diff --git a/clang/test/CodeGen/X86/avx512vl-builtins.c b/clang/test/CodeGen/X86/avx512vl-builtins.c
index 78e01b944ec5d..b8dcabbf7285e 100644
--- a/clang/test/CodeGen/X86/avx512vl-builtins.c
+++ b/clang/test/CodeGen/X86/avx512vl-builtins.c
@@ -834,6 +834,7 @@ __m256i test_mm256_maskz_mullo_epi32 (__mmask8 __M, __m256i __A, __m256i __B) {
//CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}
return _mm256_maskz_mullo_epi32(__M, __A, __B);
}
+TEST_CONSTEXPR(match_v8si(_mm256_maskz_mullo_epi32(0x0F, (__m256i)(__v8si){+1, -2, +3, -4, +5, -6, +7, -8}, (__m256i)(__v8si){-2, +3, +4, +5, -6, +7, +8, +9}), -2, -6, +12, -20, 0, 0, 0, 0));
__m256i test_mm256_mask_mullo_epi32 (__m256i __W, __mmask8 __M, __m256i __A,
__m256i __B) {
@@ -842,6 +843,7 @@ __m256i test_mm256_mask_mullo_epi32 (__m256i __W, __mmask8 __M, __m256i __A,
//CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}
return _mm256_mask_mullo_epi32(__W, __M, __A, __B);
}
+TEST_CONSTEXPR(match_v8si(_mm256_mask_mullo_epi32((__m256i)(__v8si){-100, +200, -300, +400, -500, +600, -700, +800}, 0x0F, (__m256i)(__v8si){+1, -2, +3, -4, +5, -6, +7, -8}, (__m256i)(__v8si){-2, +3, +4, +5, -6, +7, +8, +9}), -2, -6, +12, -20, -500, +600, -700, +800));
__m256i test_mm256_and_epi32 (__m256i __A, __m256i __B) {
//CHECK-LABEL: test_mm256_and_epi32
diff --git a/clang/test/CodeGen/X86/avx512vlbw-builtins.c b/clang/test/CodeGen/X86/avx512vlbw-builtins.c
index dd24ed86912be..4527c9175dcaa 100644
--- a/clang/test/CodeGen/X86/avx512vlbw-builtins.c
+++ b/clang/test/CodeGen/X86/avx512vlbw-builtins.c
@@ -861,6 +861,7 @@ __m256i test_mm256_mask_mullo_epi16 (__m256i __W, __mmask16 __U, __m256i __A, __
//CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}}
return _mm256_mask_mullo_epi16(__W, __U , __A, __B);
}
+TEST_CONSTEXPR(match_v16hi(_mm256_mask_mullo_epi16((__m256i)(__v16hi){-1, +2, -3, +4, -5, +6, -7, +8, -9, +10, -11, +12, -13, +14, -15, +16}, 0x00FF, (__m256i)(__v16hi){+2, -3, +4, -5, +6, -7, +8, -9, +10, -11, +12, -13, +14, -15, +16, -17}, (__m256i)(__v16hi){-3, +4, -5, +6, -7, +8, -9, +10, -11, +12, -13, +14, -15, +16, -17, +18}), -6, -12, -20, -30, -42, -56, -72, -90, -9, +10, -11, +12, -13, +14, -15, +16));
__m256i test_mm256_maskz_mullo_epi16 (__mmask16 __U, __m256i __A, __m256i __B) {
//CHECK-LABEL: test_mm256_maskz_mullo_epi16
@@ -868,6 +869,7 @@ __m256i test_mm256_maskz_mullo_epi16 (__mmask16 __U, __m256i __A, __m256i __B) {
//CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}}
return _mm256_maskz_mullo_epi16(__U , __A, __B);
}
+TEST_CONSTEXPR(match_v16hi(_mm256_maskz_mullo_epi16(0x00FF, (__m256i)(__v16hi){+2, -3, +4, -5, +6, -7, +8, -9, +10, -11, +12, -13, +14, -15, +16, -17}, (__m256i)(__v16hi){-3, +4, -5, +6, -7, +8, -9, +10, -11, +12, -13, +14, -15, +16, -17, +18}), -6, -12, -20, -30, -42, -56, -72, -90, 0, 0, 0, 0, 0, 0, 0, 0));
__m128i test_mm_mask_mullo_epi16 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
//CHECK-LABEL: test_mm_mask_mullo_epi16
@@ -875,6 +877,7 @@ __m128i test_mm_mask_mullo_epi16 (__m128i __W, __mmask8 __U, __m128i __A, __m128
//CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}}
return _mm_mask_mullo_epi16(__W, __U , __A, __B);
}
+TEST_CONSTEXPR(match_v8hi(_mm_mask_mullo_epi16((__m128i)(__v8hi){-1, +2, -3, +4, -5, +6, -7, +8}, 0x0F, (__m128i)(__v8hi){+2, -3, +4, -5, +6, -7, +8, -9}, (__m128i)(__v8hi){-3, +4, -5, +6, -7, +8, -9, +10}), -6, -12, -20, -30, -5, +6, -7, +8));
__m128i test_mm_maskz_mullo_epi16 (__mmask8 __U, __m128i __A, __m128i __B) {
//CHECK-LABEL: test_mm_maskz_mullo_epi16
@@ -882,6 +885,7 @@ __m128i test_mm_maskz_mullo_epi16 (__mmask8 __U, __m128i __A, __m128i __B) {
//CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}}
return _mm_maskz_mullo_epi16(__U , __A, __B);
}
+TEST_CONSTEXPR(match_v8hi(_mm_maskz_mullo_epi16(0x0F, (__m128i)(__v8hi){+2, -3, +4, -5, +6, -7, +8, -9}, (__m128i)(__v8hi){-3, +4, -5, +6, -7, +8, -9, +10}), -6, -12, -20, -30, 0, 0, 0, 0));
__m128i test_mm_mask_blend_epi8(__mmask16 __U, __m128i __A, __m128i __W) {
diff --git a/clang/test/CodeGen/X86/avx512vldq-builtins.c b/clang/test/CodeGen/X86/avx512vldq-builtins.c
index 720999cea6ab4..e1e8916bf60b3 100644
--- a/clang/test/CodeGen/X86/avx512vldq-builtins.c
+++ b/clang/test/CodeGen/X86/avx512vldq-builtins.c
@@ -17,6 +17,7 @@ __m256i test_mm256_mullo_epi64 (__m256i __A, __m256i __B) {
// CHECK: mul <4 x i64>
return _mm256_mullo_epi64(__A, __B);
}
+TEST_CONSTEXPR(match_v4di(_mm256_mullo_epi64((__m256i)(__v4di){+1, -2, +3, -4}, (__m256i)(__v4di){-2, +3, +4, -5}), -2, -6, +12, +20));
__m256i test_mm256_mask_mullo_epi64 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) {
// CHECK-LABEL: test_mm256_mask_mullo_epi64
@@ -24,6 +25,7 @@ __m256i test_mm256_mask_mullo_epi64 (__m256i __W, __mmask8 __U, __m256i __A, __m
// CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}}
return (__m256i) _mm256_mask_mullo_epi64 ( __W, __U, __A, __B);
}
+TEST_CONSTEXPR(match_v4di(_mm256_mask_mullo_epi64((__m256i)(__v4di){-100, +200, -300, +400}, 0x03, (__m256i)(__v4di){+1, -2, +3, -4}, (__m256i)(__v4di){-2, +3, +4, -5}), -2, -6, -300, +400));
__m256i test_mm256_maskz_mullo_epi64 (__mmask8 __U, __m256i __A, __m256i __B) {
// CHECK-LABEL: test_mm256_maskz_mullo_epi64
@@ -31,12 +33,14 @@ __m256i test_mm256_maskz_mullo_epi64 (__mmask8 __U, __m256i __A, __m256i __B) {
// CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}}
return (__m256i) _mm256_maskz_mullo_epi64 (__U, __A, __B);
}
+TEST_CONSTEXPR(match_v4di(_mm256_maskz_mullo_epi64(0x03, (__m256i)(__v4di){+1, -2, +3, -4}, (__m256i)(__v4di){-2, +3, +4, -5}), -2, -6, 0, 0));
__m128i test_mm_mullo_epi64 (__m128i __A, __m128i __B) {
// CHECK-LABEL: test_mm_mullo_epi64
// CHECK: mul <2 x i64>
return (__m128i) _mm_mullo_epi64(__A, __B);
}
+TEST_CONSTEXPR(match_v2di(_mm_mullo_epi64((__m128i)(__v2di){+1, -2}, (__m128i)(__v2di){-3, +4}), -3, -8));
__m128i test_mm_mask_mullo_epi64 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
// CHECK-LABEL: test_mm_mask_mullo_epi64
@@ -44,6 +48,7 @@ __m128i test_mm_mask_mullo_epi64 (__m128i __W, __mmask8 __U, __m128i __A, __m128
// CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}
return (__m128i) _mm_mask_mullo_epi64 ( __W, __U, __A, __B);
}
+TEST_CONSTEXPR(match_v2di(_mm_mask_mullo_epi64((__m128i)(__v2di){-100, +200}, 0x01, (__m128i)(__v2di){+1, -2}, (__m128i)(__v2di){-3, +4}), -3, +200));
__m128i test_mm_maskz_mullo_epi64 (__mmask8 __U, __m128i __A, __m128i __B) {
// CHECK-LABEL: test_mm_maskz_mullo_epi64
@@ -51,6 +56,7 @@ __m128i test_mm_maskz_mullo_epi64 (__mmask8 __U, __m128i __A, __m128i __B) {
// CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}
return (__m128i) _mm_maskz_mullo_epi64 (__U, __A, __B);
}
+TEST_CONSTEXPR(match_v2di(_mm_maskz_mullo_epi64(0x01, (__m128i)(__v2di){+1, -2}, (__m128i)(__v2di){-3, +4}), -3, 0));
__m256d test_mm256_mask_andnot_pd (__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
// CHECK-LABEL: test_mm256_mask_andnot_pd
diff --git a/clang/test/CodeGen/X86/sse41-builtins.c b/clang/test/CodeGen/X86/sse41-builtins.c
index d3e4edc2fd172..5f53614872604 100644
--- a/clang/test/CodeGen/X86/sse41-builtins.c
+++ b/clang/test/CodeGen/X86/sse41-builtins.c
@@ -361,6 +361,7 @@ __m128i test_mm_mullo_epi32(__m128i x, __m128i y) {
// CHECK: mul <4 x i32>
return _mm_mullo_epi32(x, y);
}
+TEST_CONSTEXPR(match_v4si(_mm_mullo_epi32((__m128i)(__v4si){+1, -2, +3, -4}, (__m128i)(__v4si){-16, +14, +12, -10}), -16, -28, +36, +40));
__m128i test_mm_packus_epi32(__m128i x, __m128i y) {
// CHECK-LABEL: test_mm_packus_epi32
More information about the cfe-commits
mailing list