[clang] [Headers][X86] Enable constexpr handling for MMX/SSE/AVX/AVX512 avg intrinsics (PR #157464)
Bhasawut Singhaphan via cfe-commits
cfe-commits at lists.llvm.org
Mon Sep 8 12:09:45 PDT 2025
https://github.com/markbhasawut updated https://github.com/llvm/llvm-project/pull/157464
>From 06cd06ed672d4ff354cff831c37fc72d3ba83521 Mon Sep 17 00:00:00 2001
From: Bhasawut Singhaphan <bhasawut at gmail.com>
Date: Mon, 8 Sep 2025 15:51:33 +0700
Subject: [PATCH 1/4] [Headers][X86] Enable constexpr handling for
MMX/SSE/AVX/AVX512 avg intrinsics
Updates the avg builtins to support constant expression handling.
---
clang/include/clang/Basic/BuiltinsX86.td | 13 +++++++------
clang/lib/AST/ExprConstant.cpp | 8 ++++++++
2 files changed, 15 insertions(+), 6 deletions(-)
diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td
index b4ff550d27279..995708e8374fe 100644
--- a/clang/include/clang/Basic/BuiltinsX86.td
+++ b/clang/include/clang/Basic/BuiltinsX86.td
@@ -93,8 +93,6 @@ let Attributes = [Const, NoThrow, RequiredVectorWidth<128>] in {
}
let Features = "sse2" in {
- def pavgb128 : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<16, char>)">;
- def pavgw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">;
def packsswb128 : X86Builtin<"_Vector<16, char>(_Vector<8, short>, _Vector<8, short>)">;
def packssdw128 : X86Builtin<"_Vector<8, short>(_Vector<4, int>, _Vector<4, int>)">;
def packuswb128 : X86Builtin<"_Vector<16, char>(_Vector<8, short>, _Vector<8, short>)">;
@@ -106,6 +104,8 @@ let Attributes = [Const, NoThrow, RequiredVectorWidth<128>] in {
}
let Features = "sse2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
+ def pavgb128 : X86Builtin<"_Vector<16, unsigned char>(_Vector<16, unsigned char>, _Vector<16, unsigned char>)">;
+ def pavgw128 : X86Builtin<"_Vector<8, unsigned short>(_Vector<8, unsigned short>, _Vector<8, unsigned short>)">;
def pmulhw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">;
def pmulhuw128 : X86Builtin<"_Vector<8, unsigned short>(_Vector<8, unsigned short>, _Vector<8, unsigned short>)">;
}
@@ -575,8 +575,6 @@ let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] i
def packuswb256 : X86Builtin<"_Vector<32, char>(_Vector<16, short>, _Vector<16, short>)">;
def packusdw256 : X86Builtin<"_Vector<16, short>(_Vector<8, int>, _Vector<8, int>)">;
def palignr256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>, _Constant int)">;
- def pavgb256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>)">;
- def pavgw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">;
def pblendw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>, _Constant int)">;
def phaddw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">;
def phaddd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>)">;
@@ -618,6 +616,9 @@ let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] i
}
let Features = "avx2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
+ def pavgb256 : X86Builtin<"_Vector<32, unsigned char>(_Vector<32, unsigned char>, _Vector<32, unsigned char>)">;
+ def pavgw256 : X86Builtin<"_Vector<16, unsigned short>(_Vector<16, unsigned short>, _Vector<16, unsigned short>)">;
+
def pblendvb256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>, _Vector<32, char>)">;
def pmuldq256 : X86Builtin<"_Vector<4, long long int>(_Vector<8, int>, _Vector<8, int>)">;
@@ -1307,8 +1308,6 @@ let Features = "avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<512
def packsswb512 : X86Builtin<"_Vector<64, char>(_Vector<32, short>, _Vector<32, short>)">;
def packusdw512 : X86Builtin<"_Vector<32, short>(_Vector<16, int>, _Vector<16, int>)">;
def packuswb512 : X86Builtin<"_Vector<64, char>(_Vector<32, short>, _Vector<32, short>)">;
- def pavgb512 : X86Builtin<"_Vector<64, char>(_Vector<64, char>, _Vector<64, char>)">;
- def pavgw512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Vector<32, short>)">;
def pshufb512 : X86Builtin<"_Vector<64, char>(_Vector<64, char>, _Vector<64, char>)">;
}
@@ -1350,6 +1349,8 @@ let Features = "avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<512
}
let Features = "avx512bw", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
+ def pavgb512 : X86Builtin<"_Vector<64, unsigned char>(_Vector<64, unsigned char>, _Vector<64, unsigned char>)">;
+ def pavgw512 : X86Builtin<"_Vector<32, unsigned short>(_Vector<32, unsigned short>, _Vector<32, unsigned short>)">;
def pmulhuw512 : X86Builtin<"_Vector<32, unsigned short>(_Vector<32, unsigned short>, _Vector<32, unsigned short>)">;
def pmulhw512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Vector<32, short>)">;
}
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index ca930737474df..47a3dfeae24d6 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -11694,6 +11694,14 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
return LHS.isSigned() ? LHS.ssub_sat(RHS) : LHS.usub_sat(RHS);
});
+ case clang::X86::BI__builtin_ia32_pavgb128:
+ case clang::X86::BI__builtin_ia32_pavgw128:
+ case clang::X86::BI__builtin_ia32_pavgb256:
+ case clang::X86::BI__builtin_ia32_pavgw256:
+ case clang::X86::BI__builtin_ia32_pavgb512:
+ case clang::X86::BI__builtin_ia32_pavgw512:
+ return EvaluateBinOpExpr(llvm::APIntOps::avgCeilU);
+
case clang::X86::BI__builtin_ia32_pmulhuw128:
case clang::X86::BI__builtin_ia32_pmulhuw256:
case clang::X86::BI__builtin_ia32_pmulhuw512:
>From d681acaed5eb4bb5ce0fe3e7b1f2b2ed5dad098a Mon Sep 17 00:00:00 2001
From: Bhasawut Singhaphan <bhasawut at gmail.com>
Date: Mon, 8 Sep 2025 09:51:38 +0000
Subject: [PATCH 2/4] [clang][bytecode] Handle ia32_pavg* builtins
This PR handles the __builtin_ia32_pavg builtins inside VectorExprEvaluator::VisitCallExpr.
---
clang/lib/AST/ByteCode/InterpBuiltin.cpp | 9 +++++++++
1 file changed, 9 insertions(+)
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index a0dcdace854b9..b712222b32992 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -3299,6 +3299,15 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
return LHS.isSigned() ? LHS.ssub_sat(RHS) : LHS.usub_sat(RHS);
});
+ case clang::X86::BI__builtin_ia32_pavgb128:
+ case clang::X86::BI__builtin_ia32_pavgw128:
+ case clang::X86::BI__builtin_ia32_pavgb256:
+ case clang::X86::BI__builtin_ia32_pavgw256:
+ case clang::X86::BI__builtin_ia32_pavgb512:
+ case clang::X86::BI__builtin_ia32_pavgw512:
+ return interp__builtin_elementwise_int_binop(S, OpPC, Call,
+ llvm::APIntOps::avgCeilU);
+
case clang::X86::BI__builtin_ia32_pmulhuw128:
case clang::X86::BI__builtin_ia32_pmulhuw256:
case clang::X86::BI__builtin_ia32_pmulhuw512:
>From c310203fadf948e3cccd24afad3fe2e4bf978086 Mon Sep 17 00:00:00 2001
From: Bhasawut Singhaphan <bhasawut at gmail.com>
Date: Mon, 8 Sep 2025 21:25:53 +0700
Subject: [PATCH 3/4] Update MMX/SSE/AVX/AVX512 AVG intrinsics to be used in
constexpr
---
clang/lib/Headers/avx2intrin.h | 10 +++----
clang/lib/Headers/avx512bwintrin.h | 32 +++++++++-------------
clang/lib/Headers/avx512vlbwintrin.h | 40 +++++++++++-----------------
clang/lib/Headers/emmintrin.h | 8 +++---
clang/lib/Headers/xmmintrin.h | 10 +++----
5 files changed, 40 insertions(+), 60 deletions(-)
diff --git a/clang/lib/Headers/avx2intrin.h b/clang/lib/Headers/avx2intrin.h
index 2cacdc3c4596c..ee2dcd70d6daa 100644
--- a/clang/lib/Headers/avx2intrin.h
+++ b/clang/lib/Headers/avx2intrin.h
@@ -496,9 +496,8 @@ _mm256_andnot_si256(__m256i __a, __m256i __b)
/// \param __b
/// A 256-bit integer vector.
/// \returns A 256-bit integer vector containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_avg_epu8(__m256i __a, __m256i __b)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_avg_epu8(__m256i __a, __m256i __b) {
return (__m256i)__builtin_ia32_pavgb256((__v32qi)__a, (__v32qi)__b);
}
@@ -522,9 +521,8 @@ _mm256_avg_epu8(__m256i __a, __m256i __b)
/// \param __b
/// A 256-bit vector of [16 x i16].
/// \returns A 256-bit vector of [16 x i16] containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_avg_epu16(__m256i __a, __m256i __b)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_avg_epu16(__m256i __a, __m256i __b) {
return (__m256i)__builtin_ia32_pavgw256((__v16hi)__a, (__v16hi)__b);
}
diff --git a/clang/lib/Headers/avx512bwintrin.h b/clang/lib/Headers/avx512bwintrin.h
index 31e0a2242240c..94d02d2557b05 100644
--- a/clang/lib/Headers/avx512bwintrin.h
+++ b/clang/lib/Headers/avx512bwintrin.h
@@ -690,47 +690,39 @@ _mm512_maskz_adds_epu16 (__mmask32 __U, __m512i __A, __m512i __B)
(__v32hi)_mm512_setzero_si512());
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_avg_epu8 (__m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_avg_epu8(__m512i __A, __m512i __B) {
return (__m512i)__builtin_ia32_pavgb512((__v64qi)__A, (__v64qi)__B);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_avg_epu8 (__m512i __W, __mmask64 __U, __m512i __A,
- __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_avg_epu8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) {
return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
(__v64qi)_mm512_avg_epu8(__A, __B),
(__v64qi)__W);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_avg_epu8 (__mmask64 __U, __m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_avg_epu8(__mmask64 __U, __m512i __A, __m512i __B) {
return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
(__v64qi)_mm512_avg_epu8(__A, __B),
(__v64qi)_mm512_setzero_si512());
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_avg_epu16 (__m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_avg_epu16(__m512i __A, __m512i __B) {
return (__m512i)__builtin_ia32_pavgw512((__v32hi)__A, (__v32hi)__B);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_avg_epu16 (__m512i __W, __mmask32 __U, __m512i __A,
- __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_avg_epu16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) {
return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
(__v32hi)_mm512_avg_epu16(__A, __B),
(__v32hi)__W);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_avg_epu16 (__mmask32 __U, __m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_avg_epu16(__mmask32 __U, __m512i __A, __m512i __B) {
return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
(__v32hi)_mm512_avg_epu16(__A, __B),
(__v32hi) _mm512_setzero_si512());
diff --git a/clang/lib/Headers/avx512vlbwintrin.h b/clang/lib/Headers/avx512vlbwintrin.h
index 846cda67bce3f..02d44527895fc 100644
--- a/clang/lib/Headers/avx512vlbwintrin.h
+++ b/clang/lib/Headers/avx512vlbwintrin.h
@@ -795,65 +795,57 @@ _mm256_maskz_adds_epu16(__mmask16 __U, __m256i __A, __m256i __B)
(__v16hi)_mm256_setzero_si256());
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_mask_avg_epu8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_avg_epu8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) {
return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
(__v16qi)_mm_avg_epu8(__A, __B),
(__v16qi)__W);
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_maskz_avg_epu8(__mmask16 __U, __m128i __A, __m128i __B)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_maskz_avg_epu8(__mmask16 __U, __m128i __A, __m128i __B) {
return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
(__v16qi)_mm_avg_epu8(__A, __B),
(__v16qi)_mm_setzero_si128());
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_mask_avg_epu8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask_avg_epu8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) {
return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
(__v32qi)_mm256_avg_epu8(__A, __B),
(__v32qi)__W);
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_maskz_avg_epu8(__mmask32 __U, __m256i __A, __m256i __B)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_maskz_avg_epu8(__mmask32 __U, __m256i __A, __m256i __B) {
return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
(__v32qi)_mm256_avg_epu8(__A, __B),
(__v32qi)_mm256_setzero_si256());
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_mask_avg_epu16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_avg_epu16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
(__v8hi)_mm_avg_epu16(__A, __B),
(__v8hi)__W);
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_maskz_avg_epu16(__mmask8 __U, __m128i __A, __m128i __B)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_maskz_avg_epu16(__mmask8 __U, __m128i __A, __m128i __B) {
return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
(__v8hi)_mm_avg_epu16(__A, __B),
(__v8hi)_mm_setzero_si128());
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_mask_avg_epu16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask_avg_epu16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) {
return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
(__v16hi)_mm256_avg_epu16(__A, __B),
(__v16hi)__W);
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_maskz_avg_epu16(__mmask16 __U, __m256i __A, __m256i __B)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_maskz_avg_epu16(__mmask16 __U, __m256i __A, __m256i __B) {
return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
(__v16hi)_mm256_avg_epu16(__A, __B),
(__v16hi)_mm256_setzero_si256());
diff --git a/clang/lib/Headers/emmintrin.h b/clang/lib/Headers/emmintrin.h
index a366e0df407a9..1a631db6705bd 100644
--- a/clang/lib/Headers/emmintrin.h
+++ b/clang/lib/Headers/emmintrin.h
@@ -2247,8 +2247,8 @@ _mm_adds_epu16(__m128i __a, __m128i __b) {
/// A 128-bit unsigned [16 x i8] vector.
/// \returns A 128-bit unsigned [16 x i8] vector containing the rounded
/// averages of both parameters.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_avg_epu8(__m128i __a,
- __m128i __b) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_avg_epu8(__m128i __a, __m128i __b) {
return (__m128i)__builtin_ia32_pavgb128((__v16qi)__a, (__v16qi)__b);
}
@@ -2266,8 +2266,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_avg_epu8(__m128i __a,
/// A 128-bit unsigned [8 x i16] vector.
/// \returns A 128-bit unsigned [8 x i16] vector containing the rounded
/// averages of both parameters.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_avg_epu16(__m128i __a,
- __m128i __b) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_avg_epu16(__m128i __a, __m128i __b) {
return (__m128i)__builtin_ia32_pavgw128((__v8hi)__a, (__v8hi)__b);
}
diff --git a/clang/lib/Headers/xmmintrin.h b/clang/lib/Headers/xmmintrin.h
index 4b52904315451..b4d2a2386fd08 100644
--- a/clang/lib/Headers/xmmintrin.h
+++ b/clang/lib/Headers/xmmintrin.h
@@ -2539,9 +2539,8 @@ _mm_maskmove_si64(__m64 __d, __m64 __n, char *__p)
/// \param __b
/// A 64-bit integer vector containing one of the source operands.
/// \returns A 64-bit integer vector containing the averages of both operands.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
-_mm_avg_pu8(__m64 __a, __m64 __b)
-{
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
+_mm_avg_pu8(__m64 __a, __m64 __b) {
return __trunc64(__builtin_ia32_pavgb128((__v16qi)__anyext128(__a),
(__v16qi)__anyext128(__b)));
}
@@ -2559,9 +2558,8 @@ _mm_avg_pu8(__m64 __a, __m64 __b)
/// \param __b
/// A 64-bit integer vector containing one of the source operands.
/// \returns A 64-bit integer vector containing the averages of both operands.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
-_mm_avg_pu16(__m64 __a, __m64 __b)
-{
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
+_mm_avg_pu16(__m64 __a, __m64 __b) {
return __trunc64(__builtin_ia32_pavgw128((__v8hi)__anyext128(__a),
(__v8hi)__anyext128(__b)));
}
>From abf5cc72d5fee356b2732c172d29d699615e098c Mon Sep 17 00:00:00 2001
From: Bhasawut Singhaphan <bhasawut at gmail.com>
Date: Tue, 9 Sep 2025 00:47:03 +0700
Subject: [PATCH 4/4] Convert builtin types to unsigned to reflect changes
---
clang/lib/Headers/avx2intrin.h | 4 +--
clang/lib/Headers/avx512bwintrin.h | 24 ++++++++----------
clang/lib/Headers/avx512vlbwintrin.h | 38 +++++++++++++---------------
clang/lib/Headers/emmintrin.h | 5 ++--
clang/lib/Headers/xmmintrin.h | 8 +++---
5 files changed, 37 insertions(+), 42 deletions(-)
diff --git a/clang/lib/Headers/avx2intrin.h b/clang/lib/Headers/avx2intrin.h
index ee2dcd70d6daa..fc12a9bf15e57 100644
--- a/clang/lib/Headers/avx2intrin.h
+++ b/clang/lib/Headers/avx2intrin.h
@@ -498,7 +498,7 @@ _mm256_andnot_si256(__m256i __a, __m256i __b)
/// \returns A 256-bit integer vector containing the result.
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_avg_epu8(__m256i __a, __m256i __b) {
- return (__m256i)__builtin_ia32_pavgb256((__v32qi)__a, (__v32qi)__b);
+ return (__m256i)__builtin_ia32_pavgb256((__v32qu)__a, (__v32qu)__b);
}
/// Computes the averages of the corresponding unsigned 16-bit integers in
@@ -523,7 +523,7 @@ _mm256_avg_epu8(__m256i __a, __m256i __b) {
/// \returns A 256-bit vector of [16 x i16] containing the result.
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_avg_epu16(__m256i __a, __m256i __b) {
- return (__m256i)__builtin_ia32_pavgw256((__v16hi)__a, (__v16hi)__b);
+ return (__m256i)__builtin_ia32_pavgw256((__v16hu)__a, (__v16hu)__b);
}
/// Merges 8-bit integer values from either of the two 256-bit vectors
diff --git a/clang/lib/Headers/avx512bwintrin.h b/clang/lib/Headers/avx512bwintrin.h
index 94d02d2557b05..a4b34835e8363 100644
--- a/clang/lib/Headers/avx512bwintrin.h
+++ b/clang/lib/Headers/avx512bwintrin.h
@@ -692,40 +692,38 @@ _mm512_maskz_adds_epu16 (__mmask32 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_avg_epu8(__m512i __A, __m512i __B) {
- return (__m512i)__builtin_ia32_pavgb512((__v64qi)__A, (__v64qi)__B);
+ return (__m512i)__builtin_ia32_pavgb512((__v64qu)__A, (__v64qu)__B);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_avg_epu8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) {
- return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
- (__v64qi)_mm512_avg_epu8(__A, __B),
- (__v64qi)__W);
+ return (__m512i)__builtin_ia32_selectb_512(
+ (__mmask64)__U, (__v64qu)_mm512_avg_epu8(__A, __B), (__v64qu)__W);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_avg_epu8(__mmask64 __U, __m512i __A, __m512i __B) {
return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
- (__v64qi)_mm512_avg_epu8(__A, __B),
- (__v64qi)_mm512_setzero_si512());
+ (__v64qu)_mm512_avg_epu8(__A, __B),
+ (__v64qu)_mm512_setzero_si512());
}
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_avg_epu16(__m512i __A, __m512i __B) {
- return (__m512i)__builtin_ia32_pavgw512((__v32hi)__A, (__v32hi)__B);
+ return (__m512i)__builtin_ia32_pavgw512((__v32hu)__A, (__v32hu)__B);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_avg_epu16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) {
- return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
- (__v32hi)_mm512_avg_epu16(__A, __B),
- (__v32hi)__W);
+ return (__m512i)__builtin_ia32_selectw_512(
+ (__mmask32)__U, (__v32hu)_mm512_avg_epu16(__A, __B), (__v32hu)__W);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_avg_epu16(__mmask32 __U, __m512i __A, __m512i __B) {
- return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
- (__v32hi)_mm512_avg_epu16(__A, __B),
- (__v32hi) _mm512_setzero_si512());
+ return (__m512i)__builtin_ia32_selectw_512(
+ (__mmask32)__U, (__v32hu)_mm512_avg_epu16(__A, __B),
+ (__v32hu)_mm512_setzero_si512());
}
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
diff --git a/clang/lib/Headers/avx512vlbwintrin.h b/clang/lib/Headers/avx512vlbwintrin.h
index 02d44527895fc..13fe0abc480e0 100644
--- a/clang/lib/Headers/avx512vlbwintrin.h
+++ b/clang/lib/Headers/avx512vlbwintrin.h
@@ -797,58 +797,54 @@ _mm256_maskz_adds_epu16(__mmask16 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask_avg_epu8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) {
- return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
- (__v16qi)_mm_avg_epu8(__A, __B),
- (__v16qi)__W);
+ return (__m128i)__builtin_ia32_selectb_128(
+ (__mmask16)__U, (__v16qu)_mm_avg_epu8(__A, __B), (__v16qu)__W);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_maskz_avg_epu8(__mmask16 __U, __m128i __A, __m128i __B) {
return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
- (__v16qi)_mm_avg_epu8(__A, __B),
- (__v16qi)_mm_setzero_si128());
+ (__v16qu)_mm_avg_epu8(__A, __B),
+ (__v16qu)_mm_setzero_si128());
}
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_mask_avg_epu8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) {
- return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
- (__v32qi)_mm256_avg_epu8(__A, __B),
- (__v32qi)__W);
+ return (__m256i)__builtin_ia32_selectb_256(
+ (__mmask32)__U, (__v32qu)_mm256_avg_epu8(__A, __B), (__v32qu)__W);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_maskz_avg_epu8(__mmask32 __U, __m256i __A, __m256i __B) {
return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
- (__v32qi)_mm256_avg_epu8(__A, __B),
- (__v32qi)_mm256_setzero_si256());
+ (__v32qu)_mm256_avg_epu8(__A, __B),
+ (__v32qu)_mm256_setzero_si256());
}
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask_avg_epu16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
- return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
- (__v8hi)_mm_avg_epu16(__A, __B),
- (__v8hi)__W);
+ return (__m128i)__builtin_ia32_selectw_128(
+ (__mmask8)__U, (__v8hu)_mm_avg_epu16(__A, __B), (__v8hu)__W);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_maskz_avg_epu16(__mmask8 __U, __m128i __A, __m128i __B) {
return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
- (__v8hi)_mm_avg_epu16(__A, __B),
- (__v8hi)_mm_setzero_si128());
+ (__v8hu)_mm_avg_epu16(__A, __B),
+ (__v8hu)_mm_setzero_si128());
}
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_mask_avg_epu16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) {
- return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
- (__v16hi)_mm256_avg_epu16(__A, __B),
- (__v16hi)__W);
+ return (__m256i)__builtin_ia32_selectw_256(
+ (__mmask16)__U, (__v16hu)_mm256_avg_epu16(__A, __B), (__v16hu)__W);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_maskz_avg_epu16(__mmask16 __U, __m256i __A, __m256i __B) {
- return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
- (__v16hi)_mm256_avg_epu16(__A, __B),
- (__v16hi)_mm256_setzero_si256());
+ return (__m256i)__builtin_ia32_selectw_256(
+ (__mmask16)__U, (__v16hu)_mm256_avg_epu16(__A, __B),
+ (__v16hu)_mm256_setzero_si256());
}
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
diff --git a/clang/lib/Headers/emmintrin.h b/clang/lib/Headers/emmintrin.h
index 1a631db6705bd..40cba7040f6c5 100644
--- a/clang/lib/Headers/emmintrin.h
+++ b/clang/lib/Headers/emmintrin.h
@@ -10,6 +10,7 @@
#ifndef __EMMINTRIN_H
#define __EMMINTRIN_H
+#include "xmmintrin.h"
#if !defined(__i386__) && !defined(__x86_64__)
#error "This header is only meant to be used on x86 and x64 architecture"
#endif
@@ -2249,7 +2250,7 @@ _mm_adds_epu16(__m128i __a, __m128i __b) {
/// averages of both parameters.
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
_mm_avg_epu8(__m128i __a, __m128i __b) {
- return (__m128i)__builtin_ia32_pavgb128((__v16qi)__a, (__v16qi)__b);
+ return (__m128i)__builtin_ia32_pavgb128((__v16qu)__a, (__v16qu)__b);
}
/// Computes the rounded averages of corresponding elements of two
@@ -2268,7 +2269,7 @@ _mm_avg_epu8(__m128i __a, __m128i __b) {
/// averages of both parameters.
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
_mm_avg_epu16(__m128i __a, __m128i __b) {
- return (__m128i)__builtin_ia32_pavgw128((__v8hi)__a, (__v8hi)__b);
+ return (__m128i)__builtin_ia32_pavgw128((__v8hu)__a, (__v8hu)__b);
}
/// Multiplies the corresponding elements of two 128-bit signed [8 x i16]
diff --git a/clang/lib/Headers/xmmintrin.h b/clang/lib/Headers/xmmintrin.h
index b4d2a2386fd08..d45fd31dc4430 100644
--- a/clang/lib/Headers/xmmintrin.h
+++ b/clang/lib/Headers/xmmintrin.h
@@ -2541,8 +2541,8 @@ _mm_maskmove_si64(__m64 __d, __m64 __n, char *__p)
/// \returns A 64-bit integer vector containing the averages of both operands.
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
_mm_avg_pu8(__m64 __a, __m64 __b) {
- return __trunc64(__builtin_ia32_pavgb128((__v16qi)__anyext128(__a),
- (__v16qi)__anyext128(__b)));
+ return __trunc64(__builtin_ia32_pavgb128((__v16qu)__anyext128(__a),
+ (__v16qu)__anyext128(__b)));
}
/// Computes the rounded averages of the packed unsigned 16-bit integer
@@ -2560,8 +2560,8 @@ _mm_avg_pu8(__m64 __a, __m64 __b) {
/// \returns A 64-bit integer vector containing the averages of both operands.
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
_mm_avg_pu16(__m64 __a, __m64 __b) {
- return __trunc64(__builtin_ia32_pavgw128((__v8hi)__anyext128(__a),
- (__v8hi)__anyext128(__b)));
+ return __trunc64(__builtin_ia32_pavgw128((__v8hu)__anyext128(__a),
+ (__v8hu)__anyext128(__b)));
}
/// Subtracts the corresponding 8-bit unsigned integer values of the two
More information about the cfe-commits
mailing list