[clang] 91fff70 - [clang][X86] Replace vprot/vprol/vpror/vshld/vshrd intrinsics with __builtin_elementwise_fshl/fshr (#153229)
via cfe-commits
cfe-commits at lists.llvm.org
Wed Aug 13 02:28:33 PDT 2025
Author: Simon Pilgrim
Date: 2025-08-13T10:28:30+01:00
New Revision: 91fff707407ea4d3455d4675bc1310a7bc0b5d58
URL: https://github.com/llvm/llvm-project/commit/91fff707407ea4d3455d4675bc1310a7bc0b5d58
DIFF: https://github.com/llvm/llvm-project/commit/91fff707407ea4d3455d4675bc1310a7bc0b5d58.diff
LOG: [clang][X86] Replace vprot/vprol/vpror/vshld/vshrd intrinsics with __builtin_elementwise_fshl/fshr (#153229)
Replaces the XOP/AVX512 per-element rotation/funnel shift builtins with the generic __builtin_elementwise_fshl/fshr
We still have uniform immediate variants to handle next.
Part of #153152
Added:
Modified:
clang/include/clang/Basic/BuiltinsX86.td
clang/lib/CodeGen/TargetBuiltins/X86.cpp
clang/lib/Headers/avx512fintrin.h
clang/lib/Headers/avx512vbmi2intrin.h
clang/lib/Headers/avx512vlintrin.h
clang/lib/Headers/avx512vlvbmi2intrin.h
clang/lib/Headers/xopintrin.h
Removed:
################################################################################
diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td
index fc1ee3be7889f..4262bdaa7cdd9 100644
--- a/clang/include/clang/Basic/BuiltinsX86.td
+++ b/clang/include/clang/Basic/BuiltinsX86.td
@@ -945,10 +945,6 @@ let Features = "xop", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in
def vphsubwd : X86Builtin<"_Vector<4, int>(_Vector<8, short>)">;
def vphsubdq : X86Builtin<"_Vector<2, long long int>(_Vector<4, int>)">;
def vpperm : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<16, char>, _Vector<16, char>)">;
- def vprotb : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<16, char>)">;
- def vprotw : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">;
- def vprotd : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>)">;
- def vprotq : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>)">;
def vprotbi : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Constant char)">;
def vprotwi : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Constant char)">;
def vprotdi : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Constant char)">;
@@ -1882,78 +1878,6 @@ let Features = "avx512vbmi2,evex512", Attributes = [NoThrow, Const, RequiredVect
def vpshldw512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Vector<32, short>, _Constant int)">;
}
-let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
- def vpshldvd128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>, _Vector<4, int>)">;
-}
-
-let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
- def vpshldvd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>, _Vector<8, int>)">;
-}
-
-let Features = "avx512vbmi2,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
- def vpshldvd512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<16, int>, _Vector<16, int>)">;
-}
-
-let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
- def vpshldvq128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>, _Vector<2, long long int>)">;
-}
-
-let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
- def vpshldvq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>, _Vector<4, long long int>)">;
-}
-
-let Features = "avx512vbmi2,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
- def vpshldvq512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<8, long long int>, _Vector<8, long long int>)">;
-}
-
-let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
- def vpshldvw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>, _Vector<8, short>)">;
-}
-
-let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
- def vpshldvw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>, _Vector<16, short>)">;
-}
-
-let Features = "avx512vbmi2,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
- def vpshldvw512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Vector<32, short>, _Vector<32, short>)">;
-}
-
-let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
- def vpshrdvd128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>, _Vector<4, int>)">;
-}
-
-let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
- def vpshrdvd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>, _Vector<8, int>)">;
-}
-
-let Features = "avx512vbmi2,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
- def vpshrdvd512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<16, int>, _Vector<16, int>)">;
-}
-
-let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
- def vpshrdvq128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>, _Vector<2, long long int>)">;
-}
-
-let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
- def vpshrdvq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>, _Vector<4, long long int>)">;
-}
-
-let Features = "avx512vbmi2,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
- def vpshrdvq512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<8, long long int>, _Vector<8, long long int>)">;
-}
-
-let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
- def vpshrdvw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>, _Vector<8, short>)">;
-}
-
-let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
- def vpshrdvw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>, _Vector<16, short>)">;
-}
-
-let Features = "avx512vbmi2,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
- def vpshrdvw512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Vector<32, short>, _Vector<32, short>)">;
-}
-
let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
def vpshrdd128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>, _Constant int)">;
}
@@ -2165,28 +2089,10 @@ let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256
}
let Features = "avx512f,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
- def prolvd512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<16, int>)">;
- def prolvq512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<8, long long int>)">;
def prord512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Constant int)">;
def prorq512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Constant int)">;
}
-let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
- def prolvd128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>)">;
-}
-
-let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
- def prolvd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>)">;
-}
-
-let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
- def prolvq128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>)">;
-}
-
-let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
- def prolvq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>)">;
-}
-
let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
def prord128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Constant int)">;
}
@@ -2203,27 +2109,6 @@ let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256
def prorq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Constant int)">;
}
-let Features = "avx512f,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
- def prorvd512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<16, int>)">;
- def prorvq512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<8, long long int>)">;
-}
-
-let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
- def prorvd128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>)">;
-}
-
-let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
- def prorvd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>)">;
-}
-
-let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
- def prorvq128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>)">;
-}
-
-let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
- def prorvq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>)">;
-}
-
let Features = "avx512bw,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
def pshufhw512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Constant int)">;
def pshuflw512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Constant int)">;
diff --git a/clang/lib/CodeGen/TargetBuiltins/X86.cpp b/clang/lib/CodeGen/TargetBuiltins/X86.cpp
index f8d451bd20fa3..b9248a7d43f85 100644
--- a/clang/lib/CodeGen/TargetBuiltins/X86.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/X86.cpp
@@ -1932,10 +1932,6 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
return SI;
}
// Rotate is a special case of funnel shift - 1st 2 args are the same.
- case X86::BI__builtin_ia32_vprotb:
- case X86::BI__builtin_ia32_vprotw:
- case X86::BI__builtin_ia32_vprotd:
- case X86::BI__builtin_ia32_vprotq:
case X86::BI__builtin_ia32_vprotbi:
case X86::BI__builtin_ia32_vprotwi:
case X86::BI__builtin_ia32_vprotdi:
@@ -1946,12 +1942,6 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
case X86::BI__builtin_ia32_prolq128:
case X86::BI__builtin_ia32_prolq256:
case X86::BI__builtin_ia32_prolq512:
- case X86::BI__builtin_ia32_prolvd128:
- case X86::BI__builtin_ia32_prolvd256:
- case X86::BI__builtin_ia32_prolvd512:
- case X86::BI__builtin_ia32_prolvq128:
- case X86::BI__builtin_ia32_prolvq256:
- case X86::BI__builtin_ia32_prolvq512:
return EmitX86FunnelShift(*this, Ops[0], Ops[0], Ops[1], false);
case X86::BI__builtin_ia32_prord128:
case X86::BI__builtin_ia32_prord256:
@@ -1959,12 +1949,6 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
case X86::BI__builtin_ia32_prorq128:
case X86::BI__builtin_ia32_prorq256:
case X86::BI__builtin_ia32_prorq512:
- case X86::BI__builtin_ia32_prorvd128:
- case X86::BI__builtin_ia32_prorvd256:
- case X86::BI__builtin_ia32_prorvd512:
- case X86::BI__builtin_ia32_prorvq128:
- case X86::BI__builtin_ia32_prorvq256:
- case X86::BI__builtin_ia32_prorvq512:
return EmitX86FunnelShift(*this, Ops[0], Ops[0], Ops[1], true);
case X86::BI__builtin_ia32_selectb_128:
case X86::BI__builtin_ia32_selectb_256:
@@ -2357,29 +2341,6 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
// Ops 0 and 1 are swapped.
return EmitX86FunnelShift(*this, Ops[1], Ops[0], Ops[2], true);
- case X86::BI__builtin_ia32_vpshldvd128:
- case X86::BI__builtin_ia32_vpshldvd256:
- case X86::BI__builtin_ia32_vpshldvd512:
- case X86::BI__builtin_ia32_vpshldvq128:
- case X86::BI__builtin_ia32_vpshldvq256:
- case X86::BI__builtin_ia32_vpshldvq512:
- case X86::BI__builtin_ia32_vpshldvw128:
- case X86::BI__builtin_ia32_vpshldvw256:
- case X86::BI__builtin_ia32_vpshldvw512:
- return EmitX86FunnelShift(*this, Ops[0], Ops[1], Ops[2], false);
-
- case X86::BI__builtin_ia32_vpshrdvd128:
- case X86::BI__builtin_ia32_vpshrdvd256:
- case X86::BI__builtin_ia32_vpshrdvd512:
- case X86::BI__builtin_ia32_vpshrdvq128:
- case X86::BI__builtin_ia32_vpshrdvq256:
- case X86::BI__builtin_ia32_vpshrdvq512:
- case X86::BI__builtin_ia32_vpshrdvw128:
- case X86::BI__builtin_ia32_vpshrdvw256:
- case X86::BI__builtin_ia32_vpshrdvw512:
- // Ops 0 and 1 are swapped.
- return EmitX86FunnelShift(*this, Ops[1], Ops[0], Ops[2], true);
-
// Reductions
case X86::BI__builtin_ia32_reduce_fadd_pd512:
case X86::BI__builtin_ia32_reduce_fadd_ps512:
diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h
index 9fc1df3acd3d0..90f883ba9f770 100644
--- a/clang/lib/Headers/avx512fintrin.h
+++ b/clang/lib/Headers/avx512fintrin.h
@@ -4926,7 +4926,7 @@ _mm512_maskz_cvtepu16_epi64(__mmask8 __U, __m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_rorv_epi32 (__m512i __A, __m512i __B)
{
- return (__m512i)__builtin_ia32_prorvd512((__v16si)__A, (__v16si)__B);
+ return (__m512i)__builtin_elementwise_fshr((__v16su)__A,(__v16su)__A, (__v16su)__B);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS512
@@ -4948,7 +4948,7 @@ _mm512_maskz_rorv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_rorv_epi64 (__m512i __A, __m512i __B)
{
- return (__m512i)__builtin_ia32_prorvq512((__v8di)__A, (__v8di)__B);
+ return (__m512i)__builtin_elementwise_fshr((__v8du)__A, (__v8du)__A, (__v8du)__B);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS512
@@ -5038,7 +5038,7 @@ _mm512_maskz_rorv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_rolv_epi32 (__m512i __A, __m512i __B)
{
- return (__m512i)__builtin_ia32_prolvd512((__v16si)__A, (__v16si)__B);
+ return (__m512i)__builtin_elementwise_fshl((__v16su)__A, (__v16su)__A, (__v16su)__B);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS512
@@ -5060,7 +5060,7 @@ _mm512_maskz_rolv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_rolv_epi64 (__m512i __A, __m512i __B)
{
- return (__m512i)__builtin_ia32_prolvq512((__v8di)__A, (__v8di)__B);
+ return (__m512i)__builtin_elementwise_fshl((__v8du)__A, (__v8du)__A, (__v8du)__B);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS512
diff --git a/clang/lib/Headers/avx512vbmi2intrin.h b/clang/lib/Headers/avx512vbmi2intrin.h
index 11598c888787c..f9a5f82b61f82 100644
--- a/clang/lib/Headers/avx512vbmi2intrin.h
+++ b/clang/lib/Headers/avx512vbmi2intrin.h
@@ -215,8 +215,8 @@ _mm512_maskz_expandloadu_epi8(__mmask64 __U, void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_shldv_epi64(__m512i __A, __m512i __B, __m512i __C)
{
- return (__m512i)__builtin_ia32_vpshldvq512((__v8di)__A, (__v8di)__B,
- (__v8di)__C);
+ return (__m512i)__builtin_elementwise_fshl((__v8du)__A, (__v8du)__B,
+ (__v8du)__C);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
@@ -238,8 +238,8 @@ _mm512_maskz_shldv_epi64(__mmask8 __U, __m512i __A, __m512i __B, __m512i __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_shldv_epi32(__m512i __A, __m512i __B, __m512i __C)
{
- return (__m512i)__builtin_ia32_vpshldvd512((__v16si)__A, (__v16si)__B,
- (__v16si)__C);
+ return (__m512i)__builtin_elementwise_fshl((__v16su)__A, (__v16su)__B,
+ (__v16su)__C);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
@@ -261,8 +261,8 @@ _mm512_maskz_shldv_epi32(__mmask16 __U, __m512i __A, __m512i __B, __m512i __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_shldv_epi16(__m512i __A, __m512i __B, __m512i __C)
{
- return (__m512i)__builtin_ia32_vpshldvw512((__v32hi)__A, (__v32hi)__B,
- (__v32hi)__C);
+ return (__m512i)__builtin_elementwise_fshl((__v32hu)__A, (__v32hu)__B,
+ (__v32hu)__C);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
@@ -284,8 +284,9 @@ _mm512_maskz_shldv_epi16(__mmask32 __U, __m512i __A, __m512i __B, __m512i __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_shrdv_epi64(__m512i __A, __m512i __B, __m512i __C)
{
- return (__m512i)__builtin_ia32_vpshrdvq512((__v8di)__A, (__v8di)__B,
- (__v8di)__C);
+ // Ops __A and __B are swapped.
+ return (__m512i)__builtin_elementwise_fshr((__v8du)__B, (__v8du)__A,
+ (__v8du)__C);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
@@ -307,8 +308,9 @@ _mm512_maskz_shrdv_epi64(__mmask8 __U, __m512i __A, __m512i __B, __m512i __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_shrdv_epi32(__m512i __A, __m512i __B, __m512i __C)
{
- return (__m512i)__builtin_ia32_vpshrdvd512((__v16si)__A, (__v16si)__B,
- (__v16si)__C);
+ // Ops __A and __B are swapped.
+ return (__m512i)__builtin_elementwise_fshr((__v16su)__B, (__v16su)__A,
+ (__v16su)__C);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
@@ -330,8 +332,9 @@ _mm512_maskz_shrdv_epi32(__mmask16 __U, __m512i __A, __m512i __B, __m512i __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_shrdv_epi16(__m512i __A, __m512i __B, __m512i __C)
{
- return (__m512i)__builtin_ia32_vpshrdvw512((__v32hi)__A, (__v32hi)__B,
- (__v32hi)__C);
+ // Ops __A and __B are swapped.
+ return (__m512i)__builtin_elementwise_fshr((__v32hu)__B, (__v32hu)__A,
+ (__v32hu)__C);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
diff --git a/clang/lib/Headers/avx512vlintrin.h b/clang/lib/Headers/avx512vlintrin.h
index cbad39acad84f..366adab1fab32 100644
--- a/clang/lib/Headers/avx512vlintrin.h
+++ b/clang/lib/Headers/avx512vlintrin.h
@@ -4310,7 +4310,7 @@ _mm256_maskz_scalef_ps (__mmask8 __U, __m256 __A, __m256 __B) {
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_rolv_epi32 (__m128i __A, __m128i __B)
{
- return (__m128i)__builtin_ia32_prolvd128((__v4si)__A, (__v4si)__B);
+ return (__m128i)__builtin_elementwise_fshl((__v4su)__A, (__v4su)__A, (__v4su)__B);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS128
@@ -4332,7 +4332,7 @@ _mm_maskz_rolv_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_rolv_epi32 (__m256i __A, __m256i __B)
{
- return (__m256i)__builtin_ia32_prolvd256((__v8si)__A, (__v8si)__B);
+ return (__m256i)__builtin_elementwise_fshl((__v8su)__A, (__v8su)__A, (__v8su)__B);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS256
@@ -4354,7 +4354,7 @@ _mm256_maskz_rolv_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_rolv_epi64 (__m128i __A, __m128i __B)
{
- return (__m128i)__builtin_ia32_prolvq128((__v2di)__A, (__v2di)__B);
+ return (__m128i)__builtin_elementwise_fshl((__v2du)__A, (__v2du)__A, (__v2du)__B);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS128
@@ -4376,7 +4376,7 @@ _mm_maskz_rolv_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_rolv_epi64 (__m256i __A, __m256i __B)
{
- return (__m256i)__builtin_ia32_prolvq256((__v4di)__A, (__v4di)__B);
+ return (__m256i)__builtin_elementwise_fshl((__v4du)__A, (__v4du)__A, (__v4du)__B);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS256
@@ -4578,7 +4578,7 @@ _mm256_maskz_slli_epi64(__mmask8 __U, __m256i __A, unsigned int __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_rorv_epi32 (__m128i __A, __m128i __B)
{
- return (__m128i)__builtin_ia32_prorvd128((__v4si)__A, (__v4si)__B);
+ return (__m128i)__builtin_elementwise_fshr((__v4su)__A, (__v4su)__A, (__v4su)__B);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS128
@@ -4600,7 +4600,7 @@ _mm_maskz_rorv_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_rorv_epi32 (__m256i __A, __m256i __B)
{
- return (__m256i)__builtin_ia32_prorvd256((__v8si)__A, (__v8si)__B);
+ return (__m256i)__builtin_elementwise_fshr((__v8su)__A, (__v8su)__A, (__v8su)__B);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS256
@@ -4622,7 +4622,7 @@ _mm256_maskz_rorv_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_rorv_epi64 (__m128i __A, __m128i __B)
{
- return (__m128i)__builtin_ia32_prorvq128((__v2di)__A, (__v2di)__B);
+ return (__m128i)__builtin_elementwise_fshr((__v2du)__A, (__v2du)__A, (__v2du)__B);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS128
@@ -4644,7 +4644,7 @@ _mm_maskz_rorv_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_rorv_epi64 (__m256i __A, __m256i __B)
{
- return (__m256i)__builtin_ia32_prorvq256((__v4di)__A, (__v4di)__B);
+ return (__m256i)__builtin_elementwise_fshr((__v4du)__A, (__v4du)__A, (__v4du)__B);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS256
diff --git a/clang/lib/Headers/avx512vlvbmi2intrin.h b/clang/lib/Headers/avx512vlvbmi2intrin.h
index 77af2d5cbd2a0..04db52c822640 100644
--- a/clang/lib/Headers/avx512vlvbmi2intrin.h
+++ b/clang/lib/Headers/avx512vlvbmi2intrin.h
@@ -415,8 +415,8 @@ _mm256_maskz_expandloadu_epi8(__mmask32 __U, void const *__P)
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_shldv_epi64(__m256i __A, __m256i __B, __m256i __C)
{
- return (__m256i)__builtin_ia32_vpshldvq256((__v4di)__A, (__v4di)__B,
- (__v4di)__C);
+ return (__m256i)__builtin_elementwise_fshl((__v4du)__A, (__v4du)__B,
+ (__v4du)__C);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS256
@@ -438,8 +438,8 @@ _mm256_maskz_shldv_epi64(__mmask8 __U, __m256i __A, __m256i __B, __m256i __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_shldv_epi64(__m128i __A, __m128i __B, __m128i __C)
{
- return (__m128i)__builtin_ia32_vpshldvq128((__v2di)__A, (__v2di)__B,
- (__v2di)__C);
+ return (__m128i)__builtin_elementwise_fshl((__v2du)__A, (__v2du)__B,
+ (__v2du)__C);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS128
@@ -461,8 +461,8 @@ _mm_maskz_shldv_epi64(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C)
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_shldv_epi32(__m256i __A, __m256i __B, __m256i __C)
{
- return (__m256i)__builtin_ia32_vpshldvd256((__v8si)__A, (__v8si)__B,
- (__v8si)__C);
+ return (__m256i)__builtin_elementwise_fshl((__v8su)__A, (__v8su)__B,
+ (__v8su)__C);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS256
@@ -484,8 +484,8 @@ _mm256_maskz_shldv_epi32(__mmask8 __U, __m256i __A, __m256i __B, __m256i __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_shldv_epi32(__m128i __A, __m128i __B, __m128i __C)
{
- return (__m128i)__builtin_ia32_vpshldvd128((__v4si)__A, (__v4si)__B,
- (__v4si)__C);
+ return (__m128i)__builtin_elementwise_fshl((__v4su)__A, (__v4su)__B,
+ (__v4su)__C);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS128
@@ -507,8 +507,8 @@ _mm_maskz_shldv_epi32(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C)
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_shldv_epi16(__m256i __A, __m256i __B, __m256i __C)
{
- return (__m256i)__builtin_ia32_vpshldvw256((__v16hi)__A, (__v16hi)__B,
- (__v16hi)__C);
+ return (__m256i)__builtin_elementwise_fshl((__v16hu)__A, (__v16hu)__B,
+ (__v16hu)__C);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS256
@@ -530,8 +530,8 @@ _mm256_maskz_shldv_epi16(__mmask16 __U, __m256i __A, __m256i __B, __m256i __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_shldv_epi16(__m128i __A, __m128i __B, __m128i __C)
{
- return (__m128i)__builtin_ia32_vpshldvw128((__v8hi)__A, (__v8hi)__B,
- (__v8hi)__C);
+ return (__m128i)__builtin_elementwise_fshl((__v8hu)__A, (__v8hu)__B,
+ (__v8hu)__C);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS128
@@ -553,8 +553,9 @@ _mm_maskz_shldv_epi16(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C)
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_shrdv_epi64(__m256i __A, __m256i __B, __m256i __C)
{
- return (__m256i)__builtin_ia32_vpshrdvq256((__v4di)__A, (__v4di)__B,
- (__v4di)__C);
+ // Ops __A and __B are swapped.
+ return (__m256i)__builtin_elementwise_fshr((__v4du)__B, (__v4du)__A,
+ (__v4du)__C);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS256
@@ -576,8 +577,9 @@ _mm256_maskz_shrdv_epi64(__mmask8 __U, __m256i __A, __m256i __B, __m256i __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_shrdv_epi64(__m128i __A, __m128i __B, __m128i __C)
{
- return (__m128i)__builtin_ia32_vpshrdvq128((__v2di)__A, (__v2di)__B,
- (__v2di)__C);
+ // Ops __A and __B are swapped.
+ return (__m128i)__builtin_elementwise_fshr((__v2du)__B, (__v2du)__A,
+ (__v2du)__C);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS128
@@ -599,8 +601,9 @@ _mm_maskz_shrdv_epi64(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C)
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_shrdv_epi32(__m256i __A, __m256i __B, __m256i __C)
{
- return (__m256i)__builtin_ia32_vpshrdvd256((__v8si)__A, (__v8si)__B,
- (__v8si)__C);
+ // Ops __A and __B are swapped.
+ return (__m256i)__builtin_elementwise_fshr((__v8su)__B, (__v8su)__A,
+ (__v8su)__C);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS256
@@ -622,8 +625,9 @@ _mm256_maskz_shrdv_epi32(__mmask8 __U, __m256i __A, __m256i __B, __m256i __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_shrdv_epi32(__m128i __A, __m128i __B, __m128i __C)
{
- return (__m128i)__builtin_ia32_vpshrdvd128((__v4si)__A, (__v4si)__B,
- (__v4si)__C);
+ // Ops __A and __B are swapped.
+ return (__m128i)__builtin_elementwise_fshr((__v4su)__B, (__v4su)__A,
+ (__v4su)__C);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS128
@@ -645,8 +649,9 @@ _mm_maskz_shrdv_epi32(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C)
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_shrdv_epi16(__m256i __A, __m256i __B, __m256i __C)
{
- return (__m256i)__builtin_ia32_vpshrdvw256((__v16hi)__A, (__v16hi)__B,
- (__v16hi)__C);
+ // Ops __A and __B are swapped.
+ return (__m256i)__builtin_elementwise_fshr((__v16hu)__B, (__v16hu)__A,
+ (__v16hu)__C);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS256
@@ -668,8 +673,9 @@ _mm256_maskz_shrdv_epi16(__mmask16 __U, __m256i __A, __m256i __B, __m256i __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_shrdv_epi16(__m128i __A, __m128i __B, __m128i __C)
{
- return (__m128i)__builtin_ia32_vpshrdvw128((__v8hi)__A, (__v8hi)__B,
- (__v8hi)__C);
+ // Ops __A and __B are swapped.
+ return (__m128i)__builtin_elementwise_fshr((__v8hu)__B, (__v8hu)__A,
+ (__v8hu)__C);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS128
diff --git a/clang/lib/Headers/xopintrin.h b/clang/lib/Headers/xopintrin.h
index fb88a9060574e..7015719659139 100644
--- a/clang/lib/Headers/xopintrin.h
+++ b/clang/lib/Headers/xopintrin.h
@@ -211,25 +211,25 @@ _mm_perm_epi8(__m128i __A, __m128i __B, __m128i __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_rot_epi8(__m128i __A, __m128i __B)
{
- return (__m128i)__builtin_ia32_vprotb((__v16qi)__A, (__v16qi)__B);
+ return (__m128i)__builtin_elementwise_fshl((__v16qu)__A, (__v16qu)__A, (__v16qu)__B);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_rot_epi16(__m128i __A, __m128i __B)
{
- return (__m128i)__builtin_ia32_vprotw((__v8hi)__A, (__v8hi)__B);
+ return (__m128i)__builtin_elementwise_fshl((__v8hu)__A, (__v8hu)__A, (__v8hu)__B);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_rot_epi32(__m128i __A, __m128i __B)
{
- return (__m128i)__builtin_ia32_vprotd((__v4si)__A, (__v4si)__B);
+ return (__m128i)__builtin_elementwise_fshl((__v4su)__A, (__v4su)__A, (__v4su)__B);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_rot_epi64(__m128i __A, __m128i __B)
{
- return (__m128i)__builtin_ia32_vprotq((__v2di)__A, (__v2di)__B);
+ return (__m128i)__builtin_elementwise_fshl((__v2du)__A, (__v2du)__A, (__v2du)__B);
}
#define _mm_roti_epi8(A, N) \
More information about the cfe-commits
mailing list