[clang] 91fff70 - [clang][X86] Replace vprot/vprol/vpror/vshld/vshrd intrinsics with __builtin_elementwise_fshl/fshr (#153229)

via cfe-commits cfe-commits at lists.llvm.org
Wed Aug 13 02:28:33 PDT 2025


Author: Simon Pilgrim
Date: 2025-08-13T10:28:30+01:00
New Revision: 91fff707407ea4d3455d4675bc1310a7bc0b5d58

URL: https://github.com/llvm/llvm-project/commit/91fff707407ea4d3455d4675bc1310a7bc0b5d58
DIFF: https://github.com/llvm/llvm-project/commit/91fff707407ea4d3455d4675bc1310a7bc0b5d58.diff

LOG: [clang][X86] Replace vprot/vprol/vpror/vshld/vshrd intrinsics with __builtin_elementwise_fshl/fshr (#153229)

Replaces the XOP/AVX512 per-element rotation/funnel shift builtins with the generic __builtin_elementwise_fshl/fshr

We still have uniform immediate variants to handle next.

Part of #153152

Added: 
    

Modified: 
    clang/include/clang/Basic/BuiltinsX86.td
    clang/lib/CodeGen/TargetBuiltins/X86.cpp
    clang/lib/Headers/avx512fintrin.h
    clang/lib/Headers/avx512vbmi2intrin.h
    clang/lib/Headers/avx512vlintrin.h
    clang/lib/Headers/avx512vlvbmi2intrin.h
    clang/lib/Headers/xopintrin.h

Removed: 
    


################################################################################
diff  --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td
index fc1ee3be7889f..4262bdaa7cdd9 100644
--- a/clang/include/clang/Basic/BuiltinsX86.td
+++ b/clang/include/clang/Basic/BuiltinsX86.td
@@ -945,10 +945,6 @@ let Features = "xop", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in
   def vphsubwd : X86Builtin<"_Vector<4, int>(_Vector<8, short>)">;
   def vphsubdq : X86Builtin<"_Vector<2, long long int>(_Vector<4, int>)">;
   def vpperm : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<16, char>, _Vector<16, char>)">;
-  def vprotb : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<16, char>)">;
-  def vprotw : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">;
-  def vprotd : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>)">;
-  def vprotq : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>)">;
   def vprotbi : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Constant char)">;
   def vprotwi : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Constant char)">;
   def vprotdi : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Constant char)">;
@@ -1882,78 +1878,6 @@ let Features = "avx512vbmi2,evex512", Attributes = [NoThrow, Const, RequiredVect
   def vpshldw512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Vector<32, short>, _Constant int)">;
 }
 
-let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
-  def vpshldvd128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>, _Vector<4, int>)">;
-}
-
-let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
-  def vpshldvd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>, _Vector<8, int>)">;
-}
-
-let Features = "avx512vbmi2,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
-  def vpshldvd512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<16, int>, _Vector<16, int>)">;
-}
-
-let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
-  def vpshldvq128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>, _Vector<2, long long int>)">;
-}
-
-let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
-  def vpshldvq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>, _Vector<4, long long int>)">;
-}
-
-let Features = "avx512vbmi2,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
-  def vpshldvq512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<8, long long int>, _Vector<8, long long int>)">;
-}
-
-let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
-  def vpshldvw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>, _Vector<8, short>)">;
-}
-
-let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
-  def vpshldvw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>, _Vector<16, short>)">;
-}
-
-let Features = "avx512vbmi2,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
-  def vpshldvw512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Vector<32, short>, _Vector<32, short>)">;
-}
-
-let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
-  def vpshrdvd128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>, _Vector<4, int>)">;
-}
-
-let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
-  def vpshrdvd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>, _Vector<8, int>)">;
-}
-
-let Features = "avx512vbmi2,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
-  def vpshrdvd512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<16, int>, _Vector<16, int>)">;
-}
-
-let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
-  def vpshrdvq128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>, _Vector<2, long long int>)">;
-}
-
-let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
-  def vpshrdvq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>, _Vector<4, long long int>)">;
-}
-
-let Features = "avx512vbmi2,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
-  def vpshrdvq512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<8, long long int>, _Vector<8, long long int>)">;
-}
-
-let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
-  def vpshrdvw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>, _Vector<8, short>)">;
-}
-
-let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
-  def vpshrdvw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>, _Vector<16, short>)">;
-}
-
-let Features = "avx512vbmi2,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
-  def vpshrdvw512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Vector<32, short>, _Vector<32, short>)">;
-}
-
 let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
   def vpshrdd128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>, _Constant int)">;
 }
@@ -2165,28 +2089,10 @@ let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256
 }
 
 let Features = "avx512f,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
-  def prolvd512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<16, int>)">;
-  def prolvq512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<8, long long int>)">;
   def prord512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Constant int)">;
   def prorq512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Constant int)">;
 }
 
-let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
-  def prolvd128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>)">;
-}
-
-let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
-  def prolvd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>)">;
-}
-
-let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
-  def prolvq128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>)">;
-}
-
-let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
-  def prolvq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>)">;
-}
-
 let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
   def prord128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Constant int)">;
 }
@@ -2203,27 +2109,6 @@ let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256
   def prorq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Constant int)">;
 }
 
-let Features = "avx512f,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
-  def prorvd512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<16, int>)">;
-  def prorvq512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<8, long long int>)">;
-}
-
-let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
-  def prorvd128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>)">;
-}
-
-let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
-  def prorvd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>)">;
-}
-
-let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
-  def prorvq128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>)">;
-}
-
-let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
-  def prorvq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>)">;
-}
-
 let Features = "avx512bw,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
   def pshufhw512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Constant int)">;
   def pshuflw512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Constant int)">;

diff  --git a/clang/lib/CodeGen/TargetBuiltins/X86.cpp b/clang/lib/CodeGen/TargetBuiltins/X86.cpp
index f8d451bd20fa3..b9248a7d43f85 100644
--- a/clang/lib/CodeGen/TargetBuiltins/X86.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/X86.cpp
@@ -1932,10 +1932,6 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
     return SI;
   }
   // Rotate is a special case of funnel shift - 1st 2 args are the same.
-  case X86::BI__builtin_ia32_vprotb:
-  case X86::BI__builtin_ia32_vprotw:
-  case X86::BI__builtin_ia32_vprotd:
-  case X86::BI__builtin_ia32_vprotq:
   case X86::BI__builtin_ia32_vprotbi:
   case X86::BI__builtin_ia32_vprotwi:
   case X86::BI__builtin_ia32_vprotdi:
@@ -1946,12 +1942,6 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
   case X86::BI__builtin_ia32_prolq128:
   case X86::BI__builtin_ia32_prolq256:
   case X86::BI__builtin_ia32_prolq512:
-  case X86::BI__builtin_ia32_prolvd128:
-  case X86::BI__builtin_ia32_prolvd256:
-  case X86::BI__builtin_ia32_prolvd512:
-  case X86::BI__builtin_ia32_prolvq128:
-  case X86::BI__builtin_ia32_prolvq256:
-  case X86::BI__builtin_ia32_prolvq512:
     return EmitX86FunnelShift(*this, Ops[0], Ops[0], Ops[1], false);
   case X86::BI__builtin_ia32_prord128:
   case X86::BI__builtin_ia32_prord256:
@@ -1959,12 +1949,6 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
   case X86::BI__builtin_ia32_prorq128:
   case X86::BI__builtin_ia32_prorq256:
   case X86::BI__builtin_ia32_prorq512:
-  case X86::BI__builtin_ia32_prorvd128:
-  case X86::BI__builtin_ia32_prorvd256:
-  case X86::BI__builtin_ia32_prorvd512:
-  case X86::BI__builtin_ia32_prorvq128:
-  case X86::BI__builtin_ia32_prorvq256:
-  case X86::BI__builtin_ia32_prorvq512:
     return EmitX86FunnelShift(*this, Ops[0], Ops[0], Ops[1], true);
   case X86::BI__builtin_ia32_selectb_128:
   case X86::BI__builtin_ia32_selectb_256:
@@ -2357,29 +2341,6 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
     // Ops 0 and 1 are swapped.
     return EmitX86FunnelShift(*this, Ops[1], Ops[0], Ops[2], true);
 
-  case X86::BI__builtin_ia32_vpshldvd128:
-  case X86::BI__builtin_ia32_vpshldvd256:
-  case X86::BI__builtin_ia32_vpshldvd512:
-  case X86::BI__builtin_ia32_vpshldvq128:
-  case X86::BI__builtin_ia32_vpshldvq256:
-  case X86::BI__builtin_ia32_vpshldvq512:
-  case X86::BI__builtin_ia32_vpshldvw128:
-  case X86::BI__builtin_ia32_vpshldvw256:
-  case X86::BI__builtin_ia32_vpshldvw512:
-    return EmitX86FunnelShift(*this, Ops[0], Ops[1], Ops[2], false);
-
-  case X86::BI__builtin_ia32_vpshrdvd128:
-  case X86::BI__builtin_ia32_vpshrdvd256:
-  case X86::BI__builtin_ia32_vpshrdvd512:
-  case X86::BI__builtin_ia32_vpshrdvq128:
-  case X86::BI__builtin_ia32_vpshrdvq256:
-  case X86::BI__builtin_ia32_vpshrdvq512:
-  case X86::BI__builtin_ia32_vpshrdvw128:
-  case X86::BI__builtin_ia32_vpshrdvw256:
-  case X86::BI__builtin_ia32_vpshrdvw512:
-    // Ops 0 and 1 are swapped.
-    return EmitX86FunnelShift(*this, Ops[1], Ops[0], Ops[2], true);
-
   // Reductions
   case X86::BI__builtin_ia32_reduce_fadd_pd512:
   case X86::BI__builtin_ia32_reduce_fadd_ps512:

diff  --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h
index 9fc1df3acd3d0..90f883ba9f770 100644
--- a/clang/lib/Headers/avx512fintrin.h
+++ b/clang/lib/Headers/avx512fintrin.h
@@ -4926,7 +4926,7 @@ _mm512_maskz_cvtepu16_epi64(__mmask8 __U, __m128i __A)
 static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_rorv_epi32 (__m512i __A, __m512i __B)
 {
-  return (__m512i)__builtin_ia32_prorvd512((__v16si)__A, (__v16si)__B);
+  return (__m512i)__builtin_elementwise_fshr((__v16su)__A,(__v16su)__A, (__v16su)__B);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS512
@@ -4948,7 +4948,7 @@ _mm512_maskz_rorv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
 static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_rorv_epi64 (__m512i __A, __m512i __B)
 {
-  return (__m512i)__builtin_ia32_prorvq512((__v8di)__A, (__v8di)__B);
+  return (__m512i)__builtin_elementwise_fshr((__v8du)__A, (__v8du)__A, (__v8du)__B);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS512
@@ -5038,7 +5038,7 @@ _mm512_maskz_rorv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
 static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_rolv_epi32 (__m512i __A, __m512i __B)
 {
-  return (__m512i)__builtin_ia32_prolvd512((__v16si)__A, (__v16si)__B);
+  return (__m512i)__builtin_elementwise_fshl((__v16su)__A, (__v16su)__A, (__v16su)__B);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS512
@@ -5060,7 +5060,7 @@ _mm512_maskz_rolv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
 static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_rolv_epi64 (__m512i __A, __m512i __B)
 {
-  return (__m512i)__builtin_ia32_prolvq512((__v8di)__A, (__v8di)__B);
+  return (__m512i)__builtin_elementwise_fshl((__v8du)__A, (__v8du)__A, (__v8du)__B);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS512

diff  --git a/clang/lib/Headers/avx512vbmi2intrin.h b/clang/lib/Headers/avx512vbmi2intrin.h
index 11598c888787c..f9a5f82b61f82 100644
--- a/clang/lib/Headers/avx512vbmi2intrin.h
+++ b/clang/lib/Headers/avx512vbmi2intrin.h
@@ -215,8 +215,8 @@ _mm512_maskz_expandloadu_epi8(__mmask64 __U, void const *__P)
 static __inline__ __m512i __DEFAULT_FN_ATTRS
 _mm512_shldv_epi64(__m512i __A, __m512i __B, __m512i __C)
 {
-  return (__m512i)__builtin_ia32_vpshldvq512((__v8di)__A, (__v8di)__B,
-                                             (__v8di)__C);
+  return (__m512i)__builtin_elementwise_fshl((__v8du)__A, (__v8du)__B,
+                                             (__v8du)__C);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS
@@ -238,8 +238,8 @@ _mm512_maskz_shldv_epi64(__mmask8 __U, __m512i __A, __m512i __B, __m512i __C)
 static __inline__ __m512i __DEFAULT_FN_ATTRS
 _mm512_shldv_epi32(__m512i __A, __m512i __B, __m512i __C)
 {
-  return (__m512i)__builtin_ia32_vpshldvd512((__v16si)__A, (__v16si)__B,
-                                             (__v16si)__C);
+  return (__m512i)__builtin_elementwise_fshl((__v16su)__A, (__v16su)__B,
+                                             (__v16su)__C);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS
@@ -261,8 +261,8 @@ _mm512_maskz_shldv_epi32(__mmask16 __U, __m512i __A, __m512i __B, __m512i __C)
 static __inline__ __m512i __DEFAULT_FN_ATTRS
 _mm512_shldv_epi16(__m512i __A, __m512i __B, __m512i __C)
 {
-  return (__m512i)__builtin_ia32_vpshldvw512((__v32hi)__A, (__v32hi)__B,
-                                             (__v32hi)__C);
+  return (__m512i)__builtin_elementwise_fshl((__v32hu)__A, (__v32hu)__B,
+                                             (__v32hu)__C);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS
@@ -284,8 +284,9 @@ _mm512_maskz_shldv_epi16(__mmask32 __U, __m512i __A, __m512i __B, __m512i __C)
 static __inline__ __m512i __DEFAULT_FN_ATTRS
 _mm512_shrdv_epi64(__m512i __A, __m512i __B, __m512i __C)
 {
-  return (__m512i)__builtin_ia32_vpshrdvq512((__v8di)__A, (__v8di)__B,
-                                             (__v8di)__C);
+  // Ops __A and __B are swapped.
+  return (__m512i)__builtin_elementwise_fshr((__v8du)__B, (__v8du)__A,
+                                             (__v8du)__C);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS
@@ -307,8 +308,9 @@ _mm512_maskz_shrdv_epi64(__mmask8 __U, __m512i __A, __m512i __B, __m512i __C)
 static __inline__ __m512i __DEFAULT_FN_ATTRS
 _mm512_shrdv_epi32(__m512i __A, __m512i __B, __m512i __C)
 {
-  return (__m512i)__builtin_ia32_vpshrdvd512((__v16si)__A, (__v16si)__B,
-                                             (__v16si)__C);
+  // Ops __A and __B are swapped.
+  return (__m512i)__builtin_elementwise_fshr((__v16su)__B, (__v16su)__A,
+                                             (__v16su)__C);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS
@@ -330,8 +332,9 @@ _mm512_maskz_shrdv_epi32(__mmask16 __U, __m512i __A, __m512i __B, __m512i __C)
 static __inline__ __m512i __DEFAULT_FN_ATTRS
 _mm512_shrdv_epi16(__m512i __A, __m512i __B, __m512i __C)
 {
-  return (__m512i)__builtin_ia32_vpshrdvw512((__v32hi)__A, (__v32hi)__B,
-                                             (__v32hi)__C);
+  // Ops __A and __B are swapped.
+  return (__m512i)__builtin_elementwise_fshr((__v32hu)__B, (__v32hu)__A,
+                                             (__v32hu)__C);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS

diff  --git a/clang/lib/Headers/avx512vlintrin.h b/clang/lib/Headers/avx512vlintrin.h
index cbad39acad84f..366adab1fab32 100644
--- a/clang/lib/Headers/avx512vlintrin.h
+++ b/clang/lib/Headers/avx512vlintrin.h
@@ -4310,7 +4310,7 @@ _mm256_maskz_scalef_ps (__mmask8 __U, __m256 __A, __m256 __B) {
 static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_rolv_epi32 (__m128i __A, __m128i __B)
 {
-  return (__m128i)__builtin_ia32_prolvd128((__v4si)__A, (__v4si)__B);
+  return (__m128i)__builtin_elementwise_fshl((__v4su)__A, (__v4su)__A, (__v4su)__B);
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS128
@@ -4332,7 +4332,7 @@ _mm_maskz_rolv_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
 static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_rolv_epi32 (__m256i __A, __m256i __B)
 {
-  return (__m256i)__builtin_ia32_prolvd256((__v8si)__A, (__v8si)__B);
+  return (__m256i)__builtin_elementwise_fshl((__v8su)__A, (__v8su)__A, (__v8su)__B);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS256
@@ -4354,7 +4354,7 @@ _mm256_maskz_rolv_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
 static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_rolv_epi64 (__m128i __A, __m128i __B)
 {
-  return (__m128i)__builtin_ia32_prolvq128((__v2di)__A, (__v2di)__B);
+  return (__m128i)__builtin_elementwise_fshl((__v2du)__A, (__v2du)__A, (__v2du)__B);
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS128
@@ -4376,7 +4376,7 @@ _mm_maskz_rolv_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
 static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_rolv_epi64 (__m256i __A, __m256i __B)
 {
-  return (__m256i)__builtin_ia32_prolvq256((__v4di)__A, (__v4di)__B);
+  return (__m256i)__builtin_elementwise_fshl((__v4du)__A, (__v4du)__A, (__v4du)__B);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS256
@@ -4578,7 +4578,7 @@ _mm256_maskz_slli_epi64(__mmask8 __U, __m256i __A, unsigned int __B)
 static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_rorv_epi32 (__m128i __A, __m128i __B)
 {
-  return (__m128i)__builtin_ia32_prorvd128((__v4si)__A, (__v4si)__B);
+  return (__m128i)__builtin_elementwise_fshr((__v4su)__A, (__v4su)__A, (__v4su)__B);
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS128
@@ -4600,7 +4600,7 @@ _mm_maskz_rorv_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
 static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_rorv_epi32 (__m256i __A, __m256i __B)
 {
-  return (__m256i)__builtin_ia32_prorvd256((__v8si)__A, (__v8si)__B);
+  return (__m256i)__builtin_elementwise_fshr((__v8su)__A, (__v8su)__A, (__v8su)__B);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS256
@@ -4622,7 +4622,7 @@ _mm256_maskz_rorv_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
 static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_rorv_epi64 (__m128i __A, __m128i __B)
 {
-  return (__m128i)__builtin_ia32_prorvq128((__v2di)__A, (__v2di)__B);
+  return (__m128i)__builtin_elementwise_fshr((__v2du)__A, (__v2du)__A, (__v2du)__B);
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS128
@@ -4644,7 +4644,7 @@ _mm_maskz_rorv_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
 static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_rorv_epi64 (__m256i __A, __m256i __B)
 {
-  return (__m256i)__builtin_ia32_prorvq256((__v4di)__A, (__v4di)__B);
+  return (__m256i)__builtin_elementwise_fshr((__v4du)__A, (__v4du)__A, (__v4du)__B);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS256

diff  --git a/clang/lib/Headers/avx512vlvbmi2intrin.h b/clang/lib/Headers/avx512vlvbmi2intrin.h
index 77af2d5cbd2a0..04db52c822640 100644
--- a/clang/lib/Headers/avx512vlvbmi2intrin.h
+++ b/clang/lib/Headers/avx512vlvbmi2intrin.h
@@ -415,8 +415,8 @@ _mm256_maskz_expandloadu_epi8(__mmask32 __U, void const *__P)
 static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_shldv_epi64(__m256i __A, __m256i __B, __m256i __C)
 {
-  return (__m256i)__builtin_ia32_vpshldvq256((__v4di)__A, (__v4di)__B,
-                                             (__v4di)__C);
+  return (__m256i)__builtin_elementwise_fshl((__v4du)__A, (__v4du)__B,
+                                             (__v4du)__C);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS256
@@ -438,8 +438,8 @@ _mm256_maskz_shldv_epi64(__mmask8 __U, __m256i __A, __m256i __B, __m256i __C)
 static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_shldv_epi64(__m128i __A, __m128i __B, __m128i __C)
 {
-  return (__m128i)__builtin_ia32_vpshldvq128((__v2di)__A, (__v2di)__B,
-                                             (__v2di)__C);
+  return (__m128i)__builtin_elementwise_fshl((__v2du)__A, (__v2du)__B,
+                                             (__v2du)__C);
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS128
@@ -461,8 +461,8 @@ _mm_maskz_shldv_epi64(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C)
 static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_shldv_epi32(__m256i __A, __m256i __B, __m256i __C)
 {
-  return (__m256i)__builtin_ia32_vpshldvd256((__v8si)__A, (__v8si)__B,
-                                             (__v8si)__C);
+  return (__m256i)__builtin_elementwise_fshl((__v8su)__A, (__v8su)__B,
+                                             (__v8su)__C);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS256
@@ -484,8 +484,8 @@ _mm256_maskz_shldv_epi32(__mmask8 __U, __m256i __A, __m256i __B, __m256i __C)
 static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_shldv_epi32(__m128i __A, __m128i __B, __m128i __C)
 {
-  return (__m128i)__builtin_ia32_vpshldvd128((__v4si)__A, (__v4si)__B,
-                                             (__v4si)__C);
+  return (__m128i)__builtin_elementwise_fshl((__v4su)__A, (__v4su)__B,
+                                             (__v4su)__C);
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS128
@@ -507,8 +507,8 @@ _mm_maskz_shldv_epi32(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C)
 static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_shldv_epi16(__m256i __A, __m256i __B, __m256i __C)
 {
-  return (__m256i)__builtin_ia32_vpshldvw256((__v16hi)__A, (__v16hi)__B,
-                                             (__v16hi)__C);
+  return (__m256i)__builtin_elementwise_fshl((__v16hu)__A, (__v16hu)__B,
+                                             (__v16hu)__C);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS256
@@ -530,8 +530,8 @@ _mm256_maskz_shldv_epi16(__mmask16 __U, __m256i __A, __m256i __B, __m256i __C)
 static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_shldv_epi16(__m128i __A, __m128i __B, __m128i __C)
 {
-  return (__m128i)__builtin_ia32_vpshldvw128((__v8hi)__A, (__v8hi)__B,
-                                             (__v8hi)__C);
+  return (__m128i)__builtin_elementwise_fshl((__v8hu)__A, (__v8hu)__B,
+                                             (__v8hu)__C);
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS128
@@ -553,8 +553,9 @@ _mm_maskz_shldv_epi16(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C)
 static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_shrdv_epi64(__m256i __A, __m256i __B, __m256i __C)
 {
-  return (__m256i)__builtin_ia32_vpshrdvq256((__v4di)__A, (__v4di)__B,
-                                             (__v4di)__C);
+  // Ops __A and __B are swapped.
+  return (__m256i)__builtin_elementwise_fshr((__v4du)__B, (__v4du)__A,
+                                             (__v4du)__C);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS256
@@ -576,8 +577,9 @@ _mm256_maskz_shrdv_epi64(__mmask8 __U, __m256i __A, __m256i __B, __m256i __C)
 static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_shrdv_epi64(__m128i __A, __m128i __B, __m128i __C)
 {
-  return (__m128i)__builtin_ia32_vpshrdvq128((__v2di)__A, (__v2di)__B,
-                                             (__v2di)__C);
+  // Ops __A and __B are swapped.
+  return (__m128i)__builtin_elementwise_fshr((__v2du)__B, (__v2du)__A,
+                                             (__v2du)__C);
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS128
@@ -599,8 +601,9 @@ _mm_maskz_shrdv_epi64(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C)
 static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_shrdv_epi32(__m256i __A, __m256i __B, __m256i __C)
 {
-  return (__m256i)__builtin_ia32_vpshrdvd256((__v8si)__A, (__v8si)__B,
-                                             (__v8si)__C);
+  // Ops __A and __B are swapped.
+  return (__m256i)__builtin_elementwise_fshr((__v8su)__B, (__v8su)__A,
+                                             (__v8su)__C);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS256
@@ -622,8 +625,9 @@ _mm256_maskz_shrdv_epi32(__mmask8 __U, __m256i __A, __m256i __B, __m256i __C)
 static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_shrdv_epi32(__m128i __A, __m128i __B, __m128i __C)
 {
-  return (__m128i)__builtin_ia32_vpshrdvd128((__v4si)__A, (__v4si)__B,
-                                             (__v4si)__C);
+  // Ops __A and __B are swapped.
+  return (__m128i)__builtin_elementwise_fshr((__v4su)__B, (__v4su)__A,
+                                             (__v4su)__C);
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS128
@@ -645,8 +649,9 @@ _mm_maskz_shrdv_epi32(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C)
 static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_shrdv_epi16(__m256i __A, __m256i __B, __m256i __C)
 {
-  return (__m256i)__builtin_ia32_vpshrdvw256((__v16hi)__A, (__v16hi)__B,
-                                             (__v16hi)__C);
+  // Ops __A and __B are swapped.
+  return (__m256i)__builtin_elementwise_fshr((__v16hu)__B, (__v16hu)__A,
+                                             (__v16hu)__C);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS256
@@ -668,8 +673,9 @@ _mm256_maskz_shrdv_epi16(__mmask16 __U, __m256i __A, __m256i __B, __m256i __C)
 static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_shrdv_epi16(__m128i __A, __m128i __B, __m128i __C)
 {
-  return (__m128i)__builtin_ia32_vpshrdvw128((__v8hi)__A, (__v8hi)__B,
-                                             (__v8hi)__C);
+  // Ops __A and __B are swapped.
+  return (__m128i)__builtin_elementwise_fshr((__v8hu)__B, (__v8hu)__A,
+                                             (__v8hu)__C);
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS128

diff  --git a/clang/lib/Headers/xopintrin.h b/clang/lib/Headers/xopintrin.h
index fb88a9060574e..7015719659139 100644
--- a/clang/lib/Headers/xopintrin.h
+++ b/clang/lib/Headers/xopintrin.h
@@ -211,25 +211,25 @@ _mm_perm_epi8(__m128i __A, __m128i __B, __m128i __C)
 static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_rot_epi8(__m128i __A, __m128i __B)
 {
-  return (__m128i)__builtin_ia32_vprotb((__v16qi)__A, (__v16qi)__B);
+  return (__m128i)__builtin_elementwise_fshl((__v16qu)__A, (__v16qu)__A, (__v16qu)__B);
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_rot_epi16(__m128i __A, __m128i __B)
 {
-  return (__m128i)__builtin_ia32_vprotw((__v8hi)__A, (__v8hi)__B);
+  return (__m128i)__builtin_elementwise_fshl((__v8hu)__A, (__v8hu)__A, (__v8hu)__B);
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_rot_epi32(__m128i __A, __m128i __B)
 {
-  return (__m128i)__builtin_ia32_vprotd((__v4si)__A, (__v4si)__B);
+  return (__m128i)__builtin_elementwise_fshl((__v4su)__A, (__v4su)__A, (__v4su)__B);
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_rot_epi64(__m128i __A, __m128i __B)
 {
-  return (__m128i)__builtin_ia32_vprotq((__v2di)__A, (__v2di)__B);
+  return (__m128i)__builtin_elementwise_fshl((__v2du)__A, (__v2du)__A, (__v2du)__B);
 }
 
 #define _mm_roti_epi8(A, N) \


        


More information about the cfe-commits mailing list