[clang] a59faf2 - Revert rG6c174ab2ad0676b295f11f6c3913eff9289fa6b9 "[X86] Remove __builtin_ia32_padd/psub saturated intrinsics and use generic __builtin_elementwise_add/sub_sat"

Simon Pilgrim via cfe-commits cfe-commits at lists.llvm.org
Tue Feb 8 06:45:40 PST 2022


Author: Simon Pilgrim
Date: 2022-02-08T14:45:28Z
New Revision: a59faf272e57c1a3dabda89fc22f9f9e61a350fe

URL: https://github.com/llvm/llvm-project/commit/a59faf272e57c1a3dabda89fc22f9f9e61a350fe
DIFF: https://github.com/llvm/llvm-project/commit/a59faf272e57c1a3dabda89fc22f9f9e61a350fe.diff

LOG: Revert rG6c174ab2ad0676b295f11f6c3913eff9289fa6b9 "[X86] Remove __builtin_ia32_padd/psub saturated intrinsics and use generic __builtin_elementwise_add/sub_sat"

Missed some legacy builtin tests that need cleaning up first

Added: 
    

Modified: 
    clang/include/clang/Basic/BuiltinsX86.def
    clang/lib/CodeGen/CGBuiltin.cpp
    clang/lib/Headers/avx2intrin.h
    clang/lib/Headers/avx512bwintrin.h
    clang/lib/Headers/emmintrin.h

Removed: 
    


################################################################################
diff  --git a/clang/include/clang/Basic/BuiltinsX86.def b/clang/include/clang/Basic/BuiltinsX86.def
index 51d5db64f333..0669a96b942b 100644
--- a/clang/include/clang/Basic/BuiltinsX86.def
+++ b/clang/include/clang/Basic/BuiltinsX86.def
@@ -254,6 +254,14 @@ TARGET_BUILTIN(__builtin_ia32_minpd, "V2dV2dV2d", "ncV:128:", "sse2")
 TARGET_BUILTIN(__builtin_ia32_maxpd, "V2dV2dV2d", "ncV:128:", "sse2")
 TARGET_BUILTIN(__builtin_ia32_minsd, "V2dV2dV2d", "ncV:128:", "sse2")
 TARGET_BUILTIN(__builtin_ia32_maxsd, "V2dV2dV2d", "ncV:128:", "sse2")
+TARGET_BUILTIN(__builtin_ia32_paddsb128, "V16cV16cV16c", "ncV:128:", "sse2")
+TARGET_BUILTIN(__builtin_ia32_paddsw128, "V8sV8sV8s", "ncV:128:", "sse2")
+TARGET_BUILTIN(__builtin_ia32_psubsb128, "V16cV16cV16c", "ncV:128:", "sse2")
+TARGET_BUILTIN(__builtin_ia32_psubsw128, "V8sV8sV8s", "ncV:128:", "sse2")
+TARGET_BUILTIN(__builtin_ia32_paddusb128, "V16cV16cV16c", "ncV:128:", "sse2")
+TARGET_BUILTIN(__builtin_ia32_paddusw128, "V8sV8sV8s", "ncV:128:", "sse2")
+TARGET_BUILTIN(__builtin_ia32_psubusb128, "V16cV16cV16c", "ncV:128:", "sse2")
+TARGET_BUILTIN(__builtin_ia32_psubusw128, "V8sV8sV8s", "ncV:128:", "sse2")
 TARGET_BUILTIN(__builtin_ia32_pmulhw128, "V8sV8sV8s", "ncV:128:", "sse2")
 TARGET_BUILTIN(__builtin_ia32_pavgb128, "V16cV16cV16c", "ncV:128:", "sse2")
 TARGET_BUILTIN(__builtin_ia32_pavgw128, "V8sV8sV8s", "ncV:128:", "sse2")
@@ -539,6 +547,14 @@ TARGET_BUILTIN(__builtin_ia32_packsswb256, "V32cV16sV16s", "ncV:256:", "avx2")
 TARGET_BUILTIN(__builtin_ia32_packssdw256, "V16sV8iV8i", "ncV:256:", "avx2")
 TARGET_BUILTIN(__builtin_ia32_packuswb256, "V32cV16sV16s", "ncV:256:", "avx2")
 TARGET_BUILTIN(__builtin_ia32_packusdw256, "V16sV8iV8i", "ncV:256:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_paddsb256, "V32cV32cV32c", "ncV:256:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_paddsw256, "V16sV16sV16s", "ncV:256:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_psubsb256, "V32cV32cV32c", "ncV:256:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_psubsw256, "V16sV16sV16s", "ncV:256:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_paddusb256, "V32cV32cV32c", "ncV:256:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_paddusw256, "V16sV16sV16s", "ncV:256:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_psubusb256, "V32cV32cV32c", "ncV:256:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_psubusw256, "V16sV16sV16s", "ncV:256:", "avx2")
 TARGET_BUILTIN(__builtin_ia32_palignr256, "V32cV32cV32cIi", "ncV:256:", "avx2")
 TARGET_BUILTIN(__builtin_ia32_pavgb256, "V32cV32cV32c", "ncV:256:", "avx2")
 TARGET_BUILTIN(__builtin_ia32_pavgw256, "V16sV16sV16s", "ncV:256:", "avx2")
@@ -993,9 +1009,17 @@ TARGET_BUILTIN(__builtin_ia32_packssdw512, "V32sV16iV16i", "ncV:512:", "avx512bw
 TARGET_BUILTIN(__builtin_ia32_packsswb512, "V64cV32sV32s", "ncV:512:", "avx512bw")
 TARGET_BUILTIN(__builtin_ia32_packusdw512, "V32sV16iV16i", "ncV:512:", "avx512bw")
 TARGET_BUILTIN(__builtin_ia32_packuswb512, "V64cV32sV32s", "ncV:512:", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_paddsb512, "V64cV64cV64c", "ncV:512:", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_paddsw512, "V32sV32sV32s", "ncV:512:", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_paddusb512, "V64cV64cV64c", "ncV:512:", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_paddusw512, "V32sV32sV32s", "ncV:512:", "avx512bw")
 TARGET_BUILTIN(__builtin_ia32_pavgb512, "V64cV64cV64c", "ncV:512:", "avx512bw")
 TARGET_BUILTIN(__builtin_ia32_pavgw512, "V32sV32sV32s", "ncV:512:", "avx512bw")
 TARGET_BUILTIN(__builtin_ia32_pshufb512, "V64cV64cV64c", "ncV:512:", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_psubsb512, "V64cV64cV64c", "ncV:512:", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_psubsw512, "V32sV32sV32s", "ncV:512:", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_psubusb512, "V64cV64cV64c", "ncV:512:", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_psubusw512, "V32sV32sV32s", "ncV:512:", "avx512bw")
 
 TARGET_BUILTIN(__builtin_ia32_vpconflictdi_128, "V2OiV2Oi", "ncV:128:", "avx512cd,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_vpconflictdi_256, "V4OiV4Oi", "ncV:256:", "avx512cd,avx512vl")

diff  --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 310a1ea583ce..efb327e6d770 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -12552,6 +12552,13 @@ static Value *EmitX86SExtMask(CodeGenFunction &CGF, Value *Op,
   return CGF.Builder.CreateSExt(Mask, DstTy, "vpmovm2");
 }
 
+// Emit binary intrinsic with the same type used in result/args.
+static Value *EmitX86BinaryIntrinsic(CodeGenFunction &CGF,
+                                     ArrayRef<Value *> Ops, Intrinsic::ID IID) {
+  llvm::Function *F = CGF.CGM.getIntrinsic(IID, Ops[0]->getType());
+  return CGF.Builder.CreateCall(F, {Ops[0], Ops[1]});
+}
+
 Value *CodeGenFunction::EmitX86CpuIs(const CallExpr *E) {
   const Expr *CPUExpr = E->getArg(0)->IgnoreParenCasts();
   StringRef CPUStr = cast<clang::StringLiteral>(CPUExpr)->getString();
@@ -15005,6 +15012,34 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
     Load->setVolatile(true);
     return Load;
   }
+  case X86::BI__builtin_ia32_paddsb512:
+  case X86::BI__builtin_ia32_paddsw512:
+  case X86::BI__builtin_ia32_paddsb256:
+  case X86::BI__builtin_ia32_paddsw256:
+  case X86::BI__builtin_ia32_paddsb128:
+  case X86::BI__builtin_ia32_paddsw128:
+    return EmitX86BinaryIntrinsic(*this, Ops, Intrinsic::sadd_sat);
+  case X86::BI__builtin_ia32_paddusb512:
+  case X86::BI__builtin_ia32_paddusw512:
+  case X86::BI__builtin_ia32_paddusb256:
+  case X86::BI__builtin_ia32_paddusw256:
+  case X86::BI__builtin_ia32_paddusb128:
+  case X86::BI__builtin_ia32_paddusw128:
+    return EmitX86BinaryIntrinsic(*this, Ops, Intrinsic::uadd_sat);
+  case X86::BI__builtin_ia32_psubsb512:
+  case X86::BI__builtin_ia32_psubsw512:
+  case X86::BI__builtin_ia32_psubsb256:
+  case X86::BI__builtin_ia32_psubsw256:
+  case X86::BI__builtin_ia32_psubsb128:
+  case X86::BI__builtin_ia32_psubsw128:
+    return EmitX86BinaryIntrinsic(*this, Ops, Intrinsic::ssub_sat);
+  case X86::BI__builtin_ia32_psubusb512:
+  case X86::BI__builtin_ia32_psubusw512:
+  case X86::BI__builtin_ia32_psubusb256:
+  case X86::BI__builtin_ia32_psubusw256:
+  case X86::BI__builtin_ia32_psubusb128:
+  case X86::BI__builtin_ia32_psubusw128:
+    return EmitX86BinaryIntrinsic(*this, Ops, Intrinsic::usub_sat);
   case X86::BI__builtin_ia32_encodekey128_u32: {
     Intrinsic::ID IID = Intrinsic::x86_encodekey128;
 

diff  --git a/clang/lib/Headers/avx2intrin.h b/clang/lib/Headers/avx2intrin.h
index f8521e7d72b5..e33514a60ff3 100644
--- a/clang/lib/Headers/avx2intrin.h
+++ b/clang/lib/Headers/avx2intrin.h
@@ -92,25 +92,25 @@ _mm256_add_epi64(__m256i __a, __m256i __b)
 static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_adds_epi8(__m256i __a, __m256i __b)
 {
-  return (__m256i)__builtin_elementwise_add_sat((__v32qs)__a, (__v32qs)__b);
+  return (__m256i)__builtin_ia32_paddsb256((__v32qi)__a, (__v32qi)__b);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_adds_epi16(__m256i __a, __m256i __b)
 {
-  return (__m256i)__builtin_elementwise_add_sat((__v16hi)__a, (__v16hi)__b);
+  return (__m256i)__builtin_ia32_paddsw256((__v16hi)__a, (__v16hi)__b);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_adds_epu8(__m256i __a, __m256i __b)
 {
-  return (__m256i)__builtin_elementwise_add_sat((__v32qu)__a, (__v32qu)__b);
+  return (__m256i)__builtin_ia32_paddusb256((__v32qi)__a, (__v32qi)__b);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_adds_epu16(__m256i __a, __m256i __b)
 {
-  return (__m256i)__builtin_elementwise_add_sat((__v16hu)__a, (__v16hu)__b);
+  return (__m256i)__builtin_ia32_paddusw256((__v16hi)__a, (__v16hi)__b);
 }
 
 #define _mm256_alignr_epi8(a, b, n) \
@@ -628,25 +628,25 @@ _mm256_sub_epi64(__m256i __a, __m256i __b)
 static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_subs_epi8(__m256i __a, __m256i __b)
 {
-  return (__m256i)__builtin_elementwise_sub_sat((__v32qs)__a, (__v32qs)__b);
+  return (__m256i)__builtin_ia32_psubsb256((__v32qi)__a, (__v32qi)__b);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_subs_epi16(__m256i __a, __m256i __b)
 {
-  return (__m256i)__builtin_elementwise_sub_sat((__v16hi)__a, (__v16hi)__b);
+  return (__m256i)__builtin_ia32_psubsw256((__v16hi)__a, (__v16hi)__b);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_subs_epu8(__m256i __a, __m256i __b)
 {
-  return (__m256i)__builtin_elementwise_sub_sat((__v32qu)__a, (__v32qu)__b);
+  return (__m256i)__builtin_ia32_psubusb256((__v32qi)__a, (__v32qi)__b);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_subs_epu16(__m256i __a, __m256i __b)
 {
-  return (__m256i)__builtin_elementwise_sub_sat((__v16hu)__a, (__v16hu)__b);
+  return (__m256i)__builtin_ia32_psubusw256((__v16hi)__a, (__v16hi)__b);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS256

diff  --git a/clang/lib/Headers/avx512bwintrin.h b/clang/lib/Headers/avx512bwintrin.h
index c99ef9e3bd54..522ef100bab1 100644
--- a/clang/lib/Headers/avx512bwintrin.h
+++ b/clang/lib/Headers/avx512bwintrin.h
@@ -617,7 +617,7 @@ _mm512_maskz_packus_epi16(__mmask64 __M, __m512i __A, __m512i __B)
 static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_adds_epi8 (__m512i __A, __m512i __B)
 {
-  return (__m512i)__builtin_elementwise_add_sat((__v64qs)__A, (__v64qs)__B);
+  return (__m512i)__builtin_ia32_paddsb512((__v64qi)__A, (__v64qi)__B);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS512
@@ -639,7 +639,7 @@ _mm512_maskz_adds_epi8 (__mmask64 __U, __m512i __A, __m512i __B)
 static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_adds_epi16 (__m512i __A, __m512i __B)
 {
-  return (__m512i)__builtin_elementwise_add_sat((__v32hi)__A, (__v32hi)__B);
+  return (__m512i)__builtin_ia32_paddsw512((__v32hi)__A, (__v32hi)__B);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS512
@@ -661,7 +661,7 @@ _mm512_maskz_adds_epi16 (__mmask32 __U, __m512i __A, __m512i __B)
 static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_adds_epu8 (__m512i __A, __m512i __B)
 {
-  return (__m512i)__builtin_elementwise_add_sat((__v64qu) __A, (__v64qu) __B);
+  return (__m512i)__builtin_ia32_paddusb512((__v64qi) __A, (__v64qi) __B);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS512
@@ -683,7 +683,7 @@ _mm512_maskz_adds_epu8 (__mmask64 __U, __m512i __A, __m512i __B)
 static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_adds_epu16 (__m512i __A, __m512i __B)
 {
-  return (__m512i)__builtin_elementwise_add_sat((__v32hu) __A, (__v32hu) __B);
+  return (__m512i)__builtin_ia32_paddusw512((__v32hi) __A, (__v32hi) __B);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS512
@@ -950,7 +950,7 @@ _mm512_maskz_shuffle_epi8(__mmask64 __U, __m512i __A, __m512i __B)
 static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_subs_epi8 (__m512i __A, __m512i __B)
 {
-  return (__m512i)__builtin_elementwise_sub_sat((__v64qs)__A, (__v64qs)__B);
+  return (__m512i)__builtin_ia32_psubsb512((__v64qi)__A, (__v64qi)__B);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS512
@@ -972,7 +972,7 @@ _mm512_maskz_subs_epi8 (__mmask64 __U, __m512i __A, __m512i __B)
 static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_subs_epi16 (__m512i __A, __m512i __B)
 {
-  return (__m512i)__builtin_elementwise_sub_sat((__v32hi)__A, (__v32hi)__B);
+  return (__m512i)__builtin_ia32_psubsw512((__v32hi)__A, (__v32hi)__B);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS512
@@ -994,7 +994,7 @@ _mm512_maskz_subs_epi16 (__mmask32 __U, __m512i __A, __m512i __B)
 static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_subs_epu8 (__m512i __A, __m512i __B)
 {
-  return (__m512i)__builtin_elementwise_sub_sat((__v64qu) __A, (__v64qu) __B);
+  return (__m512i)__builtin_ia32_psubusb512((__v64qi) __A, (__v64qi) __B);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS512
@@ -1016,7 +1016,7 @@ _mm512_maskz_subs_epu8 (__mmask64 __U, __m512i __A, __m512i __B)
 static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_subs_epu16 (__m512i __A, __m512i __B)
 {
-  return (__m512i)__builtin_elementwise_sub_sat((__v32hu) __A, (__v32hu) __B);
+  return (__m512i)__builtin_ia32_psubusw512((__v32hi) __A, (__v32hi) __B);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS512

diff  --git a/clang/lib/Headers/emmintrin.h b/clang/lib/Headers/emmintrin.h
index 942a0f788a8c..4618b808efc4 100644
--- a/clang/lib/Headers/emmintrin.h
+++ b/clang/lib/Headers/emmintrin.h
@@ -2225,7 +2225,7 @@ _mm_add_epi64(__m128i __a, __m128i __b)
 static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_adds_epi8(__m128i __a, __m128i __b)
 {
-  return (__m128i)__builtin_elementwise_add_sat((__v16qs)__a, (__v16qs)__b);
+  return (__m128i)__builtin_ia32_paddsb128((__v16qi)__a, (__v16qi)__b);
 }
 
 /// Adds, with saturation, the corresponding elements of two 128-bit
@@ -2247,7 +2247,7 @@ _mm_adds_epi8(__m128i __a, __m128i __b)
 static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_adds_epi16(__m128i __a, __m128i __b)
 {
-  return (__m128i)__builtin_elementwise_add_sat((__v8hi)__a, (__v8hi)__b);
+  return (__m128i)__builtin_ia32_paddsw128((__v8hi)__a, (__v8hi)__b);
 }
 
 /// Adds, with saturation, the corresponding elements of two 128-bit
@@ -2268,7 +2268,7 @@ _mm_adds_epi16(__m128i __a, __m128i __b)
 static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_adds_epu8(__m128i __a, __m128i __b)
 {
-  return (__m128i)__builtin_elementwise_add_sat((__v16qu)__a, (__v16qu)__b);
+  return (__m128i)__builtin_ia32_paddusb128((__v16qi)__a, (__v16qi)__b);
 }
 
 /// Adds, with saturation, the corresponding elements of two 128-bit
@@ -2289,7 +2289,7 @@ _mm_adds_epu8(__m128i __a, __m128i __b)
 static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_adds_epu16(__m128i __a, __m128i __b)
 {
-  return (__m128i)__builtin_elementwise_add_sat((__v8hu)__a, (__v8hu)__b);
+  return (__m128i)__builtin_ia32_paddusw128((__v8hi)__a, (__v8hi)__b);
 }
 
 /// Computes the rounded averages of corresponding elements of two
@@ -2667,7 +2667,7 @@ _mm_sub_epi64(__m128i __a, __m128i __b)
 static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_subs_epi8(__m128i __a, __m128i __b)
 {
-  return (__m128i)__builtin_elementwise_sub_sat((__v16qs)__a, (__v16qs)__b);
+  return (__m128i)__builtin_ia32_psubsb128((__v16qi)__a, (__v16qi)__b);
 }
 
 /// Subtracts corresponding 16-bit signed integer values in the input and
@@ -2688,7 +2688,7 @@ _mm_subs_epi8(__m128i __a, __m128i __b)
 static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_subs_epi16(__m128i __a, __m128i __b)
 {
-  return (__m128i)__builtin_elementwise_sub_sat((__v8hi)__a, (__v8hi)__b);
+  return (__m128i)__builtin_ia32_psubsw128((__v8hi)__a, (__v8hi)__b);
 }
 
 /// Subtracts corresponding 8-bit unsigned integer values in the input
@@ -2708,7 +2708,7 @@ _mm_subs_epi16(__m128i __a, __m128i __b)
 static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_subs_epu8(__m128i __a, __m128i __b)
 {
-  return (__m128i)__builtin_elementwise_sub_sat((__v16qu)__a, (__v16qu)__b);
+  return (__m128i)__builtin_ia32_psubusb128((__v16qi)__a, (__v16qi)__b);
 }
 
 /// Subtracts corresponding 16-bit unsigned integer values in the input
@@ -2728,7 +2728,7 @@ _mm_subs_epu8(__m128i __a, __m128i __b)
 static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_subs_epu16(__m128i __a, __m128i __b)
 {
-  return (__m128i)__builtin_elementwise_sub_sat((__v8hu)__a, (__v8hu)__b);
+  return (__m128i)__builtin_ia32_psubusw128((__v8hi)__a, (__v8hi)__b);
 }
 
 /// Performs a bitwise AND of two 128-bit integer vectors.


        


More information about the cfe-commits mailing list