[clang] 4727d29 - [X86] Remove __builtin_ia32_pabs intrinsics and use generic __builtin_elementwise_abs

Simon Pilgrim via cfe-commits cfe-commits at lists.llvm.org
Fri Jan 21 03:59:17 PST 2022


Author: Simon Pilgrim
Date: 2022-01-21T11:59:08Z
New Revision: 4727d29d908f9dd608dd97a58c0af1ad579fd3ca

URL: https://github.com/llvm/llvm-project/commit/4727d29d908f9dd608dd97a58c0af1ad579fd3ca
DIFF: https://github.com/llvm/llvm-project/commit/4727d29d908f9dd608dd97a58c0af1ad579fd3ca.diff

LOG: [X86] Remove __builtin_ia32_pabs intrinsics and use generic __builtin_elementwise_abs

D111986 added the generic `__builtin_elementwise_abs()` intrinsic with the same integer absolute behaviour as the SSE/AVX instructions (abs(INT_MIN) == INT_MIN)

This patch removes the `__builtin_ia32_pabs*` intrinsics and just uses `__builtin_elementwise_abs` - the existing tests see no changes:
```
__m256i test_mm256_abs_epi8(__m256i a) {
  // CHECK-LABEL: test_mm256_abs_epi8
  // CHECK: [[ABS:%.*]] = call <32 x i8> @llvm.abs.v32i8(<32 x i8> %{{.*}}, i1 false)
  return _mm256_abs_epi8(a);
}
```
This requires us to add a `__v64qs` explicitly signed char vector type (we already have `__v16qs` and `__v32qs`).

Differential Revision: https://reviews.llvm.org/D117791

Added: 
    

Modified: 
    clang/include/clang/Basic/BuiltinsX86.def
    clang/lib/CodeGen/CGBuiltin.cpp
    clang/lib/Headers/avx2intrin.h
    clang/lib/Headers/avx512bwintrin.h
    clang/lib/Headers/avx512fintrin.h
    clang/lib/Headers/avx512vlintrin.h
    clang/lib/Headers/tmmintrin.h
    clang/test/CodeGen/builtins-x86.c

Removed: 
    


################################################################################
diff  --git a/clang/include/clang/Basic/BuiltinsX86.def b/clang/include/clang/Basic/BuiltinsX86.def
index bc6208be45606..9b7c763b0c6c7 100644
--- a/clang/include/clang/Basic/BuiltinsX86.def
+++ b/clang/include/clang/Basic/BuiltinsX86.def
@@ -296,9 +296,6 @@ TARGET_BUILTIN(__builtin_ia32_pshufb128, "V16cV16cV16c", "ncV:128:", "ssse3")
 TARGET_BUILTIN(__builtin_ia32_psignb128, "V16cV16cV16c", "ncV:128:", "ssse3")
 TARGET_BUILTIN(__builtin_ia32_psignw128, "V8sV8sV8s", "ncV:128:", "ssse3")
 TARGET_BUILTIN(__builtin_ia32_psignd128, "V4iV4iV4i", "ncV:128:", "ssse3")
-TARGET_BUILTIN(__builtin_ia32_pabsb128, "V16cV16c", "ncV:128:", "ssse3")
-TARGET_BUILTIN(__builtin_ia32_pabsw128, "V8sV8s", "ncV:128:", "ssse3")
-TARGET_BUILTIN(__builtin_ia32_pabsd128, "V4iV4i", "ncV:128:", "ssse3")
 
 TARGET_BUILTIN(__builtin_ia32_ldmxcsr, "vUi", "n", "sse")
 TARGET_HEADER_BUILTIN(_mm_setcsr, "vUi", "nh","xmmintrin.h", ALL_LANGUAGES, "sse")
@@ -558,9 +555,6 @@ TARGET_BUILTIN(__builtin_ia32_vec_set_v8si, "V8iV8iiIi", "ncV:256:", "avx")
 
 // AVX2
 TARGET_BUILTIN(__builtin_ia32_mpsadbw256, "V32cV32cV32cIc", "ncV:256:", "avx2")
-TARGET_BUILTIN(__builtin_ia32_pabsb256, "V32cV32c", "ncV:256:", "avx2")
-TARGET_BUILTIN(__builtin_ia32_pabsw256, "V16sV16s", "ncV:256:", "avx2")
-TARGET_BUILTIN(__builtin_ia32_pabsd256, "V8iV8i", "ncV:256:", "avx2")
 TARGET_BUILTIN(__builtin_ia32_packsswb256, "V32cV16sV16s", "ncV:256:", "avx2")
 TARGET_BUILTIN(__builtin_ia32_packssdw256, "V16sV8iV8i", "ncV:256:", "avx2")
 TARGET_BUILTIN(__builtin_ia32_packuswb256, "V32cV16sV16s", "ncV:256:", "avx2")
@@ -927,8 +921,6 @@ TARGET_BUILTIN(__builtin_ia32_cvtudq2ps512_mask, "V16fV16iV16fUsIi", "ncV:512:",
 TARGET_BUILTIN(__builtin_ia32_cvtpd2ps512_mask, "V8fV8dV8fUcIi", "ncV:512:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_vcvtps2ph512_mask, "V16sV16fIiV16sUs", "ncV:512:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_vcvtph2ps512_mask, "V16fV16sV16fUsIi", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_pabsd512, "V16iV16i", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_pabsq512, "V8OiV8Oi", "ncV:512:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_pmaxsd512, "V16iV16iV16i", "ncV:512:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_pmaxsq512, "V8OiV8OiV8Oi", "ncV:512:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_pmaxud512, "V16iV16iV16i", "ncV:512:", "avx512f")
@@ -1045,8 +1037,6 @@ TARGET_BUILTIN(__builtin_ia32_ucmpd512_mask, "UsV16iV16iIiUs", "ncV:512:", "avx5
 TARGET_BUILTIN(__builtin_ia32_ucmpq512_mask, "UcV8OiV8OiIiUc", "ncV:512:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_ucmpw512_mask, "UiV32sV32sIiUi", "ncV:512:", "avx512bw")
 
-TARGET_BUILTIN(__builtin_ia32_pabsb512, "V64cV64c", "ncV:512:", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_pabsw512, "V32sV32s", "ncV:512:", "avx512bw")
 TARGET_BUILTIN(__builtin_ia32_packssdw512, "V32sV16iV16i", "ncV:512:", "avx512bw")
 TARGET_BUILTIN(__builtin_ia32_packsswb512, "V64cV32sV32s", "ncV:512:", "avx512bw")
 TARGET_BUILTIN(__builtin_ia32_packusdw512, "V32sV16iV16i", "ncV:512:", "avx512bw")
@@ -1198,8 +1188,6 @@ TARGET_BUILTIN(__builtin_ia32_getexppd128_mask, "V2dV2dV2dUc", "ncV:128:", "avx5
 TARGET_BUILTIN(__builtin_ia32_getexppd256_mask, "V4dV4dV4dUc", "ncV:256:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_getexpps128_mask, "V4fV4fV4fUc", "ncV:128:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_getexpps256_mask, "V8fV8fV8fUc", "ncV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pabsq128, "V2OiV2Oi", "ncV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pabsq256, "V4OiV4Oi", "ncV:256:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_pmaxsq128, "V2OiV2OiV2Oi", "ncV:128:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_pmaxsq256, "V4OiV4OiV4Oi", "ncV:256:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_pmaxuq128, "V2OiV2OiV2Oi", "ncV:128:", "avx512vl")

diff  --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index a49c035002786..49f054ec1a982 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -14285,21 +14285,6 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
       return Builder.CreateCall(F, Ops[0]);
     }
   }
-  case X86::BI__builtin_ia32_pabsb128:
-  case X86::BI__builtin_ia32_pabsw128:
-  case X86::BI__builtin_ia32_pabsd128:
-  case X86::BI__builtin_ia32_pabsb256:
-  case X86::BI__builtin_ia32_pabsw256:
-  case X86::BI__builtin_ia32_pabsd256:
-  case X86::BI__builtin_ia32_pabsq128:
-  case X86::BI__builtin_ia32_pabsq256:
-  case X86::BI__builtin_ia32_pabsb512:
-  case X86::BI__builtin_ia32_pabsw512:
-  case X86::BI__builtin_ia32_pabsd512:
-  case X86::BI__builtin_ia32_pabsq512: {
-    Function *F = CGM.getIntrinsic(Intrinsic::abs, Ops[0]->getType());
-    return Builder.CreateCall(F, {Ops[0], Builder.getInt1(false)});
-  }
   case X86::BI__builtin_ia32_pmaxsb128:
   case X86::BI__builtin_ia32_pmaxsw128:
   case X86::BI__builtin_ia32_pmaxsd128:

diff  --git a/clang/lib/Headers/avx2intrin.h b/clang/lib/Headers/avx2intrin.h
index 5064c87c2bb19..c9ad74ce3fa42 100644
--- a/clang/lib/Headers/avx2intrin.h
+++ b/clang/lib/Headers/avx2intrin.h
@@ -26,19 +26,19 @@
 static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_abs_epi8(__m256i __a)
 {
-    return (__m256i)__builtin_ia32_pabsb256((__v32qi)__a);
+    return (__m256i)__builtin_elementwise_abs((__v32qs)__a);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_abs_epi16(__m256i __a)
 {
-    return (__m256i)__builtin_ia32_pabsw256((__v16hi)__a);
+    return (__m256i)__builtin_elementwise_abs((__v16hi)__a);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_abs_epi32(__m256i __a)
 {
-    return (__m256i)__builtin_ia32_pabsd256((__v8si)__a);
+    return (__m256i)__builtin_elementwise_abs((__v8si)__a);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS256

diff  --git a/clang/lib/Headers/avx512bwintrin.h b/clang/lib/Headers/avx512bwintrin.h
index 6aee8aed84871..53319eb23011d 100644
--- a/clang/lib/Headers/avx512bwintrin.h
+++ b/clang/lib/Headers/avx512bwintrin.h
@@ -485,7 +485,7 @@ _mm512_mask_blend_epi16 (__mmask32 __U, __m512i __A, __m512i __W)
 static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_abs_epi8 (__m512i __A)
 {
-  return (__m512i)__builtin_ia32_pabsb512((__v64qi)__A);
+  return (__m512i)__builtin_elementwise_abs((__v64qs)__A);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS512
@@ -507,7 +507,7 @@ _mm512_maskz_abs_epi8 (__mmask64 __U, __m512i __A)
 static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_abs_epi16 (__m512i __A)
 {
-  return (__m512i)__builtin_ia32_pabsw512((__v32hi)__A);
+  return (__m512i)__builtin_elementwise_abs((__v32hi)__A);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS512

diff  --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h
index df298640523b7..9b02a7cffc64d 100644
--- a/clang/lib/Headers/avx512fintrin.h
+++ b/clang/lib/Headers/avx512fintrin.h
@@ -26,6 +26,10 @@ typedef unsigned short __v32hu __attribute__((__vector_size__(64)));
 typedef unsigned long long __v8du __attribute__((__vector_size__(64)));
 typedef unsigned int __v16su __attribute__((__vector_size__(64)));
 
+/* We need an explicitly signed variant for char. Note that this shouldn't
+ * appear in the interface though. */
+typedef signed char __v64qs __attribute__((__vector_size__(64)));
+
 typedef float __m512 __attribute__((__vector_size__(64), __aligned__(64)));
 typedef double __m512d __attribute__((__vector_size__(64), __aligned__(64)));
 typedef long long __m512i __attribute__((__vector_size__(64), __aligned__(64)));
@@ -1846,7 +1850,7 @@ _mm512_mask_ceil_pd (__m512d __W, __mmask8 __U, __m512d __A)
 static __inline __m512i __DEFAULT_FN_ATTRS512
 _mm512_abs_epi64(__m512i __A)
 {
-  return (__m512i)__builtin_ia32_pabsq512((__v8di)__A);
+  return (__m512i)__builtin_elementwise_abs((__v8di)__A);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS512
@@ -1868,7 +1872,7 @@ _mm512_maskz_abs_epi64 (__mmask8 __U, __m512i __A)
 static __inline __m512i __DEFAULT_FN_ATTRS512
 _mm512_abs_epi32(__m512i __A)
 {
-  return (__m512i)__builtin_ia32_pabsd512((__v16si) __A);
+  return (__m512i)__builtin_elementwise_abs((__v16si) __A);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS512

diff  --git a/clang/lib/Headers/avx512vlintrin.h b/clang/lib/Headers/avx512vlintrin.h
index 0519dba59081a..eddb99902e3d5 100644
--- a/clang/lib/Headers/avx512vlintrin.h
+++ b/clang/lib/Headers/avx512vlintrin.h
@@ -2988,7 +2988,7 @@ _mm256_maskz_abs_epi32(__mmask8 __U, __m256i __A) {
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_abs_epi64 (__m128i __A) {
-  return (__m128i)__builtin_ia32_pabsq128((__v2di)__A);
+  return (__m128i)__builtin_elementwise_abs((__v2di)__A);
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS128
@@ -3007,7 +3007,7 @@ _mm_maskz_abs_epi64 (__mmask8 __U, __m128i __A) {
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_abs_epi64 (__m256i __A) {
-  return (__m256i)__builtin_ia32_pabsq256 ((__v4di)__A);
+  return (__m256i)__builtin_elementwise_abs((__v4di)__A);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS256

diff  --git a/clang/lib/Headers/tmmintrin.h b/clang/lib/Headers/tmmintrin.h
index bcffa8187801c..cb9be2349de5a 100644
--- a/clang/lib/Headers/tmmintrin.h
+++ b/clang/lib/Headers/tmmintrin.h
@@ -53,7 +53,7 @@ _mm_abs_pi8(__m64 __a)
 static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_abs_epi8(__m128i __a)
 {
-    return (__m128i)__builtin_ia32_pabsb128((__v16qi)__a);
+    return (__m128i)__builtin_elementwise_abs((__v16qs)__a);
 }
 
 /// Computes the absolute value of each of the packed 16-bit signed
@@ -89,7 +89,7 @@ _mm_abs_pi16(__m64 __a)
 static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_abs_epi16(__m128i __a)
 {
-    return (__m128i)__builtin_ia32_pabsw128((__v8hi)__a);
+    return (__m128i)__builtin_elementwise_abs((__v8hi)__a);
 }
 
 /// Computes the absolute value of each of the packed 32-bit signed
@@ -125,7 +125,7 @@ _mm_abs_pi32(__m64 __a)
 static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_abs_epi32(__m128i __a)
 {
-    return (__m128i)__builtin_ia32_pabsd128((__v4si)__a);
+    return (__m128i)__builtin_elementwise_abs((__v4si)__a);
 }
 
 /// Concatenates the two 128-bit integer vector operands, and

diff  --git a/clang/test/CodeGen/builtins-x86.c b/clang/test/CodeGen/builtins-x86.c
index 61b9d53c74f9d..bfcd30072fc1f 100644
--- a/clang/test/CodeGen/builtins-x86.c
+++ b/clang/test/CodeGen/builtins-x86.c
@@ -259,11 +259,8 @@ void f0() {
   tmp_V4s = __builtin_ia32_psignw(tmp_V4s, tmp_V4s);
   tmp_V4i = __builtin_ia32_psignd128(tmp_V4i, tmp_V4i);
   tmp_V2i = __builtin_ia32_psignd(tmp_V2i, tmp_V2i);
-  tmp_V16c = __builtin_ia32_pabsb128(tmp_V16c);
   tmp_V8c = __builtin_ia32_pabsb(tmp_V8c);
-  tmp_V8s = __builtin_ia32_pabsw128(tmp_V8s);
   tmp_V4s = __builtin_ia32_pabsw(tmp_V4s);
-  tmp_V4i = __builtin_ia32_pabsd128(tmp_V4i);
   tmp_V2i = __builtin_ia32_pabsd(tmp_V2i);
   tmp_V4s = __builtin_ia32_psllw(tmp_V4s, tmp_V1LLi);
   tmp_V2i = __builtin_ia32_pslld(tmp_V2i, tmp_V1LLi);


        


More information about the cfe-commits mailing list