[clang] 369d785 - [PowerPC] Optimal sequence for doubleword vec_all_{eq|ne} on Power7

Nemanja Ivanovic via cfe-commits cfe-commits at lists.llvm.org
Fri Oct 1 06:27:22 PDT 2021


Author: Nemanja Ivanovic
Date: 2021-10-01T08:27:15-05:00
New Revision: 369d785574f5a22c086d0c40268a39a64bdd7217

URL: https://github.com/llvm/llvm-project/commit/369d785574f5a22c086d0c40268a39a64bdd7217
DIFF: https://github.com/llvm/llvm-project/commit/369d785574f5a22c086d0c40268a39a64bdd7217.diff

LOG: [PowerPC] Optimal sequence for doubleword vec_all_{eq|ne} on Power7

These builtins produce inefficient code for CPUs prior to Power8 because
vcmpequd is unavailable there. The predicate forms can instead leverage the
available vcmpequw along with xxlxor to produce a better sequence.
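
For illustration, the pre-Power8 fallback the header now uses can be written
with plain AltiVec intrinsics. This is a minimal sketch (the helper name
eq_dw_pre_power8 is hypothetical, not part of the patch); it relies on the fact
that two doublewords are equal exactly when their XOR is all zero, and the XOR
is all zero exactly when every 32-bit word of it is zero, which the
vcmpequw-based predicate form of vec_all_eq can test:

    #include <altivec.h>

    // Hypothetical helper: doubleword "all equal" without vcmpequd.
    // (a ^ b) is all-zero iff a == b element-wise, and an all-zero XOR is
    // detected by comparing its 32-bit words against zero, which maps to the
    // vcmpequw predicate form (vec_all_eq on vector signed int).
    static int eq_dw_pre_power8(vector signed long long a,
                                vector signed long long b) {
      return vec_all_eq((vector signed int)vec_xor(a, b),
                        (vector signed int)0);
    }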

Added: 
    

Modified: 
    clang/lib/Headers/altivec.h
    clang/test/CodeGen/builtins-ppc-vsx.c

Removed: 
    


################################################################################
diff --git a/clang/lib/Headers/altivec.h b/clang/lib/Headers/altivec.h
index 6a179d86d71f9..5da4fbf72ce97 100644
--- a/clang/lib/Headers/altivec.h
+++ b/clang/lib/Headers/altivec.h
@@ -14815,42 +14815,43 @@ static __inline__ int __ATTRS_o_ai vec_all_eq(vector bool int __a,
 #ifdef __VSX__
 static __inline__ int __ATTRS_o_ai vec_all_eq(vector signed long long __a,
                                               vector signed long long __b) {
+#ifdef __POWER8_VECTOR__
   return __builtin_altivec_vcmpequd_p(__CR6_LT, __a, __b);
+#else
+  // No vcmpequd on Power7 so we xor the two vectors and compare against zero as
+  // 32-bit elements.
+  return vec_all_eq((vector signed int)vec_xor(__a, __b), (vector signed int)0);
+#endif
 }
 
 static __inline__ int __ATTRS_o_ai vec_all_eq(vector long long __a,
                                               vector bool long long __b) {
-  return __builtin_altivec_vcmpequd_p(__CR6_LT, __a, (vector long long)__b);
+  return vec_all_eq((vector signed long long)__a, (vector signed long long)__b);
 }
 
 static __inline__ int __ATTRS_o_ai vec_all_eq(vector unsigned long long __a,
                                               vector unsigned long long __b) {
-  return __builtin_altivec_vcmpequd_p(__CR6_LT, (vector long long)__a,
-                                      (vector long long)__b);
+  return vec_all_eq((vector signed long long)__a, (vector signed long long)__b);
 }
 
 static __inline__ int __ATTRS_o_ai vec_all_eq(vector unsigned long long __a,
                                               vector bool long long __b) {
-  return __builtin_altivec_vcmpequd_p(__CR6_LT, (vector long long)__a,
-                                      (vector long long)__b);
+  return vec_all_eq((vector signed long long)__a, (vector signed long long)__b);
 }
 
 static __inline__ int __ATTRS_o_ai vec_all_eq(vector bool long long __a,
                                               vector long long __b) {
-  return __builtin_altivec_vcmpequd_p(__CR6_LT, (vector long long)__a,
-                                      (vector long long)__b);
+  return vec_all_eq((vector signed long long)__a, (vector signed long long)__b);
 }
 
 static __inline__ int __ATTRS_o_ai vec_all_eq(vector bool long long __a,
                                               vector unsigned long long __b) {
-  return __builtin_altivec_vcmpequd_p(__CR6_LT, (vector long long)__a,
-                                      (vector long long)__b);
+  return vec_all_eq((vector signed long long)__a, (vector signed long long)__b);
 }
 
 static __inline__ int __ATTRS_o_ai vec_all_eq(vector bool long long __a,
                                               vector bool long long __b) {
-  return __builtin_altivec_vcmpequd_p(__CR6_LT, (vector long long)__a,
-                                      (vector long long)__b);
+  return vec_all_eq((vector signed long long)__a, (vector signed long long)__b);
 }
 #endif
 
@@ -17038,43 +17039,43 @@ static __inline__ int __ATTRS_o_ai vec_any_ne(vector bool int __a,
 #ifdef __VSX__
 static __inline__ int __ATTRS_o_ai vec_any_ne(vector signed long long __a,
                                               vector signed long long __b) {
+#ifdef __POWER8_VECTOR__
   return __builtin_altivec_vcmpequd_p(__CR6_LT_REV, __a, __b);
+#else
+  // Take advantage of the optimized sequence for vec_all_eq when vcmpequd is
+  // not available.
+  return !vec_all_eq(__a, __b);
+#endif
 }
 
 static __inline__ int __ATTRS_o_ai vec_any_ne(vector unsigned long long __a,
                                               vector unsigned long long __b) {
-  return __builtin_altivec_vcmpequd_p(__CR6_LT_REV, (vector long long)__a,
-                                      (vector long long)__b);
+  return vec_any_ne((vector signed long long)__a, (vector signed long long)__b);
 }
 
 static __inline__ int __ATTRS_o_ai vec_any_ne(vector signed long long __a,
                                               vector bool long long __b) {
-  return __builtin_altivec_vcmpequd_p(__CR6_LT_REV, __a,
-                                      (vector signed long long)__b);
+  return vec_any_ne((vector signed long long)__a, (vector signed long long)__b);
 }
 
 static __inline__ int __ATTRS_o_ai vec_any_ne(vector unsigned long long __a,
                                               vector bool long long __b) {
-  return __builtin_altivec_vcmpequd_p(
-      __CR6_LT_REV, (vector signed long long)__a, (vector signed long long)__b);
+  return vec_any_ne((vector signed long long)__a, (vector signed long long)__b);
 }
 
 static __inline__ int __ATTRS_o_ai vec_any_ne(vector bool long long __a,
                                               vector signed long long __b) {
-  return __builtin_altivec_vcmpequd_p(
-      __CR6_LT_REV, (vector signed long long)__a, (vector signed long long)__b);
+  return vec_any_ne((vector signed long long)__a, (vector signed long long)__b);
 }
 
 static __inline__ int __ATTRS_o_ai vec_any_ne(vector bool long long __a,
                                               vector unsigned long long __b) {
-  return __builtin_altivec_vcmpequd_p(
-      __CR6_LT_REV, (vector signed long long)__a, (vector signed long long)__b);
+  return vec_any_ne((vector signed long long)__a, (vector signed long long)__b);
 }
 
 static __inline__ int __ATTRS_o_ai vec_any_ne(vector bool long long __a,
                                               vector bool long long __b) {
-  return __builtin_altivec_vcmpequd_p(
-      __CR6_LT_REV, (vector signed long long)__a, (vector signed long long)__b);
+  return vec_any_ne((vector signed long long)__a, (vector signed long long)__b);
 }
 #endif
 

diff --git a/clang/test/CodeGen/builtins-ppc-vsx.c b/clang/test/CodeGen/builtins-ppc-vsx.c
index 0cbcdae504c91..784f3ca2219c9 100644
--- a/clang/test/CodeGen/builtins-ppc-vsx.c
+++ b/clang/test/CodeGen/builtins-ppc-vsx.c
@@ -2589,32 +2589,46 @@ void test_p8overloads_backwards_compat() {
   /* ----------------------- predicates --------------------------- */
   /* vec_all_eq */
   res_i = vec_all_eq(vsll, vsll);
-  // CHECK: @llvm.ppc.altivec.vcmpequd.p
-  // CHECK-LE: @llvm.ppc.altivec.vcmpequd.p
+  // CHECK: xor <2 x i64>
+  // CHECK: @llvm.ppc.altivec.vcmpequw.p
+  // CHECK-LE: xor <2 x i64>
+  // CHECK-LE: @llvm.ppc.altivec.vcmpequw.p
 
   res_i = vec_all_eq(vsll, vbll);
-  // CHECK: @llvm.ppc.altivec.vcmpequd.p
-  // CHECK-LE: @llvm.ppc.altivec.vcmpequd.p
+  // CHECK: xor <2 x i64>
+  // CHECK: @llvm.ppc.altivec.vcmpequw.p
+  // CHECK-LE: xor <2 x i64>
+  // CHECK-LE: @llvm.ppc.altivec.vcmpequw.p
 
   res_i = vec_all_eq(vull, vull);
-  // CHECK: @llvm.ppc.altivec.vcmpequd.p
-  // CHECK-LE: @llvm.ppc.altivec.vcmpequd.p
+  // CHECK: xor <2 x i64>
+  // CHECK: @llvm.ppc.altivec.vcmpequw.p
+  // CHECK-LE: xor <2 x i64>
+  // CHECK-LE: @llvm.ppc.altivec.vcmpequw.p
 
   res_i = vec_all_eq(vull, vbll);
-  // CHECK: @llvm.ppc.altivec.vcmpequd.p
-  // CHECK-LE: @llvm.ppc.altivec.vcmpequd.p
+  // CHECK: xor <2 x i64>
+  // CHECK: @llvm.ppc.altivec.vcmpequw.p
+  // CHECK-LE: xor <2 x i64>
+  // CHECK-LE: @llvm.ppc.altivec.vcmpequw.p
 
   res_i = vec_all_eq(vbll, vsll);
-  // CHECK: @llvm.ppc.altivec.vcmpequd.p
-  // CHECK-LE: @llvm.ppc.altivec.vcmpequd.p
+  // CHECK: xor <2 x i64>
+  // CHECK: @llvm.ppc.altivec.vcmpequw.p
+  // CHECK-LE: xor <2 x i64>
+  // CHECK-LE: @llvm.ppc.altivec.vcmpequw.p
 
   res_i = vec_all_eq(vbll, vull);
-  // CHECK: @llvm.ppc.altivec.vcmpequd.p
-  // CHECK-LE: @llvm.ppc.altivec.vcmpequd.p
+  // CHECK: xor <2 x i64>
+  // CHECK: @llvm.ppc.altivec.vcmpequw.p
+  // CHECK-LE: xor <2 x i64>
+  // CHECK-LE: @llvm.ppc.altivec.vcmpequw.p
 
   res_i = vec_all_eq(vbll, vbll);
-  // CHECK: @llvm.ppc.altivec.vcmpequd.p
-  // CHECK-LE: @llvm.ppc.altivec.vcmpequd.p
+  // CHECK: xor <2 x i64>
+  // CHECK: @llvm.ppc.altivec.vcmpequw.p
+  // CHECK-LE: xor <2 x i64>
+  // CHECK-LE: @llvm.ppc.altivec.vcmpequw.p
 
   /* vec_all_ne */
   res_i = vec_all_ne(vsll, vsll);
@@ -2679,32 +2693,60 @@ void test_p8overloads_backwards_compat() {
 
   /* vec_any_ne */
   res_i = vec_any_ne(vsll, vsll);
-  // CHECK: @llvm.ppc.altivec.vcmpequd.p
-  // CHECK-LE: @llvm.ppc.altivec.vcmpequd.p
+  // CHECK: xor <2 x i64>
+  // CHECK: @llvm.ppc.altivec.vcmpequw.p
+  // CHECK: xor i1
+  // CHECK-LE: xor <2 x i64>
+  // CHECK-LE: @llvm.ppc.altivec.vcmpequw.p
+  // CHECK-LE: xor i1
 
   res_i = vec_any_ne(vsll, vbll);
-  // CHECK: @llvm.ppc.altivec.vcmpequd.p
-  // CHECK-LE: @llvm.ppc.altivec.vcmpequd.p
+  // CHECK: xor <2 x i64>
+  // CHECK: @llvm.ppc.altivec.vcmpequw.p
+  // CHECK: xor i1
+  // CHECK-LE: xor <2 x i64>
+  // CHECK-LE: @llvm.ppc.altivec.vcmpequw.p
+  // CHECK-LE: xor i1
 
   res_i = vec_any_ne(vull, vull);
-  // CHECK: @llvm.ppc.altivec.vcmpequd.p
-  // CHECK-LE: @llvm.ppc.altivec.vcmpequd.p
+  // CHECK: xor <2 x i64>
+  // CHECK: @llvm.ppc.altivec.vcmpequw.p
+  // CHECK: xor i1
+  // CHECK-LE: xor <2 x i64>
+  // CHECK-LE: @llvm.ppc.altivec.vcmpequw.p
+  // CHECK-LE: xor i1
 
   res_i = vec_any_ne(vull, vbll);
-  // CHECK: @llvm.ppc.altivec.vcmpequd.p
-  // CHECK-LE: @llvm.ppc.altivec.vcmpequd.p
+  // CHECK: xor <2 x i64>
+  // CHECK: @llvm.ppc.altivec.vcmpequw.p
+  // CHECK: xor i1
+  // CHECK-LE: xor <2 x i64>
+  // CHECK-LE: @llvm.ppc.altivec.vcmpequw.p
+  // CHECK-LE: xor i1
 
   res_i = vec_any_ne(vbll, vsll);
-  // CHECK: @llvm.ppc.altivec.vcmpequd.p
-  // CHECK-LE: @llvm.ppc.altivec.vcmpequd.p
+  // CHECK: xor <2 x i64>
+  // CHECK: @llvm.ppc.altivec.vcmpequw.p
+  // CHECK: xor i1
+  // CHECK-LE: xor <2 x i64>
+  // CHECK-LE: @llvm.ppc.altivec.vcmpequw.p
+  // CHECK-LE: xor i1
 
   res_i = vec_any_ne(vbll, vull);
-  // CHECK: @llvm.ppc.altivec.vcmpequd.p
-  // CHECK-LE: @llvm.ppc.altivec.vcmpequd.p
+  // CHECK: xor <2 x i64>
+  // CHECK: @llvm.ppc.altivec.vcmpequw.p
+  // CHECK: xor i1
+  // CHECK-LE: xor <2 x i64>
+  // CHECK-LE: @llvm.ppc.altivec.vcmpequw.p
+  // CHECK-LE: xor i1
 
   res_i = vec_any_ne(vbll, vbll);
-  // CHECK: @llvm.ppc.altivec.vcmpequd.p
-  // CHECK-LE: @llvm.ppc.altivec.vcmpequd.p
+  // CHECK: xor <2 x i64>
+  // CHECK: @llvm.ppc.altivec.vcmpequw.p
+  // CHECK: xor i1
+  // CHECK-LE: xor <2 x i64>
+  // CHECK-LE: @llvm.ppc.altivec.vcmpequw.p
+  // CHECK-LE: xor i1
 
   /* vec_all_ge */
   res_i = vec_all_ge(vsll, vsll);
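
A quick way to confirm which sequence is selected is to compile a small user of
the builtin at both CPU levels. This is only a sketch: it assumes a powerpc64
Linux toolchain, and the file name eqdw.c is purely illustrative:

    // eqdw.c
    #include <altivec.h>

    int all_eq_dw(vector signed long long a, vector signed long long b) {
      return vec_all_eq(a, b);
    }

    // clang --target=powerpc64-unknown-linux-gnu -mcpu=pwr7 -O2 -S eqdw.c
    //   should now lower to roughly an xxlxor plus a record-form vcmpequw,
    // clang --target=powerpc64-unknown-linux-gnu -mcpu=pwr8 -O2 -S eqdw.c
    //   continues to use vcmpequd as before.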


        

