[clang] 369d785 - [PowerPC] Optimal sequence for doubleword vec_all_{eq|ne} on Power7
Nemanja Ivanovic via cfe-commits
cfe-commits at lists.llvm.org
Fri Oct 1 06:27:22 PDT 2021
Author: Nemanja Ivanovic
Date: 2021-10-01T08:27:15-05:00
New Revision: 369d785574f5a22c086d0c40268a39a64bdd7217
URL: https://github.com/llvm/llvm-project/commit/369d785574f5a22c086d0c40268a39a64bdd7217
DIFF: https://github.com/llvm/llvm-project/commit/369d785574f5a22c086d0c40268a39a64bdd7217.diff
LOG: [PowerPC] Optimal sequence for doubleword vec_all_{eq|ne} on Power7
These builtins produce inefficient code for CPUs prior to Power8
due to vcmpequd being unavailable. The predicate forms can actually
leverage the available vcmpequw along with xxlxor to produce a better
sequence.
Added:
Modified:
clang/lib/Headers/altivec.h
clang/test/CodeGen/builtins-ppc-vsx.c
Removed:
################################################################################
diff --git a/clang/lib/Headers/altivec.h b/clang/lib/Headers/altivec.h
index 6a179d86d71f9..5da4fbf72ce97 100644
--- a/clang/lib/Headers/altivec.h
+++ b/clang/lib/Headers/altivec.h
@@ -14815,42 +14815,43 @@ static __inline__ int __ATTRS_o_ai vec_all_eq(vector bool int __a,
#ifdef __VSX__
static __inline__ int __ATTRS_o_ai vec_all_eq(vector signed long long __a,
vector signed long long __b) {
+#ifdef __POWER8_VECTOR__
return __builtin_altivec_vcmpequd_p(__CR6_LT, __a, __b);
+#else
+ // No vcmpequd on Power7 so we xor the two vectors and compare against zero as
+ // 32-bit elements.
+ return vec_all_eq((vector signed int)vec_xor(__a, __b), (vector signed int)0);
+#endif
}
static __inline__ int __ATTRS_o_ai vec_all_eq(vector long long __a,
vector bool long long __b) {
- return __builtin_altivec_vcmpequd_p(__CR6_LT, __a, (vector long long)__b);
+ return vec_all_eq((vector signed long long)__a, (vector signed long long)__b);
}
static __inline__ int __ATTRS_o_ai vec_all_eq(vector unsigned long long __a,
vector unsigned long long __b) {
- return __builtin_altivec_vcmpequd_p(__CR6_LT, (vector long long)__a,
- (vector long long)__b);
+ return vec_all_eq((vector signed long long)__a, (vector signed long long)__b);
}
static __inline__ int __ATTRS_o_ai vec_all_eq(vector unsigned long long __a,
vector bool long long __b) {
- return __builtin_altivec_vcmpequd_p(__CR6_LT, (vector long long)__a,
- (vector long long)__b);
+ return vec_all_eq((vector signed long long)__a, (vector signed long long)__b);
}
static __inline__ int __ATTRS_o_ai vec_all_eq(vector bool long long __a,
vector long long __b) {
- return __builtin_altivec_vcmpequd_p(__CR6_LT, (vector long long)__a,
- (vector long long)__b);
+ return vec_all_eq((vector signed long long)__a, (vector signed long long)__b);
}
static __inline__ int __ATTRS_o_ai vec_all_eq(vector bool long long __a,
vector unsigned long long __b) {
- return __builtin_altivec_vcmpequd_p(__CR6_LT, (vector long long)__a,
- (vector long long)__b);
+ return vec_all_eq((vector signed long long)__a, (vector signed long long)__b);
}
static __inline__ int __ATTRS_o_ai vec_all_eq(vector bool long long __a,
vector bool long long __b) {
- return __builtin_altivec_vcmpequd_p(__CR6_LT, (vector long long)__a,
- (vector long long)__b);
+ return vec_all_eq((vector signed long long)__a, (vector signed long long)__b);
}
#endif
@@ -17038,43 +17039,43 @@ static __inline__ int __ATTRS_o_ai vec_any_ne(vector bool int __a,
#ifdef __VSX__
static __inline__ int __ATTRS_o_ai vec_any_ne(vector signed long long __a,
vector signed long long __b) {
+#ifdef __POWER8_VECTOR__
return __builtin_altivec_vcmpequd_p(__CR6_LT_REV, __a, __b);
+#else
+ // Take advantage of the optimized sequence for vec_all_eq when vcmpequd is
+ // not available.
+ return !vec_all_eq(__a, __b);
+#endif
}
static __inline__ int __ATTRS_o_ai vec_any_ne(vector unsigned long long __a,
vector unsigned long long __b) {
- return __builtin_altivec_vcmpequd_p(__CR6_LT_REV, (vector long long)__a,
- (vector long long)__b);
+ return vec_any_ne((vector signed long long)__a, (vector signed long long)__b);
}
static __inline__ int __ATTRS_o_ai vec_any_ne(vector signed long long __a,
vector bool long long __b) {
- return __builtin_altivec_vcmpequd_p(__CR6_LT_REV, __a,
- (vector signed long long)__b);
+ return vec_any_ne((vector signed long long)__a, (vector signed long long)__b);
}
static __inline__ int __ATTRS_o_ai vec_any_ne(vector unsigned long long __a,
vector bool long long __b) {
- return __builtin_altivec_vcmpequd_p(
- __CR6_LT_REV, (vector signed long long)__a, (vector signed long long)__b);
+ return vec_any_ne((vector signed long long)__a, (vector signed long long)__b);
}
static __inline__ int __ATTRS_o_ai vec_any_ne(vector bool long long __a,
vector signed long long __b) {
- return __builtin_altivec_vcmpequd_p(
- __CR6_LT_REV, (vector signed long long)__a, (vector signed long long)__b);
+ return vec_any_ne((vector signed long long)__a, (vector signed long long)__b);
}
static __inline__ int __ATTRS_o_ai vec_any_ne(vector bool long long __a,
vector unsigned long long __b) {
- return __builtin_altivec_vcmpequd_p(
- __CR6_LT_REV, (vector signed long long)__a, (vector signed long long)__b);
+ return vec_any_ne((vector signed long long)__a, (vector signed long long)__b);
}
static __inline__ int __ATTRS_o_ai vec_any_ne(vector bool long long __a,
vector bool long long __b) {
- return __builtin_altivec_vcmpequd_p(
- __CR6_LT_REV, (vector signed long long)__a, (vector signed long long)__b);
+ return vec_any_ne((vector signed long long)__a, (vector signed long long)__b);
}
#endif
diff --git a/clang/test/CodeGen/builtins-ppc-vsx.c b/clang/test/CodeGen/builtins-ppc-vsx.c
index 0cbcdae504c91..784f3ca2219c9 100644
--- a/clang/test/CodeGen/builtins-ppc-vsx.c
+++ b/clang/test/CodeGen/builtins-ppc-vsx.c
@@ -2589,32 +2589,46 @@ void test_p8overloads_backwards_compat() {
/* ----------------------- predicates --------------------------- */
/* vec_all_eq */
res_i = vec_all_eq(vsll, vsll);
- // CHECK: @llvm.ppc.altivec.vcmpequd.p
- // CHECK-LE: @llvm.ppc.altivec.vcmpequd.p
+ // CHECK: xor <2 x i64>
+ // CHECK: @llvm.ppc.altivec.vcmpequw.p
+ // CHECK-LE: xor <2 x i64>
+ // CHECK-LE: @llvm.ppc.altivec.vcmpequw.p
res_i = vec_all_eq(vsll, vbll);
- // CHECK: @llvm.ppc.altivec.vcmpequd.p
- // CHECK-LE: @llvm.ppc.altivec.vcmpequd.p
+ // CHECK: xor <2 x i64>
+ // CHECK: @llvm.ppc.altivec.vcmpequw.p
+ // CHECK-LE: xor <2 x i64>
+ // CHECK-LE: @llvm.ppc.altivec.vcmpequw.p
res_i = vec_all_eq(vull, vull);
- // CHECK: @llvm.ppc.altivec.vcmpequd.p
- // CHECK-LE: @llvm.ppc.altivec.vcmpequd.p
+ // CHECK: xor <2 x i64>
+ // CHECK: @llvm.ppc.altivec.vcmpequw.p
+ // CHECK-LE: xor <2 x i64>
+ // CHECK-LE: @llvm.ppc.altivec.vcmpequw.p
res_i = vec_all_eq(vull, vbll);
- // CHECK: @llvm.ppc.altivec.vcmpequd.p
- // CHECK-LE: @llvm.ppc.altivec.vcmpequd.p
+ // CHECK: xor <2 x i64>
+ // CHECK: @llvm.ppc.altivec.vcmpequw.p
+ // CHECK-LE: xor <2 x i64>
+ // CHECK-LE: @llvm.ppc.altivec.vcmpequw.p
res_i = vec_all_eq(vbll, vsll);
- // CHECK: @llvm.ppc.altivec.vcmpequd.p
- // CHECK-LE: @llvm.ppc.altivec.vcmpequd.p
+ // CHECK: xor <2 x i64>
+ // CHECK: @llvm.ppc.altivec.vcmpequw.p
+ // CHECK-LE: xor <2 x i64>
+ // CHECK-LE: @llvm.ppc.altivec.vcmpequw.p
res_i = vec_all_eq(vbll, vull);
- // CHECK: @llvm.ppc.altivec.vcmpequd.p
- // CHECK-LE: @llvm.ppc.altivec.vcmpequd.p
+ // CHECK: xor <2 x i64>
+ // CHECK: @llvm.ppc.altivec.vcmpequw.p
+ // CHECK-LE: xor <2 x i64>
+ // CHECK-LE: @llvm.ppc.altivec.vcmpequw.p
res_i = vec_all_eq(vbll, vbll);
- // CHECK: @llvm.ppc.altivec.vcmpequd.p
- // CHECK-LE: @llvm.ppc.altivec.vcmpequd.p
+ // CHECK: xor <2 x i64>
+ // CHECK: @llvm.ppc.altivec.vcmpequw.p
+ // CHECK-LE: xor <2 x i64>
+ // CHECK-LE: @llvm.ppc.altivec.vcmpequw.p
/* vec_all_ne */
res_i = vec_all_ne(vsll, vsll);
@@ -2679,32 +2693,60 @@ void test_p8overloads_backwards_compat() {
/* vec_any_ne */
res_i = vec_any_ne(vsll, vsll);
- // CHECK: @llvm.ppc.altivec.vcmpequd.p
- // CHECK-LE: @llvm.ppc.altivec.vcmpequd.p
+ // CHECK: xor <2 x i64>
+ // CHECK: @llvm.ppc.altivec.vcmpequw.p
+ // CHECK: xor i1
+ // CHECK-LE: xor <2 x i64>
+ // CHECK-LE: @llvm.ppc.altivec.vcmpequw.p
+ // CHECK-LE: xor i1
res_i = vec_any_ne(vsll, vbll);
- // CHECK: @llvm.ppc.altivec.vcmpequd.p
- // CHECK-LE: @llvm.ppc.altivec.vcmpequd.p
+ // CHECK: xor <2 x i64>
+ // CHECK: @llvm.ppc.altivec.vcmpequw.p
+ // CHECK: xor i1
+ // CHECK-LE: xor <2 x i64>
+ // CHECK-LE: @llvm.ppc.altivec.vcmpequw.p
+ // CHECK-LE: xor i1
res_i = vec_any_ne(vull, vull);
- // CHECK: @llvm.ppc.altivec.vcmpequd.p
- // CHECK-LE: @llvm.ppc.altivec.vcmpequd.p
+ // CHECK: xor <2 x i64>
+ // CHECK: @llvm.ppc.altivec.vcmpequw.p
+ // CHECK: xor i1
+ // CHECK-LE: xor <2 x i64>
+ // CHECK-LE: @llvm.ppc.altivec.vcmpequw.p
+ // CHECK-LE: xor i1
res_i = vec_any_ne(vull, vbll);
- // CHECK: @llvm.ppc.altivec.vcmpequd.p
- // CHECK-LE: @llvm.ppc.altivec.vcmpequd.p
+ // CHECK: xor <2 x i64>
+ // CHECK: @llvm.ppc.altivec.vcmpequw.p
+ // CHECK: xor i1
+ // CHECK-LE: xor <2 x i64>
+ // CHECK-LE: @llvm.ppc.altivec.vcmpequw.p
+ // CHECK-LE: xor i1
res_i = vec_any_ne(vbll, vsll);
- // CHECK: @llvm.ppc.altivec.vcmpequd.p
- // CHECK-LE: @llvm.ppc.altivec.vcmpequd.p
+ // CHECK: xor <2 x i64>
+ // CHECK: @llvm.ppc.altivec.vcmpequw.p
+ // CHECK: xor i1
+ // CHECK-LE: xor <2 x i64>
+ // CHECK-LE: @llvm.ppc.altivec.vcmpequw.p
+ // CHECK-LE: xor i1
res_i = vec_any_ne(vbll, vull);
- // CHECK: @llvm.ppc.altivec.vcmpequd.p
- // CHECK-LE: @llvm.ppc.altivec.vcmpequd.p
+ // CHECK: xor <2 x i64>
+ // CHECK: @llvm.ppc.altivec.vcmpequw.p
+ // CHECK: xor i1
+ // CHECK-LE: xor <2 x i64>
+ // CHECK-LE: @llvm.ppc.altivec.vcmpequw.p
+ // CHECK-LE: xor i1
res_i = vec_any_ne(vbll, vbll);
- // CHECK: @llvm.ppc.altivec.vcmpequd.p
- // CHECK-LE: @llvm.ppc.altivec.vcmpequd.p
+ // CHECK: xor <2 x i64>
+ // CHECK: @llvm.ppc.altivec.vcmpequw.p
+ // CHECK: xor i1
+ // CHECK-LE: xor <2 x i64>
+ // CHECK-LE: @llvm.ppc.altivec.vcmpequw.p
+ // CHECK-LE: xor i1
/* vec_all_ge */
res_i = vec_all_ge(vsll, vsll);
More information about the cfe-commits
mailing list