[llvm] 448d896 - [PowerPC] Add coverage for select(icmp_sgt(x,y),sub(x,y),sub(y,x)) -> abds(x,y) patterns
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Sat Feb 25 13:15:38 PST 2023
Author: Simon Pilgrim
Date: 2023-02-25T21:04:16Z
New Revision: 448d896519d2c7ec79dec75d1ec33c120a767659
URL: https://github.com/llvm/llvm-project/commit/448d896519d2c7ec79dec75d1ec33c120a767659
DIFF: https://github.com/llvm/llvm-project/commit/448d896519d2c7ec79dec75d1ec33c120a767659.diff
LOG: [PowerPC] Add coverage for select(icmp_sgt(x,y),sub(x,y),sub(y,x)) -> abds(x,y) patterns
Added:
Modified:
llvm/test/CodeGen/PowerPC/ppc64-P9-vabsd.ll
Removed:
################################################################################
diff --git a/llvm/test/CodeGen/PowerPC/ppc64-P9-vabsd.ll b/llvm/test/CodeGen/PowerPC/ppc64-P9-vabsd.ll
index 11672dc71efc..9e9271ed7c5d 100644
--- a/llvm/test/CodeGen/PowerPC/ppc64-P9-vabsd.ll
+++ b/llvm/test/CodeGen/PowerPC/ppc64-P9-vabsd.ll
@@ -1831,6 +1831,194 @@ define <16 x i8> @absd_int8_ule(<16 x i8>, <16 x i8>) {
ret <16 x i8> %6
}
+; Tests for ABDS icmp + sub + select sequence
+
+define <4 x i32> @absd_int32_sgt(<4 x i32>, <4 x i32>) { ; signed abs-diff idiom per i32 lane: (%0 > %1) ? %0 - %1 : %1 - %0
+; CHECK-LABEL: absd_int32_sgt:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcmpgtsw v4, v2, v3
+; CHECK-NEXT: vsubuwm v5, v2, v3
+; CHECK-NEXT: vsubuwm v2, v3, v2
+; CHECK-NEXT: xxsel v2, v2, v5, v4
+; CHECK-NEXT: blr
+ %3 = icmp sgt <4 x i32> %0, %1 ; lane mask: %0 > %1 (signed)
+ %4 = sub <4 x i32> %0, %1 ; %0 - %1
+ %5 = sub <4 x i32> %1, %0 ; %1 - %0
+ %6 = select <4 x i1> %3, <4 x i32> %4, <4 x i32> %5 ; %0 - %1 where the mask is set, else %1 - %0
+ ret <4 x i32> %6
+}
+
+define <4 x i32> @absd_int32_sge(<4 x i32>, <4 x i32>) { ; signed abs-diff idiom per i32 lane: (%0 >= %1) ? %0 - %1 : %1 - %0
+; CHECK-LABEL: absd_int32_sge:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcmpgtsw v4, v3, v2
+; CHECK-NEXT: xxlnor vs0, v4, v4
+; CHECK-NEXT: vsubuwm v4, v2, v3
+; CHECK-NEXT: vsubuwm v2, v3, v2
+; CHECK-NEXT: xxsel v2, v2, v4, vs0
+; CHECK-NEXT: blr
+ %3 = icmp sge <4 x i32> %0, %1 ; lane mask: %0 >= %1 (signed)
+ %4 = sub <4 x i32> %0, %1 ; %0 - %1
+ %5 = sub <4 x i32> %1, %0 ; %1 - %0
+ %6 = select <4 x i1> %3, <4 x i32> %4, <4 x i32> %5 ; %0 - %1 where the mask is set, else %1 - %0
+ ret <4 x i32> %6
+}
+
+define <4 x i32> @absd_int32_slt(<4 x i32>, <4 x i32>) { ; signed abs-diff idiom per i32 lane: (%0 < %1) ? %1 - %0 : %0 - %1
+; CHECK-LABEL: absd_int32_slt:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcmpgtsw v4, v3, v2
+; CHECK-NEXT: vsubuwm v5, v2, v3
+; CHECK-NEXT: vsubuwm v2, v3, v2
+; CHECK-NEXT: xxsel v2, v5, v2, v4
+; CHECK-NEXT: blr
+ %3 = icmp slt <4 x i32> %0, %1 ; lane mask: %0 < %1 (signed)
+ %4 = sub <4 x i32> %0, %1 ; %0 - %1
+ %5 = sub <4 x i32> %1, %0 ; %1 - %0
+ %6 = select <4 x i1> %3, <4 x i32> %5, <4 x i32> %4 ; arms swapped vs. the sgt form: %1 - %0 where the mask is set, else %0 - %1
+ ret <4 x i32> %6
+}
+
+define <4 x i32> @absd_int32_sle(<4 x i32>, <4 x i32>) { ; signed abs-diff idiom per i32 lane: (%0 <= %1) ? %1 - %0 : %0 - %1
+; CHECK-LABEL: absd_int32_sle:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcmpgtsw v4, v2, v3
+; CHECK-NEXT: xxlnor vs0, v4, v4
+; CHECK-NEXT: vsubuwm v4, v2, v3
+; CHECK-NEXT: vsubuwm v2, v3, v2
+; CHECK-NEXT: xxsel v2, v4, v2, vs0
+; CHECK-NEXT: blr
+ %3 = icmp sle <4 x i32> %0, %1 ; lane mask: %0 <= %1 (signed)
+ %4 = sub <4 x i32> %0, %1 ; %0 - %1
+ %5 = sub <4 x i32> %1, %0 ; %1 - %0
+ %6 = select <4 x i1> %3, <4 x i32> %5, <4 x i32> %4 ; arms swapped vs. the sge form: %1 - %0 where the mask is set, else %0 - %1
+ ret <4 x i32> %6
+}
+
+define <8 x i16> @absd_int16_sgt(<8 x i16>, <8 x i16>) { ; signed abs-diff idiom per i16 lane: (%0 > %1) ? %0 - %1 : %1 - %0
+; CHECK-LABEL: absd_int16_sgt:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcmpgtsh v4, v2, v3
+; CHECK-NEXT: vsubuhm v5, v2, v3
+; CHECK-NEXT: vsubuhm v2, v3, v2
+; CHECK-NEXT: xxsel v2, v2, v5, v4
+; CHECK-NEXT: blr
+ %3 = icmp sgt <8 x i16> %0, %1 ; lane mask: %0 > %1 (signed)
+ %4 = sub <8 x i16> %0, %1 ; %0 - %1
+ %5 = sub <8 x i16> %1, %0 ; %1 - %0
+ %6 = select <8 x i1> %3, <8 x i16> %4, <8 x i16> %5 ; %0 - %1 where the mask is set, else %1 - %0
+ ret <8 x i16> %6
+}
+
+define <8 x i16> @absd_int16_sge(<8 x i16>, <8 x i16>) { ; signed abs-diff idiom per i16 lane: (%0 >= %1) ? %0 - %1 : %1 - %0
+; CHECK-LABEL: absd_int16_sge:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcmpgtsh v4, v3, v2
+; CHECK-NEXT: vsubuhm v5, v2, v3
+; CHECK-NEXT: vsubuhm v2, v3, v2
+; CHECK-NEXT: xxlnor v4, v4, v4
+; CHECK-NEXT: xxsel v2, v2, v5, v4
+; CHECK-NEXT: blr
+ %3 = icmp sge <8 x i16> %0, %1 ; lane mask: %0 >= %1 (signed)
+ %4 = sub <8 x i16> %0, %1 ; %0 - %1
+ %5 = sub <8 x i16> %1, %0 ; %1 - %0
+ %6 = select <8 x i1> %3, <8 x i16> %4, <8 x i16> %5 ; %0 - %1 where the mask is set, else %1 - %0
+ ret <8 x i16> %6
+}
+
+define <8 x i16> @absd_int16_slt(<8 x i16>, <8 x i16>) { ; signed abs-diff idiom per i16 lane: (%0 < %1) ? %1 - %0 : %0 - %1
+; CHECK-LABEL: absd_int16_slt:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcmpgtsh v4, v3, v2
+; CHECK-NEXT: vsubuhm v5, v2, v3
+; CHECK-NEXT: vsubuhm v2, v3, v2
+; CHECK-NEXT: xxsel v2, v5, v2, v4
+; CHECK-NEXT: blr
+ %3 = icmp slt <8 x i16> %0, %1 ; lane mask: %0 < %1 (signed)
+ %4 = sub <8 x i16> %0, %1 ; %0 - %1
+ %5 = sub <8 x i16> %1, %0 ; %1 - %0
+ %6 = select <8 x i1> %3, <8 x i16> %5, <8 x i16> %4 ; arms swapped vs. the sgt form: %1 - %0 where the mask is set, else %0 - %1
+ ret <8 x i16> %6
+}
+
+define <8 x i16> @absd_int16_sle(<8 x i16>, <8 x i16>) { ; signed abs-diff idiom per i16 lane: (%0 <= %1) ? %1 - %0 : %0 - %1
+; CHECK-LABEL: absd_int16_sle:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcmpgtsh v4, v2, v3
+; CHECK-NEXT: vsubuhm v5, v2, v3
+; CHECK-NEXT: vsubuhm v2, v3, v2
+; CHECK-NEXT: xxlnor v4, v4, v4
+; CHECK-NEXT: xxsel v2, v5, v2, v4
+; CHECK-NEXT: blr
+ %3 = icmp sle <8 x i16> %0, %1 ; lane mask: %0 <= %1 (signed)
+ %4 = sub <8 x i16> %0, %1 ; %0 - %1
+ %5 = sub <8 x i16> %1, %0 ; %1 - %0
+ %6 = select <8 x i1> %3, <8 x i16> %5, <8 x i16> %4 ; arms swapped vs. the sge form: %1 - %0 where the mask is set, else %0 - %1
+ ret <8 x i16> %6
+}
+
+define <16 x i8> @absd_int8_sgt(<16 x i8>, <16 x i8>) { ; signed abs-diff idiom per i8 lane: (%0 > %1) ? %0 - %1 : %1 - %0
+; CHECK-LABEL: absd_int8_sgt:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcmpgtsb v4, v2, v3
+; CHECK-NEXT: vsububm v5, v2, v3
+; CHECK-NEXT: vsububm v2, v3, v2
+; CHECK-NEXT: xxsel v2, v2, v5, v4
+; CHECK-NEXT: blr
+ %3 = icmp sgt <16 x i8> %0, %1 ; lane mask: %0 > %1 (signed)
+ %4 = sub <16 x i8> %0, %1 ; %0 - %1
+ %5 = sub <16 x i8> %1, %0 ; %1 - %0
+ %6 = select <16 x i1> %3, <16 x i8> %4, <16 x i8> %5 ; %0 - %1 where the mask is set, else %1 - %0
+ ret <16 x i8> %6
+}
+
+define <16 x i8> @absd_int8_sge(<16 x i8>, <16 x i8>) { ; signed abs-diff idiom per i8 lane: (%0 >= %1) ? %0 - %1 : %1 - %0
+; CHECK-LABEL: absd_int8_sge:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcmpgtsb v4, v3, v2
+; CHECK-NEXT: vsububm v5, v2, v3
+; CHECK-NEXT: vsububm v2, v3, v2
+; CHECK-NEXT: xxlnor v4, v4, v4
+; CHECK-NEXT: xxsel v2, v2, v5, v4
+; CHECK-NEXT: blr
+ %3 = icmp sge <16 x i8> %0, %1 ; lane mask: %0 >= %1 (signed)
+ %4 = sub <16 x i8> %0, %1 ; %0 - %1
+ %5 = sub <16 x i8> %1, %0 ; %1 - %0
+ %6 = select <16 x i1> %3, <16 x i8> %4, <16 x i8> %5 ; %0 - %1 where the mask is set, else %1 - %0
+ ret <16 x i8> %6
+}
+
+define <16 x i8> @absd_int8_slt(<16 x i8>, <16 x i8>) { ; signed abs-diff idiom per i8 lane: (%0 < %1) ? %1 - %0 : %0 - %1
+; CHECK-LABEL: absd_int8_slt:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcmpgtsb v4, v3, v2
+; CHECK-NEXT: vsububm v5, v2, v3
+; CHECK-NEXT: vsububm v2, v3, v2
+; CHECK-NEXT: xxsel v2, v5, v2, v4
+; CHECK-NEXT: blr
+ %3 = icmp slt <16 x i8> %0, %1 ; lane mask: %0 < %1 (signed)
+ %4 = sub <16 x i8> %0, %1 ; %0 - %1
+ %5 = sub <16 x i8> %1, %0 ; %1 - %0
+ %6 = select <16 x i1> %3, <16 x i8> %5, <16 x i8> %4 ; arms swapped vs. the sgt form: %1 - %0 where the mask is set, else %0 - %1
+ ret <16 x i8> %6
+}
+
+define <16 x i8> @absd_int8_sle(<16 x i8>, <16 x i8>) { ; signed abs-diff idiom per i8 lane: (%0 <= %1) ? %1 - %0 : %0 - %1
+; CHECK-LABEL: absd_int8_sle:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vcmpgtsb v4, v2, v3
+; CHECK-NEXT: vsububm v5, v2, v3
+; CHECK-NEXT: vsububm v2, v3, v2
+; CHECK-NEXT: xxlnor v4, v4, v4
+; CHECK-NEXT: xxsel v2, v5, v2, v4
+; CHECK-NEXT: blr
+ %3 = icmp sle <16 x i8> %0, %1 ; lane mask: %0 <= %1 (signed)
+ %4 = sub <16 x i8> %0, %1 ; %0 - %1
+ %5 = sub <16 x i8> %1, %0 ; %1 - %0
+ %6 = select <16 x i1> %3, <16 x i8> %5, <16 x i8> %4 ; arms swapped vs. the sge form: %1 - %0 where the mask is set, else %0 - %1
+ ret <16 x i8> %6
+}
+
; some cases we are unable to optimize
; check whether goes beyond the scope
define <4 x i32> @absd_int32_ugt_opp(<4 x i32>, <4 x i32>) {
More information about the llvm-commits mailing list