[llvm] 21df504 - [DAG][ARM][AArch64] Transform max(a, b) - min(a, b) -> abd(a,b)
David Green via llvm-commits
llvm-commits at lists.llvm.org
Wed Jan 18 03:44:31 PST 2023
Author: David Green
Date: 2023-01-18T11:44:26Z
New Revision: 21df504399c27e904b7ad2abd270ae21582a1fb2
URL: https://github.com/llvm/llvm-project/commit/21df504399c27e904b7ad2abd270ae21582a1fb2
DIFF: https://github.com/llvm/llvm-project/commit/21df504399c27e904b7ad2abd270ae21582a1fb2.diff
LOG: [DAG][ARM][AArch64] Transform max(a,b) - min(a,b) -> abd(a,b)
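This adds DAG combines for both the signed and unsigned forms of
max(a, b) - min(a, b) -> abd(a, b): smax/smin becomes abds and umax/umin
becomes abdu, provided the abd operation is legal or custom for the type.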
unsigned: https://alive2.llvm.org/ce/z/RF4jGQ
signed: https://alive2.llvm.org/ce/z/Cjr2zE
Fixes: #59894
Differential Revision: https://reviews.llvm.org/D141706
Added:
Modified:
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
llvm/test/CodeGen/AArch64/neon-abd.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index bc9a7c4392d7..0271ffc3f3df 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -3789,6 +3789,24 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
}
}
+ // max(a,b) - min(a,b) --> abd(a,b)
+ auto MatchSubMaxMin = [&](unsigned Max, unsigned Min, unsigned Abd) {
+ if (N0.getOpcode() != Max || N1.getOpcode() != Min)
+ return SDValue();
+ if ((N0.getOperand(0) != N1.getOperand(0) ||
+ N0.getOperand(1) != N1.getOperand(1)) &&
+ (N0.getOperand(0) != N1.getOperand(1) ||
+ N0.getOperand(1) != N1.getOperand(0)))
+ return SDValue();
+ if (!TLI.isOperationLegalOrCustom(Abd, VT))
+ return SDValue();
+ return DAG.getNode(Abd, DL, VT, N0.getOperand(0), N0.getOperand(1));
+ };
+ if (SDValue R = MatchSubMaxMin(ISD::SMAX, ISD::SMIN, ISD::ABDS))
+ return R;
+ if (SDValue R = MatchSubMaxMin(ISD::UMAX, ISD::UMIN, ISD::ABDU))
+ return R;
+
return SDValue();
}
diff --git a/llvm/test/CodeGen/AArch64/neon-abd.ll b/llvm/test/CodeGen/AArch64/neon-abd.ll
index 2c965df550ba..0d22470b01a9 100644
--- a/llvm/test/CodeGen/AArch64/neon-abd.ll
+++ b/llvm/test/CodeGen/AArch64/neon-abd.ll
@@ -450,9 +450,7 @@ define <2 x i64> @sabd_v2i64_nsw(<2 x i64> %a, <2 x i64> %b) #0 {
define <16 x i8> @smaxmin_v16i8(<16 x i8> %0, <16 x i8> %1) {
; CHECK-LABEL: smaxmin_v16i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: smax v2.16b, v0.16b, v1.16b
-; CHECK-NEXT: smin v0.16b, v0.16b, v1.16b
-; CHECK-NEXT: sub v0.16b, v2.16b, v0.16b
+; CHECK-NEXT: sabd v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ret
%a = tail call <16 x i8> @llvm.smax.v16i8(<16 x i8> %0, <16 x i8> %1)
%b = tail call <16 x i8> @llvm.smin.v16i8(<16 x i8> %0, <16 x i8> %1)
@@ -463,9 +461,7 @@ define <16 x i8> @smaxmin_v16i8(<16 x i8> %0, <16 x i8> %1) {
define <8 x i16> @smaxmin_v8i16(<8 x i16> %0, <8 x i16> %1) {
; CHECK-LABEL: smaxmin_v8i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: smax v2.8h, v0.8h, v1.8h
-; CHECK-NEXT: smin v0.8h, v0.8h, v1.8h
-; CHECK-NEXT: sub v0.8h, v2.8h, v0.8h
+; CHECK-NEXT: sabd v0.8h, v0.8h, v1.8h
; CHECK-NEXT: ret
%a = tail call <8 x i16> @llvm.smax.v8i16(<8 x i16> %0, <8 x i16> %1)
%b = tail call <8 x i16> @llvm.smin.v8i16(<8 x i16> %0, <8 x i16> %1)
@@ -476,9 +472,7 @@ define <8 x i16> @smaxmin_v8i16(<8 x i16> %0, <8 x i16> %1) {
define <4 x i32> @smaxmin_v4i32(<4 x i32> %0, <4 x i32> %1) {
; CHECK-LABEL: smaxmin_v4i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: smax v2.4s, v0.4s, v1.4s
-; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s
-; CHECK-NEXT: sub v0.4s, v2.4s, v0.4s
+; CHECK-NEXT: sabd v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
%a = tail call <4 x i32> @llvm.smax.v4i32(<4 x i32> %0, <4 x i32> %1)
%b = tail call <4 x i32> @llvm.smin.v4i32(<4 x i32> %0, <4 x i32> %1)
@@ -504,9 +498,7 @@ define <2 x i64> @smaxmin_v2i64(<2 x i64> %0, <2 x i64> %1) {
define <16 x i8> @umaxmin_v16i8(<16 x i8> %0, <16 x i8> %1) {
; CHECK-LABEL: umaxmin_v16i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: umax v2.16b, v0.16b, v1.16b
-; CHECK-NEXT: umin v0.16b, v0.16b, v1.16b
-; CHECK-NEXT: sub v0.16b, v2.16b, v0.16b
+; CHECK-NEXT: uabd v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ret
%a = tail call <16 x i8> @llvm.umax.v16i8(<16 x i8> %0, <16 x i8> %1)
%b = tail call <16 x i8> @llvm.umin.v16i8(<16 x i8> %0, <16 x i8> %1)
@@ -517,9 +509,7 @@ define <16 x i8> @umaxmin_v16i8(<16 x i8> %0, <16 x i8> %1) {
define <8 x i16> @umaxmin_v8i16(<8 x i16> %0, <8 x i16> %1) {
; CHECK-LABEL: umaxmin_v8i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: umax v2.8h, v0.8h, v1.8h
-; CHECK-NEXT: umin v0.8h, v0.8h, v1.8h
-; CHECK-NEXT: sub v0.8h, v2.8h, v0.8h
+; CHECK-NEXT: uabd v0.8h, v0.8h, v1.8h
; CHECK-NEXT: ret
%a = tail call <8 x i16> @llvm.umax.v8i16(<8 x i16> %0, <8 x i16> %1)
%b = tail call <8 x i16> @llvm.umin.v8i16(<8 x i16> %0, <8 x i16> %1)
@@ -530,9 +520,7 @@ define <8 x i16> @umaxmin_v8i16(<8 x i16> %0, <8 x i16> %1) {
define <4 x i32> @umaxmin_v4i32(<4 x i32> %0, <4 x i32> %1) {
; CHECK-LABEL: umaxmin_v4i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: umax v2.4s, v0.4s, v1.4s
-; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s
-; CHECK-NEXT: sub v0.4s, v2.4s, v0.4s
+; CHECK-NEXT: uabd v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
%a = tail call <4 x i32> @llvm.umax.v4i32(<4 x i32> %0, <4 x i32> %1)
%b = tail call <4 x i32> @llvm.umin.v4i32(<4 x i32> %0, <4 x i32> %1)
@@ -558,9 +546,7 @@ define <2 x i64> @umaxmin_v2i64(<2 x i64> %0, <2 x i64> %1) {
define <16 x i8> @umaxmin_v16i8_com1(<16 x i8> %0, <16 x i8> %1) {
; CHECK-LABEL: umaxmin_v16i8_com1:
; CHECK: // %bb.0:
-; CHECK-NEXT: umax v2.16b, v0.16b, v1.16b
-; CHECK-NEXT: umin v0.16b, v1.16b, v0.16b
-; CHECK-NEXT: sub v0.16b, v2.16b, v0.16b
+; CHECK-NEXT: uabd v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ret
%a = tail call <16 x i8> @llvm.umax.v16i8(<16 x i8> %0, <16 x i8> %1)
%b = tail call <16 x i8> @llvm.umin.v16i8(<16 x i8> %1, <16 x i8> %0)
More information about the llvm-commits mailing list