[llvm] 272bd57 - [AArch64] Fix abs(sub nsw) -> abds

Ricardo Jesus via llvm-commits llvm-commits at lists.llvm.org
Wed Feb 22 01:17:44 PST 2023


Author: Ricardo Jesus
Date: 2023-02-22T09:17:25Z
New Revision: 272bd573dc4a2d0dc3c2102621bec32dd2f2988f

URL: https://github.com/llvm/llvm-project/commit/272bd573dc4a2d0dc3c2102621bec32dd2f2988f
DIFF: https://github.com/llvm/llvm-project/commit/272bd573dc4a2d0dc3c2102621bec32dd2f2988f.diff

LOG: [AArch64] Fix abs(sub nsw) -> abds

This partially reverts 8f25e382c5b1, which introduced a regression for
AArch64 targets. In particular, we restore the `(abs (sub nsw x, y))
-> abds(x, y)` fold for all targets except X86, which keeps the logic
introduced in 8f25e382c5b1. See also https://reviews.llvm.org/D142288.

Differential Revision: https://reviews.llvm.org/D144379
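
For reference, the restored fold matches IR of the following shape (a
minimal sketch mirroring the saba tests updated below; the function name is
illustrative):

  define <vscale x 2 x i64> @abds_pattern(<vscale x 2 x i64> %x, <vscale x 2 x i64> %y) {
    ; The nsw flag guarantees the subtraction cannot wrap, so the combiner
    ; may select ISD::ABDS (signed absolute difference) in place of the
    ; separate sub+abs sequence.
    %sub = sub nsw <vscale x 2 x i64> %x, %y
    %abs = call <vscale x 2 x i64> @llvm.abs.nxv2i64(<vscale x 2 x i64> %sub, i1 true)
    ret <vscale x 2 x i64> %abs
  }

  declare <vscale x 2 x i64> @llvm.abs.nxv2i64(<vscale x 2 x i64>, i1)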

Added: 
    

Modified: 
    llvm/include/llvm/CodeGen/TargetLowering.h
    llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
    llvm/lib/Target/X86/X86ISelLowering.cpp
    llvm/lib/Target/X86/X86ISelLowering.h
    llvm/test/CodeGen/AArch64/sve-saba.ll

Removed: 
    


################################################################################
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index e302ab931a40b..9a19a80b39255 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -795,6 +795,12 @@ class TargetLoweringBase {
     return true;
   }
 
+  // By default prefer folding (abs (sub nsw x, y)) -> abds(x, y). Some targets
+  // may want to avoid this to prevent loss of sub_nsw pattern.
+  virtual bool preferABDSToABSWithNSW(EVT VT) const {
+    return true;
+  }
+
   // Return true if the target wants to transform Op(Splat(X)) -> Splat(Op(X))
   virtual bool preferScalarizeSplat(unsigned Opc) const { return true; }
 

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 503b3c9e92e1d..5fdd241254682 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -10520,9 +10520,8 @@ SDValue DAGCombiner::foldABSToABD(SDNode *N) {
   if (Opc0 != Op1.getOpcode() ||
       (Opc0 != ISD::ZERO_EXTEND && Opc0 != ISD::SIGN_EXTEND)) {
     // fold (abs (sub nsw x, y)) -> abds(x, y)
-    // Limit this to legal ops to prevent loss of sub_nsw pattern.
-    if (AbsOp1->getFlags().hasNoSignedWrap() &&
-        TLI.isOperationLegal(ISD::ABDS, VT)) {
+    if (AbsOp1->getFlags().hasNoSignedWrap() && hasOperation(ISD::ABDS, VT) &&
+        TLI.preferABDSToABSWithNSW(VT)) {
       SDValue ABD = DAG.getNode(ISD::ABDS, DL, VT, Op0, Op1);
       return DAG.getZExtOrTrunc(ABD, DL, SrcVT);
     }
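
(Note: unlike the previous isOperationLegal() check, the combiner's
hasOperation() also accepts operations the target supports via custom
lowering, so the fold applies more broadly; targets that would rather keep
the sub nsw form can now opt out through the preferABDSToABSWithNSW() hook.)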

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 8d8f186b6baa8..1cbd249aaa91a 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -57116,6 +57116,10 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
   return SDValue();
 }
 
+bool X86TargetLowering::preferABDSToABSWithNSW(EVT VT) const {
+  return false;
+}
+
 bool X86TargetLowering::isTypeDesirableForOp(unsigned Opc, EVT VT) const {
   if (!isTypeLegal(VT))
     return false;
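
(X86 opts out unconditionally: keeping the explicit sub nsw preserves
patterns that later X86-specific combines rely on, per the comment added in
TargetLowering.h above and the discussion in
https://reviews.llvm.org/D142288.)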

diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index 454ac829a99ba..2f143a553ecb4 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -1046,6 +1046,8 @@ namespace llvm {
 
     SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
 
+    bool preferABDSToABSWithNSW(EVT VT) const override;
+
     /// Return true if the target has native support for
     /// the specified value type and it is 'desirable' to use the type for the
     /// given node type. e.g. On x86 i16 is legal, but undesirable since i16

diff --git a/llvm/test/CodeGen/AArch64/sve-saba.ll b/llvm/test/CodeGen/AArch64/sve-saba.ll
index 6877e190dd717..0022f1ba3450e 100644
--- a/llvm/test/CodeGen/AArch64/sve-saba.ll
+++ b/llvm/test/CodeGen/AArch64/sve-saba.ll
@@ -6,10 +6,7 @@
 define <vscale x 2 x i64> @saba_abs_d(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c) #0 {
 ; CHECK-LABEL: saba_abs_d:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    sub z1.d, z1.d, z2.d
-; CHECK-NEXT:    abs z1.d, p0/m, z1.d
-; CHECK-NEXT:    add z0.d, z0.d, z1.d
+; CHECK-NEXT:    saba z0.d, z1.d, z2.d
 ; CHECK-NEXT:    ret
   %sub = sub nsw <vscale x 2 x i64> %b, %c
   %abs = call <vscale x 2 x i64> @llvm.abs.nxv2i64(<vscale x 2 x i64> %sub, i1 true)
@@ -20,10 +17,7 @@ define <vscale x 2 x i64> @saba_abs_d(<vscale x 2 x i64> %a, <vscale x 2 x i64>
 define <vscale x 4 x i32> @saba_abs_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) #0 {
 ; CHECK-LABEL: saba_abs_s:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ptrue p0.s
-; CHECK-NEXT:    sub z1.s, z1.s, z2.s
-; CHECK-NEXT:    abs z1.s, p0/m, z1.s
-; CHECK-NEXT:    add z0.s, z0.s, z1.s
+; CHECK-NEXT:    saba z0.s, z1.s, z2.s
 ; CHECK-NEXT:    ret
   %sub = sub nsw <vscale x 4 x i32> %b, %c
   %abs = call <vscale x 4 x i32> @llvm.abs.nxv4i32(<vscale x 4 x i32> %sub, i1 true)
@@ -34,10 +28,7 @@ define <vscale x 4 x i32> @saba_abs_s(<vscale x 4 x i32> %a, <vscale x 4 x i32>
 define <vscale x 8 x i16> @saba_abs_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) #0 {
 ; CHECK-LABEL: saba_abs_h:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ptrue p0.h
-; CHECK-NEXT:    sub z1.h, z1.h, z2.h
-; CHECK-NEXT:    abs z1.h, p0/m, z1.h
-; CHECK-NEXT:    add z0.h, z0.h, z1.h
+; CHECK-NEXT:    saba z0.h, z1.h, z2.h
 ; CHECK-NEXT:    ret
   %sub = sub nsw <vscale x 8 x i16> %b, %c
   %abs = call <vscale x 8 x i16> @llvm.abs.nxv8i16(<vscale x 8 x i16> %sub, i1 true)
@@ -48,10 +39,7 @@ define <vscale x 8 x i16> @saba_abs_h(<vscale x 8 x i16> %a, <vscale x 8 x i16>
 define <vscale x 16 x i8> @saba_abs_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) #0 {
 ; CHECK-LABEL: saba_abs_b:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ptrue p0.b
-; CHECK-NEXT:    sub z1.b, z1.b, z2.b
-; CHECK-NEXT:    abs z1.b, p0/m, z1.b
-; CHECK-NEXT:    add z0.b, z0.b, z1.b
+; CHECK-NEXT:    saba z0.b, z1.b, z2.b
 ; CHECK-NEXT:    ret
   %sub = sub nsw <vscale x 16 x i8> %b, %c
   %abs = call <vscale x 16 x i8> @llvm.abs.nxv16i8(<vscale x 16 x i8> %sub, i1 true)
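
With the fold restored, each sub nsw + llvm.abs + add chain above is again
selected as a single saba (signed absolute difference and accumulate)
instruction. Running the file through llc with an SVE2-enabled AArch64
target (e.g. -mattr=+sve2) should reproduce the updated CHECK lines; this
assumes the test's existing RUN line, which is not shown in the diff, since
saba is an SVE2 instruction.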



