[llvm] a85f587 - [AArch64] Remove SETCC of CSEL when the latter's condition can be inverted
Bradley Smith via llvm-commits
llvm-commits at lists.llvm.org
Fri Jun 4 08:01:36 PDT 2021
Author: Bradley Smith
Date: 2021-06-04T15:53:21+01:00
New Revision: a85f5874e2a4d4bad8c53d277e9df183122793ae
URL: https://github.com/llvm/llvm-project/commit/a85f5874e2a4d4bad8c53d277e9df183122793ae
DIFF: https://github.com/llvm/llvm-project/commit/a85f5874e2a4d4bad8c53d277e9df183122793ae.diff
LOG: [AArch64] Remove SETCC of CSEL when the latter's condition can be inverted
setcc (csel 0, 1, cond, X), 1, ne ==> csel 0, 1, !cond, X
where X is a condition-code-setting instruction.
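Since csel 0, 1, cond, X yields 0 exactly when cond holds, testing that result against 1 with ne simply recomputes cond, so inverting the CSEL's condition produces the same value directly and lets the outer setcc reuse the flags already set by X. For instance, taking cond = eq purely as an illustration:

setcc (csel 0, 1, eq, X), 1, ne ==> csel 0, 1, ne, X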
Co-authored-by: Paul Walker <paul.walker at arm.com>
Differential Revision: https://reviews.llvm.org/D103256
Added:
Modified:
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
llvm/test/CodeGen/AArch64/sve-setcc.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 89da20e50336..b7646a6eb2e0 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -886,6 +886,9 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setTargetDAGCombine(ISD::FP_TO_UINT);
setTargetDAGCombine(ISD::FDIV);
+ // Try to combine setcc with csel
+ setTargetDAGCombine(ISD::SETCC);
+
setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
setTargetDAGCombine(ISD::ANY_EXTEND);
@@ -15370,6 +15373,35 @@ static SDValue performCSELCombine(SDNode *N,
return performCONDCombine(N, DCI, DAG, 2, 3);
}
+static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG) {
+ assert(N->getOpcode() == ISD::SETCC && "Unexpected opcode!");
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+ ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(2))->get();
+
+ // setcc (csel 0, 1, cond, X), 1, ne ==> csel 0, 1, !cond, X
+ if (Cond == ISD::SETNE && isOneConstant(RHS) &&
+ LHS->getOpcode() == AArch64ISD::CSEL &&
+ isNullConstant(LHS->getOperand(0)) && isOneConstant(LHS->getOperand(1)) &&
+ LHS->hasOneUse()) {
+ SDLoc DL(N);
+
+ // Invert CSEL's condition.
+ auto *OpCC = cast<ConstantSDNode>(LHS.getOperand(2));
+ auto OldCond = static_cast<AArch64CC::CondCode>(OpCC->getZExtValue());
+ auto NewCond = getInvertedCondCode(OldCond);
+
+ // csel 0, 1, !cond, X
+ SDValue CSEL =
+ DAG.getNode(AArch64ISD::CSEL, DL, LHS.getValueType(), LHS.getOperand(0),
+ LHS.getOperand(1), DAG.getConstant(NewCond, DL, MVT::i32),
+ LHS.getOperand(3));
+ return DAG.getZExtOrTrunc(CSEL, DL, N->getValueType(0));
+ }
+
+ return SDValue();
+}
+
// Optimize some simple tbz/tbnz cases. Returns the new operand and bit to test
// as well as whether the test should be inverted. This code is required to
// catch these cases (as opposed to standard dag combines) because
@@ -16207,6 +16239,8 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
return performSelectCombine(N, DCI);
case ISD::VSELECT:
return performVSelectCombine(N, DCI.DAG);
+ case ISD::SETCC:
+ return performSETCCCombine(N, DAG);
case ISD::LOAD:
if (performTBISimplification(N->getOperand(1), DCI, DAG))
return SDValue(N, 0);
diff --git a/llvm/test/CodeGen/AArch64/sve-setcc.ll b/llvm/test/CodeGen/AArch64/sve-setcc.ll
index 15c277796f38..191e988f5714 100644
--- a/llvm/test/CodeGen/AArch64/sve-setcc.ll
+++ b/llvm/test/CodeGen/AArch64/sve-setcc.ll
@@ -1,5 +1,23 @@
; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve < %s | FileCheck %s
+; Ensure we use the CC result of SVE compare instructions when branching.
+define void @sve_cmplt_setcc(<vscale x 8 x i16>* %out, <vscale x 8 x i16> %in, <vscale x 8 x i1> %pg) {
+; CHECK-LABEL: @sve_cmplt_setcc
+; CHECK: cmplt p1.h, p0/z, z0.h, #0
+; CHECK-NEXT: b.eq
+entry:
+ %0 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.cmplt.wide.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %in, <vscale x 2 x i64> zeroinitializer)
+ %1 = tail call i1 @llvm.aarch64.sve.ptest.any.nxv8i1(<vscale x 8 x i1> %pg, <vscale x 8 x i1> %0)
+ br i1 %1, label %if.then, label %if.end
+
+if.then:
+ tail call void @llvm.masked.store.nxv8i16.p0nxv8i16(<vscale x 8 x i16> %in, <vscale x 8 x i16>* %out, i32 2, <vscale x 8 x i1> %pg)
+ br label %if.end
+
+if.end:
+ ret void
+}
+
; Ensure we use the inverted CC result of SVE compare instructions when branching.
define void @sve_cmplt_setcc_inverted(<vscale x 8 x i16>* %out, <vscale x 8 x i16> %in, <vscale x 8 x i1> %pg) {
; CHECK-LABEL: @sve_cmplt_setcc_inverted
@@ -18,7 +36,26 @@ if.end:
ret void
}
+; Ensure we combine setcc and csel so that we do not end up with an extra compare.
+define void @sve_cmplt_setcc_hslo(<vscale x 8 x i16>* %out, <vscale x 8 x i16> %in, <vscale x 8 x i1> %pg) {
+; CHECK-LABEL: @sve_cmplt_setcc_hslo
+; CHECK: cmplt p1.h, p0/z, z0.h, #0
+; CHECK-NEXT: b.hs
+entry:
+ %0 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.cmplt.wide.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %in, <vscale x 2 x i64> zeroinitializer)
+ %1 = tail call i1 @llvm.aarch64.sve.ptest.last.nxv8i1(<vscale x 8 x i1> %pg, <vscale x 8 x i1> %0)
+ br i1 %1, label %if.then, label %if.end
+
+if.then:
+ tail call void @llvm.masked.store.nxv8i16.p0nxv8i16(<vscale x 8 x i16> %in, <vscale x 8 x i16>* %out, i32 2, <vscale x 8 x i1> %pg)
+ br label %if.end
+
+if.end:
+ ret void
+}
+
declare i1 @llvm.aarch64.sve.ptest.any.nxv8i1(<vscale x 8 x i1>, <vscale x 8 x i1>)
+declare i1 @llvm.aarch64.sve.ptest.last.nxv8i1(<vscale x 8 x i1>, <vscale x 8 x i1>)
declare <vscale x 8 x i1> @llvm.aarch64.sve.cmplt.wide.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 2 x i64>)
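As a sanity check on the equivalence performSETCCCombine relies on, here is a minimal standalone C++ sketch (hypothetical model code, not part of the patch; csel01 merely mimics AArch64ISD::CSEL with constant 0 and 1 operands):

#include <cassert>

// Hypothetical stand-in for AArch64ISD::CSEL with constant operands:
// selects 0 when Cond holds and 1 otherwise, i.e. csel 0, 1, cond, X.
static int csel01(bool Cond) { return Cond ? 0 : 1; }

int main() {
  for (bool Cond : {false, true}) {
    // Before the combine: setcc (csel 0, 1, cond, X), 1, ne
    bool Before = csel01(Cond) != 1;
    // After the combine: csel 0, 1, !cond, X
    int After = csel01(!Cond);
    assert(static_cast<int>(Before) == After);
  }
  return 0;
}

The hasOneUse() check in the patch guarantees the SETCC is the CSEL's only user, so rewriting the CSEL's condition in place cannot pessimize any other consumer of the original value.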