<html>
    <head>
      <base href="https://bugs.llvm.org/">
    </head>
    <body><table border="1" cellspacing="0" cellpadding="8">
        <tr>
          <th>Bug ID</th>
          <td><a class="bz_bug_link 
          bz_status_NEW "
   title="NEW - [AArch64] Missed CMN opportunities"
   href="https://bugs.llvm.org/show_bug.cgi?id=33486">33486</a>
          </td>
        </tr>

        <tr>
          <th>Summary</th>
          <td>[AArch64] Missed CMN opportunities
          </td>
        </tr>

        <tr>
          <th>Product</th>
          <td>libraries
          </td>
        </tr>

        <tr>
          <th>Version</th>
          <td>trunk
          </td>
        </tr>

        <tr>
          <th>Hardware</th>
          <td>PC
          </td>
        </tr>

        <tr>
          <th>OS</th>
          <td>Linux
          </td>
        </tr>

        <tr>
          <th>Status</th>
          <td>NEW
          </td>
        </tr>

        <tr>
          <th>Severity</th>
          <td>enhancement
          </td>
        </tr>

        <tr>
          <th>Priority</th>
          <td>P
          </td>
        </tr>

        <tr>
          <th>Component</th>
          <td>Backend: AArch64
          </td>
        </tr>

        <tr>
          <th>Assignee</th>
          <td>unassignedbugs@nondot.org
          </td>
        </tr>

        <tr>
          <th>Reporter</th>
          <td>mcrosier@codeaurora.org
          </td>
        </tr>

        <tr>
          <th>CC</th>
          <td>llvm-bugs@lists.llvm.org
          </td>
        </tr></table>
      <p>
        <div>
        <pre>Tests:
bool test1(int a, int b) { return (a + b) == 0; }
bool test2(int a, int b) { return (a + b) != 0; }

Clang currently generates:
test1:                                  // @test1
// BB#0:                                // %entry
        neg      w8, w1
        cmp             w8, w0
        cset     w0, eq
        ret
test2:                                  // @test2
// BB#0:                                // %entry
        neg      w8, w1
        cmp             w8, w0
        cset     w0, ne
        ret

However, we should generate:
test1:                                  // @test1
// BB#0:                                // %entry
        cmn             w0, w1
        cset     w0, eq
        ret
test2:                                  // @test2
// BB#0:                                // %entry
        cmn             w0, w1
        cset     w0, ne
        ret

Reassociation will canonicalizes to these tests to 0-x == y and 0-x != y.  And
then for X86 a DAG combine will covert these back to x+y == 0 and x+y != 0,
respectively.

Here's a patch for AArch64 that is a straight cut and paste from X86 to
AArch64:
diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp
b/lib/Target/AArch64/AArch64ISelLowering.cpp
index 083ca21..8286838 100644
--- a/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -545,6 +545,7 @@ AArch64TargetLowering::AArch64TargetLowering(const
TargetMachine &TM,

   setTargetDAGCombine(ISD::MUL);

+  setTargetDAGCombine(ISD::SETCC);
   setTargetDAGCombine(ISD::SELECT);
   setTargetDAGCombine(ISD::VSELECT);

@@ -10204,6 +10205,34 @@ static SDValue performNVCASTCombine(SDNode *N) {
   return SDValue();
 }

+static SDValue performSetCCCombine(SDNode *N, SelectionDAG &DAG) {
+  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
+  if (CC != ISD::SETNE && CC != ISD::SETEQ)
+    return SDValue();
+
+  EVT VT = N->getValueType(0);
+  SDValue LHS = N->getOperand(0);
+  SDValue RHS = N->getOperand(1);
+
+  SDLoc DL(N);
+  EVT OpVT = LHS.getValueType();
+  // 0-x == y --> x+y == 0
+  // 0-x != y --> x+y != 0
+  if (LHS.getOpcode() == ISD::SUB && isNullConstant(LHS.getOperand(0)) &&
+    LHS.hasOneUse()) {
+  SDValue Add = DAG.getNode(ISD::ADD, DL, OpVT, RHS, LHS.getOperand(1));
+  return DAG.getSetCC(DL, VT, Add, DAG.getConstant(0, DL, OpVT), CC);
+  }
+  // x == 0-y --> x+y == 0
+  // x != 0-y --> x+y != 0
+  if (RHS.getOpcode() == ISD::SUB && isNullConstant(RHS.getOperand(0)) &&
+    RHS.hasOneUse()) {
+    SDValue Add = DAG.getNode(ISD::ADD, DL, OpVT, LHS, RHS.getOperand(1));
+    return DAG.getSetCC(DL, VT, Add, DAG.getConstant(0, DL, OpVT), CC);
+  }
+  return SDValue();
+}
+
 SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
                                                  DAGCombinerInfo &DCI) const {
   SelectionDAG &DAG = DCI.DAG;
@@ -10249,6 +10278,8 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode
*N,
     break;
   case ISD::STORE:
     return performSTORECombine(N, DCI, DAG, Subtarget);
+  case ISD::SETCC:
+    return performSetCCCombine(N, DAG);
   case AArch64ISD::BRCOND:
     return performBRCONDCombine(N, DCI, DAG);
   case AArch64ISD::TBNZ:


The above patch will fix the problem, but I'm not sure it's the best solution
and it doesn't impact any of the workloads I care about.  Therefore, I'm not
interested in pursuing this further, but figured I'd file a bug..</pre>
        </div>
      </p>


      <hr>
      <span>You are receiving this mail because:</span>

      <ul>
          <li>You are on the CC list for the bug.</li>
      </ul>
    </body>
</html>