[llvm] 442c351 - Revert "[AArch64] Generate AND in place of CSEL for predicated CTTZ"

David Green via llvm-commits llvm-commits at lists.llvm.org
Tue May 10 09:17:07 PDT 2022


Author: David Green
Date: 2022-05-10T17:17:03+01:00
New Revision: 442c351b2bb1f99a4dc58f66660ce0f282f55b95

URL: https://github.com/llvm/llvm-project/commit/442c351b2bb1f99a4dc58f66660ce0f282f55b95
DIFF: https://github.com/llvm/llvm-project/commit/442c351b2bb1f99a4dc58f66660ce0f282f55b95.diff

LOG: Revert "[AArch64] Generate AND in place of CSEL for predicated CTTZ"

This reverts commit 7dcd0ea683ed3175bc3ec6aed24901a9d504182e due to
issues reported postcommit with the correctness of truncated cttzs.

Added: 
    

Modified: 
    llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Removed: 
    llvm/test/CodeGen/AArch64/fold-csel-cttz-and.ll


################################################################################
diff  --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index d9d05716187e..2759c6b23bc5 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -17630,45 +17630,6 @@ static SDValue performBRCONDCombine(SDNode *N,
   return SDValue();
 }
 
-static SDValue foldCSELofCTTZ(SDNode *N, SelectionDAG &DAG) {
-  unsigned CC = N->getConstantOperandVal(2);
-  SDValue SUBS = N->getOperand(3);
-  SDValue Zero, CTTZ;
-
-  if (CC == AArch64CC::EQ && SUBS.getOpcode() == AArch64ISD::SUBS) {
-    Zero = N->getOperand(0);
-    CTTZ = N->getOperand(1);
-  } else if (CC == AArch64CC::NE && SUBS.getOpcode() == AArch64ISD::SUBS) {
-    Zero = N->getOperand(1);
-    CTTZ = N->getOperand(0);
-  } else
-    return SDValue();
-
-  if ((CTTZ.getOpcode() != ISD::CTTZ && CTTZ.getOpcode() != ISD::TRUNCATE) ||
-      (CTTZ.getOpcode() == ISD::TRUNCATE &&
-       CTTZ.getOperand(0).getOpcode() != ISD::CTTZ))
-    return SDValue();
-
-  assert((CTTZ.getValueType() == MVT::i32 || CTTZ.getValueType() == MVT::i64) &&
-         "Illegal type in CTTZ folding");
-
-  if (!isNullConstant(Zero) || !isNullConstant(SUBS.getOperand(1)))
-    return SDValue();
-
-  SDValue X = CTTZ.getOpcode() == ISD::TRUNCATE
-                  ? CTTZ.getOperand(0).getOperand(0)
-                  : CTTZ.getOperand(0);
-
-  if (X != SUBS.getOperand(0))
-    return SDValue();
-
-  unsigned BitWidth = CTTZ.getValueSizeInBits();
-  SDValue BitWidthMinusOne =
-      DAG.getConstant(BitWidth - 1, SDLoc(N), CTTZ.getValueType());
-  return DAG.getNode(ISD::AND, SDLoc(N), CTTZ.getValueType(), CTTZ,
-                     BitWidthMinusOne);
-}
-
 // Optimize CSEL instructions
 static SDValue performCSELCombine(SDNode *N,
                                   TargetLowering::DAGCombinerInfo &DCI,
@@ -17677,11 +17638,6 @@ static SDValue performCSELCombine(SDNode *N,
   if (N->getOperand(0) == N->getOperand(1))
     return N->getOperand(0);
 
-  // CSEL 0, cttz(X), eq(X, 0) -> AND cttz bitwidth-1
-  // CSEL cttz(X), 0, ne(X, 0) -> AND cttz bitwidth-1
-  if (SDValue Folded = foldCSELofCTTZ(N, DAG))
-    return Folded;
-
   return performCONDCombine(N, DCI, DAG, 2, 3);
 }
 

diff  --git a/llvm/test/CodeGen/AArch64/fold-csel-cttz-and.ll b/llvm/test/CodeGen/AArch64/fold-csel-cttz-and.ll
deleted file mode 100644
index ce69100f8c4f..000000000000
--- a/llvm/test/CodeGen/AArch64/fold-csel-cttz-and.ll
+++ /dev/null
@@ -1,160 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=aarch64 < %s | FileCheck %s
-
-;; Check the transformation
-;; CSEL 0, cttz, cc -> AND cttz numbits-1
-;; for cttz in the case of i32 and i64 respectively
-
-;; Cases for which the optimization takes place
-define i32 @cttzi32(i32 %x) {
-; CHECK-LABEL: cttzi32:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    rbit w8, w0
-; CHECK-NEXT:    clz w8, w8
-; CHECK-NEXT:    and w0, w8, #0x1f
-; CHECK-NEXT:    ret
-entry:
-  %0 = call i32 @llvm.cttz.i32(i32 %x, i1 true)
-  %1 = icmp eq i32 %x, 0
-  %2 = select i1 %1, i32 0, i32 %0
-  ret i32 %2
-}
-
-define i64 @cttzi64(i64 %x) {
-; CHECK-LABEL: cttzi64:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    rbit x8, x0
-; CHECK-NEXT:    clz x8, x8
-; CHECK-NEXT:    and x0, x8, #0x3f
-; CHECK-NEXT:    ret
-entry:
-  %0 = call i64 @llvm.cttz.i64(i64 %x, i1 true)
-  %1 = icmp eq i64 %x, 0
-  %2 = select i1 %1, i64 0, i64 %0
-  ret i64 %2
-}
-
-define i32 @cttzi32ne(i32 %x) {
-; CHECK-LABEL: cttzi32ne:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    rbit w8, w0
-; CHECK-NEXT:    clz w8, w8
-; CHECK-NEXT:    and w0, w8, #0x1f
-; CHECK-NEXT:    ret
-entry:
-  %0 = call i32 @llvm.cttz.i32(i32 %x, i1 true)
-  %1 = icmp ne i32 %x, 0
-  %2 = select i1 %1, i32 %0, i32 0
-  ret i32 %2
-}
-
-define i64 @cttzi64ne(i64 %x) {
-; CHECK-LABEL: cttzi64ne:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    rbit x8, x0
-; CHECK-NEXT:    clz x8, x8
-; CHECK-NEXT:    and x0, x8, #0x3f
-; CHECK-NEXT:    ret
-entry:
-  %0 = call i64 @llvm.cttz.i64(i64 %x, i1 true)
-  %1 = icmp ne i64 %x, 0
-  %2 = select i1 %1, i64 %0, i64 0
-  ret i64 %2
-}
-
-define i32 @cttztrunc(i64 %x) {
-; CHECK-LABEL: cttztrunc:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    rbit x8, x0
-; CHECK-NEXT:    clz x8, x8
-; CHECK-NEXT:    and w0, w8, #0x1f
-; CHECK-NEXT:    ret
-entry:
-  %0 = call i64 @llvm.cttz.i64(i64 %x, i1 true)
-  %1 = icmp eq i64 %x, 0
-  %2 = select i1 %1, i64 0, i64 %0
-  %3 = trunc i64 %2 to i32
-  ret i32 %3
-}
-
-;; Cases for which the optimization does not take place
-define i32 @cttzne(i32 %x) {
-; CHECK-LABEL: cttzne:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    rbit w8, w0
-; CHECK-NEXT:    cmp w0, #0
-; CHECK-NEXT:    clz w8, w8
-; CHECK-NEXT:    csel w0, wzr, w8, ne
-; CHECK-NEXT:    ret
-entry:
-  %0 = call i32 @llvm.cttz.i32(i32 %x, i1 true)
-  %1 = icmp ne i32 %x, 0
-  %2 = select i1 %1, i32 0, i32 %0
-  ret i32 %2
-}
-
-define i32 @cttzxnot0(i32 %x) {
-; CHECK-LABEL: cttzxnot0:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    rbit w8, w0
-; CHECK-NEXT:    cmp w0, #10
-; CHECK-NEXT:    clz w8, w8
-; CHECK-NEXT:    csel w0, wzr, w8, eq
-; CHECK-NEXT:    ret
-entry:
-  %0 = call i32 @llvm.cttz.i32(i32 %x, i1 true)
-  %1 = icmp eq i32 %x, 10
-  %2 = select i1 %1, i32 0, i32 %0
-  ret i32 %2
-}
-
-define i32 @cttzlhsnot0(i32 %x) {
-; CHECK-LABEL: cttzlhsnot0:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    rbit w9, w0
-; CHECK-NEXT:    mov w8, #10
-; CHECK-NEXT:    clz w9, w9
-; CHECK-NEXT:    cmp w0, #0
-; CHECK-NEXT:    csel w0, w8, w9, eq
-; CHECK-NEXT:    ret
-entry:
-  %0 = call i32 @llvm.cttz.i32(i32 %x, i1 true)
-  %1 = icmp eq i32 %x, 0
-  %2 = select i1 %1, i32 10, i32 %0
-  ret i32 %2
-}
-
-define i32 @notcttz(i32 %x) {
-; CHECK-LABEL: notcttz:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    clz w8, w0
-; CHECK-NEXT:    cmp w0, #0
-; CHECK-NEXT:    csel w0, wzr, w8, eq
-; CHECK-NEXT:    ret
-entry:
-  %0 = call i32 @llvm.ctlz.i32(i32 %x, i1 true)
-  %1 = icmp eq i32 %x, 0
-  %2 = select i1 %1, i32 0, i32 %0
-  ret i32 %2
-}
-
-define i32 @cttzlhsnotx(i32 %x, i32 %y) {
-; CHECK-LABEL: cttzlhsnotx:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    rbit w8, w0
-; CHECK-NEXT:    cmp w1, #0
-; CHECK-NEXT:    clz w8, w8
-; CHECK-NEXT:    csel w0, wzr, w8, eq
-; CHECK-NEXT:    ret
-entry:
-  %0 = call i32 @llvm.cttz.i32(i32 %x, i1 true)
-  %1 = icmp eq i32 %y, 0
-  %2 = select i1 %1, i32 0, i32 %0
-  ret i32 %2
-}
-
-declare i32 @llvm.cttz.i32(i32, i1)
-
-declare i64 @llvm.cttz.i64(i64, i1)
-
-declare i32 @llvm.ctlz.i32(i32, i1)


        


More information about the llvm-commits mailing list