[llvm] r268858 - [x86, BMI] add TLI hook for 'andn' and use it to simplify comparisons

Sanjay Patel via llvm-commits llvm-commits at lists.llvm.org
Sat May 7 08:03:41 PDT 2016


Author: spatel
Date: Sat May  7 10:03:40 2016
New Revision: 268858

URL: http://llvm.org/viewvc/llvm-project?rev=268858&view=rev
Log:
[x86, BMI] add TLI hook for 'andn' and use it to simplify comparisons

For the sake of minimalism, this patch is x86 only, but I think that at least
PPC, ARM, AArch64, and Sparc probably want to do this too.

We might want to generalize the hook and pattern recognition for a target like
PPC that has a full assortment of negated logic ops (orc, nand).

Note that http://reviews.llvm.org/D18842 will cause this transform to trigger
more often.

For reference, this relates to:
https://llvm.org/bugs/show_bug.cgi?id=27105
https://llvm.org/bugs/show_bug.cgi?id=27202
https://llvm.org/bugs/show_bug.cgi?id=27203
https://llvm.org/bugs/show_bug.cgi?id=27328

Differential Revision: http://reviews.llvm.org/D19087


Modified:
    llvm/trunk/include/llvm/Target/TargetLowering.h
    llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp
    llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
    llvm/trunk/lib/Target/X86/X86ISelLowering.h
    llvm/trunk/test/CodeGen/X86/bmi.ll

Modified: llvm/trunk/include/llvm/Target/TargetLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Target/TargetLowering.h?rev=268858&r1=268857&r2=268858&view=diff
==============================================================================
--- llvm/trunk/include/llvm/Target/TargetLowering.h (original)
+++ llvm/trunk/include/llvm/Target/TargetLowering.h Sat May  7 10:03:40 2016
@@ -334,6 +334,22 @@ public:
     return MaskAndBranchFoldingIsLegal;
   }
 
+  /// Return true if the target should transform:
+  /// (X & Y) == Y ---> (~X & Y) == 0
+  /// (X & Y) != Y ---> (~X & Y) != 0
+  ///
+  /// This may be profitable if the target has a bitwise and-not operation that
+  /// sets comparison flags. A target may want to limit the transformation based
+  /// on the type of Y or if Y is a constant.
+  ///
+  /// Note that the transform will not occur if Y is known to be a power-of-2
+  /// because a mask and compare of a single bit can be handled by inverting the
+  /// predicate, for example:
+  /// (X & 8) == 8 ---> (X & 8) != 0
+  virtual bool hasAndNotCompare(SDValue Y) const {
+    return false;
+  }
+
   /// \brief Return true if the target wants to use the optimization that
   /// turns ext(promotableInst1(...(promotableInstN(load)))) into
   /// promotedInst1(...(promotedInstN(ext(load)))).

Modified: llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp?rev=268858&r1=268857&r2=268858&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp Sat May  7 10:03:40 2016
@@ -1304,6 +1304,52 @@ bool TargetLowering::isExtendedTrueVal(c
   llvm_unreachable("Unexpected enumeration.");
 }
 
+/// If the target supports an 'and-not' or 'and-complement' logic operation,
+/// try to use that to make a comparison operation more efficient.
+static SDValue createAndNotSetCC(EVT VT, SDValue N0, SDValue N1,
+                                 ISD::CondCode Cond, SelectionDAG &DAG,
+                                 SDLoc dl) {
+  // Match these patterns in any of their permutations:
+  // (X & Y) == Y
+  // (X & Y) != Y
+  if (N1.getOpcode() == ISD::AND && N0.getOpcode() != ISD::AND)
+    std::swap(N0, N1);
+
+  if (N0.getOpcode() != ISD::AND || !N0.hasOneUse() ||
+      (Cond != ISD::SETEQ && Cond != ISD::SETNE))
+    return SDValue();
+
+  SDValue X, Y;
+  if (N0.getOperand(0) == N1) {
+    X = N0.getOperand(1);
+    Y = N0.getOperand(0);
+  } else if (N0.getOperand(1) == N1) {
+    X = N0.getOperand(0);
+    Y = N0.getOperand(1);
+  } else {
+    return SDValue();
+  }
+
+  // Bail out if the compare operand that we want to turn into a zero is already
+  // a zero (otherwise, infinite loop).
+  auto *YConst = dyn_cast<ConstantSDNode>(Y);
+  if (YConst && YConst->isNullValue())
+    return SDValue();
+
+  // We don't want to do this transform if the mask is a single bit because
+  // there are more efficient ways to deal with that case (for example, 'bt' on
+  // x86 or 'rlwinm' on PPC).
+  if (!DAG.getTargetLoweringInfo().hasAndNotCompare(Y) ||
+      valueHasExactlyOneBitSet(Y, DAG))
+    return SDValue();
+
+  // Transform this into: ~X & Y == 0.
+  EVT OpVT = X.getValueType();
+  SDValue NotX = DAG.getNOT(SDLoc(X), X, OpVT);
+  SDValue NewAnd = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, NotX, Y);
+  return DAG.getSetCC(dl, VT, NewAnd, DAG.getConstant(0, dl, OpVT), Cond);
+}
+
 /// Try to simplify a setcc built with the specified operands and cc. If it is
 /// unable to simplify it, return a null SDValue.
 SDValue
@@ -2166,6 +2212,9 @@ TargetLowering::SimplifySetCC(EVT VT, SD
     return N0;
   }
 
+  if (SDValue AndNotCC = createAndNotSetCC(VT, N0, N1, Cond, DAG, dl))
+    return AndNotCC;
+
   // Could not fold it.
   return SDValue();
 }

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=268858&r1=268857&r2=268858&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Sat May  7 10:03:40 2016
@@ -4122,6 +4122,18 @@ bool X86TargetLowering::isCheapToSpecula
   return Subtarget.hasLZCNT();
 }
 
+bool X86TargetLowering::hasAndNotCompare(SDValue Y) const {
+  if (!Subtarget.hasBMI())
+    return false;
+
+  // There are only 32-bit and 64-bit forms for 'andn'.
+  EVT VT = Y.getValueType();
+  if (VT != MVT::i32 && VT != MVT::i64)
+    return false;
+
+  return true;
+}
+
 /// Return true if every element in Mask, beginning
 /// from position Pos and ending in Pos+Size is undef.
 static bool isUndefInRange(ArrayRef<int> Mask, unsigned Pos, unsigned Size) {

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.h?rev=268858&r1=268857&r2=268858&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.h (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.h Sat May  7 10:03:40 2016
@@ -751,6 +751,8 @@ namespace llvm {
 
     bool isCheapToSpeculateCtlz() const override;
 
+    bool hasAndNotCompare(SDValue Y) const override;
+
     /// Return the value type to use for ISD::SETCC.
     EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
                            EVT VT) const override;

Modified: llvm/trunk/test/CodeGen/X86/bmi.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/bmi.ll?rev=268858&r1=268857&r2=268858&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/bmi.ll (original)
+++ llvm/trunk/test/CodeGen/X86/bmi.ll Sat May  7 10:03:40 2016
@@ -135,12 +135,11 @@ define i1 @andn_cmp(i32 %x, i32 %y) {
   ret i1 %cmp
 }
 
-; TODO: Recognize a disguised andn in the following 4 tests.
+; Recognize a disguised andn in the following 4 tests.
 define i1 @and_cmp1(i32 %x, i32 %y) {
 ; CHECK-LABEL: and_cmp1:
 ; CHECK:       # BB#0:
-; CHECK-NEXT:    andl %esi, %edi
-; CHECK-NEXT:    cmpl %esi, %edi
+; CHECK-NEXT:    andnl %esi, %edi, %eax
 ; CHECK-NEXT:    sete %al
 ; CHECK-NEXT:    retq
   %and = and i32 %x, %y
@@ -151,8 +150,7 @@ define i1 @and_cmp1(i32 %x, i32 %y) {
 define i1 @and_cmp2(i32 %x, i32 %y) {
 ; CHECK-LABEL: and_cmp2:
 ; CHECK:       # BB#0:
-; CHECK-NEXT:    andl %esi, %edi
-; CHECK-NEXT:    cmpl %esi, %edi
+; CHECK-NEXT:    andnl %esi, %edi, %eax
 ; CHECK-NEXT:    setne %al
 ; CHECK-NEXT:    retq
   %and = and i32 %y, %x
@@ -163,8 +161,7 @@ define i1 @and_cmp2(i32 %x, i32 %y) {
 define i1 @and_cmp3(i32 %x, i32 %y) {
 ; CHECK-LABEL: and_cmp3:
 ; CHECK:       # BB#0:
-; CHECK-NEXT:    andl %esi, %edi
-; CHECK-NEXT:    cmpl %edi, %esi
+; CHECK-NEXT:    andnl %esi, %edi, %eax
 ; CHECK-NEXT:    sete %al
 ; CHECK-NEXT:    retq
   %and = and i32 %x, %y
@@ -175,8 +172,7 @@ define i1 @and_cmp3(i32 %x, i32 %y) {
 define i1 @and_cmp4(i32 %x, i32 %y) {
 ; CHECK-LABEL: and_cmp4:
 ; CHECK:       # BB#0:
-; CHECK-NEXT:    andl %esi, %edi
-; CHECK-NEXT:    cmpl %edi, %esi
+; CHECK-NEXT:    andnl %esi, %edi, %eax
 ; CHECK-NEXT:    setne %al
 ; CHECK-NEXT:    retq
   %and = and i32 %y, %x
@@ -189,8 +185,8 @@ define i1 @and_cmp4(i32 %x, i32 %y) {
 define i1 @and_cmp_const(i32 %x) {
 ; CHECK-LABEL: and_cmp_const:
 ; CHECK:       # BB#0:
-; CHECK-NEXT:    andl $43, %edi
-; CHECK-NEXT:    cmpl $43, %edi
+; CHECK-NEXT:    movl $43, %eax
+; CHECK-NEXT:    andnl %eax, %edi, %eax
 ; CHECK-NEXT:    sete %al
 ; CHECK-NEXT:    retq
   %and = and i32 %x, 43
@@ -198,6 +194,63 @@ define i1 @and_cmp_const(i32 %x) {
   ret i1 %cmp
 }
 
+; But don't use 'andn' if the mask is a power-of-two.
+define i1 @and_cmp_const_power_of_two(i32 %x, i32 %y) {
+; CHECK-LABEL: and_cmp_const_power_of_two:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    btl %esi, %edi
+; CHECK-NEXT:    setae %al
+; CHECK-NEXT:    retq
+;
+  %shl = shl i32 1, %y
+  %and = and i32 %x, %shl
+  %cmp = icmp ne i32 %and, %shl
+  ret i1 %cmp
+}
+
+; Don't transform to 'andn' if there's another use of the 'and'.
+define i32 @and_cmp_not_one_use(i32 %x) {
+; CHECK-LABEL: and_cmp_not_one_use:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    andl $37, %edi
+; CHECK-NEXT:    cmpl $37, %edi
+; CHECK-NEXT:    sete %al
+; CHECK-NEXT:    movzbl %al, %eax
+; CHECK-NEXT:    addl %edi, %eax
+; CHECK-NEXT:    retq
+;
+  %and = and i32 %x, 37
+  %cmp = icmp eq i32 %and, 37
+  %ext = zext i1 %cmp to i32
+  %add = add i32 %and, %ext
+  ret i32 %add
+}
+
+; Verify that we're not transforming invalid comparison predicates.
+define i1 @not_an_andn1(i32 %x, i32 %y) {
+; CHECK-LABEL: not_an_andn1:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    andl %esi, %edi
+; CHECK-NEXT:    cmpl %edi, %esi
+; CHECK-NEXT:    setg %al
+; CHECK-NEXT:    retq
+  %and = and i32 %x, %y
+  %cmp = icmp sgt i32 %y, %and
+  ret i1 %cmp
+}
+
+define i1 @not_an_andn2(i32 %x, i32 %y) {
+; CHECK-LABEL: not_an_andn2:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    andl %esi, %edi
+; CHECK-NEXT:    cmpl %edi, %esi
+; CHECK-NEXT:    setbe %al
+; CHECK-NEXT:    retq
+  %and = and i32 %y, %x
+  %cmp = icmp ule i32 %y, %and
+  ret i1 %cmp
+}
+
 ; Don't choose a 'test' if an 'andn' can be used.
 define i1 @andn_cmp_swap_ops(i64 %x, i64 %y) {
 ; CHECK-LABEL: andn_cmp_swap_ops:




More information about the llvm-commits mailing list