[llvm] r304282 - [PowerPC] Eliminate integer compare instructions - vol. 2

Nemanja Ivanovic via llvm-commits llvm-commits at lists.llvm.org
Tue May 30 22:40:26 PDT 2017


Author: nemanjai
Date: Wed May 31 00:40:25 2017
New Revision: 304282

URL: http://llvm.org/viewvc/llvm-project?rev=304282&view=rev
Log:
[PowerPC] Eliminate integer compare instructions - vol. 2

This patch builds upon https://reviews.llvm.org/rL302810 to add
handling for bitwise logical operations in general purpose registers.
The idea is to keep the values in GPRs as long as possible - only
extracting them to a condition register bit when no further operations
are to be done.

Differential Revision: https://reviews.llvm.org/D31851

Added:
    llvm/trunk/test/CodeGen/PowerPC/logic-ops-on-compares.ll
Modified:
    llvm/trunk/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
    llvm/trunk/lib/Target/PowerPC/PPCInstr64Bit.td
    llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.cpp
    llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.h

Modified: llvm/trunk/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCISelDAGToDAG.cpp?rev=304282&r1=304281&r2=304282&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCISelDAGToDAG.cpp (original)
+++ llvm/trunk/lib/Target/PowerPC/PPCISelDAGToDAG.cpp Wed May 31 00:40:25 2017
@@ -77,6 +77,11 @@ STATISTIC(SignExtensionsAdded,
           "Number of sign extensions for compare inputs added.");
 STATISTIC(ZeroExtensionsAdded,
           "Number of zero extensions for compare inputs added.");
+STATISTIC(NumLogicOpsOnComparison,
+          "Number of logical ops on i1 values calculated in GPR.");
+STATISTIC(OmittedForNonExtendUses,
+          "Number of compares not eliminated as they have non-extending uses.");
+
 // FIXME: Remove this once the bug has been fixed!
 cl::opt<bool> ANDIGlueBug("expose-ppc-andi-glue-bug",
 cl::desc("expose the ANDI glue bug on PPC"), cl::Hidden);
@@ -275,6 +280,8 @@ private:
 
     bool trySETCC(SDNode *N);
     bool tryEXTEND(SDNode *N);
+    bool tryLogicOpOfCompares(SDNode *N);
+    SDValue computeLogicOpInGPR(SDValue LogicOp);
     SDValue signExtendInputIfNeeded(SDValue Input);
     SDValue zeroExtendInputIfNeeded(SDValue Input);
     SDValue addExtOrTrunc(SDValue NatWidthRes, ExtOrTruncConversion Conv);
@@ -2501,6 +2508,11 @@ bool PPCDAGToDAGISel::trySETCC(SDNode *N
   return true;
 }
 
+// Is this opcode a bitwise logical operation?
+static bool isLogicOp(unsigned Opc) {
+  return Opc == ISD::AND || Opc == ISD::OR || Opc == ISD::XOR;
+}
+
 /// If this node is a sign/zero extension of an integer comparison,
 /// it can usually be computed in GPR's rather than using comparison
 /// instructions and ISEL. We only do this on 64-bit targets for now
@@ -2513,13 +2525,20 @@ bool PPCDAGToDAGISel::tryEXTEND(SDNode *
           N->getOpcode() == ISD::SIGN_EXTEND) &&
           "Expecting a zero/sign extend node!");
 
-  if (N->getOperand(0).getOpcode() != ISD::SETCC)
+  SDValue WideRes;
+  // If we are zero-extending the result of a logical operation on i1
+  // values, we can keep the values in GPRs.
+  if (isLogicOp(N->getOperand(0).getOpcode()) &&
+      N->getOperand(0).getValueType() == MVT::i1 &&
+      N->getOpcode() == ISD::ZERO_EXTEND)
+    WideRes = computeLogicOpInGPR(N->getOperand(0));
+  else if (N->getOperand(0).getOpcode() != ISD::SETCC)
     return false;
-
-  SDValue WideRes =
-    getSETCCInGPR(N->getOperand(0),
-                  N->getOpcode() == ISD::SIGN_EXTEND ?
-                  SetccInGPROpts::SExtOrig : SetccInGPROpts::ZExtOrig);
+  else
+    WideRes =
+      getSETCCInGPR(N->getOperand(0),
+                    N->getOpcode() == ISD::SIGN_EXTEND ?
+                    SetccInGPROpts::SExtOrig : SetccInGPROpts::ZExtOrig);
 
   if (!WideRes)
     return false;
@@ -2540,6 +2559,159 @@ bool PPCDAGToDAGISel::tryEXTEND(SDNode *
   return true;
 }
 
+// Lower a logical operation on i1 values into a GPR sequence if possible.
+// The result can be kept in a GPR if requested.
+// Three types of inputs can be handled:
+// - SETCC
+// - TRUNCATE
+// - Logical operation (AND/OR/XOR)
+// There is also a special case that is handled (namely a complement operation
+// achieved with xor %a, -1).
+SDValue PPCDAGToDAGISel::computeLogicOpInGPR(SDValue LogicOp) {
+  assert(isLogicOp(LogicOp.getOpcode()) &&
+         "Can only handle logic operations here.");
+  assert(LogicOp.getValueType() == MVT::i1 &&
+         "Can only handle logic operations on i1 values here.");
+  SDLoc dl(LogicOp);
+  SDValue LHS, RHS;
+
+  // Special case: xor %a, -1
+  bool IsBitwiseNegation = isBitwiseNot(LogicOp);
+
+  // Produces a GPR sequence for each operand of the binary logic operation.
+  // For SETCC, it produces the respective comparison, for TRUNCATE it truncates
+  // the value in a GPR and for logic operations, it will recursively produce
+  // a GPR sequence for the operation.
+  auto getLogicOperand = [&] (SDValue Operand) -> SDValue {
+    unsigned OperandOpcode = Operand.getOpcode();
+    if (OperandOpcode == ISD::SETCC)
+      return getSETCCInGPR(Operand, SetccInGPROpts::ZExtOrig);
+    else if (OperandOpcode == ISD::TRUNCATE) {
+      SDValue InputOp = Operand.getOperand(0);
+      EVT InVT = InputOp.getValueType();
+      return
+        SDValue(CurDAG->getMachineNode(InVT == MVT::i32 ? PPC::RLDICL_32 :
+                                       PPC::RLDICL, dl, InVT, InputOp,
+                                       getI64Imm(0, dl), getI64Imm(63, dl)), 0);
+    } else if (isLogicOp(OperandOpcode))
+      return computeLogicOpInGPR(Operand);
+    return SDValue();
+  };
+  LHS = getLogicOperand(LogicOp.getOperand(0));
+  RHS = getLogicOperand(LogicOp.getOperand(1));
+
+  // If a GPR sequence can't be produced for the LHS we can't proceed.
+  // Not producing a GPR sequence for the RHS is only a problem if this isn't
+  // a bitwise negation operation.
+  if (!LHS || (!RHS && !IsBitwiseNegation))
+    return SDValue();
+
+  NumLogicOpsOnComparison++;
+
+  // We will use the inputs as 64-bit values.
+  if (LHS.getValueType() == MVT::i32)
+    LHS = addExtOrTrunc(LHS, ExtOrTruncConversion::Ext);
+  if (!IsBitwiseNegation && RHS.getValueType() == MVT::i32)
+    RHS = addExtOrTrunc(RHS, ExtOrTruncConversion::Ext);
+
+  unsigned NewOpc;
+  switch (LogicOp.getOpcode()) {
+  default: llvm_unreachable("Unknown logic operation.");
+  case ISD::AND: NewOpc = PPC::AND8; break;
+  case ISD::OR:  NewOpc = PPC::OR8;  break;
+  case ISD::XOR: NewOpc = PPC::XOR8; break;
+  }
+
+  if (IsBitwiseNegation) {
+    RHS = getI64Imm(1, dl);
+    NewOpc = PPC::XORI8;
+  }
+
+  return SDValue(CurDAG->getMachineNode(NewOpc, dl, MVT::i64, LHS, RHS), 0);
+
+}
+
+/// Try performing logical operations on results of comparisons in GPRs.
+/// It is typically preferred from a performance perspective over performing
+/// the operations on individual bits in the CR. We only do this on 64-bit
+/// targets for now as the code is specialized for 64-bit (it uses 64-bit
+/// instructions and assumes 64-bit registers).
+bool PPCDAGToDAGISel::tryLogicOpOfCompares(SDNode *N) {
+  if (TM.getOptLevel() == CodeGenOpt::None || !TM.isPPC64())
+    return false;
+  if (N->getValueType(0) != MVT::i1)
+    return false;
+  assert(isLogicOp(N->getOpcode()) &&
+         "Expected a logic operation on setcc results.");
+  SDValue LoweredLogical = computeLogicOpInGPR(SDValue(N, 0));
+  if (!LoweredLogical)
+    return false;
+
+  SDLoc dl(N);
+  bool IsBitwiseNegate = LoweredLogical.getMachineOpcode() == PPC::XORI8;
+  unsigned SubRegToExtract = IsBitwiseNegate ? PPC::sub_eq : PPC::sub_gt;
+  SDValue CR0Reg = CurDAG->getRegister(PPC::CR0, MVT::i32);
+  SDValue LHS = LoweredLogical.getOperand(0);
+  SDValue RHS = LoweredLogical.getOperand(1);
+  SDValue WideOp;
+  SDValue OpToConvToRecForm;
+
+  // Look through any 32-bit to 64-bit implicit extend nodes to find the opcode
+  // that is input to the XORI.
+  if (IsBitwiseNegate &&
+      LoweredLogical.getOperand(0).getMachineOpcode() == PPC::INSERT_SUBREG)
+    OpToConvToRecForm = LoweredLogical.getOperand(0).getOperand(1);
+  else if (IsBitwiseNegate)
+    // If the input to the XORI isn't an extension, that's what we're after.
+    OpToConvToRecForm = LoweredLogical.getOperand(0);
+  else
+    // If this is not an XORI, it is a reg-reg logical op and we can convert it
+    // to record-form.
+    OpToConvToRecForm = LoweredLogical;
+
+  // Get the record-form version of the node we're looking to use to get the
+  // CR result from.
+  uint16_t NonRecOpc = OpToConvToRecForm.getMachineOpcode();
+  int NewOpc = PPCInstrInfo::getRecordFormOpcode(NonRecOpc);
+
+  // Convert the right node to record-form. This is either the logical we're
+  // looking at or it is the input node to the negation (if we're looking at
+  // a bitwise negation).
+  if (NewOpc != -1 && IsBitwiseNegate) {
+    // The input to the XORI has a record-form. Use it.
+    assert(LoweredLogical.getConstantOperandVal(1) == 1 &&
+           "Expected a PPC::XORI8 only for bitwise negation.");
+    // Emit the record-form instruction.
+    std::vector<SDValue> Ops;
+    for (int i = 0, e = OpToConvToRecForm.getNumOperands(); i < e; i++)
+      Ops.push_back(OpToConvToRecForm.getOperand(i));
+
+    WideOp =
+      SDValue(CurDAG->getMachineNode(NewOpc, dl,
+                                     OpToConvToRecForm.getValueType(),
+                                     MVT::Glue, Ops), 0);
+  } else {
+    assert((NewOpc != -1 || !IsBitwiseNegate) &&
+           "No record form available for AND8/OR8/XOR8?");
+    WideOp =
+      SDValue(CurDAG->getMachineNode(NewOpc == -1 ? PPC::ANDIo8 : NewOpc, dl,
+                                     MVT::i64, MVT::Glue, LHS, RHS), 0);
+  }
+
+  // Select this node to a single bit from CR0 set by the record-form node
+  // just created. For bitwise negation, use the EQ bit which is the equivalent
+  // of negating the result (i.e. it is a bit set when the result of the
+  // operation is zero).
+  SDValue SRIdxVal =
+    CurDAG->getTargetConstant(SubRegToExtract, dl, MVT::i32);
+  SDValue CRBit =
+    SDValue(CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl,
+                                   MVT::i1, CR0Reg, SRIdxVal,
+                                   WideOp.getValue(1)), 0);
+  ReplaceNode(N, CRBit.getNode());
+  return true;
+}
+
 /// If the value isn't guaranteed to be sign-extended to 64-bits, extend it.
 /// Useful when emitting comparison code for 32-bit values without using
 /// the compare instruction (which only considers the lower 32-bits).
@@ -2677,6 +2849,31 @@ SDValue PPCDAGToDAGISel::get32BitSExtCom
   }
 }
 
+/// Does this SDValue have any uses for which keeping the value in a GPR is
+/// appropriate. This is meant to be used on values that have type i1 since
+/// it is somewhat meaningless to ask if values of other types can be kept in
+/// GPR's.
+static bool allUsesExtend(SDValue Compare, SelectionDAG *CurDAG) {
+  assert(Compare.getOpcode() == ISD::SETCC &&
+         "An ISD::SETCC node required here.");
+
+  // For values that have a single use, the caller should obviously already have
+  // checked if that use is an extending use. We check the other uses here.
+  if (Compare.hasOneUse())
+    return true;
+  // We want the value in a GPR if it is being extended, used for a select, or
+  // used in logical operations.
+  for (auto CompareUse : Compare.getNode()->uses())
+    if (CompareUse->getOpcode() != ISD::SIGN_EXTEND &&
+        CompareUse->getOpcode() != ISD::ZERO_EXTEND &&
+        CompareUse->getOpcode() != ISD::SELECT &&
+        !isLogicOp(CompareUse->getOpcode())) {
+      OmittedForNonExtendUses++;
+      return false;
+    }
+  return true;
+}
+
 /// Returns an equivalent of a SETCC node but with the result the same width as
 /// the inputs. This can nalso be used for SELECT_CC if either the true or false
 /// values is a power of two while the other is zero.
@@ -2686,6 +2883,11 @@ SDValue PPCDAGToDAGISel::getSETCCInGPR(S
           Compare.getOpcode() == ISD::SELECT_CC) &&
          "An ISD::SETCC node required here.");
 
+  // Don't convert this comparison to a GPR sequence because there are uses
+  // of the i1 result (i.e. uses that require the result in the CR).
+  if ((Compare.getOpcode() == ISD::SETCC) && !allUsesExtend(Compare, CurDAG))
+    return SDValue();
+
   SDValue LHS = Compare.getOperand(0);
   SDValue RHS = Compare.getOperand(1);
 
@@ -2906,6 +3108,9 @@ void PPCDAGToDAGISel::Select(SDNode *N)
   }
 
   case ISD::AND: {
+    if (tryLogicOpOfCompares(N))
+      return;
+
     unsigned Imm, Imm2, SH, MB, ME;
     uint64_t Imm64;
 
@@ -3025,6 +3230,9 @@ void PPCDAGToDAGISel::Select(SDNode *N)
       if (tryBitfieldInsert(N))
         return;
 
+    if (tryLogicOpOfCompares(N))
+      return;
+
     short Imm;
     if (N->getOperand(0)->getOpcode() == ISD::FrameIndex &&
         isIntS16Immediate(N->getOperand(1), Imm)) {
@@ -3042,6 +3250,11 @@ void PPCDAGToDAGISel::Select(SDNode *N)
     // Other cases are autogenerated.
     break;
   }
+  case ISD::XOR: {
+    if (tryLogicOpOfCompares(N))
+      return;
+    break;
+  }
   case ISD::ADD: {
     short Imm;
     if (N->getOperand(0)->getOpcode() == ISD::FrameIndex &&

Modified: llvm/trunk/lib/Target/PowerPC/PPCInstr64Bit.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCInstr64Bit.td?rev=304282&r1=304281&r2=304282&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCInstr64Bit.td (original)
+++ llvm/trunk/lib/Target/PowerPC/PPCInstr64Bit.td Wed May 31 00:40:25 2017
@@ -735,12 +735,12 @@ def RLDICL_32_64 : MDForm_1<30, 0,
                             "rldicl $rA, $rS, $SH, $MBE", IIC_IntRotateDI,
                             []>, isPPC64;
 // End fast-isel.
-let isCodeGenOnly = 1 in
-def RLDICL_32 : MDForm_1<30, 0,
-                         (outs gprc:$rA),
-                         (ins gprc:$rS, u6imm:$SH, u6imm:$MBE),
-                         "rldicl $rA, $rS, $SH, $MBE", IIC_IntRotateDI,
-                         []>, isPPC64;
+let Interpretation64Bit = 1, isCodeGenOnly = 1 in
+defm RLDICL_32 : MDForm_1r<30, 0,
+                           (outs gprc:$rA),
+                           (ins gprc:$rS, u6imm:$SH, u6imm:$MBE),
+                           "rldicl", "$rA, $rS, $SH, $MBE", IIC_IntRotateDI,
+                           []>, isPPC64;
 defm RLDICR : MDForm_1r<30, 1,
                         (outs g8rc:$rA), (ins g8rc:$rS, u6imm:$SH, u6imm:$MBE),
                         "rldicr", "$rA, $rS, $SH, $MBE", IIC_IntRotateDI,

Modified: llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.cpp?rev=304282&r1=304281&r2=304282&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.cpp Wed May 31 00:40:25 2017
@@ -1983,3 +1983,7 @@ PPCInstrInfo::updatedRC(const TargetRegi
     return &PPC::VSRCRegClass;
   return RC;
 }
+
+int PPCInstrInfo::getRecordFormOpcode(unsigned Opcode) {
+  return PPC::getRecordFormOpcode(Opcode);
+}

Modified: llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.h?rev=304282&r1=304281&r2=304282&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.h (original)
+++ llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.h Wed May 31 00:40:25 2017
@@ -290,6 +290,7 @@ public:
     return Reg >= PPC::V0 && Reg <= PPC::V31;
   }
   const TargetRegisterClass *updatedRC(const TargetRegisterClass *RC) const;
+  static int getRecordFormOpcode(unsigned Opcode);
 };
 
 }

Added: llvm/trunk/test/CodeGen/PowerPC/logic-ops-on-compares.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/logic-ops-on-compares.ll?rev=304282&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/logic-ops-on-compares.ll (added)
+++ llvm/trunk/test/CodeGen/PowerPC/logic-ops-on-compares.ll Wed May 31 00:40:25 2017
@@ -0,0 +1,69 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -O2 \
+; RUN:   -ppc-asm-full-reg-names -mcpu=pwr8 < %s | FileCheck %s \
+; RUN:  --implicit-check-not cmpw --implicit-check-not cmpd --implicit-check-not cmpl
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -O2 \
+; RUN:   -ppc-asm-full-reg-names -mcpu=pwr8 < %s | FileCheck %s \
+; RUN:  --implicit-check-not cmpw --implicit-check-not cmpd --implicit-check-not cmpl
+
+; Function Attrs: nounwind
+define signext i32 @test(i32 signext %a, i32 signext %b, i32 signext %c) {
+; CHECK-LABEL: test:
+; CHECK:    xor r7, r3, r4
+; CHECK-NEXT:    li r6, 55
+; CHECK-NEXT:    xor r5, r5, r6
+; CHECK-NEXT:    or r7, r7, r4
+; CHECK-NEXT:    cntlzw r5, r5
+; CHECK-NEXT:    cntlzw r6, r7
+; CHECK-NEXT:    srwi r6, r6, 5
+; CHECK-NEXT:    srwi r5, r5, 5
+; CHECK-NEXT:    or. r5, r6, r5
+; CHECK-NEXT:    bc 4, 1
+entry:
+  %tobool = icmp eq i32 %a, %b
+  %tobool1 = icmp eq i32 %b, 0
+  %or.cond = and i1 %tobool, %tobool1
+  %tobool3 = icmp eq i32 %c, 55
+  %or.cond5 = or i1 %or.cond, %tobool3
+  br i1 %or.cond5, label %if.end, label %if.then
+
+if.then:                                          ; preds = %entry
+  %call = tail call signext i32 @foo(i32 signext %a) #2
+  br label %return
+
+if.end:                                           ; preds = %entry
+  %call4 = tail call signext i32 @bar(i32 signext %b) #2
+  br label %return
+
+return:                                           ; preds = %if.end, %if.then
+  %retval.0 = phi i32 [ %call4, %if.end ], [ %call, %if.then ]
+  ret i32 %retval.0
+}
+
+define void @neg_truncate_i32(i32 *%ptr) {
+; CHECK-LABEL: neg_truncate_i32:
+; CHECK:       # BB#0: # %entry
+; CHECK-NEXT:    lwz r3, 0(r3)
+; CHECK-NEXT:    rldicl. r3, r3, 0, 63
+; CHECK-NEXT:    bclr 12, 2, 0
+; CHECK-NEXT:  # BB#1: # %if.end29.thread136
+; CHECK-NEXT:  .LBB1_2: # %if.end29
+entry:
+  %0 = load i32, i32* %ptr, align 4
+  %rem17127 = and i32 %0, 1
+  %cmp18 = icmp eq i32 %rem17127, 0
+  br label %if.else
+
+if.else:                                          ; preds = %entry
+  br i1 %cmp18, label %if.end29, label %if.end29.thread136
+
+if.end29.thread136:                               ; preds = %if.else
+  unreachable
+
+if.end29:                                         ; preds = %if.else
+  ret void
+
+}
+
+declare signext i32 @foo(i32 signext)
+declare signext i32 @bar(i32 signext)




More information about the llvm-commits mailing list