[llvm] r179134 - Allow PPC B and BLR to be if-converted into some predicated forms

Bill Schmidt wschmidt at linux.vnet.ibm.com
Tue Apr 9 19:29:09 PDT 2013


On Tue, 2013-04-09 at 22:58 +0000, Hal Finkel wrote:
> Author: hfinkel
> Date: Tue Apr  9 17:58:37 2013
> New Revision: 179134
> 
> URL: http://llvm.org/viewvc/llvm-project?rev=179134&view=rev
> Log:
> Allow PPC B and BLR to be if-converted into some predicated forms
> 
> This enables us to form predicated branches (which are the same conditional
> branches we had before) and also a larger set of predicated returns (including
> instructions like bdnzlr which is a conditional return and loop-counter
> decrement all in one).
> 
> At the moment, if conversion does not capture all possible opportunities. A
> simple example is provided in early-ret2.ll, where if conversion forms one
> predicated return, and then the PPCEarlyReturn pass picks up the other one. So,
> at least for now, we'll keep both mechanisms.
> 
> Added:
>     llvm/trunk/test/CodeGen/PowerPC/bdzlr.ll
>     llvm/trunk/test/CodeGen/PowerPC/early-ret2.ll
> Modified:
>     llvm/trunk/lib/Target/PowerPC/PPCInstr64Bit.td
>     llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.cpp
>     llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.h
>     llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.td
>     llvm/trunk/lib/Target/PowerPC/PPCTargetMachine.cpp
> 
> Modified: llvm/trunk/lib/Target/PowerPC/PPCInstr64Bit.td
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCInstr64Bit.td?rev=179134&r1=179133&r2=179134&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/PowerPC/PPCInstr64Bit.td (original)
> +++ llvm/trunk/lib/Target/PowerPC/PPCInstr64Bit.td Tue Apr  9 17:58:37 2013
> @@ -83,8 +83,17 @@ let isBranch = 1, isTerminator = 1, hasC
>      def BDNZ8 : BForm_1<16, 16, 0, 0, (outs), (ins condbrtarget:$dst),
>                          "bdnz $dst">;
>    }
> +
> +  let isReturn = 1, Defs = [CTR8], Uses = [CTR8, LR8, RM] in {
> +    def BDZLR8  : XLForm_2_ext<19, 16, 18, 0, 0, (outs), (ins),
> +                              "bdzlr", BrB, []>;
> +    def BDNZLR8 : XLForm_2_ext<19, 16, 16, 0, 0, (outs), (ins),
> +                              "bdnzlr", BrB, []>;
> +  }
>  }
> 
> +
> +
>  let isCall = 1, PPC970_Unit = 7, Defs = [LR8] in {
>    // Convenient aliases for call instructions
>    let Uses = [RM] in {
> 
> Modified: llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.cpp?rev=179134&r1=179133&r2=179134&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.cpp (original)
> +++ llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.cpp Tue Apr  9 17:58:37 2013
> @@ -876,6 +876,143 @@ bool PPCInstrInfo::FoldImmediate(Machine
>    return true;
>  }
> 
> +bool PPCInstrInfo::isPredicated(const MachineInstr *MI) const {
> +  unsigned OpC = MI->getOpcode();
> +  switch (OpC) {
> +  default:
> +    return false;
> +  case PPC::BCC:
> +  case PPC::BCLR:
> +  case PPC::BDZLR:
> +  case PPC::BDZLR8:
> +  case PPC::BDNZLR:
> +  case PPC::BDNZLR8:
> +    return true;
> +  }
> +}
> +
> +bool PPCInstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const {
> +  if (!MI->isTerminator())
> +    return false;
> +
> +  // Conditional branch is a special case.
> +  if (MI->isBranch() && !MI->isBarrier())
> +    return true;
> +
> +  return !isPredicated(MI);
> +}
> +
> +bool PPCInstrInfo::PredicateInstruction(
> +                     MachineInstr *MI,
> +                     const SmallVectorImpl<MachineOperand> &Pred) const {
> +  unsigned OpC = MI->getOpcode();
> +  if (OpC == PPC::BLR) {
> +    if (Pred[1].getReg() == PPC::CTR8 || Pred[1].getReg() == PPC::CTR) {
> +      bool isPPC64 = TM.getSubtargetImpl()->isPPC64();
> +      MI->setDesc(get(Pred[0].getImm() ?
> +                      (isPPC64 ? PPC::BDNZLR8 : PPC::BDNZLR) :
> +                      (isPPC64 ? PPC::BDZLR8  : PPC::BDZLR)));
> +    } else {
> +      MI->setDesc(get(PPC::BCLR));
> +      MachineInstrBuilder(*MI->getParent()->getParent(), MI)
> +        .addImm(Pred[0].getImm())
> +        .addReg(Pred[1].getReg());
> +    }
> +
> +    return true;
> +  } else if (OpC == PPC::B) {
> +    if (Pred[1].getReg() == PPC::CTR8 || Pred[1].getReg() == PPC::CTR) {
> +      bool isPPC64 = TM.getSubtargetImpl()->isPPC64();
> +      MI->setDesc(get(Pred[0].getImm() ?
> +                      (isPPC64 ? PPC::BDNZ8 : PPC::BDNZ) :
> +                      (isPPC64 ? PPC::BDZ8  : PPC::BDZ)));
> +    } else {
> +      MachineBasicBlock *MBB = MI->getOperand(0).getMBB();
> +      MI->RemoveOperand(0);
> +
> +      MI->setDesc(get(PPC::BCC));
> +      MachineInstrBuilder(*MI->getParent()->getParent(), MI)
> +        .addImm(Pred[0].getImm())
> +        .addReg(Pred[1].getReg())
> +        .addMBB(MBB);
> +    }
> +
> +    return true;
> +  }
> +
> +  return false;
> +}
> +
> +bool PPCInstrInfo::SubsumesPredicate(
> +                     const SmallVectorImpl<MachineOperand> &Pred1,
> +                     const SmallVectorImpl<MachineOperand> &Pred2) const {
> +  assert(Pred1.size() == 2 && "Invalid PPC first predicate");
> +  assert(Pred2.size() == 2 && "Invalid PPC second predicate");
> +
> +  if (Pred1[1].getReg() == PPC::CTR8 || Pred1[1].getReg() == PPC::CTR)
> +    return false;
> +  if (Pred2[1].getReg() == PPC::CTR8 || Pred2[1].getReg() == PPC::CTR)
> +    return false;
> +
> +  PPC::Predicate P1 = (PPC::Predicate) Pred1[0].getImm();
> +  PPC::Predicate P2 = (PPC::Predicate) Pred2[0].getImm();
> +
> +  if (P1 == P2)
> +    return true;
> +
> +  // Does P1 subsume P2, e.g. GE subsumes GT.
> +  if (P1 == PPC::PRED_LE &&
> +      (P2 == PPC::PRED_LT || P2 == PPC::PRED_EQ))
> +    return true;
> +  if (P1 == PPC::PRED_GE &&
> +      (P2 == PPC::PRED_GT || P2 == PPC::PRED_EQ))
> +    return true;
> +
> +  return false;
> +}
> +
> +bool PPCInstrInfo::DefinesPredicate(MachineInstr *MI,
> +                                    std::vector<MachineOperand> &Pred) const {
> +  // Note: At the present time, the contents of Pred from this function is
> +  // unused by IfConversion. This implementation follows ARM by pushing the
> +  // CR-defining operand. Because the 'DZ' and 'DNZ' count as types of
> +  // predicate, instructions defining CTR or CTR8 are also included as
> +  // predicate-defining instructions.
> +
> +  const TargetRegisterClass *RCs[] =
> +    { &PPC::CRRCRegClass, &PPC::CRBITRCRegClass,
> +      &PPC::CTRRCRegClass, &PPC::CTRRC8RegClass };
> +
> +  bool Found = false;
> +  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
> +    const MachineOperand &MO = MI->getOperand(i);
> +    for (int c = 0; c < 2 && !Found; ++c) {

Hi Hal,

The check for c < 2 puzzles me; it appears you're missing the CTR cases
here as a result.  I assume the intent is to walk all the classes in
RCs.

> +      const TargetRegisterClass *RC = RCs[c];
> +      for (TargetRegisterClass::iterator I = RC->begin(),
> +           IE = RC->end(); I != IE; ++I) {
> +        if ((MO.isRegMask() && MO.clobbersPhysReg(*I)) ||
> +            (MO.isReg() && MO.isDef() && MO.getReg() == *I)) {
> +          Pred.push_back(MO);
> +          Found = true;
> +        }
> +      }

This seems rather inefficient for the typical case where the operand is
a register, since it walks through all the registers in the register class
looking for a match.  (I realize this appears to be necessary for the
register mask case, since I couldn't find an interface for checking
whether a register mask clobbers an element of a particular register
class.)  I'd suggest reorganizing along the lines of:

  foreach operand MO
    foreach register class RC
      if MO.isReg()
        if MO.isDef() && RC.hasSubClassEq(RI.getRegClass(MO.getReg()))
          success
        end
      else if MO.isRegMask()
        foreach reg R in RC
          if MO.clobbersPhysReg(R)
            success
          end
        end
      end
    end
  end

This will make the common case faster and, incidentally, allow you to
remove CRBITRCRegClass from the list of classes because of the subclass
test.  (This assumes MO.clobbersPhysReg(R) is consistent in the sense
that a clobber of a CRBIT register implies a clobber of the containing
CR register.)  The register mask case is still slow, but since register
masks are much rarer, it will have less impact on compile time.

A general comment on all the predicate machinery:  Will it need to be
extended somewhat to handle the multiconditional cases like bdnzt?  The
predicate operands currently consist of an immediate (BIBO, I suppose)
and a register, but the multiconditional forms will need a second
register.  On the plus side, I guess we'll have to learn why the TblGen
machinery for the BIBO stuff works... ;)

> +    }
> +  }
> +
> +  return Found;
> +}
> +
> +bool PPCInstrInfo::isPredicable(MachineInstr *MI) const {
> +  unsigned OpC = MI->getOpcode();
> +  switch (OpC) {
> +  default:
> +    return false;
> +  case PPC::B:
> +  case PPC::BLR:
> +    return true;
> +  }
> +}
> +
>  /// GetInstSize - Return the number of bytes of code the specified
>  /// instruction may be.  This returns the maximum number of bytes.
>  ///
> 
> Modified: llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.h
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.h?rev=179134&r1=179133&r2=179134&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.h (original)
> +++ llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.h Tue Apr  9 17:58:37 2013
> @@ -160,6 +160,53 @@ public:
>    virtual bool FoldImmediate(MachineInstr *UseMI, MachineInstr *DefMI,
>                               unsigned Reg, MachineRegisterInfo *MRI) const;
> 
> +  // If conversion by predication (only supported by some branch instructions).
> +  // All of the profitability checks always return true; it is always
> +  // profitable to use the predicated branches.
> +  virtual bool isProfitableToIfCvt(MachineBasicBlock &MBB,
> +                                   unsigned NumCycles, unsigned ExtraPredCycles,
> +                                   const BranchProbability &Probability) const {
> +    return true;
> +  }
> +
> +  virtual bool isProfitableToIfCvt(MachineBasicBlock &TMBB,
> +                                   unsigned NumT, unsigned ExtraT,
> +                                   MachineBasicBlock &FMBB,
> +                                   unsigned NumF, unsigned ExtraF,
> +                                   const BranchProbability &Probability) const {
> +    return true;
> +  }
> +
> +  virtual bool isProfitableToDupForIfCvt(MachineBasicBlock &MBB,
> +                                         unsigned NumCycles,
> +                                         const BranchProbability
> +                                         &Probability) const {
> +    return true;
> +  }
> +
> +  virtual bool isProfitableToUnpredicate(MachineBasicBlock &TMBB,
> +                                         MachineBasicBlock &FMBB) const {
> +    return false;
> +  }
> +
> +  // Predication support.
> +  bool isPredicated(const MachineInstr *MI) const;
> +
> +  virtual bool isUnpredicatedTerminator(const MachineInstr *MI) const;
> +
> +  virtual
> +  bool PredicateInstruction(MachineInstr *MI,
> +                            const SmallVectorImpl<MachineOperand> &Pred) const;
> +
> +  virtual
> +  bool SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
> +                         const SmallVectorImpl<MachineOperand> &Pred2) const;
> +
> +  virtual bool DefinesPredicate(MachineInstr *MI,
> +                                std::vector<MachineOperand> &Pred) const;
> +
> +  virtual bool isPredicable(MachineInstr *MI) const;
> +
>    /// GetInstSize - Return the number of bytes of code the specified
>    /// instruction may be.  This returns the maximum number of bytes.
>    ///
> 
> Modified: llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.td
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.td?rev=179134&r1=179133&r2=179134&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.td (original)
> +++ llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.td Tue Apr  9 17:58:37 2013
> @@ -518,6 +518,13 @@ let isBranch = 1, isTerminator = 1, hasC
>      let isReturn = 1, Uses = [LR, RM] in
>      def BCLR : XLForm_2_br<19, 16, 0, (outs), (ins pred:$cond),
>                             "b${cond:cc}lr ${cond:reg}", BrB, []>;
> +
> +    let isReturn = 1, Defs = [CTR], Uses = [CTR, LR, RM] in {
> +      def BDZLR  : XLForm_2_ext<19, 16, 18, 0, 0, (outs), (ins),
> +                             "bdzlr", BrB, []>;
> +      def BDNZLR : XLForm_2_ext<19, 16, 16, 0, 0, (outs), (ins),
> +                             "bdnzlr", BrB, []>;
> +    }
>    }
> 
>    let Defs = [CTR], Uses = [CTR] in {
> 
> Modified: llvm/trunk/lib/Target/PowerPC/PPCTargetMachine.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCTargetMachine.cpp?rev=179134&r1=179133&r2=179134&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/PowerPC/PPCTargetMachine.cpp (original)
> +++ llvm/trunk/lib/Target/PowerPC/PPCTargetMachine.cpp Tue Apr  9 17:58:37 2013
> @@ -93,6 +93,7 @@ public:
>    virtual bool addPreRegAlloc();
>    virtual bool addILPOpts();
>    virtual bool addInstSelector();
> +  virtual bool addPreSched2();
>    virtual bool addPreEmitPass();
>  };
>  } // namespace
> @@ -123,6 +124,13 @@ bool PPCPassConfig::addInstSelector() {
>    return false;
>  }
> 
> +bool PPCPassConfig::addPreSched2() {
> +  if (getOptLevel() != CodeGenOpt::None)
> +    addPass(&IfConverterID);
> +
> +  return true;
> +}
> +
>  bool PPCPassConfig::addPreEmitPass() {
>    if (getOptLevel() != CodeGenOpt::None)
>      addPass(createPPCEarlyReturnPass());
> 
> Added: llvm/trunk/test/CodeGen/PowerPC/bdzlr.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/bdzlr.ll?rev=179134&view=auto
> ==============================================================================
> --- llvm/trunk/test/CodeGen/PowerPC/bdzlr.ll (added)
> +++ llvm/trunk/test/CodeGen/PowerPC/bdzlr.ll Tue Apr  9 17:58:37 2013

Any way you could simplify this test a bit?  Seems like a lot to parse
and process for the functionality being tested.  If not, a comment as to
what the test is doing seems warranted.

Nice to see the progress here!

Thanks,
Bill

> @@ -0,0 +1,63 @@
> +; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s
> +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
> +target triple = "powerpc64-unknown-linux-gnu"
> +
> +%struct.lua_TValue.17.692 = type { %union.Value.16.691, i32 }
> +%union.Value.16.691 = type { %union.GCObject.15.690* }
> +%union.GCObject.15.690 = type { %struct.lua_State.14.689 }
> +%struct.lua_State.14.689 = type { %union.GCObject.15.690*, i8, i8, i8, %struct.lua_TValue.17.692*, %struct.lua_TValue.17.692*, %struct.global_State.10.685*, %struct.CallInfo.11.686*, i32*, %struct.lua_TValue.17.692*, %struct.lua_TValue.17.692*, %struct.CallInfo.11.686*, %struct.CallInfo.11.686*, i32, i32, i16, i16, i8, i8, i32, i32, void (%struct.lua_State.14.689*, %struct.lua_Debug.12.687*)*, %struct.lua_TValue.17.692, %struct.lua_TValue.17.692, %union.GCObject.15.690*, %union.GCObject.15.690*, %struct.lua_longjmp.13.688*, i64 }
> +%struct.global_State.10.685 = type { %struct.stringtable.0.675, i8* (i8*, i8*, i64, i64)*, i8*, i8, i8, i32, %union.GCObject.15.690*, %union.GCObject.15.690**, %union.GCObject.15.690*, %union.GCObject.15.690*, %union.GCObject.15.690*, %union.GCObject.15.690*, %struct.Mbuffer.1.676, i64, i64, i64, i64, i32, i32, i32 (%struct.lua_State.14.689*)*, %struct.lua_TValue.17.692, %struct.lua_State.14.689*, %struct.UpVal.3.678, [9 x %struct.Table.7.682*], [17 x %union.TString.9.684*] }
> +%struct.stringtable.0.675 = type { %union.GCObject.15.690**, i32, i32 }
> +%struct.Mbuffer.1.676 = type { i8*, i64, i64 }
> +%struct.UpVal.3.678 = type { %union.GCObject.15.690*, i8, i8, %struct.lua_TValue.17.692*, %union.anon.2.677 }
> +%union.anon.2.677 = type { %struct.lua_TValue.17.692 }
> +%struct.Table.7.682 = type { %union.GCObject.15.690*, i8, i8, i8, i8, %struct.Table.7.682*, %struct.lua_TValue.17.692*, %struct.Node.6.681*, %struct.Node.6.681*, %union.GCObject.15.690*, i32 }
> +%struct.Node.6.681 = type { %struct.lua_TValue.17.692, %union.TKey.5.680 }
> +%union.TKey.5.680 = type { %struct.anon.0.4.679 }
> +%struct.anon.0.4.679 = type { %union.Value.16.691, i32, %struct.Node.6.681* }
> +%union.TString.9.684 = type { %struct.anon.1.8.683 }
> +%struct.anon.1.8.683 = type { %union.GCObject.15.690*, i8, i8, i8, i32, i64 }
> +%struct.CallInfo.11.686 = type { %struct.lua_TValue.17.692*, %struct.lua_TValue.17.692*, %struct.lua_TValue.17.692*, i32*, i32, i32 }
> +%struct.lua_Debug.12.687 = type { i32, i8*, i8*, i8*, i8*, i32, i32, i32, i32, [60 x i8], i32 }
> +%struct.lua_longjmp.13.688 = type opaque
> +
> +define void @lua_xmove(i32 signext %n) #0 {
> +entry:
> +  br i1 undef, label %for.end, label %if.end
> +
> +if.end:                                           ; preds = %entry
> +  br i1 undef, label %for.body.lr.ph, label %for.end
> +
> +for.body.lr.ph:                                   ; preds = %if.end
> +  br label %for.body
> +
> +for.body:                                         ; preds = %for.body.for.body_crit_edge, %for.body.lr.ph
> +  %0 = phi %struct.lua_TValue.17.692* [ undef, %for.body.lr.ph ], [ %.pre, %for.body.for.body_crit_edge ]
> +  %indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body.for.body_crit_edge ]
> +  %tt = getelementptr inbounds %struct.lua_TValue.17.692* %0, i64 %indvars.iv, i32 1
> +  %1 = load i32* %tt, align 4, !tbaa !0
> +  store i32 %1, i32* undef, align 4, !tbaa !0
> +  %indvars.iv.next = add i64 %indvars.iv, 1
> +  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
> +  %exitcond = icmp eq i32 %lftr.wideiv, %n
> +  br i1 %exitcond, label %for.end, label %for.body.for.body_crit_edge
> +
> +for.body.for.body_crit_edge:                      ; preds = %for.body
> +  %.pre = load %struct.lua_TValue.17.692** undef, align 8, !tbaa !3
> +  br label %for.body
> +
> +for.end:                                          ; preds = %for.body, %if.end, %entry
> +  ret void
> +
> +; CHECK: @lua_xmove
> +; CHECK: bnelr
> +; CHECK: bnelr
> +; CHECK: bdzlr
> +}
> +
> +attributes #0 = { nounwind }
> +
> +!0 = metadata !{metadata !"int", metadata !1}
> +!1 = metadata !{metadata !"omnipotent char", metadata !2}
> +!2 = metadata !{metadata !"Simple C/C++ TBAA"}
> +!3 = metadata !{metadata !"any pointer", metadata !1}
> 
> Added: llvm/trunk/test/CodeGen/PowerPC/early-ret2.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/early-ret2.ll?rev=179134&view=auto
> ==============================================================================
> --- llvm/trunk/test/CodeGen/PowerPC/early-ret2.ll (added)
> +++ llvm/trunk/test/CodeGen/PowerPC/early-ret2.ll Tue Apr  9 17:58:37 2013
> @@ -0,0 +1,26 @@
> +; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s
> +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
> +target triple = "powerpc64-unknown-linux-gnu"
> +
> +define void @_Z8example3iPiS_() #0 {
> +entry:
> +  br i1 undef, label %while.end, label %while.body.lr.ph
> +
> +while.body.lr.ph:                                 ; preds = %entry
> +  br i1 undef, label %while.end, label %while.body
> +
> +while.body:                                       ; preds = %while.body, %while.body.lr.ph
> +  br i1 false, label %while.end, label %while.body, !llvm.vectorizer.already_vectorized !0
> +
> +while.end:                                        ; preds = %while.body, %while.body.lr.ph, %entry
> +  ret void
> +
> +; CHECK: @_Z8example3iPiS_
> +; CHECK: bnelr
> +; CHECK: bnelr
> +}
> +
> +attributes #0 = { noinline nounwind }
> +
> +!0 = metadata !{}
> +
> 
> 
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
> 




More information about the llvm-commits mailing list