[llvm] r187723 - [SystemZ] Use BRCT and BRCTG to eliminate add-&-compare sequences

Richard Sandiford rsandifo at linux.vnet.ibm.com
Mon Aug 5 04:23:46 PDT 2013


Author: rsandifo
Date: Mon Aug  5 06:23:46 2013
New Revision: 187723

URL: http://llvm.org/viewvc/llvm-project?rev=187723&view=rev
Log:
[SystemZ] Use BRCT and BRCTG to eliminate add-&-compare sequences

This patch just uses a peephole test for "add; compare; branch" sequences
within a single block.  The IR optimizers already convert loops to
decrement-and-branch-on-nonzero form in some cases, so even this
simplistic test triggers many times during a clang bootstrap and
projects/test-suite run.  It looks like there are still cases where we
need to more strongly prefer branches on nonzero though.  E.g. I saw a
case where a loop that started out with a check for 0 ended up with a
check for -1.  I'll try to look at that sometime.

I ended up adding the Reference class because MachineInstr::readsRegister()
doesn't check for subregisters (by design, as far as I could tell).

Added:
    llvm/trunk/test/CodeGen/SystemZ/Large/branch-range-07.py
    llvm/trunk/test/CodeGen/SystemZ/Large/branch-range-08.py
Modified:
    llvm/trunk/lib/Target/SystemZ/SystemZElimCompare.cpp
    llvm/trunk/lib/Target/SystemZ/SystemZInstrInfo.cpp
    llvm/trunk/lib/Target/SystemZ/SystemZInstrInfo.h
    llvm/trunk/lib/Target/SystemZ/SystemZLongBranch.cpp
    llvm/trunk/lib/Target/SystemZ/SystemZTargetMachine.cpp
    llvm/trunk/test/CodeGen/SystemZ/loop-01.ll

Modified: llvm/trunk/lib/Target/SystemZ/SystemZElimCompare.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/SystemZ/SystemZElimCompare.cpp?rev=187723&r1=187722&r2=187723&view=diff
==============================================================================
--- llvm/trunk/lib/Target/SystemZ/SystemZElimCompare.cpp (original)
+++ llvm/trunk/lib/Target/SystemZ/SystemZElimCompare.cpp Mon Aug  5 06:23:46 2013
@@ -28,10 +28,38 @@
 
 using namespace llvm;
 
+STATISTIC(BranchOnCounts, "Number of branch-on-count instructions");
 STATISTIC(EliminatedComparisons, "Number of eliminated comparisons");
 STATISTIC(FusedComparisons, "Number of fused compare-and-branch instructions");
 
 namespace {
+  // Represents the references to a particular register in one or more
+  // instructions.
+  struct Reference {
+    Reference()
+      : Def(false), Use(false), IndirectDef(false), IndirectUse(false) {}
+
+    Reference &operator|=(const Reference &Other) {
+      Def |= Other.Def;
+      IndirectDef |= Other.IndirectDef;
+      Use |= Other.Use;
+      IndirectUse |= Other.IndirectUse;
+      return *this;
+    }
+
+    operator bool() const { return Def || Use; }
+
+    // True if the register is defined or used in some form, either directly or
+    // via a sub- or super-register.
+    bool Def;
+    bool Use;
+
+    // True if the register is defined or used indirectly, by a sub- or
+    // super-register.
+    bool IndirectDef;
+    bool IndirectUse;
+  };
+
   class SystemZElimCompare : public MachineFunctionPass {
   public:
     static char ID;
@@ -46,6 +74,9 @@ namespace {
     bool runOnMachineFunction(MachineFunction &F);
 
   private:
+    Reference getRegReferences(MachineInstr *MI, unsigned Reg);
+    bool convertToBRCT(MachineInstr *MI, MachineInstr *Compare,
+                       SmallVectorImpl<MachineInstr *> &CCUsers);
     bool convertToLoadAndTest(MachineInstr *MI);
     bool adjustCCMasksForInstr(MachineInstr *MI, MachineInstr *Compare,
                                SmallVectorImpl<MachineInstr *> &CCUsers);
@@ -99,6 +130,80 @@ static bool resultTests(MachineInstr *MI
   return false;
 }
 
+// Describe the references to Reg in MI, including sub- and super-registers.
+Reference SystemZElimCompare::getRegReferences(MachineInstr *MI, unsigned Reg) {
+  Reference Ref;
+  for (unsigned I = 0, E = MI->getNumOperands(); I != E; ++I) {
+    const MachineOperand &MO = MI->getOperand(I);
+    if (MO.isReg()) {
+      if (unsigned MOReg = MO.getReg()) {
+        if (MOReg == Reg || TRI->regsOverlap(MOReg, Reg)) {
+          if (MO.isUse()) {
+            Ref.Use = true;
+            Ref.IndirectUse |= (MOReg != Reg);
+          }
+          if (MO.isDef()) {
+            Ref.Def = true;
+            Ref.IndirectDef |= (MOReg != Reg);
+          }
+        }
+      }
+    }
+  }
+  return Ref;
+}
+
+// Compare compares the result of MI against zero.  If MI is an addition
+// of -1 and if CCUsers is a single branch on nonzero, eliminate the addition
+// and convert the branch to a BRCT(G).  Return true on success.
+bool
+SystemZElimCompare::convertToBRCT(MachineInstr *MI, MachineInstr *Compare,
+                                  SmallVectorImpl<MachineInstr *> &CCUsers) {
+  // Check whether we have an addition of -1.
+  unsigned Opcode = MI->getOpcode();
+  unsigned BRCT;
+  if (Opcode == SystemZ::AHI)
+    BRCT = SystemZ::BRCT;
+  else if (Opcode == SystemZ::AGHI)
+    BRCT = SystemZ::BRCTG;
+  else
+    return false;
+  if (MI->getOperand(2).getImm() != -1)
+    return false;
+
+  // Check whether we have a single JLH.
+  if (CCUsers.size() != 1)
+    return false;
+  MachineInstr *Branch = CCUsers[0];
+  if (Branch->getOpcode() != SystemZ::BRC ||
+      Branch->getOperand(0).getImm() != SystemZ::CCMASK_ICMP ||
+      Branch->getOperand(1).getImm() != SystemZ::CCMASK_CMP_NE)
+    return false;
+
+  // We already know that there are no references to the register between
+  // MI and Compare.  Make sure that there are also no references between
+  // Compare and Branch.
+  unsigned SrcReg = Compare->getOperand(0).getReg();
+  MachineBasicBlock::iterator MBBI = Compare, MBBE = Branch;
+  for (++MBBI; MBBI != MBBE; ++MBBI)
+    if (getRegReferences(MBBI, SrcReg))
+      return false;
+
+  // The transformation is OK.  Rebuild Branch as a BRCT(G).
+  MachineOperand Target(Branch->getOperand(2));
+  Branch->RemoveOperand(2);
+  Branch->RemoveOperand(1);
+  Branch->RemoveOperand(0);
+  Branch->setDesc(TII->get(BRCT));
+  MachineInstrBuilder(*Branch->getParent()->getParent(), Branch)
+    .addOperand(MI->getOperand(0))
+    .addOperand(MI->getOperand(1))
+    .addOperand(Target)
+    .addReg(SystemZ::CC, RegState::ImplicitDefine);
+  MI->removeFromParent();
+  return true;
+}
+
 // If MI is a load instruction, try to convert it into a LOAD AND TEST.
 // Return true on success.
 bool SystemZElimCompare::convertToLoadAndTest(MachineInstr *MI) {
@@ -210,21 +315,32 @@ optimizeCompareZero(MachineInstr *Compar
   unsigned SrcSubReg = Compare->getOperand(0).getSubReg();
   MachineBasicBlock *MBB = Compare->getParent();
   MachineBasicBlock::iterator MBBI = Compare, MBBE = MBB->begin();
-  bool SeenUseOfCC = false;
+  Reference CCRefs;
+  Reference SrcRefs;
   while (MBBI != MBBE) {
     --MBBI;
     MachineInstr *MI = MBBI;
-    if (resultTests(MI, SrcReg, SrcSubReg) &&
-        ((!SeenUseOfCC && convertToLoadAndTest(MI)) ||
-         adjustCCMasksForInstr(MI, Compare, CCUsers))) {
-      EliminatedComparisons += 1;
-      return true;
+    if (resultTests(MI, SrcReg, SrcSubReg)) {
+      // Try to remove both MI and Compare by converting a branch to BRCT(G).
+      // We don't care in this case whether CC is modified between MI and
+      // Compare.
+      if (!CCRefs.Use && !SrcRefs && convertToBRCT(MI, Compare, CCUsers)) {
+        BranchOnCounts += 1;
+        return true;
+      }
+      // Try to eliminate Compare by reusing a CC result from MI.
+      if ((!CCRefs && convertToLoadAndTest(MI)) ||
+          (!CCRefs.Def && adjustCCMasksForInstr(MI, Compare, CCUsers))) {
+        EliminatedComparisons += 1;
+        return true;
+      }
     }
-    if (MI->modifiesRegister(SrcReg, TRI) ||
-        MI->modifiesRegister(SystemZ::CC, TRI))
+    SrcRefs |= getRegReferences(MI, SrcReg);
+    if (SrcRefs.Def)
+      return false;
+    CCRefs |= getRegReferences(MI, SystemZ::CC);
+    if (CCRefs.Use && CCRefs.Def)
       return false;
-    if (MI->readsRegister(SystemZ::CC, TRI))
-      SeenUseOfCC = true;
   }
   return false;
 }
@@ -316,13 +432,12 @@ bool SystemZElimCompare::processBlock(Ma
       continue;
     }
 
-    if (MI->definesRegister(SystemZ::CC, TRI)) {
+    Reference CCRefs(getRegReferences(MI, SystemZ::CC));
+    if (CCRefs.Def) {
       CCUsers.clear();
-      CompleteCCUsers = true;
-    } else if (MI->modifiesRegister(SystemZ::CC, TRI))
-      CompleteCCUsers = false;
-
-    if (CompleteCCUsers && MI->readsRegister(SystemZ::CC, TRI))
+      CompleteCCUsers = !CCRefs.IndirectDef;
+    }
+    if (CompleteCCUsers && CCRefs.Use)
       CCUsers.push_back(MI);
   }
   return Changed;

Modified: llvm/trunk/lib/Target/SystemZ/SystemZInstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/SystemZ/SystemZInstrInfo.cpp?rev=187723&r1=187722&r2=187723&view=diff
==============================================================================
--- llvm/trunk/lib/Target/SystemZ/SystemZInstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/SystemZ/SystemZInstrInfo.cpp Mon Aug  5 06:23:46 2013
@@ -684,6 +684,14 @@ SystemZInstrInfo::getBranchInfo(const Ma
                              MI->getOperand(0).getImm(),
                              MI->getOperand(1).getImm(), &MI->getOperand(2));
 
+  case SystemZ::BRCT:
+    return SystemZII::Branch(SystemZII::BranchCT, SystemZ::CCMASK_ICMP,
+                             SystemZ::CCMASK_CMP_NE, &MI->getOperand(2));
+
+  case SystemZ::BRCTG:
+    return SystemZII::Branch(SystemZII::BranchCTG, SystemZ::CCMASK_ICMP,
+                             SystemZ::CCMASK_CMP_NE, &MI->getOperand(2));
+
   case SystemZ::CIJ:
   case SystemZ::CRJ:
     return SystemZII::Branch(SystemZII::BranchC, SystemZ::CCMASK_ICMP,

Modified: llvm/trunk/lib/Target/SystemZ/SystemZInstrInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/SystemZ/SystemZInstrInfo.h?rev=187723&r1=187722&r2=187723&view=diff
==============================================================================
--- llvm/trunk/lib/Target/SystemZ/SystemZInstrInfo.h (original)
+++ llvm/trunk/lib/Target/SystemZ/SystemZInstrInfo.h Mon Aug  5 06:23:46 2013
@@ -69,7 +69,15 @@ namespace SystemZII {
 
     // An instruction that peforms a 64-bit signed comparison and branches
     // on the result.
-    BranchCG
+    BranchCG,
+
+    // An instruction that decrements a 32-bit register and branches if
+    // the result is nonzero.
+    BranchCT,
+
+    // An instruction that decrements a 64-bit register and branches if
+    // the result is nonzero.
+    BranchCTG
   };
   // Information about a branch instruction.
   struct Branch {

Modified: llvm/trunk/lib/Target/SystemZ/SystemZLongBranch.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/SystemZ/SystemZLongBranch.cpp?rev=187723&r1=187722&r2=187723&view=diff
==============================================================================
--- llvm/trunk/lib/Target/SystemZ/SystemZLongBranch.cpp (original)
+++ llvm/trunk/lib/Target/SystemZ/SystemZLongBranch.cpp Mon Aug  5 06:23:46 2013
@@ -148,6 +148,7 @@ namespace {
     bool mustRelaxBranch(const TerminatorInfo &Terminator, uint64_t Address);
     bool mustRelaxABranch();
     void setWorstCaseAddresses();
+    void splitBranchOnCount(MachineInstr *MI, unsigned AddOpcode);
     void splitCompareBranch(MachineInstr *MI, unsigned CompareOpcode);
     void relaxBranch(TerminatorInfo &Terminator);
     void relaxBranches();
@@ -218,6 +219,11 @@ TerminatorInfo SystemZLongBranch::descri
       // Relaxes to BRCL, which is 2 bytes longer.
       Terminator.ExtraRelaxSize = 2;
       break;
+    case SystemZ::BRCT:
+    case SystemZ::BRCTG:
+      // Relaxes to A(G)HI and BRCL, which is 6 bytes longer.
+      Terminator.ExtraRelaxSize = 6;
+      break;
     case SystemZ::CRJ:
       // Relaxes to a CR/BRCL sequence, which is 2 bytes longer.
       Terminator.ExtraRelaxSize = 2;
@@ -330,6 +336,25 @@ void SystemZLongBranch::setWorstCaseAddr
   }
 }
 
+// Split BRANCH ON COUNT MI into the addition given by AddOpcode followed
+// by a BRCL on the result.
+void SystemZLongBranch::splitBranchOnCount(MachineInstr *MI,
+                                           unsigned AddOpcode) {
+  MachineBasicBlock *MBB = MI->getParent();
+  DebugLoc DL = MI->getDebugLoc();
+  BuildMI(*MBB, MI, DL, TII->get(AddOpcode))
+    .addOperand(MI->getOperand(0))
+    .addOperand(MI->getOperand(1))
+    .addImm(-1);
+  MachineInstr *BRCL = BuildMI(*MBB, MI, DL, TII->get(SystemZ::BRCL))
+    .addImm(SystemZ::CCMASK_ICMP)
+    .addImm(SystemZ::CCMASK_CMP_NE)
+    .addOperand(MI->getOperand(2));
+  // The implicit use of CC is a killing use.
+  BRCL->addRegisterKilled(SystemZ::CC, &TII->getRegisterInfo());
+  MI->eraseFromParent();
+}
+
 // Split MI into the comparison given by CompareOpcode followed
 // a BRCL on the result.
 void SystemZLongBranch::splitCompareBranch(MachineInstr *MI,
@@ -358,6 +383,12 @@ void SystemZLongBranch::relaxBranch(Term
   case SystemZ::BRC:
     Branch->setDesc(TII->get(SystemZ::BRCL));
     break;
+  case SystemZ::BRCT:
+    splitBranchOnCount(Branch, SystemZ::AHI);
+    break;
+  case SystemZ::BRCTG:
+    splitBranchOnCount(Branch, SystemZ::AGHI);
+    break;
   case SystemZ::CRJ:
     splitCompareBranch(Branch, SystemZ::CR);
     break;

Modified: llvm/trunk/lib/Target/SystemZ/SystemZTargetMachine.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/SystemZ/SystemZTargetMachine.cpp?rev=187723&r1=187722&r2=187723&view=diff
==============================================================================
--- llvm/trunk/lib/Target/SystemZ/SystemZTargetMachine.cpp (original)
+++ llvm/trunk/lib/Target/SystemZ/SystemZTargetMachine.cpp Mon Aug  5 06:23:46 2013
@@ -82,6 +82,9 @@ bool SystemZPassConfig::addPreEmitPass()
   // CC values (while still being worthwhile) and others that happen to make
   // the CC result more useful than it was originally.
   //
+  // Another reason is that we only want to use BRANCH ON COUNT in cases
+  // where we know that the count register is not going to be spilled.
+  //
   // Doing it so late makes it more likely that a register will be reused
   // between the comparison and the branch, but it isn't clear whether
   // preventing that would be a win or not.

Added: llvm/trunk/test/CodeGen/SystemZ/Large/branch-range-07.py
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/SystemZ/Large/branch-range-07.py?rev=187723&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/SystemZ/Large/branch-range-07.py (added)
+++ llvm/trunk/test/CodeGen/SystemZ/Large/branch-range-07.py Mon Aug  5 06:23:46 2013
@@ -0,0 +1,68 @@
+# Test 32-bit BRANCH RELATIVE ON COUNT in cases where some branches are out
+# of range.
+# RUN: python %s | llc -mtriple=s390x-linux-gnu | FileCheck %s
+
+# Construct:
+#
+# loopN:
+#   load of countN
+#   ...
+# loop0:
+#   0xffd8 bytes, from MVIY instructions
+#   decrement of count0 and conditional branch to loop0
+# after0:
+#   ...
+#   decrement of countN
+#   conditional branch to loopN
+# afterN:
+#
+# Each load occupies 4 bytes.  Each decrement and branch occupies 4
+# bytes if BRCT can be used, otherwise it occupies 10 bytes (AHI + BRCL).
+# This means that the sixth loop (loop5) contains 5 * 4 + 0xffd8 + 5 * 4 ==
+# 0x10000 bytes and is therefore (just) in range.  loop6 and loop7 are not.
+#
+# CHECK: brct {{%r[0-9]+}}
+# CHECK: brct {{%r[0-9]+}}
+# CHECK: brct {{%r[0-9]+}}
+# CHECK: brct {{%r[0-9]+}}
+# CHECK: brct {{%r[0-9]+}}
+# CHECK: brct {{%r[0-9]+}}
+# CHECK: ahi {{%r[0-9]+}}, -1
+# CHECK: jglh
+# CHECK: ahi {{%r[0-9]+}}, -1
+# CHECK: jglh
+
+branch_blocks = 8
+main_size = 0xffd8
+
+print 'define void @f1(i8 *%base, i32 *%counts) {'
+print 'entry:'
+
+for i in xrange(branch_blocks - 1, -1, -1):
+    print '  %%countptr%d = getelementptr i32 *%%counts, i64 %d' % (i, i)
+    print '  %%initcount%d = load i32 *%%countptr%d' % (i, i)
+    print '  br label %%loop%d' % i
+    
+    print 'loop%d:' % i
+    block1 = 'entry' if i == branch_blocks - 1 else 'loop%d' % (i + 1)
+    block2 = 'loop0' if i == 0 else 'after%d' % (i - 1)
+    print ('  %%count%d = phi i32 [ %%initcount%d, %%%s ],'
+           ' [ %%nextcount%d, %%%s ]' % (i, i, block1, i, block2))
+
+a, b = 1, 1
+for i in xrange(0, main_size, 6):
+    a, b = b, a + b
+    offset = 4096 + b % 500000
+    value = a % 256
+    print '  %%ptr%d = getelementptr i8 *%%base, i64 %d' % (i, offset)
+    print '  store volatile i8 %d, i8 *%%ptr%d' % (value, i)
+
+for i in xrange(branch_blocks):
+    print '  %%nextcount%d = add i32 %%count%d, -1' % (i, i)
+    print '  %%test%d = icmp ne i32 %%nextcount%d, 0' % (i, i)
+    print '  br i1 %%test%d, label %%loop%d, label %%after%d' % (i, i, i)
+    print ''
+    print 'after%d:' % i
+
+print '  ret void'
+print '}'

Added: llvm/trunk/test/CodeGen/SystemZ/Large/branch-range-08.py
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/SystemZ/Large/branch-range-08.py?rev=187723&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/SystemZ/Large/branch-range-08.py (added)
+++ llvm/trunk/test/CodeGen/SystemZ/Large/branch-range-08.py Mon Aug  5 06:23:46 2013
@@ -0,0 +1,69 @@
+# Test 64-bit BRANCH RELATIVE ON COUNT in cases where some branches are out
+# of range.
+# RUN: python %s | llc -mtriple=s390x-linux-gnu | FileCheck %s
+
+# Construct:
+#
+# loopN:
+#   load of countN
+#   ...
+# loop0:
+#   0xffd8 bytes, from MVIY instructions
+#   decrement of count0 and conditional branch to loop0
+# after0:
+#   ...
+#   decrement of countN
+#   conditional branch to loopN
+# afterN:
+#
+# Each load occupies 6 bytes.  Each decrement and branch occupies 4
+# bytes if BRCTG can be used, otherwise it occupies 10 bytes (AGHI + BRCL).
+# This means that the fifth loop (loop4) contains 4 * 6 + 0xffd8 + 4 * 4 ==
+# 0x10000 bytes and is therefore (just) in range.  loop5 to loop7 are not.
+#
+# CHECK: brctg {{%r[0-9]+}}
+# CHECK: brctg {{%r[0-9]+}}
+# CHECK: brctg {{%r[0-9]+}}
+# CHECK: brctg {{%r[0-9]+}}
+# CHECK: brctg {{%r[0-9]+}}
+# CHECK: aghi {{%r[0-9]+}}, -1
+# CHECK: jglh
+# CHECK: aghi {{%r[0-9]+}}, -1
+# CHECK: jglh
+# CHECK: aghi {{%r[0-9]+}}, -1
+# CHECK: jglh
+
+branch_blocks = 8
+main_size = 0xffd8
+
+print 'define void @f1(i8 *%base, i64 *%counts) {'
+print 'entry:'
+
+for i in xrange(branch_blocks - 1, -1, -1):
+    print '  %%countptr%d = getelementptr i64 *%%counts, i64 %d' % (i, i)
+    print '  %%initcount%d = load i64 *%%countptr%d' % (i, i)
+    print '  br label %%loop%d' % i
+    
+    print 'loop%d:' % i
+    block1 = 'entry' if i == branch_blocks - 1 else 'loop%d' % (i + 1)
+    block2 = 'loop0' if i == 0 else 'after%d' % (i - 1)
+    print ('  %%count%d = phi i64 [ %%initcount%d, %%%s ],'
+           ' [ %%nextcount%d, %%%s ]' % (i, i, block1, i, block2))
+
+a, b = 1, 1
+for i in xrange(0, main_size, 6):
+    a, b = b, a + b
+    offset = 4096 + b % 500000
+    value = a % 256
+    print '  %%ptr%d = getelementptr i8 *%%base, i64 %d' % (i, offset)
+    print '  store volatile i8 %d, i8 *%%ptr%d' % (value, i)
+
+for i in xrange(branch_blocks):
+    print '  %%nextcount%d = add i64 %%count%d, -1' % (i, i)
+    print '  %%test%d = icmp ne i64 %%nextcount%d, 0' % (i, i)
+    print '  br i1 %%test%d, label %%loop%d, label %%after%d' % (i, i, i)
+    print ''
+    print 'after%d:' % i
+
+print '  ret void'
+print '}'

Modified: llvm/trunk/test/CodeGen/SystemZ/loop-01.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/SystemZ/loop-01.ll?rev=187723&r1=187722&r2=187723&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/SystemZ/loop-01.ll (original)
+++ llvm/trunk/test/CodeGen/SystemZ/loop-01.ll Mon Aug  5 06:23:46 2013
@@ -5,7 +5,7 @@
 ; Test that strength reduction is applied to addresses with a scale factor,
 ; but that indexed addressing can still be used.
 define void @f1(i32 *%dest, i32 %a) {
-; CHECK-LABEL: f1
+; CHECK-LABEL: f1:
 ; CHECK-NOT: sllg
 ; CHECK: st %r3, 0({{%r[1-5],%r[1-5]}})
 ; CHECK: br %r14
@@ -22,4 +22,103 @@ loop:
 
 exit:
   ret void
+}
+
+; Test a loop that should be converted into dbr form and then use BRCT.
+define void @f2(i32 *%src, i32 *%dest) {
+; CHECK-LABEL: f2:
+; CHECK: lhi [[REG:%r[0-5]]], 100
+; CHECK: [[LABEL:\.[^:]*]]:{{.*}} %loop
+; CHECK: brct [[REG]], [[LABEL]]
+; CHECK: br %r14
+entry:
+  br label %loop
+
+loop:
+  %count = phi i32 [ 0, %entry ], [ %next, %loop.next ]
+  %next = add i32 %count, 1
+  %val = load volatile i32 *%src
+  %cmp = icmp eq i32 %val, 0
+  br i1 %cmp, label %loop.next, label %loop.store
+
+loop.store:
+  %add = add i32 %val, 1
+  store volatile i32 %add, i32 *%dest
+  br label %loop.next
+
+loop.next:
+  %cont = icmp ne i32 %next, 100
+  br i1 %cont, label %loop, label %exit
+
+exit:
+  ret void
+}
+
+; Like f2, but for BRCTG.
+define void @f3(i64 *%src, i64 *%dest) {
+; CHECK-LABEL: f3:
+; CHECK: lghi [[REG:%r[0-5]]], 100
+; CHECK: [[LABEL:\.[^:]*]]:{{.*}} %loop
+; CHECK: brctg [[REG]], [[LABEL]]
+; CHECK: br %r14
+entry:
+  br label %loop
+
+loop:
+  %count = phi i64 [ 0, %entry ], [ %next, %loop.next ]
+  %next = add i64 %count, 1
+  %val = load volatile i64 *%src
+  %cmp = icmp eq i64 %val, 0
+  br i1 %cmp, label %loop.next, label %loop.store
+
+loop.store:
+  %add = add i64 %val, 1
+  store volatile i64 %add, i64 *%dest
+  br label %loop.next
+
+loop.next:
+  %cont = icmp ne i64 %next, 100
+  br i1 %cont, label %loop, label %exit
+
+exit:
+  ret void
+}
+
+; Test a loop with a 64-bit decremented counter in which the 32-bit
+; low part of the counter is used after the decrement.  This is an example
+; of a subregister use being the only thing that blocks a conversion to BRCTG.
+define void @f4(i32 *%src, i32 *%dest, i64 *%dest2, i64 %count) {
+; CHECK-LABEL: f4:
+; CHECK: aghi [[REG:%r[0-5]]], -1
+; CHECK: lr [[REG2:%r[0-5]]], [[REG]]
+; CHECK: stg [[REG2]],
+; CHECK: jne {{\..*}}
+; CHECK: br %r14
+entry:
+  br label %loop
+
+loop:
+  %left = phi i64 [ %count, %entry ], [ %next, %loop.next ]
+  store volatile i64 %left, i64 *%dest2
+  %val = load volatile i32 *%src
+  %cmp = icmp eq i32 %val, 0
+  br i1 %cmp, label %loop.next, label %loop.store
+
+loop.store:
+  %add = add i32 %val, 1
+  store volatile i32 %add, i32 *%dest
+  br label %loop.next
+
+loop.next:
+  %next = add i64 %left, -1
+  %ext = zext i32 %val to i64
+  %shl = shl i64 %ext, 32
+  %and = and i64 %next, 4294967295
+  %or = or i64 %shl, %and
+  store volatile i64 %or, i64 *%dest2
+  %cont = icmp ne i64 %next, 0
+  br i1 %cont, label %loop, label %exit
+
+exit:
+  ret void
 }





More information about the llvm-commits mailing list