[llvm] r343100 - [PowerPC] optimize conditional branch on CRSET/CRUNSET

Hiroshi Inoue via llvm-commits llvm-commits at lists.llvm.org
Wed Sep 26 05:32:45 PDT 2018


Author: inouehrs
Date: Wed Sep 26 05:32:45 2018
New Revision: 343100

URL: http://llvm.org/viewvc/llvm-project?rev=343100&view=rev
Log:
[PowerPC] optimize conditional branch on CRSET/CRUNSET

This patch adds a check to optimize conditional branches (BC and BCn) whose condition is a constant set by CRSET or CRUNSET.
Other optimizers, such as block placement, may generate such code; hence
I do this at the very end of optimization, in the pre-emit peephole pass.

A conditional branch based on a constant is either eliminated or converted into an unconditional branch.
CRSET/CRUNSET is also eliminated if the condition code register is not used
by any instruction other than the branch being optimized.

Differential Revision: https://reviews.llvm.org/D52345


Added:
    llvm/trunk/test/CodeGen/PowerPC/setcr_bc.mir
    llvm/trunk/test/CodeGen/PowerPC/setcr_bc2.mir
Modified:
    llvm/trunk/lib/Target/PowerPC/PPCPreEmitPeephole.cpp

Modified: llvm/trunk/lib/Target/PowerPC/PPCPreEmitPeephole.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCPreEmitPeephole.cpp?rev=343100&r1=343099&r2=343100&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCPreEmitPeephole.cpp (original)
+++ llvm/trunk/lib/Target/PowerPC/PPCPreEmitPeephole.cpp Wed Sep 26 05:32:45 2018
@@ -18,6 +18,7 @@
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/CodeGen/LivePhysRegs.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -60,6 +61,7 @@ namespace {
         return false;
       bool Changed = false;
       const PPCInstrInfo *TII = MF.getSubtarget<PPCSubtarget>().getInstrInfo();
+      const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
       SmallVector<MachineInstr *, 4> InstrsToErase;
       for (MachineBasicBlock &MBB : MF) {
         for (MachineInstr &MI : MBB) {
@@ -74,6 +76,75 @@ namespace {
             }
           }
         }
+
+        // Eliminate a conditional branch based on a constant CR bit set by
+        // CRSET or CRUNSET. We eliminate the conditional branch or
+        // convert it into an unconditional branch. Also, if the CR bit
+        // is not used by other instructions, we eliminate CRSET/CRUNSET as well.
+        auto I = MBB.getFirstInstrTerminator();
+        if (I == MBB.instr_end())
+          continue;
+        MachineInstr *Br = &*I;
+        if (Br->getOpcode() != PPC::BC && Br->getOpcode() != PPC::BCn)
+          continue;
+        MachineInstr *CRSetMI = nullptr;
+        unsigned CRBit = Br->getOperand(0).getReg();
+        unsigned CRReg = getCRFromCRBit(CRBit);
+        bool SeenUse = false;
+        MachineBasicBlock::reverse_iterator It = Br, Er = MBB.rend();
+        for (It++; It != Er; It++) {
+          if (It->modifiesRegister(CRBit, TRI)) {
+            if ((It->getOpcode() == PPC::CRUNSET ||
+                 It->getOpcode() == PPC::CRSET) &&
+                It->getOperand(0).getReg() == CRBit)
+              CRSetMI = &*It;
+            break;
+          }
+          if (It->readsRegister(CRBit, TRI))
+            SeenUse = true;
+        }
+        if (!CRSetMI) continue;
+
+        unsigned CRSetOp = CRSetMI->getOpcode();
+        if ((Br->getOpcode() == PPC::BCn && CRSetOp == PPC::CRSET) ||
+            (Br->getOpcode() == PPC::BC  && CRSetOp == PPC::CRUNSET)) {
+          // Remove this branch since it cannot be taken.
+          InstrsToErase.push_back(Br);
+          MBB.removeSuccessor(Br->getOperand(1).getMBB());
+        }
+        else {
+          // This conditional branch is always taken. So, remove all branches
+          // and insert an unconditional branch to the destination of this.
+          MachineBasicBlock::iterator It = Br, Er = MBB.end();
+          for (; It != Er && !SeenUse; It++) {
+            if (It->isDebugInstr()) continue;
+            assert(It->isTerminator() && "Non-terminator after a terminator");
+            InstrsToErase.push_back(&*It);
+          }
+          if (!MBB.isLayoutSuccessor(Br->getOperand(1).getMBB())) {
+            ArrayRef<MachineOperand> NoCond;
+            TII->insertBranch(MBB, Br->getOperand(1).getMBB(), nullptr,
+                              NoCond, Br->getDebugLoc());
+          }
+          for (auto &Succ : MBB.successors())
+            if (Succ != Br->getOperand(1).getMBB()) {
+              MBB.removeSuccessor(Succ);
+              break;
+            }
+        }
+
+        // If the CRBit is not used by another instruction, we can eliminate
+        // CRSET/CRUNSET instruction.
+        if (!SeenUse) {
+          // We need to check use of the CRBit in successors.
+          for (auto &SuccMBB : MBB.successors())
+            if (SuccMBB->isLiveIn(CRBit) || SuccMBB->isLiveIn(CRReg)) {
+              SeenUse = true;
+              break;
+            }
+          if (!SeenUse)
+            InstrsToErase.push_back(CRSetMI);
+        }
       }
       for (MachineInstr *MI : InstrsToErase) {
         LLVM_DEBUG(dbgs() << "PPC pre-emit peephole: erasing instruction: ");

Added: llvm/trunk/test/CodeGen/PowerPC/setcr_bc.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/setcr_bc.mir?rev=343100&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/setcr_bc.mir (added)
+++ llvm/trunk/test/CodeGen/PowerPC/setcr_bc.mir Wed Sep 26 05:32:45 2018
@@ -0,0 +1,132 @@
+# RUN: llc -verify-machineinstrs -start-before=ppc-pre-emit-peephole %s -o - | FileCheck %s
+--- |
+  target datalayout = "e-m:e-i64:64-n32:64"
+  target triple = "powerpc64le-unknown-linux-gnu"
+  
+  declare signext i32 @callee(i32 signext) local_unnamed_addr #1
+
+  define signext i32 @func(i32 signext %v) local_unnamed_addr #0 {
+  entry:
+    %call.i = tail call signext i32 @callee(i32 signext %v)
+    %tobool.i = icmp eq i32 %call.i, 0
+    br i1 %tobool.i, label %if.else.i, label %if.then.i
+  
+  if.then.i:                                        ; preds = %entry
+    %call2.i = tail call signext i32 @callee(i32 signext %call.i)
+    br label %_Z6calleei.exit
+  
+  if.else.i:                                        ; preds = %entry
+    %phitmp = icmp sgt i32 %v, -1
+    br label %_Z6calleei.exit
+  
+  _Z6calleei.exit:                                  ; preds = %if.else.i, %if.then.i
+    %call2.i.sink = phi i32 [ %call2.i, %if.then.i ], [ %v, %if.else.i ]
+    %.sink = phi i1 [ false, %if.then.i ], [ %phitmp, %if.else.i ]
+    br i1 %.sink, label %if.end, label %if.then
+  
+  if.then:                                          ; preds = %_Z6calleei.exit
+    %call1 = tail call signext i32 @callee(i32 signext 0)
+    br label %if.end
+  
+  if.end:                                           ; preds = %if.then, %_Z6calleei.exit
+    ret i32 %call2.i.sink
+  }
+  
+  attributes #0 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="ppc64le" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+htm,+power8-vector,+vsx,-power9-vector,-qpx" "unsafe-fp-math"="false" "use-soft-float"="false" }
+  attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="ppc64le" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+htm,+power8-vector,+vsx,-power9-vector,-qpx" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+...
+---
+name:            func
+alignment:       4
+exposesReturnsTwice: false
+legalized:       false
+regBankSelected: false
+selected:        false
+failedISel:      false
+tracksRegLiveness: true
+registers:       
+liveins:         
+  - { reg: '$x3', virtual-reg: '' }
+frameInfo:       
+  isFrameAddressTaken: false
+  isReturnAddressTaken: false
+  hasStackMap:     false
+  hasPatchPoint:   false
+  stackSize:       48
+  offsetAdjustment: 0
+  maxAlignment:    0
+  adjustsStack:    true
+  hasCalls:        true
+  stackProtector:  ''
+  maxCallFrameSize: 32
+  hasOpaqueSPAdjustment: false
+  hasVAStart:      false
+  hasMustTailInVarArgFunc: false
+  localFrameSize:  0
+  savePoint:       ''
+  restorePoint:    ''
+fixedStack:      
+  - { id: 0, type: spill-slot, offset: -16, size: 8, alignment: 16, stack-id: 0, 
+      callee-saved-register: '$x30', callee-saved-restored: true, debug-info-variable: '', 
+      debug-info-expression: '', debug-info-location: '' }
+stack:           
+constants:       
+
+body:             |
+  bb.0.entry:
+    successors: %bb.2(0x30000000), %bb.1(0x50000000)
+    liveins: $x3, $x30
+
+    ; bc and crxor (CRUNSET) should be removed.
+    ; CHECK-LABEL: func
+    ; CHECK: # %bb.1
+    ; CHECK-NOT: crxor
+    ; CHECK-NOT: bc
+    ; CHECK: .LBB0_2
+
+    $x0 = MFLR8 implicit $lr8
+    STD killed $x0, 16, $x1
+    $x1 = STDU $x1, -48, $x1
+    STD killed $x30, 32, $x1 :: (store 8 into %fixed-stack.0, align 16)
+    $x30 = OR8 $x3, $x3
+    BL8_NOP @callee, csr_svr464_altivec, implicit-def dead $lr8, implicit $rm, implicit killed $x3, implicit $x2, implicit-def $r1, implicit-def $x3
+    renamable $cr0 = CMPLWI renamable $r3, 0
+    BCC 76, killed renamable $cr0, %bb.2
+  
+  bb.1.if.then.i:
+    successors: %bb.5(0x40000000), %bb.4(0x40000000)
+    liveins: $x3
+  
+    renamable $x3 = EXTSW_32_64 killed renamable $r3, implicit $x3
+    BL8_NOP @callee, csr_svr464_altivec, implicit-def dead $lr8, implicit $rm, implicit killed $x3, implicit $x2, implicit-def $r1, implicit-def $x3
+    renamable $cr0gt = CRUNSET implicit-def $cr0
+    $x30 = OR8 killed $x3, $x3
+    BC killed renamable $cr0gt, %bb.5
+  
+  bb.4.if.then:
+    successors: %bb.5(0x80000000)
+    liveins: $x30
+  
+    $x3 = LI8 0
+    BL8_NOP @callee, csr_svr464_altivec, implicit-def dead $lr8, implicit $rm, implicit killed $x3, implicit $x2, implicit-def $r1, implicit-def dead $x3
+  
+  bb.5.if.end:
+    liveins: $x30
+  
+    renamable $x3 = EXTSW_32_64 killed renamable $r30, implicit $x30
+    $x30 = LD 32, $x1 :: (load 8 from %fixed-stack.0, align 16)
+    $x1 = ADDI8 $x1, 48
+    $x0 = LD 16, $x1
+    MTLR8 killed $x0, implicit-def $lr8
+    BLR8 implicit $lr8, implicit $rm, implicit killed $x3
+  
+  bb.2.if.else.i:
+    successors: %bb.5(0x40000000), %bb.4(0x40000000)
+    liveins: $x30
+  
+    renamable $cr0 = CMPWI renamable $r30, -1
+    BCn killed renamable $cr0gt, %bb.4
+    B %bb.5
+
+...

Added: llvm/trunk/test/CodeGen/PowerPC/setcr_bc2.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/setcr_bc2.mir?rev=343100&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/setcr_bc2.mir (added)
+++ llvm/trunk/test/CodeGen/PowerPC/setcr_bc2.mir Wed Sep 26 05:32:45 2018
@@ -0,0 +1,132 @@
+# RUN: llc -verify-machineinstrs -start-before=ppc-pre-emit-peephole %s -o - | FileCheck %s
+--- |
+  target datalayout = "e-m:e-i64:64-n32:64"
+  target triple = "powerpc64le-unknown-linux-gnu"
+  
+  declare signext i32 @callee(i32 signext) local_unnamed_addr #1
+
+  define signext i32 @func(i32 signext %v) local_unnamed_addr #0 {
+  entry:
+    %call.i = tail call signext i32 @callee(i32 signext %v)
+    %tobool.i = icmp eq i32 %call.i, 0
+    br i1 %tobool.i, label %if.else.i, label %if.then.i
+  
+  if.then.i:                                        ; preds = %entry
+    %call2.i = tail call signext i32 @callee(i32 signext %call.i)
+    br label %_Z6calleei.exit
+  
+  if.else.i:                                        ; preds = %entry
+    %phitmp = icmp sgt i32 %v, -1
+    br label %_Z6calleei.exit
+  
+  _Z6calleei.exit:                                  ; preds = %if.else.i, %if.then.i
+    %call2.i.sink = phi i32 [ %call2.i, %if.then.i ], [ %v, %if.else.i ]
+    %.sink = phi i1 [ false, %if.then.i ], [ %phitmp, %if.else.i ]
+    br i1 %.sink, label %if.end, label %if.then
+  
+  if.then:                                          ; preds = %_Z6calleei.exit
+    %call1 = tail call signext i32 @callee(i32 signext 0)
+    br label %if.end
+  
+  if.end:                                           ; preds = %if.then, %_Z6calleei.exit
+    ret i32 %call2.i.sink
+  }
+  
+  attributes #0 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="ppc64le" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+htm,+power8-vector,+vsx,-power9-vector,-qpx" "unsafe-fp-math"="false" "use-soft-float"="false" }
+  attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="ppc64le" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+htm,+power8-vector,+vsx,-power9-vector,-qpx" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+...
+---
+name:            func
+alignment:       4
+exposesReturnsTwice: false
+legalized:       false
+regBankSelected: false
+selected:        false
+failedISel:      false
+tracksRegLiveness: true
+registers:       
+liveins:         
+  - { reg: '$x3', virtual-reg: '' }
+frameInfo:       
+  isFrameAddressTaken: false
+  isReturnAddressTaken: false
+  hasStackMap:     false
+  hasPatchPoint:   false
+  stackSize:       48
+  offsetAdjustment: 0
+  maxAlignment:    0
+  adjustsStack:    true
+  hasCalls:        true
+  stackProtector:  ''
+  maxCallFrameSize: 32
+  hasOpaqueSPAdjustment: false
+  hasVAStart:      false
+  hasMustTailInVarArgFunc: false
+  localFrameSize:  0
+  savePoint:       ''
+  restorePoint:    ''
+fixedStack:      
+  - { id: 0, type: spill-slot, offset: -16, size: 8, alignment: 16, stack-id: 0, 
+      callee-saved-register: '$x30', callee-saved-restored: true, debug-info-variable: '', 
+      debug-info-expression: '', debug-info-location: '' }
+stack:           
+constants:       
+
+body:             |
+  bb.0.entry:
+    successors: %bb.2(0x30000000), %bb.1(0x50000000)
+    liveins: $x3, $x30
+
+    ; bc should be converted into b, but creqv (CRSET) should not be removed since it is used in a successor.
+    ; CHECK-LABEL: func
+    ; CHECK: # %bb.1
+    ; CHECK: creqv
+    ; CHECK-NOT: bc
+    ; CHECK: .LBB0_2
+
+    $x0 = MFLR8 implicit $lr8
+    STD killed $x0, 16, $x1
+    $x1 = STDU $x1, -48, $x1
+    STD killed $x30, 32, $x1 :: (store 8 into %fixed-stack.0, align 16)
+    $x30 = OR8 $x3, $x3
+    BL8_NOP @callee, csr_svr464_altivec, implicit-def dead $lr8, implicit $rm, implicit killed $x3, implicit $x2, implicit-def $r1, implicit-def $x3
+    renamable $cr0 = CMPLWI renamable $r3, 0
+    BCC 76, killed renamable $cr0, %bb.2
+  
+  bb.1.if.then.i:
+    successors: %bb.5(0x40000000), %bb.4(0x40000000)
+    liveins: $x3
+  
+    renamable $x3 = EXTSW_32_64 killed renamable $r3, implicit $x3
+    BL8_NOP @callee, csr_svr464_altivec, implicit-def dead $lr8, implicit $rm, implicit killed $x3, implicit $x2, implicit-def $r1, implicit-def $x3
+    renamable $cr0gt = CRSET implicit-def $cr0
+    $x30 = OR8 killed $x3, $x3
+    BC killed renamable $cr0gt, %bb.5
+  
+  bb.4.if.then:
+    successors: %bb.5(0x80000000)
+    liveins: $x30
+  
+    $x3 = LI8 0
+    BL8_NOP @callee, csr_svr464_altivec, implicit-def dead $lr8, implicit $rm, implicit killed $x3, implicit $x2, implicit-def $r1, implicit-def dead $x3
+  
+  bb.5.if.end:
+    liveins: $x30, $cr0gt
+  
+    renamable $x3 = EXTSW_32_64 killed renamable $r30, implicit $x30
+    $x30 = LD 32, $x1 :: (load 8 from %fixed-stack.0, align 16)
+    $x1 = ADDI8 $x1, 48
+    $x0 = LD 16, $x1
+    MTLR8 killed $x0, implicit-def $lr8
+    BLR8 implicit $lr8, implicit $rm, implicit killed $x3
+  
+  bb.2.if.else.i:
+    successors: %bb.5(0x40000000), %bb.4(0x40000000)
+    liveins: $x30
+  
+    renamable $cr0 = CMPWI renamable $r30, -1
+    BCn killed renamable $cr0gt, %bb.4
+    B %bb.5
+
+...




More information about the llvm-commits mailing list