[llvm] r284545 - Using branch probability to guide critical edge splitting.
Dehao Chen via llvm-commits
llvm-commits at lists.llvm.org
Tue Oct 18 16:24:03 PDT 2016
Author: dehao
Date: Tue Oct 18 18:24:02 2016
New Revision: 284545
URL: http://llvm.org/viewvc/llvm-project?rev=284545&view=rev
Log:
Using branch probability to guide critical edge splitting.
Summary:
The original heuristic to break critical edges during machine sink is relatively conservative: when there is only one instruction sinkable to the critical edge, it is likely that the machine sink pass will not break the critical edge. This leads to many speculative instructions being executed at runtime. However, with profile info, we can model the splitting benefits: if the critical edge has a 50% taken rate, it would always be beneficial to split the critical edge to avoid the speculated runtime instructions. This patch uses profile information to guide critical edge splitting in the machine sink pass.
The performance impact on speccpu2006 on Intel sandybridge machines:
spec/2006/fp/C++/444.namd 25.3 +0.26%
spec/2006/fp/C++/447.dealII 45.96 -0.10%
spec/2006/fp/C++/450.soplex 41.97 +1.49%
spec/2006/fp/C++/453.povray 36.83 -0.96%
spec/2006/fp/C/433.milc 23.81 +0.32%
spec/2006/fp/C/470.lbm 41.17 +0.34%
spec/2006/fp/C/482.sphinx3 48.13 +0.69%
spec/2006/int/C++/471.omnetpp 22.45 +3.25%
spec/2006/int/C++/473.astar 21.35 -2.06%
spec/2006/int/C++/483.xalancbmk 36.02 -2.39%
spec/2006/int/C/400.perlbench 33.7 -0.17%
spec/2006/int/C/401.bzip2 22.9 +0.52%
spec/2006/int/C/403.gcc 32.42 -0.54%
spec/2006/int/C/429.mcf 39.59 +0.19%
spec/2006/int/C/445.gobmk 26.98 -0.00%
spec/2006/int/C/456.hmmer 24.52 -0.18%
spec/2006/int/C/458.sjeng 28.26 +0.02%
spec/2006/int/C/462.libquantum 55.44 +3.74%
spec/2006/int/C/464.h264ref 46.67 -0.39%
geometric mean +0.20%
Manually checked 473 and 471 to verify the diff is in the noise range.
Reviewers: rengolin, davidxl
Subscribers: llvm-commits
Differential Revision: https://reviews.llvm.org/D24818
Modified:
llvm/trunk/lib/CodeGen/MachineSink.cpp
llvm/trunk/test/CodeGen/ARM/atomic-cmpxchg.ll
llvm/trunk/test/CodeGen/ARM/code-placement.ll
llvm/trunk/test/CodeGen/X86/block-placement.ll
llvm/trunk/test/CodeGen/X86/clz.ll
llvm/trunk/test/CodeGen/X86/loop-search.ll
llvm/trunk/test/CodeGen/X86/machine-sink.ll
llvm/trunk/test/CodeGen/X86/phys_subreg_coalesce-2.ll
llvm/trunk/test/CodeGen/X86/pr2659.ll
llvm/trunk/test/DebugInfo/COFF/pieces.ll
Modified: llvm/trunk/lib/CodeGen/MachineSink.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/MachineSink.cpp?rev=284545&r1=284544&r2=284545&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/MachineSink.cpp (original)
+++ llvm/trunk/lib/CodeGen/MachineSink.cpp Tue Oct 18 18:24:02 2016
@@ -24,6 +24,7 @@
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
+#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
@@ -60,6 +61,15 @@ UseBlockFreqInfo("machine-sink-bfi",
cl::desc("Use block frequency info to find successors to sink"),
cl::init(true), cl::Hidden);
+static cl::opt<unsigned> SplitEdgeProbabilityThreshold(
+ "machine-sink-split-probability-threshold",
+ cl::desc(
+ "Percentage threshold for splitting single-instruction critical edge. "
+ "If the branch threshold is higher than this threshold, we allow "
+ "speculative execution of up to 1 instruction to avoid branching to "
+ "splitted critical edge"),
+ cl::init(40), cl::Hidden);
+
STATISTIC(NumSunk, "Number of machine instructions sunk");
STATISTIC(NumSplit, "Number of critical edges split");
STATISTIC(NumCoalesces, "Number of copies coalesced");
@@ -74,6 +84,7 @@ namespace {
MachinePostDominatorTree *PDT; // Machine post dominator tree
MachineLoopInfo *LI;
const MachineBlockFrequencyInfo *MBFI;
+ const MachineBranchProbabilityInfo *MBPI;
AliasAnalysis *AA;
// Remember which edges have been considered for breaking.
@@ -105,6 +116,7 @@ namespace {
AU.addRequired<MachineDominatorTree>();
AU.addRequired<MachinePostDominatorTree>();
AU.addRequired<MachineLoopInfo>();
+ AU.addRequired<MachineBranchProbabilityInfo>();
AU.addPreserved<MachineDominatorTree>();
AU.addPreserved<MachinePostDominatorTree>();
AU.addPreserved<MachineLoopInfo>();
@@ -163,6 +175,7 @@ char MachineSinking::ID = 0;
char &llvm::MachineSinkingID = MachineSinking::ID;
INITIALIZE_PASS_BEGIN(MachineSinking, "machine-sink",
"Machine code sinking", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
@@ -283,6 +296,7 @@ bool MachineSinking::runOnMachineFunctio
PDT = &getAnalysis<MachinePostDominatorTree>();
LI = &getAnalysis<MachineLoopInfo>();
MBFI = UseBlockFreqInfo ? &getAnalysis<MachineBlockFrequencyInfo>() : nullptr;
+ MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
bool EverMadeChange = false;
@@ -383,6 +397,10 @@ bool MachineSinking::isWorthBreakingCrit
if (!MI.isCopy() && !TII->isAsCheapAsAMove(MI))
return true;
+ if (MBPI->getEdgeProbability(From, To) <=
+ BranchProbability(SplitEdgeProbabilityThreshold, 100))
+ return true;
+
// MI is cheap, we probably don't want to break the critical edge for it.
// However, if this would allow some definitions of its source operands
// to be sunk then it's probably worth it.
Modified: llvm/trunk/test/CodeGen/ARM/atomic-cmpxchg.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/atomic-cmpxchg.ll?rev=284545&r1=284544&r2=284545&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/atomic-cmpxchg.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/atomic-cmpxchg.ll Tue Oct 18 18:24:02 2016
@@ -38,16 +38,14 @@ entry:
; CHECK-ARMV6-NEXT: uxtb [[DESIRED:r[0-9]+]], r1
; CHECK-ARMV6-NEXT: [[TRY:.LBB[0-9_]+]]:
; CHECK-ARMV6-NEXT: ldrexb [[LD:r[0-9]+]], [r0]
-; CHECK-ARMV6-NEXT: mov [[RES:r[0-9]+]], #0
; CHECK-ARMV6-NEXT: cmp [[LD]], [[DESIRED]]
-; CHECK-ARMV6-NEXT: bne [[END:.LBB[0-9_]+]]
+; CHECK-ARMV6-NEXT: movne [[RES:r[0-9]+]], #0
+; CHECK-ARMV6-NEXT: bxne lr
; CHECK-ARMV6-NEXT: strexb [[SUCCESS:r[0-9]+]], r2, [r0]
-; CHECK-ARMV6-NEXT: mov [[RES]], #1
; CHECK-ARMV6-NEXT: cmp [[SUCCESS]], #0
-; CHECK-ARMV6-NEXT: bne [[TRY]]
-; CHECK-ARMV6-NEXT: [[END]]:
-; CHECK-ARMV6-NEXT: mov r0, [[RES]]
-; CHECK-ARMV6-NEXT: bx lr
+; CHECK-ARMV6-NEXT: moveq [[RES]], #1
+; CHECK-ARMV6-NEXT: bxeq lr
+; CHECK-ARMV6-NEXT: b [[TRY]]
; CHECK-THUMBV6-LABEL: test_cmpxchg_res_i8:
; CHECK-THUMBV6: mov [[EXPECTED:r[0-9]+]], r1
@@ -64,20 +62,18 @@ entry:
; CHECK-ARMV7-LABEL: test_cmpxchg_res_i8:
; CHECK-ARMV7-NEXT: .fnstart
; CHECK-ARMV7-NEXT: uxtb [[DESIRED:r[0-9]+]], r1
-; CHECK-ARMV7-NEXT: [[TRY:.LBB[0-9_]+]]:
-; CHECK-ARMV7-NEXT: ldrexb [[LD:r[0-9]+]], [r0]
-; CHECK-ARMV7-NEXT: cmp [[LD]], [[DESIRED]]
-; CHECK-ARMV7-NEXT: bne [[FAIL:.LBB[0-9_]+]]
+; CHECK-ARMV7-NEXT: b [[TRY:.LBB[0-9_]+]]
+; CHECK-ARMV7-NEXT: [[HEAD:.LBB[0-9_]+]]:
; CHECK-ARMV7-NEXT: strexb [[SUCCESS:r[0-9]+]], r2, [r0]
-; CHECK-ARMV7-NEXT: mov [[RES:r[0-9]+]], #1
; CHECK-ARMV7-NEXT: cmp [[SUCCESS]], #0
-; CHECK-ARMV7-NEXT: bne [[TRY]]
-; CHECK-ARMV7-NEXT: b [[END:.LBB[0-9_]+]]
-; CHECK-ARMV7-NEXT: [[FAIL]]:
+; CHECK-ARMV7-NEXT: moveq [[RES:r[0-9]+]], #1
+; CHECK-ARMV7-NEXT: bxeq lr
+; CHECK-ARMV7-NEXT: [[TRY]]:
+; CHECK-ARMV7-NEXT: ldrexb [[LD:r[0-9]+]], [r0]
+; CHECK-ARMV7-NEXT: cmp [[LD]], [[DESIRED]]
+; CHECK-ARMV7-NEXT: beq [[HEAD]]
; CHECK-ARMV7-NEXT: clrex
; CHECK-ARMV7-NEXT: mov [[RES]], #0
-; CHECK-ARMV7-NEXT: [[END]]:
-; CHECK-ARMV7-NEXT: mov r0, [[RES]]
; CHECK-ARMV7-NEXT: bx lr
; CHECK-THUMBV7-LABEL: test_cmpxchg_res_i8:
Modified: llvm/trunk/test/CodeGen/ARM/code-placement.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/code-placement.ll?rev=284545&r1=284544&r2=284545&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/code-placement.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/code-placement.ll Tue Oct 18 18:24:02 2016
@@ -12,9 +12,9 @@ entry:
br i1 %0, label %bb2, label %bb
bb:
-; CHECK: LBB0_2:
-; CHECK: bne LBB0_2
-; CHECK-NOT: b LBB0_2
+; CHECK: LBB0_[[LABEL:[0-9]]]:
+; CHECK: bne LBB0_[[LABEL]]
+; CHECK-NOT: b LBB0_[[LABEL]]
; CHECK: bx lr
%list_addr.05 = phi %struct.list_head* [ %2, %bb ], [ %list, %entry ]
%next.04 = phi %struct.list_head* [ %list_addr.05, %bb ], [ null, %entry ]
@@ -34,14 +34,13 @@ bb2:
define i32 @t2(i32 %passes, i32* nocapture %src, i32 %size) nounwind readonly {
entry:
; CHECK-LABEL: t2:
-; CHECK: beq LBB1_[[RET:.]]
%0 = icmp eq i32 %passes, 0 ; <i1> [#uses=1]
br i1 %0, label %bb5, label %bb.nph15
-; CHECK: LBB1_[[PREHDR:.]]: @ %bb2.preheader
bb1: ; preds = %bb2.preheader, %bb1
-; CHECK: LBB1_[[BB1:.]]: @ %bb1
-; CHECK: bne LBB1_[[BB1]]
+; CHECK: LBB1_[[BB3:.]]: @ %bb3
+; CHECK: LBB1_[[PREHDR:.]]: @ %bb2.preheader
+; CHECK: blt LBB1_[[BB3]]
%indvar = phi i32 [ %indvar.next, %bb1 ], [ 0, %bb2.preheader ] ; <i32> [#uses=2]
%sum.08 = phi i32 [ %2, %bb1 ], [ %sum.110, %bb2.preheader ] ; <i32> [#uses=1]
%tmp17 = sub i32 %i.07, %indvar ; <i32> [#uses=1]
@@ -53,9 +52,9 @@ bb1:
br i1 %exitcond, label %bb3, label %bb1
bb3: ; preds = %bb1, %bb2.preheader
-; CHECK: LBB1_[[BB3:.]]: @ %bb3
-; CHECK: bne LBB1_[[PREHDR]]
-; CHECK-NOT: b LBB1_
+; CHECK: LBB1_[[BB1:.]]: @ %bb1
+; CHECK: bne LBB1_[[BB1]]
+; CHECK: b LBB1_[[BB3]]
%sum.0.lcssa = phi i32 [ %sum.110, %bb2.preheader ], [ %2, %bb1 ] ; <i32> [#uses=2]
%3 = add i32 %pass.011, 1 ; <i32> [#uses=2]
%exitcond18 = icmp eq i32 %3, %passes ; <i1> [#uses=1]
@@ -71,8 +70,6 @@ bb2.preheader:
%sum.110 = phi i32 [ 0, %bb.nph15 ], [ %sum.0.lcssa, %bb3 ] ; <i32> [#uses=2]
br i1 %4, label %bb1, label %bb3
-; CHECK: LBB1_[[RET]]: @ %bb5
-; CHECK: pop
bb5: ; preds = %bb3, %entry
%sum.1.lcssa = phi i32 [ 0, %entry ], [ %sum.0.lcssa, %bb3 ] ; <i32> [#uses=1]
ret i32 %sum.1.lcssa
Modified: llvm/trunk/test/CodeGen/X86/block-placement.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/block-placement.ll?rev=284545&r1=284544&r2=284545&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/block-placement.ll (original)
+++ llvm/trunk/test/CodeGen/X86/block-placement.ll Tue Oct 18 18:24:02 2016
@@ -478,12 +478,12 @@ define void @fpcmp_unanalyzable_branch(i
; CHECK-LABEL: fpcmp_unanalyzable_branch:
; CHECK: # BB#0: # %entry
; CHECK: # BB#1: # %entry.if.then_crit_edge
-; CHECK: .LBB10_4: # %if.then
-; CHECK: .LBB10_5: # %if.end
+; CHECK: .LBB10_5: # %if.then
+; CHECK: .LBB10_6: # %if.end
; CHECK: # BB#3: # %exit
; CHECK: jne .LBB10_4
-; CHECK-NEXT: jnp .LBB10_5
-; CHECK-NEXT: jmp .LBB10_4
+; CHECK-NEXT: jnp .LBB10_6
+; CHECK: jmp .LBB10_5
entry:
; Note that this branch must be strongly biased toward
Modified: llvm/trunk/test/CodeGen/X86/clz.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/clz.ll?rev=284545&r1=284544&r2=284545&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/clz.ll (original)
+++ llvm/trunk/test/CodeGen/X86/clz.ll Tue Oct 18 18:24:02 2016
@@ -279,28 +279,32 @@ define i64 @ctlz_i64(i64 %x) {
define i8 @ctlz_i8_zero_test(i8 %n) {
; X32-LABEL: ctlz_i8_zero_test:
; X32: # BB#0:
-; X32-NEXT: movb {{[0-9]+}}(%esp), %cl
-; X32-NEXT: movb $8, %al
-; X32-NEXT: testb %cl, %cl
-; X32-NEXT: je .LBB8_2
-; X32-NEXT: # BB#1: # %cond.false
-; X32-NEXT: movzbl %cl, %eax
+; X32-NEXT: movb {{[0-9]+}}(%esp), %al
+; X32-NEXT: testb %al, %al
+; X32-NEXT: je .LBB8_1
+; X32-NEXT: # BB#2: # %cond.false
+; X32-NEXT: movzbl %al, %eax
; X32-NEXT: bsrl %eax, %eax
; X32-NEXT: xorl $7, %eax
-; X32-NEXT: .LBB8_2: # %cond.end
+; X32-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; X32-NEXT: retl
+; X32-NEXT: .LBB8_1:
+; X32-NEXT: movb $8, %al
; X32-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; X32-NEXT: retl
;
; X64-LABEL: ctlz_i8_zero_test:
; X64: # BB#0:
-; X64-NEXT: movb $8, %al
; X64-NEXT: testb %dil, %dil
-; X64-NEXT: je .LBB8_2
-; X64-NEXT: # BB#1: # %cond.false
+; X64-NEXT: je .LBB8_1
+; X64-NEXT: # BB#2: # %cond.false
; X64-NEXT: movzbl %dil, %eax
; X64-NEXT: bsrl %eax, %eax
; X64-NEXT: xorl $7, %eax
-; X64-NEXT: .LBB8_2: # %cond.end
+; X64-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; X64-NEXT: retq
+; X64-NEXT: .LBB8_1:
+; X64-NEXT: movb $8, %al
; X64-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; X64-NEXT: retq
;
@@ -327,26 +331,30 @@ define i8 @ctlz_i8_zero_test(i8 %n) {
define i16 @ctlz_i16_zero_test(i16 %n) {
; X32-LABEL: ctlz_i16_zero_test:
; X32: # BB#0:
-; X32-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT: movw $16, %ax
-; X32-NEXT: testw %cx, %cx
-; X32-NEXT: je .LBB9_2
-; X32-NEXT: # BB#1: # %cond.false
-; X32-NEXT: bsrw %cx, %ax
+; X32-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: testw %ax, %ax
+; X32-NEXT: je .LBB9_1
+; X32-NEXT: # BB#2: # %cond.false
+; X32-NEXT: bsrw %ax, %ax
; X32-NEXT: xorl $15, %eax
-; X32-NEXT: .LBB9_2: # %cond.end
+; X32-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; X32-NEXT: retl
+; X32-NEXT: .LBB9_1:
+; X32-NEXT: movw $16, %ax
; X32-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; X32-NEXT: retl
;
; X64-LABEL: ctlz_i16_zero_test:
; X64: # BB#0:
-; X64-NEXT: movw $16, %ax
; X64-NEXT: testw %di, %di
-; X64-NEXT: je .LBB9_2
-; X64-NEXT: # BB#1: # %cond.false
+; X64-NEXT: je .LBB9_1
+; X64-NEXT: # BB#2: # %cond.false
; X64-NEXT: bsrw %di, %ax
; X64-NEXT: xorl $15, %eax
-; X64-NEXT: .LBB9_2: # %cond.end
+; X64-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; X64-NEXT: retq
+; X64-NEXT: .LBB9_1:
+; X64-NEXT: movw $16, %ax
; X64-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; X64-NEXT: retq
;
@@ -367,25 +375,27 @@ define i16 @ctlz_i16_zero_test(i16 %n) {
define i32 @ctlz_i32_zero_test(i32 %n) {
; X32-LABEL: ctlz_i32_zero_test:
; X32: # BB#0:
-; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT: movl $32, %eax
-; X32-NEXT: testl %ecx, %ecx
-; X32-NEXT: je .LBB10_2
-; X32-NEXT: # BB#1: # %cond.false
-; X32-NEXT: bsrl %ecx, %eax
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: testl %eax, %eax
+; X32-NEXT: je .LBB10_1
+; X32-NEXT: # BB#2: # %cond.false
+; X32-NEXT: bsrl %eax, %eax
; X32-NEXT: xorl $31, %eax
-; X32-NEXT: .LBB10_2: # %cond.end
+; X32-NEXT: retl
+; X32-NEXT: .LBB10_1:
+; X32-NEXT: movl $32, %eax
; X32-NEXT: retl
;
; X64-LABEL: ctlz_i32_zero_test:
; X64: # BB#0:
-; X64-NEXT: movl $32, %eax
; X64-NEXT: testl %edi, %edi
-; X64-NEXT: je .LBB10_2
-; X64-NEXT: # BB#1: # %cond.false
+; X64-NEXT: je .LBB10_1
+; X64-NEXT: # BB#2: # %cond.false
; X64-NEXT: bsrl %edi, %eax
; X64-NEXT: xorl $31, %eax
-; X64-NEXT: .LBB10_2: # %cond.end
+; X64-NEXT: retq
+; X64-NEXT: .LBB10_1:
+; X64-NEXT: movl $32, %eax
; X64-NEXT: retq
;
; X32-CLZ-LABEL: ctlz_i32_zero_test:
@@ -464,26 +474,30 @@ define i64 @ctlz_i64_zero_test(i64 %n) {
define i8 @cttz_i8_zero_test(i8 %n) {
; X32-LABEL: cttz_i8_zero_test:
; X32: # BB#0:
-; X32-NEXT: movb {{[0-9]+}}(%esp), %cl
-; X32-NEXT: movb $8, %al
-; X32-NEXT: testb %cl, %cl
-; X32-NEXT: je .LBB12_2
-; X32-NEXT: # BB#1: # %cond.false
-; X32-NEXT: movzbl %cl, %eax
+; X32-NEXT: movb {{[0-9]+}}(%esp), %al
+; X32-NEXT: testb %al, %al
+; X32-NEXT: je .LBB12_1
+; X32-NEXT: # BB#2: # %cond.false
+; X32-NEXT: movzbl %al, %eax
; X32-NEXT: bsfl %eax, %eax
-; X32-NEXT: .LBB12_2: # %cond.end
+; X32-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; X32-NEXT: retl
+; X32-NEXT: .LBB12_1
+; X32-NEXT: movb $8, %al
; X32-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; X32-NEXT: retl
;
; X64-LABEL: cttz_i8_zero_test:
; X64: # BB#0:
-; X64-NEXT: movb $8, %al
; X64-NEXT: testb %dil, %dil
-; X64-NEXT: je .LBB12_2
-; X64-NEXT: # BB#1: # %cond.false
+; X64-NEXT: je .LBB12_1
+; X64-NEXT: # BB#2: # %cond.false
; X64-NEXT: movzbl %dil, %eax
; X64-NEXT: bsfl %eax, %eax
-; X64-NEXT: .LBB12_2: # %cond.end
+; X64-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; X64-NEXT: retq
+; X64-NEXT: .LBB12_1:
+; X64-NEXT: movb $8, %al
; X64-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; X64-NEXT: retq
;
@@ -510,23 +524,25 @@ define i8 @cttz_i8_zero_test(i8 %n) {
define i16 @cttz_i16_zero_test(i16 %n) {
; X32-LABEL: cttz_i16_zero_test:
; X32: # BB#0:
-; X32-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: testw %ax, %ax
+; X32-NEXT: je .LBB13_1
+; X32-NEXT: # BB#2: # %cond.false
+; X32-NEXT: bsfw %ax, %ax
+; X32-NEXT: retl
+; X32-NEXT: .LBB13_1
; X32-NEXT: movw $16, %ax
-; X32-NEXT: testw %cx, %cx
-; X32-NEXT: je .LBB13_2
-; X32-NEXT: # BB#1: # %cond.false
-; X32-NEXT: bsfw %cx, %ax
-; X32-NEXT: .LBB13_2: # %cond.end
; X32-NEXT: retl
;
; X64-LABEL: cttz_i16_zero_test:
; X64: # BB#0:
-; X64-NEXT: movw $16, %ax
; X64-NEXT: testw %di, %di
-; X64-NEXT: je .LBB13_2
-; X64-NEXT: # BB#1: # %cond.false
+; X64-NEXT: je .LBB13_1
+; X64-NEXT: # BB#2: # %cond.false
; X64-NEXT: bsfw %di, %ax
-; X64-NEXT: .LBB13_2: # %cond.end
+; X64-NEXT: retq
+; X64-NEXT: .LBB13_1:
+; X64-NEXT: movw $16, %ax
; X64-NEXT: retq
;
; X32-CLZ-LABEL: cttz_i16_zero_test:
@@ -546,23 +562,25 @@ define i16 @cttz_i16_zero_test(i16 %n) {
define i32 @cttz_i32_zero_test(i32 %n) {
; X32-LABEL: cttz_i32_zero_test:
; X32: # BB#0:
-; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: testl %eax, %eax
+; X32-NEXT: je .LBB14_1
+; X32-NEXT: # BB#2: # %cond.false
+; X32-NEXT: bsfl %eax, %eax
+; X32-NEXT: retl
+; X32-NEXT: .LBB14_1
; X32-NEXT: movl $32, %eax
-; X32-NEXT: testl %ecx, %ecx
-; X32-NEXT: je .LBB14_2
-; X32-NEXT: # BB#1: # %cond.false
-; X32-NEXT: bsfl %ecx, %eax
-; X32-NEXT: .LBB14_2: # %cond.end
; X32-NEXT: retl
;
; X64-LABEL: cttz_i32_zero_test:
; X64: # BB#0:
-; X64-NEXT: movl $32, %eax
; X64-NEXT: testl %edi, %edi
-; X64-NEXT: je .LBB14_2
-; X64-NEXT: # BB#1: # %cond.false
+; X64-NEXT: je .LBB14_1
+; X64-NEXT: # BB#2: # %cond.false
; X64-NEXT: bsfl %edi, %eax
-; X64-NEXT: .LBB14_2: # %cond.end
+; X64-NEXT: retq
+; X64-NEXT: .LBB14_1:
+; X64-NEXT: movl $32, %eax
; X64-NEXT: retq
;
; X32-CLZ-LABEL: cttz_i32_zero_test:
@@ -642,25 +660,27 @@ define i64 @cttz_i64_zero_test(i64 %n) {
define i32 @ctlz_i32_fold_cmov(i32 %n) {
; X32-LABEL: ctlz_i32_fold_cmov:
; X32: # BB#0:
-; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT: orl $1, %ecx
-; X32-NEXT: movl $32, %eax
-; X32-NEXT: je .LBB16_2
-; X32-NEXT: # BB#1: # %cond.false
-; X32-NEXT: bsrl %ecx, %eax
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: orl $1, %eax
+; X32-NEXT: je .LBB16_1
+; X32-NEXT: # BB#2: # %cond.false
+; X32-NEXT: bsrl %eax, %eax
; X32-NEXT: xorl $31, %eax
-; X32-NEXT: .LBB16_2: # %cond.end
+; X32-NEXT: retl
+; X32-NEXT: .LBB16_1
+; X32-NEXT: movl $32, %eax
; X32-NEXT: retl
;
; X64-LABEL: ctlz_i32_fold_cmov:
; X64: # BB#0:
; X64-NEXT: orl $1, %edi
-; X64-NEXT: movl $32, %eax
-; X64-NEXT: je .LBB16_2
-; X64-NEXT: # BB#1: # %cond.false
+; X64-NEXT: je .LBB16_1
+; X64-NEXT: # BB#2: # %cond.false
; X64-NEXT: bsrl %edi, %eax
; X64-NEXT: xorl $31, %eax
-; X64-NEXT: .LBB16_2: # %cond.end
+; X64-NEXT: retq
+; X64-NEXT: .LBB16_1:
+; X64-NEXT: movl $32, %eax
; X64-NEXT: retq
;
; X32-CLZ-LABEL: ctlz_i32_fold_cmov:
@@ -716,26 +736,30 @@ define i32 @ctlz_bsr(i32 %n) {
define i32 @ctlz_bsr_zero_test(i32 %n) {
; X32-LABEL: ctlz_bsr_zero_test:
; X32: # BB#0:
-; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT: movl $32, %eax
-; X32-NEXT: testl %ecx, %ecx
-; X32-NEXT: je .LBB18_2
-; X32-NEXT: # BB#1: # %cond.false
-; X32-NEXT: bsrl %ecx, %eax
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: testl %eax, %eax
+; X32-NEXT: je .LBB18_1
+; X32-NEXT: # BB#2: # %cond.false
+; X32-NEXT: bsrl %eax, %eax
; X32-NEXT: xorl $31, %eax
-; X32-NEXT: .LBB18_2: # %cond.end
+; X32-NEXT: xorl $31, %eax
+; X32-NEXT: retl
+; X32-NEXT: .LBB18_1:
+; X32-NEXT: movl $32, %eax
; X32-NEXT: xorl $31, %eax
; X32-NEXT: retl
;
; X64-LABEL: ctlz_bsr_zero_test:
; X64: # BB#0:
-; X64-NEXT: movl $32, %eax
; X64-NEXT: testl %edi, %edi
-; X64-NEXT: je .LBB18_2
-; X64-NEXT: # BB#1: # %cond.false
+; X64-NEXT: je .LBB18_1
+; X64-NEXT: # BB#2: # %cond.false
; X64-NEXT: bsrl %edi, %eax
; X64-NEXT: xorl $31, %eax
-; X64-NEXT: .LBB18_2: # %cond.end
+; X64-NEXT: xorl $31, %eax
+; X64-NEXT: retq
+; X64-NEXT: .LBB18_1:
+; X64-NEXT: movl $32, %eax
; X64-NEXT: xorl $31, %eax
; X64-NEXT: retq
;
Modified: llvm/trunk/test/CodeGen/X86/loop-search.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/loop-search.ll?rev=284545&r1=284544&r2=284545&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/loop-search.ll (original)
+++ llvm/trunk/test/CodeGen/X86/loop-search.ll Tue Oct 18 18:24:02 2016
@@ -10,19 +10,17 @@ define zeroext i1 @search(i32 %needle, i
; CHECK-NEXT: testl %edx, %edx
; CHECK-NEXT: jle LBB0_1
; CHECK-NEXT: ## BB#4: ## %for.body.preheader
-; CHECK-NEXT: movslq %edx, %rcx
-; CHECK-NEXT: xorl %edx, %edx
+; CHECK-NEXT: movslq %edx, %rax
+; CHECK-NEXT: xorl %ecx, %ecx
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: LBB0_5: ## %for.body
; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1
-; ### FIXME: This loop invariant should be hoisted
-; CHECK-NEXT: movb $1, %al
-; CHECK-NEXT: cmpl %edi, (%rsi,%rdx,4)
+; CHECK-NEXT: cmpl %edi, (%rsi,%rcx,4)
; CHECK-NEXT: je LBB0_6
; CHECK-NEXT: ## BB#2: ## %for.cond
; CHECK-NEXT: ## in Loop: Header=BB0_5 Depth=1
-; CHECK-NEXT: incq %rdx
-; CHECK-NEXT: cmpq %rcx, %rdx
+; CHECK-NEXT: incq %rcx
+; CHECK-NEXT: cmpq %rax, %rcx
; CHECK-NEXT: jl LBB0_5
; ### FIXME: BB#3 and LBB0_1 should be merged
; CHECK-NEXT: ## BB#3:
@@ -33,7 +31,8 @@ define zeroext i1 @search(i32 %needle, i
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT: retq
-; CHECK-NEXT: LBB0_6: ## %cleanup
+; CHECK-NEXT: LBB0_6:
+; CHECK-NEXT: movb $1, %al
; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT: retq
;
Modified: llvm/trunk/test/CodeGen/X86/machine-sink.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/machine-sink.ll?rev=284545&r1=284544&r2=284545&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/machine-sink.ll (original)
+++ llvm/trunk/test/CodeGen/X86/machine-sink.ll Tue Oct 18 18:24:02 2016
@@ -0,0 +1,21 @@
+; RUN: llc < %s -mtriple=x86_64-pc-linux | FileCheck %s
+
+; Checks that movl $1 is sunk to the critical edge.
+; CHECK-NOT: movl $1
+; CHECK: jbe
+; CHECK: movl $1
+define i32 @test(i32 %n, i32 %k) nounwind {
+entry:
+ %cmp = icmp ugt i32 %k, %n
+ br i1 %cmp, label %ifthen, label %ifend, !prof !1
+
+ifthen:
+ %y = add i32 %k, 2
+ br label %ifend
+
+ifend:
+ %ret = phi i32 [ 1, %entry ] , [ %y, %ifthen]
+ ret i32 %ret
+}
+
+!1 = !{!"branch_weights", i32 100, i32 1}
Modified: llvm/trunk/test/CodeGen/X86/phys_subreg_coalesce-2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/phys_subreg_coalesce-2.ll?rev=284545&r1=284544&r2=284545&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/phys_subreg_coalesce-2.ll (original)
+++ llvm/trunk/test/CodeGen/X86/phys_subreg_coalesce-2.ll Tue Oct 18 18:24:02 2016
@@ -14,7 +14,9 @@ forcond.preheader: ; preds = %entry
ifthen: ; preds = %entry
ret i32 0
; CHECK: forbody{{$}}
+; There should be no mov instruction in the for body.
; CHECK-NOT: mov
+; CHECK: jbe
forbody: ; preds = %forbody, %forcond.preheader
%indvar = phi i32 [ 0, %forcond.preheader ], [ %divisor.02, %forbody ] ; <i32> [#uses=3]
%accumulator.01 = phi i32 [ 1, %forcond.preheader ], [ %div, %forbody ] ; <i32> [#uses=1]
Modified: llvm/trunk/test/CodeGen/X86/pr2659.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/pr2659.ll?rev=284545&r1=284544&r2=284545&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/pr2659.ll (original)
+++ llvm/trunk/test/CodeGen/X86/pr2659.ll Tue Oct 18 18:24:02 2016
@@ -14,7 +14,7 @@ forcond.preheader: ; preds
br i1 %cmp44, label %afterfor, label %forbody
; CHECK: %forcond.preheader
-; CHECK: movl $1
+; CHECK: testl
; CHECK-NOT: xorl
; CHECK-NOT: movl
; CHECK-NOT: LBB
@@ -24,6 +24,7 @@ forcond.preheader: ; preds
; CHECK: %forbody{{$}}
; CHECK-NOT: mov
; CHECK: jbe
+; CHECK: movl $1
ifthen: ; preds = %entry
ret i32 0
Modified: llvm/trunk/test/DebugInfo/COFF/pieces.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/DebugInfo/COFF/pieces.ll?rev=284545&r1=284544&r2=284545&view=diff
==============================================================================
--- llvm/trunk/test/DebugInfo/COFF/pieces.ll (original)
+++ llvm/trunk/test/DebugInfo/COFF/pieces.ll Tue Oct 18 18:24:02 2016
@@ -37,11 +37,11 @@
; ASM-LABEL: loop_csr: # @loop_csr
; ASM: #DEBUG_VALUE: loop_csr:o [bit_piece offset=0 size=32] <- 0
; ASM: #DEBUG_VALUE: loop_csr:o [bit_piece offset=32 size=32] <- 0
-; ASM: # BB#1: # %for.body.preheader
+; ASM: # BB#2: # %for.body.preheader
; ASM: xorl %edi, %edi
; ASM: xorl %esi, %esi
; ASM: .p2align 4, 0x90
-; ASM: .LBB0_2: # %for.body
+; ASM: .LBB0_3: # %for.body
; ASM: [[ox_start:\.Ltmp[0-9]+]]:
; ASM: #DEBUG_VALUE: loop_csr:o [bit_piece offset=0 size=32] <- %EDI
; ASM: .cv_loc 0 1 13 11 # t.c:13:11
@@ -57,7 +57,7 @@
; ASM: movl %eax, %esi
; ASM: #DEBUG_VALUE: loop_csr:o [bit_piece offset=32 size=32] <- %ESI
; ASM: cmpl n(%rip), %eax
-; ASM: jl .LBB0_2
+; ASM: jl .LBB0_3
; ASM: [[oy_end:\.Ltmp[0-9]+]]:
; ASM: addl %edi, %esi
; ASM: movl %esi, %eax
More information about the llvm-commits
mailing list