[llvm] 5b4759f - Revert "[X86] Don't always separate conditions in `(br (and/or cond0, cond1))` into separate branches"
NAKAMURA Takumi via llvm-commits
llvm-commits at lists.llvm.org
Sun Mar 3 05:41:18 PST 2024
Author: NAKAMURA Takumi
Date: 2024-03-03T22:31:28+09:00
New Revision: 5b4759f9fd1419abc69e656c40f04a0fd9483d2a
URL: https://github.com/llvm/llvm-project/commit/5b4759f9fd1419abc69e656c40f04a0fd9483d2a
DIFF: https://github.com/llvm/llvm-project/commit/5b4759f9fd1419abc69e656c40f04a0fd9483d2a.diff
LOG: Revert "[X86] Don't always separate conditions in `(br (and/or cond0, cond1))` into separate branches"
This has been buggy for a while.
Reverts #81689
This reverts commit ae76dfb74701e05e5ab4be194e20e49f10768e46.
Added:
Modified:
llvm/include/llvm/CodeGen/TargetLowering.h
llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/lib/Target/X86/X86ISelLowering.h
llvm/test/CodeGen/X86/2006-04-27-ISelFoldingBug.ll
llvm/test/CodeGen/X86/2007-08-09-IllegalX86-64Asm.ll
llvm/test/CodeGen/X86/2007-12-18-LoadCSEBug.ll
llvm/test/CodeGen/X86/2008-02-18-TailMergingBug.ll
llvm/test/CodeGen/X86/avx-cmp.ll
llvm/test/CodeGen/X86/cmp.ll
llvm/test/CodeGen/X86/dagcombine-and-setcc.ll
llvm/test/CodeGen/X86/div-rem-pair-recomposition-unsigned.ll
llvm/test/CodeGen/X86/inline-spiller-impdef-on-implicit-def-regression.ll
llvm/test/CodeGen/X86/lsr-addrecloops.ll
llvm/test/CodeGen/X86/movmsk-cmp.ll
llvm/test/CodeGen/X86/or-branch.ll
llvm/test/CodeGen/X86/peephole-na-phys-copy-folding.ll
llvm/test/CodeGen/X86/pr33747.ll
llvm/test/CodeGen/X86/pr37025.ll
llvm/test/CodeGen/X86/pr38795.ll
llvm/test/CodeGen/X86/setcc-logic.ll
llvm/test/CodeGen/X86/swifterror.ll
llvm/test/CodeGen/X86/tail-dup-merge-loop-headers.ll
llvm/test/CodeGen/X86/tail-opts.ll
llvm/test/CodeGen/X86/tailcall-extract.ll
llvm/test/CodeGen/X86/test-shrink-bug.ll
llvm/test/CodeGen/X86/x86-shrink-wrap-unwind.ll
Removed:
################################################################################
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index 4c2815679efc92..f2e00aab8d5da2 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -596,42 +596,6 @@ class TargetLoweringBase {
/// avoided.
bool isJumpExpensive() const { return JumpIsExpensive; }
- // Costs parameters used by
- // SelectionDAGBuilder::shouldKeepJumpConditionsTogether.
- // shouldKeepJumpConditionsTogether will use these parameter value to
- // determine if two conditions in the form `br (and/or cond1, cond2)` should
- // be split into two branches or left as one.
- //
- // BaseCost is the cost threshold (in latency). If the estimated latency of
- // computing both `cond1` and `cond2` is below the cost of just computing
- // `cond1` + BaseCost, the two conditions will be kept together. Otherwise
- // they will be split.
- //
- // LikelyBias increases BaseCost if branch probability info indicates that it
- // is likely that both `cond1` and `cond2` will be computed.
- //
- // UnlikelyBias decreases BaseCost if branch probability info indicates that
- // it is likely that both `cond1` and `cond2` will be computed.
- //
- // Set any field to -1 to make it ignored (setting BaseCost to -1 results in
- // `shouldKeepJumpConditionsTogether` always returning false).
- struct CondMergingParams {
- int BaseCost;
- int LikelyBias;
- int UnlikelyBias;
- };
- // Return params for deciding if we should keep two branch conditions merged
- // or split them into two separate branches.
- // Arg0: The binary op joining the two conditions (and/or).
- // Arg1: The first condition (cond1)
- // Arg2: The second condition (cond2)
- virtual CondMergingParams
- getJumpConditionMergingParams(Instruction::BinaryOps, const Value *,
- const Value *) const {
- // -1 will always result in splitting.
- return {-1, -1, -1};
- }
-
/// Return true if selects are only cheaper than branches if the branch is
/// unlikely to be predicted right.
bool isPredictableSelectExpensive() const {
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 4f6263cc492fe3..ab2f42d2024ccc 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -26,7 +26,6 @@
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
-#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/CodeGen/Analysis.h"
@@ -94,7 +93,6 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/InstructionCost.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetIntrinsicInfo.h"
@@ -2448,147 +2446,6 @@ SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond,
SL->SwitchCases.push_back(CB);
}
-// Collect dependencies on V recursively. This is used for the cost analysis in
-// `shouldKeepJumpConditionsTogether`.
-static bool
-collectInstructionDeps(SmallPtrSet<const Instruction *, 8> *Deps,
- const Value *V,
- SmallPtrSet<const Instruction *, 8> *Necessary = nullptr,
- unsigned Depth = 0) {
- // Return false if we have an incomplete count.
- if (Depth >= SelectionDAG::MaxRecursionDepth)
- return false;
-
- auto *I = dyn_cast<Instruction>(V);
- if (I == nullptr)
- return true;
-
- if (Necessary != nullptr) {
- // This instruction is necessary for the other side of the condition so
- // don't count it.
- if (Necessary->contains(I))
- return true;
- }
-
- // Already added this dep.
- if (!Deps->insert(I).second)
- return true;
-
- for (unsigned OpIdx = 0, E = I->getNumOperands(); OpIdx < E; ++OpIdx)
- if (!collectInstructionDeps(Deps, I->getOperand(OpIdx), Necessary,
- Depth + 1))
- return false;
- return true;
-}
-
-bool SelectionDAGBuilder::shouldKeepJumpConditionsTogether(
- const FunctionLoweringInfo &FuncInfo, const BranchInst &I,
- Instruction::BinaryOps Opc, const Value *Lhs, const Value *Rhs,
- TargetLoweringBase::CondMergingParams Params) const {
- if (I.getNumSuccessors() != 2)
- return false;
-
- if (Params.BaseCost < 0)
- return false;
-
- // Baseline cost.
- InstructionCost CostThresh = Params.BaseCost;
-
- BranchProbabilityInfo *BPI = nullptr;
- if (Params.LikelyBias || Params.UnlikelyBias)
- BPI = FuncInfo.BPI;
- if (BPI != nullptr) {
- // See if we are either likely to get an early out or compute both lhs/rhs
- // of the condition.
- BasicBlock *IfFalse = I.getSuccessor(0);
- BasicBlock *IfTrue = I.getSuccessor(1);
-
- std::optional<bool> Likely;
- if (BPI->isEdgeHot(I.getParent(), IfTrue))
- Likely = true;
- else if (BPI->isEdgeHot(I.getParent(), IfFalse))
- Likely = false;
-
- if (Likely) {
- if (Opc == (*Likely ? Instruction::And : Instruction::Or))
- // Its likely we will have to compute both lhs and rhs of condition
- CostThresh += Params.LikelyBias;
- else {
- if (Params.UnlikelyBias < 0)
- return false;
- // Its likely we will get an early out.
- CostThresh -= Params.UnlikelyBias;
- }
- }
- }
-
- if (CostThresh <= 0)
- return false;
-
- // Collect "all" instructions that lhs condition is dependent on.
- SmallPtrSet<const Instruction *, 8> LhsDeps, RhsDeps;
- collectInstructionDeps(&LhsDeps, Lhs);
- // Collect "all" instructions that rhs condition is dependent on AND are
- // dependencies of lhs. This gives us an estimate on which instructions we
- // stand to save by splitting the condition.
- if (!collectInstructionDeps(&RhsDeps, Rhs, &LhsDeps))
- return false;
- // Add the compare instruction itself unless its a dependency on the LHS.
- if (const auto *RhsI = dyn_cast<Instruction>(Rhs))
- if (!LhsDeps.contains(RhsI))
- RhsDeps.insert(RhsI);
-
- const auto &TLI = DAG.getTargetLoweringInfo();
- const auto &TTI =
- TLI.getTargetMachine().getTargetTransformInfo(*I.getFunction());
-
- InstructionCost CostOfIncluding = 0;
- // See if this instruction will need to computed independently of whether RHS
- // is.
- auto ShouldCountInsn = [&RhsDeps](const Instruction *Ins) {
- for (const auto *U : Ins->users()) {
- // If user is independent of RHS calculation we don't need to count it.
- if (auto *UIns = dyn_cast<Instruction>(U))
- if (!RhsDeps.contains(UIns))
- return false;
- }
- return true;
- };
-
- // Prune instructions from RHS Deps that are dependencies of unrelated
- // instructions. The value (SelectionDAG::MaxRecursionDepth) is fairly
- // arbitrary and just meant to cap the how much time we spend in the pruning
- // loop. Its highly unlikely to come into affect.
- const unsigned MaxPruneIters = SelectionDAG::MaxRecursionDepth;
- // Stop after a certain point. No incorrectness from including too many
- // instructions.
- for (unsigned PruneIters = 0; PruneIters < MaxPruneIters; ++PruneIters) {
- const Instruction *ToDrop = nullptr;
- for (const auto *Ins : RhsDeps) {
- if (!ShouldCountInsn(Ins)) {
- ToDrop = Ins;
- break;
- }
- }
- if (ToDrop == nullptr)
- break;
- RhsDeps.erase(ToDrop);
- }
-
- for (const auto *Ins : RhsDeps) {
- // Finally accumulate latency that we can only attribute to computing the
- // RHS condition. Use latency because we are essentially trying to calculate
- // the cost of the dependency chain.
- // Possible TODO: We could try to estimate ILP and make this more precise.
- CostOfIncluding +=
- TTI.getInstructionCost(Ins, TargetTransformInfo::TCK_Latency);
-
- if (CostOfIncluding > CostThresh)
- return false;
- }
- return true;
-}
-
void SelectionDAGBuilder::FindMergedConditions(const Value *Cond,
MachineBasicBlock *TBB,
MachineBasicBlock *FBB,
@@ -2803,13 +2660,8 @@ void SelectionDAGBuilder::visitBr(const BranchInst &I) {
else if (match(BOp, m_LogicalOr(m_Value(BOp0), m_Value(BOp1))))
Opcode = Instruction::Or;
- if (Opcode &&
- !(match(BOp0, m_ExtractElt(m_Value(Vec), m_Value())) &&
- match(BOp1, m_ExtractElt(m_Specific(Vec), m_Value()))) &&
- !shouldKeepJumpConditionsTogether(
- FuncInfo, I, Opcode, BOp0, BOp1,
- DAG.getTargetLoweringInfo().getJumpConditionMergingParams(
- Opcode, BOp0, BOp1))) {
+ if (Opcode && !(match(BOp0, m_ExtractElt(m_Value(Vec), m_Value())) &&
+ match(BOp1, m_ExtractElt(m_Specific(Vec), m_Value())))) {
FindMergedConditions(BOp, Succ0MBB, Succ1MBB, BrMBB, BrMBB, Opcode,
getEdgeProbability(BrMBB, Succ0MBB),
getEdgeProbability(BrMBB, Succ1MBB),
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
index 2084de473b8062..47657313cb6a3b 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -385,11 +385,6 @@ class SelectionDAGBuilder {
N = NewN;
}
- bool shouldKeepJumpConditionsTogether(
- const FunctionLoweringInfo &FuncInfo, const BranchInst &I,
- Instruction::BinaryOps Opc, const Value *Lhs, const Value *Rhs,
- TargetLoweringBase::CondMergingParams Params) const;
-
void FindMergedConditions(const Value *Cond, MachineBasicBlock *TBB,
MachineBasicBlock *FBB, MachineBasicBlock *CurBB,
MachineBasicBlock *SwitchBB,
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 6de0ae8a206482..866a2a94a0bfe9 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -77,37 +77,6 @@ static cl::opt<int> ExperimentalPrefInnermostLoopAlignment(
"alignment set by x86-experimental-pref-loop-alignment."),
cl::Hidden);
-static cl::opt<int> BrMergingBaseCostThresh(
- "x86-br-merging-base-cost", cl::init(1),
- cl::desc(
- "Sets the cost threshold for when multiple conditionals will be merged "
- "into one branch versus be split in multiple branches. Merging "
- "conditionals saves branches at the cost of additional instructions. "
- "This value sets the instruction cost limit, below which conditionals "
- "will be merged, and above which conditionals will be split."),
- cl::Hidden);
-
-static cl::opt<int> BrMergingLikelyBias(
- "x86-br-merging-likely-bias", cl::init(0),
- cl::desc("Increases 'x86-br-merging-base-cost' in cases that it is likely "
- "that all conditionals will be executed. For example for merging "
- "the conditionals (a == b && c > d), if its known that a == b is "
- "likely, then it is likely that if the conditionals are split "
- "both sides will be executed, so it may be desirable to increase "
- "the instruction cost threshold."),
- cl::Hidden);
-
-static cl::opt<int> BrMergingUnlikelyBias(
- "x86-br-merging-unlikely-bias", cl::init(1),
- cl::desc(
- "Decreases 'x86-br-merging-base-cost' in cases that it is unlikely "
- "that all conditionals will be executed. For example for merging "
- "the conditionals (a == b && c > d), if its known that a == b is "
- "unlikely, then it is unlikely that if the conditionals are split "
- "both sides will be executed, so it may be desirable to decrease "
- "the instruction cost threshold."),
- cl::Hidden);
-
static cl::opt<bool> MulConstantOptimization(
"mul-constant-optimization", cl::init(true),
cl::desc("Replace 'mul x, Const' with more effective instructions like "
@@ -3364,24 +3333,6 @@ unsigned X86TargetLowering::preferedOpcodeForCmpEqPiecesOfOperand(
return ISD::SRL;
}
-TargetLoweringBase::CondMergingParams
-X86TargetLowering::getJumpConditionMergingParams(Instruction::BinaryOps Opc,
- const Value *Lhs,
- const Value *Rhs) const {
- using namespace llvm::PatternMatch;
- int BaseCost = BrMergingBaseCostThresh.getValue();
- // a == b && a == c is a fast pattern on x86.
- ICmpInst::Predicate Pred;
- if (BaseCost >= 0 && Opc == Instruction::And &&
- match(Lhs, m_ICmp(Pred, m_Value(), m_Value())) &&
- Pred == ICmpInst::ICMP_EQ &&
- match(Rhs, m_ICmp(Pred, m_Value(), m_Value())) &&
- Pred == ICmpInst::ICMP_EQ)
- BaseCost += 1;
- return {BaseCost, BrMergingLikelyBias.getValue(),
- BrMergingUnlikelyBias.getValue()};
-}
-
bool X86TargetLowering::preferScalarizeSplat(SDNode *N) const {
return N->getOpcode() != ISD::FP_EXTEND;
}
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index fe1943b5760844..f93c54781846bf 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -1150,10 +1150,6 @@ namespace llvm {
bool preferScalarizeSplat(SDNode *N) const override;
- CondMergingParams
- getJumpConditionMergingParams(Instruction::BinaryOps Opc, const Value *Lhs,
- const Value *Rhs) const override;
-
bool shouldFoldConstantShiftPairToMask(const SDNode *N,
CombineLevel Level) const override;
diff --git a/llvm/test/CodeGen/X86/2006-04-27-ISelFoldingBug.ll b/llvm/test/CodeGen/X86/2006-04-27-ISelFoldingBug.ll
index e6f28c2057f775..0044d1c3568377 100644
--- a/llvm/test/CodeGen/X86/2006-04-27-ISelFoldingBug.ll
+++ b/llvm/test/CodeGen/X86/2006-04-27-ISelFoldingBug.ll
@@ -18,16 +18,15 @@ define i1 @loadAndRLEsource_no_exit_2E_1_label_2E_0(i32 %tmp.21.reload, i32 %tmp
; CHECK-NEXT: movl _block, %esi
; CHECK-NEXT: movb %al, 1(%esi,%edx)
; CHECK-NEXT: cmpl %ecx, _last
-; CHECK-NEXT: setl %cl
+; CHECK-NEXT: jge LBB0_3
+; CHECK-NEXT: ## %bb.1: ## %label.0
; CHECK-NEXT: cmpl $257, %eax ## imm = 0x101
-; CHECK-NEXT: setne %al
-; CHECK-NEXT: testb %al, %cl
-; CHECK-NEXT: je LBB0_2
-; CHECK-NEXT: ## %bb.1: ## %label.0.no_exit.1_crit_edge.exitStub
+; CHECK-NEXT: je LBB0_3
+; CHECK-NEXT: ## %bb.2: ## %label.0.no_exit.1_crit_edge.exitStub
; CHECK-NEXT: movb $1, %al
; CHECK-NEXT: popl %esi
; CHECK-NEXT: retl
-; CHECK-NEXT: LBB0_2: ## %codeRepl5.exitStub
+; CHECK-NEXT: LBB0_3: ## %codeRepl5.exitStub
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: popl %esi
; CHECK-NEXT: retl
diff --git a/llvm/test/CodeGen/X86/2007-08-09-IllegalX86-64Asm.ll b/llvm/test/CodeGen/X86/2007-08-09-IllegalX86-64Asm.ll
index 28b4541c1bfc7f..7bdc4e19a1cf66 100644
--- a/llvm/test/CodeGen/X86/2007-08-09-IllegalX86-64Asm.ll
+++ b/llvm/test/CodeGen/X86/2007-08-09-IllegalX86-64Asm.ll
@@ -44,7 +44,7 @@ define ptr @ubyte_divmod(ptr %a, ptr %b) {
; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rsi
; CHECK-NEXT: callq __ubyte_convert_to_ctype
; CHECK-NEXT: testl %eax, %eax
-; CHECK-NEXT: js LBB0_6
+; CHECK-NEXT: js LBB0_4
; CHECK-NEXT: ## %bb.1: ## %cond_next.i
; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rsi
; CHECK-NEXT: movq %rbx, %rdi
@@ -53,84 +53,81 @@ define ptr @ubyte_divmod(ptr %a, ptr %b) {
; CHECK-NEXT: sarl $31, %ecx
; CHECK-NEXT: andl %eax, %ecx
; CHECK-NEXT: cmpl $-2, %ecx
-; CHECK-NEXT: je LBB0_10
+; CHECK-NEXT: je LBB0_8
; CHECK-NEXT: ## %bb.2: ## %cond_next.i
; CHECK-NEXT: cmpl $-1, %ecx
-; CHECK-NEXT: jne LBB0_3
-; CHECK-NEXT: LBB0_8: ## %bb4
+; CHECK-NEXT: jne LBB0_6
+; CHECK-NEXT: LBB0_3: ## %bb4
; CHECK-NEXT: movq _PyArray_API@GOTPCREL(%rip), %rax
; CHECK-NEXT: movq (%rax), %rax
; CHECK-NEXT: movq 16(%rax), %rax
-; CHECK-NEXT: jmp LBB0_9
-; CHECK-NEXT: LBB0_6: ## %_ubyte_convert2_to_ctypes.exit
+; CHECK-NEXT: jmp LBB0_10
+; CHECK-NEXT: LBB0_4: ## %_ubyte_convert2_to_ctypes.exit
; CHECK-NEXT: cmpl $-2, %eax
-; CHECK-NEXT: je LBB0_10
-; CHECK-NEXT: ## %bb.7: ## %_ubyte_convert2_to_ctypes.exit
-; CHECK-NEXT: cmpl $-1, %eax
; CHECK-NEXT: je LBB0_8
-; CHECK-NEXT: LBB0_3: ## %bb35
+; CHECK-NEXT: ## %bb.5: ## %_ubyte_convert2_to_ctypes.exit
+; CHECK-NEXT: cmpl $-1, %eax
+; CHECK-NEXT: je LBB0_3
+; CHECK-NEXT: LBB0_6: ## %bb35
; CHECK-NEXT: movq _PyUFunc_API@GOTPCREL(%rip), %r14
; CHECK-NEXT: movq (%r14), %rax
; CHECK-NEXT: callq *216(%rax)
; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %edx
; CHECK-NEXT: testb %dl, %dl
-; CHECK-NEXT: je LBB0_4
-; CHECK-NEXT: ## %bb.12: ## %cond_false.i
-; CHECK-NEXT: setne %dil
+; CHECK-NEXT: je LBB0_11
+; CHECK-NEXT: ## %bb.7: ## %cond_false.i
; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %esi
; CHECK-NEXT: movzbl %sil, %ecx
; CHECK-NEXT: movl %ecx, %eax
; CHECK-NEXT: divb %dl
; CHECK-NEXT: movl %eax, %r15d
; CHECK-NEXT: testb %cl, %cl
-; CHECK-NEXT: setne %al
-; CHECK-NEXT: testb %dil, %al
-; CHECK-NEXT: jne LBB0_5
-; CHECK-NEXT: LBB0_13: ## %cond_true.i200
-; CHECK-NEXT: testb %dl, %dl
-; CHECK-NEXT: jne LBB0_15
-; CHECK-NEXT: ## %bb.14: ## %cond_true14.i
-; CHECK-NEXT: movl $4, %edi
-; CHECK-NEXT: callq _feraiseexcept
-; CHECK-NEXT: LBB0_15: ## %ubyte_ctype_remainder.exit
-; CHECK-NEXT: xorl %ebx, %ebx
-; CHECK-NEXT: jmp LBB0_16
-; CHECK-NEXT: LBB0_10: ## %bb17
+; CHECK-NEXT: jne LBB0_12
+; CHECK-NEXT: jmp LBB0_14
+; CHECK-NEXT: LBB0_8: ## %bb17
; CHECK-NEXT: callq _PyErr_Occurred
; CHECK-NEXT: testq %rax, %rax
-; CHECK-NEXT: jne LBB0_23
-; CHECK-NEXT: ## %bb.11: ## %cond_next
+; CHECK-NEXT: jne LBB0_27
+; CHECK-NEXT: ## %bb.9: ## %cond_next
; CHECK-NEXT: movq _PyArray_API@GOTPCREL(%rip), %rax
; CHECK-NEXT: movq (%rax), %rax
; CHECK-NEXT: movq 80(%rax), %rax
-; CHECK-NEXT: LBB0_9: ## %bb4
+; CHECK-NEXT: LBB0_10: ## %bb4
; CHECK-NEXT: movq 96(%rax), %rax
; CHECK-NEXT: movq %r14, %rdi
; CHECK-NEXT: movq %rbx, %rsi
; CHECK-NEXT: callq *40(%rax)
-; CHECK-NEXT: jmp LBB0_24
-; CHECK-NEXT: LBB0_4: ## %cond_true.i
+; CHECK-NEXT: jmp LBB0_28
+; CHECK-NEXT: LBB0_11: ## %cond_true.i
; CHECK-NEXT: movl $4, %edi
; CHECK-NEXT: callq _feraiseexcept
; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %edx
; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %esi
+; CHECK-NEXT: xorl %r15d, %r15d
; CHECK-NEXT: testb %sil, %sil
-; CHECK-NEXT: sete %al
+; CHECK-NEXT: je LBB0_14
+; CHECK-NEXT: LBB0_12: ## %cond_false.i
; CHECK-NEXT: testb %dl, %dl
-; CHECK-NEXT: sete %cl
-; CHECK-NEXT: xorl %r15d, %r15d
-; CHECK-NEXT: orb %al, %cl
-; CHECK-NEXT: jne LBB0_13
-; CHECK-NEXT: LBB0_5: ## %cond_next17.i
+; CHECK-NEXT: je LBB0_14
+; CHECK-NEXT: ## %bb.13: ## %cond_next17.i
; CHECK-NEXT: movzbl %sil, %eax
; CHECK-NEXT: divb %dl
; CHECK-NEXT: movzbl %ah, %ebx
-; CHECK-NEXT: LBB0_16: ## %ubyte_ctype_remainder.exit
+; CHECK-NEXT: jmp LBB0_18
+; CHECK-NEXT: LBB0_14: ## %cond_true.i200
+; CHECK-NEXT: testb %dl, %dl
+; CHECK-NEXT: jne LBB0_17
+; CHECK-NEXT: ## %bb.16: ## %cond_true14.i
+; CHECK-NEXT: movl $4, %edi
+; CHECK-NEXT: callq _feraiseexcept
+; CHECK-NEXT: LBB0_17: ## %ubyte_ctype_remainder.exit
+; CHECK-NEXT: xorl %ebx, %ebx
+; CHECK-NEXT: LBB0_18: ## %ubyte_ctype_remainder.exit
; CHECK-NEXT: movq (%r14), %rax
; CHECK-NEXT: callq *224(%rax)
; CHECK-NEXT: testl %eax, %eax
-; CHECK-NEXT: je LBB0_19
-; CHECK-NEXT: ## %bb.17: ## %cond_true61
+; CHECK-NEXT: je LBB0_21
+; CHECK-NEXT: ## %bb.19: ## %cond_true61
; CHECK-NEXT: movl %eax, %ebp
; CHECK-NEXT: movq (%r14), %rax
; CHECK-NEXT: movq _.str5@GOTPCREL(%rip), %rdi
@@ -139,8 +136,8 @@ define ptr @ubyte_divmod(ptr %a, ptr %b) {
; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rcx
; CHECK-NEXT: callq *200(%rax)
; CHECK-NEXT: testl %eax, %eax
-; CHECK-NEXT: js LBB0_23
-; CHECK-NEXT: ## %bb.18: ## %cond_next73
+; CHECK-NEXT: js LBB0_27
+; CHECK-NEXT: ## %bb.20: ## %cond_next73
; CHECK-NEXT: movl $1, {{[0-9]+}}(%rsp)
; CHECK-NEXT: movq (%r14), %rax
; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rsi
@@ -149,13 +146,13 @@ define ptr @ubyte_divmod(ptr %a, ptr %b) {
; CHECK-NEXT: movl %ebp, %edx
; CHECK-NEXT: callq *232(%rax)
; CHECK-NEXT: testl %eax, %eax
-; CHECK-NEXT: jne LBB0_23
-; CHECK-NEXT: LBB0_19: ## %cond_next89
+; CHECK-NEXT: jne LBB0_27
+; CHECK-NEXT: LBB0_21: ## %cond_next89
; CHECK-NEXT: movl $2, %edi
; CHECK-NEXT: callq _PyTuple_New
; CHECK-NEXT: testq %rax, %rax
-; CHECK-NEXT: je LBB0_23
-; CHECK-NEXT: ## %bb.20: ## %cond_next97
+; CHECK-NEXT: je LBB0_27
+; CHECK-NEXT: ## %bb.22: ## %cond_next97
; CHECK-NEXT: movq %rax, %r14
; CHECK-NEXT: movq _PyArray_API@GOTPCREL(%rip), %r12
; CHECK-NEXT: movq (%r12), %rax
@@ -163,8 +160,8 @@ define ptr @ubyte_divmod(ptr %a, ptr %b) {
; CHECK-NEXT: xorl %esi, %esi
; CHECK-NEXT: callq *304(%rdi)
; CHECK-NEXT: testq %rax, %rax
-; CHECK-NEXT: je LBB0_21
-; CHECK-NEXT: ## %bb.25: ## %cond_next135
+; CHECK-NEXT: je LBB0_25
+; CHECK-NEXT: ## %bb.23: ## %cond_next135
; CHECK-NEXT: movb %r15b, 16(%rax)
; CHECK-NEXT: movq %rax, 24(%r14)
; CHECK-NEXT: movq (%r12), %rax
@@ -172,22 +169,22 @@ define ptr @ubyte_divmod(ptr %a, ptr %b) {
; CHECK-NEXT: xorl %esi, %esi
; CHECK-NEXT: callq *304(%rdi)
; CHECK-NEXT: testq %rax, %rax
-; CHECK-NEXT: je LBB0_21
-; CHECK-NEXT: ## %bb.26: ## %cond_next182
+; CHECK-NEXT: je LBB0_25
+; CHECK-NEXT: ## %bb.24: ## %cond_next182
; CHECK-NEXT: movb %bl, 16(%rax)
; CHECK-NEXT: movq %rax, 32(%r14)
; CHECK-NEXT: movq %r14, %rax
-; CHECK-NEXT: jmp LBB0_24
-; CHECK-NEXT: LBB0_21: ## %cond_true113
+; CHECK-NEXT: jmp LBB0_28
+; CHECK-NEXT: LBB0_25: ## %cond_true113
; CHECK-NEXT: decq (%r14)
-; CHECK-NEXT: jne LBB0_23
-; CHECK-NEXT: ## %bb.22: ## %cond_true126
+; CHECK-NEXT: jne LBB0_27
+; CHECK-NEXT: ## %bb.26: ## %cond_true126
; CHECK-NEXT: movq 8(%r14), %rax
; CHECK-NEXT: movq %r14, %rdi
; CHECK-NEXT: callq *48(%rax)
-; CHECK-NEXT: LBB0_23: ## %UnifiedReturnBlock
+; CHECK-NEXT: LBB0_27: ## %UnifiedReturnBlock
; CHECK-NEXT: xorl %eax, %eax
-; CHECK-NEXT: LBB0_24: ## %UnifiedReturnBlock
+; CHECK-NEXT: LBB0_28: ## %UnifiedReturnBlock
; CHECK-NEXT: addq $32, %rsp
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: popq %r12
diff --git a/llvm/test/CodeGen/X86/2007-12-18-LoadCSEBug.ll b/llvm/test/CodeGen/X86/2007-12-18-LoadCSEBug.ll
index d9d4424267d733..4482c5aec8e816 100644
--- a/llvm/test/CodeGen/X86/2007-12-18-LoadCSEBug.ll
+++ b/llvm/test/CodeGen/X86/2007-12-18-LoadCSEBug.ll
@@ -16,12 +16,15 @@ define void @_ada_c34007g() {
; CHECK-NEXT: andl $-8, %esp
; CHECK-NEXT: subl $8, %esp
; CHECK-NEXT: movl (%esp), %eax
-; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; CHECK-NEXT: orl %eax, %ecx
-; CHECK-NEXT: sete %cl
; CHECK-NEXT: testl %eax, %eax
-; CHECK-NEXT: setne %al
-; CHECK-NEXT: testb %cl, %al
+; CHECK-NEXT: je .LBB0_3
+; CHECK-NEXT: # %bb.1: # %entry
+; CHECK-NEXT: orl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT: jne .LBB0_3
+; CHECK-NEXT: # %bb.2: # %entry
+; CHECK-NEXT: movb $1, %al
+; CHECK-NEXT: testb %al, %al
+; CHECK-NEXT: .LBB0_3: # %bb5507
; CHECK-NEXT: movl %ebp, %esp
; CHECK-NEXT: popl %ebp
; CHECK-NEXT: .cfi_def_cfa %esp, 4
diff --git a/llvm/test/CodeGen/X86/2008-02-18-TailMergingBug.ll b/llvm/test/CodeGen/X86/2008-02-18-TailMergingBug.ll
index e0b6e38647d801..dd60e641df2543 100644
--- a/llvm/test/CodeGen/X86/2008-02-18-TailMergingBug.ll
+++ b/llvm/test/CodeGen/X86/2008-02-18-TailMergingBug.ll
@@ -1,5 +1,5 @@
; REQUIRES: asserts
-; RUN: llc < %s -mtriple=i686-- -mcpu=yonah -stats 2>&1 | grep "Number of block tails merged" | grep 9
+; RUN: llc < %s -mtriple=i686-- -mcpu=yonah -stats 2>&1 | grep "Number of block tails merged" | grep 16
; PR1909
@.str = internal constant [48 x i8] c"transformed bounds: (%.2f, %.2f), (%.2f, %.2f)\0A\00" ; <ptr> [#uses=1]
@@ -217,4 +217,4 @@ bb456: ; preds = %bb448, %bb425, %bb417, %bb395, %bb385, %bb371
ret void
}
-declare i32 @printf(ptr, ...) nounwind
+declare i32 @printf(ptr, ...) nounwind
diff --git a/llvm/test/CodeGen/X86/avx-cmp.ll b/llvm/test/CodeGen/X86/avx-cmp.ll
index 4ab9c545ed90da..502bbf3f5d118b 100644
--- a/llvm/test/CodeGen/X86/avx-cmp.ll
+++ b/llvm/test/CodeGen/X86/avx-cmp.ll
@@ -26,33 +26,40 @@ declare void @scale() nounwind
define void @render(double %a0) nounwind {
; CHECK-LABEL: render:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: pushq %rbp
; CHECK-NEXT: pushq %rbx
-; CHECK-NEXT: subq $16, %rsp
-; CHECK-NEXT: vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: vmovsd %xmm0, (%rsp) # 8-byte Spill
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: testb %al, %al
-; CHECK-NEXT: jne .LBB2_5
+; CHECK-NEXT: jne .LBB2_6
; CHECK-NEXT: # %bb.1: # %for.cond5.preheader
-; CHECK-NEXT: movb $1, %bl
+; CHECK-NEXT: xorl %ebx, %ebx
+; CHECK-NEXT: movb $1, %bpl
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: .LBB2_2: # %for.cond5
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: testb %bl, %bl
; CHECK-NEXT: jne .LBB2_2
-; CHECK-NEXT: # %bb.3: # %for.body33.preheader
+; CHECK-NEXT: # %bb.3: # %for.cond5
+; CHECK-NEXT: # in Loop: Header=BB2_2 Depth=1
+; CHECK-NEXT: testb %bpl, %bpl
+; CHECK-NEXT: jne .LBB2_2
+; CHECK-NEXT: # %bb.4: # %for.body33.preheader
; CHECK-NEXT: # in Loop: Header=BB2_2 Depth=1
-; CHECK-NEXT: vmovsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload
+; CHECK-NEXT: vmovsd (%rsp), %xmm0 # 8-byte Reload
; CHECK-NEXT: # xmm0 = mem[0],zero
; CHECK-NEXT: vucomisd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; CHECK-NEXT: jne .LBB2_4
+; CHECK-NEXT: jne .LBB2_5
; CHECK-NEXT: jnp .LBB2_2
-; CHECK-NEXT: .LBB2_4: # %if.then
+; CHECK-NEXT: .LBB2_5: # %if.then
; CHECK-NEXT: # in Loop: Header=BB2_2 Depth=1
; CHECK-NEXT: callq scale@PLT
; CHECK-NEXT: jmp .LBB2_2
-; CHECK-NEXT: .LBB2_5: # %for.end52
-; CHECK-NEXT: addq $16, %rsp
+; CHECK-NEXT: .LBB2_6: # %for.end52
+; CHECK-NEXT: addq $8, %rsp
; CHECK-NEXT: popq %rbx
+; CHECK-NEXT: popq %rbp
; CHECK-NEXT: retq
entry:
br i1 undef, label %for.cond5, label %for.end52
diff --git a/llvm/test/CodeGen/X86/cmp.ll b/llvm/test/CodeGen/X86/cmp.ll
index 09419f870b7091..cd1953bec774d9 100644
--- a/llvm/test/CodeGen/X86/cmp.ll
+++ b/llvm/test/CodeGen/X86/cmp.ll
@@ -159,51 +159,43 @@ define i64 @test4(i64 %x) nounwind {
define i32 @test5(double %A) nounwind {
; CHECK-LABEL: test5:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: movsd {{.*#+}} xmm1 = [7.5E+1,0.0E+0]
-; CHECK-NEXT: # encoding: [0xf2,0x0f,0x10,0x0d,A,A,A,A]
+; CHECK-NEXT: ucomisd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # encoding: [0x66,0x0f,0x2e,0x05,A,A,A,A]
; CHECK-NEXT: # fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
-; CHECK-NEXT: cmplepd %xmm0, %xmm1 # encoding: [0x66,0x0f,0xc2,0xc8,0x02]
-; CHECK-NEXT: movsd {{.*#+}} xmm2 = [1.5E+2,0.0E+0]
-; CHECK-NEXT: # encoding: [0xf2,0x0f,0x10,0x15,A,A,A,A]
+; CHECK-NEXT: ja .LBB5_3 # encoding: [0x77,A]
+; CHECK-NEXT: # fixup A - offset: 1, value: .LBB5_3-1, kind: FK_PCRel_1
+; CHECK-NEXT: # %bb.1: # %entry
+; CHECK-NEXT: ucomisd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # encoding: [0x66,0x0f,0x2e,0x05,A,A,A,A]
; CHECK-NEXT: # fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
-; CHECK-NEXT: cmpnltpd %xmm0, %xmm2 # encoding: [0x66,0x0f,0xc2,0xd0,0x05]
-; CHECK-NEXT: andpd %xmm1, %xmm2 # encoding: [0x66,0x0f,0x54,0xd1]
-; CHECK-NEXT: movd %xmm2, %eax # encoding: [0x66,0x0f,0x7e,0xd0]
-; CHECK-NEXT: testb $1, %al # encoding: [0xa8,0x01]
-; CHECK-NEXT: jne .LBB5_1 # encoding: [0x75,A]
-; CHECK-NEXT: # fixup A - offset: 1, value: .LBB5_1-1, kind: FK_PCRel_1
-; CHECK-NEXT: # %bb.2: # %bb8
+; CHECK-NEXT: jb .LBB5_3 # encoding: [0x72,A]
+; CHECK-NEXT: # fixup A - offset: 1, value: .LBB5_3-1, kind: FK_PCRel_1
+; CHECK-NEXT: # %bb.2: # %bb12
+; CHECK-NEXT: movl $32, %eax # encoding: [0xb8,0x20,0x00,0x00,0x00]
+; CHECK-NEXT: retq # encoding: [0xc3]
+; CHECK-NEXT: .LBB5_3: # %bb8
; CHECK-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; CHECK-NEXT: jmp foo@PLT # TAILCALL
; CHECK-NEXT: # encoding: [0xeb,A]
; CHECK-NEXT: # fixup A - offset: 1, value: foo@PLT-1, kind: FK_PCRel_1
-; CHECK-NEXT: .LBB5_1: # %bb12
-; CHECK-NEXT: movl $32, %eax # encoding: [0xb8,0x20,0x00,0x00,0x00]
-; CHECK-NEXT: retq # encoding: [0xc3]
;
; NDD-LABEL: test5:
; NDD: # %bb.0: # %entry
-; NDD-NEXT: movsd {{.*#+}} xmm1 = [7.5E+1,0.0E+0]
-; NDD-NEXT: # encoding: [0xf2,0x0f,0x10,0x0d,A,A,A,A]
+; NDD-NEXT: ucomisd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # encoding: [0x66,0x0f,0x2e,0x05,A,A,A,A]
; NDD-NEXT: # fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
-; NDD-NEXT: cmplepd %xmm0, %xmm1 # encoding: [0x66,0x0f,0xc2,0xc8,0x02]
-; NDD-NEXT: movsd {{.*#+}} xmm2 = [1.5E+2,0.0E+0]
-; NDD-NEXT: # encoding: [0xf2,0x0f,0x10,0x15,A,A,A,A]
+; NDD-NEXT: ja .LBB5_3 # encoding: [0x77,A]
+; NDD-NEXT: # fixup A - offset: 1, value: .LBB5_3-1, kind: FK_PCRel_1
+; NDD-NEXT: # %bb.1: # %entry
+; NDD-NEXT: ucomisd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # encoding: [0x66,0x0f,0x2e,0x05,A,A,A,A]
; NDD-NEXT: # fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
-; NDD-NEXT: cmpnltpd %xmm0, %xmm2 # encoding: [0x66,0x0f,0xc2,0xd0,0x05]
-; NDD-NEXT: andpd %xmm1, %xmm2 # encoding: [0x66,0x0f,0x54,0xd1]
-; NDD-NEXT: movd %xmm2, %eax # encoding: [0x66,0x0f,0x7e,0xd0]
-; NDD-NEXT: testb $1, %al # encoding: [0xa8,0x01]
-; NDD-NEXT: jne .LBB5_1 # encoding: [0x75,A]
-; NDD-NEXT: # fixup A - offset: 1, value: .LBB5_1-1, kind: FK_PCRel_1
-; NDD-NEXT: # %bb.2: # %bb8
+; NDD-NEXT: jb .LBB5_3 # encoding: [0x72,A]
+; NDD-NEXT: # fixup A - offset: 1, value: .LBB5_3-1, kind: FK_PCRel_1
+; NDD-NEXT: # %bb.2: # %bb12
+; NDD-NEXT: movl $32, %eax # encoding: [0xb8,0x20,0x00,0x00,0x00]
+; NDD-NEXT: retq # encoding: [0xc3]
+; NDD-NEXT: .LBB5_3: # %bb8
; NDD-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; NDD-NEXT: jmp foo@PLT # TAILCALL
; NDD-NEXT: # encoding: [0xeb,A]
; NDD-NEXT: # fixup A - offset: 1, value: foo@PLT-1, kind: FK_PCRel_1
-; NDD-NEXT: .LBB5_1: # %bb12
-; NDD-NEXT: movl $32, %eax # encoding: [0xb8,0x20,0x00,0x00,0x00]
-; NDD-NEXT: retq # encoding: [0xc3]
entry:
%tmp2 = fcmp ogt double %A, 1.500000e+02
%tmp5 = fcmp ult double %A, 7.500000e+01
diff --git a/llvm/test/CodeGen/X86/dagcombine-and-setcc.ll b/llvm/test/CodeGen/X86/dagcombine-and-setcc.ll
index 6fded2eeaf35d9..842ee55d255aa3 100644
--- a/llvm/test/CodeGen/X86/dagcombine-and-setcc.ll
+++ b/llvm/test/CodeGen/X86/dagcombine-and-setcc.ll
@@ -16,8 +16,7 @@ declare i32 @printf(ptr nocapture readonly, ...)
;CHECK: cmpl
;CHECK: setl
;CHECK: orb
-;CHECK: testb
-;CHECK: jne
+;CHECK: je
@.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1
; Function Attrs: optsize ssp uwtable
diff --git a/llvm/test/CodeGen/X86/div-rem-pair-recomposition-unsigned.ll b/llvm/test/CodeGen/X86/div-rem-pair-recomposition-unsigned.ll
index fa45afbb634c4d..1372bd80473518 100644
--- a/llvm/test/CodeGen/X86/div-rem-pair-recomposition-unsigned.ll
+++ b/llvm/test/CodeGen/X86/div-rem-pair-recomposition-unsigned.ll
@@ -178,13 +178,13 @@ define i128 @scalar_i128(i128 %x, i128 %y, ptr %divdst) nounwind {
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: subl $136, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: orl %edi, %eax
-; X86-NEXT: movl %ebp, %ecx
-; X86-NEXT: orl %esi, %ecx
+; X86-NEXT: orl %esi, %eax
+; X86-NEXT: orl %edx, %ecx
+; X86-NEXT: movl %edx, %edi
; X86-NEXT: orl %eax, %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: sete %bl
@@ -195,33 +195,30 @@ define i128 @scalar_i128(i128 %x, i128 %y, ptr %divdst) nounwind {
; X86-NEXT: sete %al
; X86-NEXT: orb %bl, %al
; X86-NEXT: movb %al, (%esp) # 1-byte Spill
-; X86-NEXT: bsrl %edi, %edx
+; X86-NEXT: bsrl %esi, %edx
; X86-NEXT: xorl $31, %edx
-; X86-NEXT: bsrl %esi, %ecx
+; X86-NEXT: bsrl %edi, %ecx
; X86-NEXT: xorl $31, %ecx
; X86-NEXT: addl $32, %ecx
-; X86-NEXT: testl %edi, %edi
-; X86-NEXT: movl %edi, %ebx
+; X86-NEXT: testl %esi, %esi
; X86-NEXT: cmovnel %edx, %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: bsrl %eax, %edx
; X86-NEXT: xorl $31, %edx
-; X86-NEXT: bsrl %ebp, %ebp
-; X86-NEXT: movl %esi, %edi
+; X86-NEXT: bsrl {{[0-9]+}}(%esp), %ebp
+; X86-NEXT: movl %esi, %ebx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: xorl $31, %ebp
; X86-NEXT: addl $32, %ebp
; X86-NEXT: testl %eax, %eax
; X86-NEXT: cmovnel %edx, %ebp
; X86-NEXT: addl $64, %ebp
-; X86-NEXT: movl %edi, %edx
-; X86-NEXT: orl %ebx, %edx
+; X86-NEXT: orl %ebx, %edi
; X86-NEXT: cmovnel %ecx, %ebp
; X86-NEXT: bsrl %esi, %edx
-; X86-NEXT: movl %esi, %ebx
; X86-NEXT: xorl $31, %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: bsrl %eax, %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT: bsrl %ebx, %ecx
; X86-NEXT: xorl $31, %ecx
; X86-NEXT: addl $32, %ecx
; X86-NEXT: testl %esi, %esi
@@ -233,51 +230,51 @@ define i128 @scalar_i128(i128 %x, i128 %y, ptr %divdst) nounwind {
; X86-NEXT: xorl $31, %edx
; X86-NEXT: addl $32, %edx
; X86-NEXT: testl %edi, %edi
+; X86-NEXT: movl %edi, %eax
; X86-NEXT: cmovnel %esi, %edx
; X86-NEXT: addl $64, %edx
-; X86-NEXT: orl %ebx, %eax
+; X86-NEXT: movl %ebx, %esi
+; X86-NEXT: orl {{[0-9]+}}(%esp), %esi
; X86-NEXT: cmovnel %ecx, %edx
+; X86-NEXT: xorl %ecx, %ecx
; X86-NEXT: subl %edx, %ebp
-; X86-NEXT: movl $0, %edx
-; X86-NEXT: sbbl %edx, %edx
; X86-NEXT: movl $0, %esi
; X86-NEXT: sbbl %esi, %esi
; X86-NEXT: movl $0, %edi
; X86-NEXT: sbbl %edi, %edi
-; X86-NEXT: movl $127, %ecx
-; X86-NEXT: cmpl %ebp, %ecx
-; X86-NEXT: movl $0, %ecx
-; X86-NEXT: sbbl %edx, %ecx
-; X86-NEXT: movl $0, %ecx
-; X86-NEXT: sbbl %esi, %ecx
-; X86-NEXT: movl $0, %ecx
-; X86-NEXT: sbbl %edi, %ecx
-; X86-NEXT: setb %cl
-; X86-NEXT: orb (%esp), %cl # 1-byte Folded Reload
+; X86-NEXT: movl $0, %ebx
+; X86-NEXT: sbbl %ebx, %ebx
+; X86-NEXT: movl $127, %edx
; X86-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl %ebp, %eax
-; X86-NEXT: xorl $127, %eax
+; X86-NEXT: cmpl %ebp, %edx
+; X86-NEXT: movl $0, %edx
; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: orl %esi, %eax
-; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: sbbl %esi, %edx
+; X86-NEXT: movl $0, %edx
; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: orl %edi, %edx
-; X86-NEXT: orl %eax, %edx
-; X86-NEXT: sete %al
-; X86-NEXT: testb %cl, %cl
-; X86-NEXT: movl %ebx, %edx
-; X86-NEXT: movl $0, %edi
-; X86-NEXT: cmovnel %edi, %edx
+; X86-NEXT: sbbl %edi, %edx
+; X86-NEXT: movl $0, %edx
+; X86-NEXT: sbbl %ebx, %edx
+; X86-NEXT: setb %dl
+; X86-NEXT: orb (%esp), %dl # 1-byte Folded Reload
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: cmovnel %ecx, %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: cmovnel %edi, %esi
+; X86-NEXT: cmovnel %ecx, %esi
+; X86-NEXT: cmovnel %ecx, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X86-NEXT: cmovnel %edi, %ebp
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: cmovnel %edi, %ebx
-; X86-NEXT: orb %cl, %al
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: jne .LBB4_7
-; X86-NEXT: # %bb.1: # %udiv-bb1
+; X86-NEXT: cmovnel %ecx, %ebp
+; X86-NEXT: jne .LBB4_8
+; X86-NEXT: # %bb.1: # %_udiv-special-cases
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: xorl $127, %eax
+; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: orl %ebx, %ecx
+; X86-NEXT: orl %eax, %ecx
+; X86-NEXT: je .LBB4_8
+; X86-NEXT: # %bb.2: # %udiv-bb1
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
@@ -290,8 +287,9 @@ define i128 @scalar_i128(i128 %x, i128 %y, ptr %divdst) nounwind {
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X86-NEXT: movl %ebx, %eax
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: movl %ecx, %edi
; X86-NEXT: xorb $127, %al
; X86-NEXT: movb %al, %ch
; X86-NEXT: andb $7, %ch
@@ -303,7 +301,7 @@ define i128 @scalar_i128(i128 %x, i128 %y, ptr %divdst) nounwind {
; X86-NEXT: movl 132(%esp,%eax), %esi
; X86-NEXT: movb %ch, %cl
; X86-NEXT: shldl %cl, %edx, %esi
-; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %esi, (%esp) # 4-byte Spill
; X86-NEXT: shll %cl, %edx
; X86-NEXT: notb %cl
; X86-NEXT: movl 124(%esp,%eax), %ebp
@@ -311,69 +309,68 @@ define i128 @scalar_i128(i128 %x, i128 %y, ptr %divdst) nounwind {
; X86-NEXT: shrl %esi
; X86-NEXT: shrl %cl, %esi
; X86-NEXT: orl %edx, %esi
-; X86-NEXT: movl 120(%esp,%eax), %eax
+; X86-NEXT: movl %ebp, %edx
+; X86-NEXT: movl 120(%esp,%eax), %ebp
; X86-NEXT: movb %ch, %cl
-; X86-NEXT: shldl %cl, %eax, %ebp
-; X86-NEXT: shll %cl, %eax
-; X86-NEXT: movl %eax, (%esp) # 4-byte Spill
-; X86-NEXT: addl $1, %ebx
-; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: shldl %cl, %ebp, %edx
+; X86-NEXT: shll %cl, %ebp
+; X86-NEXT: addl $1, %edi
+; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT: adcl $0, %ecx
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-NEXT: adcl $0, %eax
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
; X86-NEXT: adcl $0, %ebx
-; X86-NEXT: jae .LBB4_2
-; X86-NEXT: # %bb.5:
+; X86-NEXT: jae .LBB4_3
+; X86-NEXT: # %bb.6:
; X86-NEXT: xorl %eax, %eax
; X86-NEXT: xorl %ecx, %ecx
-; X86-NEXT: jmp .LBB4_6
-; X86-NEXT: .LBB4_2: # %udiv-preheader
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, %ebx
+; X86-NEXT: jmp .LBB4_7
+; X86-NEXT: .LBB4_3: # %udiv-preheader
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-NEXT: movb %al, %ch
; X86-NEXT: andb $7, %ch
+; X86-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: # kill: def $al killed $al killed $eax
; X86-NEXT: shrb $3, %al
; X86-NEXT: andb $15, %al
; X86-NEXT: movzbl %al, %eax
-; X86-NEXT: movl 84(%esp,%eax), %ebx
-; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 80(%esp,%eax), %esi
-; X86-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl %esi, %edx
-; X86-NEXT: movb %ch, %cl
-; X86-NEXT: shrdl %cl, %ebx, %edx
+; X86-NEXT: movl 84(%esp,%eax), %ebp
+; X86-NEXT: movl %esi, %edi
+; X86-NEXT: movl 80(%esp,%eax), %ebx
; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 72(%esp,%eax), %ebp
-; X86-NEXT: movl 76(%esp,%eax), %edx
-; X86-NEXT: movl %edx, %eax
-; X86-NEXT: shrl %cl, %eax
-; X86-NEXT: notb %cl
-; X86-NEXT: addl %esi, %esi
-; X86-NEXT: shll %cl, %esi
-; X86-NEXT: orl %eax, %esi
+; X86-NEXT: movl %ebx, %esi
+; X86-NEXT: movb %ch, %cl
+; X86-NEXT: shrdl %cl, %ebp, %esi
; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 72(%esp,%eax), %esi
+; X86-NEXT: movl 76(%esp,%eax), %eax
+; X86-NEXT: movl %eax, %edx
+; X86-NEXT: shrl %cl, %edx
+; X86-NEXT: notb %cl
+; X86-NEXT: addl %ebx, %ebx
+; X86-NEXT: shll %cl, %ebx
+; X86-NEXT: orl %edx, %ebx
; X86-NEXT: movb %ch, %cl
-; X86-NEXT: shrl %cl, %ebx
-; X86-NEXT: movl %ebx, %edi
-; X86-NEXT: shrdl %cl, %edx, %ebp
-; X86-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: shrl %cl, %ebp
+; X86-NEXT: shrdl %cl, %eax, %esi
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: addl $-1, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
@@ -383,25 +380,25 @@ define i128 @scalar_i128(i128 %x, i128 %y, ptr %divdst) nounwind {
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: adcl $-1, %ecx
; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: adcl $-1, %edx
-; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: adcl $-1, %esi
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: xorl %ecx, %ecx
; X86-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X86-NEXT: movl (%esp), %edx # 4-byte Reload
+; X86-NEXT: movl (%esp), %esi # 4-byte Reload
; X86-NEXT: .p2align 4, 0x90
-; X86-NEXT: .LBB4_3: # %udiv-do-while
+; X86-NEXT: .LBB4_4: # %udiv-do-while
; X86-NEXT: # =>This Inner Loop Header: Depth=1
; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT: shldl $1, %edx, %ebp
+; X86-NEXT: movl %ebp, (%esp) # 4-byte Spill
+; X86-NEXT: shldl $1, %ebx, %edx
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
-; X86-NEXT: shldl $1, %ebp, %edi
-; X86-NEXT: movl %edi, (%esp) # 4-byte Spill
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X86-NEXT: shldl $1, %ebx, %ebp
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X86-NEXT: shldl $1, %esi, %ebx
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT: shldl $1, %ebp, %ebx
+; X86-NEXT: shldl $1, %esi, %ebp
; X86-NEXT: shldl $1, %edi, %esi
+; X86-NEXT: orl %ecx, %esi
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-NEXT: shldl $1, %eax, %edi
; X86-NEXT: orl %ecx, %edi
@@ -410,16 +407,14 @@ define i128 @scalar_i128(i128 %x, i128 %y, ptr %divdst) nounwind {
; X86-NEXT: shldl $1, %edi, %eax
; X86-NEXT: orl %ecx, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: shldl $1, %edx, %edi
-; X86-NEXT: orl %ecx, %edi
+; X86-NEXT: addl %edi, %edi
+; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: addl %edx, %edx
-; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X86-NEXT: cmpl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT: cmpl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT: sbbl %ebx, %ecx
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X86-NEXT: sbbl %ebp, %ecx
+; X86-NEXT: sbbl %edx, %ecx
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT: sbbl (%esp), %ecx # 4-byte Folded Reload
; X86-NEXT: sarl $31, %ecx
@@ -434,81 +429,84 @@ define i128 @scalar_i128(i128 %x, i128 %y, ptr %divdst) nounwind {
; X86-NEXT: movl %ecx, %eax
; X86-NEXT: andl {{[0-9]+}}(%esp), %eax
; X86-NEXT: andl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: subl %ecx, %esi
-; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: subl %ecx, %ebp
+; X86-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: sbbl %eax, %ebx
; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: sbbl %edi, %ebp
-; X86-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: sbbl %edi, %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-NEXT: sbbl %eax, (%esp) # 4-byte Folded Spill
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT: addl $-1, %ecx
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-NEXT: adcl $-1, %eax
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT: adcl $-1, %edx
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
; X86-NEXT: adcl $-1, %ebx
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X86-NEXT: adcl $-1, %edi
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: orl %edi, %eax
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: orl %ebx, %ecx
-; X86-NEXT: movl (%esp), %edi # 4-byte Reload
+; X86-NEXT: orl %ebx, %eax
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: orl %edx, %ecx
+; X86-NEXT: movl (%esp), %ebp # 4-byte Reload
; X86-NEXT: orl %eax, %ecx
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X86-NEXT: jne .LBB4_3
-; X86-NEXT: # %bb.4:
-; X86-NEXT: movl %edx, (%esp) # 4-byte Spill
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT: jne .LBB4_4
+; X86-NEXT: # %bb.5:
+; X86-NEXT: movl %esi, (%esp) # 4-byte Spill
+; X86-NEXT: movl %edi, %esi
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: .LBB4_6: # %udiv-loop-exit
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT: .LBB4_7: # %udiv-loop-exit
+; X86-NEXT: movl (%esp), %edx # 4-byte Reload
; X86-NEXT: shldl $1, %esi, %edx
; X86-NEXT: orl %ecx, %edx
-; X86-NEXT: shldl $1, %ebp, %esi
+; X86-NEXT: shldl $1, %ebx, %esi
; X86-NEXT: orl %ecx, %esi
-; X86-NEXT: movl (%esp), %ebx # 4-byte Reload
-; X86-NEXT: shldl $1, %ebx, %ebp
-; X86-NEXT: orl %ecx, %ebp
-; X86-NEXT: addl %ebx, %ebx
-; X86-NEXT: orl %eax, %ebx
+; X86-NEXT: shldl $1, %ebp, %ebx
+; X86-NEXT: orl %ecx, %ebx
+; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: addl %ebp, %ebp
+; X86-NEXT: orl %eax, %ebp
+; X86-NEXT: .LBB4_8: # %udiv-end
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: .LBB4_7: # %udiv-end
-; X86-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl %ebx, (%eax)
-; X86-NEXT: movl %ebp, 4(%eax)
+; X86-NEXT: movl %ebp, (%eax)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: movl %ecx, 4(%eax)
; X86-NEXT: movl %esi, 8(%eax)
; X86-NEXT: movl %edx, 12(%eax)
-; X86-NEXT: movl %ebx, %ecx
-; X86-NEXT: movl %ebx, (%esp) # 4-byte Spill
; X86-NEXT: movl %esi, %ebx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl %eax, %esi
-; X86-NEXT: imull %ebp, %esi
-; X86-NEXT: movl %edx, %edi
+; X86-NEXT: imull %ecx, %esi
+; X86-NEXT: movl %ebp, %ecx
+; X86-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %edx, %ebp
; X86-NEXT: mull %ecx
-; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %eax, (%esp) # 4-byte Spill
; X86-NEXT: addl %esi, %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X86-NEXT: imull %ecx, %ebp
-; X86-NEXT: addl %edx, %ebp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: imull %ecx, %edi
+; X86-NEXT: addl %edx, %edi
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl %esi, %eax
; X86-NEXT: mull %ebx
; X86-NEXT: movl %eax, %ecx
-; X86-NEXT: imull %esi, %edi
-; X86-NEXT: addl %edx, %edi
+; X86-NEXT: imull %esi, %ebp
+; X86-NEXT: addl %edx, %ebp
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: imull %eax, %ebx
-; X86-NEXT: addl %edi, %ebx
-; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: adcl %ebp, %ebx
-; X86-NEXT: movl (%esp), %ebp # 4-byte Reload
+; X86-NEXT: addl %ebp, %ebx
+; X86-NEXT: addl (%esp), %ecx # 4-byte Folded Reload
+; X86-NEXT: movl %ecx, (%esp) # 4-byte Spill
+; X86-NEXT: adcl %edi, %ebx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
; X86-NEXT: movl %ebp, %eax
; X86-NEXT: mull %esi
; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
@@ -524,7 +522,7 @@ define i128 @scalar_i128(i128 %x, i128 %y, ptr %divdst) nounwind {
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl %edx, %ebp
; X86-NEXT: addl %ecx, %eax
-; X86-NEXT: movl %eax, (%esp) # 4-byte Spill
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: adcl %edi, %ebp
; X86-NEXT: setb %cl
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
@@ -532,11 +530,11 @@ define i128 @scalar_i128(i128 %x, i128 %y, ptr %divdst) nounwind {
; X86-NEXT: addl %ebp, %eax
; X86-NEXT: movzbl %cl, %ecx
; X86-NEXT: adcl %ecx, %edx
-; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT: addl (%esp), %eax # 4-byte Folded Reload
; X86-NEXT: adcl %ebx, %edx
; X86-NEXT: subl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: sbbl (%esp), %edi # 4-byte Folded Reload
+; X86-NEXT: sbbl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
; X86-NEXT: sbbl %eax, %ebx
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
diff --git a/llvm/test/CodeGen/X86/inline-spiller-impdef-on-implicit-def-regression.ll b/llvm/test/CodeGen/X86/inline-spiller-impdef-on-implicit-def-regression.ll
index f42c2f8f144763..0250b1b4a7f861 100644
--- a/llvm/test/CodeGen/X86/inline-spiller-impdef-on-implicit-def-regression.ll
+++ b/llvm/test/CodeGen/X86/inline-spiller-impdef-on-implicit-def-regression.ll
@@ -28,70 +28,78 @@ define i32 @decode_sb(ptr %t, i32 %bl, i32 %_msprop1966, i32 %sub.i, i64 %idxpro
; CHECK-NEXT: .cfi_offset %r15, -24
; CHECK-NEXT: movl %r9d, %ebx
; CHECK-NEXT: # kill: def $edx killed $edx def $rdx
-; CHECK-NEXT: movabsq $87960930222080, %r14 # imm = 0x500000000000
-; CHECK-NEXT: movl 0, %r13d
-; CHECK-NEXT: movl %esi, %r15d
-; CHECK-NEXT: # implicit-def: $r12d
+; CHECK-NEXT: movabsq $87960930222080, %r15 # imm = 0x500000000000
+; CHECK-NEXT: movl 0, %r11d
+; CHECK-NEXT: movl %esi, %r12d
+; CHECK-NEXT: # implicit-def: $r13d
; CHECK-NEXT: testb $1, %bl
-; CHECK-NEXT: jne .LBB0_6
+; CHECK-NEXT: jne .LBB0_7
; CHECK-NEXT: # %bb.1: # %if.else
-; CHECK-NEXT: movl %ecx, %r12d
-; CHECK-NEXT: andl $1, %r12d
-; CHECK-NEXT: movzbl 544(%r12), %r9d
+; CHECK-NEXT: movq %r8, %r14
+; CHECK-NEXT: movl %ecx, %r13d
+; CHECK-NEXT: andl $1, %r13d
+; CHECK-NEXT: movzbl 544(%r13), %r8d
+; CHECK-NEXT: andl $1, %r8d
+; CHECK-NEXT: movl %r15d, %r9d
; CHECK-NEXT: andl $1, %r9d
; CHECK-NEXT: movl %r14d, %r10d
; CHECK-NEXT: andl $1, %r10d
-; CHECK-NEXT: andl $1, %r8d
; CHECK-NEXT: movabsq $17592186044416, %rax # imm = 0x100000000000
-; CHECK-NEXT: orq %r8, %rax
-; CHECK-NEXT: movl %esi, %r8d
+; CHECK-NEXT: orq %r10, %rax
+; CHECK-NEXT: movl %esi, %r10d
; CHECK-NEXT: # kill: def $cl killed $cl killed $ecx
-; CHECK-NEXT: shrl %cl, %r8d
-; CHECK-NEXT: andl $2, %r8d
+; CHECK-NEXT: shrl %cl, %r10d
+; CHECK-NEXT: andl $2, %r10d
; CHECK-NEXT: testb $1, %bl
-; CHECK-NEXT: cmoveq %r10, %rax
-; CHECK-NEXT: orl %r9d, %edx
-; CHECK-NEXT: movq %r13, %rcx
+; CHECK-NEXT: cmoveq %r9, %rax
+; CHECK-NEXT: orl %r8d, %edx
+; CHECK-NEXT: movq %r11, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT: movq %r11, %rcx
; CHECK-NEXT: orq $1, %rcx
-; CHECK-NEXT: orl %esi, %r8d
+; CHECK-NEXT: orl %esi, %r10d
; CHECK-NEXT: movl $1, %r8d
; CHECK-NEXT: je .LBB0_3
; CHECK-NEXT: # %bb.2: # %if.else
; CHECK-NEXT: movl (%rax), %r8d
; CHECK-NEXT: .LBB0_3: # %if.else
; CHECK-NEXT: shlq $5, %rdx
-; CHECK-NEXT: movq %r15, %rax
+; CHECK-NEXT: movq %r12, %rax
; CHECK-NEXT: shlq $7, %rax
; CHECK-NEXT: leaq (%rax,%rdx), %rsi
; CHECK-NEXT: addq $1248, %rsi # imm = 0x4E0
; CHECK-NEXT: movq %rcx, 0
-; CHECK-NEXT: movq %rdi, %r14
+; CHECK-NEXT: movq %rdi, %r15
; CHECK-NEXT: movl %r8d, (%rdi)
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: xorl %edi, %edi
; CHECK-NEXT: xorl %edx, %edx
; CHECK-NEXT: callq *%rax
+; CHECK-NEXT: xorq $1, %r14
+; CHECK-NEXT: cmpl $0, (%r14)
+; CHECK-NEXT: je .LBB0_6
+; CHECK-NEXT: # %bb.4: # %if.else
; CHECK-NEXT: movb $1, %al
; CHECK-NEXT: testb %al, %al
-; CHECK-NEXT: je .LBB0_4
-; CHECK-NEXT: # %bb.5: # %bb19
+; CHECK-NEXT: je .LBB0_5
+; CHECK-NEXT: .LBB0_6: # %bb19
; CHECK-NEXT: testb $1, %bl
-; CHECK-NEXT: movq %r14, %rdi
-; CHECK-NEXT: movabsq $87960930222080, %r14 # imm = 0x500000000000
-; CHECK-NEXT: jne .LBB0_7
-; CHECK-NEXT: .LBB0_6: # %if.end69
-; CHECK-NEXT: movl %r13d, 0
+; CHECK-NEXT: movq %r15, %rdi
+; CHECK-NEXT: movabsq $87960930222080, %r15 # imm = 0x500000000000
+; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Reload
+; CHECK-NEXT: jne .LBB0_8
+; CHECK-NEXT: .LBB0_7: # %if.end69
+; CHECK-NEXT: movl %r11d, 0
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: xorl %esi, %esi
; CHECK-NEXT: xorl %edx, %edx
; CHECK-NEXT: xorl %ecx, %ecx
; CHECK-NEXT: xorl %r8d, %r8d
; CHECK-NEXT: callq *%rax
-; CHECK-NEXT: xorq %r14, %r15
-; CHECK-NEXT: movslq %r12d, %rax
-; CHECK-NEXT: movzbl (%r15), %ecx
+; CHECK-NEXT: xorq %r15, %r12
+; CHECK-NEXT: movslq %r13d, %rax
+; CHECK-NEXT: movzbl (%r12), %ecx
; CHECK-NEXT: movb %cl, 544(%rax)
-; CHECK-NEXT: .LBB0_7: # %land.lhs.true56
+; CHECK-NEXT: .LBB0_8: # %land.lhs.true56
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: addq $8, %rsp
; CHECK-NEXT: popq %rbx
@@ -102,7 +110,7 @@ define i32 @decode_sb(ptr %t, i32 %bl, i32 %_msprop1966, i32 %sub.i, i64 %idxpro
; CHECK-NEXT: popq %rbp
; CHECK-NEXT: .cfi_def_cfa %rsp, 8
; CHECK-NEXT: retq
-; CHECK-NEXT: .LBB0_4: # %bb
+; CHECK-NEXT: .LBB0_5: # %bb
entry:
%i = load i32, ptr null, align 8
br i1 %cmp54, label %if.end69, label %if.else
diff --git a/llvm/test/CodeGen/X86/lsr-addrecloops.ll b/llvm/test/CodeGen/X86/lsr-addrecloops.ll
index d41942bea69da1..963405c8b0b3d3 100644
--- a/llvm/test/CodeGen/X86/lsr-addrecloops.ll
+++ b/llvm/test/CodeGen/X86/lsr-addrecloops.ll
@@ -15,38 +15,39 @@ define void @in4dob_(ptr nocapture writeonly %0, ptr nocapture readonly %1, ptr
; CHECK-NEXT: movl $1, %r10d
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: vxorps %xmm0, %xmm0, %xmm0
+; CHECK-NEXT: jmp .LBB0_1
; CHECK-NEXT: .p2align 4, 0x90
-; CHECK-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; CHECK-NEXT: vcmpneqps %xmm0, %xmm1, %k0
-; CHECK-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; CHECK-NEXT: vcmpneqps %xmm0, %xmm1, %k1
-; CHECK-NEXT: korw %k0, %k1, %k0
-; CHECK-NEXT: kmovd %k0, %r11d
-; CHECK-NEXT: testb $1, %r11b
-; CHECK-NEXT: je .LBB0_2
-; CHECK-NEXT: # %bb.19: # in Loop: Header=BB0_1 Depth=1
+; CHECK-NEXT: .LBB0_20: # in Loop: Header=BB0_1 Depth=1
; CHECK-NEXT: incq %r10
; CHECK-NEXT: addq %r9, %rax
; CHECK-NEXT: cmpq %r10, %rcx
-; CHECK-NEXT: jne .LBB0_1
-; CHECK-NEXT: jmp .LBB0_17
-; CHECK-NEXT: .LBB0_2: # %vector.body807.preheader
+; CHECK-NEXT: je .LBB0_18
+; CHECK-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; CHECK-NEXT: vucomiss %xmm0, %xmm1
+; CHECK-NEXT: jne .LBB0_20
+; CHECK-NEXT: jp .LBB0_20
+; CHECK-NEXT: # %bb.2: # in Loop: Header=BB0_1 Depth=1
+; CHECK-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; CHECK-NEXT: vucomiss %xmm0, %xmm1
+; CHECK-NEXT: jne .LBB0_20
+; CHECK-NEXT: jp .LBB0_20
+; CHECK-NEXT: # %bb.3: # %vector.body807.preheader
; CHECK-NEXT: leaq 1(%rcx), %rdx
; CHECK-NEXT: movl %edx, %esi
; CHECK-NEXT: andl $7, %esi
; CHECK-NEXT: cmpq $7, %rcx
-; CHECK-NEXT: jae .LBB0_4
-; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: jae .LBB0_5
+; CHECK-NEXT: # %bb.4:
; CHECK-NEXT: xorl %r9d, %r9d
-; CHECK-NEXT: jmp .LBB0_6
-; CHECK-NEXT: .LBB0_4: # %vector.body807.preheader.new
+; CHECK-NEXT: jmp .LBB0_7
+; CHECK-NEXT: .LBB0_5: # %vector.body807.preheader.new
; CHECK-NEXT: movq %rdx, %r10
; CHECK-NEXT: andq $-8, %r10
; CHECK-NEXT: xorl %r9d, %r9d
; CHECK-NEXT: vxorps %xmm0, %xmm0, %xmm0
; CHECK-NEXT: .p2align 4, 0x90
-; CHECK-NEXT: .LBB0_5: # %vector.body807
+; CHECK-NEXT: .LBB0_6: # %vector.body807
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: leaq (%rdi,%r9), %r11
; CHECK-NEXT: vmovups %ymm0, (%rax,%r11)
@@ -59,43 +60,43 @@ define void @in4dob_(ptr nocapture writeonly %0, ptr nocapture readonly %1, ptr
; CHECK-NEXT: vmovups %ymm0, 7(%rax,%r11)
; CHECK-NEXT: addq $8, %r9
; CHECK-NEXT: cmpq %r9, %r10
-; CHECK-NEXT: jne .LBB0_5
-; CHECK-NEXT: .LBB0_6: # %.lr.ph373.unr-lcssa
+; CHECK-NEXT: jne .LBB0_6
+; CHECK-NEXT: .LBB0_7: # %.lr.ph373.unr-lcssa
; CHECK-NEXT: testq %rsi, %rsi
-; CHECK-NEXT: je .LBB0_9
-; CHECK-NEXT: # %bb.7: # %vector.body807.epil.preheader
+; CHECK-NEXT: je .LBB0_10
+; CHECK-NEXT: # %bb.8: # %vector.body807.epil.preheader
; CHECK-NEXT: addq %rdi, %r9
; CHECK-NEXT: xorl %r10d, %r10d
; CHECK-NEXT: vxorps %xmm0, %xmm0, %xmm0
; CHECK-NEXT: .p2align 4, 0x90
-; CHECK-NEXT: .LBB0_8: # %vector.body807.epil
+; CHECK-NEXT: .LBB0_9: # %vector.body807.epil
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: leaq (%r9,%r10), %r11
; CHECK-NEXT: vmovups %ymm0, (%rax,%r11)
; CHECK-NEXT: incq %r10
; CHECK-NEXT: cmpq %r10, %rsi
-; CHECK-NEXT: jne .LBB0_8
-; CHECK-NEXT: .LBB0_9: # %.lr.ph373
+; CHECK-NEXT: jne .LBB0_9
+; CHECK-NEXT: .LBB0_10: # %.lr.ph373
; CHECK-NEXT: testb $1, %r8b
-; CHECK-NEXT: je .LBB0_10
-; CHECK-NEXT: # %bb.18: # %scalar.ph839.preheader
+; CHECK-NEXT: je .LBB0_11
+; CHECK-NEXT: # %bb.19: # %scalar.ph839.preheader
; CHECK-NEXT: movl $0, (%rdi)
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
-; CHECK-NEXT: .LBB0_10: # %vector.body847.preheader
+; CHECK-NEXT: .LBB0_11: # %vector.body847.preheader
; CHECK-NEXT: movl %edx, %esi
; CHECK-NEXT: andl $7, %esi
; CHECK-NEXT: cmpq $7, %rcx
-; CHECK-NEXT: jae .LBB0_12
-; CHECK-NEXT: # %bb.11:
+; CHECK-NEXT: jae .LBB0_13
+; CHECK-NEXT: # %bb.12:
; CHECK-NEXT: xorl %ecx, %ecx
-; CHECK-NEXT: jmp .LBB0_14
-; CHECK-NEXT: .LBB0_12: # %vector.body847.preheader.new
+; CHECK-NEXT: jmp .LBB0_15
+; CHECK-NEXT: .LBB0_13: # %vector.body847.preheader.new
; CHECK-NEXT: andq $-8, %rdx
; CHECK-NEXT: xorl %ecx, %ecx
; CHECK-NEXT: vxorps %xmm0, %xmm0, %xmm0
; CHECK-NEXT: .p2align 4, 0x90
-; CHECK-NEXT: .LBB0_13: # %vector.body847
+; CHECK-NEXT: .LBB0_14: # %vector.body847
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: leaq (%rdi,%rcx), %r8
; CHECK-NEXT: vmovups %ymm0, 96(%rax,%r8)
@@ -108,23 +109,23 @@ define void @in4dob_(ptr nocapture writeonly %0, ptr nocapture readonly %1, ptr
; CHECK-NEXT: vmovups %ymm0, 103(%rax,%r8)
; CHECK-NEXT: addq $8, %rcx
; CHECK-NEXT: cmpq %rcx, %rdx
-; CHECK-NEXT: jne .LBB0_13
-; CHECK-NEXT: .LBB0_14: # %common.ret.loopexit.unr-lcssa
+; CHECK-NEXT: jne .LBB0_14
+; CHECK-NEXT: .LBB0_15: # %common.ret.loopexit.unr-lcssa
; CHECK-NEXT: testq %rsi, %rsi
-; CHECK-NEXT: je .LBB0_17
-; CHECK-NEXT: # %bb.15: # %vector.body847.epil.preheader
+; CHECK-NEXT: je .LBB0_18
+; CHECK-NEXT: # %bb.16: # %vector.body847.epil.preheader
; CHECK-NEXT: leaq 96(%rcx,%rdi), %rcx
; CHECK-NEXT: xorl %edx, %edx
; CHECK-NEXT: vxorps %xmm0, %xmm0, %xmm0
; CHECK-NEXT: .p2align 4, 0x90
-; CHECK-NEXT: .LBB0_16: # %vector.body847.epil
+; CHECK-NEXT: .LBB0_17: # %vector.body847.epil
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: leaq (%rcx,%rdx), %rdi
; CHECK-NEXT: vmovups %ymm0, (%rax,%rdi)
; CHECK-NEXT: incq %rdx
; CHECK-NEXT: cmpq %rdx, %rsi
-; CHECK-NEXT: jne .LBB0_16
-; CHECK-NEXT: .LBB0_17: # %common.ret
+; CHECK-NEXT: jne .LBB0_17
+; CHECK-NEXT: .LBB0_18: # %common.ret
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
.preheader263:
diff --git a/llvm/test/CodeGen/X86/movmsk-cmp.ll b/llvm/test/CodeGen/X86/movmsk-cmp.ll
index e8b3121ecfb523..a7564c9622c5ca 100644
--- a/llvm/test/CodeGen/X86/movmsk-cmp.ll
+++ b/llvm/test/CodeGen/X86/movmsk-cmp.ll
@@ -4440,14 +4440,16 @@ define i32 @pr67287(<2 x i64> %broadcast.splatinsert25) {
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,0,3,2]
; SSE2-NEXT: movmskpd %xmm0, %eax
; SSE2-NEXT: testl %eax, %eax
-; SSE2-NEXT: setne %al
-; SSE2-NEXT: movd %xmm1, %ecx
-; SSE2-NEXT: orb %al, %cl
-; SSE2-NEXT: testb $1, %cl
-; SSE2-NEXT: je .LBB97_2
-; SSE2-NEXT: # %bb.1:
+; SSE2-NEXT: jne .LBB97_2
+; SSE2-NEXT: # %bb.1: # %entry
+; SSE2-NEXT: movd %xmm1, %eax
+; SSE2-NEXT: testb $1, %al
+; SSE2-NEXT: jne .LBB97_2
+; SSE2-NEXT: # %bb.3: # %middle.block
+; SSE2-NEXT: xorl %eax, %eax
+; SSE2-NEXT: retq
+; SSE2-NEXT: .LBB97_2:
; SSE2-NEXT: movw $0, 0
-; SSE2-NEXT: .LBB97_2: # %middle.block
; SSE2-NEXT: xorl %eax, %eax
; SSE2-NEXT: retq
;
@@ -4458,14 +4460,16 @@ define i32 @pr67287(<2 x i64> %broadcast.splatinsert25) {
; SSE41-NEXT: pcmpeqq %xmm1, %xmm0
; SSE41-NEXT: movmskpd %xmm0, %eax
; SSE41-NEXT: testl %eax, %eax
-; SSE41-NEXT: setne %al
-; SSE41-NEXT: movd %xmm0, %ecx
-; SSE41-NEXT: orb %al, %cl
-; SSE41-NEXT: testb $1, %cl
-; SSE41-NEXT: je .LBB97_2
-; SSE41-NEXT: # %bb.1:
+; SSE41-NEXT: jne .LBB97_2
+; SSE41-NEXT: # %bb.1: # %entry
+; SSE41-NEXT: movd %xmm0, %eax
+; SSE41-NEXT: testb $1, %al
+; SSE41-NEXT: jne .LBB97_2
+; SSE41-NEXT: # %bb.3: # %middle.block
+; SSE41-NEXT: xorl %eax, %eax
+; SSE41-NEXT: retq
+; SSE41-NEXT: .LBB97_2:
; SSE41-NEXT: movw $0, 0
-; SSE41-NEXT: .LBB97_2: # %middle.block
; SSE41-NEXT: xorl %eax, %eax
; SSE41-NEXT: retq
;
@@ -4475,14 +4479,16 @@ define i32 @pr67287(<2 x i64> %broadcast.splatinsert25) {
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
; AVX1-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vtestpd %xmm0, %xmm0
-; AVX1-NEXT: setne %al
-; AVX1-NEXT: vmovd %xmm0, %ecx
-; AVX1-NEXT: orb %al, %cl
-; AVX1-NEXT: testb $1, %cl
-; AVX1-NEXT: je .LBB97_2
-; AVX1-NEXT: # %bb.1:
+; AVX1-NEXT: jne .LBB97_2
+; AVX1-NEXT: # %bb.1: # %entry
+; AVX1-NEXT: vmovd %xmm0, %eax
+; AVX1-NEXT: testb $1, %al
+; AVX1-NEXT: jne .LBB97_2
+; AVX1-NEXT: # %bb.3: # %middle.block
+; AVX1-NEXT: xorl %eax, %eax
+; AVX1-NEXT: retq
+; AVX1-NEXT: .LBB97_2:
; AVX1-NEXT: movw $0, 0
-; AVX1-NEXT: .LBB97_2: # %middle.block
; AVX1-NEXT: xorl %eax, %eax
; AVX1-NEXT: retq
;
@@ -4492,14 +4498,16 @@ define i32 @pr67287(<2 x i64> %broadcast.splatinsert25) {
; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
; AVX2-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vtestpd %xmm0, %xmm0
-; AVX2-NEXT: setne %al
-; AVX2-NEXT: vmovd %xmm0, %ecx
-; AVX2-NEXT: orb %al, %cl
-; AVX2-NEXT: testb $1, %cl
-; AVX2-NEXT: je .LBB97_2
-; AVX2-NEXT: # %bb.1:
+; AVX2-NEXT: jne .LBB97_2
+; AVX2-NEXT: # %bb.1: # %entry
+; AVX2-NEXT: vmovd %xmm0, %eax
+; AVX2-NEXT: testb $1, %al
+; AVX2-NEXT: jne .LBB97_2
+; AVX2-NEXT: # %bb.3: # %middle.block
+; AVX2-NEXT: xorl %eax, %eax
+; AVX2-NEXT: retq
+; AVX2-NEXT: .LBB97_2:
; AVX2-NEXT: movw $0, 0
-; AVX2-NEXT: .LBB97_2: # %middle.block
; AVX2-NEXT: xorl %eax, %eax
; AVX2-NEXT: retq
;
@@ -4509,15 +4517,18 @@ define i32 @pr67287(<2 x i64> %broadcast.splatinsert25) {
; KNL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
; KNL-NEXT: vptestnmq %zmm0, %zmm0, %k0
; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: kmovw %k0, %ecx
-; KNL-NEXT: testb $3, %cl
-; KNL-NEXT: setne %cl
-; KNL-NEXT: orb %cl, %al
+; KNL-NEXT: testb $3, %al
+; KNL-NEXT: jne .LBB97_2
+; KNL-NEXT: # %bb.1: # %entry
+; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: testb $1, %al
-; KNL-NEXT: je .LBB97_2
-; KNL-NEXT: # %bb.1:
+; KNL-NEXT: jne .LBB97_2
+; KNL-NEXT: # %bb.3: # %middle.block
+; KNL-NEXT: xorl %eax, %eax
+; KNL-NEXT: vzeroupper
+; KNL-NEXT: retq
+; KNL-NEXT: .LBB97_2:
; KNL-NEXT: movw $0, 0
-; KNL-NEXT: .LBB97_2: # %middle.block
; KNL-NEXT: xorl %eax, %eax
; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
@@ -4528,14 +4539,16 @@ define i32 @pr67287(<2 x i64> %broadcast.splatinsert25) {
; SKX-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
; SKX-NEXT: vptestnmq %xmm0, %xmm0, %k0
; SKX-NEXT: kortestb %k0, %k0
-; SKX-NEXT: setne %al
-; SKX-NEXT: kmovd %k0, %ecx
-; SKX-NEXT: orb %al, %cl
-; SKX-NEXT: testb $1, %cl
-; SKX-NEXT: je .LBB97_2
-; SKX-NEXT: # %bb.1:
+; SKX-NEXT: jne .LBB97_2
+; SKX-NEXT: # %bb.1: # %entry
+; SKX-NEXT: kmovd %k0, %eax
+; SKX-NEXT: testb $1, %al
+; SKX-NEXT: jne .LBB97_2
+; SKX-NEXT: # %bb.3: # %middle.block
+; SKX-NEXT: xorl %eax, %eax
+; SKX-NEXT: retq
+; SKX-NEXT: .LBB97_2:
; SKX-NEXT: movw $0, 0
-; SKX-NEXT: .LBB97_2: # %middle.block
; SKX-NEXT: xorl %eax, %eax
; SKX-NEXT: retq
entry:
diff --git a/llvm/test/CodeGen/X86/or-branch.ll b/llvm/test/CodeGen/X86/or-branch.ll
index c6df237393e4a0..5d5cc2cb32f1ce 100644
--- a/llvm/test/CodeGen/X86/or-branch.ll
+++ b/llvm/test/CodeGen/X86/or-branch.ll
@@ -5,13 +5,12 @@
define void @foo(i32 %X, i32 %Y, i32 %Z) nounwind {
; JUMP2-LABEL: foo:
; JUMP2: # %bb.0: # %entry
-; JUMP2-NEXT: cmpl $0, {{[0-9]+}}(%esp)
-; JUMP2-NEXT: setne %al
; JUMP2-NEXT: cmpl $5, {{[0-9]+}}(%esp)
-; JUMP2-NEXT: setge %cl
-; JUMP2-NEXT: testb %al, %cl
+; JUMP2-NEXT: jl bar@PLT # TAILCALL
+; JUMP2-NEXT: # %bb.1: # %entry
+; JUMP2-NEXT: cmpl $0, {{[0-9]+}}(%esp)
; JUMP2-NEXT: je bar@PLT # TAILCALL
-; JUMP2-NEXT: # %bb.1: # %UnifiedReturnBlock
+; JUMP2-NEXT: # %bb.2: # %UnifiedReturnBlock
; JUMP2-NEXT: retl
;
; JUMP1-LABEL: foo:
diff --git a/llvm/test/CodeGen/X86/peephole-na-phys-copy-folding.ll b/llvm/test/CodeGen/X86/peephole-na-phys-copy-folding.ll
index 3354c99a361bf6..9069688c8037c7 100644
--- a/llvm/test/CodeGen/X86/peephole-na-phys-copy-folding.ll
+++ b/llvm/test/CodeGen/X86/peephole-na-phys-copy-folding.ll
@@ -14,33 +14,31 @@ declare i32 @bar(i64)
define i1 @plus_one() nounwind {
; CHECK32-LABEL: plus_one:
; CHECK32: # %bb.0: # %entry
+; CHECK32-NEXT: movzbl M, %eax
; CHECK32-NEXT: incl L
-; CHECK32-NEXT: sete %al
-; CHECK32-NEXT: movzbl M, %ecx
-; CHECK32-NEXT: andb $8, %cl
-; CHECK32-NEXT: shrb $3, %cl
-; CHECK32-NEXT: testb %cl, %al
-; CHECK32-NEXT: je .LBB0_1
-; CHECK32-NEXT: # %bb.2: # %exit2
+; CHECK32-NEXT: jne .LBB0_2
+; CHECK32-NEXT: # %bb.1: # %entry
+; CHECK32-NEXT: andb $8, %al
+; CHECK32-NEXT: je .LBB0_2
+; CHECK32-NEXT: # %bb.3: # %exit2
; CHECK32-NEXT: xorl %eax, %eax
; CHECK32-NEXT: retl
-; CHECK32-NEXT: .LBB0_1: # %exit
+; CHECK32-NEXT: .LBB0_2: # %exit
; CHECK32-NEXT: movb $1, %al
; CHECK32-NEXT: retl
;
; CHECK64-LABEL: plus_one:
; CHECK64: # %bb.0: # %entry
+; CHECK64-NEXT: movzbl M(%rip), %eax
; CHECK64-NEXT: incl L(%rip)
-; CHECK64-NEXT: sete %al
-; CHECK64-NEXT: movzbl M(%rip), %ecx
-; CHECK64-NEXT: andb $8, %cl
-; CHECK64-NEXT: shrb $3, %cl
-; CHECK64-NEXT: testb %cl, %al
-; CHECK64-NEXT: je .LBB0_1
-; CHECK64-NEXT: # %bb.2: # %exit2
+; CHECK64-NEXT: jne .LBB0_2
+; CHECK64-NEXT: # %bb.1: # %entry
+; CHECK64-NEXT: andb $8, %al
+; CHECK64-NEXT: je .LBB0_2
+; CHECK64-NEXT: # %bb.3: # %exit2
; CHECK64-NEXT: xorl %eax, %eax
; CHECK64-NEXT: retq
-; CHECK64-NEXT: .LBB0_1: # %exit
+; CHECK64-NEXT: .LBB0_2: # %exit
; CHECK64-NEXT: movb $1, %al
; CHECK64-NEXT: retq
entry:
@@ -65,32 +63,30 @@ define i1 @plus_forty_two() nounwind {
; CHECK32-LABEL: plus_forty_two:
; CHECK32: # %bb.0: # %entry
; CHECK32-NEXT: movzbl M, %eax
-; CHECK32-NEXT: andb $8, %al
-; CHECK32-NEXT: shrb $3, %al
; CHECK32-NEXT: addl $42, L
-; CHECK32-NEXT: sete %cl
-; CHECK32-NEXT: testb %al, %cl
-; CHECK32-NEXT: je .LBB1_1
-; CHECK32-NEXT: # %bb.2: # %exit2
+; CHECK32-NEXT: jne .LBB1_2
+; CHECK32-NEXT: # %bb.1: # %entry
+; CHECK32-NEXT: andb $8, %al
+; CHECK32-NEXT: je .LBB1_2
+; CHECK32-NEXT: # %bb.3: # %exit2
; CHECK32-NEXT: xorl %eax, %eax
; CHECK32-NEXT: retl
-; CHECK32-NEXT: .LBB1_1: # %exit
+; CHECK32-NEXT: .LBB1_2: # %exit
; CHECK32-NEXT: movb $1, %al
; CHECK32-NEXT: retl
;
; CHECK64-LABEL: plus_forty_two:
; CHECK64: # %bb.0: # %entry
; CHECK64-NEXT: movzbl M(%rip), %eax
-; CHECK64-NEXT: andb $8, %al
-; CHECK64-NEXT: shrb $3, %al
; CHECK64-NEXT: addl $42, L(%rip)
-; CHECK64-NEXT: sete %cl
-; CHECK64-NEXT: testb %al, %cl
-; CHECK64-NEXT: je .LBB1_1
-; CHECK64-NEXT: # %bb.2: # %exit2
+; CHECK64-NEXT: jne .LBB1_2
+; CHECK64-NEXT: # %bb.1: # %entry
+; CHECK64-NEXT: andb $8, %al
+; CHECK64-NEXT: je .LBB1_2
+; CHECK64-NEXT: # %bb.3: # %exit2
; CHECK64-NEXT: xorl %eax, %eax
; CHECK64-NEXT: retq
-; CHECK64-NEXT: .LBB1_1: # %exit
+; CHECK64-NEXT: .LBB1_2: # %exit
; CHECK64-NEXT: movb $1, %al
; CHECK64-NEXT: retq
entry:
@@ -115,32 +111,30 @@ define i1 @minus_one() nounwind {
; CHECK32-LABEL: minus_one:
; CHECK32: # %bb.0: # %entry
; CHECK32-NEXT: movzbl M, %eax
-; CHECK32-NEXT: andb $8, %al
-; CHECK32-NEXT: shrb $3, %al
; CHECK32-NEXT: decl L
-; CHECK32-NEXT: sete %cl
-; CHECK32-NEXT: testb %al, %cl
-; CHECK32-NEXT: je .LBB2_1
-; CHECK32-NEXT: # %bb.2: # %exit2
+; CHECK32-NEXT: jne .LBB2_2
+; CHECK32-NEXT: # %bb.1: # %entry
+; CHECK32-NEXT: andb $8, %al
+; CHECK32-NEXT: je .LBB2_2
+; CHECK32-NEXT: # %bb.3: # %exit2
; CHECK32-NEXT: xorl %eax, %eax
; CHECK32-NEXT: retl
-; CHECK32-NEXT: .LBB2_1: # %exit
+; CHECK32-NEXT: .LBB2_2: # %exit
; CHECK32-NEXT: movb $1, %al
; CHECK32-NEXT: retl
;
; CHECK64-LABEL: minus_one:
; CHECK64: # %bb.0: # %entry
; CHECK64-NEXT: movzbl M(%rip), %eax
-; CHECK64-NEXT: andb $8, %al
-; CHECK64-NEXT: shrb $3, %al
; CHECK64-NEXT: decl L(%rip)
-; CHECK64-NEXT: sete %cl
-; CHECK64-NEXT: testb %al, %cl
-; CHECK64-NEXT: je .LBB2_1
-; CHECK64-NEXT: # %bb.2: # %exit2
+; CHECK64-NEXT: jne .LBB2_2
+; CHECK64-NEXT: # %bb.1: # %entry
+; CHECK64-NEXT: andb $8, %al
+; CHECK64-NEXT: je .LBB2_2
+; CHECK64-NEXT: # %bb.3: # %exit2
; CHECK64-NEXT: xorl %eax, %eax
; CHECK64-NEXT: retq
-; CHECK64-NEXT: .LBB2_1: # %exit
+; CHECK64-NEXT: .LBB2_2: # %exit
; CHECK64-NEXT: movb $1, %al
; CHECK64-NEXT: retq
entry:
@@ -165,32 +159,30 @@ define i1 @minus_forty_two() nounwind {
; CHECK32-LABEL: minus_forty_two:
; CHECK32: # %bb.0: # %entry
; CHECK32-NEXT: movzbl M, %eax
-; CHECK32-NEXT: andb $8, %al
-; CHECK32-NEXT: shrb $3, %al
; CHECK32-NEXT: addl $-42, L
-; CHECK32-NEXT: sete %cl
-; CHECK32-NEXT: testb %al, %cl
-; CHECK32-NEXT: je .LBB3_1
-; CHECK32-NEXT: # %bb.2: # %exit2
+; CHECK32-NEXT: jne .LBB3_2
+; CHECK32-NEXT: # %bb.1: # %entry
+; CHECK32-NEXT: andb $8, %al
+; CHECK32-NEXT: je .LBB3_2
+; CHECK32-NEXT: # %bb.3: # %exit2
; CHECK32-NEXT: xorl %eax, %eax
; CHECK32-NEXT: retl
-; CHECK32-NEXT: .LBB3_1: # %exit
+; CHECK32-NEXT: .LBB3_2: # %exit
; CHECK32-NEXT: movb $1, %al
; CHECK32-NEXT: retl
;
; CHECK64-LABEL: minus_forty_two:
; CHECK64: # %bb.0: # %entry
; CHECK64-NEXT: movzbl M(%rip), %eax
-; CHECK64-NEXT: andb $8, %al
-; CHECK64-NEXT: shrb $3, %al
; CHECK64-NEXT: addl $-42, L(%rip)
-; CHECK64-NEXT: sete %cl
-; CHECK64-NEXT: testb %al, %cl
-; CHECK64-NEXT: je .LBB3_1
-; CHECK64-NEXT: # %bb.2: # %exit2
+; CHECK64-NEXT: jne .LBB3_2
+; CHECK64-NEXT: # %bb.1: # %entry
+; CHECK64-NEXT: andb $8, %al
+; CHECK64-NEXT: je .LBB3_2
+; CHECK64-NEXT: # %bb.3: # %exit2
; CHECK64-NEXT: xorl %eax, %eax
; CHECK64-NEXT: retq
-; CHECK64-NEXT: .LBB3_1: # %exit
+; CHECK64-NEXT: .LBB3_2: # %exit
; CHECK64-NEXT: movb $1, %al
; CHECK64-NEXT: retq
entry:
@@ -292,7 +284,7 @@ define i64 @test_two_live_flags(ptr %foo0, i64 %bar0, i64 %baz0, ptr %foo1, i64
; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %esi
; CHECK32-NEXT: lock cmpxchg8b (%esi)
-; CHECK32-NEXT: sete {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; CHECK32-NEXT: setne {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK32-NEXT: movl %ebp, %edx
; CHECK32-NEXT: movl %edi, %ecx
@@ -300,15 +292,17 @@ define i64 @test_two_live_flags(ptr %foo0, i64 %bar0, i64 %baz0, ptr %foo1, i64
; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %esi
; CHECK32-NEXT: lock cmpxchg8b (%esi)
; CHECK32-NEXT: sete %al
-; CHECK32-NEXT: andb {{[-0-9]+}}(%e{{[sb]}}p), %al # 1-byte Folded Reload
-; CHECK32-NEXT: cmpb $1, %al
-; CHECK32-NEXT: jne .LBB5_3
-; CHECK32-NEXT: # %bb.1: # %t
+; CHECK32-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; CHECK32-NEXT: jne .LBB5_4
+; CHECK32-NEXT: # %bb.1: # %entry
+; CHECK32-NEXT: testb %al, %al
+; CHECK32-NEXT: je .LBB5_4
+; CHECK32-NEXT: # %bb.2: # %t
; CHECK32-NEXT: movl $42, %eax
-; CHECK32-NEXT: jmp .LBB5_2
-; CHECK32-NEXT: .LBB5_3: # %f
+; CHECK32-NEXT: jmp .LBB5_3
+; CHECK32-NEXT: .LBB5_4: # %f
; CHECK32-NEXT: xorl %eax, %eax
-; CHECK32-NEXT: .LBB5_2: # %t
+; CHECK32-NEXT: .LBB5_3: # %t
; CHECK32-NEXT: xorl %edx, %edx
; CHECK32-NEXT: addl $4, %esp
; CHECK32-NEXT: popl %esi
@@ -321,17 +315,19 @@ define i64 @test_two_live_flags(ptr %foo0, i64 %bar0, i64 %baz0, ptr %foo1, i64
; CHECK64: # %bb.0: # %entry
; CHECK64-NEXT: movq %rsi, %rax
; CHECK64-NEXT: lock cmpxchgq %rdx, (%rdi)
-; CHECK64-NEXT: sete %dl
+; CHECK64-NEXT: setne %dl
; CHECK64-NEXT: movq %r8, %rax
; CHECK64-NEXT: lock cmpxchgq %r9, (%rcx)
; CHECK64-NEXT: sete %al
-; CHECK64-NEXT: andb %dl, %al
-; CHECK64-NEXT: cmpb $1, %al
-; CHECK64-NEXT: jne .LBB5_2
-; CHECK64-NEXT: # %bb.1: # %t
+; CHECK64-NEXT: testb %dl, %dl
+; CHECK64-NEXT: jne .LBB5_3
+; CHECK64-NEXT: # %bb.1: # %entry
+; CHECK64-NEXT: testb %al, %al
+; CHECK64-NEXT: je .LBB5_3
+; CHECK64-NEXT: # %bb.2: # %t
; CHECK64-NEXT: movl $42, %eax
; CHECK64-NEXT: retq
-; CHECK64-NEXT: .LBB5_2: # %f
+; CHECK64-NEXT: .LBB5_3: # %f
; CHECK64-NEXT: xorl %eax, %eax
; CHECK64-NEXT: retq
entry:
@@ -357,6 +353,7 @@ define i1 @asm_clobbering_flags(ptr %mem) nounwind {
; CHECK32-NEXT: testl %edx, %edx
; CHECK32-NEXT: setg %al
; CHECK32-NEXT: #APP
+; CHECK32-NOT: rep
; CHECK32-NEXT: bsfl %edx, %edx
; CHECK32-NEXT: #NO_APP
; CHECK32-NEXT: movl %edx, (%ecx)
@@ -368,6 +365,7 @@ define i1 @asm_clobbering_flags(ptr %mem) nounwind {
; CHECK64-NEXT: testl %ecx, %ecx
; CHECK64-NEXT: setg %al
; CHECK64-NEXT: #APP
+; CHECK64-NOT: rep
; CHECK64-NEXT: bsfl %ecx, %ecx
; CHECK64-NEXT: #NO_APP
; CHECK64-NEXT: movl %ecx, (%rdi)
diff --git a/llvm/test/CodeGen/X86/pr33747.ll b/llvm/test/CodeGen/X86/pr33747.ll
index c8ba2b2e3a7909..e261486dd59246 100644
--- a/llvm/test/CodeGen/X86/pr33747.ll
+++ b/llvm/test/CodeGen/X86/pr33747.ll
@@ -5,19 +5,18 @@ define void @PR33747(ptr nocapture) {
; CHECK-LABEL: PR33747:
; CHECK: # %bb.0:
; CHECK-NEXT: movl 24(%rdi), %eax
-; CHECK-NEXT: leal 1(%rax), %ecx
-; CHECK-NEXT: cmpl $3, %ecx
-; CHECK-NEXT: setb %cl
; CHECK-NEXT: testl %eax, %eax
-; CHECK-NEXT: setne %al
-; CHECK-NEXT: testb %cl, %al
-; CHECK-NEXT: je .LBB0_2
-; CHECK-NEXT: .p2align 4, 0x90
-; CHECK-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: jmp .LBB0_1
+; CHECK-NEXT: je .LBB0_3
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: incl %eax
+; CHECK-NEXT: cmpl $3, %eax
+; CHECK-NEXT: jae .LBB0_3
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: .LBB0_2: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: jmp .LBB0_2
+; CHECK-NEXT: .p2align 4, 0x90
+; CHECK-NEXT: .LBB0_3: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: jmp .LBB0_3
%2 = getelementptr inbounds i32, ptr %0, i64 6
%3 = load i32, ptr %2, align 4
%4 = add i32 %3, 1
diff --git a/llvm/test/CodeGen/X86/pr37025.ll b/llvm/test/CodeGen/X86/pr37025.ll
index 8ac28d6286a607..a758ddc91541bc 100644
--- a/llvm/test/CodeGen/X86/pr37025.ll
+++ b/llvm/test/CodeGen/X86/pr37025.ll
@@ -18,13 +18,11 @@ define void @test_dec_select(ptr nocapture %0, ptr readnone %1) {
; CHECK-LABEL: test_dec_select:
; CHECK: # %bb.0:
; CHECK-NEXT: lock decq (%rdi)
-; CHECK-NEXT: sete %al
-; CHECK-NEXT: testq %rsi, %rsi
-; CHECK-NEXT: setne %cl
-; CHECK-NEXT: andb %al, %cl
-; CHECK-NEXT: cmpb $1, %cl
-; CHECK-NEXT: je func2 # TAILCALL
+; CHECK-NEXT: jne .LBB0_2
; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: testq %rsi, %rsi
+; CHECK-NEXT: jne func2 # TAILCALL
+; CHECK-NEXT: .LBB0_2:
; CHECK-NEXT: retq
%3 = atomicrmw sub ptr %0, i64 1 seq_cst
%4 = icmp eq i64 %3, 1
@@ -46,11 +44,11 @@ define void @test_dec_select_commute(ptr nocapture %0, ptr readnone %1) {
; CHECK-NEXT: lock decq (%rdi)
; CHECK-NEXT: sete %al
; CHECK-NEXT: testq %rsi, %rsi
-; CHECK-NEXT: setne %cl
-; CHECK-NEXT: andb %al, %cl
-; CHECK-NEXT: cmpb $1, %cl
-; CHECK-NEXT: je func2 # TAILCALL
+; CHECK-NEXT: je .LBB1_2
; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: testb %al, %al
+; CHECK-NEXT: jne func2 # TAILCALL
+; CHECK-NEXT: .LBB1_2:
; CHECK-NEXT: retq
%3 = atomicrmw sub ptr %0, i64 1 seq_cst
%4 = icmp eq i64 %3, 1
@@ -71,13 +69,12 @@ define void @test_dec_and(ptr nocapture %0, ptr readnone %1) {
; CHECK: # %bb.0:
; CHECK-NEXT: lock decq (%rdi)
; CHECK-NEXT: sete %al
-; CHECK-NEXT: notb %al
; CHECK-NEXT: testq %rsi, %rsi
-; CHECK-NEXT: sete %cl
-; CHECK-NEXT: orb %al, %cl
-; CHECK-NEXT: testb $1, %cl
-; CHECK-NEXT: je func2 # TAILCALL
+; CHECK-NEXT: je .LBB2_2
; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: testb %al, %al
+; CHECK-NEXT: jne func2 # TAILCALL
+; CHECK-NEXT: .LBB2_2:
; CHECK-NEXT: retq
%3 = atomicrmw sub ptr %0, i64 1 seq_cst
%4 = icmp eq i64 %3, 1
@@ -97,14 +94,11 @@ define void @test_dec_and_commute(ptr nocapture %0, ptr readnone %1) {
; CHECK-LABEL: test_dec_and_commute:
; CHECK: # %bb.0:
; CHECK-NEXT: lock decq (%rdi)
-; CHECK-NEXT: sete %al
-; CHECK-NEXT: notb %al
-; CHECK-NEXT: testq %rsi, %rsi
-; CHECK-NEXT: sete %cl
-; CHECK-NEXT: orb %al, %cl
-; CHECK-NEXT: testb $1, %cl
-; CHECK-NEXT: je func2 # TAILCALL
+; CHECK-NEXT: jne .LBB3_2
; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: testq %rsi, %rsi
+; CHECK-NEXT: jne func2 # TAILCALL
+; CHECK-NEXT: .LBB3_2:
; CHECK-NEXT: retq
%3 = atomicrmw sub ptr %0, i64 1 seq_cst
%4 = icmp eq i64 %3, 1
diff --git a/llvm/test/CodeGen/X86/pr38795.ll b/llvm/test/CodeGen/X86/pr38795.ll
index f64c70e8fc79a4..03629a353d84dc 100644
--- a/llvm/test/CodeGen/X86/pr38795.ll
+++ b/llvm/test/CodeGen/X86/pr38795.ll
@@ -25,126 +25,141 @@ define dso_local void @fn() {
; CHECK-NEXT: xorl %ebx, %ebx
; CHECK-NEXT: # implicit-def: $ecx
; CHECK-NEXT: # implicit-def: $edi
-; CHECK-NEXT: # implicit-def: $dh
; CHECK-NEXT: # implicit-def: $al
; CHECK-NEXT: # kill: killed $al
+; CHECK-NEXT: # implicit-def: $al
; CHECK-NEXT: # implicit-def: $ebp
; CHECK-NEXT: jmp .LBB0_1
; CHECK-NEXT: .p2align 4, 0x90
-; CHECK-NEXT: .LBB0_15: # %for.inc
-; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1
-; CHECK-NEXT: movb %dl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; CHECK-NEXT: .LBB0_16: # in Loop: Header=BB0_1 Depth=1
+; CHECK-NEXT: movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; CHECK-NEXT: movb %dh, %al
; CHECK-NEXT: .LBB0_1: # %for.cond
; CHECK-NEXT: # =>This Loop Header: Depth=1
-; CHECK-NEXT: # Child Loop BB0_19 Depth 2
-; CHECK-NEXT: testb %bl, %bl
-; CHECK-NEXT: jne .LBB0_3
-; CHECK-NEXT: # %bb.2: # %if.then
+; CHECK-NEXT: # Child Loop BB0_22 Depth 2
+; CHECK-NEXT: movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; CHECK-NEXT: cmpb $8, %al
+; CHECK-NEXT: ja .LBB0_3
+; CHECK-NEXT: # %bb.2: # %for.cond
; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1
-; CHECK-NEXT: movb %dh, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
-; CHECK-NEXT: movl $.str, (%esp)
-; CHECK-NEXT: calll printf
-; CHECK-NEXT: # implicit-def: $eax
-; CHECK-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 1-byte Folded Reload
-; CHECK-NEXT: testl %edi, %edi
-; CHECK-NEXT: jne .LBB0_10
-; CHECK-NEXT: jmp .LBB0_6
-; CHECK-NEXT: .p2align 4, 0x90
-; CHECK-NEXT: .LBB0_3: # %if.end
+; CHECK-NEXT: testb %bl, %bl
+; CHECK-NEXT: je .LBB0_3
+; CHECK-NEXT: # %bb.4: # %if.end
; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1
; CHECK-NEXT: movl %ecx, %eax
; CHECK-NEXT: cltd
; CHECK-NEXT: idivl a
-; CHECK-NEXT: movl %ecx, %edx
+; CHECK-NEXT: movl %eax, %esi
+; CHECK-NEXT: movb %cl, %dh
; CHECK-NEXT: movl $0, h
-; CHECK-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %dh # 1-byte Reload
-; CHECK-NEXT: cmpb $8, %dh
-; CHECK-NEXT: jg .LBB0_7
-; CHECK-NEXT: # %bb.4: # %if.then13
+; CHECK-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
+; CHECK-NEXT: cmpb $8, %al
+; CHECK-NEXT: jg .LBB0_8
+; CHECK-NEXT: # %bb.5: # %if.then13
; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1
-; CHECK-NEXT: movl %eax, %esi
; CHECK-NEXT: movl $.str, (%esp)
-; CHECK-NEXT: movb %dl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; CHECK-NEXT: movb %dh, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
; CHECK-NEXT: calll printf
; CHECK-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %dh # 1-byte Reload
-; CHECK-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %dl # 1-byte Reload
; CHECK-NEXT: testb %bl, %bl
; CHECK-NEXT: movl %esi, %ecx
; CHECK-NEXT: # implicit-def: $eax
-; CHECK-NEXT: movb %dh, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
-; CHECK-NEXT: jne .LBB0_15
+; CHECK-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %dl # 1-byte Reload
+; CHECK-NEXT: movb %dl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; CHECK-NEXT: movb %dh, %dl
+; CHECK-NEXT: je .LBB0_6
+; CHECK-NEXT: jmp .LBB0_18
; CHECK-NEXT: .p2align 4, 0x90
-; CHECK-NEXT: # %bb.5: # %for.cond35
+; CHECK-NEXT: .LBB0_3: # %if.then
+; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1
+; CHECK-NEXT: movl $.str, (%esp)
+; CHECK-NEXT: calll printf
+; CHECK-NEXT: # implicit-def: $eax
+; CHECK-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 1-byte Folded Reload
+; CHECK-NEXT: .LBB0_6: # %for.cond35
; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1
; CHECK-NEXT: testl %edi, %edi
-; CHECK-NEXT: je .LBB0_6
-; CHECK-NEXT: .LBB0_10: # %af
+; CHECK-NEXT: je .LBB0_7
+; CHECK-NEXT: .LBB0_11: # %af
; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1
; CHECK-NEXT: testb %bl, %bl
-; CHECK-NEXT: jne .LBB0_11
-; CHECK-NEXT: .LBB0_16: # %if.end39
+; CHECK-NEXT: jne .LBB0_12
+; CHECK-NEXT: .LBB0_19: # %if.end39
; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1
; CHECK-NEXT: testl %eax, %eax
-; CHECK-NEXT: je .LBB0_18
-; CHECK-NEXT: # %bb.17: # %if.then41
+; CHECK-NEXT: je .LBB0_21
+; CHECK-NEXT: # %bb.20: # %if.then41
; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1
; CHECK-NEXT: movl $0, {{[0-9]+}}(%esp)
; CHECK-NEXT: movl $fn, {{[0-9]+}}(%esp)
; CHECK-NEXT: movl $.str, (%esp)
; CHECK-NEXT: calll printf
-; CHECK-NEXT: .LBB0_18: # %for.end46
+; CHECK-NEXT: .LBB0_21: # %for.end46
; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1
+; CHECK-NEXT: # implicit-def: $al
; CHECK-NEXT: # implicit-def: $dh
-; CHECK-NEXT: # implicit-def: $dl
; CHECK-NEXT: # implicit-def: $ebp
-; CHECK-NEXT: jmp .LBB0_19
+; CHECK-NEXT: jmp .LBB0_22
; CHECK-NEXT: .p2align 4, 0x90
-; CHECK-NEXT: .LBB0_7: # %if.end21
+; CHECK-NEXT: .LBB0_8: # %if.end21
; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1
; CHECK-NEXT: # implicit-def: $ebp
-; CHECK-NEXT: jmp .LBB0_8
+; CHECK-NEXT: jmp .LBB0_9
; CHECK-NEXT: .p2align 4, 0x90
-; CHECK-NEXT: .LBB0_6: # in Loop: Header=BB0_1 Depth=1
+; CHECK-NEXT: .LBB0_7: # in Loop: Header=BB0_1 Depth=1
; CHECK-NEXT: xorl %edi, %edi
-; CHECK-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %dh # 1-byte Reload
+; CHECK-NEXT: movb %dl, %dh
+; CHECK-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
; CHECK-NEXT: .p2align 4, 0x90
-; CHECK-NEXT: .LBB0_19: # %for.cond47
+; CHECK-NEXT: .LBB0_22: # %for.cond47
; CHECK-NEXT: # Parent Loop BB0_1 Depth=1
; CHECK-NEXT: # => This Inner Loop Header: Depth=2
; CHECK-NEXT: testb %bl, %bl
-; CHECK-NEXT: jne .LBB0_19
-; CHECK-NEXT: .LBB0_8: # %ae
+; CHECK-NEXT: jne .LBB0_22
+; CHECK-NEXT: # %bb.23: # %for.cond47
+; CHECK-NEXT: # in Loop: Header=BB0_22 Depth=2
+; CHECK-NEXT: jne .LBB0_22
+; CHECK-NEXT: .LBB0_9: # %ae
; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1
; CHECK-NEXT: testb %bl, %bl
-; CHECK-NEXT: jne .LBB0_9
-; CHECK-NEXT: # %bb.12: # %if.end26
+; CHECK-NEXT: jne .LBB0_10
+; CHECK-NEXT: # %bb.13: # %if.end26
; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1
; CHECK-NEXT: xorl %ecx, %ecx
-; CHECK-NEXT: testb %dh, %dh
-; CHECK-NEXT: je .LBB0_15
-; CHECK-NEXT: # %bb.13: # %if.end26
+; CHECK-NEXT: testb %al, %al
+; CHECK-NEXT: je .LBB0_14
+; CHECK-NEXT: # %bb.15: # %if.end26
; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1
; CHECK-NEXT: testl %ebp, %ebp
-; CHECK-NEXT: jne .LBB0_15
-; CHECK-NEXT: # %bb.14: # %if.then31
+; CHECK-NEXT: jne .LBB0_16
+; CHECK-NEXT: # %bb.17: # %if.then31
; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1
; CHECK-NEXT: xorl %ecx, %ecx
+; CHECK-NEXT: movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
; CHECK-NEXT: xorl %ebp, %ebp
-; CHECK-NEXT: jmp .LBB0_15
+; CHECK-NEXT: .LBB0_18: # %for.inc
+; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1
+; CHECK-NEXT: movb %dh, %al
+; CHECK-NEXT: jmp .LBB0_1
; CHECK-NEXT: .p2align 4, 0x90
-; CHECK-NEXT: .LBB0_9: # in Loop: Header=BB0_1 Depth=1
+; CHECK-NEXT: .LBB0_10: # in Loop: Header=BB0_1 Depth=1
; CHECK-NEXT: # implicit-def: $eax
; CHECK-NEXT: testb %bl, %bl
-; CHECK-NEXT: je .LBB0_16
-; CHECK-NEXT: .LBB0_11: # in Loop: Header=BB0_1 Depth=1
+; CHECK-NEXT: je .LBB0_19
+; CHECK-NEXT: .LBB0_12: # in Loop: Header=BB0_1 Depth=1
; CHECK-NEXT: # implicit-def: $edi
; CHECK-NEXT: # implicit-def: $cl
; CHECK-NEXT: # kill: killed $cl
; CHECK-NEXT: # implicit-def: $dl
; CHECK-NEXT: # implicit-def: $ebp
; CHECK-NEXT: testl %edi, %edi
-; CHECK-NEXT: jne .LBB0_10
-; CHECK-NEXT: jmp .LBB0_6
+; CHECK-NEXT: jne .LBB0_11
+; CHECK-NEXT: jmp .LBB0_7
+; CHECK-NEXT: .p2align 4, 0x90
+; CHECK-NEXT: .LBB0_14: # in Loop: Header=BB0_1 Depth=1
+; CHECK-NEXT: movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; CHECK-NEXT: movb %dh, %al
+; CHECK-NEXT: jmp .LBB0_1
entry:
br label %for.cond
diff --git a/llvm/test/CodeGen/X86/setcc-logic.ll b/llvm/test/CodeGen/X86/setcc-logic.ll
index c98aae7fbf4059..3faa493ebccd0d 100644
--- a/llvm/test/CodeGen/X86/setcc-logic.ll
+++ b/llvm/test/CodeGen/X86/setcc-logic.ll
@@ -132,12 +132,15 @@ return:
define i32 @all_sign_bits_clear_branch(i32 %P, i32 %Q) nounwind {
; CHECK-LABEL: all_sign_bits_clear_branch:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: orl %esi, %edi
-; CHECK-NEXT: js .LBB9_2
-; CHECK-NEXT: # %bb.1: # %bb1
+; CHECK-NEXT: testl %edi, %edi
+; CHECK-NEXT: js .LBB9_3
+; CHECK-NEXT: # %bb.1: # %entry
+; CHECK-NEXT: testl %esi, %esi
+; CHECK-NEXT: js .LBB9_3
+; CHECK-NEXT: # %bb.2: # %bb1
; CHECK-NEXT: movl $4, %eax
; CHECK-NEXT: retq
-; CHECK-NEXT: .LBB9_2: # %return
+; CHECK-NEXT: .LBB9_3: # %return
; CHECK-NEXT: movl $192, %eax
; CHECK-NEXT: retq
entry:
@@ -156,13 +159,15 @@ return:
define i32 @all_bits_set_branch(i32 %P, i32 %Q) nounwind {
; CHECK-LABEL: all_bits_set_branch:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: andl %esi, %edi
; CHECK-NEXT: cmpl $-1, %edi
-; CHECK-NEXT: jne .LBB10_2
-; CHECK-NEXT: # %bb.1: # %bb1
+; CHECK-NEXT: jne .LBB10_3
+; CHECK-NEXT: # %bb.1: # %entry
+; CHECK-NEXT: cmpl $-1, %esi
+; CHECK-NEXT: jne .LBB10_3
+; CHECK-NEXT: # %bb.2: # %bb1
; CHECK-NEXT: movl $4, %eax
; CHECK-NEXT: retq
-; CHECK-NEXT: .LBB10_2: # %return
+; CHECK-NEXT: .LBB10_3: # %return
; CHECK-NEXT: movl $192, %eax
; CHECK-NEXT: retq
entry:
@@ -181,12 +186,15 @@ return:
define i32 @all_sign_bits_set_branch(i32 %P, i32 %Q) nounwind {
; CHECK-LABEL: all_sign_bits_set_branch:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: testl %esi, %edi
-; CHECK-NEXT: jns .LBB11_2
-; CHECK-NEXT: # %bb.1: # %bb1
+; CHECK-NEXT: testl %edi, %edi
+; CHECK-NEXT: jns .LBB11_3
+; CHECK-NEXT: # %bb.1: # %entry
+; CHECK-NEXT: testl %esi, %esi
+; CHECK-NEXT: jns .LBB11_3
+; CHECK-NEXT: # %bb.2: # %bb1
; CHECK-NEXT: movl $4, %eax
; CHECK-NEXT: retq
-; CHECK-NEXT: .LBB11_2: # %return
+; CHECK-NEXT: .LBB11_3: # %return
; CHECK-NEXT: movl $192, %eax
; CHECK-NEXT: retq
entry:
@@ -230,14 +238,17 @@ return:
define i32 @any_sign_bits_set_branch(i32 %P, i32 %Q) nounwind {
; CHECK-LABEL: any_sign_bits_set_branch:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: orl %esi, %edi
-; CHECK-NEXT: jns .LBB13_2
-; CHECK-NEXT: # %bb.1: # %bb1
-; CHECK-NEXT: movl $4, %eax
-; CHECK-NEXT: retq
-; CHECK-NEXT: .LBB13_2: # %return
+; CHECK-NEXT: testl %edi, %edi
+; CHECK-NEXT: js .LBB13_2
+; CHECK-NEXT: # %bb.1: # %entry
+; CHECK-NEXT: testl %esi, %esi
+; CHECK-NEXT: js .LBB13_2
+; CHECK-NEXT: # %bb.3: # %return
; CHECK-NEXT: movl $192, %eax
; CHECK-NEXT: retq
+; CHECK-NEXT: .LBB13_2: # %bb1
+; CHECK-NEXT: movl $4, %eax
+; CHECK-NEXT: retq
entry:
%a = icmp slt i32 %P, 0
%b = icmp slt i32 %Q, 0
@@ -254,15 +265,17 @@ return:
define i32 @any_bits_clear_branch(i32 %P, i32 %Q) nounwind {
; CHECK-LABEL: any_bits_clear_branch:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: andl %esi, %edi
; CHECK-NEXT: cmpl $-1, %edi
-; CHECK-NEXT: je .LBB14_2
-; CHECK-NEXT: # %bb.1: # %bb1
-; CHECK-NEXT: movl $4, %eax
-; CHECK-NEXT: retq
-; CHECK-NEXT: .LBB14_2: # %return
+; CHECK-NEXT: jne .LBB14_2
+; CHECK-NEXT: # %bb.1: # %entry
+; CHECK-NEXT: cmpl $-1, %esi
+; CHECK-NEXT: jne .LBB14_2
+; CHECK-NEXT: # %bb.3: # %return
; CHECK-NEXT: movl $192, %eax
; CHECK-NEXT: retq
+; CHECK-NEXT: .LBB14_2: # %bb1
+; CHECK-NEXT: movl $4, %eax
+; CHECK-NEXT: retq
entry:
%a = icmp ne i32 %P, -1
%b = icmp ne i32 %Q, -1
@@ -279,14 +292,17 @@ return:
define i32 @any_sign_bits_clear_branch(i32 %P, i32 %Q) nounwind {
; CHECK-LABEL: any_sign_bits_clear_branch:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: testl %esi, %edi
-; CHECK-NEXT: js .LBB15_2
-; CHECK-NEXT: # %bb.1: # %bb1
-; CHECK-NEXT: movl $4, %eax
-; CHECK-NEXT: retq
-; CHECK-NEXT: .LBB15_2: # %return
+; CHECK-NEXT: testl %edi, %edi
+; CHECK-NEXT: jns .LBB15_2
+; CHECK-NEXT: # %bb.1: # %entry
+; CHECK-NEXT: testl %esi, %esi
+; CHECK-NEXT: jns .LBB15_2
+; CHECK-NEXT: # %bb.3: # %return
; CHECK-NEXT: movl $192, %eax
; CHECK-NEXT: retq
+; CHECK-NEXT: .LBB15_2: # %bb1
+; CHECK-NEXT: movl $4, %eax
+; CHECK-NEXT: retq
entry:
%a = icmp sgt i32 %P, -1
%b = icmp sgt i32 %Q, -1
diff --git a/llvm/test/CodeGen/X86/swifterror.ll b/llvm/test/CodeGen/X86/swifterror.ll
index 1489b0295e9356..75252309790b1b 100644
--- a/llvm/test/CodeGen/X86/swifterror.ll
+++ b/llvm/test/CodeGen/X86/swifterror.ll
@@ -1259,7 +1259,12 @@ entry:
define swiftcc void @dont_crash_on_new_isel_blocks(ptr nocapture swifterror, i1, ptr) {
; CHECK-APPLE-LABEL: dont_crash_on_new_isel_blocks:
; CHECK-APPLE: ## %bb.0: ## %entry
+; CHECK-APPLE-NEXT: xorl %eax, %eax
+; CHECK-APPLE-NEXT: testb %al, %al
+; CHECK-APPLE-NEXT: jne LBB15_2
+; CHECK-APPLE-NEXT: ## %bb.1: ## %entry
; CHECK-APPLE-NEXT: testb $1, %dil
+; CHECK-APPLE-NEXT: LBB15_2: ## %cont
; CHECK-APPLE-NEXT: pushq %rax
; CHECK-APPLE-NEXT: .cfi_def_cfa_offset 16
; CHECK-APPLE-NEXT: callq *%rax
@@ -1285,7 +1290,12 @@ define swiftcc void @dont_crash_on_new_isel_blocks(ptr nocapture swifterror, i1,
;
; CHECK-i386-LABEL: dont_crash_on_new_isel_blocks:
; CHECK-i386: ## %bb.0: ## %entry
+; CHECK-i386-NEXT: xorl %eax, %eax
+; CHECK-i386-NEXT: testb %al, %al
+; CHECK-i386-NEXT: jne LBB15_2
+; CHECK-i386-NEXT: ## %bb.1: ## %entry
; CHECK-i386-NEXT: testb $1, 8(%esp)
+; CHECK-i386-NEXT: LBB15_2: ## %cont
; CHECK-i386-NEXT: jmpl *%eax ## TAILCALL
entry:
%3 = or i1 false, %1
diff --git a/llvm/test/CodeGen/X86/tail-dup-merge-loop-headers.ll b/llvm/test/CodeGen/X86/tail-dup-merge-loop-headers.ll
index 8d84e887d3f279..9cd37315181209 100644
--- a/llvm/test/CodeGen/X86/tail-dup-merge-loop-headers.ll
+++ b/llvm/test/CodeGen/X86/tail-dup-merge-loop-headers.ll
@@ -91,97 +91,116 @@ define i32 @loop_shared_header(ptr %exe, i32 %exesz, i32 %headsize, i32 %min, i3
; CHECK-NEXT: pushq %rbp
; CHECK-NEXT: pushq %r15
; CHECK-NEXT: pushq %r14
+; CHECK-NEXT: pushq %r13
; CHECK-NEXT: pushq %r12
; CHECK-NEXT: pushq %rbx
+; CHECK-NEXT: pushq %rax
; CHECK-NEXT: movl $1, %ebx
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: testb %al, %al
-; CHECK-NEXT: jne .LBB1_12
+; CHECK-NEXT: jne .LBB1_24
; CHECK-NEXT: # %bb.1: # %if.end19
-; CHECK-NEXT: movl (%rax), %r12d
-; CHECK-NEXT: leal (,%r12,4), %ebp
-; CHECK-NEXT: movl %ebp, %r15d
+; CHECK-NEXT: movl %esi, %ebp
+; CHECK-NEXT: movq %rdi, %r15
+; CHECK-NEXT: movl (%rax), %r13d
+; CHECK-NEXT: leal (,%r13,4), %ebx
+; CHECK-NEXT: movl %ebx, %r12d
; CHECK-NEXT: movl $1, %esi
-; CHECK-NEXT: movq %r15, %rdi
+; CHECK-NEXT: movq %r12, %rdi
; CHECK-NEXT: callq cli_calloc@PLT
+; CHECK-NEXT: testl %ebp, %ebp
+; CHECK-NEXT: je .LBB1_23
+; CHECK-NEXT: # %bb.2: # %if.end19
+; CHECK-NEXT: testl %r13d, %r13d
+; CHECK-NEXT: je .LBB1_23
+; CHECK-NEXT: # %bb.3: # %if.end19
; CHECK-NEXT: movq %rax, %r14
-; CHECK-NEXT: movb $1, %al
+; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: testb %al, %al
-; CHECK-NEXT: jne .LBB1_12
-; CHECK-NEXT: # %bb.2: # %if.end50
+; CHECK-NEXT: jne .LBB1_23
+; CHECK-NEXT: # %bb.4: # %if.end19
+; CHECK-NEXT: cmpq %r15, %r14
+; CHECK-NEXT: jb .LBB1_23
+; CHECK-NEXT: # %bb.5: # %if.end50
; CHECK-NEXT: movq %r14, %rdi
-; CHECK-NEXT: movq %r15, %rdx
+; CHECK-NEXT: movq %r12, %rdx
; CHECK-NEXT: callq memcpy@PLT
-; CHECK-NEXT: cmpl $4, %ebp
-; CHECK-NEXT: jb .LBB1_19
-; CHECK-NEXT: # %bb.3: # %shared_preheader
+; CHECK-NEXT: cmpl $4, %ebx
+; CHECK-NEXT: jb .LBB1_26
+; CHECK-NEXT: # %bb.6: # %shared_preheader
; CHECK-NEXT: movb $32, %cl
; CHECK-NEXT: xorl %eax, %eax
-; CHECK-NEXT: jmp .LBB1_4
+; CHECK-NEXT: jmp .LBB1_8
; CHECK-NEXT: .p2align 4, 0x90
-; CHECK-NEXT: .LBB1_15: # %merge_predecessor_split
-; CHECK-NEXT: # in Loop: Header=BB1_4 Depth=1
+; CHECK-NEXT: .LBB1_7: # %merge_predecessor_split
+; CHECK-NEXT: # in Loop: Header=BB1_8 Depth=1
; CHECK-NEXT: movb $32, %cl
-; CHECK-NEXT: .LBB1_4: # %outer_loop_header
+; CHECK-NEXT: .LBB1_8: # %outer_loop_header
; CHECK-NEXT: # =>This Loop Header: Depth=1
-; CHECK-NEXT: # Child Loop BB1_8 Depth 2
-; CHECK-NEXT: testl %r12d, %r12d
-; CHECK-NEXT: je .LBB1_5
+; CHECK-NEXT: # Child Loop BB1_9 Depth 2
+; CHECK-NEXT: testl %r13d, %r13d
+; CHECK-NEXT: je .LBB1_16
; CHECK-NEXT: .p2align 4, 0x90
-; CHECK-NEXT: .LBB1_8: # %shared_loop_header
-; CHECK-NEXT: # Parent Loop BB1_4 Depth=1
+; CHECK-NEXT: .LBB1_9: # %shared_loop_header
+; CHECK-NEXT: # Parent Loop BB1_8 Depth=1
; CHECK-NEXT: # => This Inner Loop Header: Depth=2
; CHECK-NEXT: testq %r14, %r14
-; CHECK-NEXT: jne .LBB1_18
-; CHECK-NEXT: # %bb.9: # %inner_loop_body
-; CHECK-NEXT: # in Loop: Header=BB1_8 Depth=2
+; CHECK-NEXT: jne .LBB1_25
+; CHECK-NEXT: # %bb.10: # %inner_loop_body
+; CHECK-NEXT: # in Loop: Header=BB1_9 Depth=2
; CHECK-NEXT: testb %al, %al
-; CHECK-NEXT: je .LBB1_8
-; CHECK-NEXT: # %bb.10: # %if.end96.i
-; CHECK-NEXT: # in Loop: Header=BB1_4 Depth=1
-; CHECK-NEXT: cmpl $3, %r12d
-; CHECK-NEXT: jae .LBB1_11
-; CHECK-NEXT: # %bb.13: # %if.end287.i
-; CHECK-NEXT: # in Loop: Header=BB1_4 Depth=1
+; CHECK-NEXT: je .LBB1_9
+; CHECK-NEXT: # %bb.11: # %if.end96.i
+; CHECK-NEXT: # in Loop: Header=BB1_8 Depth=1
+; CHECK-NEXT: cmpl $3, %r13d
+; CHECK-NEXT: jae .LBB1_20
+; CHECK-NEXT: # %bb.12: # %if.end287.i
+; CHECK-NEXT: # in Loop: Header=BB1_8 Depth=1
; CHECK-NEXT: testb %al, %al
; CHECK-NEXT: # implicit-def: $cl
-; CHECK-NEXT: jne .LBB1_4
-; CHECK-NEXT: # %bb.14: # %if.end308.i
-; CHECK-NEXT: # in Loop: Header=BB1_4 Depth=1
+; CHECK-NEXT: jne .LBB1_8
+; CHECK-NEXT: # %bb.13: # %if.end308.i
+; CHECK-NEXT: # in Loop: Header=BB1_8 Depth=1
; CHECK-NEXT: testb %al, %al
-; CHECK-NEXT: je .LBB1_15
-; CHECK-NEXT: # %bb.16: # %if.end335.i
-; CHECK-NEXT: # in Loop: Header=BB1_4 Depth=1
+; CHECK-NEXT: je .LBB1_7
+; CHECK-NEXT: # %bb.14: # %if.end335.i
+; CHECK-NEXT: # in Loop: Header=BB1_8 Depth=1
; CHECK-NEXT: xorl %ecx, %ecx
; CHECK-NEXT: testb %cl, %cl
-; CHECK-NEXT: jne .LBB1_4
-; CHECK-NEXT: # %bb.17: # %merge_other
-; CHECK-NEXT: # in Loop: Header=BB1_4 Depth=1
+; CHECK-NEXT: jne .LBB1_8
+; CHECK-NEXT: # %bb.15: # %merge_other
+; CHECK-NEXT: # in Loop: Header=BB1_8 Depth=1
; CHECK-NEXT: # implicit-def: $cl
-; CHECK-NEXT: jmp .LBB1_4
-; CHECK-NEXT: .LBB1_5: # %while.cond.us1412.i
+; CHECK-NEXT: jmp .LBB1_8
+; CHECK-NEXT: .LBB1_23:
+; CHECK-NEXT: movl $1, %ebx
+; CHECK-NEXT: jmp .LBB1_24
+; CHECK-NEXT: .LBB1_16: # %while.cond.us1412.i
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: testb %al, %al
-; CHECK-NEXT: jne .LBB1_7
-; CHECK-NEXT: # %bb.6: # %while.cond.us1412.i
+; CHECK-NEXT: movl $1, %ebx
+; CHECK-NEXT: jne .LBB1_18
+; CHECK-NEXT: # %bb.17: # %while.cond.us1412.i
; CHECK-NEXT: decb %cl
-; CHECK-NEXT: jne .LBB1_12
-; CHECK-NEXT: .LBB1_7: # %if.end41.us1436.i
-; CHECK-NEXT: .LBB1_11: # %if.then99.i
+; CHECK-NEXT: jne .LBB1_24
+; CHECK-NEXT: .LBB1_18: # %if.end41.us1436.i
+; CHECK-NEXT: .LBB1_20: # %if.then99.i
; CHECK-NEXT: movq .str.6@GOTPCREL(%rip), %rdi
; CHECK-NEXT: xorl %ebx, %ebx
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: callq cli_dbgmsg@PLT
-; CHECK-NEXT: .LBB1_12: # %cleanup
+; CHECK-NEXT: .LBB1_24: # %cleanup
; CHECK-NEXT: movl %ebx, %eax
+; CHECK-NEXT: addq $8, %rsp
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: popq %r12
+; CHECK-NEXT: popq %r13
; CHECK-NEXT: popq %r14
; CHECK-NEXT: popq %r15
; CHECK-NEXT: popq %rbp
; CHECK-NEXT: retq
-; CHECK-NEXT: .LBB1_18: # %wunpsect.exit.thread.loopexit389
-; CHECK-NEXT: .LBB1_19: # %wunpsect.exit.thread.loopexit391
+; CHECK-NEXT: .LBB1_25: # %wunpsect.exit.thread.loopexit389
+; CHECK-NEXT: .LBB1_26: # %wunpsect.exit.thread.loopexit391
entry:
%0 = load i32, ptr undef, align 4
%mul = shl nsw i32 %0, 2
diff --git a/llvm/test/CodeGen/X86/tail-opts.ll b/llvm/test/CodeGen/X86/tail-opts.ll
index d9ab2f7d1f5fb6..d54110d1fa8119 100644
--- a/llvm/test/CodeGen/X86/tail-opts.ll
+++ b/llvm/test/CodeGen/X86/tail-opts.ll
@@ -300,9 +300,10 @@ define fastcc void @c_expand_expr_stmt(ptr %expr) nounwind {
; CHECK-NEXT: cmpl $23, %ecx
; CHECK-NEXT: jne .LBB3_9
; CHECK-NEXT: .LBB3_16: # %lvalue_p.exit4
+; CHECK-NEXT: testb %al, %al
+; CHECK-NEXT: jne .LBB3_9
+; CHECK-NEXT: # %bb.17: # %lvalue_p.exit4
; CHECK-NEXT: testb %bl, %bl
-; CHECK-NEXT: sete %cl
-; CHECK-NEXT: orb %al, %cl
entry:
%tmp4 = load i8, ptr null, align 8 ; <i8> [#uses=3]
switch i8 %tmp4, label %bb3 [
diff --git a/llvm/test/CodeGen/X86/tailcall-extract.ll b/llvm/test/CodeGen/X86/tailcall-extract.ll
index aff6146198c899..7a6c75c44ca7dd 100644
--- a/llvm/test/CodeGen/X86/tailcall-extract.ll
+++ b/llvm/test/CodeGen/X86/tailcall-extract.ll
@@ -6,7 +6,7 @@
; containing call. And later tail call can be generated.
; CHECK-LABEL: test1:
-; CHECK: jne foo # TAILCALL
+; CHECK: je foo # TAILCALL
; CHECK: jmp bar # TAILCALL
; OPT-LABEL: test1
@@ -48,8 +48,8 @@ exit:
; can't be duplicated.
; CHECK-LABEL: test2:
-; CHECK: callq foo
; CHECK: callq bar
+; CHECK: callq foo
; OPT-LABEL: test2
; OPT: if.then.i:
@@ -93,7 +93,7 @@ exit:
; offset, so the exit block can still be duplicated, and tail call generated.
; CHECK-LABEL: test3:
-; CHECK: jne qux # TAILCALL
+; CHECK: je qux # TAILCALL
; CHECK: jmp baz # TAILCALL
; OPT-LABEL: test3
@@ -136,8 +136,8 @@ exit:
; block can't be duplicated.
; CHECK-LABEL: test4:
-; CHECK: callq qux
; CHECK: callq baz
+; CHECK: callq qux
; OPT-LABEL: test4
; OPT: if.then.i:
diff --git a/llvm/test/CodeGen/X86/test-shrink-bug.ll b/llvm/test/CodeGen/X86/test-shrink-bug.ll
index 953a0d65c5386c..ed43cabbdaee11 100644
--- a/llvm/test/CodeGen/X86/test-shrink-bug.ll
+++ b/llvm/test/CodeGen/X86/test-shrink-bug.ll
@@ -48,39 +48,37 @@ define dso_local void @fail(i16 %a, <2 x i8> %b) {
; CHECK-X86: ## %bb.0:
; CHECK-X86-NEXT: subl $12, %esp
; CHECK-X86-NEXT: .cfi_def_cfa_offset 16
-; CHECK-X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; CHECK-X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
; CHECK-X86-NEXT: cmpb $123, {{[0-9]+}}(%esp)
-; CHECK-X86-NEXT: setne %cl
-; CHECK-X86-NEXT: testl $263, %eax ## imm = 0x107
-; CHECK-X86-NEXT: setne %al
-; CHECK-X86-NEXT: testb %cl, %al
-; CHECK-X86-NEXT: jne LBB1_2
-; CHECK-X86-NEXT: ## %bb.1: ## %yes
-; CHECK-X86-NEXT: addl $12, %esp
-; CHECK-X86-NEXT: retl
-; CHECK-X86-NEXT: LBB1_2: ## %no
+; CHECK-X86-NEXT: sete %al
+; CHECK-X86-NEXT: testl $263, %ecx ## imm = 0x107
+; CHECK-X86-NEXT: je LBB1_3
+; CHECK-X86-NEXT: ## %bb.1:
+; CHECK-X86-NEXT: testb %al, %al
+; CHECK-X86-NEXT: jne LBB1_3
+; CHECK-X86-NEXT: ## %bb.2: ## %no
; CHECK-X86-NEXT: calll _bar
+; CHECK-X86-NEXT: LBB1_3: ## %yes
; CHECK-X86-NEXT: addl $12, %esp
; CHECK-X86-NEXT: retl
;
; CHECK-X64-LABEL: fail:
; CHECK-X64: # %bb.0:
+; CHECK-X64-NEXT: testl $263, %edi # imm = 0x107
+; CHECK-X64-NEXT: je .LBB1_3
+; CHECK-X64-NEXT: # %bb.1:
; CHECK-X64-NEXT: pslld $8, %xmm0
; CHECK-X64-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-X64-NEXT: pextrw $1, %xmm0, %eax
-; CHECK-X64-NEXT: xorb $1, %al
-; CHECK-X64-NEXT: testl $263, %edi # imm = 0x107
-; CHECK-X64-NEXT: setne %cl
-; CHECK-X64-NEXT: testb %al, %cl
-; CHECK-X64-NEXT: jne .LBB1_2
-; CHECK-X64-NEXT: # %bb.1: # %yes
-; CHECK-X64-NEXT: retq
-; CHECK-X64-NEXT: .LBB1_2: # %no
+; CHECK-X64-NEXT: testb $1, %al
+; CHECK-X64-NEXT: jne .LBB1_3
+; CHECK-X64-NEXT: # %bb.2: # %no
; CHECK-X64-NEXT: pushq %rax
; CHECK-X64-NEXT: .cfi_def_cfa_offset 16
; CHECK-X64-NEXT: callq bar@PLT
; CHECK-X64-NEXT: popq %rax
; CHECK-X64-NEXT: .cfi_def_cfa_offset 8
+; CHECK-X64-NEXT: .LBB1_3: # %yes
; CHECK-X64-NEXT: retq
%1 = icmp eq <2 x i8> %b, <i8 40, i8 123>
%2 = extractelement <2 x i1> %1, i32 1
diff --git a/llvm/test/CodeGen/X86/x86-shrink-wrap-unwind.ll b/llvm/test/CodeGen/X86/x86-shrink-wrap-unwind.ll
index 3349d31cad4b97..b9e490888d9bfc 100644
--- a/llvm/test/CodeGen/X86/x86-shrink-wrap-unwind.ll
+++ b/llvm/test/CodeGen/X86/x86-shrink-wrap-unwind.ll
@@ -181,40 +181,38 @@ define zeroext i1 @segmentedStack(ptr readonly %vk1, ptr readonly %vk2, i64 %key
; CHECK-LABEL: segmentedStack:
; CHECK: ## %bb.0:
; CHECK-NEXT: cmpq %gs:816, %rsp
-; CHECK-NEXT: jbe LBB3_6
+; CHECK-NEXT: jbe LBB3_7
; CHECK-NEXT: LBB3_1: ## %entry
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: testq %rdi, %rdi
-; CHECK-NEXT: sete %al
-; CHECK-NEXT: testq %rsi, %rsi
-; CHECK-NEXT: sete %cl
-; CHECK-NEXT: orb %al, %cl
; CHECK-NEXT: movq %rdi, %rax
; CHECK-NEXT: orq %rsi, %rax
; CHECK-NEXT: sete %al
-; CHECK-NEXT: testb %cl, %cl
-; CHECK-NEXT: jne LBB3_4
-; CHECK-NEXT: ## %bb.2: ## %if.end4.i
+; CHECK-NEXT: testq %rdi, %rdi
+; CHECK-NEXT: je LBB3_5
+; CHECK-NEXT: ## %bb.2: ## %entry
+; CHECK-NEXT: testq %rsi, %rsi
+; CHECK-NEXT: je LBB3_5
+; CHECK-NEXT: ## %bb.3: ## %if.end4.i
; CHECK-NEXT: movq 8(%rdi), %rdx
; CHECK-NEXT: cmpq 8(%rsi), %rdx
-; CHECK-NEXT: jne LBB3_5
-; CHECK-NEXT: ## %bb.3: ## %land.rhs.i.i
+; CHECK-NEXT: jne LBB3_6
+; CHECK-NEXT: ## %bb.4: ## %land.rhs.i.i
; CHECK-NEXT: movq (%rsi), %rsi
; CHECK-NEXT: movq (%rdi), %rdi
; CHECK-NEXT: callq _memcmp
; CHECK-NEXT: testl %eax, %eax
; CHECK-NEXT: sete %al
-; CHECK-NEXT: LBB3_4: ## %__go_ptr_strings_equal.exit
+; CHECK-NEXT: LBB3_5: ## %__go_ptr_strings_equal.exit
; CHECK-NEXT: ## kill: def $al killed $al killed $eax
; CHECK-NEXT: popq %rcx
; CHECK-NEXT: retq
-; CHECK-NEXT: LBB3_5:
+; CHECK-NEXT: LBB3_6:
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: ## kill: def $al killed $al killed $eax
; CHECK-NEXT: popq %rcx
; CHECK-NEXT: retq
-; CHECK-NEXT: LBB3_6:
+; CHECK-NEXT: LBB3_7:
; CHECK-NEXT: movl $8, %r10d
; CHECK-NEXT: movl $0, %r11d
; CHECK-NEXT: callq ___morestack
@@ -224,43 +222,41 @@ define zeroext i1 @segmentedStack(ptr readonly %vk1, ptr readonly %vk2, i64 %key
; NOCOMPACTUNWIND-LABEL: segmentedStack:
; NOCOMPACTUNWIND: # %bb.0:
; NOCOMPACTUNWIND-NEXT: cmpq %fs:112, %rsp
-; NOCOMPACTUNWIND-NEXT: jbe .LBB3_6
+; NOCOMPACTUNWIND-NEXT: jbe .LBB3_7
; NOCOMPACTUNWIND-NEXT: .LBB3_1: # %entry
; NOCOMPACTUNWIND-NEXT: pushq %rax
; NOCOMPACTUNWIND-NEXT: .cfi_def_cfa_offset 16
-; NOCOMPACTUNWIND-NEXT: testq %rdi, %rdi
-; NOCOMPACTUNWIND-NEXT: sete %al
-; NOCOMPACTUNWIND-NEXT: testq %rsi, %rsi
-; NOCOMPACTUNWIND-NEXT: sete %cl
-; NOCOMPACTUNWIND-NEXT: orb %al, %cl
; NOCOMPACTUNWIND-NEXT: movq %rdi, %rax
; NOCOMPACTUNWIND-NEXT: orq %rsi, %rax
; NOCOMPACTUNWIND-NEXT: sete %al
-; NOCOMPACTUNWIND-NEXT: testb %cl, %cl
-; NOCOMPACTUNWIND-NEXT: jne .LBB3_4
-; NOCOMPACTUNWIND-NEXT: # %bb.2: # %if.end4.i
+; NOCOMPACTUNWIND-NEXT: testq %rdi, %rdi
+; NOCOMPACTUNWIND-NEXT: je .LBB3_5
+; NOCOMPACTUNWIND-NEXT: # %bb.2: # %entry
+; NOCOMPACTUNWIND-NEXT: testq %rsi, %rsi
+; NOCOMPACTUNWIND-NEXT: je .LBB3_5
+; NOCOMPACTUNWIND-NEXT: # %bb.3: # %if.end4.i
; NOCOMPACTUNWIND-NEXT: movq 8(%rdi), %rdx
; NOCOMPACTUNWIND-NEXT: cmpq 8(%rsi), %rdx
-; NOCOMPACTUNWIND-NEXT: jne .LBB3_5
-; NOCOMPACTUNWIND-NEXT: # %bb.3: # %land.rhs.i.i
+; NOCOMPACTUNWIND-NEXT: jne .LBB3_6
+; NOCOMPACTUNWIND-NEXT: # %bb.4: # %land.rhs.i.i
; NOCOMPACTUNWIND-NEXT: movq (%rsi), %rsi
; NOCOMPACTUNWIND-NEXT: movq (%rdi), %rdi
; NOCOMPACTUNWIND-NEXT: callq memcmp@PLT
; NOCOMPACTUNWIND-NEXT: testl %eax, %eax
; NOCOMPACTUNWIND-NEXT: sete %al
-; NOCOMPACTUNWIND-NEXT: .LBB3_4: # %__go_ptr_strings_equal.exit
+; NOCOMPACTUNWIND-NEXT: .LBB3_5: # %__go_ptr_strings_equal.exit
; NOCOMPACTUNWIND-NEXT: # kill: def $al killed $al killed $eax
; NOCOMPACTUNWIND-NEXT: popq %rcx
; NOCOMPACTUNWIND-NEXT: .cfi_def_cfa_offset 8
; NOCOMPACTUNWIND-NEXT: retq
-; NOCOMPACTUNWIND-NEXT: .LBB3_5:
+; NOCOMPACTUNWIND-NEXT: .LBB3_6:
; NOCOMPACTUNWIND-NEXT: .cfi_def_cfa_offset 16
; NOCOMPACTUNWIND-NEXT: xorl %eax, %eax
; NOCOMPACTUNWIND-NEXT: # kill: def $al killed $al killed $eax
; NOCOMPACTUNWIND-NEXT: popq %rcx
; NOCOMPACTUNWIND-NEXT: .cfi_def_cfa_offset 8
; NOCOMPACTUNWIND-NEXT: retq
-; NOCOMPACTUNWIND-NEXT: .LBB3_6:
+; NOCOMPACTUNWIND-NEXT: .LBB3_7:
; NOCOMPACTUNWIND-NEXT: movl $8, %r10d
; NOCOMPACTUNWIND-NEXT: movl $0, %r11d
; NOCOMPACTUNWIND-NEXT: callq __morestack
More information about the llvm-commits
mailing list