[llvm] [X86] Don't always separate conditions in `(br (and/or cond0, cond1))` into separate branches (PR #81689)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Feb 13 15:56:49 PST 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-x86
Author: None (goldsteinn)
<details>
<summary>Changes</summary>
It makes sense to split if the cost of computing `cond1` is high
(in proportion to how likely `cond0` is), but it doesn't really make
sense to introduce a second branch if it's only a few instructions.
Splitting can also get in the way of potentially folding patterns.
This patch introduces some logic to try to check if the cost of
computing `cond1` is relatively low, and if so don't split the
branches.
Modest improvement on clang bootstrap build:
https://llvm-compile-time-tracker.com/compare.php?from=79ce933114e46c891a5632f7ad4a004b93a5b808&to=978278eabc0bafe2f390ca8fcdad24154f954020&stat=cycles
Average stage2-O3: 0.59% Improvement (cycles)
Average stage2-O0-g: 1.20% Improvement (cycles)
Likewise on llvm-test-suite on SKX saw a net 0.84% improvement (cycles)
There is also a modest compile time improvement with this patch:
https://llvm-compile-time-tracker.com/compare.php?from=79ce933114e46c891a5632f7ad4a004b93a5b808&to=978278eabc0bafe2f390ca8fcdad24154f954020&stat=instructions%3Au
Note that the stage2 instruction count increase is expected; this
patch trades instructions for decreasing branch-misses (which is
proportionately lower):
https://llvm-compile-time-tracker.com/compare.php?from=79ce933114e46c891a5632f7ad4a004b93a5b808&to=978278eabc0bafe2f390ca8fcdad24154f954020&stat=branch-misses
NB: This will also likely help for APX targets with the new `CCMP` and
`CTEST` instructions.
---
Patch is 104.14 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/81689.diff
27 Files Affected:
- (modified) llvm/include/llvm/CodeGen/TargetLowering.h (+7)
- (modified) llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp (+5-2)
- (modified) llvm/lib/Target/X86/X86ISelLowering.cpp (+159)
- (modified) llvm/lib/Target/X86/X86ISelLowering.h (+5)
- (modified) llvm/test/CodeGen/X86/2006-04-27-ISelFoldingBug.ll (+6-5)
- (modified) llvm/test/CodeGen/X86/2007-08-09-IllegalX86-64Asm.ll (+56-53)
- (modified) llvm/test/CodeGen/X86/2007-12-18-LoadCSEBug.ll (+5-8)
- (modified) llvm/test/CodeGen/X86/2008-02-18-TailMergingBug.ll (+3-2)
- (modified) llvm/test/CodeGen/X86/avx-cmp.ll (+10-17)
- (modified) llvm/test/CodeGen/X86/cmp.ll (+30-22)
- (modified) llvm/test/CodeGen/X86/dagcombine-and-setcc.ll (+2-1)
- (modified) llvm/test/CodeGen/X86/div-rem-pair-recomposition-unsigned.ll (+154-152)
- (modified) llvm/test/CodeGen/X86/inline-spiller-impdef-on-implicit-def-regression.ll (+31-39)
- (modified) llvm/test/CodeGen/X86/lsr-addrecloops.ll (+94-4)
- (modified) llvm/test/CodeGen/X86/movmsk-cmp.ll (+42-55)
- (modified) llvm/test/CodeGen/X86/or-branch.ll (+5-4)
- (modified) llvm/test/CodeGen/X86/peephole-na-phys-copy-folding.ll (+72-70)
- (modified) llvm/test/CodeGen/X86/pr33747.ll (+9-8)
- (modified) llvm/test/CodeGen/X86/pr37025.ll (+22-16)
- (modified) llvm/test/CodeGen/X86/pr38795.ll (+58-73)
- (modified) llvm/test/CodeGen/X86/setcc-logic.ll (+30-46)
- (modified) llvm/test/CodeGen/X86/swifterror.ll (-10)
- (modified) llvm/test/CodeGen/X86/tail-dup-merge-loop-headers.ll (+50-69)
- (modified) llvm/test/CodeGen/X86/tail-opts.ll (+2-3)
- (modified) llvm/test/CodeGen/X86/tailcall-extract.ll (+4-4)
- (modified) llvm/test/CodeGen/X86/test-shrink-bug.ll (+18-16)
- (modified) llvm/test/CodeGen/X86/x86-shrink-wrap-unwind.ll (+28-24)
``````````diff
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index 612433b54f6e44..85288a3ae519ee 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -596,6 +596,13 @@ class TargetLoweringBase {
/// avoided.
bool isJumpExpensive() const { return JumpIsExpensive; }
+ virtual bool keepJumpConditionsTogether(const FunctionLoweringInfo &,
+ const BranchInst &,
+ Instruction::BinaryOps, const Value *,
+ const Value *) const {
+ return false;
+ }
+
/// Return true if selects are only cheaper than branches if the branch is
/// unlikely to be predicted right.
bool isPredictableSelectExpensive() const {
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 28664b2ed9052d..12a6ec5f7dfb3b 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -2646,8 +2646,11 @@ void SelectionDAGBuilder::visitBr(const BranchInst &I) {
else if (match(BOp, m_LogicalOr(m_Value(BOp0), m_Value(BOp1))))
Opcode = Instruction::Or;
- if (Opcode && !(match(BOp0, m_ExtractElt(m_Value(Vec), m_Value())) &&
- match(BOp1, m_ExtractElt(m_Specific(Vec), m_Value())))) {
+ if (Opcode &&
+ !(match(BOp0, m_ExtractElt(m_Value(Vec), m_Value())) &&
+ match(BOp1, m_ExtractElt(m_Specific(Vec), m_Value()))) &&
+ !DAG.getTargetLoweringInfo().keepJumpConditionsTogether(
+ FuncInfo, I, Opcode, BOp0, BOp1)) {
FindMergedConditions(BOp, Succ0MBB, Succ1MBB, BrMBB, BrMBB, Opcode,
getEdgeProbability(BrMBB, Succ0MBB),
getEdgeProbability(BrMBB, Succ1MBB),
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 18f9871b2bd0c3..8ff48630cfc923 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -27,9 +27,12 @@
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/ObjCARCUtil.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/VectorUtils.h"
+#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/IntrinsicLowering.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -55,6 +58,7 @@
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
@@ -77,6 +81,24 @@ static cl::opt<int> ExperimentalPrefInnermostLoopAlignment(
"alignment set by x86-experimental-pref-loop-alignment."),
cl::Hidden);
+static cl::opt<int> BrMergingBaseCostThresh(
+ "x86-cond-base", cl::init(1),
+ cl::desc(
+ "Base."),
+ cl::Hidden);
+
+static cl::opt<int> BrMergingLikelyBias(
+ "x86-cond-likely-bias", cl::init(0),
+ cl::desc(
+ "Likely."),
+ cl::Hidden);
+
+static cl::opt<int> BrMergingUnlikelyBias(
+ "x86-cond-unlikely-bias", cl::init(1),
+ cl::desc(
+ "Unlikely."),
+ cl::Hidden);
+
static cl::opt<bool> MulConstantOptimization(
"mul-constant-optimization", cl::init(true),
cl::desc("Replace 'mul x, Const' with more effective instructions like "
@@ -3339,6 +3361,143 @@ unsigned X86TargetLowering::preferedOpcodeForCmpEqPiecesOfOperand(
return ISD::SRL;
}
+// Collect dependings on V recursively. This is used for the cost analysis in
+// `keepJumpConditionsTogether`.
+static bool
+collectDeps(SmallPtrSet<const Instruction *, 8> *Deps, const Value *V,
+ SmallPtrSet<const Instruction *, 8> *Necessary = nullptr,
+ unsigned Depth = 0) {
+ // Return false if we have an incomplete count.
+ if (Depth >= 6)
+ return false;
+
+ auto *I = dyn_cast<Instruction>(V);
+ if (I == nullptr)
+ return true;
+
+ if (Necessary != nullptr) {
+ // This instruction is necessary for the other side of the condition so
+ // don't count it.
+ if (Necessary->contains(I))
+ return true;
+ }
+
+ // Already added this dep.
+ if (!Deps->insert(I).second)
+ return true;
+
+ for (unsigned OpIdx = 0; OpIdx < I->getNumOperands(); ++OpIdx)
+ if (!collectDeps(Deps, I->getOperand(OpIdx), Necessary, Depth + 1))
+ return false;
+ return true;
+}
+
+bool X86TargetLowering::keepJumpConditionsTogether(
+ const FunctionLoweringInfo &FuncInfo, const BranchInst &I,
+ Instruction::BinaryOps Opc, const Value *Lhs, const Value *Rhs) const {
+ using namespace llvm::PatternMatch;
+ if (I.getNumSuccessors() != 2)
+ return false;
+
+ // Baseline cost. This is properly arbitrary.
+ InstructionCost CostThresh = BrMergingBaseCostThresh.getValue();
+ if (BrMergingBaseCostThresh.getValue() < 0)
+ return false;
+
+ // a == b && c == d can be efficiently combined.
+ ICmpInst::Predicate Pred;
+ if (Opc == Instruction::And &&
+ match(Lhs, m_ICmp(Pred, m_Value(), m_Value())) &&
+ Pred == ICmpInst::ICMP_EQ &&
+ match(Rhs, m_ICmp(Pred, m_Value(), m_Value())) &&
+ Pred == ICmpInst::ICMP_EQ)
+ CostThresh += 1;
+
+ BranchProbabilityInfo *BPI = nullptr;
+ if (BrMergingLikelyBias.getValue() || BrMergingUnlikelyBias.getValue())
+ BPI = FuncInfo.BPI;
+ if (BPI != nullptr) {
+ BasicBlock *IfFalse = I.getSuccessor(0);
+ BasicBlock *IfTrue = I.getSuccessor(1);
+
+ std::optional<bool> Likely;
+ if (BPI->isEdgeHot(I.getParent(), IfTrue))
+ Likely = true;
+ else if (BPI->isEdgeHot(I.getParent(), IfFalse))
+ Likely = false;
+
+ if (Likely) {
+ if (Opc == (*Likely ? Instruction::And : Instruction::Or))
+ // Its likely we will have to compute both lhs and rhs of condition
+ CostThresh += BrMergingLikelyBias.getValue();
+ else {
+ // Its likely we will get an early out.
+ CostThresh -= BrMergingUnlikelyBias.getValue();
+ if (BrMergingUnlikelyBias.getValue() < 0) {
+ return false;
+ }
+ }
+ }
+ }
+
+ if (CostThresh <= 0)
+ return false;
+
+ // Collect "all" instructions that lhs condition is dependent on.
+ SmallPtrSet<const Instruction *, 8> LhsDeps, RhsDeps;
+ collectDeps(&LhsDeps, Lhs);
+ // Collect "all" instructions that rhs condition is dependent on AND are
+ // dependencies of lhs. This gives us an estimate on which instructions we
+ // stand to save by splitting the condition.
+ if (!collectDeps(&RhsDeps, Rhs, &LhsDeps))
+ return false;
+ // Add the compare instruction itself unless its a dependency on the LHS.
+ if (const auto *RhsI = dyn_cast<Instruction>(Rhs))
+ if (!LhsDeps.contains(RhsI))
+ RhsDeps.insert(RhsI);
+ const auto &TTI = getTargetMachine().getTargetTransformInfo(*I.getFunction());
+
+ InstructionCost CostOfIncluding = 0;
+ // See if this instruction will need to computed independently of whether RHS
+ // is.
+ auto ShouldCountInsn = [&RhsDeps](const Instruction *Ins) {
+ for (const auto *U : Ins->users()) {
+ // If user is independent of RHS calculation we don't need to count it.
+ if (auto *UIns = dyn_cast<Instruction>(U))
+ if (!RhsDeps.contains(UIns))
+ return false;
+ }
+ return true;
+ };
+
+ // Prune instructions from RHS Deps that are dependencies of unrelated
+ // instructions.
+ const unsigned MaxPruneIters = 8;
+ // Stop after a certain point. No incorrectness from including too many
+ // instructions.
+ for (unsigned PruneIters = 0; PruneIters < MaxPruneIters; ++PruneIters) {
+ const Instruction *ToDrop = nullptr;
+ for (const auto *Ins : RhsDeps) {
+ if (!ShouldCountInsn(Ins)) {
+ ToDrop = Ins;
+ break;
+ }
+ }
+ if (ToDrop == nullptr)
+ break;
+ RhsDeps.erase(ToDrop);
+ }
+
+ for (const auto *Ins : RhsDeps) {
+ CostOfIncluding +=
+ TTI.getInstructionCost(Ins, TargetTransformInfo::TCK_Latency);
+
+ if (CostOfIncluding > CostThresh)
+ return false;
+ }
+ return true;
+}
+
bool X86TargetLowering::preferScalarizeSplat(SDNode *N) const {
return N->getOpcode() != ISD::FP_EXTEND;
}
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index f93c54781846bf..f536c8a6766b68 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -1150,6 +1150,11 @@ namespace llvm {
bool preferScalarizeSplat(SDNode *N) const override;
+ bool keepJumpConditionsTogether(const FunctionLoweringInfo &,
+ const BranchInst &, Instruction::BinaryOps,
+ const Value *,
+ const Value *) const override;
+
bool shouldFoldConstantShiftPairToMask(const SDNode *N,
CombineLevel Level) const override;
diff --git a/llvm/test/CodeGen/X86/2006-04-27-ISelFoldingBug.ll b/llvm/test/CodeGen/X86/2006-04-27-ISelFoldingBug.ll
index 0044d1c3568377..e6f28c2057f775 100644
--- a/llvm/test/CodeGen/X86/2006-04-27-ISelFoldingBug.ll
+++ b/llvm/test/CodeGen/X86/2006-04-27-ISelFoldingBug.ll
@@ -18,15 +18,16 @@ define i1 @loadAndRLEsource_no_exit_2E_1_label_2E_0(i32 %tmp.21.reload, i32 %tmp
; CHECK-NEXT: movl _block, %esi
; CHECK-NEXT: movb %al, 1(%esi,%edx)
; CHECK-NEXT: cmpl %ecx, _last
-; CHECK-NEXT: jge LBB0_3
-; CHECK-NEXT: ## %bb.1: ## %label.0
+; CHECK-NEXT: setl %cl
; CHECK-NEXT: cmpl $257, %eax ## imm = 0x101
-; CHECK-NEXT: je LBB0_3
-; CHECK-NEXT: ## %bb.2: ## %label.0.no_exit.1_crit_edge.exitStub
+; CHECK-NEXT: setne %al
+; CHECK-NEXT: testb %al, %cl
+; CHECK-NEXT: je LBB0_2
+; CHECK-NEXT: ## %bb.1: ## %label.0.no_exit.1_crit_edge.exitStub
; CHECK-NEXT: movb $1, %al
; CHECK-NEXT: popl %esi
; CHECK-NEXT: retl
-; CHECK-NEXT: LBB0_3: ## %codeRepl5.exitStub
+; CHECK-NEXT: LBB0_2: ## %codeRepl5.exitStub
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: popl %esi
; CHECK-NEXT: retl
diff --git a/llvm/test/CodeGen/X86/2007-08-09-IllegalX86-64Asm.ll b/llvm/test/CodeGen/X86/2007-08-09-IllegalX86-64Asm.ll
index 7bdc4e19a1cf66..28b4541c1bfc7f 100644
--- a/llvm/test/CodeGen/X86/2007-08-09-IllegalX86-64Asm.ll
+++ b/llvm/test/CodeGen/X86/2007-08-09-IllegalX86-64Asm.ll
@@ -44,7 +44,7 @@ define ptr @ubyte_divmod(ptr %a, ptr %b) {
; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rsi
; CHECK-NEXT: callq __ubyte_convert_to_ctype
; CHECK-NEXT: testl %eax, %eax
-; CHECK-NEXT: js LBB0_4
+; CHECK-NEXT: js LBB0_6
; CHECK-NEXT: ## %bb.1: ## %cond_next.i
; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rsi
; CHECK-NEXT: movq %rbx, %rdi
@@ -53,81 +53,84 @@ define ptr @ubyte_divmod(ptr %a, ptr %b) {
; CHECK-NEXT: sarl $31, %ecx
; CHECK-NEXT: andl %eax, %ecx
; CHECK-NEXT: cmpl $-2, %ecx
-; CHECK-NEXT: je LBB0_8
+; CHECK-NEXT: je LBB0_10
; CHECK-NEXT: ## %bb.2: ## %cond_next.i
; CHECK-NEXT: cmpl $-1, %ecx
-; CHECK-NEXT: jne LBB0_6
-; CHECK-NEXT: LBB0_3: ## %bb4
+; CHECK-NEXT: jne LBB0_3
+; CHECK-NEXT: LBB0_8: ## %bb4
; CHECK-NEXT: movq _PyArray_API at GOTPCREL(%rip), %rax
; CHECK-NEXT: movq (%rax), %rax
; CHECK-NEXT: movq 16(%rax), %rax
-; CHECK-NEXT: jmp LBB0_10
-; CHECK-NEXT: LBB0_4: ## %_ubyte_convert2_to_ctypes.exit
+; CHECK-NEXT: jmp LBB0_9
+; CHECK-NEXT: LBB0_6: ## %_ubyte_convert2_to_ctypes.exit
; CHECK-NEXT: cmpl $-2, %eax
-; CHECK-NEXT: je LBB0_8
-; CHECK-NEXT: ## %bb.5: ## %_ubyte_convert2_to_ctypes.exit
+; CHECK-NEXT: je LBB0_10
+; CHECK-NEXT: ## %bb.7: ## %_ubyte_convert2_to_ctypes.exit
; CHECK-NEXT: cmpl $-1, %eax
-; CHECK-NEXT: je LBB0_3
-; CHECK-NEXT: LBB0_6: ## %bb35
+; CHECK-NEXT: je LBB0_8
+; CHECK-NEXT: LBB0_3: ## %bb35
; CHECK-NEXT: movq _PyUFunc_API at GOTPCREL(%rip), %r14
; CHECK-NEXT: movq (%r14), %rax
; CHECK-NEXT: callq *216(%rax)
; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %edx
; CHECK-NEXT: testb %dl, %dl
-; CHECK-NEXT: je LBB0_11
-; CHECK-NEXT: ## %bb.7: ## %cond_false.i
+; CHECK-NEXT: je LBB0_4
+; CHECK-NEXT: ## %bb.12: ## %cond_false.i
+; CHECK-NEXT: setne %dil
; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %esi
; CHECK-NEXT: movzbl %sil, %ecx
; CHECK-NEXT: movl %ecx, %eax
; CHECK-NEXT: divb %dl
; CHECK-NEXT: movl %eax, %r15d
; CHECK-NEXT: testb %cl, %cl
-; CHECK-NEXT: jne LBB0_12
-; CHECK-NEXT: jmp LBB0_14
-; CHECK-NEXT: LBB0_8: ## %bb17
+; CHECK-NEXT: setne %al
+; CHECK-NEXT: testb %dil, %al
+; CHECK-NEXT: jne LBB0_5
+; CHECK-NEXT: LBB0_13: ## %cond_true.i200
+; CHECK-NEXT: testb %dl, %dl
+; CHECK-NEXT: jne LBB0_15
+; CHECK-NEXT: ## %bb.14: ## %cond_true14.i
+; CHECK-NEXT: movl $4, %edi
+; CHECK-NEXT: callq _feraiseexcept
+; CHECK-NEXT: LBB0_15: ## %ubyte_ctype_remainder.exit
+; CHECK-NEXT: xorl %ebx, %ebx
+; CHECK-NEXT: jmp LBB0_16
+; CHECK-NEXT: LBB0_10: ## %bb17
; CHECK-NEXT: callq _PyErr_Occurred
; CHECK-NEXT: testq %rax, %rax
-; CHECK-NEXT: jne LBB0_27
-; CHECK-NEXT: ## %bb.9: ## %cond_next
+; CHECK-NEXT: jne LBB0_23
+; CHECK-NEXT: ## %bb.11: ## %cond_next
; CHECK-NEXT: movq _PyArray_API at GOTPCREL(%rip), %rax
; CHECK-NEXT: movq (%rax), %rax
; CHECK-NEXT: movq 80(%rax), %rax
-; CHECK-NEXT: LBB0_10: ## %bb4
+; CHECK-NEXT: LBB0_9: ## %bb4
; CHECK-NEXT: movq 96(%rax), %rax
; CHECK-NEXT: movq %r14, %rdi
; CHECK-NEXT: movq %rbx, %rsi
; CHECK-NEXT: callq *40(%rax)
-; CHECK-NEXT: jmp LBB0_28
-; CHECK-NEXT: LBB0_11: ## %cond_true.i
+; CHECK-NEXT: jmp LBB0_24
+; CHECK-NEXT: LBB0_4: ## %cond_true.i
; CHECK-NEXT: movl $4, %edi
; CHECK-NEXT: callq _feraiseexcept
; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %edx
; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %esi
-; CHECK-NEXT: xorl %r15d, %r15d
; CHECK-NEXT: testb %sil, %sil
-; CHECK-NEXT: je LBB0_14
-; CHECK-NEXT: LBB0_12: ## %cond_false.i
+; CHECK-NEXT: sete %al
; CHECK-NEXT: testb %dl, %dl
-; CHECK-NEXT: je LBB0_14
-; CHECK-NEXT: ## %bb.13: ## %cond_next17.i
+; CHECK-NEXT: sete %cl
+; CHECK-NEXT: xorl %r15d, %r15d
+; CHECK-NEXT: orb %al, %cl
+; CHECK-NEXT: jne LBB0_13
+; CHECK-NEXT: LBB0_5: ## %cond_next17.i
; CHECK-NEXT: movzbl %sil, %eax
; CHECK-NEXT: divb %dl
; CHECK-NEXT: movzbl %ah, %ebx
-; CHECK-NEXT: jmp LBB0_18
-; CHECK-NEXT: LBB0_14: ## %cond_true.i200
-; CHECK-NEXT: testb %dl, %dl
-; CHECK-NEXT: jne LBB0_17
-; CHECK-NEXT: ## %bb.16: ## %cond_true14.i
-; CHECK-NEXT: movl $4, %edi
-; CHECK-NEXT: callq _feraiseexcept
-; CHECK-NEXT: LBB0_17: ## %ubyte_ctype_remainder.exit
-; CHECK-NEXT: xorl %ebx, %ebx
-; CHECK-NEXT: LBB0_18: ## %ubyte_ctype_remainder.exit
+; CHECK-NEXT: LBB0_16: ## %ubyte_ctype_remainder.exit
; CHECK-NEXT: movq (%r14), %rax
; CHECK-NEXT: callq *224(%rax)
; CHECK-NEXT: testl %eax, %eax
-; CHECK-NEXT: je LBB0_21
-; CHECK-NEXT: ## %bb.19: ## %cond_true61
+; CHECK-NEXT: je LBB0_19
+; CHECK-NEXT: ## %bb.17: ## %cond_true61
; CHECK-NEXT: movl %eax, %ebp
; CHECK-NEXT: movq (%r14), %rax
; CHECK-NEXT: movq _.str5 at GOTPCREL(%rip), %rdi
@@ -136,8 +139,8 @@ define ptr @ubyte_divmod(ptr %a, ptr %b) {
; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rcx
; CHECK-NEXT: callq *200(%rax)
; CHECK-NEXT: testl %eax, %eax
-; CHECK-NEXT: js LBB0_27
-; CHECK-NEXT: ## %bb.20: ## %cond_next73
+; CHECK-NEXT: js LBB0_23
+; CHECK-NEXT: ## %bb.18: ## %cond_next73
; CHECK-NEXT: movl $1, {{[0-9]+}}(%rsp)
; CHECK-NEXT: movq (%r14), %rax
; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rsi
@@ -146,13 +149,13 @@ define ptr @ubyte_divmod(ptr %a, ptr %b) {
; CHECK-NEXT: movl %ebp, %edx
; CHECK-NEXT: callq *232(%rax)
; CHECK-NEXT: testl %eax, %eax
-; CHECK-NEXT: jne LBB0_27
-; CHECK-NEXT: LBB0_21: ## %cond_next89
+; CHECK-NEXT: jne LBB0_23
+; CHECK-NEXT: LBB0_19: ## %cond_next89
; CHECK-NEXT: movl $2, %edi
; CHECK-NEXT: callq _PyTuple_New
; CHECK-NEXT: testq %rax, %rax
-; CHECK-NEXT: je LBB0_27
-; CHECK-NEXT: ## %bb.22: ## %cond_next97
+; CHECK-NEXT: je LBB0_23
+; CHECK-NEXT: ## %bb.20: ## %cond_next97
; CHECK-NEXT: movq %rax, %r14
; CHECK-NEXT: movq _PyArray_API at GOTPCREL(%rip), %r12
; CHECK-NEXT: movq (%r12), %rax
@@ -160,8 +163,8 @@ define ptr @ubyte_divmod(ptr %a, ptr %b) {
; CHECK-NEXT: xorl %esi, %esi
; CHECK-NEXT: callq *304(%rdi)
; CHECK-NEXT: testq %rax, %rax
-; CHECK-NEXT: je LBB0_25
-; CHECK-NEXT: ## %bb.23: ## %cond_next135
+; CHECK-NEXT: je LBB0_21
+; CHECK-NEXT: ## %bb.25: ## %cond_next135
; CHECK-NEXT: movb %r15b, 16(%rax)
; CHECK-NEXT: movq %rax, 24(%r14)
; CHECK-NEXT: movq (%r12), %rax
@@ -169,22 +172,22 @@ define ptr @ubyte_divmod(ptr %a, ptr %b) {
; CHECK-NEXT: xorl %esi, %esi
; CHECK-NEXT: callq *304(%rdi)
; CHECK-NEXT: testq %rax, %rax
-; CHECK-NEXT: je LBB0_25
-; CHECK-NEXT: ## %bb.24: ## %cond_next182
+; CHECK-NEXT: je LBB0_21
+; CHECK-NEXT: ## %bb.26: ## %cond_next182
; CHECK-NEXT: movb %bl, 16(%rax)
; CHECK-NEXT: movq %rax, 32(%r14)
; CHECK-NEXT: movq %r14, %rax
-; CHECK-NEXT: jmp LBB0_28
-; CHECK-NEXT: LBB0_25: ## %cond_true113
+; CHECK-NEXT: jmp LBB0_24
+; CHECK-NEXT: LBB0_21: ## %cond_true113
; CHECK-NEXT: decq (%r14)
-; CHECK-NEXT: jne LBB0_27
-; CHECK-NEXT: ## %bb.26: ## %cond_true126
+; CHECK-NEXT: jne LBB0_23
+; CHECK-NEXT: ## %bb.22: ## %cond_true126
; CHECK-NEXT: movq 8(%r14), %rax
; CHECK-NEXT: movq %r14, %rdi
; CHECK-NEXT: callq *48(%rax)
-; CHECK-NEXT: LBB0_27: ## %UnifiedReturnBlock
+; CHECK-NEXT: LBB0_23: ## %UnifiedReturnBlock
; CHECK-NEXT: xorl %eax, %eax
-; CHECK-NEXT: LBB0_28: ## %UnifiedReturnBlock
+; CHECK-NEXT: LBB0_24: ## %UnifiedReturnBlock
; CHECK-NEXT: addq $32, %rsp
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: popq %r12
diff --git a/llvm/test/CodeGen/X86/2007-12-18-LoadCSEBug.ll b/llvm/test/CodeGen/X86/2007-12-18-LoadCSEBug.ll
index 4482c5aec8e816..d9d4424267d733 100644
--- a/llvm/test/CodeGen/X86/2007-12-18-LoadCSEBug.ll
+++ b/llvm/test/CodeGen/X86/2007-12-18-LoadCSEBug.ll
@@ -16,15 +16,12 @@ define void @_ada_c34007g() {
; CHECK-NEXT: andl $-8, %esp
; CHECK-NEXT: subl $8, %esp
; CHECK-NEXT: movl (%esp), %eax
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; CHECK-NEXT: orl %eax, %ecx
+; CHECK-NEXT: sete %cl
; CHECK-NEXT: testl %eax, %eax
-; CHECK-NEXT: je .LBB0_3
-; CHECK-NEXT: # %bb.1: # %entry
-; CHECK-NEXT: orl {{[0-9]+}}(%esp), %eax
-; CHECK-NEXT: jne .LBB0_3
-; CHECK-NEXT: # %bb.2: # %entry
-; CHECK-NEXT: movb $1, %al
-; CHECK-NEXT: testb %al, %al
-; CHECK-NEXT: .LBB0_3: # %bb5507
+; CHECK-NEXT: setne %al
+; CHECK-NEXT: testb %cl, %al
; CHECK-NEXT: movl %ebp, %esp
; CHECK-NEXT: popl %ebp
; CHECK-NEXT: .cfi_def_cfa %esp, 4
diff --git a/llvm/test/CodeGen/X86/2008-02-18-TailMergingBug.ll b/llvm/test/CodeGen/X86/2008-02-18-TailMergingBug.ll
index dd60e641df2543..9147692db15645 100644
--- a/llvm/test/CodeGen/X86/2008-02-18-TailMergingBug.ll
+++ b/llvm/test/CodeGen/X86/2008-02-18-TailMergingBug.ll
@@ -1,5 +1,6 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; REQUIRES: asserts
-; RUN: llc < %s -mtriple=i686-- -mcpu=yonah -stats 2>&1 | grep "Number of block tails merged" | grep 16
+; RUN: llc < %s -mtriple=i686-- -mcpu=yonah -stats 2>&1 | grep "Number of block tails merged" | grep 9
; PR1909
@.str = internal constant [48 x i8] c"transformed bounds: (%.2f, %.2f), (%.2f, %.2f)\0A\00" ; <ptr> [#uses=1]
@@ -217,4 +218,4 @@ bb456: ; preds = %bb448, %bb425, %bb417, %bb395, %bb385, %bb371
ret void
}
-declare i32 @printf(ptr, ...) nounwind
+declare i32 @printf(ptr, ...) nounwind
diff --git a/llvm/test/CodeGen/X86/avx-cmp.ll b/llvm/test/CodeGen/X86/avx-cmp.ll
index 502bbf3f5d118b..4ab9c545ed90da 100644
--- a/llvm/test/CodeGen/X86/avx-cmp.ll
+++ b/llvm/test/CodeGen/X86/avx-cmp.ll
@@ -26,40 +26,33 @@ declare void @scale() nounwind
define v...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/81689
More information about the llvm-commits
mailing list