[clang] [llvm] [profcheck] Fix profile metadata propagation for Large Integer operations (PR #175862)
Jin Huang via cfe-commits
cfe-commits at lists.llvm.org
Fri Jan 16 15:29:37 PST 2026
https://github.com/jinhuang1102 updated https://github.com/llvm/llvm-project/pull/175862
>From 6bc05654cffeff9c7926ae1136255cbaaec15707 Mon Sep 17 00:00:00 2001
From: Andy Kaylor <akaylor at nvidia.com>
Date: Fri, 16 Jan 2026 14:26:42 -0800
Subject: [PATCH] [CIR] Upstream handling for member ptr function compare
(#176029)
This adds handling for compare expressions involving member pointer to
functions.
---
.../CIR/Dialect/Transforms/CXXABILowering.cpp | 16 +-
.../Transforms/TargetLowering/CIRCXXABI.h | 4 +
.../TargetLowering/LowerItaniumCXXABI.cpp | 52 +
.../CodeGen/pointer-to-member-func-cmp.cpp | 121 ++
llvm/include/llvm/IR/ProfDataUtils.h | 7 +
llvm/lib/IR/ProfDataUtils.cpp | 14 +
llvm/lib/Transforms/Utils/IntegerDivision.cpp | 66 +-
llvm/test/CodeGen/RISCV/idiv_large.ll | 1061 ++++++++---------
.../X86/div-rem-pair-recomposition-signed.ll | 463 +++----
.../div-rem-pair-recomposition-unsigned.ll | 479 ++++----
llvm/test/CodeGen/X86/pr38539.ll | 187 +--
.../Transforms/ExpandIRInsts/X86/sdiv129.ll | 26 +-
.../Transforms/ExpandIRInsts/X86/srem129.ll | 26 +-
.../Transforms/ExpandIRInsts/X86/udiv129.ll | 26 +-
.../Transforms/ExpandIRInsts/X86/urem129.ll | 26 +-
.../Transforms/ExpandIRInsts/X86/vector.ll | 88 +-
16 files changed, 1492 insertions(+), 1170 deletions(-)
create mode 100644 clang/test/CIR/CodeGen/pointer-to-member-func-cmp.cpp
diff --git a/clang/lib/CIR/Dialect/Transforms/CXXABILowering.cpp b/clang/lib/CIR/Dialect/Transforms/CXXABILowering.cpp
index 145f8574893f4..dbe656ac011d8 100644
--- a/clang/lib/CIR/Dialect/Transforms/CXXABILowering.cpp
+++ b/clang/lib/CIR/Dialect/Transforms/CXXABILowering.cpp
@@ -159,12 +159,16 @@ mlir::LogicalResult CIRCmpOpABILowering::matchAndRewrite(
cir::CmpOp op, OpAdaptor adaptor,
mlir::ConversionPatternRewriter &rewriter) const {
mlir::Type type = op.getLhs().getType();
- assert((mlir::isa<cir::DataMemberType>(type)) &&
- "input to cmp in ABI lowering must be a data member");
-
- assert(!cir::MissingFeatures::methodType());
- mlir::Value loweredResult = lowerModule->getCXXABI().lowerDataMemberCmp(
- op, adaptor.getLhs(), adaptor.getRhs(), rewriter);
+ assert((mlir::isa<cir::DataMemberType, cir::MethodType>(type)) &&
+ "input to cmp in ABI lowering must be a data member or method");
+
+ mlir::Value loweredResult;
+ if (mlir::isa<cir::DataMemberType>(type))
+ loweredResult = lowerModule->getCXXABI().lowerDataMemberCmp(
+ op, adaptor.getLhs(), adaptor.getRhs(), rewriter);
+ else
+ loweredResult = lowerModule->getCXXABI().lowerMethodCmp(
+ op, adaptor.getLhs(), adaptor.getRhs(), rewriter);
rewriter.replaceOp(op, loweredResult);
return mlir::success();
diff --git a/clang/lib/CIR/Dialect/Transforms/TargetLowering/CIRCXXABI.h b/clang/lib/CIR/Dialect/Transforms/TargetLowering/CIRCXXABI.h
index 0dedfa7221f5f..f4d608cdbad03 100644
--- a/clang/lib/CIR/Dialect/Transforms/TargetLowering/CIRCXXABI.h
+++ b/clang/lib/CIR/Dialect/Transforms/TargetLowering/CIRCXXABI.h
@@ -81,6 +81,10 @@ class CIRCXXABI {
virtual mlir::Value lowerDataMemberCmp(cir::CmpOp op, mlir::Value loweredLhs,
mlir::Value loweredRhs,
mlir::OpBuilder &builder) const = 0;
+
+ virtual mlir::Value lowerMethodCmp(cir::CmpOp op, mlir::Value loweredLhs,
+ mlir::Value loweredRhs,
+ mlir::OpBuilder &builder) const = 0;
};
/// Creates an Itanium-family ABI.
diff --git a/clang/lib/CIR/Dialect/Transforms/TargetLowering/LowerItaniumCXXABI.cpp b/clang/lib/CIR/Dialect/Transforms/TargetLowering/LowerItaniumCXXABI.cpp
index d944fa3294684..39fcbcdf49f3e 100644
--- a/clang/lib/CIR/Dialect/Transforms/TargetLowering/LowerItaniumCXXABI.cpp
+++ b/clang/lib/CIR/Dialect/Transforms/TargetLowering/LowerItaniumCXXABI.cpp
@@ -23,6 +23,7 @@
#include "CIRCXXABI.h"
#include "LowerModule.h"
#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
+#include "mlir/IR/ImplicitLocOpBuilder.h"
#include "llvm/Support/ErrorHandling.h"
namespace cir {
@@ -67,6 +68,10 @@ class LowerItaniumCXXABI : public CIRCXXABI {
mlir::Value lowerDataMemberCmp(cir::CmpOp op, mlir::Value loweredLhs,
mlir::Value loweredRhs,
mlir::OpBuilder &builder) const override;
+
+ mlir::Value lowerMethodCmp(cir::CmpOp op, mlir::Value loweredLhs,
+ mlir::Value loweredRhs,
+ mlir::OpBuilder &builder) const override;
};
} // namespace
@@ -249,4 +254,51 @@ LowerItaniumCXXABI::lowerDataMemberCmp(cir::CmpOp op, mlir::Value loweredLhs,
loweredRhs);
}
+mlir::Value LowerItaniumCXXABI::lowerMethodCmp(cir::CmpOp op,
+ mlir::Value loweredLhs,
+ mlir::Value loweredRhs,
+ mlir::OpBuilder &builder) const {
+ assert(op.getKind() == cir::CmpOpKind::eq ||
+ op.getKind() == cir::CmpOpKind::ne);
+
+ mlir::ImplicitLocOpBuilder locBuilder(op.getLoc(), builder);
+ cir::IntType ptrdiffCIRTy = getPtrDiffCIRTy(lm);
+ mlir::Value ptrdiffZero =
+ cir::ConstantOp::create(locBuilder, cir::IntAttr::get(ptrdiffCIRTy, 0));
+
+ mlir::Value lhsPtrField =
+ cir::ExtractMemberOp::create(locBuilder, ptrdiffCIRTy, loweredLhs, 0);
+ mlir::Value rhsPtrField =
+ cir::ExtractMemberOp::create(locBuilder, ptrdiffCIRTy, loweredRhs, 0);
+ mlir::Value ptrCmp =
+ cir::CmpOp::create(locBuilder, op.getKind(), lhsPtrField, rhsPtrField);
+ mlir::Value ptrCmpToNull =
+ cir::CmpOp::create(locBuilder, op.getKind(), lhsPtrField, ptrdiffZero);
+
+ mlir::Value lhsAdjField =
+ cir::ExtractMemberOp::create(locBuilder, ptrdiffCIRTy, loweredLhs, 1);
+ mlir::Value rhsAdjField =
+ cir::ExtractMemberOp::create(locBuilder, ptrdiffCIRTy, loweredRhs, 1);
+ mlir::Value adjCmp =
+ cir::CmpOp::create(locBuilder, op.getKind(), lhsAdjField, rhsAdjField);
+
+ auto create_and = [&](mlir::Value lhs, mlir::Value rhs) {
+ return cir::BinOp::create(locBuilder, cir::BinOpKind::And, lhs, rhs);
+ };
+ auto create_or = [&](mlir::Value lhs, mlir::Value rhs) {
+ return cir::BinOp::create(locBuilder, cir::BinOpKind::Or, lhs, rhs);
+ };
+
+ mlir::Value result;
+ if (op.getKind() == cir::CmpOpKind::eq) {
+ // (lhs.ptr == null || lhs.adj == rhs.adj) && lhs.ptr == rhs.ptr
+ result = create_and(ptrCmp, create_or(ptrCmpToNull, adjCmp));
+ } else {
+ // (lhs.ptr != null && lhs.adj != rhs.adj) || lhs.ptr != rhs.ptr
+ result = create_or(ptrCmp, create_and(ptrCmpToNull, adjCmp));
+ }
+
+ return result;
+}
+
} // namespace cir
diff --git a/clang/test/CIR/CodeGen/pointer-to-member-func-cmp.cpp b/clang/test/CIR/CodeGen/pointer-to-member-func-cmp.cpp
new file mode 100644
index 0000000000000..f00d227e97fe7
--- /dev/null
+++ b/clang/test/CIR/CodeGen/pointer-to-member-func-cmp.cpp
@@ -0,0 +1,121 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -std=c++17 -fclangir -emit-cir -mmlir -mlir-print-ir-before=cir-cxxabi-lowering %s -o %t.cir 2> %t-before.cir
+// RUN: FileCheck --check-prefix=CIR-BEFORE --input-file=%t-before.cir %s
+// RUN: FileCheck --check-prefix=CIR-AFTER --input-file=%t.cir %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -std=c++17 -fclangir -emit-llvm %s -o %t-cir.ll
+// RUN: FileCheck --input-file=%t-cir.ll --check-prefix=LLVM %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -std=c++17 -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=OGCG --input-file=%t.ll %s
+
+struct Foo {
+ void m1(int);
+ virtual void m2(int);
+ virtual void m3(int);
+};
+
+bool cmp_eq(void (Foo::*lhs)(int), void (Foo::*rhs)(int)) {
+ return lhs == rhs;
+}
+
+// CIR-BEFORE: cir.func {{.*}} @_Z6cmp_eqM3FooFviES1_
+// CIR-BEFORE: %[[LHS:.*]] = cir.load{{.*}} %0 : !cir.ptr<!cir.method<!cir.func<(!s32i)> in !rec_Foo>>
+// CIR-BEFORE: %[[RHS:.*]] = cir.load{{.*}} %1 : !cir.ptr<!cir.method<!cir.func<(!s32i)> in !rec_Foo>>
+// CIR-BEFORE: %[[CMP:.*]] = cir.cmp(eq, %[[LHS]], %[[RHS]]) : !cir.method<!cir.func<(!s32i)> in !rec_Foo>, !cir.bool
+// CIR-BEFORE: cir.store %[[CMP]], %{{.*}} : !cir.bool, !cir.ptr<!cir.bool>
+
+// CIR-AFTER: @_Z6cmp_eqM3FooFviES1_
+// CIR-AFTER: %[[LHS:.*]] = cir.load{{.*}} %0 : !cir.ptr<!rec_anon_struct>, !rec_anon_struct
+// CIR-AFTER: %[[RHS:.*]] = cir.load{{.*}} %1 : !cir.ptr<!rec_anon_struct>, !rec_anon_struct
+// CIR-AFTER: %[[NULL:.*]] = cir.const #cir.int<0> : !s64i
+// CIR-AFTER: %[[LHS_PTR:.*]] = cir.extract_member %[[LHS]][0] : !rec_anon_struct -> !s64i
+// CIR-AFTER: %[[RHS_PTR:.*]] = cir.extract_member %[[RHS]][0] : !rec_anon_struct -> !s64i
+// CIR-AFTER: %[[PTR_CMP:.*]] = cir.cmp(eq, %[[LHS_PTR]], %[[RHS_PTR]]) : !s64i, !cir.bool
+// CIR-AFTER: %[[PTR_NULL:.*]] = cir.cmp(eq, %[[LHS_PTR]], %[[NULL]]) : !s64i, !cir.bool
+// CIR-AFTER: %[[LHS_ADJ:.*]] = cir.extract_member %[[LHS]][1] : !rec_anon_struct -> !s64i
+// CIR-AFTER: %[[RHS_ADJ:.*]] = cir.extract_member %[[RHS]][1] : !rec_anon_struct -> !s64i
+// CIR-AFTER: %[[ADJ_CMP:.*]] = cir.cmp(eq, %[[LHS_ADJ]], %[[RHS_ADJ]]) : !s64i, !cir.bool
+// CIR-AFTER: %[[TMP:.*]] = cir.binop(or, %[[PTR_NULL]], %[[ADJ_CMP]]) : !cir.bool
+// CIR-AFTER: %[[RESULT:.*]] = cir.binop(and, %[[PTR_CMP]], %[[TMP]]) : !cir.bool
+
+// LLVM: define {{.*}} i1 @_Z6cmp_eqM3FooFviES1_
+// LLVM: %[[LHS:.*]] = load { i64, i64 }, ptr %{{.+}}
+// LLVM: %[[RHS:.*]] = load { i64, i64 }, ptr %{{.+}}
+// LLVM: %[[LHS_PTR:.*]] = extractvalue { i64, i64 } %[[LHS]], 0
+// LLVM: %[[RHS_PTR:.*]] = extractvalue { i64, i64 } %[[RHS]], 0
+// LLVM: %[[PTR_CMP:.*]] = icmp eq i64 %[[LHS_PTR]], %[[RHS_PTR]]
+// LLVM: %[[PTR_NULL:.*]] = icmp eq i64 %[[LHS_PTR]], 0
+// LLVM: %[[LHS_ADJ:.*]] = extractvalue { i64, i64 } %[[LHS]], 1
+// LLVM: %[[RHS_ADJ:.*]] = extractvalue { i64, i64 } %[[RHS]], 1
+// LLVM: %[[ADJ_CMP:.*]] = icmp eq i64 %[[LHS_ADJ]], %[[RHS_ADJ]]
+// LLVM: %[[TMP:.*]] = or i1 %[[PTR_NULL]], %[[ADJ_CMP]]
+// LLVM: %[[RESULT:.*]] = and i1 %[[PTR_CMP]], %[[TMP]]
+
+// OGCG: define {{.*}} i1 @_Z6cmp_eqM3FooFviES1_
+// OGCG: %[[LHS_TMP:.*]] = alloca { i64, i64 }
+// OGCG: %[[RHS_TMP:.*]] = alloca { i64, i64 }
+// OGCG: %[[LHS_ADDR:.*]] = alloca { i64, i64 }
+// OGCG: %[[RHS_ADDR:.*]] = alloca { i64, i64 }
+// OGCG: %[[LHS:.*]] = load { i64, i64 }, ptr %[[LHS_ADDR]]
+// OGCG: %[[RHS:.*]] = load { i64, i64 }, ptr %[[RHS_ADDR]]
+// OGCG: %[[LHS_PTR:.*]] = extractvalue { i64, i64 } %[[LHS]], 0
+// OGCG: %[[RHS_PTR:.*]] = extractvalue { i64, i64 } %[[RHS]], 0
+// OGCG: %[[PTR_CMP:.*]] = icmp eq i64 %[[LHS_PTR]], %[[RHS_PTR]]
+// OGCG: %[[PTR_NULL:.*]] = icmp eq i64 %[[LHS_PTR]], 0
+// OGCG: %[[LHS_ADJ:.*]] = extractvalue { i64, i64 } %[[LHS]], 1
+// OGCG: %[[RHS_ADJ:.*]] = extractvalue { i64, i64 } %[[RHS]], 1
+// OGCG: %[[ADJ_CMP:.*]] = icmp eq i64 %[[LHS_ADJ]], %[[RHS_ADJ]]
+// OGCG: %[[TMP:.*]] = or i1 %[[PTR_NULL]], %[[ADJ_CMP]]
+// OGCG: %[[RESULT:.*]] = and i1 %[[PTR_CMP]], %[[TMP]]
+
+bool cmp_ne(void (Foo::*lhs)(int), void (Foo::*rhs)(int)) {
+ return lhs != rhs;
+}
+
+// CIR-BEFORE: cir.func {{.*}} @_Z6cmp_neM3FooFviES1_
+// CIR-BEFORE: %[[LHS:.*]] = cir.load{{.*}} %0 : !cir.ptr<!cir.method<!cir.func<(!s32i)> in !rec_Foo>>
+// CIR-BEFORE: %[[RHS:.*]] = cir.load{{.*}} %1 : !cir.ptr<!cir.method<!cir.func<(!s32i)> in !rec_Foo>>
+// CIR-BEFORE: %[[CMP:.*]] = cir.cmp(ne, %[[LHS]], %[[RHS]]) : !cir.method<!cir.func<(!s32i)> in !rec_Foo>, !cir.bool
+// CIR-BEFORE: cir.store %[[CMP]], %{{.*}} : !cir.bool, !cir.ptr<!cir.bool>
+
+// CIR-AFTER: cir.func {{.*}} @_Z6cmp_neM3FooFviES1_
+// CIR-AFTER: %[[LHS:.*]] = cir.load{{.*}} %0 : !cir.ptr<!rec_anon_struct>, !rec_anon_struct
+// CIR-AFTER: %[[RHS:.*]] = cir.load{{.*}} %1 : !cir.ptr<!rec_anon_struct>, !rec_anon_struct
+// CIR-AFTER: %[[NULL:.*]] = cir.const #cir.int<0> : !s64i
+// CIR-AFTER: %[[LHS_PTR:.*]] = cir.extract_member %[[LHS]][0] : !rec_anon_struct -> !s64i
+// CIR-AFTER: %[[RHS_PTR:.*]] = cir.extract_member %[[RHS]][0] : !rec_anon_struct -> !s64i
+// CIR-AFTER: %[[PTR_CMP:.*]] = cir.cmp(ne, %[[LHS_PTR]], %[[RHS_PTR]]) : !s64i, !cir.bool
+// CIR-AFTER: %[[PTR_NULL:.*]] = cir.cmp(ne, %[[LHS_PTR]], %[[NULL]]) : !s64i, !cir.bool
+// CIR-AFTER: %[[LHS_ADJ:.*]] = cir.extract_member %[[LHS]][1] : !rec_anon_struct -> !s64i
+// CIR-AFTER: %[[RHS_ADJ:.*]] = cir.extract_member %[[RHS]][1] : !rec_anon_struct -> !s64i
+// CIR-AFTER: %[[ADJ_CMP:.*]] = cir.cmp(ne, %[[LHS_ADJ]], %[[RHS_ADJ]]) : !s64i, !cir.bool
+// CIR-AFTER: %[[TMP:.*]] = cir.binop(and, %[[PTR_NULL]], %[[ADJ_CMP]]) : !cir.bool
+// CIR-AFTER: %[[RESULT:.*]] = cir.binop(or, %[[PTR_CMP]], %[[TMP]]) : !cir.bool
+
+// LLVM: define {{.*}} i1 @_Z6cmp_neM3FooFviES1_
+// LLVM: %[[LHS:.*]] = load { i64, i64 }, ptr %{{.*}}
+// LLVM: %[[RHS:.*]] = load { i64, i64 }, ptr %{{.*}}
+// LLVM: %[[LHS_PTR:.*]] = extractvalue { i64, i64 } %[[LHS]], 0
+// LLVM: %[[RHS_PTR:.*]] = extractvalue { i64, i64 } %[[RHS]], 0
+// LLVM: %[[PTR_CMP:.*]] = icmp ne i64 %[[LHS_PTR]], %[[RHS_PTR]]
+// LLVM: %[[PTR_NULL:.*]] = icmp ne i64 %[[LHS_PTR]], 0
+// LLVM: %[[LHS_ADJ:.*]] = extractvalue { i64, i64 } %[[LHS]], 1
+// LLVM: %[[RHS_ADJ:.*]] = extractvalue { i64, i64 } %[[RHS]], 1
+// LLVM: %[[ADJ_CMP:.*]] = icmp ne i64 %[[LHS_ADJ]], %[[RHS_ADJ]]
+// LLVM: %[[TMP:.*]] = and i1 %[[PTR_NULL]], %[[ADJ_CMP]]
+// LLVM: %[[RESULT:.*]] = or i1 %[[PTR_CMP]], %[[TMP]]
+
+// OGCG: define {{.*}} i1 @_Z6cmp_neM3FooFviES1_
+// OGCG: %[[LHS_TMP:.*]] = alloca { i64, i64 }
+// OGCG: %[[RHS_TMP:.*]] = alloca { i64, i64 }
+// OGCG: %[[LHS_ADDR:.*]] = alloca { i64, i64 }
+// OGCG: %[[RHS_ADDR:.*]] = alloca { i64, i64 }
+// OGCG: %[[LHS:.*]] = load { i64, i64 }, ptr %[[LHS_ADDR]]
+// OGCG: %[[RHS:.*]] = load { i64, i64 }, ptr %[[RHS_ADDR]]
+// OGCG: %[[LHS_PTR:.*]] = extractvalue { i64, i64 } %[[LHS]], 0
+// OGCG: %[[RHS_PTR:.*]] = extractvalue { i64, i64 } %[[RHS]], 0
+// OGCG: %[[PTR_CMP:.*]] = icmp ne i64 %[[LHS_PTR]], %[[RHS_PTR]]
+// OGCG: %[[PTR_NULL:.*]] = icmp ne i64 %[[LHS_PTR]], 0
+// OGCG: %[[LHS_ADJ:.*]] = extractvalue { i64, i64 } %[[LHS]], 1
+// OGCG: %[[RHS_ADJ:.*]] = extractvalue { i64, i64 } %[[RHS]], 1
+// OGCG: %[[ADJ_CMP:.*]] = icmp ne i64 %[[LHS_ADJ]], %[[RHS_ADJ]]
+// OGCG: %[[TMP:.*]] = and i1 %[[PTR_NULL]], %[[ADJ_CMP]]
+// OGCG: %[[RESULT:.*]] = or i1 %[[PTR_CMP]], %[[TMP]]
diff --git a/llvm/include/llvm/IR/ProfDataUtils.h b/llvm/include/llvm/IR/ProfDataUtils.h
index c8cfccbe61e90..9cca8a4d2297f 100644
--- a/llvm/include/llvm/IR/ProfDataUtils.h
+++ b/llvm/include/llvm/IR/ProfDataUtils.h
@@ -15,6 +15,7 @@
#ifndef LLVM_IR_PROFDATAUTILS_H
#define LLVM_IR_PROFDATAUTILS_H
+#include "llvm/ADT/STLFunctionalExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/Metadata.h"
#include "llvm/Support/Compiler.h"
@@ -216,6 +217,12 @@ LLVM_ABI bool hasExplicitlyUnknownBranchWeights(const Instruction &I);
/// Scaling the profile data attached to 'I' using the ratio of S/T.
LLVM_ABI void scaleProfData(Instruction &I, uint64_t S, uint64_t T);
+// Helper to apply a metadata setting function to an Instruction* if profiling
+// is enabled. If profiling is disabled (ProfcheckDisableMetadataFixes is true)
+// or V is not an Instruction, the callback will not be invoked.
+LLVM_ABI void applyProfMetadataIfEnabled(
+ Value *V, llvm::function_ref<void(Instruction *)> setMetadataCallback);
+
/// Get the branch weights of a branch conditioned on b1 || b2, where b1 and b2
/// are 2 booleans that are the conditions of 2 branches for which we have the
/// branch weights B1 and B2, respectively. In both B1 and B2, the first
diff --git a/llvm/lib/IR/ProfDataUtils.cpp b/llvm/lib/IR/ProfDataUtils.cpp
index 1d6a7df5e91da..15cb7f9b2927f 100644
--- a/llvm/lib/IR/ProfDataUtils.cpp
+++ b/llvm/lib/IR/ProfDataUtils.cpp
@@ -13,6 +13,7 @@
#include "llvm/IR/ProfDataUtils.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/STLFunctionalExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
@@ -24,6 +25,10 @@
using namespace llvm;
+namespace llvm {
+extern cl::opt<bool> ProfcheckDisableMetadataFixes;
+}
+
// MD_prof nodes have the following layout
//
// In general:
@@ -391,3 +396,12 @@ void llvm::scaleProfData(Instruction &I, uint64_t S, uint64_t T) {
}
I.setMetadata(LLVMContext::MD_prof, MDNode::get(C, Vals));
}
+
+void llvm::applyProfMetadataIfEnabled(
+ Value *V, llvm::function_ref<void(Instruction *)> setMetadataCallback) {
+ if (!ProfcheckDisableMetadataFixes) {
+ if (Instruction *Inst = dyn_cast<Instruction>(V)) {
+ setMetadataCallback(Inst);
+ }
+ }
+}
diff --git a/llvm/lib/Transforms/Utils/IntegerDivision.cpp b/llvm/lib/Transforms/Utils/IntegerDivision.cpp
index e95a7a9ae525a..529d645959d28 100644
--- a/llvm/lib/Transforms/Utils/IntegerDivision.cpp
+++ b/llvm/lib/Transforms/Utils/IntegerDivision.cpp
@@ -16,8 +16,14 @@
#include "llvm/Transforms/Utils/IntegerDivision.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/MDBuilder.h"
+#include "llvm/IR/ProfDataUtils.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
using namespace llvm;
@@ -235,11 +241,38 @@ static Value *generateUnsignedDivisionCode(Value *Dividend, Value *Divisor,
Value *Tmp1 = Builder.CreateCall(CTLZ, {Dividend, True});
Value *SR = Builder.CreateSub(Tmp0, Tmp1);
Value *Ret0_4 = Builder.CreateICmpUGT(SR, MSB);
+
+ // Add 'unlikely' branch weights. We mark the case where either the divisor
+ // or the dividend is equal to zero as unlikely.
Value *Ret0 = Builder.CreateLogicalOr(Ret0_3, Ret0_4);
+ applyProfMetadataIfEnabled(Ret0, [&](Instruction *Inst) {
+ Inst->setMetadata(
+ LLVMContext::MD_prof,
+ MDBuilder(Inst->getContext()).createUnlikelyBranchWeights());
+ });
Value *RetDividend = Builder.CreateICmpEQ(SR, MSB);
+
+ // Conservatively, we treat the case |divisor| > |dividend| as unknown
+ // (50/50).
Value *RetVal = Builder.CreateSelect(Ret0, Zero, Dividend);
+ applyProfMetadataIfEnabled(RetVal, [&](Instruction *Inst) {
+ setExplicitlyUnknownBranchWeightsIfProfiled(*Inst, DEBUG_TYPE, F);
+ });
Value *EarlyRet = Builder.CreateLogicalOr(Ret0, RetDividend);
- Builder.CreateCondBr(EarlyRet, End, BB1);
+ applyProfMetadataIfEnabled(EarlyRet, [&](Instruction *Inst) {
+ setExplicitlyUnknownBranchWeightsIfProfiled(*Inst, DEBUG_TYPE, F);
+ });
+
+ // The condition of this branch is based on `EarlyRet`. `EarlyRet` is true
+ // only for special cases like dividend or divisor being zero, or the divisor
+ // being greater than the dividend. Thus, the branch to `End` is unlikely,
+ // and we expect to more frequently enter `BB1`.
+ Value *ConBrSpecialCases = Builder.CreateCondBr(EarlyRet, End, BB1);
+ applyProfMetadataIfEnabled(ConBrSpecialCases, [&](Instruction *Inst) {
+ Inst->setMetadata(
+ LLVMContext::MD_prof,
+ MDBuilder(Inst->getContext()).createUnlikelyBranchWeights());
+ });
// ; bb1: ; preds = %special-cases
// ; %sr_1 = add i32 %sr, 1
@@ -251,8 +284,17 @@ static Value *generateUnsignedDivisionCode(Value *Dividend, Value *Divisor,
Value *SR_1 = Builder.CreateAdd(SR, One);
Value *Tmp2 = Builder.CreateSub(MSB, SR);
Value *Q = Builder.CreateShl(Dividend, Tmp2);
+ // We assume that in the common case, the dividend's magnitude is larger than
+ // the divisor's magnitude such that the loop counter (SR) is non-zero.
+ // Specifically, if |dividend| >= 2 * |divisor|, then SR >= 1, ensuring SR_1
+ // >= 2. The case where SR_1 == 0 is thus considered unlikely.
Value *SkipLoop = Builder.CreateICmpEQ(SR_1, Zero);
- Builder.CreateCondBr(SkipLoop, LoopExit, Preheader);
+ Value *ConBrBB1 = Builder.CreateCondBr(SkipLoop, LoopExit, Preheader);
+ applyProfMetadataIfEnabled(ConBrBB1, [&](Instruction *Inst) {
+ Inst->setMetadata(
+ LLVMContext::MD_prof,
+ MDBuilder(Inst->getContext()).createUnlikelyBranchWeights());
+ });
// ; preheader: ; preds = %bb1
// ; %tmp3 = lshr i32 %dividend, %sr_1
@@ -298,7 +340,15 @@ static Value *generateUnsignedDivisionCode(Value *Dividend, Value *Divisor,
Value *R = Builder.CreateSub(Tmp7, Tmp11);
Value *SR_2 = Builder.CreateAdd(SR_3, NegOne);
Value *Tmp12 = Builder.CreateICmpEQ(SR_2, Zero);
- Builder.CreateCondBr(Tmp12, LoopExit, DoWhile);
+ // The loop implements the core bit-by-bit binary long division algorithm.
+ // The branch is unlikely to exit the loop early until it has processed all
+ // significant bits.
+ Value *ConBrDoWhile = Builder.CreateCondBr(Tmp12, LoopExit, DoWhile);
+ applyProfMetadataIfEnabled(ConBrDoWhile, [&](Instruction *Inst) {
+ Inst->setMetadata(
+ LLVMContext::MD_prof,
+ MDBuilder(Inst->getContext()).createUnlikelyBranchWeights());
+ });
// ; loop-exit: ; preds = %do-while, %bb1
// ; %carry_2 = phi i32 [ 0, %bb1 ], [ %carry, %do-while ]
@@ -437,7 +487,7 @@ bool llvm::expandDivision(BinaryOperator *Div) {
// Insert the unsigned division code
Value *Quotient = generateUnsignedDivisionCode(Div->getOperand(0),
Div->getOperand(1),
- Builder);
+ Builder);
Div->replaceAllUsesWith(Quotient);
Div->dropAllReferences();
Div->eraseFromParent();
@@ -455,7 +505,7 @@ bool llvm::expandDivision(BinaryOperator *Div) {
bool llvm::expandRemainderUpTo32Bits(BinaryOperator *Rem) {
assert((Rem->getOpcode() == Instruction::SRem ||
Rem->getOpcode() == Instruction::URem) &&
- "Trying to expand remainder from a non-remainder function");
+ "Trying to expand remainder from a non-remainder function");
Type *RemTy = Rem->getType();
assert(!RemTy->isVectorTy() && "Div over vectors not supported");
@@ -504,7 +554,7 @@ bool llvm::expandRemainderUpTo32Bits(BinaryOperator *Rem) {
bool llvm::expandRemainderUpTo64Bits(BinaryOperator *Rem) {
assert((Rem->getOpcode() == Instruction::SRem ||
Rem->getOpcode() == Instruction::URem) &&
- "Trying to expand remainder from a non-remainder function");
+ "Trying to expand remainder from a non-remainder function");
Type *RemTy = Rem->getType();
assert(!RemTy->isVectorTy() && "Div over vectors not supported");
@@ -551,7 +601,7 @@ bool llvm::expandRemainderUpTo64Bits(BinaryOperator *Rem) {
bool llvm::expandDivisionUpTo32Bits(BinaryOperator *Div) {
assert((Div->getOpcode() == Instruction::SDiv ||
Div->getOpcode() == Instruction::UDiv) &&
- "Trying to expand division from a non-division function");
+ "Trying to expand division from a non-division function");
Type *DivTy = Div->getType();
assert(!DivTy->isVectorTy() && "Div over vectors not supported");
@@ -599,7 +649,7 @@ bool llvm::expandDivisionUpTo32Bits(BinaryOperator *Div) {
bool llvm::expandDivisionUpTo64Bits(BinaryOperator *Div) {
assert((Div->getOpcode() == Instruction::SDiv ||
Div->getOpcode() == Instruction::UDiv) &&
- "Trying to expand division from a non-division function");
+ "Trying to expand division from a non-division function");
Type *DivTy = Div->getType();
assert(!DivTy->isVectorTy() && "Div over vectors not supported");
diff --git a/llvm/test/CodeGen/RISCV/idiv_large.ll b/llvm/test/CodeGen/RISCV/idiv_large.ll
index 1d13f723ac224..2ad605bc3ff9e 100644
--- a/llvm/test/CodeGen/RISCV/idiv_large.ll
+++ b/llvm/test/CodeGen/RISCV/idiv_large.ll
@@ -471,7 +471,7 @@ define i128 @udiv_i128(i128 %x, i128 %y) nounwind {
; RV32-NEXT: sw s9, 100(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s10, 96(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s11, 92(sp) # 4-byte Folded Spill
-; RV32-NEXT: mv a4, a0
+; RV32-NEXT: mv a3, a0
; RV32-NEXT: lw ra, 0(a2)
; RV32-NEXT: lw a5, 4(a2)
; RV32-NEXT: lw s9, 8(a2)
@@ -480,8 +480,8 @@ define i128 @udiv_i128(i128 %x, i128 %y) nounwind {
; RV32-NEXT: addi t4, t4, 1365
; RV32-NEXT: lui t3, 209715
; RV32-NEXT: addi t3, t3, 819
-; RV32-NEXT: lui a7, 61681
-; RV32-NEXT: addi a7, a7, -241
+; RV32-NEXT: lui t2, 61681
+; RV32-NEXT: addi t2, t2, -241
; RV32-NEXT: bnez a5, .LBB2_2
; RV32-NEXT: # %bb.1: # %_udiv-special-cases
; RV32-NEXT: srli a0, ra, 1
@@ -504,7 +504,7 @@ define i128 @udiv_i128(i128 %x, i128 %y) nounwind {
; RV32-NEXT: add a0, a6, a0
; RV32-NEXT: srli a6, a0, 4
; RV32-NEXT: add a0, a0, a6
-; RV32-NEXT: and a0, a0, a7
+; RV32-NEXT: and a0, a0, t2
; RV32-NEXT: slli a6, a0, 8
; RV32-NEXT: add a0, a0, a6
; RV32-NEXT: slli a6, a0, 16
@@ -533,7 +533,7 @@ define i128 @udiv_i128(i128 %x, i128 %y) nounwind {
; RV32-NEXT: add a0, a6, a0
; RV32-NEXT: srli a6, a0, 4
; RV32-NEXT: add a0, a0, a6
-; RV32-NEXT: and a0, a0, a7
+; RV32-NEXT: and a0, a0, t2
; RV32-NEXT: slli a6, a0, 8
; RV32-NEXT: add a0, a0, a6
; RV32-NEXT: slli a6, a0, 16
@@ -546,73 +546,73 @@ define i128 @udiv_i128(i128 %x, i128 %y) nounwind {
; RV32-NEXT: # %bb.4: # %_udiv-special-cases
; RV32-NEXT: srli a0, s9, 1
; RV32-NEXT: or a0, s9, a0
-; RV32-NEXT: srli t0, a0, 2
-; RV32-NEXT: or a0, a0, t0
-; RV32-NEXT: srli t0, a0, 4
-; RV32-NEXT: or a0, a0, t0
-; RV32-NEXT: srli t0, a0, 8
-; RV32-NEXT: or a0, a0, t0
-; RV32-NEXT: srli t0, a0, 16
-; RV32-NEXT: or a0, a0, t0
+; RV32-NEXT: srli a7, a0, 2
+; RV32-NEXT: or a0, a0, a7
+; RV32-NEXT: srli a7, a0, 4
+; RV32-NEXT: or a0, a0, a7
+; RV32-NEXT: srli a7, a0, 8
+; RV32-NEXT: or a0, a0, a7
+; RV32-NEXT: srli a7, a0, 16
+; RV32-NEXT: or a0, a0, a7
; RV32-NEXT: not a0, a0
-; RV32-NEXT: srli t0, a0, 1
-; RV32-NEXT: and t0, t0, t4
-; RV32-NEXT: sub a0, a0, t0
-; RV32-NEXT: and t0, a0, t3
+; RV32-NEXT: srli a7, a0, 1
+; RV32-NEXT: and a7, a7, t4
+; RV32-NEXT: sub a0, a0, a7
+; RV32-NEXT: and a7, a0, t3
; RV32-NEXT: srli a0, a0, 2
; RV32-NEXT: and a0, a0, t3
-; RV32-NEXT: add a0, t0, a0
-; RV32-NEXT: srli t0, a0, 4
-; RV32-NEXT: add a0, a0, t0
-; RV32-NEXT: and a0, a0, a7
-; RV32-NEXT: slli t0, a0, 8
-; RV32-NEXT: add a0, a0, t0
-; RV32-NEXT: slli t0, a0, 16
-; RV32-NEXT: add a0, a0, t0
+; RV32-NEXT: add a0, a7, a0
+; RV32-NEXT: srli a7, a0, 4
+; RV32-NEXT: add a0, a0, a7
+; RV32-NEXT: and a0, a0, t2
+; RV32-NEXT: slli a7, a0, 8
+; RV32-NEXT: add a0, a0, a7
+; RV32-NEXT: slli a7, a0, 16
+; RV32-NEXT: add a0, a0, a7
; RV32-NEXT: srli a0, a0, 24
; RV32-NEXT: addi t5, a0, 32
; RV32-NEXT: j .LBB2_6
; RV32-NEXT: .LBB2_5:
; RV32-NEXT: srli a0, s10, 1
; RV32-NEXT: or a0, s10, a0
-; RV32-NEXT: srli t0, a0, 2
-; RV32-NEXT: or a0, a0, t0
-; RV32-NEXT: srli t0, a0, 4
-; RV32-NEXT: or a0, a0, t0
-; RV32-NEXT: srli t0, a0, 8
-; RV32-NEXT: or a0, a0, t0
-; RV32-NEXT: srli t0, a0, 16
-; RV32-NEXT: or a0, a0, t0
+; RV32-NEXT: srli a7, a0, 2
+; RV32-NEXT: or a0, a0, a7
+; RV32-NEXT: srli a7, a0, 4
+; RV32-NEXT: or a0, a0, a7
+; RV32-NEXT: srli a7, a0, 8
+; RV32-NEXT: or a0, a0, a7
+; RV32-NEXT: srli a7, a0, 16
+; RV32-NEXT: or a0, a0, a7
; RV32-NEXT: not a0, a0
-; RV32-NEXT: srli t0, a0, 1
-; RV32-NEXT: and t0, t0, t4
-; RV32-NEXT: sub a0, a0, t0
-; RV32-NEXT: and t0, a0, t3
+; RV32-NEXT: srli a7, a0, 1
+; RV32-NEXT: and a7, a7, t4
+; RV32-NEXT: sub a0, a0, a7
+; RV32-NEXT: and a7, a0, t3
; RV32-NEXT: srli a0, a0, 2
; RV32-NEXT: and a0, a0, t3
-; RV32-NEXT: add a0, t0, a0
-; RV32-NEXT: srli t0, a0, 4
-; RV32-NEXT: add a0, a0, t0
-; RV32-NEXT: and a0, a0, a7
-; RV32-NEXT: slli t0, a0, 8
-; RV32-NEXT: add a0, a0, t0
-; RV32-NEXT: slli t0, a0, 16
-; RV32-NEXT: add a0, a0, t0
+; RV32-NEXT: add a0, a7, a0
+; RV32-NEXT: srli a7, a0, 4
+; RV32-NEXT: add a0, a0, a7
+; RV32-NEXT: and a0, a0, t2
+; RV32-NEXT: slli a7, a0, 8
+; RV32-NEXT: add a0, a0, a7
+; RV32-NEXT: slli a7, a0, 16
+; RV32-NEXT: add a0, a0, a7
; RV32-NEXT: srli t5, a0, 24
; RV32-NEXT: .LBB2_6: # %_udiv-special-cases
-; RV32-NEXT: lw t0, 12(a1)
+; RV32-NEXT: lw a7, 12(a1)
; RV32-NEXT: addi s0, t6, 64
; RV32-NEXT: bnez s1, .LBB2_8
; RV32-NEXT: # %bb.7: # %_udiv-special-cases
; RV32-NEXT: mv t5, s0
; RV32-NEXT: .LBB2_8: # %_udiv-special-cases
-; RV32-NEXT: lw t2, 0(a1)
-; RV32-NEXT: lw t1, 8(a1)
+; RV32-NEXT: lw t1, 0(a1)
+; RV32-NEXT: lw t0, 8(a1)
; RV32-NEXT: snez a1, s1
; RV32-NEXT: bnez a6, .LBB2_10
; RV32-NEXT: # %bb.9: # %_udiv-special-cases
-; RV32-NEXT: srli a0, t2, 1
-; RV32-NEXT: or a0, t2, a0
+; RV32-NEXT: srli a0, t1, 1
+; RV32-NEXT: or a0, t1, a0
; RV32-NEXT: srli s1, a0, 2
; RV32-NEXT: or a0, a0, s1
; RV32-NEXT: srli s1, a0, 4
@@ -631,7 +631,7 @@ define i128 @udiv_i128(i128 %x, i128 %y) nounwind {
; RV32-NEXT: add a0, s1, a0
; RV32-NEXT: srli s1, a0, 4
; RV32-NEXT: add a0, a0, s1
-; RV32-NEXT: and a0, a0, a7
+; RV32-NEXT: and a0, a0, t2
; RV32-NEXT: slli s1, a0, 8
; RV32-NEXT: add a0, a0, s1
; RV32-NEXT: slli s1, a0, 16
@@ -660,7 +660,7 @@ define i128 @udiv_i128(i128 %x, i128 %y) nounwind {
; RV32-NEXT: add a0, s1, a0
; RV32-NEXT: srli s1, a0, 4
; RV32-NEXT: add a0, a0, s1
-; RV32-NEXT: and a0, a0, a7
+; RV32-NEXT: and a0, a0, t2
; RV32-NEXT: slli s1, a0, 8
; RV32-NEXT: add a0, a0, s1
; RV32-NEXT: slli s1, a0, 16
@@ -669,19 +669,19 @@ define i128 @udiv_i128(i128 %x, i128 %y) nounwind {
; RV32-NEXT: .LBB2_11: # %_udiv-special-cases
; RV32-NEXT: or s1, a5, s10
; RV32-NEXT: or s2, ra, s9
-; RV32-NEXT: or s3, a6, t0
-; RV32-NEXT: or s4, t2, t1
+; RV32-NEXT: or s3, a6, a7
+; RV32-NEXT: or s4, t1, t0
; RV32-NEXT: sltu t6, s0, t6
; RV32-NEXT: addi s0, a1, -1
; RV32-NEXT: addi a1, a0, 64
-; RV32-NEXT: or s5, t1, t0
+; RV32-NEXT: or s5, t0, a7
; RV32-NEXT: sltu s6, a1, a0
; RV32-NEXT: snez s7, s5
; RV32-NEXT: addi s7, s7, -1
-; RV32-NEXT: bnez t0, .LBB2_13
+; RV32-NEXT: bnez a7, .LBB2_13
; RV32-NEXT: # %bb.12: # %_udiv-special-cases
-; RV32-NEXT: srli a0, t1, 1
-; RV32-NEXT: or a0, t1, a0
+; RV32-NEXT: srli a0, t0, 1
+; RV32-NEXT: or a0, t0, a0
; RV32-NEXT: srli s8, a0, 2
; RV32-NEXT: or a0, a0, s8
; RV32-NEXT: srli s8, a0, 4
@@ -700,17 +700,17 @@ define i128 @udiv_i128(i128 %x, i128 %y) nounwind {
; RV32-NEXT: add a0, t4, a0
; RV32-NEXT: srli t3, a0, 4
; RV32-NEXT: add a0, a0, t3
-; RV32-NEXT: and a0, a0, a7
-; RV32-NEXT: slli a7, a0, 8
-; RV32-NEXT: add a0, a0, a7
-; RV32-NEXT: slli a7, a0, 16
-; RV32-NEXT: add a0, a0, a7
+; RV32-NEXT: and a0, a0, t2
+; RV32-NEXT: slli t2, a0, 8
+; RV32-NEXT: add a0, a0, t2
+; RV32-NEXT: slli t2, a0, 16
+; RV32-NEXT: add a0, a0, t2
; RV32-NEXT: srli a0, a0, 24
; RV32-NEXT: addi a0, a0, 32
; RV32-NEXT: j .LBB2_14
; RV32-NEXT: .LBB2_13:
-; RV32-NEXT: srli a0, t0, 1
-; RV32-NEXT: or a0, t0, a0
+; RV32-NEXT: srli a0, a7, 1
+; RV32-NEXT: or a0, a7, a0
; RV32-NEXT: srli s8, a0, 2
; RV32-NEXT: or a0, a0, s8
; RV32-NEXT: srli s8, a0, 4
@@ -729,16 +729,16 @@ define i128 @udiv_i128(i128 %x, i128 %y) nounwind {
; RV32-NEXT: add a0, t4, a0
; RV32-NEXT: srli t3, a0, 4
; RV32-NEXT: add a0, a0, t3
-; RV32-NEXT: and a0, a0, a7
-; RV32-NEXT: slli a7, a0, 8
-; RV32-NEXT: add a0, a0, a7
-; RV32-NEXT: slli a7, a0, 16
-; RV32-NEXT: add a0, a0, a7
+; RV32-NEXT: and a0, a0, t2
+; RV32-NEXT: slli t2, a0, 8
+; RV32-NEXT: add a0, a0, t2
+; RV32-NEXT: slli t2, a0, 16
+; RV32-NEXT: add a0, a0, t2
; RV32-NEXT: srli a0, a0, 24
; RV32-NEXT: .LBB2_14: # %_udiv-special-cases
; RV32-NEXT: or t4, s2, s1
; RV32-NEXT: or s1, s4, s3
-; RV32-NEXT: and a7, s0, t6
+; RV32-NEXT: and t2, s0, t6
; RV32-NEXT: and t3, s7, s6
; RV32-NEXT: bnez s5, .LBB2_16
; RV32-NEXT: # %bb.15: # %_udiv-special-cases
@@ -747,125 +747,92 @@ define i128 @udiv_i128(i128 %x, i128 %y) nounwind {
; RV32-NEXT: seqz a1, t4
; RV32-NEXT: seqz t4, s1
; RV32-NEXT: sltu t6, t5, a0
-; RV32-NEXT: sub s1, a7, t3
+; RV32-NEXT: sub s1, t2, t3
; RV32-NEXT: mv s0, t6
-; RV32-NEXT: beq a7, t3, .LBB2_18
+; RV32-NEXT: beq t2, t3, .LBB2_18
; RV32-NEXT: # %bb.17: # %_udiv-special-cases
-; RV32-NEXT: sltu s0, a7, t3
+; RV32-NEXT: sltu s0, t2, t3
; RV32-NEXT: .LBB2_18: # %_udiv-special-cases
-; RV32-NEXT: sub t3, s1, t6
+; RV32-NEXT: sub t2, s1, t6
; RV32-NEXT: or a1, a1, t4
; RV32-NEXT: neg t6, s0
; RV32-NEXT: seqz s0, s0
; RV32-NEXT: addi s0, s0, -1
-; RV32-NEXT: or a7, t6, s0
-; RV32-NEXT: sub t4, t5, a0
-; RV32-NEXT: beqz a7, .LBB2_20
+; RV32-NEXT: or t4, t6, s0
+; RV32-NEXT: sub t3, t5, a0
+; RV32-NEXT: beqz t4, .LBB2_20
; RV32-NEXT: # %bb.19: # %_udiv-special-cases
-; RV32-NEXT: snez a0, a7
+; RV32-NEXT: snez a0, t4
; RV32-NEXT: j .LBB2_21
; RV32-NEXT: .LBB2_20:
-; RV32-NEXT: snez a0, t3
-; RV32-NEXT: sltiu a7, t4, 128
-; RV32-NEXT: xori a7, a7, 1
-; RV32-NEXT: or a0, a7, a0
+; RV32-NEXT: snez a0, t2
+; RV32-NEXT: sltiu t4, t3, 128
+; RV32-NEXT: xori t4, t4, 1
+; RV32-NEXT: or a0, t4, a0
; RV32-NEXT: .LBB2_21: # %_udiv-special-cases
; RV32-NEXT: or s1, a1, a0
-; RV32-NEXT: addi a1, s1, -1
-; RV32-NEXT: and a7, a1, t0
-; RV32-NEXT: and t5, a1, t1
-; RV32-NEXT: and a0, a1, a6
-; RV32-NEXT: and a1, a1, t2
-; RV32-NEXT: bnez s1, .LBB2_25
+; RV32-NEXT: addi t5, s1, -1
+; RV32-NEXT: and a1, t5, a7
+; RV32-NEXT: and t4, t5, t0
+; RV32-NEXT: and a0, t5, a6
+; RV32-NEXT: and t5, t5, t1
+; RV32-NEXT: bnez s1, .LBB2_32
; RV32-NEXT: # %bb.22: # %_udiv-special-cases
-; RV32-NEXT: xori s1, t4, 127
+; RV32-NEXT: xori s1, t3, 127
; RV32-NEXT: or s1, s1, t6
-; RV32-NEXT: or s2, t3, s0
+; RV32-NEXT: or s2, t2, s0
; RV32-NEXT: or s1, s1, s2
-; RV32-NEXT: beqz s1, .LBB2_25
+; RV32-NEXT: beqz s1, .LBB2_32
; RV32-NEXT: # %bb.23: # %udiv-bb1
-; RV32-NEXT: sw a4, 8(sp) # 4-byte Folded Spill
-; RV32-NEXT: addi a7, t4, 1
+; RV32-NEXT: sw a3, 8(sp) # 4-byte Folded Spill
+; RV32-NEXT: addi a1, t3, 1
; RV32-NEXT: sw zero, 56(sp)
; RV32-NEXT: sw zero, 60(sp)
; RV32-NEXT: sw zero, 64(sp)
; RV32-NEXT: sw zero, 68(sp)
-; RV32-NEXT: sw t2, 72(sp)
+; RV32-NEXT: sw t1, 72(sp)
; RV32-NEXT: sw a6, 76(sp)
-; RV32-NEXT: sw t1, 80(sp)
-; RV32-NEXT: sw t0, 84(sp)
+; RV32-NEXT: sw t0, 80(sp)
+; RV32-NEXT: sw a7, 84(sp)
; RV32-NEXT: li a0, 127
-; RV32-NEXT: addi a2, sp, 72
-; RV32-NEXT: seqz a4, a7
-; RV32-NEXT: sub a0, a0, t4
-; RV32-NEXT: add t3, t3, a4
+; RV32-NEXT: addi a3, sp, 72
+; RV32-NEXT: seqz a4, a1
+; RV32-NEXT: sub a0, a0, t3
+; RV32-NEXT: add t2, t2, a4
; RV32-NEXT: andi a4, a0, 31
; RV32-NEXT: srli a0, a0, 3
-; RV32-NEXT: or t5, a7, t3
+; RV32-NEXT: or t4, a1, t2
; RV32-NEXT: xori a4, a4, 31
; RV32-NEXT: andi a0, a0, 12
-; RV32-NEXT: seqz t5, t5
-; RV32-NEXT: sub a2, a2, a0
-; RV32-NEXT: add t5, t6, t5
-; RV32-NEXT: lw a0, 0(a2)
-; RV32-NEXT: lw s1, 4(a2)
-; RV32-NEXT: lw s3, 8(a2)
-; RV32-NEXT: lw a2, 12(a2)
-; RV32-NEXT: sltu t6, t5, t6
-; RV32-NEXT: or s2, a7, t5
-; RV32-NEXT: add t6, s0, t6
-; RV32-NEXT: or s0, t3, t6
-; RV32-NEXT: or s0, s2, s0
-; RV32-NEXT: srli s2, s3, 1
-; RV32-NEXT: srli s4, s1, 1
-; RV32-NEXT: srli s5, a0, 1
-; RV32-NEXT: srl s2, s2, a4
-; RV32-NEXT: srl s4, s4, a4
-; RV32-NEXT: srl a4, s5, a4
-; RV32-NEXT: not t4, t4
-; RV32-NEXT: sll a2, a2, t4
-; RV32-NEXT: or s2, a2, s2
-; RV32-NEXT: sll a2, s3, t4
-; RV32-NEXT: or a2, a2, s4
-; RV32-NEXT: sll s1, s1, t4
+; RV32-NEXT: seqz t4, t4
+; RV32-NEXT: sub a3, a3, a0
+; RV32-NEXT: add t4, t6, t4
+; RV32-NEXT: lw a0, 0(a3)
+; RV32-NEXT: lw s1, 4(a3)
+; RV32-NEXT: lw s3, 8(a3)
+; RV32-NEXT: lw a3, 12(a3)
+; RV32-NEXT: sltu t5, t4, t6
+; RV32-NEXT: or t6, a1, t4
+; RV32-NEXT: add t5, s0, t5
+; RV32-NEXT: or s0, t2, t5
+; RV32-NEXT: or t6, t6, s0
+; RV32-NEXT: srli s0, s3, 1
+; RV32-NEXT: srli s2, s1, 1
+; RV32-NEXT: srli s4, a0, 1
+; RV32-NEXT: srl s0, s0, a4
+; RV32-NEXT: srl s5, s2, a4
+; RV32-NEXT: srl a4, s4, a4
+; RV32-NEXT: not t3, t3
+; RV32-NEXT: sll a3, a3, t3
+; RV32-NEXT: or s2, a3, s0
+; RV32-NEXT: sll a3, s3, t3
+; RV32-NEXT: sll s1, s1, t3
+; RV32-NEXT: or a3, a3, s5
; RV32-NEXT: or s1, s1, a4
-; RV32-NEXT: sll t4, a0, t4
-; RV32-NEXT: li a1, 0
-; RV32-NEXT: bnez s0, .LBB2_26
-; RV32-NEXT: .LBB2_24: # %udiv-loop-exit
-; RV32-NEXT: srli a0, t4, 31
-; RV32-NEXT: slli a3, s1, 1
-; RV32-NEXT: srli s1, s1, 31
-; RV32-NEXT: or a0, a3, a0
-; RV32-NEXT: slli a3, a2, 1
-; RV32-NEXT: srli s0, a2, 31
-; RV32-NEXT: slli s2, s2, 1
-; RV32-NEXT: slli t4, t4, 1
-; RV32-NEXT: or t5, a3, s1
-; RV32-NEXT: or a7, s2, s0
-; RV32-NEXT: or a1, a1, t4
-; RV32-NEXT: lw a4, 8(sp) # 4-byte Folded Reload
-; RV32-NEXT: .LBB2_25: # %udiv-end
-; RV32-NEXT: sw a1, 0(a4)
-; RV32-NEXT: sw a0, 4(a4)
-; RV32-NEXT: sw t5, 8(a4)
-; RV32-NEXT: sw a7, 12(a4)
-; RV32-NEXT: lw ra, 140(sp) # 4-byte Folded Reload
-; RV32-NEXT: lw s0, 136(sp) # 4-byte Folded Reload
-; RV32-NEXT: lw s1, 132(sp) # 4-byte Folded Reload
-; RV32-NEXT: lw s2, 128(sp) # 4-byte Folded Reload
-; RV32-NEXT: lw s3, 124(sp) # 4-byte Folded Reload
-; RV32-NEXT: lw s4, 120(sp) # 4-byte Folded Reload
-; RV32-NEXT: lw s5, 116(sp) # 4-byte Folded Reload
-; RV32-NEXT: lw s6, 112(sp) # 4-byte Folded Reload
-; RV32-NEXT: lw s7, 108(sp) # 4-byte Folded Reload
-; RV32-NEXT: lw s8, 104(sp) # 4-byte Folded Reload
-; RV32-NEXT: lw s9, 100(sp) # 4-byte Folded Reload
-; RV32-NEXT: lw s10, 96(sp) # 4-byte Folded Reload
-; RV32-NEXT: lw s11, 92(sp) # 4-byte Folded Reload
-; RV32-NEXT: addi sp, sp, 144
-; RV32-NEXT: ret
-; RV32-NEXT: .LBB2_26: # %udiv-preheader
+; RV32-NEXT: sll t3, a0, t3
+; RV32-NEXT: beqz t6, .LBB2_31
+; RV32-NEXT: # %bb.24: # %udiv-preheader
+; RV32-NEXT: li t6, 0
; RV32-NEXT: li s3, 0
; RV32-NEXT: li s4, 0
; RV32-NEXT: li s5, 0
@@ -873,118 +840,118 @@ define i128 @udiv_i128(i128 %x, i128 %y) nounwind {
; RV32-NEXT: sw zero, 44(sp)
; RV32-NEXT: sw zero, 48(sp)
; RV32-NEXT: sw zero, 52(sp)
-; RV32-NEXT: sw t2, 24(sp)
+; RV32-NEXT: sw t1, 24(sp)
; RV32-NEXT: sw a6, 28(sp)
-; RV32-NEXT: sw t1, 32(sp)
-; RV32-NEXT: sw t0, 36(sp)
-; RV32-NEXT: srli a0, a7, 3
+; RV32-NEXT: sw t0, 32(sp)
+; RV32-NEXT: sw a7, 36(sp)
+; RV32-NEXT: srli a0, a1, 3
; RV32-NEXT: addi a4, sp, 24
; RV32-NEXT: andi a0, a0, 12
; RV32-NEXT: add a0, a4, a0
; RV32-NEXT: lw a4, 4(a0)
; RV32-NEXT: lw a6, 8(a0)
-; RV32-NEXT: lw t2, 12(a0)
+; RV32-NEXT: lw t1, 12(a0)
; RV32-NEXT: lw a0, 0(a0)
-; RV32-NEXT: andi t0, a7, 31
-; RV32-NEXT: xori t0, t0, 31
-; RV32-NEXT: slli t1, t2, 1
+; RV32-NEXT: andi a7, a1, 31
+; RV32-NEXT: xori a7, a7, 31
+; RV32-NEXT: slli t0, t1, 1
; RV32-NEXT: slli s0, a6, 1
; RV32-NEXT: slli s6, a4, 1
-; RV32-NEXT: sll t1, t1, t0
-; RV32-NEXT: sll s0, s0, t0
-; RV32-NEXT: sll s8, s6, t0
-; RV32-NEXT: seqz t0, ra
-; RV32-NEXT: srl a6, a6, a7
-; RV32-NEXT: or s6, a6, t1
-; RV32-NEXT: or t1, ra, a5
+; RV32-NEXT: sll t0, t0, a7
+; RV32-NEXT: sll s0, s0, a7
+; RV32-NEXT: sll s8, s6, a7
+; RV32-NEXT: seqz a7, ra
+; RV32-NEXT: srl a6, a6, a1
+; RV32-NEXT: or s6, a6, t0
+; RV32-NEXT: or t0, ra, a5
; RV32-NEXT: sw a5, 20(sp) # 4-byte Folded Spill
-; RV32-NEXT: sub a6, a5, t0
-; RV32-NEXT: seqz t1, t1
-; RV32-NEXT: srl a4, a4, a7
+; RV32-NEXT: sub a6, a5, a7
+; RV32-NEXT: seqz t0, t0
+; RV32-NEXT: srl a4, a4, a1
; RV32-NEXT: or s7, a4, s0
-; RV32-NEXT: sub t0, s9, t1
-; RV32-NEXT: mv a3, s9
-; RV32-NEXT: sltu a4, s9, t1
-; RV32-NEXT: mv t1, s10
+; RV32-NEXT: sub a7, s9, t0
+; RV32-NEXT: mv a2, s9
+; RV32-NEXT: sltu a4, s9, t0
+; RV32-NEXT: mv t0, s10
; RV32-NEXT: sub a4, s10, a4
; RV32-NEXT: sw a4, 16(sp) # 4-byte Folded Spill
-; RV32-NEXT: srl a0, a0, a7
-; RV32-NEXT: srl s9, t2, a7
+; RV32-NEXT: srl a0, a0, a1
+; RV32-NEXT: srl s9, t1, a1
; RV32-NEXT: or s8, a0, s8
; RV32-NEXT: addi a0, ra, -1
; RV32-NEXT: sw a0, 12(sp) # 4-byte Folded Spill
-; RV32-NEXT: j .LBB2_28
-; RV32-NEXT: .LBB2_27: # %udiv-do-while
-; RV32-NEXT: # in Loop: Header=BB2_28 Depth=1
-; RV32-NEXT: srli s0, a2, 31
+; RV32-NEXT: j .LBB2_26
+; RV32-NEXT: .LBB2_25: # %udiv-do-while
+; RV32-NEXT: # in Loop: Header=BB2_26 Depth=1
+; RV32-NEXT: srli s0, a3, 31
; RV32-NEXT: slli s2, s2, 1
; RV32-NEXT: sub a0, s11, a0
; RV32-NEXT: srli s11, s1, 31
-; RV32-NEXT: slli a2, a2, 1
+; RV32-NEXT: slli a3, a3, 1
; RV32-NEXT: or s0, s2, s0
-; RV32-NEXT: srli s2, t4, 31
+; RV32-NEXT: srli s2, t3, 31
; RV32-NEXT: slli s1, s1, 1
-; RV32-NEXT: slli t4, t4, 1
-; RV32-NEXT: or a2, a2, s11
-; RV32-NEXT: and s11, s7, t1
+; RV32-NEXT: slli t3, t3, 1
+; RV32-NEXT: or a3, a3, s11
+; RV32-NEXT: and s11, s7, t0
; RV32-NEXT: or s1, s1, s2
-; RV32-NEXT: and s2, s7, a3
-; RV32-NEXT: or t4, a1, t4
-; RV32-NEXT: sub a4, t2, s2
-; RV32-NEXT: sltu t2, t2, s2
-; RV32-NEXT: or s2, a7, t3
+; RV32-NEXT: and s2, s7, a2
+; RV32-NEXT: or t3, t6, t3
+; RV32-NEXT: sub a4, t1, s2
+; RV32-NEXT: sltu t1, t1, s2
+; RV32-NEXT: or s2, a1, t2
; RV32-NEXT: sub s11, s6, s11
-; RV32-NEXT: seqz s6, a7
-; RV32-NEXT: addi a7, a7, -1
-; RV32-NEXT: andi a1, s7, 1
+; RV32-NEXT: seqz s6, a1
+; RV32-NEXT: addi a1, a1, -1
+; RV32-NEXT: andi t6, s7, 1
; RV32-NEXT: sub s7, a0, ra
; RV32-NEXT: seqz a0, s2
-; RV32-NEXT: sub t3, t3, s6
+; RV32-NEXT: sub t2, t2, s6
; RV32-NEXT: or s1, s3, s1
-; RV32-NEXT: or a2, s4, a2
+; RV32-NEXT: or a3, s4, a3
; RV32-NEXT: or s2, s5, s0
; RV32-NEXT: sub s6, a4, s9
; RV32-NEXT: sltu a4, a4, s9
-; RV32-NEXT: sub t2, s11, t2
-; RV32-NEXT: sltu s0, t5, a0
-; RV32-NEXT: sub t5, t5, a0
-; RV32-NEXT: sub s9, t2, a4
-; RV32-NEXT: sub t6, t6, s0
-; RV32-NEXT: or a0, t3, t6
-; RV32-NEXT: or a4, a7, t5
+; RV32-NEXT: sub t1, s11, t1
+; RV32-NEXT: sltu s0, t4, a0
+; RV32-NEXT: sub t4, t4, a0
+; RV32-NEXT: sub s9, t1, a4
+; RV32-NEXT: sub t5, t5, s0
+; RV32-NEXT: or a0, t2, t5
+; RV32-NEXT: or a4, a1, t4
; RV32-NEXT: or a0, a4, a0
; RV32-NEXT: sub s8, s8, s10
; RV32-NEXT: li s3, 0
; RV32-NEXT: li s4, 0
; RV32-NEXT: li s5, 0
; RV32-NEXT: mv ra, a5
-; RV32-NEXT: beqz a0, .LBB2_24
-; RV32-NEXT: .LBB2_28: # %udiv-do-while
+; RV32-NEXT: beqz a0, .LBB2_31
+; RV32-NEXT: .LBB2_26: # %udiv-do-while
; RV32-NEXT: # =>This Inner Loop Header: Depth=1
; RV32-NEXT: srli a0, s8, 31
-; RV32-NEXT: slli t2, s7, 1
+; RV32-NEXT: slli t1, s7, 1
; RV32-NEXT: slli s8, s8, 1
-; RV32-NEXT: or s11, t2, a0
+; RV32-NEXT: or s11, t1, a0
; RV32-NEXT: srli a0, s2, 31
; RV32-NEXT: or s8, s8, a0
-; RV32-NEXT: beq a6, s11, .LBB2_30
-; RV32-NEXT: # %bb.29: # %udiv-do-while
-; RV32-NEXT: # in Loop: Header=BB2_28 Depth=1
+; RV32-NEXT: beq a6, s11, .LBB2_28
+; RV32-NEXT: # %bb.27: # %udiv-do-while
+; RV32-NEXT: # in Loop: Header=BB2_26 Depth=1
; RV32-NEXT: sltu a0, a6, s11
-; RV32-NEXT: j .LBB2_31
-; RV32-NEXT: .LBB2_30: # in Loop: Header=BB2_28 Depth=1
+; RV32-NEXT: j .LBB2_29
+; RV32-NEXT: .LBB2_28: # in Loop: Header=BB2_26 Depth=1
; RV32-NEXT: lw a0, 12(sp) # 4-byte Folded Reload
; RV32-NEXT: sltu a0, a0, s8
-; RV32-NEXT: .LBB2_31: # %udiv-do-while
-; RV32-NEXT: # in Loop: Header=BB2_28 Depth=1
-; RV32-NEXT: srli t2, s6, 31
+; RV32-NEXT: .LBB2_29: # %udiv-do-while
+; RV32-NEXT: # in Loop: Header=BB2_26 Depth=1
+; RV32-NEXT: srli t1, s6, 31
; RV32-NEXT: slli s9, s9, 1
; RV32-NEXT: srli s7, s7, 31
; RV32-NEXT: slli s10, s6, 1
-; RV32-NEXT: or s6, s9, t2
-; RV32-NEXT: or t2, s10, s7
-; RV32-NEXT: sub s7, t0, t2
-; RV32-NEXT: sltu s9, t0, t2
+; RV32-NEXT: or s6, s9, t1
+; RV32-NEXT: or t1, s10, s7
+; RV32-NEXT: sub s7, a7, t1
+; RV32-NEXT: sltu s9, a7, t1
; RV32-NEXT: lw a4, 16(sp) # 4-byte Folded Reload
; RV32-NEXT: sub s10, a4, s6
; RV32-NEXT: sltu a0, s7, a0
@@ -997,11 +964,44 @@ define i128 @udiv_i128(i128 %x, i128 %y) nounwind {
; RV32-NEXT: and a0, s7, a0
; RV32-NEXT: sltu ra, s8, s10
; RV32-NEXT: mv s9, ra
-; RV32-NEXT: beq s11, a0, .LBB2_27
-; RV32-NEXT: # %bb.32: # %udiv-do-while
-; RV32-NEXT: # in Loop: Header=BB2_28 Depth=1
+; RV32-NEXT: beq s11, a0, .LBB2_25
+; RV32-NEXT: # %bb.30: # %udiv-do-while
+; RV32-NEXT: # in Loop: Header=BB2_26 Depth=1
; RV32-NEXT: sltu s9, s11, a0
-; RV32-NEXT: j .LBB2_27
+; RV32-NEXT: j .LBB2_25
+; RV32-NEXT: .LBB2_31: # %udiv-loop-exit
+; RV32-NEXT: srli a0, t3, 31
+; RV32-NEXT: slli a1, s1, 1
+; RV32-NEXT: srli s1, s1, 31
+; RV32-NEXT: or a0, a1, a0
+; RV32-NEXT: slli a1, a3, 1
+; RV32-NEXT: srli s0, a3, 31
+; RV32-NEXT: slli s2, s2, 1
+; RV32-NEXT: slli t3, t3, 1
+; RV32-NEXT: or t4, a1, s1
+; RV32-NEXT: or a1, s2, s0
+; RV32-NEXT: or t5, t6, t3
+; RV32-NEXT: lw a3, 8(sp) # 4-byte Folded Reload
+; RV32-NEXT: .LBB2_32: # %udiv-end
+; RV32-NEXT: sw t5, 0(a3)
+; RV32-NEXT: sw a0, 4(a3)
+; RV32-NEXT: sw t4, 8(a3)
+; RV32-NEXT: sw a1, 12(a3)
+; RV32-NEXT: lw ra, 140(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw s0, 136(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw s1, 132(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw s2, 128(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw s3, 124(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw s4, 120(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw s5, 116(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw s6, 112(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw s7, 108(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw s8, 104(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw s9, 100(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw s10, 96(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw s11, 92(sp) # 4-byte Folded Reload
+; RV32-NEXT: addi sp, sp, 144
+; RV32-NEXT: ret
;
; RV64-LABEL: udiv_i128:
; RV64: # %bb.0:
@@ -1038,20 +1038,20 @@ define i129 @udiv_i129(i129 %x, i129 %y) nounwind {
; RV32-NEXT: lw a5, 4(a2)
; RV32-NEXT: lw a6, 8(a2)
; RV32-NEXT: lw a0, 12(a2)
-; RV32-NEXT: sw a0, 28(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw a0, 24(sp) # 4-byte Folded Spill
; RV32-NEXT: lui a0, 349525
; RV32-NEXT: lui a2, 209715
; RV32-NEXT: lui a3, 61681
; RV32-NEXT: addi t5, a0, 1365
; RV32-NEXT: addi t4, a2, 819
; RV32-NEXT: addi t3, a3, -241
-; RV32-NEXT: sw a6, 32(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw a6, 28(sp) # 4-byte Folded Spill
; RV32-NEXT: slli a0, a6, 31
; RV32-NEXT: srli a2, a5, 1
-; RV32-NEXT: sw a5, 24(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw a5, 20(sp) # 4-byte Folded Spill
; RV32-NEXT: slli a3, a5, 31
; RV32-NEXT: or a6, a2, a0
-; RV32-NEXT: sw a4, 36(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw a4, 32(sp) # 4-byte Folded Spill
; RV32-NEXT: srli a0, a4, 1
; RV32-NEXT: or a7, a0, a3
; RV32-NEXT: bnez a6, .LBB3_2
@@ -1082,7 +1082,7 @@ define i129 @udiv_i129(i129 %x, i129 %y) nounwind {
; RV32-NEXT: slli a2, a0, 16
; RV32-NEXT: add a0, a0, a2
; RV32-NEXT: srli a0, a0, 24
-; RV32-NEXT: addi a4, a0, 32
+; RV32-NEXT: addi a5, a0, 32
; RV32-NEXT: j .LBB3_3
; RV32-NEXT: .LBB3_2:
; RV32-NEXT: srli a0, a6, 1
@@ -1110,20 +1110,20 @@ define i129 @udiv_i129(i129 %x, i129 %y) nounwind {
; RV32-NEXT: add a0, a0, a2
; RV32-NEXT: slli a2, a0, 16
; RV32-NEXT: add a0, a0, a2
-; RV32-NEXT: srli a4, a0, 24
+; RV32-NEXT: srli a5, a0, 24
; RV32-NEXT: .LBB3_3: # %_udiv-special-cases
-; RV32-NEXT: lw a5, 28(sp) # 4-byte Folded Reload
-; RV32-NEXT: srli a0, a5, 1
+; RV32-NEXT: lw a4, 24(sp) # 4-byte Folded Reload
+; RV32-NEXT: srli a0, a4, 1
; RV32-NEXT: slli a3, t2, 31
-; RV32-NEXT: slli a5, a5, 31
-; RV32-NEXT: lw a2, 32(sp) # 4-byte Folded Reload
+; RV32-NEXT: slli a4, a4, 31
+; RV32-NEXT: lw a2, 28(sp) # 4-byte Folded Reload
; RV32-NEXT: srli t0, a2, 1
-; RV32-NEXT: lw a2, 36(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw a2, 32(sp) # 4-byte Folded Reload
; RV32-NEXT: slli a2, a2, 31
; RV32-NEXT: li s2, 64
; RV32-NEXT: bnez a2, .LBB3_5
; RV32-NEXT: # %bb.4: # %_udiv-special-cases
-; RV32-NEXT: li t1, 64
+; RV32-NEXT: li t6, 64
; RV32-NEXT: j .LBB3_6
; RV32-NEXT: .LBB3_5:
; RV32-NEXT: srli t1, a2, 1
@@ -1151,91 +1151,91 @@ define i129 @udiv_i129(i129 %x, i129 %y) nounwind {
; RV32-NEXT: add t1, t1, t6
; RV32-NEXT: slli t6, t1, 16
; RV32-NEXT: add t1, t1, t6
-; RV32-NEXT: srli t1, t1, 24
+; RV32-NEXT: srli t6, t1, 24
; RV32-NEXT: .LBB3_6: # %_udiv-special-cases
-; RV32-NEXT: or a3, a3, a0
-; RV32-NEXT: or a5, t0, a5
+; RV32-NEXT: or t1, a3, a0
+; RV32-NEXT: or t0, t0, a4
; RV32-NEXT: bnez a2, .LBB3_8
; RV32-NEXT: # %bb.7: # %_udiv-special-cases
-; RV32-NEXT: li t1, 128
+; RV32-NEXT: li t6, 128
; RV32-NEXT: .LBB3_8: # %_udiv-special-cases
-; RV32-NEXT: or t0, a5, a3
-; RV32-NEXT: addi a2, a4, 64
-; RV32-NEXT: addi a0, t1, 128
-; RV32-NEXT: or a6, a6, a3
-; RV32-NEXT: or a7, a7, a5
-; RV32-NEXT: or s3, a7, a6
-; RV32-NEXT: sltu s0, a0, t1
+; RV32-NEXT: or a4, t0, t1
+; RV32-NEXT: addi a3, a5, 64
+; RV32-NEXT: addi a0, t6, 128
+; RV32-NEXT: or a2, a6, t1
+; RV32-NEXT: or a6, a7, t0
+; RV32-NEXT: or s3, a6, a2
+; RV32-NEXT: sltu s0, a0, t6
; RV32-NEXT: bnez s3, .LBB3_11
; RV32-NEXT: # %bb.9: # %_udiv-special-cases
; RV32-NEXT: mv t6, s0
-; RV32-NEXT: beqz a3, .LBB3_12
+; RV32-NEXT: beqz t1, .LBB3_12
; RV32-NEXT: .LBB3_10:
-; RV32-NEXT: srli a4, a3, 1
-; RV32-NEXT: or a3, a3, a4
-; RV32-NEXT: srli a4, a3, 2
-; RV32-NEXT: or a3, a3, a4
-; RV32-NEXT: srli a4, a3, 4
-; RV32-NEXT: or a3, a3, a4
-; RV32-NEXT: srli a4, a3, 8
-; RV32-NEXT: or a3, a3, a4
-; RV32-NEXT: srli a4, a3, 16
-; RV32-NEXT: or a3, a3, a4
-; RV32-NEXT: not a3, a3
-; RV32-NEXT: srli a4, a3, 1
-; RV32-NEXT: and a4, a4, t5
-; RV32-NEXT: sub a3, a3, a4
-; RV32-NEXT: and a4, a3, t4
-; RV32-NEXT: srli a3, a3, 2
-; RV32-NEXT: and a3, a3, t4
-; RV32-NEXT: add a3, a4, a3
-; RV32-NEXT: srli a4, a3, 4
-; RV32-NEXT: add a3, a3, a4
-; RV32-NEXT: and a3, a3, t3
-; RV32-NEXT: slli a4, a3, 8
-; RV32-NEXT: add a3, a3, a4
-; RV32-NEXT: slli a4, a3, 16
-; RV32-NEXT: add a3, a3, a4
-; RV32-NEXT: srli s1, a3, 24
-; RV32-NEXT: beqz t0, .LBB3_13
+; RV32-NEXT: srli a2, t1, 1
+; RV32-NEXT: or a2, t1, a2
+; RV32-NEXT: srli a5, a2, 2
+; RV32-NEXT: or a2, a2, a5
+; RV32-NEXT: srli a5, a2, 4
+; RV32-NEXT: or a2, a2, a5
+; RV32-NEXT: srli a5, a2, 8
+; RV32-NEXT: or a2, a2, a5
+; RV32-NEXT: srli a5, a2, 16
+; RV32-NEXT: or a2, a2, a5
+; RV32-NEXT: not a2, a2
+; RV32-NEXT: srli a5, a2, 1
+; RV32-NEXT: and a5, a5, t5
+; RV32-NEXT: sub a2, a2, a5
+; RV32-NEXT: and a5, a2, t4
+; RV32-NEXT: srli a2, a2, 2
+; RV32-NEXT: and a2, a2, t4
+; RV32-NEXT: add a2, a5, a2
+; RV32-NEXT: srli a5, a2, 4
+; RV32-NEXT: add a2, a2, a5
+; RV32-NEXT: and a2, a2, t3
+; RV32-NEXT: slli a5, a2, 8
+; RV32-NEXT: add a2, a2, a5
+; RV32-NEXT: slli a5, a2, 16
+; RV32-NEXT: add a2, a2, a5
+; RV32-NEXT: srli s1, a2, 24
+; RV32-NEXT: beqz a4, .LBB3_13
; RV32-NEXT: j .LBB3_14
; RV32-NEXT: .LBB3_11:
-; RV32-NEXT: snez a6, t0
-; RV32-NEXT: sltu a4, a2, a4
-; RV32-NEXT: addi a6, a6, -1
-; RV32-NEXT: and t6, a6, a4
-; RV32-NEXT: bnez a3, .LBB3_10
+; RV32-NEXT: snez a2, a4
+; RV32-NEXT: sltu a5, a3, a5
+; RV32-NEXT: addi a2, a2, -1
+; RV32-NEXT: and t6, a2, a5
+; RV32-NEXT: bnez t1, .LBB3_10
; RV32-NEXT: .LBB3_12: # %_udiv-special-cases
-; RV32-NEXT: srli a3, a5, 1
-; RV32-NEXT: or a3, a5, a3
-; RV32-NEXT: srli a4, a3, 2
-; RV32-NEXT: or a3, a3, a4
-; RV32-NEXT: srli a4, a3, 4
-; RV32-NEXT: or a3, a3, a4
-; RV32-NEXT: srli a4, a3, 8
-; RV32-NEXT: or a3, a3, a4
-; RV32-NEXT: srli a4, a3, 16
-; RV32-NEXT: or a3, a3, a4
-; RV32-NEXT: not a3, a3
-; RV32-NEXT: srli a4, a3, 1
-; RV32-NEXT: and a4, a4, t5
-; RV32-NEXT: sub a3, a3, a4
-; RV32-NEXT: and a4, a3, t4
-; RV32-NEXT: srli a3, a3, 2
-; RV32-NEXT: and a3, a3, t4
-; RV32-NEXT: add a3, a4, a3
-; RV32-NEXT: srli a4, a3, 4
-; RV32-NEXT: add a3, a3, a4
-; RV32-NEXT: and a3, a3, t3
-; RV32-NEXT: slli a4, a3, 8
-; RV32-NEXT: add a3, a3, a4
-; RV32-NEXT: slli a4, a3, 16
-; RV32-NEXT: add a3, a3, a4
-; RV32-NEXT: srli a3, a3, 24
-; RV32-NEXT: addi s1, a3, 32
-; RV32-NEXT: bnez t0, .LBB3_14
+; RV32-NEXT: srli a2, t0, 1
+; RV32-NEXT: or a2, t0, a2
+; RV32-NEXT: srli a5, a2, 2
+; RV32-NEXT: or a2, a2, a5
+; RV32-NEXT: srli a5, a2, 4
+; RV32-NEXT: or a2, a2, a5
+; RV32-NEXT: srli a5, a2, 8
+; RV32-NEXT: or a2, a2, a5
+; RV32-NEXT: srli a5, a2, 16
+; RV32-NEXT: or a2, a2, a5
+; RV32-NEXT: not a2, a2
+; RV32-NEXT: srli a5, a2, 1
+; RV32-NEXT: and a5, a5, t5
+; RV32-NEXT: sub a2, a2, a5
+; RV32-NEXT: and a5, a2, t4
+; RV32-NEXT: srli a2, a2, 2
+; RV32-NEXT: and a2, a2, t4
+; RV32-NEXT: add a2, a5, a2
+; RV32-NEXT: srli a5, a2, 4
+; RV32-NEXT: add a2, a2, a5
+; RV32-NEXT: and a2, a2, t3
+; RV32-NEXT: slli a5, a2, 8
+; RV32-NEXT: add a2, a2, a5
+; RV32-NEXT: slli a5, a2, 16
+; RV32-NEXT: add a2, a2, a5
+; RV32-NEXT: srli a2, a2, 24
+; RV32-NEXT: addi s1, a2, 32
+; RV32-NEXT: bnez a4, .LBB3_14
; RV32-NEXT: .LBB3_13: # %_udiv-special-cases
-; RV32-NEXT: mv s1, a2
+; RV32-NEXT: mv s1, a3
; RV32-NEXT: .LBB3_14: # %_udiv-special-cases
; RV32-NEXT: lw a7, 0(a1)
; RV32-NEXT: lw t0, 4(a1)
@@ -1344,179 +1344,179 @@ define i129 @udiv_i129(i129 %x, i129 %y) nounwind {
; RV32-NEXT: add s2, s2, s7
; RV32-NEXT: srli s2, s2, 24
; RV32-NEXT: .LBB3_21: # %_udiv-special-cases
-; RV32-NEXT: or s7, a2, a0
-; RV32-NEXT: or a3, s6, a3
+; RV32-NEXT: or a2, a2, a0
+; RV32-NEXT: or s7, s6, a3
; RV32-NEXT: bnez a5, .LBB3_23
; RV32-NEXT: # %bb.22: # %_udiv-special-cases
; RV32-NEXT: li s2, 128
; RV32-NEXT: .LBB3_23: # %_udiv-special-cases
-; RV32-NEXT: or a2, a3, s7
+; RV32-NEXT: or a3, s7, a2
; RV32-NEXT: addi a0, a4, 64
; RV32-NEXT: addi s6, s2, 128
-; RV32-NEXT: or a5, s4, s7
-; RV32-NEXT: or s4, s5, a3
+; RV32-NEXT: or a5, s4, a2
+; RV32-NEXT: or s4, s5, s7
; RV32-NEXT: or s5, s4, a5
; RV32-NEXT: sltu s4, s6, s2
; RV32-NEXT: bnez s5, .LBB3_26
; RV32-NEXT: # %bb.24: # %_udiv-special-cases
; RV32-NEXT: mv s2, s4
-; RV32-NEXT: snez s3, s3
-; RV32-NEXT: beqz s7, .LBB3_27
+; RV32-NEXT: snez a5, s3
+; RV32-NEXT: beqz a2, .LBB3_27
; RV32-NEXT: .LBB3_25:
-; RV32-NEXT: srli a3, s7, 1
-; RV32-NEXT: or a3, s7, a3
-; RV32-NEXT: srli a4, a3, 2
-; RV32-NEXT: or a3, a3, a4
-; RV32-NEXT: srli a4, a3, 4
-; RV32-NEXT: or a3, a3, a4
-; RV32-NEXT: srli a4, a3, 8
-; RV32-NEXT: or a3, a3, a4
-; RV32-NEXT: srli a4, a3, 16
-; RV32-NEXT: or a3, a3, a4
-; RV32-NEXT: not a3, a3
-; RV32-NEXT: srli a4, a3, 1
+; RV32-NEXT: srli a4, a2, 1
+; RV32-NEXT: or a2, a2, a4
+; RV32-NEXT: srli a4, a2, 2
+; RV32-NEXT: or a2, a2, a4
+; RV32-NEXT: srli a4, a2, 4
+; RV32-NEXT: or a2, a2, a4
+; RV32-NEXT: srli a4, a2, 8
+; RV32-NEXT: or a2, a2, a4
+; RV32-NEXT: srli a4, a2, 16
+; RV32-NEXT: or a2, a2, a4
+; RV32-NEXT: not a2, a2
+; RV32-NEXT: srli a4, a2, 1
; RV32-NEXT: and a4, a4, t5
-; RV32-NEXT: sub a3, a3, a4
-; RV32-NEXT: and a4, a3, t4
-; RV32-NEXT: srli a3, a3, 2
-; RV32-NEXT: and a3, a3, t4
-; RV32-NEXT: add a3, a4, a3
-; RV32-NEXT: srli a4, a3, 4
-; RV32-NEXT: add a3, a3, a4
-; RV32-NEXT: and a3, a3, t3
-; RV32-NEXT: slli a4, a3, 8
-; RV32-NEXT: add a3, a3, a4
-; RV32-NEXT: slli a4, a3, 16
-; RV32-NEXT: add a3, a3, a4
-; RV32-NEXT: srli a4, a3, 24
+; RV32-NEXT: sub a2, a2, a4
+; RV32-NEXT: and a4, a2, t4
+; RV32-NEXT: srli a2, a2, 2
+; RV32-NEXT: and a2, a2, t4
+; RV32-NEXT: add a2, a4, a2
+; RV32-NEXT: srli a4, a2, 4
+; RV32-NEXT: add a2, a2, a4
+; RV32-NEXT: and a2, a2, t3
+; RV32-NEXT: slli a4, a2, 8
+; RV32-NEXT: add a2, a2, a4
+; RV32-NEXT: slli a4, a2, 16
+; RV32-NEXT: add a2, a2, a4
+; RV32-NEXT: srli a4, a2, 24
; RV32-NEXT: j .LBB3_28
; RV32-NEXT: .LBB3_26:
-; RV32-NEXT: snez a5, a2
+; RV32-NEXT: snez a5, a3
; RV32-NEXT: sltu a4, a0, a4
; RV32-NEXT: addi a5, a5, -1
; RV32-NEXT: and s2, a5, a4
-; RV32-NEXT: snez s3, s3
-; RV32-NEXT: bnez s7, .LBB3_25
+; RV32-NEXT: snez a5, s3
+; RV32-NEXT: bnez a2, .LBB3_25
; RV32-NEXT: .LBB3_27: # %_udiv-special-cases
-; RV32-NEXT: srli a4, a3, 1
-; RV32-NEXT: or a3, a3, a4
-; RV32-NEXT: srli a4, a3, 2
-; RV32-NEXT: or a3, a3, a4
-; RV32-NEXT: srli a4, a3, 4
-; RV32-NEXT: or a3, a3, a4
-; RV32-NEXT: srli a4, a3, 8
-; RV32-NEXT: or a3, a3, a4
-; RV32-NEXT: srli a4, a3, 16
-; RV32-NEXT: or a3, a3, a4
-; RV32-NEXT: not a3, a3
-; RV32-NEXT: srli a4, a3, 1
+; RV32-NEXT: srli a2, s7, 1
+; RV32-NEXT: or a2, s7, a2
+; RV32-NEXT: srli a4, a2, 2
+; RV32-NEXT: or a2, a2, a4
+; RV32-NEXT: srli a4, a2, 4
+; RV32-NEXT: or a2, a2, a4
+; RV32-NEXT: srli a4, a2, 8
+; RV32-NEXT: or a2, a2, a4
+; RV32-NEXT: srli a4, a2, 16
+; RV32-NEXT: or a2, a2, a4
+; RV32-NEXT: not a2, a2
+; RV32-NEXT: srli a4, a2, 1
; RV32-NEXT: and a4, a4, t5
-; RV32-NEXT: sub a3, a3, a4
-; RV32-NEXT: and a4, a3, t4
-; RV32-NEXT: srli a3, a3, 2
-; RV32-NEXT: and a3, a3, t4
-; RV32-NEXT: add a3, a4, a3
-; RV32-NEXT: srli a4, a3, 4
-; RV32-NEXT: add a3, a3, a4
-; RV32-NEXT: and a3, a3, t3
-; RV32-NEXT: slli a4, a3, 8
-; RV32-NEXT: add a3, a3, a4
-; RV32-NEXT: slli a4, a3, 16
-; RV32-NEXT: add a3, a3, a4
-; RV32-NEXT: srli a3, a3, 24
-; RV32-NEXT: addi a4, a3, 32
+; RV32-NEXT: sub a2, a2, a4
+; RV32-NEXT: and a4, a2, t4
+; RV32-NEXT: srli a2, a2, 2
+; RV32-NEXT: and a2, a2, t4
+; RV32-NEXT: add a2, a4, a2
+; RV32-NEXT: srli a4, a2, 4
+; RV32-NEXT: add a2, a2, a4
+; RV32-NEXT: and a2, a2, t3
+; RV32-NEXT: slli a4, a2, 8
+; RV32-NEXT: add a2, a2, a4
+; RV32-NEXT: slli a4, a2, 16
+; RV32-NEXT: add a2, a2, a4
+; RV32-NEXT: srli a2, a2, 24
+; RV32-NEXT: addi a4, a2, 32
; RV32-NEXT: .LBB3_28: # %_udiv-special-cases
-; RV32-NEXT: xori a3, s0, 1
-; RV32-NEXT: addi s3, s3, -1
-; RV32-NEXT: bnez a2, .LBB3_30
+; RV32-NEXT: xori a2, s0, 1
+; RV32-NEXT: addi a5, a5, -1
+; RV32-NEXT: bnez a3, .LBB3_30
; RV32-NEXT: # %bb.29: # %_udiv-special-cases
; RV32-NEXT: mv a4, a0
; RV32-NEXT: .LBB3_30: # %_udiv-special-cases
; RV32-NEXT: andi s11, a1, 1
; RV32-NEXT: andi a0, t2, 1
-; RV32-NEXT: lw a1, 36(sp) # 4-byte Folded Reload
-; RV32-NEXT: lw a2, 32(sp) # 4-byte Folded Reload
-; RV32-NEXT: or s9, a1, a2
-; RV32-NEXT: or a5, a7, a6
-; RV32-NEXT: neg a1, a3
-; RV32-NEXT: and t2, s3, s0
+; RV32-NEXT: lw a1, 32(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw a3, 28(sp) # 4-byte Folded Reload
+; RV32-NEXT: or s9, a1, a3
+; RV32-NEXT: or a3, a7, a6
+; RV32-NEXT: neg a1, a2
+; RV32-NEXT: and s0, a5, s0
; RV32-NEXT: bnez s5, .LBB3_32
; RV32-NEXT: # %bb.31: # %_udiv-special-cases
; RV32-NEXT: mv a4, s6
; RV32-NEXT: .LBB3_32: # %_udiv-special-cases
-; RV32-NEXT: lw a2, 24(sp) # 4-byte Folded Reload
-; RV32-NEXT: lw a3, 28(sp) # 4-byte Folded Reload
-; RV32-NEXT: or s10, a2, a3
+; RV32-NEXT: lw a2, 20(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw a5, 24(sp) # 4-byte Folded Reload
+; RV32-NEXT: or s10, a2, a5
; RV32-NEXT: or a2, s9, a0
-; RV32-NEXT: or a3, t0, t1
-; RV32-NEXT: or t4, a5, s11
-; RV32-NEXT: and a1, t2, a1
-; RV32-NEXT: xori a5, s4, 1
+; RV32-NEXT: or a5, t0, t1
+; RV32-NEXT: or t4, a3, s11
+; RV32-NEXT: and a1, s0, a1
+; RV32-NEXT: xori a3, s4, 1
; RV32-NEXT: snez t2, s5
-; RV32-NEXT: neg a5, a5
+; RV32-NEXT: neg a3, a3
; RV32-NEXT: addi t2, t2, -1
; RV32-NEXT: and t3, t2, s4
; RV32-NEXT: sltu t2, s1, a4
-; RV32-NEXT: and t3, t3, a5
-; RV32-NEXT: mv a5, t2
+; RV32-NEXT: and t3, t3, a3
+; RV32-NEXT: mv a3, t2
; RV32-NEXT: beq t6, s2, .LBB3_34
; RV32-NEXT: # %bb.33: # %_udiv-special-cases
-; RV32-NEXT: sltu a5, t6, s2
+; RV32-NEXT: sltu a3, t6, s2
; RV32-NEXT: .LBB3_34: # %_udiv-special-cases
; RV32-NEXT: or a2, a2, s10
-; RV32-NEXT: or a3, t4, a3
+; RV32-NEXT: or a5, t4, a5
; RV32-NEXT: sltu t5, a1, t3
-; RV32-NEXT: mv t4, a5
+; RV32-NEXT: mv t4, a3
; RV32-NEXT: beq a1, t3, .LBB3_36
; RV32-NEXT: # %bb.35: # %_udiv-special-cases
; RV32-NEXT: mv t4, t5
; RV32-NEXT: .LBB3_36: # %_udiv-special-cases
; RV32-NEXT: seqz a2, a2
-; RV32-NEXT: seqz a3, a3
+; RV32-NEXT: seqz a5, a5
; RV32-NEXT: andi t4, t4, 1
; RV32-NEXT: sub t6, t6, s2
; RV32-NEXT: sub a1, a1, t3
; RV32-NEXT: sub t2, t6, t2
-; RV32-NEXT: sltu t3, a1, a5
+; RV32-NEXT: sltu t3, a1, a3
; RV32-NEXT: add t3, t5, t3
; RV32-NEXT: neg t3, t3
-; RV32-NEXT: sub t5, a1, a5
+; RV32-NEXT: sub t5, a1, a3
; RV32-NEXT: or a1, t5, t3
-; RV32-NEXT: sub t6, s1, a4
+; RV32-NEXT: sub a3, s1, a4
; RV32-NEXT: beqz a1, .LBB3_38
; RV32-NEXT: # %bb.37: # %_udiv-special-cases
; RV32-NEXT: snez a1, a1
-; RV32-NEXT: or a2, a2, a3
+; RV32-NEXT: or a2, a2, a5
; RV32-NEXT: bnez t4, .LBB3_39
; RV32-NEXT: j .LBB3_40
; RV32-NEXT: .LBB3_38:
; RV32-NEXT: snez a1, t2
-; RV32-NEXT: sltiu a4, t6, 129
+; RV32-NEXT: sltiu a4, a3, 129
; RV32-NEXT: xori a4, a4, 1
; RV32-NEXT: or a1, a4, a1
-; RV32-NEXT: or a2, a2, a3
+; RV32-NEXT: or a2, a2, a5
; RV32-NEXT: beqz t4, .LBB3_40
; RV32-NEXT: .LBB3_39: # %_udiv-special-cases
; RV32-NEXT: mv a1, t4
; RV32-NEXT: .LBB3_40: # %_udiv-special-cases
-; RV32-NEXT: or a5, a2, a1
-; RV32-NEXT: addi a4, a5, -1
-; RV32-NEXT: and s0, s11, a4
-; RV32-NEXT: and a3, a4, t1
-; RV32-NEXT: and a2, a4, a6
-; RV32-NEXT: and a1, a4, t0
-; RV32-NEXT: and a4, a4, a7
-; RV32-NEXT: bnez a5, .LBB3_57
+; RV32-NEXT: or t6, a2, a1
+; RV32-NEXT: addi a5, t6, -1
+; RV32-NEXT: and s0, s11, a5
+; RV32-NEXT: and a4, a5, t1
+; RV32-NEXT: and a2, a5, a6
+; RV32-NEXT: and a1, a5, t0
+; RV32-NEXT: and a5, a5, a7
+; RV32-NEXT: bnez t6, .LBB3_57
; RV32-NEXT: # %bb.41: # %_udiv-special-cases
-; RV32-NEXT: or a5, t2, t3
-; RV32-NEXT: xori s1, t6, 128
+; RV32-NEXT: or t6, t2, t3
+; RV32-NEXT: xori s1, a3, 128
; RV32-NEXT: or s1, s1, t4
; RV32-NEXT: or s1, s1, t5
-; RV32-NEXT: or a5, s1, a5
-; RV32-NEXT: beqz a5, .LBB3_57
+; RV32-NEXT: or t6, s1, t6
+; RV32-NEXT: beqz t6, .LBB3_57
; RV32-NEXT: # %bb.42: # %udiv-bb1
; RV32-NEXT: sw s8, 8(sp) # 4-byte Folded Spill
-; RV32-NEXT: addi a1, t6, 1
+; RV32-NEXT: addi a1, a3, 1
; RV32-NEXT: sw zero, 136(sp)
; RV32-NEXT: sw zero, 140(sp)
; RV32-NEXT: sw zero, 144(sp)
@@ -1531,23 +1531,23 @@ define i129 @udiv_i129(i129 %x, i129 %y) nounwind {
; RV32-NEXT: sw t1, 164(sp)
; RV32-NEXT: sw s11, 168(sp)
; RV32-NEXT: li a2, 128
-; RV32-NEXT: addi a3, sp, 152
-; RV32-NEXT: neg ra, t6
-; RV32-NEXT: seqz a4, a1
-; RV32-NEXT: sub a2, a2, t6
-; RV32-NEXT: add t2, t2, a4
-; RV32-NEXT: andi a4, a2, 31
+; RV32-NEXT: addi a4, sp, 152
+; RV32-NEXT: neg ra, a3
+; RV32-NEXT: seqz a5, a1
+; RV32-NEXT: sub a2, a2, a3
+; RV32-NEXT: add t2, t2, a5
+; RV32-NEXT: andi a3, a2, 31
; RV32-NEXT: srli a2, a2, 3
; RV32-NEXT: or a5, a1, t2
-; RV32-NEXT: xori s8, a4, 31
+; RV32-NEXT: xori s8, a3, 31
; RV32-NEXT: andi a2, a2, 28
; RV32-NEXT: seqz t6, a5
-; RV32-NEXT: sub a2, a3, a2
+; RV32-NEXT: sub a3, a4, a2
; RV32-NEXT: add t6, t5, t6
-; RV32-NEXT: lw a3, 0(a2)
-; RV32-NEXT: lw a5, 4(a2)
-; RV32-NEXT: lw s1, 8(a2)
-; RV32-NEXT: lw a4, 12(a2)
+; RV32-NEXT: lw a2, 0(a3)
+; RV32-NEXT: lw a5, 4(a3)
+; RV32-NEXT: lw s1, 8(a3)
+; RV32-NEXT: lw a4, 12(a3)
; RV32-NEXT: sltu t5, t6, t5
; RV32-NEXT: or s0, a1, t6
; RV32-NEXT: add t3, t3, t5
@@ -1563,22 +1563,22 @@ define i129 @udiv_i129(i129 %x, i129 %y) nounwind {
; RV32-NEXT: sll s1, s1, ra
; RV32-NEXT: srl s2, s2, s8
; RV32-NEXT: or s2, s1, s2
-; RV32-NEXT: srli s1, a3, 1
+; RV32-NEXT: srli s1, a2, 1
; RV32-NEXT: sll a5, a5, ra
; RV32-NEXT: srl s3, s1, s8
; RV32-NEXT: andi s1, t4, 1
; RV32-NEXT: or s3, a5, s3
; RV32-NEXT: or a5, t5, s1
-; RV32-NEXT: sll t5, a3, ra
+; RV32-NEXT: sll t5, a2, ra
; RV32-NEXT: beqz a5, .LBB3_55
; RV32-NEXT: # %bb.43: # %udiv-preheader
; RV32-NEXT: sw zero, 52(sp) # 4-byte Folded Spill
; RV32-NEXT: sw zero, 48(sp) # 4-byte Folded Spill
; RV32-NEXT: sw zero, 44(sp) # 4-byte Folded Spill
-; RV32-NEXT: li s6, 0
+; RV32-NEXT: sw zero, 40(sp) # 4-byte Folded Spill
; RV32-NEXT: li s7, 0
; RV32-NEXT: srli a4, a4, 1
-; RV32-NEXT: lw a2, 16(a2)
+; RV32-NEXT: lw a3, 16(a3)
; RV32-NEXT: sw zero, 104(sp)
; RV32-NEXT: sw zero, 108(sp)
; RV32-NEXT: sw zero, 112(sp)
@@ -1595,82 +1595,81 @@ define i129 @udiv_i129(i129 %x, i129 %y) nounwind {
; RV32-NEXT: sw t0, 60(sp)
; RV32-NEXT: sw a6, 64(sp)
; RV32-NEXT: sw t1, 68(sp)
-; RV32-NEXT: srli a3, a1, 3
+; RV32-NEXT: srli a2, a1, 3
; RV32-NEXT: addi a5, sp, 56
; RV32-NEXT: andi a6, a1, 31
; RV32-NEXT: or a7, s9, s10
; RV32-NEXT: srl a4, a4, s8
-; RV32-NEXT: andi a3, a3, 28
+; RV32-NEXT: andi a2, a2, 28
; RV32-NEXT: xori a6, a6, 31
; RV32-NEXT: snez a7, a7
-; RV32-NEXT: add a3, a5, a3
+; RV32-NEXT: add a2, a5, a2
; RV32-NEXT: add a0, a0, a7
-; RV32-NEXT: lw a5, 16(a3)
-; RV32-NEXT: lw a7, 0(a3)
-; RV32-NEXT: lw t0, 4(a3)
-; RV32-NEXT: lw t1, 8(a3)
-; RV32-NEXT: lw a3, 12(a3)
-; RV32-NEXT: sll a2, a2, ra
-; RV32-NEXT: or a2, a2, a4
+; RV32-NEXT: lw a5, 16(a2)
+; RV32-NEXT: lw a7, 0(a2)
+; RV32-NEXT: lw t0, 4(a2)
+; RV32-NEXT: lw t1, 8(a2)
+; RV32-NEXT: lw a2, 12(a2)
+; RV32-NEXT: sll a3, a3, ra
+; RV32-NEXT: or a3, a3, a4
; RV32-NEXT: slli a5, a5, 1
-; RV32-NEXT: slli a4, a3, 1
+; RV32-NEXT: slli a4, a2, 1
; RV32-NEXT: slli t4, t1, 1
; RV32-NEXT: slli s4, t0, 1
; RV32-NEXT: sll a5, a5, a6
; RV32-NEXT: sll a4, a4, a6
; RV32-NEXT: sll t4, t4, a6
; RV32-NEXT: sll a6, s4, a6
-; RV32-NEXT: srl a3, a3, a1
-; RV32-NEXT: or s9, a3, a5
-; RV32-NEXT: lw s4, 36(sp) # 4-byte Folded Reload
-; RV32-NEXT: seqz a3, s4
+; RV32-NEXT: srl a2, a2, a1
+; RV32-NEXT: or s9, a2, a5
+; RV32-NEXT: lw s4, 32(sp) # 4-byte Folded Reload
+; RV32-NEXT: seqz a2, s4
; RV32-NEXT: srl a5, t1, a1
; RV32-NEXT: or ra, a5, a4
-; RV32-NEXT: lw a5, 24(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw a5, 20(sp) # 4-byte Folded Reload
; RV32-NEXT: or a4, s4, a5
-; RV32-NEXT: sub a5, a5, a3
-; RV32-NEXT: seqz a3, a4
+; RV32-NEXT: sub s5, a5, a2
+; RV32-NEXT: seqz a2, a4
; RV32-NEXT: srl a4, t0, a1
; RV32-NEXT: or s11, a4, t4
-; RV32-NEXT: lw a4, 32(sp) # 4-byte Folded Reload
-; RV32-NEXT: sub t0, a4, a3
-; RV32-NEXT: sw t0, 40(sp) # 4-byte Folded Spill
-; RV32-NEXT: sltu a3, a4, a3
-; RV32-NEXT: addi a0, a0, 1
; RV32-NEXT: lw a4, 28(sp) # 4-byte Folded Reload
-; RV32-NEXT: sub s5, a4, a3
+; RV32-NEXT: sub a5, a4, a2
+; RV32-NEXT: sw a5, 36(sp) # 4-byte Folded Spill
+; RV32-NEXT: sltu a2, a4, a2
+; RV32-NEXT: addi a0, a0, 1
+; RV32-NEXT: lw a4, 24(sp) # 4-byte Folded Reload
+; RV32-NEXT: sub s6, a4, a2
; RV32-NEXT: andi a0, a0, 1
; RV32-NEXT: sw a0, 16(sp) # 4-byte Folded Spill
-; RV32-NEXT: andi a0, a2, 1
+; RV32-NEXT: andi a0, a3, 1
; RV32-NEXT: srl a2, a7, a1
-; RV32-NEXT: or s8, a2, a6
+; RV32-NEXT: or a3, a2, a6
; RV32-NEXT: addi s4, s4, -1
; RV32-NEXT: sw s4, 12(sp) # 4-byte Folded Spill
-; RV32-NEXT: sw a5, 20(sp) # 4-byte Folded Spill
; RV32-NEXT: j .LBB3_45
; RV32-NEXT: .LBB3_44: # %udiv-do-while
; RV32-NEXT: # in Loop: Header=BB3_45 Depth=1
-; RV32-NEXT: lw a2, 32(sp) # 4-byte Folded Reload
-; RV32-NEXT: and t1, a0, a2
-; RV32-NEXT: xor a2, a6, a3
+; RV32-NEXT: lw a6, 28(sp) # 4-byte Folded Reload
+; RV32-NEXT: and t1, a0, a6
+; RV32-NEXT: xor a6, a4, s8
; RV32-NEXT: xor a7, ra, t1
-; RV32-NEXT: or a2, a7, a2
-; RV32-NEXT: srli a2, s2, 31
+; RV32-NEXT: or a6, a7, a6
+; RV32-NEXT: srli a6, s2, 31
; RV32-NEXT: sltu a7, ra, t1
; RV32-NEXT: sub t1, ra, t1
; RV32-NEXT: slli ra, s0, 1
-; RV32-NEXT: sub a3, a6, a3
-; RV32-NEXT: srli a6, s3, 31
+; RV32-NEXT: sub a4, a4, s8
+; RV32-NEXT: srli s8, s3, 31
; RV32-NEXT: slli s2, s2, 1
-; RV32-NEXT: sub a5, s11, a5
+; RV32-NEXT: sub a2, s11, a2
; RV32-NEXT: srli s11, t5, 31
; RV32-NEXT: slli s3, s3, 1
; RV32-NEXT: srli s0, s0, 31
; RV32-NEXT: slli t5, t5, 1
-; RV32-NEXT: or a2, ra, a2
+; RV32-NEXT: or a6, ra, a6
; RV32-NEXT: or t0, a1, t6
-; RV32-NEXT: or a6, s2, a6
-; RV32-NEXT: or s2, t2, t3
+; RV32-NEXT: or s2, s2, s8
+; RV32-NEXT: or s8, t2, t3
; RV32-NEXT: or s3, s3, s11
; RV32-NEXT: or t4, a1, t2
; RV32-NEXT: lw s4, 52(sp) # 4-byte Folded Reload
@@ -1682,98 +1681,98 @@ define i129 @udiv_i129(i129 %x, i129 %y) nounwind {
; RV32-NEXT: sw a0, 52(sp) # 4-byte Folded Spill
; RV32-NEXT: sub ra, t1, s9
; RV32-NEXT: sltu t1, t1, s9
-; RV32-NEXT: sub a3, a3, a7
-; RV32-NEXT: sub s11, a5, a4
-; RV32-NEXT: or a4, t0, s2
+; RV32-NEXT: sub a4, a4, a7
+; RV32-NEXT: sub s11, a2, a5
+; RV32-NEXT: or a2, t0, s8
; RV32-NEXT: seqz a5, t4
; RV32-NEXT: sub t2, t2, s4
; RV32-NEXT: lw a0, 48(sp) # 4-byte Folded Reload
; RV32-NEXT: or s3, a0, s3
; RV32-NEXT: lw a0, 44(sp) # 4-byte Folded Reload
-; RV32-NEXT: or s2, a0, a6
-; RV32-NEXT: or s0, s6, a2
+; RV32-NEXT: or s2, a0, s2
+; RV32-NEXT: lw a0, 40(sp) # 4-byte Folded Reload
+; RV32-NEXT: or s0, a0, a6
; RV32-NEXT: andi a0, s7, 1
-; RV32-NEXT: sub s9, a3, t1
-; RV32-NEXT: snez a2, a4
-; RV32-NEXT: sltu a3, t6, a5
+; RV32-NEXT: sub s9, a4, t1
+; RV32-NEXT: snez a2, a2
+; RV32-NEXT: sltu a4, t6, a5
; RV32-NEXT: sub t6, t6, a5
; RV32-NEXT: add a2, s1, a2
-; RV32-NEXT: sub t3, t3, a3
-; RV32-NEXT: or a3, a1, t6
+; RV32-NEXT: sub t3, t3, a4
+; RV32-NEXT: or a4, a1, t6
; RV32-NEXT: addi a2, a2, 1
-; RV32-NEXT: or a4, t2, t3
+; RV32-NEXT: or a5, t2, t3
; RV32-NEXT: andi s1, a2, 1
-; RV32-NEXT: or a3, a3, a4
-; RV32-NEXT: or a3, a3, s1
-; RV32-NEXT: sub s8, s10, s8
+; RV32-NEXT: or a4, a4, a5
+; RV32-NEXT: or a4, a4, s1
+; RV32-NEXT: sub a3, s10, a3
; RV32-NEXT: sw zero, 48(sp) # 4-byte Folded Spill
; RV32-NEXT: sw zero, 44(sp) # 4-byte Folded Spill
-; RV32-NEXT: li s6, 0
+; RV32-NEXT: sw zero, 40(sp) # 4-byte Folded Spill
; RV32-NEXT: li s7, 0
-; RV32-NEXT: lw a5, 20(sp) # 4-byte Folded Reload
-; RV32-NEXT: beqz a3, .LBB3_56
+; RV32-NEXT: beqz a4, .LBB3_56
; RV32-NEXT: .LBB3_45: # %udiv-do-while
; RV32-NEXT: # =>This Inner Loop Header: Depth=1
; RV32-NEXT: srli a2, ra, 31
-; RV32-NEXT: slli a3, s9, 1
-; RV32-NEXT: or a6, a3, a2
-; RV32-NEXT: srli a3, s11, 31
+; RV32-NEXT: slli a4, s9, 1
+; RV32-NEXT: or a4, a4, a2
+; RV32-NEXT: srli a2, s11, 31
; RV32-NEXT: slli ra, ra, 1
-; RV32-NEXT: or ra, ra, a3
-; RV32-NEXT: beq s5, a6, .LBB3_47
+; RV32-NEXT: or ra, ra, a2
+; RV32-NEXT: beq s6, a4, .LBB3_47
; RV32-NEXT: # %bb.46: # %udiv-do-while
; RV32-NEXT: # in Loop: Header=BB3_45 Depth=1
-; RV32-NEXT: sltu a3, s5, a6
+; RV32-NEXT: sltu a2, s6, a4
; RV32-NEXT: j .LBB3_48
; RV32-NEXT: .LBB3_47: # in Loop: Header=BB3_45 Depth=1
-; RV32-NEXT: lw a2, 40(sp) # 4-byte Folded Reload
-; RV32-NEXT: sltu a3, a2, ra
+; RV32-NEXT: lw a2, 36(sp) # 4-byte Folded Reload
+; RV32-NEXT: sltu a2, a2, ra
; RV32-NEXT: .LBB3_48: # %udiv-do-while
; RV32-NEXT: # in Loop: Header=BB3_45 Depth=1
-; RV32-NEXT: srli a4, s8, 31
+; RV32-NEXT: srli a5, a3, 31
; RV32-NEXT: slli s11, s11, 1
-; RV32-NEXT: slli s8, s8, 1
-; RV32-NEXT: or s11, s11, a4
+; RV32-NEXT: slli a3, a3, 1
+; RV32-NEXT: or s11, s11, a5
; RV32-NEXT: andi a0, a0, 1
-; RV32-NEXT: or s10, s8, a0
-; RV32-NEXT: beq a5, s11, .LBB3_50
+; RV32-NEXT: or s10, a3, a0
+; RV32-NEXT: beq s5, s11, .LBB3_50
; RV32-NEXT: # %bb.49: # %udiv-do-while
; RV32-NEXT: # in Loop: Header=BB3_45 Depth=1
-; RV32-NEXT: sltu a0, a5, s11
+; RV32-NEXT: sltu a0, s5, s11
; RV32-NEXT: j .LBB3_51
; RV32-NEXT: .LBB3_50: # in Loop: Header=BB3_45 Depth=1
; RV32-NEXT: lw a0, 12(sp) # 4-byte Folded Reload
; RV32-NEXT: sltu a0, a0, s10
; RV32-NEXT: .LBB3_51: # %udiv-do-while
; RV32-NEXT: # in Loop: Header=BB3_45 Depth=1
-; RV32-NEXT: lw a2, 40(sp) # 4-byte Folded Reload
-; RV32-NEXT: xor a4, a2, ra
-; RV32-NEXT: xor a5, s5, a6
-; RV32-NEXT: or a4, a4, a5
-; RV32-NEXT: beqz a4, .LBB3_53
+; RV32-NEXT: lw a3, 36(sp) # 4-byte Folded Reload
+; RV32-NEXT: xor a3, a3, ra
+; RV32-NEXT: xor a5, s6, a4
+; RV32-NEXT: or a3, a3, a5
+; RV32-NEXT: beqz a3, .LBB3_53
; RV32-NEXT: # %bb.52: # %udiv-do-while
; RV32-NEXT: # in Loop: Header=BB3_45 Depth=1
-; RV32-NEXT: mv a0, a3
+; RV32-NEXT: mv a0, a2
; RV32-NEXT: .LBB3_53: # %udiv-do-while
; RV32-NEXT: # in Loop: Header=BB3_45 Depth=1
-; RV32-NEXT: srli a3, s9, 31
-; RV32-NEXT: lw a2, 16(sp) # 4-byte Folded Reload
-; RV32-NEXT: sub a3, a2, a3
-; RV32-NEXT: sub a3, a3, a0
-; RV32-NEXT: slli a0, a3, 31
+; RV32-NEXT: srli a2, s9, 31
+; RV32-NEXT: lw a3, 16(sp) # 4-byte Folded Reload
+; RV32-NEXT: sub a2, a3, a2
+; RV32-NEXT: sub a2, a2, a0
+; RV32-NEXT: slli a0, a2, 31
; RV32-NEXT: srai a0, a0, 31
-; RV32-NEXT: lw a3, 28(sp) # 4-byte Folded Reload
-; RV32-NEXT: and a3, a0, a3
-; RV32-NEXT: lw a2, 36(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw a2, 24(sp) # 4-byte Folded Reload
; RV32-NEXT: and s8, a0, a2
-; RV32-NEXT: lw a5, 24(sp) # 4-byte Folded Reload
-; RV32-NEXT: and a5, a0, a5
-; RV32-NEXT: sltu a4, s10, s8
-; RV32-NEXT: mv s9, a4
-; RV32-NEXT: beq s11, a5, .LBB3_44
+; RV32-NEXT: lw a3, 32(sp) # 4-byte Folded Reload
+; RV32-NEXT: and a3, a0, a3
+; RV32-NEXT: lw a2, 20(sp) # 4-byte Folded Reload
+; RV32-NEXT: and a2, a0, a2
+; RV32-NEXT: sltu a5, s10, a3
+; RV32-NEXT: mv s9, a5
+; RV32-NEXT: beq s11, a2, .LBB3_44
; RV32-NEXT: # %bb.54: # %udiv-do-while
; RV32-NEXT: # in Loop: Header=BB3_45 Depth=1
-; RV32-NEXT: sltu s9, s11, a5
+; RV32-NEXT: sltu s9, s11, a2
; RV32-NEXT: j .LBB3_44
; RV32-NEXT: .LBB3_55:
; RV32-NEXT: sw zero, 52(sp) # 4-byte Folded Spill
@@ -1785,18 +1784,18 @@ define i129 @udiv_i129(i129 %x, i129 %y) nounwind {
; RV32-NEXT: slli a0, s2, 1
; RV32-NEXT: srli a3, s2, 31
; RV32-NEXT: or a2, a0, a2
-; RV32-NEXT: slli a0, s0, 1
+; RV32-NEXT: slli a4, s0, 1
; RV32-NEXT: srli s0, s0, 31
; RV32-NEXT: slli t5, t5, 1
-; RV32-NEXT: or a3, a0, a3
+; RV32-NEXT: or a4, a4, a3
; RV32-NEXT: lw a0, 52(sp) # 4-byte Folded Reload
-; RV32-NEXT: or a4, a0, t5
+; RV32-NEXT: or a5, a0, t5
; RV32-NEXT: lw s8, 8(sp) # 4-byte Folded Reload
; RV32-NEXT: .LBB3_57: # %udiv-end
-; RV32-NEXT: sw a4, 0(s8)
+; RV32-NEXT: sw a5, 0(s8)
; RV32-NEXT: sw a1, 4(s8)
; RV32-NEXT: sw a2, 8(s8)
-; RV32-NEXT: sw a3, 12(s8)
+; RV32-NEXT: sw a4, 12(s8)
; RV32-NEXT: sb s0, 16(s8)
; RV32-NEXT: lw ra, 236(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s0, 232(sp) # 4-byte Folded Reload
diff --git a/llvm/test/CodeGen/X86/div-rem-pair-recomposition-signed.ll b/llvm/test/CodeGen/X86/div-rem-pair-recomposition-signed.ll
index 1a696d546a1a3..00f2e012c8b12 100644
--- a/llvm/test/CodeGen/X86/div-rem-pair-recomposition-signed.ll
+++ b/llvm/test/CodeGen/X86/div-rem-pair-recomposition-signed.ll
@@ -152,88 +152,77 @@ define i128 @scalar_i128(i128 %x, i128 %y, ptr %divdst) nounwind {
; X86-NEXT: pushl %esi
; X86-NEXT: andl $-16, %esp
; X86-NEXT: subl $176, %esp
-; X86-NEXT: movl 32(%ebp), %ecx
-; X86-NEXT: movl 36(%ebp), %edx
-; X86-NEXT: movl %edx, %eax
-; X86-NEXT: sarl $31, %eax
-; X86-NEXT: xorl %eax, %edx
-; X86-NEXT: movl %edx, %edi
-; X86-NEXT: xorl %eax, %ecx
-; X86-NEXT: movl %ecx, %esi
-; X86-NEXT: movl 28(%ebp), %edx
-; X86-NEXT: xorl %eax, %edx
+; X86-NEXT: movl 32(%ebp), %eax
+; X86-NEXT: movl 36(%ebp), %ecx
+; X86-NEXT: movl %ecx, %edx
+; X86-NEXT: sarl $31, %edx
+; X86-NEXT: xorl %edx, %ecx
+; X86-NEXT: movl %ecx, %edi
+; X86-NEXT: xorl %edx, %eax
+; X86-NEXT: movl 28(%ebp), %esi
+; X86-NEXT: xorl %edx, %esi
; X86-NEXT: movl 24(%ebp), %ecx
-; X86-NEXT: xorl %eax, %ecx
-; X86-NEXT: subl %eax, %ecx
+; X86-NEXT: xorl %edx, %ecx
+; X86-NEXT: subl %edx, %ecx
; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: sbbl %eax, %edx
-; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: sbbl %eax, %esi
+; X86-NEXT: sbbl %edx, %esi
; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: sbbl %eax, %edi
+; X86-NEXT: sbbl %edx, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: sbbl %edx, %edi
; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 52(%ebp), %edi
-; X86-NEXT: movl %edi, %edx
+; X86-NEXT: movl 52(%ebp), %ecx
+; X86-NEXT: movl %ecx, %edx
; X86-NEXT: sarl $31, %edx
-; X86-NEXT: xorl %edx, %edi
-; X86-NEXT: movl 48(%ebp), %ecx
-; X86-NEXT: xorl %edx, %ecx
-; X86-NEXT: movl 44(%ebp), %ebx
+; X86-NEXT: movl %ecx, %ebx
; X86-NEXT: xorl %edx, %ebx
-; X86-NEXT: movl 40(%ebp), %esi
+; X86-NEXT: movl 48(%ebp), %esi
; X86-NEXT: xorl %edx, %esi
-; X86-NEXT: subl %edx, %esi
-; X86-NEXT: sbbl %edx, %ebx
-; X86-NEXT: sbbl %edx, %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: sbbl %edx, %edi
-; X86-NEXT: xorl %eax, %edx
+; X86-NEXT: movl 44(%ebp), %eax
+; X86-NEXT: xorl %edx, %eax
+; X86-NEXT: movl 40(%ebp), %edi
+; X86-NEXT: xorl %edx, %edi
+; X86-NEXT: subl %edx, %edi
+; X86-NEXT: sbbl %edx, %eax
+; X86-NEXT: sbbl %edx, %esi
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl %ebx, %eax
-; X86-NEXT: orl %edi, %eax
-; X86-NEXT: movl %esi, %ecx
+; X86-NEXT: sbbl %edx, %ebx
+; X86-NEXT: movl %eax, %ecx
+; X86-NEXT: orl %ebx, %ecx
+; X86-NEXT: movl %edi, %edx
+; X86-NEXT: orl %esi, %edx
+; X86-NEXT: orl %ecx, %edx
+; X86-NEXT: sete %dl
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X86-NEXT: orl %eax, %ecx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT: orl %ecx, %esi
; X86-NEXT: sete %cl
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X86-NEXT: orl %eax, %edx
-; X86-NEXT: sete %al
-; X86-NEXT: orb %cl, %al
-; X86-NEXT: movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
-; X86-NEXT: bsrl %edi, %edx
-; X86-NEXT: xorl $31, %edx
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: bsrl %eax, %ecx
-; X86-NEXT: xorl $31, %ecx
-; X86-NEXT: orl $32, %ecx
-; X86-NEXT: testl %edi, %edi
-; X86-NEXT: cmovnel %edx, %ecx
-; X86-NEXT: bsrl %ebx, %edx
-; X86-NEXT: xorl $31, %edx
-; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: bsrl %esi, %esi
+; X86-NEXT: orb %dl, %cl
+; X86-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT: bsrl %ebx, %esi
; X86-NEXT: xorl $31, %esi
-; X86-NEXT: orl $32, %esi
-; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: bsrl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT: xorl $31, %edx
+; X86-NEXT: orl $32, %edx
; X86-NEXT: testl %ebx, %ebx
-; X86-NEXT: movl %esi, %ebx
-; X86-NEXT: cmovnel %edx, %ebx
-; X86-NEXT: orl $64, %ebx
-; X86-NEXT: movl %eax, %edx
+; X86-NEXT: cmovnel %esi, %edx
+; X86-NEXT: bsrl %eax, %esi
+; X86-NEXT: xorl $31, %esi
; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: orl %edi, %edx
-; X86-NEXT: cmovnel %ecx, %ebx
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: bsrl %eax, %edx
-; X86-NEXT: xorl $31, %edx
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X86-NEXT: bsrl %edi, %ecx
; X86-NEXT: xorl $31, %ecx
; X86-NEXT: orl $32, %ecx
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: testl %eax, %eax
+; X86-NEXT: cmovnel %esi, %ecx
+; X86-NEXT: orl $64, %ecx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: orl %ebx, %esi
; X86-NEXT: cmovnel %edx, %ecx
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-NEXT: bsrl %eax, %esi
@@ -243,49 +232,68 @@ define i128 @scalar_i128(i128 %x, i128 %y, ptr %divdst) nounwind {
; X86-NEXT: orl $32, %edx
; X86-NEXT: testl %eax, %eax
; X86-NEXT: cmovnel %esi, %edx
-; X86-NEXT: orl $64, %edx
-; X86-NEXT: movl %edi, %esi
-; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; X86-NEXT: cmovnel %ecx, %edx
-; X86-NEXT: xorl %edi, %edi
-; X86-NEXT: subl %edx, %ebx
-; X86-NEXT: movl $0, %eax
-; X86-NEXT: sbbl %eax, %eax
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT: bsrl %ebx, %edi
+; X86-NEXT: xorl $31, %edi
+; X86-NEXT: bsrl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT: xorl $31, %esi
+; X86-NEXT: orl $32, %esi
+; X86-NEXT: testl %ebx, %ebx
+; X86-NEXT: cmovnel %edi, %esi
+; X86-NEXT: orl $64, %esi
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT: orl %eax, %edi
+; X86-NEXT: cmovnel %edx, %esi
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: xorl %ebx, %ebx
+; X86-NEXT: subl %esi, %eax
; X86-NEXT: movl $0, %edx
; X86-NEXT: sbbl %edx, %edx
; X86-NEXT: movl $0, %esi
; X86-NEXT: sbbl %esi, %esi
-; X86-NEXT: movl $127, %ecx
-; X86-NEXT: cmpl %ebx, %ecx
-; X86-NEXT: movl $0, %ecx
+; X86-NEXT: movl $0, %edi
+; X86-NEXT: sbbl %edi, %edi
+; X86-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: sbbl %eax, %ecx
-; X86-NEXT: movl $0, %ecx
; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: jne .LBB4_1
+; X86-NEXT: # %bb.2: # %select.false.sink
+; X86-NEXT: movl $127, %ecx
+; X86-NEXT: cmpl %eax, %ecx
+; X86-NEXT: movl $0, %ecx
; X86-NEXT: sbbl %edx, %ecx
; X86-NEXT: movl $0, %ecx
-; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: sbbl %esi, %ecx
+; X86-NEXT: movl $0, %ecx
+; X86-NEXT: sbbl %edi, %ecx
; X86-NEXT: setb %cl
-; X86-NEXT: orb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Folded Reload
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X86-NEXT: cmovnel %edi, %esi
+; X86-NEXT: .LBB4_3: # %select.end
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X86-NEXT: cmovnel %edi, %edx
+; X86-NEXT: xorl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT: testb %cl, %cl
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: cmovnel %edi, %eax
+; X86-NEXT: cmovnel %ebx, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl $0, %edi
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT: cmovnel %edi, %ebx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT: cmovnel %edi, %esi
; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
-; X86-NEXT: jne .LBB4_8
-; X86-NEXT: # %bb.1: # %_udiv-special-cases
-; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: xorl $127, %ebx
-; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X86-NEXT: jne .LBB4_4
+; X86-NEXT: # %bb.10: # %select.end
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: xorl $127, %eax
+; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X86-NEXT: orl %ebx, %ecx
+; X86-NEXT: orl %eax, %ecx
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X86-NEXT: je .LBB4_9
-; X86-NEXT: # %bb.2: # %udiv-bb1
+; X86-NEXT: je .LBB4_11
+; X86-NEXT: # %bb.8: # %udiv-bb1
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: xorps %xmm0, %xmm0
@@ -296,121 +304,112 @@ define i128 @scalar_i128(i128 %x, i128 %y, ptr %divdst) nounwind {
; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X86-NEXT: movl %edx, %ecx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT: movl %ebx, %ecx
; X86-NEXT: xorb $127, %cl
; X86-NEXT: movl %ecx, %eax
; X86-NEXT: shrb $3, %al
; X86-NEXT: andb $12, %al
; X86-NEXT: negb %al
; X86-NEXT: movsbl %al, %eax
-; X86-NEXT: movl 152(%esp,%eax), %ebx
+; X86-NEXT: movl 152(%esp,%eax), %edx
; X86-NEXT: movl 156(%esp,%eax), %esi
-; X86-NEXT: shldl %cl, %ebx, %esi
+; X86-NEXT: shldl %cl, %edx, %esi
; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl 144(%esp,%eax), %esi
; X86-NEXT: movl 148(%esp,%eax), %eax
-; X86-NEXT: shldl %cl, %eax, %ebx
+; X86-NEXT: shldl %cl, %eax, %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: shldl %cl, %esi, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: shll %cl, %esi
; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: addl $1, %edx
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X86-NEXT: adcl $0, %ecx
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: adcl $0, %eax
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X86-NEXT: adcl $0, %esi
-; X86-NEXT: jae .LBB4_5
-; X86-NEXT: # %bb.3:
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X86-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X86-NEXT: jmp .LBB4_7
-; X86-NEXT: .LBB4_5: # %udiv-preheader
+; X86-NEXT: addl $1, %ebx
+; X86-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT: jb .LBB4_9
+; X86-NEXT: # %bb.5: # %udiv-preheader
+; X86-NEXT: movl %ebx, %ecx
; X86-NEXT: movaps %xmm0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl %edx, %eax
+; X86-NEXT: movl %ecx, %eax
; X86-NEXT: shrb $3, %al
; X86-NEXT: andb $12, %al
; X86-NEXT: movzbl %al, %eax
-; X86-NEXT: movl 108(%esp,%eax), %edi
+; X86-NEXT: movl 108(%esp,%eax), %ebx
+; X86-NEXT: movl 104(%esp,%eax), %edx
+; X86-NEXT: movl %edx, %esi
+; X86-NEXT: shrdl %cl, %ebx, %esi
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 96(%esp,%eax), %esi
+; X86-NEXT: movl 100(%esp,%eax), %edi
+; X86-NEXT: movl %edi, %eax
+; X86-NEXT: shrdl %cl, %edx, %eax
+; X86-NEXT: shrl %cl, %ebx
; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl %edx, %ecx
-; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 104(%esp,%eax), %ebx
-; X86-NEXT: movl %ebx, %esi
+; X86-NEXT: # kill: def $cl killed $cl killed $ecx
; X86-NEXT: shrdl %cl, %edi, %esi
; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 96(%esp,%eax), %edx
-; X86-NEXT: movl 100(%esp,%eax), %esi
-; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: shrdl %cl, %ebx, %esi
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X86-NEXT: shrl %cl, %edi
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: addl $-1, %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: adcl $-1, %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: adcl $-1, %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: adcl $-1, %ecx
; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: # kill: def $cl killed $cl killed $ecx
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: shrdl %cl, %eax, %edx
-; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: addl $-1, %eax
-; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: adcl $-1, %eax
-; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: adcl $-1, %eax
-; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: adcl $-1, %eax
-; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X86-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X86-NEXT: .p2align 4
; X86-NEXT: .LBB4_6: # %udiv-do-while
; X86-NEXT: # =>This Inner Loop Header: Depth=1
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT: shldl $1, %edx, %ebx
+; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %eax, %esi
+; X86-NEXT: shldl $1, %eax, %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: shldl $1, %eax, %edi
-; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl %esi, %edx
-; X86-NEXT: shldl $1, %esi, %eax
-; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X86-NEXT: shldl $1, %esi, %edx
-; X86-NEXT: shldl $1, %ecx, %esi
-; X86-NEXT: shldl $1, %ebx, %ecx
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: orl %eax, %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X86-NEXT: shldl $1, %ecx, %ebx
-; X86-NEXT: orl %eax, %ebx
+; X86-NEXT: shldl $1, %eax, %esi
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT: shldl $1, %ebx, %eax
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT: shldl $1, %edi, %ebx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT: orl %edx, %ebx
; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: shldl $1, %ecx, %edi
+; X86-NEXT: orl %edx, %edi
+; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X86-NEXT: shldl $1, %edi, %ecx
-; X86-NEXT: orl %eax, %ecx
+; X86-NEXT: orl %edx, %ecx
; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: addl %edi, %edi
; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: cmpl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT: cmpl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X86-NEXT: sbbl %edx, %ecx
+; X86-NEXT: sbbl %esi, %ecx
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT: sbbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: sbbl %eax, %ecx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT: sbbl %edx, %ecx
; X86-NEXT: sarl $31, %ecx
; X86-NEXT: movl %ecx, %edi
; X86-NEXT: andl $1, %edi
@@ -424,73 +423,70 @@ define i128 @scalar_i128(i128 %x, i128 %y, ptr %divdst) nounwind {
; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X86-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X86-NEXT: subl %ecx, %esi
-; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: sbbl %edi, %edx
-; X86-NEXT: movl %edx, %esi
+; X86-NEXT: subl %ecx, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: sbbl %edi, %esi
+; X86-NEXT: movl %esi, %eax
; X86-NEXT: sbbl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X86-NEXT: movl %eax, %edi
-; X86-NEXT: sbbl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT: movl %edx, %ebx
+; X86-NEXT: sbbl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X86-NEXT: addl $-1, %edx
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT: adcl $-1, %ecx
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X86-NEXT: adcl $-1, %ebx
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: adcl $-1, %eax
+; X86-NEXT: adcl $-1, %esi
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT: adcl $-1, %edi
; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: orl %eax, %ecx
+; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: orl %edi, %ecx
; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: orl %ebx, %edx
+; X86-NEXT: orl %esi, %edx
; X86-NEXT: orl %ecx, %edx
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
; X86-NEXT: jne .LBB4_6
; X86-NEXT: .LBB4_7: # %udiv-loop-exit
-; X86-NEXT: shldl $1, %ebx, %ecx
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: shldl $1, %eax, %ebx
-; X86-NEXT: movl %ebx, %edx
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-NEXT: shldl $1, %esi, %eax
+; X86-NEXT: shldl $1, %ecx, %esi
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT: shldl $1, %edx, %ecx
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X86-NEXT: leal (%edi,%esi,2), %edi
-; X86-NEXT: movl %ecx, %esi
-; X86-NEXT: .LBB4_8: # %udiv-end
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X86-NEXT: .LBB4_9: # %udiv-end
-; X86-NEXT: xorl %ecx, %esi
-; X86-NEXT: movl %edx, %ebx
-; X86-NEXT: xorl %ecx, %ebx
-; X86-NEXT: xorl %ecx, %eax
-; X86-NEXT: xorl %ecx, %edi
-; X86-NEXT: subl %ecx, %edi
-; X86-NEXT: sbbl %ecx, %eax
-; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: sbbl %ecx, %ebx
-; X86-NEXT: sbbl %ecx, %esi
-; X86-NEXT: movl 56(%ebp), %ecx
-; X86-NEXT: movl %edi, (%ecx)
-; X86-NEXT: movl %eax, 4(%ecx)
-; X86-NEXT: movl %ebx, 8(%ecx)
-; X86-NEXT: movl %esi, 12(%ecx)
-; X86-NEXT: movl %edi, %ecx
-; X86-NEXT: movl 40(%ebp), %edi
-; X86-NEXT: mull %edi
-; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl %ecx, %edi
+; X86-NEXT: leal (%edi,%edx,2), %ebx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT: movl %eax, %edi
+; X86-NEXT: .LBB4_11: # %udiv-end
+; X86-NEXT: xorl %edx, %edi
+; X86-NEXT: xorl %edx, %esi
+; X86-NEXT: xorl %edx, %ecx
+; X86-NEXT: xorl %edx, %ebx
+; X86-NEXT: subl %edx, %ebx
+; X86-NEXT: sbbl %edx, %ecx
; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: sbbl %edx, %esi
+; X86-NEXT: sbbl %edx, %edi
+; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 56(%ebp), %eax
+; X86-NEXT: movl %ebx, (%eax)
+; X86-NEXT: movl %ecx, 4(%eax)
+; X86-NEXT: movl %esi, 8(%eax)
+; X86-NEXT: movl %edi, 12(%eax)
; X86-NEXT: movl %ecx, %eax
-; X86-NEXT: mull 40(%ebp)
+; X86-NEXT: movl 40(%ebp), %ecx
+; X86-NEXT: mull %ecx
+; X86-NEXT: movl %edx, %edi
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %ebx, %eax
+; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: mull %ecx
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl %edx, %ecx
; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X86-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X86-NEXT: movl %edi, %eax
+; X86-NEXT: adcl $0, %edi
+; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %ebx, %eax
; X86-NEXT: movl %esi, %edi
; X86-NEXT: movl 44(%ebp), %esi
; X86-NEXT: mull %esi
@@ -498,22 +494,23 @@ define i128 @scalar_i128(i128 %x, i128 %y, ptr %divdst) nounwind {
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X86-NEXT: movl %edx, %ecx
-; X86-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X86-NEXT: setb %bl
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X86-NEXT: movl %esi, %eax
; X86-NEXT: mull 44(%ebp)
; X86-NEXT: addl %ecx, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
+; X86-NEXT: movzbl %bl, %eax
; X86-NEXT: adcl %eax, %edx
; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT: movl 40(%ebp), %eax
-; X86-NEXT: imull %eax, %edi
-; X86-NEXT: mull %ebx
+; X86-NEXT: imull %eax, %ecx
+; X86-NEXT: mull %edi
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: imull 44(%ebp), %ebx
-; X86-NEXT: addl %edx, %ebx
-; X86-NEXT: addl %edi, %ebx
+; X86-NEXT: imull 44(%ebp), %edi
+; X86-NEXT: addl %edx, %edi
+; X86-NEXT: addl %ecx, %edi
; X86-NEXT: movl 48(%ebp), %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: imull %esi, %ecx
@@ -524,28 +521,38 @@ define i128 @scalar_i128(i128 %x, i128 %y, ptr %divdst) nounwind {
; X86-NEXT: addl %edx, %esi
; X86-NEXT: addl %ecx, %esi
; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X86-NEXT: adcl %ebx, %esi
+; X86-NEXT: adcl %edi, %esi
; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
; X86-NEXT: movl 24(%ebp), %edx
; X86-NEXT: subl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X86-NEXT: movl 28(%ebp), %ecx
; X86-NEXT: sbbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X86-NEXT: movl 32(%ebp), %edi
-; X86-NEXT: sbbl %eax, %edi
-; X86-NEXT: movl 36(%ebp), %ebx
-; X86-NEXT: sbbl %esi, %ebx
+; X86-NEXT: movl 32(%ebp), %ebx
+; X86-NEXT: sbbl %eax, %ebx
+; X86-NEXT: movl 36(%ebp), %edi
+; X86-NEXT: sbbl %esi, %edi
; X86-NEXT: movl 8(%ebp), %eax
; X86-NEXT: movl %edx, (%eax)
; X86-NEXT: movl %ecx, 4(%eax)
-; X86-NEXT: movl %edi, 8(%eax)
-; X86-NEXT: movl %ebx, 12(%eax)
+; X86-NEXT: movl %ebx, 8(%eax)
+; X86-NEXT: movl %edi, 12(%eax)
; X86-NEXT: leal -12(%ebp), %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
; X86-NEXT: popl %ebp
; X86-NEXT: retl $4
+; X86-NEXT: .LBB4_1:
+; X86-NEXT: movb $1, %cl
+; X86-NEXT: jmp .LBB4_3
+; X86-NEXT: .LBB4_9:
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT: jmp .LBB4_7
+; X86-NEXT: .LBB4_4:
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: jmp .LBB4_11
;
; X64-LABEL: scalar_i128:
; X64: # %bb.0:
diff --git a/llvm/test/CodeGen/X86/div-rem-pair-recomposition-unsigned.ll b/llvm/test/CodeGen/X86/div-rem-pair-recomposition-unsigned.ll
index 7f5ede7a858d2..3d756f3cf2141 100644
--- a/llvm/test/CodeGen/X86/div-rem-pair-recomposition-unsigned.ll
+++ b/llvm/test/CodeGen/X86/div-rem-pair-recomposition-unsigned.ll
@@ -152,359 +152,362 @@ define i128 @scalar_i128(i128 %x, i128 %y, ptr %divdst) nounwind {
; X86-NEXT: pushl %esi
; X86-NEXT: andl $-16, %esp
; X86-NEXT: subl $160, %esp
-; X86-NEXT: movl 48(%ebp), %esi
+; X86-NEXT: movl 48(%ebp), %ebx
; X86-NEXT: movl 40(%ebp), %ecx
-; X86-NEXT: movl 52(%ebp), %edi
-; X86-NEXT: movl 44(%ebp), %eax
-; X86-NEXT: orl %edi, %eax
-; X86-NEXT: orl %esi, %ecx
+; X86-NEXT: movl 52(%ebp), %esi
+; X86-NEXT: movl 44(%ebp), %edi
+; X86-NEXT: movl %edi, %eax
+; X86-NEXT: orl %esi, %eax
+; X86-NEXT: orl %ebx, %ecx
; X86-NEXT: orl %eax, %ecx
-; X86-NEXT: sete %bl
+; X86-NEXT: sete %cl
; X86-NEXT: movl 28(%ebp), %eax
; X86-NEXT: orl 36(%ebp), %eax
; X86-NEXT: movl 24(%ebp), %edx
; X86-NEXT: orl 32(%ebp), %edx
; X86-NEXT: orl %eax, %edx
; X86-NEXT: sete %al
-; X86-NEXT: orb %bl, %al
+; X86-NEXT: orb %cl, %al
; X86-NEXT: movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
-; X86-NEXT: bsrl %edi, %edx
+; X86-NEXT: bsrl %esi, %edx
; X86-NEXT: xorl $31, %edx
-; X86-NEXT: bsrl %esi, %ecx
+; X86-NEXT: bsrl %ebx, %ecx
+; X86-NEXT: movl %ebx, %eax
; X86-NEXT: xorl $31, %ecx
; X86-NEXT: orl $32, %ecx
-; X86-NEXT: testl %edi, %edi
+; X86-NEXT: testl %esi, %esi
; X86-NEXT: cmovnel %edx, %ecx
-; X86-NEXT: movl 44(%ebp), %eax
-; X86-NEXT: bsrl %eax, %edx
+; X86-NEXT: bsrl %edi, %edx
; X86-NEXT: xorl $31, %edx
; X86-NEXT: bsrl 40(%ebp), %ebx
; X86-NEXT: xorl $31, %ebx
; X86-NEXT: orl $32, %ebx
-; X86-NEXT: testl %eax, %eax
+; X86-NEXT: testl %edi, %edi
; X86-NEXT: cmovnel %edx, %ebx
; X86-NEXT: orl $64, %ebx
-; X86-NEXT: orl %edi, %esi
+; X86-NEXT: movl %eax, %edx
+; X86-NEXT: orl %esi, %edx
; X86-NEXT: cmovnel %ecx, %ebx
-; X86-NEXT: movl 36(%ebp), %esi
-; X86-NEXT: bsrl %esi, %edx
+; X86-NEXT: movl 36(%ebp), %edi
+; X86-NEXT: bsrl %edi, %edx
; X86-NEXT: xorl $31, %edx
-; X86-NEXT: movl 32(%ebp), %eax
-; X86-NEXT: bsrl %eax, %ecx
+; X86-NEXT: bsrl 32(%ebp), %ecx
; X86-NEXT: xorl $31, %ecx
; X86-NEXT: orl $32, %ecx
-; X86-NEXT: testl %esi, %esi
+; X86-NEXT: testl %edi, %edi
; X86-NEXT: cmovnel %edx, %ecx
-; X86-NEXT: movl 28(%ebp), %edi
-; X86-NEXT: bsrl %edi, %esi
+; X86-NEXT: movl 28(%ebp), %eax
+; X86-NEXT: bsrl %eax, %esi
; X86-NEXT: xorl $31, %esi
; X86-NEXT: bsrl 24(%ebp), %edx
; X86-NEXT: xorl $31, %edx
; X86-NEXT: orl $32, %edx
-; X86-NEXT: testl %edi, %edi
+; X86-NEXT: testl %eax, %eax
+; X86-NEXT: movl 32(%ebp), %eax
; X86-NEXT: cmovnel %esi, %edx
; X86-NEXT: orl $64, %edx
; X86-NEXT: movl %eax, %esi
-; X86-NEXT: movl 36(%ebp), %edi
; X86-NEXT: orl %edi, %esi
; X86-NEXT: cmovnel %ecx, %edx
+; X86-NEXT: xorl %ecx, %ecx
; X86-NEXT: subl %edx, %ebx
-; X86-NEXT: movl $0, %eax
-; X86-NEXT: sbbl %eax, %eax
-; X86-NEXT: movl $0, %ecx
-; X86-NEXT: sbbl %ecx, %ecx
+; X86-NEXT: movl %ebx, %edx
; X86-NEXT: movl $0, %esi
; X86-NEXT: sbbl %esi, %esi
-; X86-NEXT: movl $127, %edx
-; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: cmpl %ebx, %edx
-; X86-NEXT: movl $0, %edx
-; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: sbbl %eax, %edx
-; X86-NEXT: movl $0, %edx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: sbbl %ecx, %edx
-; X86-NEXT: movl $0, %edx
+; X86-NEXT: movl $0, %ebx
+; X86-NEXT: sbbl %ebx, %ebx
+; X86-NEXT: movl $0, %edi
+; X86-NEXT: sbbl %edi, %edi
+; X86-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: sbbl %esi, %edx
-; X86-NEXT: setb %dl
-; X86-NEXT: orb {{[-0-9]+}}(%e{{[sb]}}p), %dl # 1-byte Folded Reload
-; X86-NEXT: movl %edi, %edx
+; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: jne .LBB4_1
+; X86-NEXT: # %bb.2: # %select.false.sink
+; X86-NEXT: movl $127, %eax
+; X86-NEXT: cmpl %edx, %eax
+; X86-NEXT: movl $0, %eax
+; X86-NEXT: sbbl %esi, %eax
+; X86-NEXT: movl $0, %eax
+; X86-NEXT: sbbl %ebx, %eax
; X86-NEXT: movl $0, %eax
-; X86-NEXT: cmovnel %eax, %edx
-; X86-NEXT: movl 32(%ebp), %ebx
-; X86-NEXT: cmovnel %eax, %ebx
-; X86-NEXT: movl 28(%ebp), %edi
-; X86-NEXT: cmovnel %eax, %edi
-; X86-NEXT: movl $0, %ecx
+; X86-NEXT: sbbl %edi, %eax
+; X86-NEXT: setb %al
+; X86-NEXT: .LBB4_3: # %select.end
+; X86-NEXT: testb %al, %al
+; X86-NEXT: movl 28(%ebp), %edx
+; X86-NEXT: cmovnel %ecx, %edx
; X86-NEXT: movl 24(%ebp), %eax
; X86-NEXT: cmovnel %ecx, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 56(%ebp), %esi
-; X86-NEXT: jne .LBB4_6
-; X86-NEXT: # %bb.1: # %_udiv-special-cases
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl 32(%ebp), %edi
+; X86-NEXT: cmovnel %ecx, %edi
+; X86-NEXT: movl 36(%ebp), %ebx
+; X86-NEXT: cmovnel %ecx, %ebx
+; X86-NEXT: jne .LBB4_9
+; X86-NEXT: # %bb.4: # %select.end
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT: movl %esi, %eax
; X86-NEXT: xorl $127, %eax
; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X86-NEXT: orl %eax, %ecx
-; X86-NEXT: je .LBB4_6
-; X86-NEXT: # %bb.2: # %udiv-bb1
+; X86-NEXT: movl 28(%ebp), %ecx
+; X86-NEXT: je .LBB4_9
+; X86-NEXT: # %bb.5: # %udiv-bb1
; X86-NEXT: movl 24(%ebp), %edi
; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
; X86-NEXT: xorps %xmm0, %xmm0
; X86-NEXT: movaps %xmm0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl 28(%ebp), %edx
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl 32(%ebp), %edx
; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl 32(%ebp), %esi
-; X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
-; X86-NEXT: movl 36(%ebp), %eax
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X86-NEXT: movl %ebx, %ecx
+; X86-NEXT: movl 36(%ebp), %ecx
+; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %esi, %edx
+; X86-NEXT: movl %edx, %ecx
; X86-NEXT: xorb $127, %cl
; X86-NEXT: movl %ecx, %eax
; X86-NEXT: shrb $3, %al
; X86-NEXT: andb $12, %al
; X86-NEXT: negb %al
; X86-NEXT: movsbl %al, %eax
-; X86-NEXT: movl 136(%esp,%eax), %edx
-; X86-NEXT: movl 140(%esp,%eax), %edi
-; X86-NEXT: shldl %cl, %edx, %edi
-; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 128(%esp,%eax), %esi
-; X86-NEXT: movl 132(%esp,%eax), %eax
-; X86-NEXT: shldl %cl, %eax, %edx
-; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: shldl %cl, %esi, %eax
-; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: shll %cl, %esi
-; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X86-NEXT: addl $1, %ebx
+; X86-NEXT: movl 136(%esp,%eax), %esi
+; X86-NEXT: movl 140(%esp,%eax), %ebx
+; X86-NEXT: shldl %cl, %esi, %ebx
; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X86-NEXT: adcl $0, %ebx
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: adcl $0, %eax
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X86-NEXT: adcl $0, %ecx
-; X86-NEXT: jae .LBB4_3
-; X86-NEXT: # %bb.7:
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: jmp .LBB4_5
-; X86-NEXT: .LBB4_3: # %udiv-preheader
+; X86-NEXT: movl 128(%esp,%eax), %edi
+; X86-NEXT: movl 132(%esp,%eax), %ebx
+; X86-NEXT: shldl %cl, %ebx, %esi
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: shldl %cl, %edi, %ebx
+; X86-NEXT: shll %cl, %edi
+; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: addl $1, %edx
+; X86-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT: jb .LBB4_10
+; X86-NEXT: # %bb.6: # %udiv-preheader
; X86-NEXT: movaps %xmm0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl 24(%ebp), %edx
-; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl 28(%ebp), %edx
-; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl 32(%ebp), %edx
-; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl 36(%ebp), %edx
-; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: movl 24(%ebp), %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl 28(%ebp), %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl 32(%ebp), %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl 36(%ebp), %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, %eax
; X86-NEXT: shrb $3, %al
; X86-NEXT: andb $12, %al
; X86-NEXT: movzbl %al, %eax
-; X86-NEXT: movl 92(%esp,%eax), %edi
-; X86-NEXT: movl 88(%esp,%eax), %edx
-; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: shrdl %cl, %edi, %edx
-; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 80(%esp,%eax), %esi
+; X86-NEXT: movl 92(%esp,%eax), %esi
+; X86-NEXT: movl 88(%esp,%eax), %edi
+; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %edx, %ecx
+; X86-NEXT: shrdl %cl, %esi, %edi
+; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 80(%esp,%eax), %edx
; X86-NEXT: movl 84(%esp,%eax), %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X86-NEXT: shrdl %cl, %edx, %eax
-; X86-NEXT: movl %eax, %edx
-; X86-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X86-NEXT: shrl %cl, %edi
-; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT: shrdl %cl, %edi, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: shrl %cl, %esi
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: # kill: def $cl killed $cl killed $ecx
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: shrdl %cl, %eax, %esi
-; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 40(%ebp), %eax
-; X86-NEXT: addl $-1, %eax
-; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 44(%ebp), %eax
-; X86-NEXT: adcl $-1, %eax
-; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 48(%ebp), %eax
-; X86-NEXT: adcl $-1, %eax
-; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 52(%ebp), %eax
-; X86-NEXT: adcl $-1, %eax
-; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: shrdl %cl, %eax, %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 40(%ebp), %ecx
+; X86-NEXT: addl $-1, %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 44(%ebp), %ecx
+; X86-NEXT: adcl $-1, %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 48(%ebp), %ecx
+; X86-NEXT: adcl $-1, %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 52(%ebp), %ecx
+; X86-NEXT: adcl $-1, %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X86-NEXT: .p2align 4
-; X86-NEXT: .LBB4_4: # %udiv-do-while
+; X86-NEXT: .LBB4_7: # %udiv-do-while
; X86-NEXT: # =>This Inner Loop Header: Depth=1
-; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X86-NEXT: shldl $1, %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X86-NEXT: movl %edx, %edi
-; X86-NEXT: shldl $1, %edx, %ebx
-; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X86-NEXT: shldl $1, %edx, %edi
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X86-NEXT: shldl $1, %ebx, %edx
-; X86-NEXT: shldl $1, %ecx, %ebx
+; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: shldl $1, %eax, %esi
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT: shldl $1, %edi, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: shldl $1, %eax, %edi
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X86-NEXT: orl %esi, %ebx
-; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: shldl $1, %eax, %ecx
-; X86-NEXT: orl %esi, %ecx
+; X86-NEXT: shldl $1, %esi, %eax
+; X86-NEXT: shldl $1, %ecx, %esi
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT: orl %edx, %esi
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: shldl $1, %ebx, %ecx
+; X86-NEXT: orl %edx, %ecx
; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X86-NEXT: shldl $1, %ecx, %eax
-; X86-NEXT: orl %esi, %eax
-; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: shldl $1, %ecx, %ebx
+; X86-NEXT: orl %edx, %ebx
+; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: addl %ecx, %ecx
; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: cmpl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: sbbl %edi, %eax
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: sbbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT: cmpl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X86-NEXT: sbbl %esi, %ecx
+; X86-NEXT: sbbl %edi, %ecx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: sbbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT: sbbl %edx, %ecx
; X86-NEXT: sarl $31, %ecx
-; X86-NEXT: movl %ecx, %eax
-; X86-NEXT: andl $1, %eax
-; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl %ecx, %eax
-; X86-NEXT: andl 52(%ebp), %eax
-; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl %ecx, %eax
-; X86-NEXT: andl 48(%ebp), %eax
+; X86-NEXT: movl %ecx, %esi
+; X86-NEXT: andl $1, %esi
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %ecx, %esi
+; X86-NEXT: andl 52(%ebp), %esi
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %ecx, %esi
+; X86-NEXT: andl 48(%ebp), %esi
; X86-NEXT: movl %ecx, %ebx
; X86-NEXT: andl 44(%ebp), %ebx
; X86-NEXT: andl 40(%ebp), %ecx
; X86-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X86-NEXT: subl %ecx, %edx
-; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: subl %ecx, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: sbbl %ebx, %edi
-; X86-NEXT: movl %edi, %edx
+; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X86-NEXT: sbbl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X86-NEXT: movl %esi, %eax
-; X86-NEXT: sbbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X86-NEXT: addl $-1, %ecx
-; X86-NEXT: adcl $-1, %ebx
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X86-NEXT: adcl $-1, %esi
+; X86-NEXT: sbbl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT: movl %edx, %esi
+; X86-NEXT: sbbl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT: addl $-1, %edx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: adcl $-1, %ecx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: adcl $-1, %eax
; X86-NEXT: adcl $-1, %edi
-; X86-NEXT: movl %ebx, %eax
-; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: orl %edi, %eax
; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: orl %esi, %ecx
-; X86-NEXT: orl %eax, %ecx
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: orl %edi, %ecx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: orl %eax, %edx
+; X86-NEXT: orl %ecx, %edx
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X86-NEXT: jne .LBB4_4
-; X86-NEXT: .LBB4_5: # %udiv-loop-exit
+; X86-NEXT: jne .LBB4_7
+; X86-NEXT: .LBB4_8: # %udiv-loop-exit
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: shldl $1, %ecx, %eax
+; X86-NEXT: shldl $1, %ebx, %ecx
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X86-NEXT: shldl $1, %ecx, %edx
-; X86-NEXT: shldl $1, %eax, %ecx
+; X86-NEXT: shldl $1, %edx, %ebx
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X86-NEXT: shldl $1, %esi, %eax
-; X86-NEXT: movl %eax, %edi
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: leal (%eax,%esi,2), %eax
-; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl %ecx, %ebx
-; X86-NEXT: movl 56(%ebp), %esi
-; X86-NEXT: .LBB4_6: # %udiv-end
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, (%esi)
-; X86-NEXT: movl %edi, 4(%esi)
-; X86-NEXT: movl %ebx, 8(%esi)
-; X86-NEXT: movl %edx, 12(%esi)
+; X86-NEXT: leal (%esi,%edx,2), %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %ecx, %edi
+; X86-NEXT: movl %ebx, %edx
+; X86-NEXT: movl %eax, %ebx
+; X86-NEXT: .LBB4_9: # %udiv-end
; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl %edx, %ecx
-; X86-NEXT: movl 48(%ebp), %eax
-; X86-NEXT: movl %eax, %esi
-; X86-NEXT: imull %edi, %esi
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X86-NEXT: mull %edi
-; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: addl %esi, %edx
-; X86-NEXT: movl 52(%ebp), %eax
-; X86-NEXT: imull %edi, %eax
-; X86-NEXT: addl %edx, %eax
-; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 40(%ebp), %edi
-; X86-NEXT: movl %edi, %eax
-; X86-NEXT: mull %ebx
-; X86-NEXT: imull %edi, %ecx
-; X86-NEXT: addl %edx, %ecx
-; X86-NEXT: movl 44(%ebp), %esi
-; X86-NEXT: imull %esi, %ebx
-; X86-NEXT: addl %ecx, %ebx
-; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X86-NEXT: movl 56(%ebp), %eax
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X86-NEXT: movl %esi, %eax
-; X86-NEXT: mull %edi
+; X86-NEXT: movl %esi, (%eax)
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %edx, 4(%eax)
+; X86-NEXT: movl %edi, 8(%eax)
+; X86-NEXT: movl %ebx, 12(%eax)
+; X86-NEXT: movl 48(%ebp), %eax
+; X86-NEXT: movl %eax, %ecx
+; X86-NEXT: imull %edx, %ecx
+; X86-NEXT: mull %esi
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: mull %edi
+; X86-NEXT: addl %ecx, %edx
+; X86-NEXT: movl 52(%ebp), %edi
+; X86-NEXT: imull %esi, %edi
+; X86-NEXT: addl %edx, %edi
+; X86-NEXT: movl 40(%ebp), %eax
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT: mull %esi
; X86-NEXT: movl %eax, %ecx
+; X86-NEXT: imull 40(%ebp), %ebx
+; X86-NEXT: addl %edx, %ebx
+; X86-NEXT: movl 44(%ebp), %eax
+; X86-NEXT: imull %eax, %esi
+; X86-NEXT: addl %ebx, %esi
; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: adcl %edi, %esi
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT: movl %edi, %eax
+; X86-NEXT: movl 40(%ebp), %ecx
+; X86-NEXT: mull %ecx
+; X86-NEXT: movl %edx, %esi
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT: movl %ebx, %eax
+; X86-NEXT: mull %ecx
+; X86-NEXT: movl %eax, %ecx
+; X86-NEXT: addl %esi, %ecx
; X86-NEXT: adcl $0, %edx
-; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl %esi, %eax
+; X86-NEXT: movl %edx, %esi
+; X86-NEXT: movl %edi, %eax
; X86-NEXT: mull 44(%ebp)
; X86-NEXT: movl %edx, %edi
-; X86-NEXT: movl %eax, %esi
-; X86-NEXT: addl %ecx, %esi
-; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X86-NEXT: addl %ecx, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: adcl %esi, %edi
; X86-NEXT: setb %cl
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %ebx, %eax
; X86-NEXT: mull 44(%ebp)
; X86-NEXT: addl %edi, %eax
; X86-NEXT: movzbl %cl, %ecx
; X86-NEXT: adcl %ecx, %edx
; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X86-NEXT: adcl %ebx, %edx
+; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X86-NEXT: movl 24(%ebp), %edi
; X86-NEXT: subl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
-; X86-NEXT: movl 28(%ebp), %ecx
-; X86-NEXT: sbbl %esi, %ecx
-; X86-NEXT: movl 32(%ebp), %esi
-; X86-NEXT: sbbl %eax, %esi
-; X86-NEXT: movl 36(%ebp), %ebx
-; X86-NEXT: sbbl %edx, %ebx
+; X86-NEXT: movl 28(%ebp), %ebx
+; X86-NEXT: sbbl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X86-NEXT: movl 32(%ebp), %ecx
+; X86-NEXT: sbbl %eax, %ecx
+; X86-NEXT: movl 36(%ebp), %esi
+; X86-NEXT: sbbl %edx, %esi
; X86-NEXT: movl 8(%ebp), %eax
; X86-NEXT: movl %edi, (%eax)
-; X86-NEXT: movl %ecx, 4(%eax)
-; X86-NEXT: movl %esi, 8(%eax)
-; X86-NEXT: movl %ebx, 12(%eax)
+; X86-NEXT: movl %ebx, 4(%eax)
+; X86-NEXT: movl %ecx, 8(%eax)
+; X86-NEXT: movl %esi, 12(%eax)
; X86-NEXT: leal -12(%ebp), %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
; X86-NEXT: popl %ebp
; X86-NEXT: retl $4
+; X86-NEXT: .LBB4_1:
+; X86-NEXT: movb $1, %al
+; X86-NEXT: jmp .LBB4_3
+; X86-NEXT: .LBB4_10:
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT: jmp .LBB4_8
;
; X64-LABEL: scalar_i128:
; X64: # %bb.0:
diff --git a/llvm/test/CodeGen/X86/pr38539.ll b/llvm/test/CodeGen/X86/pr38539.ll
index 412455384e937..b3cb7401e6402 100644
--- a/llvm/test/CodeGen/X86/pr38539.ll
+++ b/llvm/test/CodeGen/X86/pr38539.ll
@@ -22,10 +22,10 @@ define void @f() nounwind {
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: andl $-16, %esp
-; X86-NEXT: subl $160, %esp
+; X86-NEXT: subl $176, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
; X86-NEXT: movzbl (%eax), %eax
; X86-NEXT: movzbl (%eax), %ecx
; X86-NEXT: movzbl %al, %eax
@@ -37,17 +37,18 @@ define void @f() nounwind {
; X86-NEXT: sarl $30, %ecx
; X86-NEXT: sarl $31, %eax
; X86-NEXT: xorl %eax, %edi
-; X86-NEXT: xorl %eax, %edx
+; X86-NEXT: xorl %eax, %ebx
; X86-NEXT: shrdl $1, %eax, %ecx
; X86-NEXT: xorl %ecx, %esi
; X86-NEXT: subl %ecx, %esi
-; X86-NEXT: sbbl %eax, %edx
+; X86-NEXT: sbbl %eax, %ebx
; X86-NEXT: sbbl %eax, %edi
; X86-NEXT: movl %edi, %ecx
-; X86-NEXT: shldl $30, %edx, %ecx
-; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: shldl $30, %ebx, %ecx
+; X86-NEXT: movl %ebx, %edx
; X86-NEXT: shldl $30, %esi, %edx
; X86-NEXT: testl %ecx, %ecx
+; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: jne .LBB0_1
; X86-NEXT: # %bb.2: # %BB_udiv-special-cases
; X86-NEXT: bsrl %edx, %eax
@@ -75,37 +76,50 @@ define void @f() nounwind {
; X86-NEXT: addl $64, %esi
; X86-NEXT: movl %esi, %eax
; X86-NEXT: .LBB0_8: # %BB_udiv-special-cases
-; X86-NEXT: leal {{[0-9]+}}(%esp), %esi
; X86-NEXT: addl $-66, %eax
; X86-NEXT: movl $0, %ebx
; X86-NEXT: adcl $-1, %ebx
-; X86-NEXT: movl $0, %edx
-; X86-NEXT: adcl $3, %edx
+; X86-NEXT: movl $0, %esi
+; X86-NEXT: adcl $3, %esi
+; X86-NEXT: andl $3, %esi
; X86-NEXT: movb $1, %cl
; X86-NEXT: testb %cl, %cl
-; X86-NEXT: jne .LBB0_14
-; X86-NEXT: # %bb.9: # %BB_udiv-special-cases
-; X86-NEXT: andl $3, %edx
+; X86-NEXT: jne .LBB0_10
+; X86-NEXT: # %bb.9: # %select.false.sink
+; X86-NEXT: xorl %ecx, %ecx
+; X86-NEXT: movl $65, %edx
+; X86-NEXT: cmpl %eax, %edx
+; X86-NEXT: movl $0, %edx
+; X86-NEXT: sbbl %ebx, %edx
+; X86-NEXT: movl $0, %edx
+; X86-NEXT: sbbl %esi, %edx
+; X86-NEXT: sbbl %ecx, %ecx
+; X86-NEXT: setb %cl
+; X86-NEXT: .LBB0_10: # %select.end
+; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: testb %cl, %cl
+; X86-NEXT: jne .LBB0_15
+; X86-NEXT: # %bb.11: # %select.end
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: xorl $65, %ecx
-; X86-NEXT: orl %edx, %ecx
+; X86-NEXT: orl %esi, %ecx
; X86-NEXT: orl %ebx, %ecx
-; X86-NEXT: je .LBB0_14
-; X86-NEXT: # %bb.10: # %udiv-bb1
+; X86-NEXT: je .LBB0_15
+; X86-NEXT: # %bb.12: # %udiv-bb1
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: addl $1, %ecx
; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: adcl $0, %ebx
-; X86-NEXT: adcl $0, %edx
-; X86-NEXT: andl $3, %edx
-; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: adcl $0, %esi
+; X86-NEXT: andl $3, %esi
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movb $65, %cl
; X86-NEXT: subb %al, %cl
; X86-NEXT: movl %ecx, %eax
; X86-NEXT: shrb $3, %al
; X86-NEXT: andb $12, %al
; X86-NEXT: negb %al
-; X86-NEXT: movsbl %al, %esi
+; X86-NEXT: movsbl %al, %edx
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
@@ -114,24 +128,23 @@ define void @f() nounwind {
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl 128(%esp,%edx), %eax
+; X86-NEXT: movl 132(%esp,%edx), %edi
+; X86-NEXT: movl 136(%esp,%edx), %edx
+; X86-NEXT: shldl %cl, %edi, %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: shldl %cl, %eax, %edi
; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 112(%esp,%esi), %edi
-; X86-NEXT: movl 116(%esp,%esi), %eax
-; X86-NEXT: movl 120(%esp,%esi), %esi
-; X86-NEXT: shldl %cl, %eax, %esi
-; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: shldl %cl, %edi, %eax
+; X86-NEXT: shll %cl, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: shll %cl, %edi
-; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: orl %edx, %eax
+; X86-NEXT: orl %esi, %eax
; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: orl %ebx, %eax
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X86-NEXT: je .LBB0_13
-; X86-NEXT: # %bb.11: # %udiv-preheader
-; X86-NEXT: andl $3, %edi
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT: je .LBB0_15
+; X86-NEXT: # %bb.13: # %udiv-preheader
+; X86-NEXT: andl $3, %esi
; X86-NEXT: andl $3, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
@@ -146,15 +159,14 @@ define void @f() nounwind {
; X86-NEXT: shrb $3, %al
; X86-NEXT: andb $12, %al
; X86-NEXT: movzbl %al, %eax
-; X86-NEXT: movl 72(%esp,%eax), %ebx
-; X86-NEXT: movl 64(%esp,%eax), %esi
-; X86-NEXT: movl 68(%esp,%eax), %edx
-; X86-NEXT: movl %edx, %eax
-; X86-NEXT: shrdl %cl, %ebx, %eax
-; X86-NEXT: movl %eax, %ebx
+; X86-NEXT: movl 88(%esp,%eax), %edi
+; X86-NEXT: movl 80(%esp,%eax), %ebx
+; X86-NEXT: movl 84(%esp,%eax), %eax
+; X86-NEXT: movl %eax, %edx
+; X86-NEXT: shrdl %cl, %edi, %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: # kill: def $cl killed $cl killed $ecx
-; X86-NEXT: shrdl %cl, %edx, %esi
-; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: shrdl %cl, %eax, %ebx
; X86-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-NEXT: addl $-1, %eax
@@ -162,78 +174,77 @@ define void @f() nounwind {
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-NEXT: adcl $-1, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: adcl $3, %edi
-; X86-NEXT: andl $3, %edi
-; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: adcl $3, %esi
+; X86-NEXT: andl $3, %esi
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X86-NEXT: xorl %ecx, %ecx
; X86-NEXT: .p2align 4
-; X86-NEXT: .LBB0_12: # %udiv-do-while
+; X86-NEXT: .LBB0_14: # %udiv-do-while
; X86-NEXT: # =>This Inner Loop Header: Depth=1
-; X86-NEXT: movl %ebx, %esi
-; X86-NEXT: shldl $1, %ebx, %ecx
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X86-NEXT: shldl $1, %ebx, %esi
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X86-NEXT: movl %edi, %edx
-; X86-NEXT: andl $2, %edx
-; X86-NEXT: shrl %edx
-; X86-NEXT: leal (%edx,%ebx,2), %ebx
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X86-NEXT: shldl $1, %edx, %edi
+; X86-NEXT: shldl $1, %edx, %ecx
+; X86-NEXT: shldl $1, %ebx, %edx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT: movl %edi, %esi
+; X86-NEXT: andl $2, %esi
+; X86-NEXT: shrl %esi
+; X86-NEXT: leal (%esi,%ebx,2), %ebx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT: shldl $1, %esi, %edi
; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: shldl $1, %eax, %edx
-; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: shldl $1, %eax, %esi
+; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: addl %eax, %eax
; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: andl $3, %edi
; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: cmpl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X86-NEXT: sbbl %esi, %edx
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X86-NEXT: sbbl %ecx, %edx
-; X86-NEXT: shll $30, %edx
-; X86-NEXT: movl %edx, %edi
-; X86-NEXT: sarl $30, %edi
-; X86-NEXT: sarl $31, %edx
-; X86-NEXT: shrdl $1, %edx, %edi
-; X86-NEXT: movl %edi, %eax
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT: sbbl %edx, %esi
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT: sbbl %ecx, %edi
+; X86-NEXT: shll $30, %edi
+; X86-NEXT: movl %edi, %esi
+; X86-NEXT: sarl $30, %esi
+; X86-NEXT: sarl $31, %edi
+; X86-NEXT: shrdl $1, %edi, %esi
+; X86-NEXT: movl %esi, %eax
; X86-NEXT: andl $1, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
-; X86-NEXT: movl %edx, %eax
+; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT: movl %edi, %eax
; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X86-NEXT: subl %edi, %ebx
+; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X86-NEXT: subl %esi, %ebx
; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: sbbl %edx, %esi
-; X86-NEXT: movl %esi, %ebx
+; X86-NEXT: sbbl %edi, %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: sbbl %eax, %ecx
; X86-NEXT: andl $3, %ecx
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-NEXT: addl $-1, %eax
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X86-NEXT: adcl $-1, %edx
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X86-NEXT: adcl $3, %edi
-; X86-NEXT: andl $3, %edi
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT: adcl $-1, %esi
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT: adcl $3, %ebx
+; X86-NEXT: andl $3, %ebx
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: orl %edi, %eax
-; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: orl %edx, %eax
-; X86-NEXT: jne .LBB0_12
-; X86-NEXT: .LBB0_13: # %udiv-loop-exit
-; X86-NEXT: leal {{[0-9]+}}(%esp), %esi
-; X86-NEXT: .LBB0_14: # %udiv-end
+; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: orl %ebx, %eax
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: orl %esi, %eax
+; X86-NEXT: jne .LBB0_14
+; X86-NEXT: .LBB0_15: # %udiv-end
; X86-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
; X86-NEXT: setne (%eax)
-; X86-NEXT: movl %esi, (%eax)
+; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, (%eax)
; X86-NEXT: movb $0, (%eax)
; X86-NEXT: leal -12(%ebp), %esp
; X86-NEXT: popl %esi
diff --git a/llvm/test/Transforms/ExpandIRInsts/X86/sdiv129.ll b/llvm/test/Transforms/ExpandIRInsts/X86/sdiv129.ll
index fc823cd543144..751bdbade15d9 100644
--- a/llvm/test/Transforms/ExpandIRInsts/X86/sdiv129.ll
+++ b/llvm/test/Transforms/ExpandIRInsts/X86/sdiv129.ll
@@ -1,8 +1,8 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals
; RUN: opt -S -mtriple=x86_64-- -expand-ir-insts -expand-div-rem-bits 128 < %s | FileCheck %s
; RUN: opt -S -mtriple=x86_64-- -passes='require<libcall-lowering-info>,expand-ir-insts' -expand-div-rem-bits 128 < %s | FileCheck %s
-define void @sdiv129(ptr %ptr, ptr %out) nounwind {
+define void @sdiv129(ptr %ptr, ptr %out) nounwind !prof !0 {
; CHECK-LABEL: @sdiv129(
; CHECK-NEXT: _udiv-special-cases:
; CHECK-NEXT: [[A:%.*]] = load i129, ptr [[PTR:%.*]], align 16
@@ -24,11 +24,11 @@ define void @sdiv129(ptr %ptr, ptr %out) nounwind {
; CHECK-NEXT: [[TMP15:%.*]] = call i129 @llvm.ctlz.i129(i129 [[TMP10]], i1 true)
; CHECK-NEXT: [[TMP16:%.*]] = sub i129 [[TMP14]], [[TMP15]]
; CHECK-NEXT: [[TMP17:%.*]] = icmp ugt i129 [[TMP16]], 128
-; CHECK-NEXT: [[TMP18:%.*]] = select i1 [[TMP13]], i1 true, i1 [[TMP17]]
+; CHECK-NEXT: [[TMP18:%.*]] = select i1 [[TMP13]], i1 true, i1 [[TMP17]], !prof [[PROF1:![0-9]+]]
; CHECK-NEXT: [[TMP19:%.*]] = icmp eq i129 [[TMP16]], 128
-; CHECK-NEXT: [[TMP20:%.*]] = select i1 [[TMP18]], i129 0, i129 [[TMP10]]
-; CHECK-NEXT: [[TMP21:%.*]] = select i1 [[TMP18]], i1 true, i1 [[TMP19]]
-; CHECK-NEXT: br i1 [[TMP21]], label [[UDIV_END:%.*]], label [[UDIV_BB1:%.*]]
+; CHECK-NEXT: [[TMP20:%.*]] = select i1 [[TMP18]], i129 0, i129 [[TMP10]], !prof [[PROF2:![0-9]+]]
+; CHECK-NEXT: [[TMP21:%.*]] = select i1 [[TMP18]], i1 true, i1 [[TMP19]], !prof [[PROF2]]
+; CHECK-NEXT: br i1 [[TMP21]], label [[UDIV_END:%.*]], label [[UDIV_BB1:%.*]], !prof [[PROF1]]
; CHECK: udiv-loop-exit:
; CHECK-NEXT: [[TMP22:%.*]] = phi i129 [ 0, [[UDIV_BB1]] ], [ [[TMP37:%.*]], [[UDIV_DO_WHILE:%.*]] ]
; CHECK-NEXT: [[TMP23:%.*]] = phi i129 [ [[TMP46:%.*]], [[UDIV_BB1]] ], [ [[TMP34:%.*]], [[UDIV_DO_WHILE]] ]
@@ -52,7 +52,7 @@ define void @sdiv129(ptr %ptr, ptr %out) nounwind {
; CHECK-NEXT: [[TMP39]] = sub i129 [[TMP32]], [[TMP38]]
; CHECK-NEXT: [[TMP40]] = add i129 [[TMP27]], -1
; CHECK-NEXT: [[TMP41:%.*]] = icmp eq i129 [[TMP40]], 0
-; CHECK-NEXT: br i1 [[TMP41]], label [[UDIV_LOOP_EXIT:%.*]], label [[UDIV_DO_WHILE]]
+; CHECK-NEXT: br i1 [[TMP41]], label [[UDIV_LOOP_EXIT:%.*]], label [[UDIV_DO_WHILE]], !prof [[PROF1]]
; CHECK: udiv-preheader:
; CHECK-NEXT: [[TMP42]] = lshr i129 [[TMP10]], [[TMP44]]
; CHECK-NEXT: [[TMP43]] = add i129 [[TMP9]], -1
@@ -62,7 +62,7 @@ define void @sdiv129(ptr %ptr, ptr %out) nounwind {
; CHECK-NEXT: [[TMP45:%.*]] = sub i129 128, [[TMP16]]
; CHECK-NEXT: [[TMP46]] = shl i129 [[TMP10]], [[TMP45]]
; CHECK-NEXT: [[TMP47:%.*]] = icmp eq i129 [[TMP44]], 0
-; CHECK-NEXT: br i1 [[TMP47]], label [[UDIV_LOOP_EXIT]], label [[UDIV_PREHEADER]]
+; CHECK-NEXT: br i1 [[TMP47]], label [[UDIV_LOOP_EXIT]], label [[UDIV_PREHEADER]], !prof [[PROF1]]
; CHECK: udiv-end:
; CHECK-NEXT: [[TMP48:%.*]] = phi i129 [ [[TMP25]], [[UDIV_LOOP_EXIT]] ], [ [[TMP20]], [[_UDIV_SPECIAL_CASES:%.*]] ]
; CHECK-NEXT: [[TMP49:%.*]] = xor i129 [[TMP48]], [[TMP8]]
@@ -75,3 +75,13 @@ define void @sdiv129(ptr %ptr, ptr %out) nounwind {
store i129 %res, ptr %out
ret void
}
+
+!0 = !{!"function_entry_count", i64 1000}
+;.
+; CHECK: attributes #[[ATTR0:[0-9]+]] = { nounwind }
+; CHECK: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+;.
+; CHECK: [[META0:![0-9]+]] = !{!"function_entry_count", i64 1000}
+; CHECK: [[PROF1]] = !{!"branch_weights", i32 1, i32 1048575}
+; CHECK: [[PROF2]] = !{!"unknown", !"integer-division"}
+;.
diff --git a/llvm/test/Transforms/ExpandIRInsts/X86/srem129.ll b/llvm/test/Transforms/ExpandIRInsts/X86/srem129.ll
index 667152228d258..45491ccda2b19 100644
--- a/llvm/test/Transforms/ExpandIRInsts/X86/srem129.ll
+++ b/llvm/test/Transforms/ExpandIRInsts/X86/srem129.ll
@@ -1,8 +1,8 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals
; RUN: opt -S -mtriple=x86_64-- -expand-ir-insts -expand-div-rem-bits 128 < %s | FileCheck %s
; RUN: opt -S -mtriple=x86_64-- -passes='require<libcall-lowering-info>,expand-ir-insts' -expand-div-rem-bits 128 < %s | FileCheck %s
-define void @test(ptr %ptr, ptr %out) nounwind {
+define void @test(ptr %ptr, ptr %out) nounwind !prof !0 {
; CHECK-LABEL: @test(
; CHECK-NEXT: _udiv-special-cases:
; CHECK-NEXT: [[A:%.*]] = load i129, ptr [[PTR:%.*]], align 16
@@ -25,11 +25,11 @@ define void @test(ptr %ptr, ptr %out) nounwind {
; CHECK-NEXT: [[TMP16:%.*]] = call i129 @llvm.ctlz.i129(i129 [[TMP11]], i1 true)
; CHECK-NEXT: [[TMP17:%.*]] = sub i129 [[TMP15]], [[TMP16]]
; CHECK-NEXT: [[TMP18:%.*]] = icmp ugt i129 [[TMP17]], 128
-; CHECK-NEXT: [[TMP19:%.*]] = select i1 [[TMP14]], i1 true, i1 [[TMP18]]
+; CHECK-NEXT: [[TMP19:%.*]] = select i1 [[TMP14]], i1 true, i1 [[TMP18]], !prof [[PROF1:![0-9]+]]
; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i129 [[TMP17]], 128
-; CHECK-NEXT: [[TMP21:%.*]] = select i1 [[TMP19]], i129 0, i129 [[TMP11]]
-; CHECK-NEXT: [[TMP22:%.*]] = select i1 [[TMP19]], i1 true, i1 [[TMP20]]
-; CHECK-NEXT: br i1 [[TMP22]], label [[UDIV_END:%.*]], label [[UDIV_BB1:%.*]]
+; CHECK-NEXT: [[TMP21:%.*]] = select i1 [[TMP19]], i129 0, i129 [[TMP11]], !prof [[PROF2:![0-9]+]]
+; CHECK-NEXT: [[TMP22:%.*]] = select i1 [[TMP19]], i1 true, i1 [[TMP20]], !prof [[PROF2]]
+; CHECK-NEXT: br i1 [[TMP22]], label [[UDIV_END:%.*]], label [[UDIV_BB1:%.*]], !prof [[PROF1]]
; CHECK: udiv-loop-exit:
; CHECK-NEXT: [[TMP23:%.*]] = phi i129 [ 0, [[UDIV_BB1]] ], [ [[TMP38:%.*]], [[UDIV_DO_WHILE:%.*]] ]
; CHECK-NEXT: [[TMP24:%.*]] = phi i129 [ [[TMP47:%.*]], [[UDIV_BB1]] ], [ [[TMP35:%.*]], [[UDIV_DO_WHILE]] ]
@@ -53,7 +53,7 @@ define void @test(ptr %ptr, ptr %out) nounwind {
; CHECK-NEXT: [[TMP40]] = sub i129 [[TMP33]], [[TMP39]]
; CHECK-NEXT: [[TMP41]] = add i129 [[TMP28]], -1
; CHECK-NEXT: [[TMP42:%.*]] = icmp eq i129 [[TMP41]], 0
-; CHECK-NEXT: br i1 [[TMP42]], label [[UDIV_LOOP_EXIT:%.*]], label [[UDIV_DO_WHILE]]
+; CHECK-NEXT: br i1 [[TMP42]], label [[UDIV_LOOP_EXIT:%.*]], label [[UDIV_DO_WHILE]], !prof [[PROF1]]
; CHECK: udiv-preheader:
; CHECK-NEXT: [[TMP43]] = lshr i129 [[TMP11]], [[TMP45]]
; CHECK-NEXT: [[TMP44]] = add i129 [[TMP10]], -1
@@ -63,7 +63,7 @@ define void @test(ptr %ptr, ptr %out) nounwind {
; CHECK-NEXT: [[TMP46:%.*]] = sub i129 128, [[TMP17]]
; CHECK-NEXT: [[TMP47]] = shl i129 [[TMP11]], [[TMP46]]
; CHECK-NEXT: [[TMP48:%.*]] = icmp eq i129 [[TMP45]], 0
-; CHECK-NEXT: br i1 [[TMP48]], label [[UDIV_LOOP_EXIT]], label [[UDIV_PREHEADER]]
+; CHECK-NEXT: br i1 [[TMP48]], label [[UDIV_LOOP_EXIT]], label [[UDIV_PREHEADER]], !prof [[PROF1]]
; CHECK: udiv-end:
; CHECK-NEXT: [[TMP49:%.*]] = phi i129 [ [[TMP26]], [[UDIV_LOOP_EXIT]] ], [ [[TMP21]], [[_UDIV_SPECIAL_CASES:%.*]] ]
; CHECK-NEXT: [[TMP50:%.*]] = mul i129 [[TMP9]], [[TMP49]]
@@ -78,3 +78,13 @@ define void @test(ptr %ptr, ptr %out) nounwind {
store i129 %res, ptr %out
ret void
}
+
+!0 = !{!"function_entry_count", i64 1000}
+;.
+; CHECK: attributes #[[ATTR0:[0-9]+]] = { nounwind }
+; CHECK: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+;.
+; CHECK: [[META0:![0-9]+]] = !{!"function_entry_count", i64 1000}
+; CHECK: [[PROF1]] = !{!"branch_weights", i32 1, i32 1048575}
+; CHECK: [[PROF2]] = !{!"unknown", !"integer-division"}
+;.
diff --git a/llvm/test/Transforms/ExpandIRInsts/X86/udiv129.ll b/llvm/test/Transforms/ExpandIRInsts/X86/udiv129.ll
index b2b83815f79b0..6ad696ae446fd 100644
--- a/llvm/test/Transforms/ExpandIRInsts/X86/udiv129.ll
+++ b/llvm/test/Transforms/ExpandIRInsts/X86/udiv129.ll
@@ -1,8 +1,8 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals
; RUN: opt -S -mtriple=x86_64-- -expand-ir-insts -expand-div-rem-bits 128 < %s | FileCheck %s
; RUN: opt -S -mtriple=x86_64-- -passes='require<libcall-lowering-info>,expand-ir-insts' -expand-div-rem-bits 128 < %s | FileCheck %s
-define void @test(ptr %ptr, ptr %out) nounwind {
+define void @test(ptr %ptr, ptr %out) nounwind !prof !0 {
; CHECK-LABEL: @test(
; CHECK-NEXT: _udiv-special-cases:
; CHECK-NEXT: [[A:%.*]] = load i129, ptr [[PTR:%.*]], align 16
@@ -15,11 +15,11 @@ define void @test(ptr %ptr, ptr %out) nounwind {
; CHECK-NEXT: [[TMP6:%.*]] = call i129 @llvm.ctlz.i129(i129 [[TMP1]], i1 true)
; CHECK-NEXT: [[TMP7:%.*]] = sub i129 [[TMP5]], [[TMP6]]
; CHECK-NEXT: [[TMP8:%.*]] = icmp ugt i129 [[TMP7]], 128
-; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP4]], i1 true, i1 [[TMP8]]
+; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP4]], i1 true, i1 [[TMP8]], !prof [[PROF1:![0-9]+]]
; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i129 [[TMP7]], 128
-; CHECK-NEXT: [[TMP11:%.*]] = select i1 [[TMP9]], i129 0, i129 [[TMP1]]
-; CHECK-NEXT: [[TMP12:%.*]] = select i1 [[TMP9]], i1 true, i1 [[TMP10]]
-; CHECK-NEXT: br i1 [[TMP12]], label [[UDIV_END:%.*]], label [[UDIV_BB1:%.*]]
+; CHECK-NEXT: [[TMP11:%.*]] = select i1 [[TMP9]], i129 0, i129 [[TMP1]], !prof [[PROF2:![0-9]+]]
+; CHECK-NEXT: [[TMP12:%.*]] = select i1 [[TMP9]], i1 true, i1 [[TMP10]], !prof [[PROF2]]
+; CHECK-NEXT: br i1 [[TMP12]], label [[UDIV_END:%.*]], label [[UDIV_BB1:%.*]], !prof [[PROF1]]
; CHECK: udiv-loop-exit:
; CHECK-NEXT: [[TMP13:%.*]] = phi i129 [ 0, [[UDIV_BB1]] ], [ [[TMP28:%.*]], [[UDIV_DO_WHILE:%.*]] ]
; CHECK-NEXT: [[TMP14:%.*]] = phi i129 [ [[TMP37:%.*]], [[UDIV_BB1]] ], [ [[TMP25:%.*]], [[UDIV_DO_WHILE]] ]
@@ -43,7 +43,7 @@ define void @test(ptr %ptr, ptr %out) nounwind {
; CHECK-NEXT: [[TMP30]] = sub i129 [[TMP23]], [[TMP29]]
; CHECK-NEXT: [[TMP31]] = add i129 [[TMP18]], -1
; CHECK-NEXT: [[TMP32:%.*]] = icmp eq i129 [[TMP31]], 0
-; CHECK-NEXT: br i1 [[TMP32]], label [[UDIV_LOOP_EXIT:%.*]], label [[UDIV_DO_WHILE]]
+; CHECK-NEXT: br i1 [[TMP32]], label [[UDIV_LOOP_EXIT:%.*]], label [[UDIV_DO_WHILE]], !prof [[PROF1]]
; CHECK: udiv-preheader:
; CHECK-NEXT: [[TMP33]] = lshr i129 [[TMP1]], [[TMP35]]
; CHECK-NEXT: [[TMP34]] = add i129 [[TMP0]], -1
@@ -53,7 +53,7 @@ define void @test(ptr %ptr, ptr %out) nounwind {
; CHECK-NEXT: [[TMP36:%.*]] = sub i129 128, [[TMP7]]
; CHECK-NEXT: [[TMP37]] = shl i129 [[TMP1]], [[TMP36]]
; CHECK-NEXT: [[TMP38:%.*]] = icmp eq i129 [[TMP35]], 0
-; CHECK-NEXT: br i1 [[TMP38]], label [[UDIV_LOOP_EXIT]], label [[UDIV_PREHEADER]]
+; CHECK-NEXT: br i1 [[TMP38]], label [[UDIV_LOOP_EXIT]], label [[UDIV_PREHEADER]], !prof [[PROF1]]
; CHECK: udiv-end:
; CHECK-NEXT: [[TMP39:%.*]] = phi i129 [ [[TMP16]], [[UDIV_LOOP_EXIT]] ], [ [[TMP11]], [[_UDIV_SPECIAL_CASES:%.*]] ]
; CHECK-NEXT: store i129 [[TMP39]], ptr [[OUT:%.*]], align 16
@@ -64,3 +64,13 @@ define void @test(ptr %ptr, ptr %out) nounwind {
store i129 %res, ptr %out
ret void
}
+
+!0 = !{!"function_entry_count", i64 1000}
+;.
+; CHECK: attributes #[[ATTR0:[0-9]+]] = { nounwind }
+; CHECK: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+;.
+; CHECK: [[META0:![0-9]+]] = !{!"function_entry_count", i64 1000}
+; CHECK: [[PROF1]] = !{!"branch_weights", i32 1, i32 1048575}
+; CHECK: [[PROF2]] = !{!"unknown", !"integer-division"}
+;.
diff --git a/llvm/test/Transforms/ExpandIRInsts/X86/urem129.ll b/llvm/test/Transforms/ExpandIRInsts/X86/urem129.ll
index 46e72001b2c2d..a4c4ac2cba329 100644
--- a/llvm/test/Transforms/ExpandIRInsts/X86/urem129.ll
+++ b/llvm/test/Transforms/ExpandIRInsts/X86/urem129.ll
@@ -1,8 +1,8 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals
; RUN: opt -S -mtriple=x86_64-- -expand-ir-insts -expand-div-rem-bits 128 < %s | FileCheck %s
; RUN: opt -S -mtriple=x86_64-- -passes='require<libcall-lowering-info>,expand-ir-insts' -expand-div-rem-bits 128 < %s | FileCheck %s
-define void @test(ptr %ptr, ptr %out) nounwind {
+define void @test(ptr %ptr, ptr %out) nounwind !prof !0 {
; CHECK-LABEL: @test(
; CHECK-NEXT: _udiv-special-cases:
; CHECK-NEXT: [[A:%.*]] = load i129, ptr [[PTR:%.*]], align 16
@@ -17,11 +17,11 @@ define void @test(ptr %ptr, ptr %out) nounwind {
; CHECK-NEXT: [[TMP8:%.*]] = call i129 @llvm.ctlz.i129(i129 [[TMP3]], i1 true)
; CHECK-NEXT: [[TMP9:%.*]] = sub i129 [[TMP7]], [[TMP8]]
; CHECK-NEXT: [[TMP10:%.*]] = icmp ugt i129 [[TMP9]], 128
-; CHECK-NEXT: [[TMP11:%.*]] = select i1 [[TMP6]], i1 true, i1 [[TMP10]]
+; CHECK-NEXT: [[TMP11:%.*]] = select i1 [[TMP6]], i1 true, i1 [[TMP10]], !prof [[PROF1:![0-9]+]]
; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i129 [[TMP9]], 128
-; CHECK-NEXT: [[TMP13:%.*]] = select i1 [[TMP11]], i129 0, i129 [[TMP3]]
-; CHECK-NEXT: [[TMP14:%.*]] = select i1 [[TMP11]], i1 true, i1 [[TMP12]]
-; CHECK-NEXT: br i1 [[TMP14]], label [[UDIV_END:%.*]], label [[UDIV_BB1:%.*]]
+; CHECK-NEXT: [[TMP13:%.*]] = select i1 [[TMP11]], i129 0, i129 [[TMP3]], !prof [[PROF2:![0-9]+]]
+; CHECK-NEXT: [[TMP14:%.*]] = select i1 [[TMP11]], i1 true, i1 [[TMP12]], !prof [[PROF2]]
+; CHECK-NEXT: br i1 [[TMP14]], label [[UDIV_END:%.*]], label [[UDIV_BB1:%.*]], !prof [[PROF1]]
; CHECK: udiv-loop-exit:
; CHECK-NEXT: [[TMP15:%.*]] = phi i129 [ 0, [[UDIV_BB1]] ], [ [[TMP30:%.*]], [[UDIV_DO_WHILE:%.*]] ]
; CHECK-NEXT: [[TMP16:%.*]] = phi i129 [ [[TMP39:%.*]], [[UDIV_BB1]] ], [ [[TMP27:%.*]], [[UDIV_DO_WHILE]] ]
@@ -45,7 +45,7 @@ define void @test(ptr %ptr, ptr %out) nounwind {
; CHECK-NEXT: [[TMP32]] = sub i129 [[TMP25]], [[TMP31]]
; CHECK-NEXT: [[TMP33]] = add i129 [[TMP20]], -1
; CHECK-NEXT: [[TMP34:%.*]] = icmp eq i129 [[TMP33]], 0
-; CHECK-NEXT: br i1 [[TMP34]], label [[UDIV_LOOP_EXIT:%.*]], label [[UDIV_DO_WHILE]]
+; CHECK-NEXT: br i1 [[TMP34]], label [[UDIV_LOOP_EXIT:%.*]], label [[UDIV_DO_WHILE]], !prof [[PROF1]]
; CHECK: udiv-preheader:
; CHECK-NEXT: [[TMP35]] = lshr i129 [[TMP3]], [[TMP37]]
; CHECK-NEXT: [[TMP36]] = add i129 [[TMP2]], -1
@@ -55,7 +55,7 @@ define void @test(ptr %ptr, ptr %out) nounwind {
; CHECK-NEXT: [[TMP38:%.*]] = sub i129 128, [[TMP9]]
; CHECK-NEXT: [[TMP39]] = shl i129 [[TMP3]], [[TMP38]]
; CHECK-NEXT: [[TMP40:%.*]] = icmp eq i129 [[TMP37]], 0
-; CHECK-NEXT: br i1 [[TMP40]], label [[UDIV_LOOP_EXIT]], label [[UDIV_PREHEADER]]
+; CHECK-NEXT: br i1 [[TMP40]], label [[UDIV_LOOP_EXIT]], label [[UDIV_PREHEADER]], !prof [[PROF1]]
; CHECK: udiv-end:
; CHECK-NEXT: [[TMP41:%.*]] = phi i129 [ [[TMP18]], [[UDIV_LOOP_EXIT]] ], [ [[TMP13]], [[_UDIV_SPECIAL_CASES:%.*]] ]
; CHECK-NEXT: [[TMP42:%.*]] = mul i129 [[TMP1]], [[TMP41]]
@@ -68,3 +68,13 @@ define void @test(ptr %ptr, ptr %out) nounwind {
store i129 %res, ptr %out
ret void
}
+
+!0 = !{!"function_entry_count", i64 1000}
+;.
+; CHECK: attributes #[[ATTR0:[0-9]+]] = { nounwind }
+; CHECK: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+;.
+; CHECK: [[META0:![0-9]+]] = !{!"function_entry_count", i64 1000}
+; CHECK: [[PROF1]] = !{!"branch_weights", i32 1, i32 1048575}
+; CHECK: [[PROF2]] = !{!"unknown", !"integer-division"}
+;.
diff --git a/llvm/test/Transforms/ExpandIRInsts/X86/vector.ll b/llvm/test/Transforms/ExpandIRInsts/X86/vector.ll
index 58e74b8d17b55..727e59b5bdef0 100644
--- a/llvm/test/Transforms/ExpandIRInsts/X86/vector.ll
+++ b/llvm/test/Transforms/ExpandIRInsts/X86/vector.ll
@@ -1,10 +1,10 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 4
; RUN: opt -S -mtriple=x86_64-- -expand-ir-insts -expand-div-rem-bits 128 < %s | FileCheck %s
; RUN: opt -S -mtriple=x86_64-- -passes='require<libcall-lowering-info>,expand-ir-insts' -expand-div-rem-bits 128 < %s | FileCheck %s
-define <2 x i129> @sdiv129(<2 x i129> %a, <2 x i129> %b) nounwind {
+define <2 x i129> @sdiv129(<2 x i129> %a, <2 x i129> %b) nounwind !prof !0 {
; CHECK-LABEL: define <2 x i129> @sdiv129(
-; CHECK-SAME: <2 x i129> [[A:%.*]], <2 x i129> [[B:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-SAME: <2 x i129> [[A:%.*]], <2 x i129> [[B:%.*]]) #[[ATTR0:[0-9]+]] !prof [[PROF0:![0-9]+]] {
; CHECK-NEXT: _udiv-special-cases_udiv-special-cases:
; CHECK-NEXT: [[TMP0:%.*]] = extractelement <2 x i129> [[A]], i64 0
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i129> [[B]], i64 0
@@ -26,11 +26,11 @@ define <2 x i129> @sdiv129(<2 x i129> %a, <2 x i129> %b) nounwind {
; CHECK-NEXT: [[TMP17:%.*]] = call i129 @llvm.ctlz.i129(i129 [[TMP12]], i1 true)
; CHECK-NEXT: [[TMP18:%.*]] = sub i129 [[TMP16]], [[TMP17]]
; CHECK-NEXT: [[TMP19:%.*]] = icmp ugt i129 [[TMP18]], 128
-; CHECK-NEXT: [[TMP20:%.*]] = select i1 [[TMP15]], i1 true, i1 [[TMP19]]
+; CHECK-NEXT: [[TMP20:%.*]] = select i1 [[TMP15]], i1 true, i1 [[TMP19]], !prof [[PROF1:![0-9]+]]
; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i129 [[TMP18]], 128
-; CHECK-NEXT: [[TMP22:%.*]] = select i1 [[TMP20]], i129 0, i129 [[TMP12]]
-; CHECK-NEXT: [[TMP23:%.*]] = select i1 [[TMP20]], i1 true, i1 [[TMP21]]
-; CHECK-NEXT: br i1 [[TMP23]], label [[UDIV_END1:%.*]], label [[UDIV_BB15:%.*]]
+; CHECK-NEXT: [[TMP22:%.*]] = select i1 [[TMP20]], i129 0, i129 [[TMP12]], !prof [[PROF2:![0-9]+]]
+; CHECK-NEXT: [[TMP23:%.*]] = select i1 [[TMP20]], i1 true, i1 [[TMP21]], !prof [[PROF2]]
+; CHECK-NEXT: br i1 [[TMP23]], label [[UDIV_END1:%.*]], label [[UDIV_BB15:%.*]], !prof [[PROF1]]
; CHECK: udiv-loop-exit2:
; CHECK-NEXT: [[TMP24:%.*]] = phi i129 [ 0, [[UDIV_BB15]] ], [ [[TMP39:%.*]], [[UDIV_DO_WHILE3:%.*]] ]
; CHECK-NEXT: [[TMP25:%.*]] = phi i129 [ [[TMP48:%.*]], [[UDIV_BB15]] ], [ [[TMP36:%.*]], [[UDIV_DO_WHILE3]] ]
@@ -54,7 +54,7 @@ define <2 x i129> @sdiv129(<2 x i129> %a, <2 x i129> %b) nounwind {
; CHECK-NEXT: [[TMP41]] = sub i129 [[TMP34]], [[TMP40]]
; CHECK-NEXT: [[TMP42]] = add i129 [[TMP29]], -1
; CHECK-NEXT: [[TMP43:%.*]] = icmp eq i129 [[TMP42]], 0
-; CHECK-NEXT: br i1 [[TMP43]], label [[UDIV_LOOP_EXIT2:%.*]], label [[UDIV_DO_WHILE3]]
+; CHECK-NEXT: br i1 [[TMP43]], label [[UDIV_LOOP_EXIT2:%.*]], label [[UDIV_DO_WHILE3]], !prof [[PROF1]]
; CHECK: udiv-preheader4:
; CHECK-NEXT: [[TMP44]] = lshr i129 [[TMP12]], [[TMP46]]
; CHECK-NEXT: [[TMP45]] = add i129 [[TMP11]], -1
@@ -64,7 +64,7 @@ define <2 x i129> @sdiv129(<2 x i129> %a, <2 x i129> %b) nounwind {
; CHECK-NEXT: [[TMP47:%.*]] = sub i129 128, [[TMP18]]
; CHECK-NEXT: [[TMP48]] = shl i129 [[TMP12]], [[TMP47]]
; CHECK-NEXT: [[TMP49:%.*]] = icmp eq i129 [[TMP46]], 0
-; CHECK-NEXT: br i1 [[TMP49]], label [[UDIV_LOOP_EXIT2]], label [[UDIV_PREHEADER4]]
+; CHECK-NEXT: br i1 [[TMP49]], label [[UDIV_LOOP_EXIT2]], label [[UDIV_PREHEADER4]], !prof [[PROF1]]
; CHECK: udiv-end1:
; CHECK-NEXT: [[TMP50:%.*]] = phi i129 [ [[TMP27]], [[UDIV_LOOP_EXIT2]] ], [ [[TMP22]], [[_UDIV_SPECIAL_CASES_UDIV_SPECIAL_CASES:%.*]] ]
; CHECK-NEXT: [[TMP51:%.*]] = xor i129 [[TMP50]], [[TMP10]]
@@ -90,11 +90,11 @@ define <2 x i129> @sdiv129(<2 x i129> %a, <2 x i129> %b) nounwind {
; CHECK-NEXT: [[TMP71:%.*]] = call i129 @llvm.ctlz.i129(i129 [[TMP66]], i1 true)
; CHECK-NEXT: [[TMP72:%.*]] = sub i129 [[TMP70]], [[TMP71]]
; CHECK-NEXT: [[TMP73:%.*]] = icmp ugt i129 [[TMP72]], 128
-; CHECK-NEXT: [[TMP74:%.*]] = select i1 [[TMP69]], i1 true, i1 [[TMP73]]
+; CHECK-NEXT: [[TMP74:%.*]] = select i1 [[TMP69]], i1 true, i1 [[TMP73]], !prof [[PROF1]]
; CHECK-NEXT: [[TMP75:%.*]] = icmp eq i129 [[TMP72]], 128
-; CHECK-NEXT: [[TMP76:%.*]] = select i1 [[TMP74]], i129 0, i129 [[TMP66]]
-; CHECK-NEXT: [[TMP77:%.*]] = select i1 [[TMP74]], i1 true, i1 [[TMP75]]
-; CHECK-NEXT: br i1 [[TMP77]], label [[UDIV_END:%.*]], label [[UDIV_BB1:%.*]]
+; CHECK-NEXT: [[TMP76:%.*]] = select i1 [[TMP74]], i129 0, i129 [[TMP66]], !prof [[PROF2]]
+; CHECK-NEXT: [[TMP77:%.*]] = select i1 [[TMP74]], i1 true, i1 [[TMP75]], !prof [[PROF2]]
+; CHECK-NEXT: br i1 [[TMP77]], label [[UDIV_END:%.*]], label [[UDIV_BB1:%.*]], !prof [[PROF1]]
; CHECK: udiv-loop-exit:
; CHECK-NEXT: [[TMP78:%.*]] = phi i129 [ 0, [[UDIV_BB1]] ], [ [[TMP93:%.*]], [[UDIV_DO_WHILE:%.*]] ]
; CHECK-NEXT: [[TMP79:%.*]] = phi i129 [ [[TMP102:%.*]], [[UDIV_BB1]] ], [ [[TMP90:%.*]], [[UDIV_DO_WHILE]] ]
@@ -118,7 +118,7 @@ define <2 x i129> @sdiv129(<2 x i129> %a, <2 x i129> %b) nounwind {
; CHECK-NEXT: [[TMP95]] = sub i129 [[TMP88]], [[TMP94]]
; CHECK-NEXT: [[TMP96]] = add i129 [[TMP83]], -1
; CHECK-NEXT: [[TMP97:%.*]] = icmp eq i129 [[TMP96]], 0
-; CHECK-NEXT: br i1 [[TMP97]], label [[UDIV_LOOP_EXIT:%.*]], label [[UDIV_DO_WHILE]]
+; CHECK-NEXT: br i1 [[TMP97]], label [[UDIV_LOOP_EXIT:%.*]], label [[UDIV_DO_WHILE]], !prof [[PROF1]]
; CHECK: udiv-preheader:
; CHECK-NEXT: [[TMP98]] = lshr i129 [[TMP66]], [[TMP100]]
; CHECK-NEXT: [[TMP99]] = add i129 [[TMP65]], -1
@@ -128,7 +128,7 @@ define <2 x i129> @sdiv129(<2 x i129> %a, <2 x i129> %b) nounwind {
; CHECK-NEXT: [[TMP101:%.*]] = sub i129 128, [[TMP72]]
; CHECK-NEXT: [[TMP102]] = shl i129 [[TMP66]], [[TMP101]]
; CHECK-NEXT: [[TMP103:%.*]] = icmp eq i129 [[TMP100]], 0
-; CHECK-NEXT: br i1 [[TMP103]], label [[UDIV_LOOP_EXIT]], label [[UDIV_PREHEADER]]
+; CHECK-NEXT: br i1 [[TMP103]], label [[UDIV_LOOP_EXIT]], label [[UDIV_PREHEADER]], !prof [[PROF1]]
; CHECK: udiv-end:
; CHECK-NEXT: [[TMP104:%.*]] = phi i129 [ [[TMP81]], [[UDIV_LOOP_EXIT]] ], [ [[TMP76]], [[UDIV_END1]] ]
; CHECK-NEXT: [[TMP105:%.*]] = xor i129 [[TMP104]], [[TMP64]]
@@ -155,11 +155,11 @@ define <2 x i129> @udiv129(<2 x i129> %a, <2 x i129> %b) nounwind {
; CHECK-NEXT: [[TMP8:%.*]] = call i129 @llvm.ctlz.i129(i129 [[TMP3]], i1 true)
; CHECK-NEXT: [[TMP9:%.*]] = sub i129 [[TMP7]], [[TMP8]]
; CHECK-NEXT: [[TMP10:%.*]] = icmp ugt i129 [[TMP9]], 128
-; CHECK-NEXT: [[TMP11:%.*]] = select i1 [[TMP6]], i1 true, i1 [[TMP10]]
+; CHECK-NEXT: [[TMP11:%.*]] = select i1 [[TMP6]], i1 true, i1 [[TMP10]], !prof [[PROF1]]
; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i129 [[TMP9]], 128
; CHECK-NEXT: [[TMP13:%.*]] = select i1 [[TMP11]], i129 0, i129 [[TMP3]]
; CHECK-NEXT: [[TMP14:%.*]] = select i1 [[TMP11]], i1 true, i1 [[TMP12]]
-; CHECK-NEXT: br i1 [[TMP14]], label [[UDIV_END1:%.*]], label [[UDIV_BB15:%.*]]
+; CHECK-NEXT: br i1 [[TMP14]], label [[UDIV_END1:%.*]], label [[UDIV_BB15:%.*]], !prof [[PROF1]]
; CHECK: udiv-loop-exit2:
; CHECK-NEXT: [[TMP15:%.*]] = phi i129 [ 0, [[UDIV_BB15]] ], [ [[TMP30:%.*]], [[UDIV_DO_WHILE3:%.*]] ]
; CHECK-NEXT: [[TMP16:%.*]] = phi i129 [ [[TMP39:%.*]], [[UDIV_BB15]] ], [ [[TMP27:%.*]], [[UDIV_DO_WHILE3]] ]
@@ -183,7 +183,7 @@ define <2 x i129> @udiv129(<2 x i129> %a, <2 x i129> %b) nounwind {
; CHECK-NEXT: [[TMP32]] = sub i129 [[TMP25]], [[TMP31]]
; CHECK-NEXT: [[TMP33]] = add i129 [[TMP20]], -1
; CHECK-NEXT: [[TMP34:%.*]] = icmp eq i129 [[TMP33]], 0
-; CHECK-NEXT: br i1 [[TMP34]], label [[UDIV_LOOP_EXIT2:%.*]], label [[UDIV_DO_WHILE3]]
+; CHECK-NEXT: br i1 [[TMP34]], label [[UDIV_LOOP_EXIT2:%.*]], label [[UDIV_DO_WHILE3]], !prof [[PROF1]]
; CHECK: udiv-preheader4:
; CHECK-NEXT: [[TMP35]] = lshr i129 [[TMP3]], [[TMP37]]
; CHECK-NEXT: [[TMP36]] = add i129 [[TMP2]], -1
@@ -193,7 +193,7 @@ define <2 x i129> @udiv129(<2 x i129> %a, <2 x i129> %b) nounwind {
; CHECK-NEXT: [[TMP38:%.*]] = sub i129 128, [[TMP9]]
; CHECK-NEXT: [[TMP39]] = shl i129 [[TMP3]], [[TMP38]]
; CHECK-NEXT: [[TMP40:%.*]] = icmp eq i129 [[TMP37]], 0
-; CHECK-NEXT: br i1 [[TMP40]], label [[UDIV_LOOP_EXIT2]], label [[UDIV_PREHEADER4]]
+; CHECK-NEXT: br i1 [[TMP40]], label [[UDIV_LOOP_EXIT2]], label [[UDIV_PREHEADER4]], !prof [[PROF1]]
; CHECK: udiv-end1:
; CHECK-NEXT: [[TMP41:%.*]] = phi i129 [ [[TMP18]], [[UDIV_LOOP_EXIT2]] ], [ [[TMP13]], [[_UDIV_SPECIAL_CASES_UDIV_SPECIAL_CASES:%.*]] ]
; CHECK-NEXT: [[TMP42:%.*]] = insertelement <2 x i129> poison, i129 [[TMP41]], i64 0
@@ -208,11 +208,11 @@ define <2 x i129> @udiv129(<2 x i129> %a, <2 x i129> %b) nounwind {
; CHECK-NEXT: [[TMP51:%.*]] = call i129 @llvm.ctlz.i129(i129 [[TMP46]], i1 true)
; CHECK-NEXT: [[TMP52:%.*]] = sub i129 [[TMP50]], [[TMP51]]
; CHECK-NEXT: [[TMP53:%.*]] = icmp ugt i129 [[TMP52]], 128
-; CHECK-NEXT: [[TMP54:%.*]] = select i1 [[TMP49]], i1 true, i1 [[TMP53]]
+; CHECK-NEXT: [[TMP54:%.*]] = select i1 [[TMP49]], i1 true, i1 [[TMP53]], !prof [[PROF1]]
; CHECK-NEXT: [[TMP55:%.*]] = icmp eq i129 [[TMP52]], 128
; CHECK-NEXT: [[TMP56:%.*]] = select i1 [[TMP54]], i129 0, i129 [[TMP46]]
; CHECK-NEXT: [[TMP57:%.*]] = select i1 [[TMP54]], i1 true, i1 [[TMP55]]
-; CHECK-NEXT: br i1 [[TMP57]], label [[UDIV_END:%.*]], label [[UDIV_BB1:%.*]]
+; CHECK-NEXT: br i1 [[TMP57]], label [[UDIV_END:%.*]], label [[UDIV_BB1:%.*]], !prof [[PROF1]]
; CHECK: udiv-loop-exit:
; CHECK-NEXT: [[TMP58:%.*]] = phi i129 [ 0, [[UDIV_BB1]] ], [ [[TMP73:%.*]], [[UDIV_DO_WHILE:%.*]] ]
; CHECK-NEXT: [[TMP59:%.*]] = phi i129 [ [[TMP82:%.*]], [[UDIV_BB1]] ], [ [[TMP70:%.*]], [[UDIV_DO_WHILE]] ]
@@ -236,7 +236,7 @@ define <2 x i129> @udiv129(<2 x i129> %a, <2 x i129> %b) nounwind {
; CHECK-NEXT: [[TMP75]] = sub i129 [[TMP68]], [[TMP74]]
; CHECK-NEXT: [[TMP76]] = add i129 [[TMP63]], -1
; CHECK-NEXT: [[TMP77:%.*]] = icmp eq i129 [[TMP76]], 0
-; CHECK-NEXT: br i1 [[TMP77]], label [[UDIV_LOOP_EXIT:%.*]], label [[UDIV_DO_WHILE]]
+; CHECK-NEXT: br i1 [[TMP77]], label [[UDIV_LOOP_EXIT:%.*]], label [[UDIV_DO_WHILE]], !prof [[PROF1]]
; CHECK: udiv-preheader:
; CHECK-NEXT: [[TMP78]] = lshr i129 [[TMP46]], [[TMP80]]
; CHECK-NEXT: [[TMP79]] = add i129 [[TMP45]], -1
@@ -246,7 +246,7 @@ define <2 x i129> @udiv129(<2 x i129> %a, <2 x i129> %b) nounwind {
; CHECK-NEXT: [[TMP81:%.*]] = sub i129 128, [[TMP52]]
; CHECK-NEXT: [[TMP82]] = shl i129 [[TMP46]], [[TMP81]]
; CHECK-NEXT: [[TMP83:%.*]] = icmp eq i129 [[TMP80]], 0
-; CHECK-NEXT: br i1 [[TMP83]], label [[UDIV_LOOP_EXIT]], label [[UDIV_PREHEADER]]
+; CHECK-NEXT: br i1 [[TMP83]], label [[UDIV_LOOP_EXIT]], label [[UDIV_PREHEADER]], !prof [[PROF1]]
; CHECK: udiv-end:
; CHECK-NEXT: [[TMP84:%.*]] = phi i129 [ [[TMP61]], [[UDIV_LOOP_EXIT]] ], [ [[TMP56]], [[UDIV_END1]] ]
; CHECK-NEXT: [[TMP85:%.*]] = insertelement <2 x i129> [[TMP42]], i129 [[TMP84]], i64 1
@@ -281,11 +281,11 @@ define <2 x i129> @srem129(<2 x i129> %a, <2 x i129> %b) nounwind {
; CHECK-NEXT: [[TMP18:%.*]] = call i129 @llvm.ctlz.i129(i129 [[TMP13]], i1 true)
; CHECK-NEXT: [[TMP19:%.*]] = sub i129 [[TMP17]], [[TMP18]]
; CHECK-NEXT: [[TMP20:%.*]] = icmp ugt i129 [[TMP19]], 128
-; CHECK-NEXT: [[TMP21:%.*]] = select i1 [[TMP16]], i1 true, i1 [[TMP20]]
+; CHECK-NEXT: [[TMP21:%.*]] = select i1 [[TMP16]], i1 true, i1 [[TMP20]], !prof [[PROF1]]
; CHECK-NEXT: [[TMP22:%.*]] = icmp eq i129 [[TMP19]], 128
; CHECK-NEXT: [[TMP23:%.*]] = select i1 [[TMP21]], i129 0, i129 [[TMP13]]
; CHECK-NEXT: [[TMP24:%.*]] = select i1 [[TMP21]], i1 true, i1 [[TMP22]]
-; CHECK-NEXT: br i1 [[TMP24]], label [[UDIV_END1:%.*]], label [[UDIV_BB15:%.*]]
+; CHECK-NEXT: br i1 [[TMP24]], label [[UDIV_END1:%.*]], label [[UDIV_BB15:%.*]], !prof [[PROF1]]
; CHECK: udiv-loop-exit2:
; CHECK-NEXT: [[TMP25:%.*]] = phi i129 [ 0, [[UDIV_BB15]] ], [ [[TMP40:%.*]], [[UDIV_DO_WHILE3:%.*]] ]
; CHECK-NEXT: [[TMP26:%.*]] = phi i129 [ [[TMP49:%.*]], [[UDIV_BB15]] ], [ [[TMP37:%.*]], [[UDIV_DO_WHILE3]] ]
@@ -309,7 +309,7 @@ define <2 x i129> @srem129(<2 x i129> %a, <2 x i129> %b) nounwind {
; CHECK-NEXT: [[TMP42]] = sub i129 [[TMP35]], [[TMP41]]
; CHECK-NEXT: [[TMP43]] = add i129 [[TMP30]], -1
; CHECK-NEXT: [[TMP44:%.*]] = icmp eq i129 [[TMP43]], 0
-; CHECK-NEXT: br i1 [[TMP44]], label [[UDIV_LOOP_EXIT2:%.*]], label [[UDIV_DO_WHILE3]]
+; CHECK-NEXT: br i1 [[TMP44]], label [[UDIV_LOOP_EXIT2:%.*]], label [[UDIV_DO_WHILE3]], !prof [[PROF1]]
; CHECK: udiv-preheader4:
; CHECK-NEXT: [[TMP45]] = lshr i129 [[TMP13]], [[TMP47]]
; CHECK-NEXT: [[TMP46]] = add i129 [[TMP12]], -1
@@ -319,7 +319,7 @@ define <2 x i129> @srem129(<2 x i129> %a, <2 x i129> %b) nounwind {
; CHECK-NEXT: [[TMP48:%.*]] = sub i129 128, [[TMP19]]
; CHECK-NEXT: [[TMP49]] = shl i129 [[TMP13]], [[TMP48]]
; CHECK-NEXT: [[TMP50:%.*]] = icmp eq i129 [[TMP47]], 0
-; CHECK-NEXT: br i1 [[TMP50]], label [[UDIV_LOOP_EXIT2]], label [[UDIV_PREHEADER4]]
+; CHECK-NEXT: br i1 [[TMP50]], label [[UDIV_LOOP_EXIT2]], label [[UDIV_PREHEADER4]], !prof [[PROF1]]
; CHECK: udiv-end1:
; CHECK-NEXT: [[TMP51:%.*]] = phi i129 [ [[TMP28]], [[UDIV_LOOP_EXIT2]] ], [ [[TMP23]], [[_UDIV_SPECIAL_CASES_UDIV_SPECIAL_CASES:%.*]] ]
; CHECK-NEXT: [[TMP52:%.*]] = mul i129 [[TMP11]], [[TMP51]]
@@ -348,11 +348,11 @@ define <2 x i129> @srem129(<2 x i129> %a, <2 x i129> %b) nounwind {
; CHECK-NEXT: [[TMP75:%.*]] = call i129 @llvm.ctlz.i129(i129 [[TMP70]], i1 true)
; CHECK-NEXT: [[TMP76:%.*]] = sub i129 [[TMP74]], [[TMP75]]
; CHECK-NEXT: [[TMP77:%.*]] = icmp ugt i129 [[TMP76]], 128
-; CHECK-NEXT: [[TMP78:%.*]] = select i1 [[TMP73]], i1 true, i1 [[TMP77]]
+; CHECK-NEXT: [[TMP78:%.*]] = select i1 [[TMP73]], i1 true, i1 [[TMP77]], !prof [[PROF1]]
; CHECK-NEXT: [[TMP79:%.*]] = icmp eq i129 [[TMP76]], 128
; CHECK-NEXT: [[TMP80:%.*]] = select i1 [[TMP78]], i129 0, i129 [[TMP70]]
; CHECK-NEXT: [[TMP81:%.*]] = select i1 [[TMP78]], i1 true, i1 [[TMP79]]
-; CHECK-NEXT: br i1 [[TMP81]], label [[UDIV_END:%.*]], label [[UDIV_BB1:%.*]]
+; CHECK-NEXT: br i1 [[TMP81]], label [[UDIV_END:%.*]], label [[UDIV_BB1:%.*]], !prof [[PROF1]]
; CHECK: udiv-loop-exit:
; CHECK-NEXT: [[TMP82:%.*]] = phi i129 [ 0, [[UDIV_BB1]] ], [ [[TMP97:%.*]], [[UDIV_DO_WHILE:%.*]] ]
; CHECK-NEXT: [[TMP83:%.*]] = phi i129 [ [[TMP106:%.*]], [[UDIV_BB1]] ], [ [[TMP94:%.*]], [[UDIV_DO_WHILE]] ]
@@ -376,7 +376,7 @@ define <2 x i129> @srem129(<2 x i129> %a, <2 x i129> %b) nounwind {
; CHECK-NEXT: [[TMP99]] = sub i129 [[TMP92]], [[TMP98]]
; CHECK-NEXT: [[TMP100]] = add i129 [[TMP87]], -1
; CHECK-NEXT: [[TMP101:%.*]] = icmp eq i129 [[TMP100]], 0
-; CHECK-NEXT: br i1 [[TMP101]], label [[UDIV_LOOP_EXIT:%.*]], label [[UDIV_DO_WHILE]]
+; CHECK-NEXT: br i1 [[TMP101]], label [[UDIV_LOOP_EXIT:%.*]], label [[UDIV_DO_WHILE]], !prof [[PROF1]]
; CHECK: udiv-preheader:
; CHECK-NEXT: [[TMP102]] = lshr i129 [[TMP70]], [[TMP104]]
; CHECK-NEXT: [[TMP103]] = add i129 [[TMP69]], -1
@@ -386,7 +386,7 @@ define <2 x i129> @srem129(<2 x i129> %a, <2 x i129> %b) nounwind {
; CHECK-NEXT: [[TMP105:%.*]] = sub i129 128, [[TMP76]]
; CHECK-NEXT: [[TMP106]] = shl i129 [[TMP70]], [[TMP105]]
; CHECK-NEXT: [[TMP107:%.*]] = icmp eq i129 [[TMP104]], 0
-; CHECK-NEXT: br i1 [[TMP107]], label [[UDIV_LOOP_EXIT]], label [[UDIV_PREHEADER]]
+; CHECK-NEXT: br i1 [[TMP107]], label [[UDIV_LOOP_EXIT]], label [[UDIV_PREHEADER]], !prof [[PROF1]]
; CHECK: udiv-end:
; CHECK-NEXT: [[TMP108:%.*]] = phi i129 [ [[TMP85]], [[UDIV_LOOP_EXIT]] ], [ [[TMP80]], [[UDIV_END1]] ]
; CHECK-NEXT: [[TMP109:%.*]] = mul i129 [[TMP68]], [[TMP108]]
@@ -417,11 +417,11 @@ define <2 x i129> @urem129(<2 x i129> %a, <2 x i129> %b) nounwind {
; CHECK-NEXT: [[TMP10:%.*]] = call i129 @llvm.ctlz.i129(i129 [[TMP5]], i1 true)
; CHECK-NEXT: [[TMP11:%.*]] = sub i129 [[TMP9]], [[TMP10]]
; CHECK-NEXT: [[TMP12:%.*]] = icmp ugt i129 [[TMP11]], 128
-; CHECK-NEXT: [[TMP13:%.*]] = select i1 [[TMP8]], i1 true, i1 [[TMP12]]
+; CHECK-NEXT: [[TMP13:%.*]] = select i1 [[TMP8]], i1 true, i1 [[TMP12]], !prof [[PROF1]]
; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i129 [[TMP11]], 128
; CHECK-NEXT: [[TMP15:%.*]] = select i1 [[TMP13]], i129 0, i129 [[TMP5]]
; CHECK-NEXT: [[TMP16:%.*]] = select i1 [[TMP13]], i1 true, i1 [[TMP14]]
-; CHECK-NEXT: br i1 [[TMP16]], label [[UDIV_END1:%.*]], label [[UDIV_BB15:%.*]]
+; CHECK-NEXT: br i1 [[TMP16]], label [[UDIV_END1:%.*]], label [[UDIV_BB15:%.*]], !prof [[PROF1]]
; CHECK: udiv-loop-exit2:
; CHECK-NEXT: [[TMP17:%.*]] = phi i129 [ 0, [[UDIV_BB15]] ], [ [[TMP32:%.*]], [[UDIV_DO_WHILE3:%.*]] ]
; CHECK-NEXT: [[TMP18:%.*]] = phi i129 [ [[TMP41:%.*]], [[UDIV_BB15]] ], [ [[TMP29:%.*]], [[UDIV_DO_WHILE3]] ]
@@ -445,7 +445,7 @@ define <2 x i129> @urem129(<2 x i129> %a, <2 x i129> %b) nounwind {
; CHECK-NEXT: [[TMP34]] = sub i129 [[TMP27]], [[TMP33]]
; CHECK-NEXT: [[TMP35]] = add i129 [[TMP22]], -1
; CHECK-NEXT: [[TMP36:%.*]] = icmp eq i129 [[TMP35]], 0
-; CHECK-NEXT: br i1 [[TMP36]], label [[UDIV_LOOP_EXIT2:%.*]], label [[UDIV_DO_WHILE3]]
+; CHECK-NEXT: br i1 [[TMP36]], label [[UDIV_LOOP_EXIT2:%.*]], label [[UDIV_DO_WHILE3]], !prof [[PROF1]]
; CHECK: udiv-preheader4:
; CHECK-NEXT: [[TMP37]] = lshr i129 [[TMP5]], [[TMP39]]
; CHECK-NEXT: [[TMP38]] = add i129 [[TMP4]], -1
@@ -455,7 +455,7 @@ define <2 x i129> @urem129(<2 x i129> %a, <2 x i129> %b) nounwind {
; CHECK-NEXT: [[TMP40:%.*]] = sub i129 128, [[TMP11]]
; CHECK-NEXT: [[TMP41]] = shl i129 [[TMP5]], [[TMP40]]
; CHECK-NEXT: [[TMP42:%.*]] = icmp eq i129 [[TMP39]], 0
-; CHECK-NEXT: br i1 [[TMP42]], label [[UDIV_LOOP_EXIT2]], label [[UDIV_PREHEADER4]]
+; CHECK-NEXT: br i1 [[TMP42]], label [[UDIV_LOOP_EXIT2]], label [[UDIV_PREHEADER4]], !prof [[PROF1]]
; CHECK: udiv-end1:
; CHECK-NEXT: [[TMP43:%.*]] = phi i129 [ [[TMP20]], [[UDIV_LOOP_EXIT2]] ], [ [[TMP15]], [[_UDIV_SPECIAL_CASES_UDIV_SPECIAL_CASES:%.*]] ]
; CHECK-NEXT: [[TMP44:%.*]] = mul i129 [[TMP3]], [[TMP43]]
@@ -474,11 +474,11 @@ define <2 x i129> @urem129(<2 x i129> %a, <2 x i129> %b) nounwind {
; CHECK-NEXT: [[TMP57:%.*]] = call i129 @llvm.ctlz.i129(i129 [[TMP52]], i1 true)
; CHECK-NEXT: [[TMP58:%.*]] = sub i129 [[TMP56]], [[TMP57]]
; CHECK-NEXT: [[TMP59:%.*]] = icmp ugt i129 [[TMP58]], 128
-; CHECK-NEXT: [[TMP60:%.*]] = select i1 [[TMP55]], i1 true, i1 [[TMP59]]
+; CHECK-NEXT: [[TMP60:%.*]] = select i1 [[TMP55]], i1 true, i1 [[TMP59]], !prof [[PROF1]]
; CHECK-NEXT: [[TMP61:%.*]] = icmp eq i129 [[TMP58]], 128
; CHECK-NEXT: [[TMP62:%.*]] = select i1 [[TMP60]], i129 0, i129 [[TMP52]]
; CHECK-NEXT: [[TMP63:%.*]] = select i1 [[TMP60]], i1 true, i1 [[TMP61]]
-; CHECK-NEXT: br i1 [[TMP63]], label [[UDIV_END:%.*]], label [[UDIV_BB1:%.*]]
+; CHECK-NEXT: br i1 [[TMP63]], label [[UDIV_END:%.*]], label [[UDIV_BB1:%.*]], !prof [[PROF1]]
; CHECK: udiv-loop-exit:
; CHECK-NEXT: [[TMP64:%.*]] = phi i129 [ 0, [[UDIV_BB1]] ], [ [[TMP79:%.*]], [[UDIV_DO_WHILE:%.*]] ]
; CHECK-NEXT: [[TMP65:%.*]] = phi i129 [ [[TMP88:%.*]], [[UDIV_BB1]] ], [ [[TMP76:%.*]], [[UDIV_DO_WHILE]] ]
@@ -502,7 +502,7 @@ define <2 x i129> @urem129(<2 x i129> %a, <2 x i129> %b) nounwind {
; CHECK-NEXT: [[TMP81]] = sub i129 [[TMP74]], [[TMP80]]
; CHECK-NEXT: [[TMP82]] = add i129 [[TMP69]], -1
; CHECK-NEXT: [[TMP83:%.*]] = icmp eq i129 [[TMP82]], 0
-; CHECK-NEXT: br i1 [[TMP83]], label [[UDIV_LOOP_EXIT:%.*]], label [[UDIV_DO_WHILE]]
+; CHECK-NEXT: br i1 [[TMP83]], label [[UDIV_LOOP_EXIT:%.*]], label [[UDIV_DO_WHILE]], !prof [[PROF1]]
; CHECK: udiv-preheader:
; CHECK-NEXT: [[TMP84]] = lshr i129 [[TMP52]], [[TMP86]]
; CHECK-NEXT: [[TMP85]] = add i129 [[TMP51]], -1
@@ -512,7 +512,7 @@ define <2 x i129> @urem129(<2 x i129> %a, <2 x i129> %b) nounwind {
; CHECK-NEXT: [[TMP87:%.*]] = sub i129 128, [[TMP58]]
; CHECK-NEXT: [[TMP88]] = shl i129 [[TMP52]], [[TMP87]]
; CHECK-NEXT: [[TMP89:%.*]] = icmp eq i129 [[TMP86]], 0
-; CHECK-NEXT: br i1 [[TMP89]], label [[UDIV_LOOP_EXIT]], label [[UDIV_PREHEADER]]
+; CHECK-NEXT: br i1 [[TMP89]], label [[UDIV_LOOP_EXIT]], label [[UDIV_PREHEADER]], !prof [[PROF1]]
; CHECK: udiv-end:
; CHECK-NEXT: [[TMP90:%.*]] = phi i129 [ [[TMP67]], [[UDIV_LOOP_EXIT]] ], [ [[TMP62]], [[UDIV_END1]] ]
; CHECK-NEXT: [[TMP91:%.*]] = mul i129 [[TMP50]], [[TMP90]]
@@ -534,3 +534,13 @@ define <vscale x 2 x i129> @sdiv129_scalable(<vscale x 2 x i129> %a, <vscale x 2
%res = sdiv <vscale x 2 x i129> %a, %b
ret <vscale x 2 x i129> %res
}
+
+!0 = !{!"function_entry_count", i64 1000}
+;.
+; CHECK: attributes #[[ATTR0]] = { nounwind }
+; CHECK: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+;.
+; CHECK: [[PROF0]] = !{!"function_entry_count", i64 1000}
+; CHECK: [[PROF1]] = !{!"branch_weights", i32 1, i32 1048575}
+; CHECK: [[PROF2]] = !{!"unknown", !"integer-division"}
+;.
More information about the cfe-commits
mailing list