[llvm] [CGP]: Optimize mul.overflow. (PR #148343)
Hassnaa Hamdi via llvm-commits
llvm-commits at lists.llvm.org
Sat Jul 12 00:03:44 PDT 2025
https://github.com/hassnaaHamdi created https://github.com/llvm/llvm-project/pull/148343
- Detect cases where neither the LHS nor the RHS value can cause overflow (both Hi parts are zero).
- Detect cases where only one of the LHS or RHS values could cause overflow (only one of the Hi parts is zero).
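To illustrate the case split, here is a small self-contained C++ sketch of the unsigned variant at a narrower width. It is not taken from the patch: the 32/16-bit widths and the helper name are illustrative, and the final branch is simplified to "return true" (both high halves being non-zero already implies unsigned overflow), whereas the patch keeps the original expensive lowering for that case. Here i32 plays the role of the type that needs expansion and i16 the role of the legal type.

#include <cstdint>
#include <cstdio>

// Returns true iff A * B overflows 32 bits; the low 32 bits of the product
// are written to *Res. Only 16x16->32 bit multiplies are used, mirroring how
// the rewritten IR sticks to legal-width multiplies.
static bool umul32Overflow(uint32_t A, uint32_t B, uint32_t *Res) {
  uint32_t HiA = A >> 16, HiB = B >> 16;

  // overflow.no: both high halves are zero, so A * B < 2^32 and cannot
  // overflow.
  if (HiA == 0 && HiB == 0) {
    *Res = A * B;
    return false;
  }

  // overflow.no.lhs.only / overflow.no.rhs.only: exactly one operand fits in
  // the low half. Make B the narrow operand, then
  //   A * B = (HiA * B) << 16 + LoA * B,
  // where both partial products fit in 32 bits.
  if (HiA == 0 || HiB == 0) {
    if (HiA == 0) {
      uint32_t T = A;
      A = B;
      B = T;
      HiA = A >> 16;
    }
    uint32_t LoA = A & 0xFFFF;
    uint32_t P0 = LoA * B;
    uint32_t P1 = HiA * B;
    uint32_t Mid = (P0 >> 16) + (P1 & 0xFFFF); // may carry into bit 16
    uint32_t Hi = (P1 >> 16) + (Mid >> 16);    // bits 32..47 of the product
    *Res = (P0 & 0xFFFF) | (Mid << 16);
    return Hi != 0;
  }

  // overflow: both high halves are non-zero, so the product is >= 2^32.
  // Only the low 32 bits still need to be assembled.
  uint32_t LoA = A & 0xFFFF, LoB = B & 0xFFFF;
  uint32_t Cross = LoA * HiB + HiA * LoB; // HiA * HiB only affects bits >= 32
  *Res = LoA * LoB + (Cross << 16);
  return true;
}

int main() {
  uint32_t Res;
  bool Ovf = umul32Overflow(0x12345u, 0x10000u, &Res);
  std::printf("res=0x%08x overflow=%d\n", Res, Ovf); // res=0x23450000 overflow=1
  return 0;
}

The signed variant below follows the same structure, but multiplies the absolute values of the operands and restores the sign of the three-part result afterwards.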
>From 208fec87b71a76162494a1daf78b54a4ac2d7a35 Mon Sep 17 00:00:00 2001
From: Hassnaa Hamdi <hassnaa.hamdi at arm.com>
Date: Sat, 5 Jul 2025 05:57:55 +0000
Subject: [PATCH] [CGP]: Optimize mul.overflow.
- Detect cases where neither the LHS nor the RHS value can cause overflow
(both Hi parts are zero).
- Detect cases where only one of the LHS or RHS values could cause overflow
(only one of the Hi parts is zero).
---
llvm/lib/CodeGen/CodeGenPrepare.cpp | 573 +++
llvm/test/CodeGen/AArch64/i128-math.ll | 504 +-
.../CodeGen/AArch64/i128_with_overflow.ll | 198 +-
.../umulo-128-legalisation-lowering.ll | 205 +-
.../ARM/umulo-128-legalisation-lowering.ll | 579 ++-
.../ARM/umulo-64-legalisation-lowering.ll | 107 +-
.../CodeGen/LoongArch/smul-with-overflow.ll | 985 +++-
.../umulo-128-legalisation-lowering.ll | 439 +-
.../RISCV/umulo-128-legalisation-lowering.ll | 355 +-
llvm/test/CodeGen/RISCV/xaluo.ll | 2893 ++++++++++--
.../SPARC/smulo-128-legalisation-lowering.ll | 1255 ++++-
.../SPARC/umulo-128-legalisation-lowering.ll | 605 ++-
.../Thumb/umulo-128-legalisation-lowering.ll | 654 ++-
.../Thumb2/umulo-128-legalisation-lowering.ll | 294 +-
.../Thumb2/umulo-64-legalisation-lowering.ll | 51 +-
llvm/test/CodeGen/X86/muloti.ll | 177 +-
.../X86/smulo-128-legalisation-lowering.ll | 4105 +++++++++++++----
.../X86/umulo-128-legalisation-lowering.ll | 454 +-
.../X86/umulo-64-legalisation-lowering.ll | 85 +-
llvm/test/CodeGen/X86/xmulo.ll | 1625 +++++--
20 files changed, 13143 insertions(+), 3000 deletions(-)
diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
index 9bbb89e37865d..d9859ed246604 100644
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -431,6 +431,8 @@ class CodeGenPrepare {
bool optimizeMemoryInst(Instruction *MemoryInst, Value *Addr, Type *AccessTy,
unsigned AddrSpace);
bool optimizeGatherScatterInst(Instruction *MemoryInst, Value *Ptr);
+ bool optimizeUMulWithOverflow(Instruction *I);
+ bool optimizeSMulWithOverflow(Instruction *I);
bool optimizeInlineAsmInst(CallInst *CS);
bool optimizeCallInst(CallInst *CI, ModifyDT &ModifiedDT);
bool optimizeExt(Instruction *&I);
@@ -2769,6 +2771,10 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, ModifyDT &ModifiedDT) {
return optimizeGatherScatterInst(II, II->getArgOperand(0));
case Intrinsic::masked_scatter:
return optimizeGatherScatterInst(II, II->getArgOperand(1));
+ case Intrinsic::umul_with_overflow:
+ return optimizeUMulWithOverflow(II);
+ case Intrinsic::smul_with_overflow:
+ return optimizeSMulWithOverflow(II);
}
SmallVector<Value *, 2> PtrOps;
@@ -6386,6 +6392,573 @@ bool CodeGenPrepare::optimizeGatherScatterInst(Instruction *MemoryInst,
return true;
}
+// Rewrite the umul_with_overflow intrinsic by checking whether the value of
+// either (or both) of the operands fits in the legal half-width type. If so, a
+// cheaper multiplication sequence can be used. This rewrite ideally belongs in
+// type legalization, but the IR reconstruction it requires is not possible
+// there, so it is done here instead.
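+// For example, for an i128 umul.with.overflow on a target where i64 is legal
+// (as in the AArch64 tests below), the operands are split into 64-bit Lo/Hi
+// halves and the multiply sequence is chosen at run time based on which Hi
+// halves are zero.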
+bool CodeGenPrepare::optimizeUMulWithOverflow(Instruction *I) {
+ if (TLI->getTypeAction(
+ I->getContext(),
+ TLI->getValueType(*DL, I->getType()->getContainedType(0))) !=
+ TargetLowering::TypeExpandInteger)
+ return false;
+ Value *LHS = I->getOperand(0);
+ Value *RHS = I->getOperand(1);
+ auto *Ty = LHS->getType();
+ unsigned VTBitWidth = Ty->getScalarSizeInBits();
+ unsigned VTHalfBitWidth = VTBitWidth / 2;
+ auto *LegalTy = IntegerType::getIntNTy(I->getContext(), VTHalfBitWidth);
+
+ assert(
+ (TLI->getTypeAction(I->getContext(), TLI->getValueType(*DL, LegalTy)) ==
+ TargetLowering::TypeLegal) &&
+ "Expected the type to be legal for the target lowering");
+
+ I->getParent()->setName("overflow.res");
+ auto *OverflowResBB = I->getParent();
+ auto *OverflowEntryBB =
+ I->getParent()->splitBasicBlock(I, "overflow.entry", /*Before*/ true);
+ BasicBlock *OverflowLHSBB = BasicBlock::Create(
+ I->getContext(), "overflow.lhs", I->getFunction(), OverflowResBB);
+ BasicBlock *NoOverflowLHSBB = BasicBlock::Create(
+ I->getContext(), "overflow.no.lhs", I->getFunction(), OverflowResBB);
+ BasicBlock *NoOverflowRHSonlyBB = BasicBlock::Create(
+ I->getContext(), "overflow.no.rhs.only", I->getFunction(), OverflowResBB);
+ BasicBlock *NoOverflowLHSonlyBB = BasicBlock::Create(
+ I->getContext(), "overflow.no.lhs.only", I->getFunction(), OverflowResBB);
+ BasicBlock *NoOverflowBB = BasicBlock::Create(
+ I->getContext(), "overflow.no", I->getFunction(), OverflowResBB);
+ BasicBlock *OverflowBB = BasicBlock::Create(I->getContext(), "overflow",
+ I->getFunction(), OverflowResBB);
+ // The new block structure is:
+ // overflow.entry:
+ //   hi(LHS) != 0 ? overflow.lhs : overflow.no.lhs
+
+ // overflow.lhs:
+ //   hi(RHS) != 0 ? overflow : overflow.no.rhs.only
+
+ // overflow.no.lhs:
+ //   hi(RHS) != 0 ? overflow.no.lhs.only : overflow.no
+
+ // overflow.no.rhs.only:
+ // overflow.no.lhs.only:
+ // overflow.no:
+ // overflow:
+ // overflow.res:
+
+ IRBuilder<> BuilderEntryBB(OverflowEntryBB->getTerminator());
+ IRBuilder<> BuilderOverflowLHSBB(OverflowLHSBB);
+ IRBuilder<> BuilderNoOverflowLHSBB(NoOverflowLHSBB);
+ IRBuilder<> BuilderNoOverflowRHSonlyBB(NoOverflowRHSonlyBB);
+ IRBuilder<> BuilderNoOverflowLHSonlyBB(NoOverflowLHSonlyBB);
+ IRBuilder<> BuilderNoOverflowBB(NoOverflowBB);
+ IRBuilder<> BuilderOverflowResBB(OverflowResBB,
+ OverflowResBB->getFirstInsertionPt());
+
+ //------------------------------------------------------------------------------
+ // BB overflow.entry:
+ // get Lo and Hi of RHS & LHS:
+
+ auto *LoRHS = BuilderEntryBB.CreateTrunc(RHS, LegalTy, "lo.rhs.trunc");
+ auto *ShrHiRHS = BuilderEntryBB.CreateLShr(RHS, VTHalfBitWidth, "rhs.lsr");
+ auto *HiRHS = BuilderEntryBB.CreateTrunc(ShrHiRHS, LegalTy, "hi.rhs.trunc");
+
+ auto *LoLHS = BuilderEntryBB.CreateTrunc(LHS, LegalTy, "lo.lhs.trunc");
+ auto *ShrHiLHS = BuilderEntryBB.CreateLShr(LHS, VTHalfBitWidth, "lhs.lsr");
+ auto *HiLHS = BuilderEntryBB.CreateTrunc(ShrHiLHS, LegalTy, "hi.lhs.trunc");
+
+ auto *Cmp = BuilderEntryBB.CreateCmp(ICmpInst::ICMP_NE, HiLHS,
+ ConstantInt::getNullValue(LegalTy));
+ BuilderEntryBB.CreateCondBr(Cmp, OverflowLHSBB, NoOverflowLHSBB);
+ OverflowEntryBB->getTerminator()->eraseFromParent();
+
+ //------------------------------------------------------------------------------
+ // BB overflow.lhs:
+ Cmp = BuilderOverflowLHSBB.CreateCmp(ICmpInst::ICMP_NE, HiRHS,
+ ConstantInt::getNullValue(LegalTy));
+ BuilderOverflowLHSBB.CreateCondBr(Cmp, OverflowBB, NoOverflowRHSonlyBB);
+
+ //------------------------------------------------------------------------------
+ // BB overflow.no.lhs:
+ Cmp = BuilderNoOverflowLHSBB.CreateCmp(ICmpInst::ICMP_NE, HiRHS,
+ ConstantInt::getNullValue(LegalTy));
+ BuilderNoOverflowLHSBB.CreateCondBr(Cmp, NoOverflowLHSonlyBB, NoOverflowBB);
+
+ //------------------------------------------------------------------------------
+ // BB overflow.no.rhs.only:
+ // RHS fits in the legal half-width type; LHS does not.
+ // P0 = RHS * LoLHS
+ // P1 = RHS * HiLHS
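+ // With H = VTHalfBitWidth and B = 2^H: LHS = HiLHS * B + LoLHS and RHS < B,
+ // so LHS * RHS = P1 * B + P0, and both partial products fit in the wide
+ // type. The three half-width words of the product are
+ //   Lo = Lo(P0), Mid = Hi(P0) + Lo(P1) (with carry), Hi = Hi(P1) + carry,
+ // and the multiplication overflows iff Hi is non-zero.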
+
+ LoLHS = BuilderNoOverflowRHSonlyBB.CreateZExt(LoLHS, Ty, "lo.lhs");
+
+ // P0 = (RHS * LoLHS)
+ auto *P0 = BuilderNoOverflowRHSonlyBB.CreateMul(RHS, LoLHS,
+ "mul.no.overflow.rhs.lolhs");
+ auto *P0Lo = BuilderNoOverflowRHSonlyBB.CreateTrunc(P0, LegalTy, "p0.lo.rhs");
+ auto *P0Hi =
+ BuilderNoOverflowRHSonlyBB.CreateLShr(P0, VTHalfBitWidth, "p0.rhs.lsr");
+ P0Hi = BuilderNoOverflowRHSonlyBB.CreateTrunc(P0Hi, LegalTy, "p0.hi.rhs");
+
+ // P1 = (RHS * HiLHS)
+ auto *P1 = BuilderNoOverflowRHSonlyBB.CreateMul(RHS, ShrHiLHS,
+ "mul.no.overflow.rhs.hilhs");
+ auto *P1Lo = BuilderNoOverflowRHSonlyBB.CreateTrunc(P1, LegalTy, "p1.lo.rhs");
+ auto *P1Hi =
+ BuilderNoOverflowRHSonlyBB.CreateLShr(P1, VTHalfBitWidth, "p1.rhs.lsr");
+ P1Hi = BuilderNoOverflowRHSonlyBB.CreateTrunc(P1Hi, LegalTy, "p1.hi.rhs");
+
+ auto *AddOverflow = BuilderNoOverflowRHSonlyBB.CreateIntrinsic(
+ Intrinsic::uadd_with_overflow, LegalTy, {P0Hi, P1Lo});
+ auto *AddOResMid = BuilderNoOverflowRHSonlyBB.CreateExtractValue(
+ AddOverflow, 0, "rhs.p0.p1.res");
+ auto *Carry = BuilderNoOverflowRHSonlyBB.CreateExtractValue(
+ AddOverflow, 1, "rhs.p0.p1.carry");
+ Carry =
+ BuilderNoOverflowRHSonlyBB.CreateZExt(Carry, LegalTy, "rhs.carry.zext");
+ auto *ResHi =
+ BuilderNoOverflowRHSonlyBB.CreateAdd(P1Hi, Carry, "rhs.p1.carry");
+
+ auto *ResLoEx =
+ BuilderNoOverflowRHSonlyBB.CreateZExt(P0Lo, Ty, "rhs.res_lo.zext");
+ auto *ResMid =
+ BuilderNoOverflowRHSonlyBB.CreateZExt(AddOResMid, Ty, "rhs.res_mid.zext");
+ auto *ResMidShl = BuilderNoOverflowRHSonlyBB.CreateShl(ResMid, VTHalfBitWidth,
+ "rhs.res_mid.shl");
+ auto *FinalRes = BuilderNoOverflowRHSonlyBB.CreateOr(ResLoEx, ResMidShl,
+ "rhs.res_lo.or.mid");
+ auto *IsOverflow = BuilderNoOverflowRHSonlyBB.CreateICmp(
+ ICmpInst::ICMP_NE, ResHi, Constant::getNullValue(LegalTy),
+ "rhs.check.overflow");
+
+ StructType *STy = StructType::get(
+ I->getContext(), {Ty, IntegerType::getInt1Ty(I->getContext())});
+ Value *StructValNoOverflowRHS = PoisonValue::get(STy);
+ StructValNoOverflowRHS = BuilderNoOverflowRHSonlyBB.CreateInsertValue(
+ StructValNoOverflowRHS, FinalRes, {0});
+ StructValNoOverflowRHS = BuilderNoOverflowRHSonlyBB.CreateInsertValue(
+ StructValNoOverflowRHS, IsOverflow, {1});
+ BuilderNoOverflowRHSonlyBB.CreateBr(OverflowResBB);
+ //------------------------------------------------------------------------------
+
+ // BB overflow.no.lhs.only:
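+ // Mirror image of the overflow.no.rhs.only block above, with the roles of
+ // LHS and RHS swapped: here LHS fits in the legal half-width type.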
+
+ LoRHS = BuilderNoOverflowLHSonlyBB.CreateZExt(LoRHS, Ty, "lo.rhs");
+
+ // P0 = (LHS * LoRHS)
+ P0 = BuilderNoOverflowLHSonlyBB.CreateMul(LHS, LoRHS,
+ "mul.no.overflow.lhs.lorhs");
+ P0Lo = BuilderNoOverflowLHSonlyBB.CreateTrunc(P0, LegalTy, "p0.lo.lhs");
+ P0Hi =
+ BuilderNoOverflowLHSonlyBB.CreateLShr(P0, VTHalfBitWidth, "p0.lsr.lhs");
+ P0Hi = BuilderNoOverflowLHSonlyBB.CreateTrunc(P0Hi, LegalTy, "p0.hi.lhs");
+
+ // P1 = (LHS * HiRHS)
+ P1 = BuilderNoOverflowLHSonlyBB.CreateMul(LHS, ShrHiRHS,
+ "mul.no.overflow.lhs.hirhs");
+ P1Lo = BuilderNoOverflowLHSonlyBB.CreateTrunc(P1, LegalTy, "p1.lo.lhs");
+ P1Hi =
+ BuilderNoOverflowLHSonlyBB.CreateLShr(P1, VTHalfBitWidth, "p1.lhs.lsr");
+ P1Hi = BuilderNoOverflowLHSonlyBB.CreateTrunc(P1Hi, LegalTy, "p1.hi.lhs");
+
+ AddOverflow = BuilderNoOverflowLHSonlyBB.CreateIntrinsic(
+ Intrinsic::uadd_with_overflow, LegalTy, {P0Hi, P1Lo});
+ AddOResMid = BuilderNoOverflowLHSonlyBB.CreateExtractValue(AddOverflow, 0,
+ "lhs.p0.p1.res");
+ Carry = BuilderNoOverflowLHSonlyBB.CreateExtractValue(AddOverflow, 1,
+ "lhs.p0.p1.carry");
+ Carry =
+ BuilderNoOverflowLHSonlyBB.CreateZExt(Carry, LegalTy, "lhs.carry.zext");
+ ResHi = BuilderNoOverflowLHSonlyBB.CreateAdd(P1Hi, Carry, "lhs.p1.carry");
+
+ ResLoEx = BuilderNoOverflowLHSonlyBB.CreateZExt(P0Lo, Ty, "lhs.res_lo.zext");
+ ResMid =
+ BuilderNoOverflowLHSonlyBB.CreateZExt(AddOResMid, Ty, "lhs.res_mid.zext");
+ ResMidShl = BuilderNoOverflowLHSonlyBB.CreateShl(ResMid, VTHalfBitWidth,
+ "lhs.res_mid.shl");
+ FinalRes = BuilderNoOverflowLHSonlyBB.CreateOr(ResLoEx, ResMidShl,
+ "lhs.res_lo.or.mid");
+ IsOverflow = BuilderNoOverflowLHSonlyBB.CreateICmp(
+ ICmpInst::ICMP_NE, ResHi, Constant::getNullValue(LegalTy),
+ "lhs.check.overflow");
+
+ STy = StructType::get(I->getContext(),
+ {Ty, IntegerType::getInt1Ty(I->getContext())});
+ Value *StructValNoOverflowLHS = PoisonValue::get(STy);
+ StructValNoOverflowLHS = BuilderNoOverflowLHSonlyBB.CreateInsertValue(
+ StructValNoOverflowLHS, FinalRes, {0});
+ StructValNoOverflowLHS = BuilderNoOverflowLHSonlyBB.CreateInsertValue(
+ StructValNoOverflowLHS, IsOverflow, {1});
+
+ BuilderNoOverflowLHSonlyBB.CreateBr(OverflowResBB);
+ //------------------------------------------------------------------------------
+
+ // BB overflow.no:
+ auto *Mul = BuilderNoOverflowBB.CreateMul(LHS, RHS, "mul.no.overflow");
+ STy = StructType::get(I->getContext(),
+ {Ty, IntegerType::getInt1Ty(I->getContext())});
+ Value *StructValNoOverflow = PoisonValue::get(STy);
+ StructValNoOverflow =
+ BuilderNoOverflowBB.CreateInsertValue(StructValNoOverflow, Mul, {0});
+ StructValNoOverflow = BuilderNoOverflowBB.CreateInsertValue(
+ StructValNoOverflow, ConstantInt::getFalse(I->getContext()), {1});
+ BuilderNoOverflowBB.CreateBr(OverflowResBB);
+
+ // BB overflow.res:
+ auto *PHINode = BuilderOverflowResBB.CreatePHI(STy, 2);
+ PHINode->addIncoming(StructValNoOverflow, NoOverflowBB);
+ PHINode->addIncoming(StructValNoOverflowLHS, NoOverflowLHSonlyBB);
+ PHINode->addIncoming(StructValNoOverflowRHS, NoOverflowRHSonlyBB);
+
+ // Before moving the mul.overflow intrinsic to OverflowBB, replace all of its
+ // uses with the PHI node.
+ I->replaceAllUsesWith(PHINode);
+
+ // BB overflow:
+ PHINode->addIncoming(I, OverflowBB);
+ I->removeFromParent();
+ I->insertInto(OverflowBB, OverflowBB->end());
+ IRBuilder<>(OverflowBB, OverflowBB->end()).CreateBr(OverflowResBB);
+
+ // Return false so that the function is not reprocessed.
+ return false;
+}
+
+// Rewrite the smul_with_overflow intrinsic by checking whether the value of
+// either (or both) of the operands fits in the legal half-width type. If so, a
+// cheaper multiplication sequence can be used. This rewrite ideally belongs in
+// type legalization, but the IR reconstruction it requires is not possible
+// there, so it is done here instead.
+bool CodeGenPrepare::optimizeSMulWithOverflow(Instruction *I) {
+ if (TLI->getTypeAction(
+ I->getContext(),
+ TLI->getValueType(*DL, I->getType()->getContainedType(0))) !=
+ TargetLowering::TypeExpandInteger)
+ return false;
+ Value *LHS = I->getOperand(0);
+ Value *RHS = I->getOperand(1);
+ auto *Ty = LHS->getType();
+ unsigned VTBitWidth = Ty->getScalarSizeInBits();
+ unsigned VTHalfBitWidth = VTBitWidth / 2;
+ auto *LegalTy = IntegerType::getIntNTy(I->getContext(), VTHalfBitWidth);
+
+ assert(
+ (TLI->getTypeAction(I->getContext(), TLI->getValueType(*DL, LegalTy)) ==
+ TargetLowering::TypeLegal) &&
+ "Expected the type to be legal for the target lowering");
+
+ I->getParent()->setName("overflow.res");
+ auto *OverflowResBB = I->getParent();
+ auto *OverflowEntryBB =
+ I->getParent()->splitBasicBlock(I, "overflow.entry", /*Before*/ true);
+ BasicBlock *OverflowLHSBB = BasicBlock::Create(
+ I->getContext(), "overflow.lhs", I->getFunction(), OverflowResBB);
+ BasicBlock *NoOverflowLHSBB = BasicBlock::Create(
+ I->getContext(), "overflow.no.lhs", I->getFunction(), OverflowResBB);
+ BasicBlock *NoOverflowRHSonlyBB = BasicBlock::Create(
+ I->getContext(), "overflow.no.rhs.only", I->getFunction(), OverflowResBB);
+ BasicBlock *NoOverflowLHSonlyBB = BasicBlock::Create(
+ I->getContext(), "overflow.no.lhs.only", I->getFunction(), OverflowResBB);
+ BasicBlock *NoOverflowBB = BasicBlock::Create(
+ I->getContext(), "overflow.no", I->getFunction(), OverflowResBB);
+ BasicBlock *OverflowBB = BasicBlock::Create(I->getContext(), "overflow",
+ I->getFunction(), OverflowResBB);
+ // The new block structure is:
+ // overflow.entry:
+ //   hi(LHS) != sign-extension of lo(LHS) ? overflow.lhs : overflow.no.lhs
+
+ // overflow.lhs:
+ //   hi(RHS) != sign-extension of lo(RHS) ? overflow : overflow.no.rhs.only
+
+ // overflow.no.lhs:
+ //   hi(RHS) != sign-extension of lo(RHS) ? overflow.no.lhs.only : overflow.no
+
+ // overflow.no.rhs.only:
+ // overflow.no.lhs.only:
+ // overflow.no:
+ // overflow:
+ // overflow.res:
+
+ IRBuilder<> BuilderEntryBB(OverflowEntryBB->getTerminator());
+ IRBuilder<> BuilderOverflowLHSBB(OverflowLHSBB);
+ IRBuilder<> BuilderNoOverflowLHSBB(NoOverflowLHSBB);
+ IRBuilder<> BuilderNoOverflowRHSonlyBB(NoOverflowRHSonlyBB);
+ IRBuilder<> BuilderNoOverflowLHSonlyBB(NoOverflowLHSonlyBB);
+ IRBuilder<> BuilderNoOverflowBB(NoOverflowBB);
+ IRBuilder<> BuilderOverflowResBB(OverflowResBB,
+ OverflowResBB->getFirstInsertionPt());
+
+ //------------------------------------------------------------------------------
+ // BB overflow.entry:
+ // get Lo and Hi of RHS & LHS:
+
+ auto *LoRHS = BuilderEntryBB.CreateTrunc(RHS, LegalTy, "lo.rhs");
+ auto *SignLoRHS =
+ BuilderEntryBB.CreateAShr(LoRHS, VTHalfBitWidth - 1, "sign.lo.rhs");
+ auto *HiRHS = BuilderEntryBB.CreateLShr(RHS, VTHalfBitWidth, "rhs.lsr");
+ HiRHS = BuilderEntryBB.CreateTrunc(HiRHS, LegalTy, "hi.rhs");
+
+ auto *LoLHS = BuilderEntryBB.CreateTrunc(LHS, LegalTy, "lo.lhs");
+ auto *SignLoLHS =
+ BuilderEntryBB.CreateAShr(LoLHS, VTHalfBitWidth - 1, "sign.lo.lhs");
+ auto *HiLHS = BuilderEntryBB.CreateLShr(LHS, VTHalfBitWidth, "lhs.lsr");
+ HiLHS = BuilderEntryBB.CreateTrunc(HiLHS, LegalTy, "hi.lhs");
+
+ auto *Cmp = BuilderEntryBB.CreateCmp(ICmpInst::ICMP_NE, HiLHS, SignLoLHS);
+ BuilderEntryBB.CreateCondBr(Cmp, OverflowLHSBB, NoOverflowLHSBB);
+ OverflowEntryBB->getTerminator()->eraseFromParent();
+
+ //------------------------------------------------------------------------------
+ // BB overflow.lhs:
+ Cmp = BuilderOverflowLHSBB.CreateCmp(ICmpInst::ICMP_NE, HiRHS, SignLoRHS);
+ BuilderOverflowLHSBB.CreateCondBr(Cmp, OverflowBB, NoOverflowRHSonlyBB);
+
+ //------------------------------------------------------------------------------
+ // BB overflow.no.lhs:
+ Cmp = BuilderNoOverflowLHSBB.CreateCmp(ICmpInst::ICMP_NE, HiRHS, SignLoRHS);
+ BuilderNoOverflowLHSBB.CreateCondBr(Cmp, NoOverflowLHSonlyBB, NoOverflowBB);
+
+ //------------------------------------------------------------------------------
+ // BB overflow.no.rhs.only:
+ // RHS fits in the legal half-width type (as a signed value); LHS does not.
+ // The product is formed on the absolute values of the operands; the sign of
+ // the result is restored below.
+ //   P0 = |RHS| * Lo(|LHS|)
+ //   P1 = |RHS| * Hi(|LHS|)
+
+ // check sign of RHS:
+ auto *IsNegRHS = BuilderNoOverflowRHSonlyBB.CreateIsNeg(RHS, "rhs.isneg");
+ auto *AbsRHSIntr = BuilderNoOverflowRHSonlyBB.CreateBinaryIntrinsic(
+ Intrinsic::abs, RHS, ConstantInt::getFalse(I->getContext()), {},
+ "abs.rhs");
+ auto *AbsRHS = BuilderNoOverflowRHSonlyBB.CreateSelect(
+ IsNegRHS, AbsRHSIntr, RHS, "lo.abs.rhs.select");
+
+ // check sign of LHS:
+ auto *IsNegLHS = BuilderNoOverflowRHSonlyBB.CreateIsNeg(LHS, "lhs.isneg");
+ auto *AbsLHSIntr = BuilderNoOverflowRHSonlyBB.CreateBinaryIntrinsic(
+ Intrinsic::abs, LHS, ConstantInt::getFalse(I->getContext()), {},
+ "abs.lhs");
+ auto *AbsLHS = BuilderNoOverflowRHSonlyBB.CreateSelect(IsNegLHS, AbsLHSIntr,
+ LHS, "abs.lhs.select");
+ LoLHS = BuilderNoOverflowRHSonlyBB.CreateAnd(
+ AbsLHS,
+ ConstantInt::get(Ty, APInt::getLowBitsSet(VTBitWidth, VTHalfBitWidth)),
+ "lo.abs.lhs");
+ HiLHS = BuilderNoOverflowRHSonlyBB.CreateLShr(AbsLHS, VTHalfBitWidth,
+ "hi.abs.lhs");
+
+ // P0 = (RHS * LoLHS)
+ auto *P0 = BuilderNoOverflowRHSonlyBB.CreateMul(AbsRHS, LoLHS,
+ "mul.no.overflow.rhs.lolhs");
+ auto *P0Lo = BuilderNoOverflowRHSonlyBB.CreateTrunc(P0, LegalTy, "p0.lo.rhs");
+ auto *P0Hi =
+ BuilderNoOverflowRHSonlyBB.CreateLShr(P0, VTHalfBitWidth, "p0.rhs.lsr");
+ P0Hi = BuilderNoOverflowRHSonlyBB.CreateTrunc(P0Hi, LegalTy, "p0.hi.rhs");
+
+ // P1 = (RHS * HiLHS)
+ auto *P1 = BuilderNoOverflowRHSonlyBB.CreateMul(AbsRHS, HiLHS,
+ "mul.no.overflow.rhs.hilhs");
+ auto *P1Lo = BuilderNoOverflowRHSonlyBB.CreateTrunc(P1, LegalTy, "p1.lo.rhs");
+ auto *P1Hi =
+ BuilderNoOverflowRHSonlyBB.CreateLShr(P1, VTHalfBitWidth, "p1.rhs.lsr");
+ P1Hi = BuilderNoOverflowRHSonlyBB.CreateTrunc(P1Hi, LegalTy, "p1.hi.rhs");
+
+ auto *AddOverflow = BuilderNoOverflowRHSonlyBB.CreateIntrinsic(
+ Intrinsic::uadd_with_overflow, LegalTy, {P0Hi, P1Lo});
+ auto *AddOResMid = BuilderNoOverflowRHSonlyBB.CreateExtractValue(
+ AddOverflow, 0, "rhs.p0.p1.res");
+ auto *Carry = BuilderNoOverflowRHSonlyBB.CreateExtractValue(
+ AddOverflow, 1, "rhs.p0.p1.carry");
+ Carry =
+ BuilderNoOverflowRHSonlyBB.CreateZExt(Carry, LegalTy, "rhs.carry.zext");
+ auto *ResHi =
+ BuilderNoOverflowRHSonlyBB.CreateAdd(P1Hi, Carry, "rhs.p1.carry");
+
+ // sign handling:
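+ // The product of the absolute values is held in (ResHi : AddOResMid : P0Lo).
+ // If exactly one operand is negative, negate that three-part value: XOR each
+ // part with an all-ones mask and add 1, propagating the carry between parts.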
+ auto *IsNeg = BuilderNoOverflowRHSonlyBB.CreateXor(IsNegRHS, IsNegLHS); // i1
+ auto *Mask =
+ BuilderNoOverflowRHSonlyBB.CreateSExt(IsNeg, LegalTy, "rhs.sign.mask");
+ auto *Add_1 =
+ BuilderNoOverflowRHSonlyBB.CreateZExt(IsNeg, LegalTy, "rhs.add.1");
+ auto *ResLo =
+ BuilderNoOverflowRHSonlyBB.CreateXor(P0Lo, Mask, "rhs.res_lo.xor.mask");
+ ResLo =
+ BuilderNoOverflowRHSonlyBB.CreateAdd(ResLo, Add_1, "rhs.res_lo.add.1");
+
+ Carry = BuilderNoOverflowRHSonlyBB.CreateCmp(ICmpInst::ICMP_ULT, ResLo, Add_1,
+ "rhs.check.res_lo.carry");
+ Carry =
+ BuilderNoOverflowRHSonlyBB.CreateZExt(Carry, LegalTy, "rhs.carry.zext");
+ auto *ResMid = BuilderNoOverflowRHSonlyBB.CreateXor(AddOResMid, Mask,
+ "rhs.res_mid.xor.mask");
+ ResMid =
+ BuilderNoOverflowRHSonlyBB.CreateAdd(ResMid, Carry, "rhs.res_mid.carry");
+
+ Carry = BuilderNoOverflowRHSonlyBB.CreateCmp(ICmpInst::ICMP_ULT, ResMid,
+ Carry, "rhs.check.reslo.carry");
+ Carry =
+ BuilderNoOverflowRHSonlyBB.CreateZExt(Carry, LegalTy, "rhs.carry.zext");
+ ResHi =
+ BuilderNoOverflowRHSonlyBB.CreateXor(ResHi, Mask, "rhs.res_hi.xor.mask");
+ ResHi =
+ BuilderNoOverflowRHSonlyBB.CreateAdd(ResHi, Carry, "rhs.res_hi.carry");
+ // set the final result:
+ auto *ResLoEx =
+ BuilderNoOverflowRHSonlyBB.CreateZExt(ResLo, Ty, "rhs.res_lo.zext");
+ ResMid =
+ BuilderNoOverflowRHSonlyBB.CreateZExt(ResMid, Ty, "rhs.res_mid.zext");
+ auto *ResMidShl = BuilderNoOverflowRHSonlyBB.CreateShl(ResMid, VTHalfBitWidth,
+ "rhs.res_mid.shl");
+ auto *FinalRes = BuilderNoOverflowRHSonlyBB.CreateOr(ResLoEx, ResMidShl,
+ "rhs.res_lo.or.mid");
+ auto *IsOverflow = BuilderNoOverflowRHSonlyBB.CreateICmp(
+ ICmpInst::ICMP_NE, ResHi, Constant::getNullValue(LegalTy),
+ "rhs.check.overflow");
+
+ StructType *STy = StructType::get(
+ I->getContext(), {Ty, IntegerType::getInt1Ty(I->getContext())});
+ Value *StructValNoOverflowRHS = PoisonValue::get(STy);
+ StructValNoOverflowRHS = BuilderNoOverflowRHSonlyBB.CreateInsertValue(
+ StructValNoOverflowRHS, FinalRes, {0});
+ StructValNoOverflowRHS = BuilderNoOverflowRHSonlyBB.CreateInsertValue(
+ StructValNoOverflowRHS, IsOverflow, {1});
+ BuilderNoOverflowRHSonlyBB.CreateBr(OverflowResBB);
+ //------------------------------------------------------------------------------
+
+ // BB overflow.no.lhs.only:
+ // LHS fits in the legal half-width type (as a signed value); RHS does not.
+ //   P0 = |LHS| * Lo(|RHS|)
+ //   P1 = |LHS| * Hi(|RHS|)
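+ // Mirror image of the overflow.no.rhs.only block above with LHS and RHS
+ // swapped; here LHS is the operand that fits in the half-width type.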
+
+ // check sign of LHS:
+ IsNegLHS = BuilderNoOverflowLHSonlyBB.CreateIsNeg(LHS, "lhs.isneg");
+ AbsLHSIntr = BuilderNoOverflowLHSonlyBB.CreateBinaryIntrinsic(
+ Intrinsic::abs, LHS, ConstantInt::getFalse(I->getContext()), {},
+ "abs.lhs");
+ AbsLHS = BuilderNoOverflowLHSonlyBB.CreateSelect(IsNegLHS, AbsLHSIntr, LHS,
+ "abs.lhs.select");
+
+ // check sign of RHS:
+ IsNegRHS = BuilderNoOverflowLHSonlyBB.CreateIsNeg(RHS, "rhs.isneg");
+ AbsRHSIntr = BuilderNoOverflowLHSonlyBB.CreateBinaryIntrinsic(
+ Intrinsic::abs, RHS, ConstantInt::getFalse(I->getContext()), {},
+ "abs.rhs");
+ AbsRHS = BuilderNoOverflowLHSonlyBB.CreateSelect(IsNegRHS, AbsRHSIntr, RHS,
+ "abs.rhs.select");
+
+ LoRHS = BuilderNoOverflowLHSonlyBB.CreateAnd(
+ AbsRHS,
+ ConstantInt::get(Ty, APInt::getLowBitsSet(VTBitWidth, VTHalfBitWidth)),
+ "lo.abs.rhs");
+ HiRHS = BuilderNoOverflowLHSonlyBB.CreateLShr(AbsRHS, VTHalfBitWidth,
+ "hi.abs.rhs");
+
+ // P0 = (LHS * LoRHS)
+ P0 = BuilderNoOverflowLHSonlyBB.CreateMul(AbsLHS, LoRHS,
+ "mul.no.overflow.lhs.lorhs");
+ P0Lo = BuilderNoOverflowLHSonlyBB.CreateTrunc(P0, LegalTy, "p0.lo.lhs");
+ P0Hi =
+ BuilderNoOverflowLHSonlyBB.CreateLShr(P0, VTHalfBitWidth, "p0.lsr.lhs");
+ P0Hi = BuilderNoOverflowLHSonlyBB.CreateTrunc(P0Hi, LegalTy, "p0.hi.lhs");
+
+ // P1 = (LHS * HiRHS)
+ P1 = BuilderNoOverflowLHSonlyBB.CreateMul(AbsLHS, HiRHS,
+ "mul.no.overflow.lhs.hirhs");
+ P1Lo = BuilderNoOverflowLHSonlyBB.CreateTrunc(P1, LegalTy, "p1.lo.lhs");
+ P1Hi =
+ BuilderNoOverflowLHSonlyBB.CreateLShr(P1, VTHalfBitWidth, "p1.lhs.lsr");
+ P1Hi = BuilderNoOverflowLHSonlyBB.CreateTrunc(P1Hi, LegalTy, "p1.hi.lhs");
+
+ AddOverflow = BuilderNoOverflowLHSonlyBB.CreateIntrinsic(
+ Intrinsic::uadd_with_overflow, LegalTy, {P0Hi, P1Lo});
+ AddOResMid = BuilderNoOverflowLHSonlyBB.CreateExtractValue(AddOverflow, 0,
+ "lhs.p0.p1.res");
+ Carry = BuilderNoOverflowLHSonlyBB.CreateExtractValue(AddOverflow, 1,
+ "lhs.p0.p1.carry");
+ Carry =
+ BuilderNoOverflowLHSonlyBB.CreateZExt(Carry, LegalTy, "lhs.carry.zext");
+ ResHi = BuilderNoOverflowLHSonlyBB.CreateAdd(P1Hi, Carry, "lhs.p1.carry");
+
+ // sign handling:
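+ // Same conditional negation of the three-part result as in the RHS-only
+ // block above.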
+ IsNeg = BuilderNoOverflowLHSonlyBB.CreateXor(IsNegRHS, IsNegLHS); // i1
+ Mask = BuilderNoOverflowLHSonlyBB.CreateSExt(IsNeg, LegalTy, "lhs.sign.mask");
+ Add_1 = BuilderNoOverflowLHSonlyBB.CreateZExt(IsNeg, LegalTy, "lhs.add.1");
+ ResLo =
+ BuilderNoOverflowLHSonlyBB.CreateXor(P0Lo, Mask, "lhs.res_lo.xor.mask");
+ ResLo =
+ BuilderNoOverflowLHSonlyBB.CreateAdd(ResLo, Add_1, "lhs.res_lo.add.1");
+
+ Carry = BuilderNoOverflowLHSonlyBB.CreateCmp(ICmpInst::ICMP_ULT, ResLo, Add_1,
+ "lhs.check.res_lo.carry");
+ Carry =
+ BuilderNoOverflowLHSonlyBB.CreateZExt(Carry, LegalTy, "lhs.carry.zext");
+ ResMid = BuilderNoOverflowLHSonlyBB.CreateXor(AddOResMid, Mask,
+ "lhs.res_mid.xor.mask");
+ ResMid =
+ BuilderNoOverflowLHSonlyBB.CreateAdd(ResMid, Carry, "lhs.res_mid.carry");
+
+ Carry = BuilderNoOverflowLHSonlyBB.CreateCmp(ICmpInst::ICMP_ULT, ResMid,
+ Carry, "lhs.check.reslo.carry");
+ Carry =
+ BuilderNoOverflowLHSonlyBB.CreateZExt(Carry, LegalTy, "lhs.carry.zext");
+ ResHi =
+ BuilderNoOverflowLHSonlyBB.CreateXor(ResHi, Mask, "lhs.res_hi.xor.mask");
+ ResHi =
+ BuilderNoOverflowLHSonlyBB.CreateAdd(ResHi, Carry, "lhs.res_hi.carry");
+ // Set the final result:
+ ResLoEx = BuilderNoOverflowLHSonlyBB.CreateZExt(ResLo, Ty, "lhs.res_lo.zext");
+ ResMid =
+ BuilderNoOverflowLHSonlyBB.CreateZExt(ResMid, Ty, "lhs.res_mid.zext");
+ ResMidShl = BuilderNoOverflowLHSonlyBB.CreateShl(ResMid, VTHalfBitWidth,
+ "lhs.res_mid.shl");
+ FinalRes = BuilderNoOverflowLHSonlyBB.CreateOr(ResLoEx, ResMidShl,
+ "lhs.res_lo.or.mid");
+ IsOverflow = BuilderNoOverflowLHSonlyBB.CreateICmp(
+ ICmpInst::ICMP_NE, ResHi, Constant::getNullValue(LegalTy),
+ "lhs.check.overflow");
+
+ STy = StructType::get(I->getContext(),
+ {Ty, IntegerType::getInt1Ty(I->getContext())});
+ Value *StructValNoOverflowLHS = PoisonValue::get(STy);
+ StructValNoOverflowLHS = BuilderNoOverflowLHSonlyBB.CreateInsertValue(
+ StructValNoOverflowLHS, FinalRes, {0});
+ StructValNoOverflowLHS = BuilderNoOverflowLHSonlyBB.CreateInsertValue(
+ StructValNoOverflowLHS, IsOverflow, {1});
+
+ BuilderNoOverflowLHSonlyBB.CreateBr(OverflowResBB);
+ //------------------------------------------------------------------------------
+
+ // BB overflow.no:
+ auto *Mul = BuilderNoOverflowBB.CreateMul(LHS, RHS, "mul.no.overflow");
+ STy = StructType::get(I->getContext(),
+ {Ty, IntegerType::getInt1Ty(I->getContext())});
+ Value *StructValNoOverflow = PoisonValue::get(STy);
+ StructValNoOverflow =
+ BuilderNoOverflowBB.CreateInsertValue(StructValNoOverflow, Mul, {0});
+ StructValNoOverflow = BuilderNoOverflowBB.CreateInsertValue(
+ StructValNoOverflow, ConstantInt::getFalse(I->getContext()), {1});
+ BuilderNoOverflowBB.CreateBr(OverflowResBB);
+
+ // BB overflow.res:
+ auto *PHINode = BuilderOverflowResBB.CreatePHI(STy, 2);
+ PHINode->addIncoming(StructValNoOverflow, NoOverflowBB);
+ PHINode->addIncoming(StructValNoOverflowLHS, NoOverflowLHSonlyBB);
+ PHINode->addIncoming(StructValNoOverflowRHS, NoOverflowRHSonlyBB);
+
+ // Before moving the mul.overflow intrinsic to OverflowBB, replace all of its
+ // uses with the PHI node.
+ I->replaceAllUsesWith(PHINode);
+
+ // BB overflow:
+ PHINode->addIncoming(I, OverflowBB);
+ I->removeFromParent();
+ I->insertInto(OverflowBB, OverflowBB->end());
+ IRBuilder<>(OverflowBB, OverflowBB->end()).CreateBr(OverflowResBB);
+
+ // Return false so that the function is not reprocessed.
+ return false;
+}
+
/// If there are any memory operands, use OptimizeMemoryInst to sink their
/// address computing into the block when possible / profitable.
bool CodeGenPrepare::optimizeInlineAsmInst(CallInst *CS) {
diff --git a/llvm/test/CodeGen/AArch64/i128-math.ll b/llvm/test/CodeGen/AArch64/i128-math.ll
index 9e1c0c1b115ab..e2791f44d0a08 100644
--- a/llvm/test/CodeGen/AArch64/i128-math.ll
+++ b/llvm/test/CodeGen/AArch64/i128-math.ll
@@ -261,21 +261,55 @@ define i128 @u128_mul(i128 %x, i128 %y) {
define { i128, i8 } @u128_checked_mul(i128 %x, i128 %y) {
; CHECK-LABEL: u128_checked_mul:
-; CHECK: // %bb.0:
+; CHECK: // %bb.0: // %overflow.entry
+; CHECK-NEXT: cbz x1, .LBB17_3
+; CHECK-NEXT: // %bb.1: // %overflow.lhs
+; CHECK-NEXT: cbz x3, .LBB17_5
+; CHECK-NEXT: // %bb.2: // %overflow
; CHECK-NEXT: mul x9, x3, x0
; CHECK-NEXT: cmp x1, #0
; CHECK-NEXT: ccmp x3, #0, #4, ne
-; CHECK-NEXT: umulh x8, x1, x2
-; CHECK-NEXT: umulh x10, x3, x0
+; CHECK-NEXT: umulh x10, x1, x2
+; CHECK-NEXT: umulh x8, x3, x0
; CHECK-NEXT: madd x9, x1, x2, x9
-; CHECK-NEXT: ccmp xzr, x8, #0, eq
-; CHECK-NEXT: umulh x11, x0, x2
; CHECK-NEXT: ccmp xzr, x10, #0, eq
+; CHECK-NEXT: umulh x11, x0, x2
+; CHECK-NEXT: ccmp xzr, x8, #0, eq
; CHECK-NEXT: mul x0, x0, x2
; CHECK-NEXT: cset w8, ne
; CHECK-NEXT: adds x1, x11, x9
; CHECK-NEXT: csinc w8, w8, wzr, lo
-; CHECK-NEXT: eor w2, w8, #0x1
+; CHECK-NEXT: b .LBB17_8
+; CHECK-NEXT: .LBB17_3: // %overflow.no.lhs
+; CHECK-NEXT: umulh x8, x0, x2
+; CHECK-NEXT: cbz x3, .LBB17_7
+; CHECK-NEXT: // %bb.4: // %overflow.no.lhs.only
+; CHECK-NEXT: madd x8, x1, x2, x8
+; CHECK-NEXT: umulh x9, x0, x3
+; CHECK-NEXT: mul x10, x0, x3
+; CHECK-NEXT: mul x11, x1, x3
+; CHECK-NEXT: mul x0, x0, x2
+; CHECK-NEXT: b .LBB17_6
+; CHECK-NEXT: .LBB17_5: // %overflow.no.rhs.only
+; CHECK-NEXT: umulh x8, x2, x0
+; CHECK-NEXT: umulh x9, x2, x1
+; CHECK-NEXT: madd x8, x3, x0, x8
+; CHECK-NEXT: mul x10, x2, x1
+; CHECK-NEXT: mul x11, x3, x1
+; CHECK-NEXT: mul x0, x2, x0
+; CHECK-NEXT: .LBB17_6: // %overflow.res
+; CHECK-NEXT: adds x1, x8, x10
+; CHECK-NEXT: adcs xzr, x9, x11
+; CHECK-NEXT: cset w8, ne
+; CHECK-NEXT: b .LBB17_8
+; CHECK-NEXT: .LBB17_7: // %overflow.no
+; CHECK-NEXT: madd x8, x0, x3, x8
+; CHECK-NEXT: mul x0, x0, x2
+; CHECK-NEXT: madd x1, x1, x2, x8
+; CHECK-NEXT: mov w8, wzr
+; CHECK-NEXT: .LBB17_8: // %overflow.res
+; CHECK-NEXT: mov w9, #1 // =0x1
+; CHECK-NEXT: bic w2, w9, w8
; CHECK-NEXT: ret
%1 = tail call { i128, i1 } @llvm.umul.with.overflow.i128(i128 %x, i128 %y)
%2 = extractvalue { i128, i1 } %1, 0
@@ -289,20 +323,54 @@ define { i128, i8 } @u128_checked_mul(i128 %x, i128 %y) {
define { i128, i8 } @u128_overflowing_mul(i128 %x, i128 %y) {
; CHECK-LABEL: u128_overflowing_mul:
-; CHECK: // %bb.0:
+; CHECK: // %bb.0: // %overflow.entry
+; CHECK-NEXT: cbz x1, .LBB18_3
+; CHECK-NEXT: // %bb.1: // %overflow.lhs
+; CHECK-NEXT: cbz x3, .LBB18_5
+; CHECK-NEXT: // %bb.2: // %overflow
; CHECK-NEXT: mul x9, x3, x0
; CHECK-NEXT: cmp x1, #0
; CHECK-NEXT: ccmp x3, #0, #4, ne
-; CHECK-NEXT: umulh x8, x1, x2
-; CHECK-NEXT: umulh x10, x3, x0
+; CHECK-NEXT: umulh x10, x1, x2
+; CHECK-NEXT: umulh x8, x3, x0
; CHECK-NEXT: madd x9, x1, x2, x9
-; CHECK-NEXT: ccmp xzr, x8, #0, eq
-; CHECK-NEXT: umulh x11, x0, x2
; CHECK-NEXT: ccmp xzr, x10, #0, eq
+; CHECK-NEXT: umulh x11, x0, x2
+; CHECK-NEXT: ccmp xzr, x8, #0, eq
; CHECK-NEXT: mul x0, x0, x2
; CHECK-NEXT: cset w8, ne
; CHECK-NEXT: adds x1, x11, x9
-; CHECK-NEXT: csinc w2, w8, wzr, lo
+; CHECK-NEXT: csinc w8, w8, wzr, lo
+; CHECK-NEXT: and w2, w8, #0x1
+; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB18_3: // %overflow.no.lhs
+; CHECK-NEXT: umulh x8, x0, x2
+; CHECK-NEXT: cbz x3, .LBB18_7
+; CHECK-NEXT: // %bb.4: // %overflow.no.lhs.only
+; CHECK-NEXT: madd x8, x1, x2, x8
+; CHECK-NEXT: umulh x9, x0, x3
+; CHECK-NEXT: mul x10, x0, x3
+; CHECK-NEXT: mul x11, x1, x3
+; CHECK-NEXT: mul x0, x0, x2
+; CHECK-NEXT: b .LBB18_6
+; CHECK-NEXT: .LBB18_5: // %overflow.no.rhs.only
+; CHECK-NEXT: umulh x8, x2, x0
+; CHECK-NEXT: umulh x9, x2, x1
+; CHECK-NEXT: madd x8, x3, x0, x8
+; CHECK-NEXT: mul x10, x2, x1
+; CHECK-NEXT: mul x11, x3, x1
+; CHECK-NEXT: mul x0, x2, x0
+; CHECK-NEXT: .LBB18_6: // %overflow.res
+; CHECK-NEXT: adds x1, x8, x10
+; CHECK-NEXT: adcs xzr, x9, x11
+; CHECK-NEXT: cset w8, ne
+; CHECK-NEXT: and w2, w8, #0x1
+; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB18_7: // %overflow.no
+; CHECK-NEXT: madd x8, x0, x3, x8
+; CHECK-NEXT: mul x0, x0, x2
+; CHECK-NEXT: madd x1, x1, x2, x8
+; CHECK-NEXT: and w2, wzr, #0x1
; CHECK-NEXT: ret
%1 = tail call { i128, i1 } @llvm.umul.with.overflow.i128(i128 %x, i128 %y)
%2 = extractvalue { i128, i1 } %1, 0
@@ -315,21 +383,54 @@ define { i128, i8 } @u128_overflowing_mul(i128 %x, i128 %y) {
define i128 @u128_saturating_mul(i128 %x, i128 %y) {
; CHECK-LABEL: u128_saturating_mul:
-; CHECK: // %bb.0:
-; CHECK-NEXT: mul x9, x3, x0
+; CHECK: // %bb.0: // %overflow.entry
+; CHECK-NEXT: cbz x1, .LBB19_3
+; CHECK-NEXT: // %bb.1: // %overflow.lhs
+; CHECK-NEXT: cbz x3, .LBB19_5
+; CHECK-NEXT: // %bb.2: // %overflow
+; CHECK-NEXT: mul x8, x3, x0
; CHECK-NEXT: cmp x1, #0
; CHECK-NEXT: ccmp x3, #0, #4, ne
-; CHECK-NEXT: umulh x8, x1, x2
-; CHECK-NEXT: umulh x10, x3, x0
-; CHECK-NEXT: madd x9, x1, x2, x9
-; CHECK-NEXT: ccmp xzr, x8, #0, eq
-; CHECK-NEXT: umulh x11, x0, x2
+; CHECK-NEXT: umulh x10, x1, x2
+; CHECK-NEXT: umulh x9, x3, x0
+; CHECK-NEXT: madd x11, x1, x2, x8
; CHECK-NEXT: ccmp xzr, x10, #0, eq
+; CHECK-NEXT: umulh x12, x0, x2
+; CHECK-NEXT: ccmp xzr, x9, #0, eq
; CHECK-NEXT: mul x8, x0, x2
; CHECK-NEXT: cset w10, ne
-; CHECK-NEXT: adds x9, x11, x9
+; CHECK-NEXT: adds x9, x12, x11
; CHECK-NEXT: csinc w10, w10, wzr, lo
-; CHECK-NEXT: cmp w10, #0
+; CHECK-NEXT: b .LBB19_8
+; CHECK-NEXT: .LBB19_3: // %overflow.no.lhs
+; CHECK-NEXT: umulh x8, x0, x2
+; CHECK-NEXT: cbz x3, .LBB19_7
+; CHECK-NEXT: // %bb.4: // %overflow.no.lhs.only
+; CHECK-NEXT: madd x9, x1, x2, x8
+; CHECK-NEXT: umulh x10, x0, x3
+; CHECK-NEXT: mul x11, x0, x3
+; CHECK-NEXT: mul x12, x1, x3
+; CHECK-NEXT: mul x8, x0, x2
+; CHECK-NEXT: b .LBB19_6
+; CHECK-NEXT: .LBB19_5: // %overflow.no.rhs.only
+; CHECK-NEXT: umulh x8, x2, x0
+; CHECK-NEXT: umulh x10, x2, x1
+; CHECK-NEXT: madd x9, x3, x0, x8
+; CHECK-NEXT: mul x11, x2, x1
+; CHECK-NEXT: mul x12, x3, x1
+; CHECK-NEXT: mul x8, x2, x0
+; CHECK-NEXT: .LBB19_6: // %overflow.res
+; CHECK-NEXT: adds x9, x9, x11
+; CHECK-NEXT: adcs xzr, x10, x12
+; CHECK-NEXT: cset w10, ne
+; CHECK-NEXT: b .LBB19_8
+; CHECK-NEXT: .LBB19_7: // %overflow.no
+; CHECK-NEXT: madd x8, x0, x3, x8
+; CHECK-NEXT: mov w10, wzr
+; CHECK-NEXT: madd x9, x1, x2, x8
+; CHECK-NEXT: mul x8, x0, x2
+; CHECK-NEXT: .LBB19_8: // %overflow.res
+; CHECK-NEXT: tst w10, #0x1
; CHECK-NEXT: csinv x0, x8, xzr, eq
; CHECK-NEXT: csinv x1, x9, xzr, eq
; CHECK-NEXT: ret
@@ -354,7 +455,14 @@ define i128 @i128_mul(i128 %x, i128 %y) {
define { i128, i8 } @i128_checked_mul(i128 %x, i128 %y) {
; CHECK-LABEL: i128_checked_mul:
-; CHECK: // %bb.0:
+; CHECK: // %bb.0: // %overflow.entry
+; CHECK-NEXT: asr x8, x2, #63
+; CHECK-NEXT: cmp x1, x0, asr #63
+; CHECK-NEXT: b.eq .LBB21_3
+; CHECK-NEXT: // %bb.1: // %overflow.lhs
+; CHECK-NEXT: cmp x3, x8
+; CHECK-NEXT: b.eq .LBB21_5
+; CHECK-NEXT: // %bb.2: // %overflow
; CHECK-NEXT: asr x9, x1, #63
; CHECK-NEXT: umulh x10, x0, x2
; CHECK-NEXT: asr x13, x3, #63
@@ -364,24 +472,106 @@ define { i128, i8 } @i128_checked_mul(i128 %x, i128 %y) {
; CHECK-NEXT: adds x10, x11, x10
; CHECK-NEXT: mul x14, x0, x3
; CHECK-NEXT: umulh x12, x0, x3
-; CHECK-NEXT: adc x9, x8, x9
+; CHECK-NEXT: adc x8, x8, x9
; CHECK-NEXT: mul x13, x0, x13
-; CHECK-NEXT: adds x8, x14, x10
+; CHECK-NEXT: asr x11, x8, #63
+; CHECK-NEXT: adds x9, x14, x10
; CHECK-NEXT: mul x15, x1, x3
; CHECK-NEXT: smulh x10, x1, x3
-; CHECK-NEXT: mov x1, x8
-; CHECK-NEXT: adc x11, x12, x13
+; CHECK-NEXT: mov x1, x9
+; CHECK-NEXT: adc x9, x12, x13
; CHECK-NEXT: asr x12, x9, #63
-; CHECK-NEXT: asr x13, x11, #63
-; CHECK-NEXT: adds x9, x9, x11
-; CHECK-NEXT: asr x11, x8, #63
; CHECK-NEXT: mul x0, x0, x2
-; CHECK-NEXT: adc x12, x12, x13
-; CHECK-NEXT: adds x9, x15, x9
-; CHECK-NEXT: adc x10, x10, x12
-; CHECK-NEXT: cmp x9, x11
-; CHECK-NEXT: ccmp x10, x11, #0, eq
-; CHECK-NEXT: cset w2, eq
+; CHECK-NEXT: adds x8, x8, x9
+; CHECK-NEXT: asr x9, x1, #63
+; CHECK-NEXT: adc x11, x11, x12
+; CHECK-NEXT: adds x8, x15, x8
+; CHECK-NEXT: adc x10, x10, x11
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: ccmp x10, x9, #0, eq
+; CHECK-NEXT: b .LBB21_7
+; CHECK-NEXT: .LBB21_3: // %overflow.no.lhs
+; CHECK-NEXT: cmp x3, x8
+; CHECK-NEXT: b.eq .LBB21_8
+; CHECK-NEXT: // %bb.4: // %overflow.no.lhs.only
+; CHECK-NEXT: asr x8, x1, #63
+; CHECK-NEXT: asr x10, x3, #63
+; CHECK-NEXT: eor x9, x0, x8
+; CHECK-NEXT: eor x11, x1, x8
+; CHECK-NEXT: eor x12, x2, x10
+; CHECK-NEXT: subs x9, x9, x8
+; CHECK-NEXT: sbc x8, x11, x8
+; CHECK-NEXT: cmp x1, #0
+; CHECK-NEXT: eor x11, x3, x10
+; CHECK-NEXT: csel x8, x8, x1, lt
+; CHECK-NEXT: csel x9, x9, x0, lt
+; CHECK-NEXT: cset w13, lt
+; CHECK-NEXT: subs x12, x12, x10
+; CHECK-NEXT: sbc x10, x11, x10
+; CHECK-NEXT: cmp x3, #0
+; CHECK-NEXT: csel x11, x12, x2, lt
+; CHECK-NEXT: csel x10, x10, x3, lt
+; CHECK-NEXT: umulh x12, x9, x11
+; CHECK-NEXT: mul x15, x8, x10
+; CHECK-NEXT: madd x8, x8, x11, x12
+; CHECK-NEXT: cset w12, lt
+; CHECK-NEXT: mul x14, x9, x11
+; CHECK-NEXT: mul x11, x9, x10
+; CHECK-NEXT: umulh x9, x9, x10
+; CHECK-NEXT: eor w10, w12, w13
+; CHECK-NEXT: b .LBB21_6
+; CHECK-NEXT: .LBB21_5: // %overflow.no.rhs.only
+; CHECK-NEXT: asr x8, x3, #63
+; CHECK-NEXT: asr x10, x1, #63
+; CHECK-NEXT: eor x9, x2, x8
+; CHECK-NEXT: eor x11, x3, x8
+; CHECK-NEXT: eor x12, x0, x10
+; CHECK-NEXT: subs x9, x9, x8
+; CHECK-NEXT: sbc x8, x11, x8
+; CHECK-NEXT: cmp x3, #0
+; CHECK-NEXT: eor x11, x1, x10
+; CHECK-NEXT: csel x8, x8, x3, lt
+; CHECK-NEXT: csel x9, x9, x2, lt
+; CHECK-NEXT: cset w13, lt
+; CHECK-NEXT: subs x12, x12, x10
+; CHECK-NEXT: sbc x10, x11, x10
+; CHECK-NEXT: cmp x1, #0
+; CHECK-NEXT: csel x11, x12, x0, lt
+; CHECK-NEXT: csel x10, x10, x1, lt
+; CHECK-NEXT: umulh x12, x9, x11
+; CHECK-NEXT: mul x14, x9, x11
+; CHECK-NEXT: mul x15, x8, x10
+; CHECK-NEXT: madd x8, x8, x11, x12
+; CHECK-NEXT: cset w12, lt
+; CHECK-NEXT: mul x11, x9, x10
+; CHECK-NEXT: umulh x9, x9, x10
+; CHECK-NEXT: eor w10, w13, w12
+; CHECK-NEXT: .LBB21_6: // %overflow.res
+; CHECK-NEXT: sbfx x12, x10, #0, #1
+; CHECK-NEXT: adds x8, x8, x11
+; CHECK-NEXT: adc x9, x9, x15
+; CHECK-NEXT: eor x13, x14, x12
+; CHECK-NEXT: eor x8, x8, x12
+; CHECK-NEXT: add x0, x13, x10
+; CHECK-NEXT: cmp x0, x10
+; CHECK-NEXT: cset w10, lo
+; CHECK-NEXT: cinc x1, x8, lo
+; CHECK-NEXT: eor x8, x9, x12
+; CHECK-NEXT: cmp x1, x10
+; CHECK-NEXT: cinc x8, x8, lo
+; CHECK-NEXT: cmp x8, #0
+; CHECK-NEXT: .LBB21_7: // %overflow.res
+; CHECK-NEXT: cset w8, ne
+; CHECK-NEXT: b .LBB21_9
+; CHECK-NEXT: .LBB21_8: // %overflow.no
+; CHECK-NEXT: umulh x8, x0, x2
+; CHECK-NEXT: madd x8, x0, x3, x8
+; CHECK-NEXT: mul x0, x0, x2
+; CHECK-NEXT: madd x1, x1, x2, x8
+; CHECK-NEXT: mov w8, wzr
+; CHECK-NEXT: .LBB21_9: // %overflow.res
+; CHECK-NEXT: mov w9, #1 // =0x1
+; CHECK-NEXT: bic w2, w9, w8
; CHECK-NEXT: ret
%1 = tail call { i128, i1 } @llvm.smul.with.overflow.i128(i128 %x, i128 %y)
%2 = extractvalue { i128, i1 } %1, 0
@@ -395,7 +585,14 @@ define { i128, i8 } @i128_checked_mul(i128 %x, i128 %y) {
define { i128, i8 } @i128_overflowing_mul(i128 %x, i128 %y) {
; CHECK-LABEL: i128_overflowing_mul:
-; CHECK: // %bb.0:
+; CHECK: // %bb.0: // %overflow.entry
+; CHECK-NEXT: asr x8, x2, #63
+; CHECK-NEXT: cmp x1, x0, asr #63
+; CHECK-NEXT: b.eq .LBB22_3
+; CHECK-NEXT: // %bb.1: // %overflow.lhs
+; CHECK-NEXT: cmp x3, x8
+; CHECK-NEXT: b.eq .LBB22_5
+; CHECK-NEXT: // %bb.2: // %overflow
; CHECK-NEXT: asr x9, x1, #63
; CHECK-NEXT: umulh x10, x0, x2
; CHECK-NEXT: asr x13, x3, #63
@@ -405,24 +602,104 @@ define { i128, i8 } @i128_overflowing_mul(i128 %x, i128 %y) {
; CHECK-NEXT: adds x10, x11, x10
; CHECK-NEXT: mul x14, x0, x3
; CHECK-NEXT: umulh x12, x0, x3
-; CHECK-NEXT: adc x9, x8, x9
+; CHECK-NEXT: adc x8, x8, x9
; CHECK-NEXT: mul x13, x0, x13
-; CHECK-NEXT: adds x8, x14, x10
+; CHECK-NEXT: asr x11, x8, #63
+; CHECK-NEXT: adds x9, x14, x10
; CHECK-NEXT: mul x15, x1, x3
; CHECK-NEXT: smulh x10, x1, x3
-; CHECK-NEXT: mov x1, x8
-; CHECK-NEXT: adc x11, x12, x13
+; CHECK-NEXT: mov x1, x9
+; CHECK-NEXT: adc x9, x12, x13
; CHECK-NEXT: asr x12, x9, #63
-; CHECK-NEXT: asr x13, x11, #63
-; CHECK-NEXT: adds x9, x9, x11
-; CHECK-NEXT: asr x11, x8, #63
; CHECK-NEXT: mul x0, x0, x2
-; CHECK-NEXT: adc x12, x12, x13
-; CHECK-NEXT: adds x9, x15, x9
-; CHECK-NEXT: adc x10, x10, x12
-; CHECK-NEXT: cmp x9, x11
-; CHECK-NEXT: ccmp x10, x11, #0, eq
-; CHECK-NEXT: cset w2, ne
+; CHECK-NEXT: adds x8, x8, x9
+; CHECK-NEXT: asr x9, x1, #63
+; CHECK-NEXT: adc x11, x11, x12
+; CHECK-NEXT: adds x8, x15, x8
+; CHECK-NEXT: adc x10, x10, x11
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: ccmp x10, x9, #0, eq
+; CHECK-NEXT: b .LBB22_7
+; CHECK-NEXT: .LBB22_3: // %overflow.no.lhs
+; CHECK-NEXT: cmp x3, x8
+; CHECK-NEXT: b.eq .LBB22_8
+; CHECK-NEXT: // %bb.4: // %overflow.no.lhs.only
+; CHECK-NEXT: asr x8, x1, #63
+; CHECK-NEXT: asr x10, x3, #63
+; CHECK-NEXT: eor x9, x0, x8
+; CHECK-NEXT: eor x11, x1, x8
+; CHECK-NEXT: eor x12, x2, x10
+; CHECK-NEXT: subs x9, x9, x8
+; CHECK-NEXT: sbc x8, x11, x8
+; CHECK-NEXT: cmp x1, #0
+; CHECK-NEXT: eor x11, x3, x10
+; CHECK-NEXT: csel x8, x8, x1, lt
+; CHECK-NEXT: csel x9, x9, x0, lt
+; CHECK-NEXT: cset w13, lt
+; CHECK-NEXT: subs x12, x12, x10
+; CHECK-NEXT: sbc x10, x11, x10
+; CHECK-NEXT: cmp x3, #0
+; CHECK-NEXT: csel x11, x12, x2, lt
+; CHECK-NEXT: csel x10, x10, x3, lt
+; CHECK-NEXT: umulh x12, x9, x11
+; CHECK-NEXT: mul x15, x8, x10
+; CHECK-NEXT: madd x8, x8, x11, x12
+; CHECK-NEXT: cset w12, lt
+; CHECK-NEXT: mul x14, x9, x11
+; CHECK-NEXT: mul x11, x9, x10
+; CHECK-NEXT: umulh x9, x9, x10
+; CHECK-NEXT: eor w10, w12, w13
+; CHECK-NEXT: b .LBB22_6
+; CHECK-NEXT: .LBB22_5: // %overflow.no.rhs.only
+; CHECK-NEXT: asr x8, x3, #63
+; CHECK-NEXT: asr x10, x1, #63
+; CHECK-NEXT: eor x9, x2, x8
+; CHECK-NEXT: eor x11, x3, x8
+; CHECK-NEXT: eor x12, x0, x10
+; CHECK-NEXT: subs x9, x9, x8
+; CHECK-NEXT: sbc x8, x11, x8
+; CHECK-NEXT: cmp x3, #0
+; CHECK-NEXT: eor x11, x1, x10
+; CHECK-NEXT: csel x8, x8, x3, lt
+; CHECK-NEXT: csel x9, x9, x2, lt
+; CHECK-NEXT: cset w13, lt
+; CHECK-NEXT: subs x12, x12, x10
+; CHECK-NEXT: sbc x10, x11, x10
+; CHECK-NEXT: cmp x1, #0
+; CHECK-NEXT: csel x11, x12, x0, lt
+; CHECK-NEXT: csel x10, x10, x1, lt
+; CHECK-NEXT: umulh x12, x9, x11
+; CHECK-NEXT: mul x14, x9, x11
+; CHECK-NEXT: mul x15, x8, x10
+; CHECK-NEXT: madd x8, x8, x11, x12
+; CHECK-NEXT: cset w12, lt
+; CHECK-NEXT: mul x11, x9, x10
+; CHECK-NEXT: umulh x9, x9, x10
+; CHECK-NEXT: eor w10, w13, w12
+; CHECK-NEXT: .LBB22_6: // %overflow.res
+; CHECK-NEXT: sbfx x12, x10, #0, #1
+; CHECK-NEXT: adds x8, x8, x11
+; CHECK-NEXT: adc x9, x9, x15
+; CHECK-NEXT: eor x13, x14, x12
+; CHECK-NEXT: eor x8, x8, x12
+; CHECK-NEXT: add x0, x13, x10
+; CHECK-NEXT: cmp x0, x10
+; CHECK-NEXT: cset w10, lo
+; CHECK-NEXT: cinc x1, x8, lo
+; CHECK-NEXT: eor x8, x9, x12
+; CHECK-NEXT: cmp x1, x10
+; CHECK-NEXT: cinc x8, x8, lo
+; CHECK-NEXT: cmp x8, #0
+; CHECK-NEXT: .LBB22_7: // %overflow.res
+; CHECK-NEXT: cset w8, ne
+; CHECK-NEXT: and w2, w8, #0x1
+; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB22_8: // %overflow.no
+; CHECK-NEXT: umulh x8, x0, x2
+; CHECK-NEXT: madd x8, x0, x3, x8
+; CHECK-NEXT: mul x0, x0, x2
+; CHECK-NEXT: madd x1, x1, x2, x8
+; CHECK-NEXT: and w2, wzr, #0x1
; CHECK-NEXT: ret
%1 = tail call { i128, i1 } @llvm.smul.with.overflow.i128(i128 %x, i128 %y)
%2 = extractvalue { i128, i1 } %1, 0
@@ -435,7 +712,14 @@ define { i128, i8 } @i128_overflowing_mul(i128 %x, i128 %y) {
define i128 @i128_saturating_mul(i128 %x, i128 %y) {
; CHECK-LABEL: i128_saturating_mul:
-; CHECK: // %bb.0:
+; CHECK: // %bb.0: // %overflow.entry
+; CHECK-NEXT: asr x8, x2, #63
+; CHECK-NEXT: cmp x1, x0, asr #63
+; CHECK-NEXT: b.eq .LBB23_3
+; CHECK-NEXT: // %bb.1: // %overflow.lhs
+; CHECK-NEXT: cmp x3, x8
+; CHECK-NEXT: b.eq .LBB23_5
+; CHECK-NEXT: // %bb.2: // %overflow
; CHECK-NEXT: asr x9, x1, #63
; CHECK-NEXT: umulh x10, x0, x2
; CHECK-NEXT: asr x13, x3, #63
@@ -448,26 +732,106 @@ define i128 @i128_saturating_mul(i128 %x, i128 %y) {
; CHECK-NEXT: adc x8, x8, x9
; CHECK-NEXT: mul x13, x0, x13
; CHECK-NEXT: adds x9, x14, x10
-; CHECK-NEXT: mul x11, x1, x3
-; CHECK-NEXT: adc x10, x12, x13
-; CHECK-NEXT: smulh x12, x1, x3
-; CHECK-NEXT: asr x13, x8, #63
-; CHECK-NEXT: asr x14, x10, #63
-; CHECK-NEXT: adds x8, x8, x10
-; CHECK-NEXT: adc x10, x13, x14
-; CHECK-NEXT: adds x8, x11, x8
-; CHECK-NEXT: asr x11, x9, #63
-; CHECK-NEXT: mul x13, x0, x2
-; CHECK-NEXT: adc x10, x12, x10
-; CHECK-NEXT: eor x12, x3, x1
-; CHECK-NEXT: eor x8, x8, x11
-; CHECK-NEXT: eor x10, x10, x11
-; CHECK-NEXT: asr x11, x12, #63
-; CHECK-NEXT: orr x8, x8, x10
-; CHECK-NEXT: eor x10, x11, #0x7fffffffffffffff
-; CHECK-NEXT: cmp x8, #0
-; CHECK-NEXT: csinv x0, x13, x11, eq
-; CHECK-NEXT: csel x1, x10, x9, ne
+; CHECK-NEXT: mul x15, x1, x3
+; CHECK-NEXT: asr x14, x9, #63
+; CHECK-NEXT: smulh x10, x1, x3
+; CHECK-NEXT: adc x11, x12, x13
+; CHECK-NEXT: asr x12, x8, #63
+; CHECK-NEXT: asr x13, x11, #63
+; CHECK-NEXT: adds x11, x8, x11
+; CHECK-NEXT: mul x8, x0, x2
+; CHECK-NEXT: adc x12, x12, x13
+; CHECK-NEXT: adds x11, x15, x11
+; CHECK-NEXT: adc x10, x10, x12
+; CHECK-NEXT: cmp x11, x14
+; CHECK-NEXT: ccmp x10, x14, #0, eq
+; CHECK-NEXT: b .LBB23_7
+; CHECK-NEXT: .LBB23_3: // %overflow.no.lhs
+; CHECK-NEXT: cmp x3, x8
+; CHECK-NEXT: b.eq .LBB23_8
+; CHECK-NEXT: // %bb.4: // %overflow.no.lhs.only
+; CHECK-NEXT: asr x8, x1, #63
+; CHECK-NEXT: asr x10, x3, #63
+; CHECK-NEXT: eor x9, x0, x8
+; CHECK-NEXT: eor x11, x1, x8
+; CHECK-NEXT: eor x12, x2, x10
+; CHECK-NEXT: subs x9, x9, x8
+; CHECK-NEXT: sbc x8, x11, x8
+; CHECK-NEXT: cmp x1, #0
+; CHECK-NEXT: eor x11, x3, x10
+; CHECK-NEXT: cset w13, lt
+; CHECK-NEXT: csel x8, x8, x1, lt
+; CHECK-NEXT: csel x9, x9, x0, lt
+; CHECK-NEXT: subs x12, x12, x10
+; CHECK-NEXT: sbc x10, x11, x10
+; CHECK-NEXT: cmp x3, #0
+; CHECK-NEXT: csel x11, x12, x2, lt
+; CHECK-NEXT: csel x10, x10, x3, lt
+; CHECK-NEXT: umulh x12, x9, x11
+; CHECK-NEXT: mul x15, x8, x10
+; CHECK-NEXT: madd x8, x8, x11, x12
+; CHECK-NEXT: cset w12, lt
+; CHECK-NEXT: mul x14, x9, x11
+; CHECK-NEXT: mul x11, x9, x10
+; CHECK-NEXT: umulh x9, x9, x10
+; CHECK-NEXT: eor w10, w12, w13
+; CHECK-NEXT: b .LBB23_6
+; CHECK-NEXT: .LBB23_5: // %overflow.no.rhs.only
+; CHECK-NEXT: asr x8, x3, #63
+; CHECK-NEXT: asr x10, x1, #63
+; CHECK-NEXT: eor x9, x2, x8
+; CHECK-NEXT: eor x11, x3, x8
+; CHECK-NEXT: eor x12, x0, x10
+; CHECK-NEXT: subs x9, x9, x8
+; CHECK-NEXT: sbc x8, x11, x8
+; CHECK-NEXT: cmp x3, #0
+; CHECK-NEXT: eor x11, x1, x10
+; CHECK-NEXT: cset w13, lt
+; CHECK-NEXT: csel x8, x8, x3, lt
+; CHECK-NEXT: csel x9, x9, x2, lt
+; CHECK-NEXT: subs x12, x12, x10
+; CHECK-NEXT: sbc x10, x11, x10
+; CHECK-NEXT: cmp x1, #0
+; CHECK-NEXT: csel x11, x12, x0, lt
+; CHECK-NEXT: csel x10, x10, x1, lt
+; CHECK-NEXT: umulh x12, x9, x11
+; CHECK-NEXT: mul x14, x9, x11
+; CHECK-NEXT: mul x15, x8, x10
+; CHECK-NEXT: madd x8, x8, x11, x12
+; CHECK-NEXT: cset w12, lt
+; CHECK-NEXT: mul x11, x9, x10
+; CHECK-NEXT: umulh x9, x9, x10
+; CHECK-NEXT: eor w10, w13, w12
+; CHECK-NEXT: .LBB23_6: // %overflow.res
+; CHECK-NEXT: sbfx x12, x10, #0, #1
+; CHECK-NEXT: adds x11, x8, x11
+; CHECK-NEXT: eor x13, x14, x12
+; CHECK-NEXT: add x8, x13, x10
+; CHECK-NEXT: adc x13, x9, x15
+; CHECK-NEXT: eor x9, x11, x12
+; CHECK-NEXT: cmp x8, x10
+; CHECK-NEXT: cset w10, lo
+; CHECK-NEXT: cinc x9, x9, lo
+; CHECK-NEXT: cmp x9, x10
+; CHECK-NEXT: eor x10, x13, x12
+; CHECK-NEXT: cinc x10, x10, lo
+; CHECK-NEXT: cmp x10, #0
+; CHECK-NEXT: .LBB23_7: // %overflow.res
+; CHECK-NEXT: cset w10, ne
+; CHECK-NEXT: b .LBB23_9
+; CHECK-NEXT: .LBB23_8: // %overflow.no
+; CHECK-NEXT: umulh x8, x0, x2
+; CHECK-NEXT: mov w10, wzr
+; CHECK-NEXT: madd x8, x0, x3, x8
+; CHECK-NEXT: madd x9, x1, x2, x8
+; CHECK-NEXT: mul x8, x0, x2
+; CHECK-NEXT: .LBB23_9: // %overflow.res
+; CHECK-NEXT: eor x11, x3, x1
+; CHECK-NEXT: tst w10, #0x1
+; CHECK-NEXT: asr x11, x11, #63
+; CHECK-NEXT: eor x12, x11, #0x7fffffffffffffff
+; CHECK-NEXT: csinv x0, x8, x11, eq
+; CHECK-NEXT: csel x1, x12, x9, ne
; CHECK-NEXT: ret
%1 = tail call { i128, i1 } @llvm.smul.with.overflow.i128(i128 %x, i128 %y)
%2 = extractvalue { i128, i1 } %1, 0
diff --git a/llvm/test/CodeGen/AArch64/i128_with_overflow.ll b/llvm/test/CodeGen/AArch64/i128_with_overflow.ll
index 9924b7c63f763..ef004085373cd 100644
--- a/llvm/test/CodeGen/AArch64/i128_with_overflow.ll
+++ b/llvm/test/CodeGen/AArch64/i128_with_overflow.ll
@@ -223,22 +223,49 @@ cleanup:
define i128 @test_umul_i128(i128 noundef %x, i128 noundef %y) {
; CHECK-LABEL: test_umul_i128:
-; CHECK: // %bb.0: // %entry
+; CHECK: // %bb.0: // %overflow.entry
+; CHECK-NEXT: cbz x1, .LBB4_3
+; CHECK-NEXT: // %bb.1: // %overflow.lhs
+; CHECK-NEXT: cbz x3, .LBB4_5
+; CHECK-NEXT: // %bb.2: // %overflow
; CHECK-NEXT: mul x9, x3, x0
; CHECK-NEXT: cmp x1, #0
; CHECK-NEXT: ccmp x3, #0, #4, ne
-; CHECK-NEXT: umulh x8, x1, x2
-; CHECK-NEXT: umulh x10, x3, x0
+; CHECK-NEXT: umulh x10, x1, x2
+; CHECK-NEXT: umulh x8, x3, x0
; CHECK-NEXT: madd x9, x1, x2, x9
-; CHECK-NEXT: ccmp xzr, x8, #0, eq
-; CHECK-NEXT: umulh x11, x0, x2
; CHECK-NEXT: ccmp xzr, x10, #0, eq
+; CHECK-NEXT: umulh x11, x0, x2
+; CHECK-NEXT: ccmp xzr, x8, #0, eq
+; CHECK-NEXT: mul x0, x0, x2
; CHECK-NEXT: cset w8, ne
; CHECK-NEXT: adds x1, x11, x9
; CHECK-NEXT: csinc w8, w8, wzr, lo
-; CHECK-NEXT: cmp w8, #1
-; CHECK-NEXT: b.ne .LBB4_2
-; CHECK-NEXT: // %bb.1: // %if.then
+; CHECK-NEXT: tbnz w8, #0, .LBB4_7
+; CHECK-NEXT: b .LBB4_8
+; CHECK-NEXT: .LBB4_3: // %overflow.no.lhs
+; CHECK-NEXT: umulh x8, x0, x2
+; CHECK-NEXT: cbz x3, .LBB4_9
+; CHECK-NEXT: // %bb.4: // %overflow.no.lhs.only
+; CHECK-NEXT: madd x8, x1, x2, x8
+; CHECK-NEXT: umulh x9, x0, x3
+; CHECK-NEXT: mul x10, x0, x3
+; CHECK-NEXT: mul x11, x1, x3
+; CHECK-NEXT: mul x0, x0, x2
+; CHECK-NEXT: b .LBB4_6
+; CHECK-NEXT: .LBB4_5: // %overflow.no.rhs.only
+; CHECK-NEXT: umulh x8, x2, x0
+; CHECK-NEXT: umulh x9, x2, x1
+; CHECK-NEXT: madd x8, x3, x0, x8
+; CHECK-NEXT: mul x10, x2, x1
+; CHECK-NEXT: mul x11, x3, x1
+; CHECK-NEXT: mul x0, x2, x0
+; CHECK-NEXT: .LBB4_6: // %overflow.res
+; CHECK-NEXT: adds x1, x8, x10
+; CHECK-NEXT: adcs xzr, x9, x11
+; CHECK-NEXT: cset w8, ne
+; CHECK-NEXT: tbz w8, #0, .LBB4_8
+; CHECK-NEXT: .LBB4_7: // %if.then
; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: .cfi_offset w30, -16
@@ -247,10 +274,15 @@ define i128 @test_umul_i128(i128 noundef %x, i128 noundef %y) {
; CHECK-NEXT: sxtw x0, w0
; CHECK-NEXT: asr x1, x0, #63
; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: .LBB4_8: // %cleanup
; CHECK-NEXT: ret
-; CHECK-NEXT: .LBB4_2: // %if.end
+; CHECK-NEXT: .LBB4_9: // %overflow.no
+; CHECK-NEXT: madd x8, x0, x3, x8
; CHECK-NEXT: mul x0, x0, x2
-; CHECK-NEXT: ret
+; CHECK-NEXT: madd x1, x1, x2, x8
+; CHECK-NEXT: mov w8, wzr
+; CHECK-NEXT: tbnz w8, #0, .LBB4_7
+; CHECK-NEXT: b .LBB4_8
entry:
%0 = tail call { i128, i1 } @llvm.umul.with.overflow.i128(i128 %x, i128 %y)
%1 = extractvalue { i128, i1 } %0, 1
@@ -272,35 +304,115 @@ cleanup:
define i128 @test_smul_i128(i128 noundef %x, i128 noundef %y) {
; CHECK-LABEL: test_smul_i128:
-; CHECK: // %bb.0: // %entry
+; CHECK: // %bb.0: // %overflow.entry
+; CHECK-NEXT: asr x8, x2, #63
+; CHECK-NEXT: cmp x1, x0, asr #63
+; CHECK-NEXT: b.eq .LBB5_3
+; CHECK-NEXT: // %bb.1: // %overflow.lhs
+; CHECK-NEXT: cmp x3, x8
+; CHECK-NEXT: b.eq .LBB5_5
+; CHECK-NEXT: // %bb.2: // %overflow
+; CHECK-NEXT: asr x9, x1, #63
+; CHECK-NEXT: umulh x10, x0, x2
+; CHECK-NEXT: asr x13, x3, #63
+; CHECK-NEXT: mul x11, x1, x2
+; CHECK-NEXT: umulh x8, x1, x2
+; CHECK-NEXT: mul x9, x9, x2
+; CHECK-NEXT: adds x10, x11, x10
+; CHECK-NEXT: mul x14, x0, x3
+; CHECK-NEXT: umulh x12, x0, x3
+; CHECK-NEXT: adc x8, x8, x9
+; CHECK-NEXT: mul x13, x0, x13
+; CHECK-NEXT: asr x11, x8, #63
+; CHECK-NEXT: adds x9, x14, x10
+; CHECK-NEXT: mul x15, x1, x3
+; CHECK-NEXT: smulh x10, x1, x3
+; CHECK-NEXT: mov x1, x9
+; CHECK-NEXT: adc x9, x12, x13
+; CHECK-NEXT: asr x12, x9, #63
+; CHECK-NEXT: mul x0, x0, x2
+; CHECK-NEXT: adds x8, x8, x9
+; CHECK-NEXT: asr x9, x1, #63
+; CHECK-NEXT: adc x11, x11, x12
+; CHECK-NEXT: adds x8, x15, x8
+; CHECK-NEXT: adc x10, x10, x11
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: ccmp x10, x9, #0, eq
+; CHECK-NEXT: b .LBB5_7
+; CHECK-NEXT: .LBB5_3: // %overflow.no.lhs
+; CHECK-NEXT: cmp x3, x8
+; CHECK-NEXT: b.eq .LBB5_10
+; CHECK-NEXT: // %bb.4: // %overflow.no.lhs.only
+; CHECK-NEXT: asr x8, x1, #63
+; CHECK-NEXT: asr x10, x3, #63
+; CHECK-NEXT: eor x9, x0, x8
+; CHECK-NEXT: eor x11, x1, x8
+; CHECK-NEXT: eor x12, x2, x10
+; CHECK-NEXT: subs x9, x9, x8
+; CHECK-NEXT: sbc x8, x11, x8
+; CHECK-NEXT: cmp x1, #0
+; CHECK-NEXT: eor x11, x3, x10
+; CHECK-NEXT: csel x8, x8, x1, lt
+; CHECK-NEXT: csel x9, x9, x0, lt
+; CHECK-NEXT: cset w13, lt
+; CHECK-NEXT: subs x12, x12, x10
+; CHECK-NEXT: sbc x10, x11, x10
+; CHECK-NEXT: cmp x3, #0
+; CHECK-NEXT: csel x11, x12, x2, lt
+; CHECK-NEXT: csel x10, x10, x3, lt
+; CHECK-NEXT: umulh x12, x9, x11
+; CHECK-NEXT: mul x15, x8, x10
+; CHECK-NEXT: madd x8, x8, x11, x12
+; CHECK-NEXT: cset w12, lt
+; CHECK-NEXT: mul x14, x9, x11
+; CHECK-NEXT: mul x11, x9, x10
+; CHECK-NEXT: umulh x9, x9, x10
+; CHECK-NEXT: eor w10, w12, w13
+; CHECK-NEXT: b .LBB5_6
+; CHECK-NEXT: .LBB5_5: // %overflow.no.rhs.only
+; CHECK-NEXT: asr x8, x3, #63
; CHECK-NEXT: asr x10, x1, #63
-; CHECK-NEXT: umulh x11, x0, x2
-; CHECK-NEXT: asr x14, x3, #63
-; CHECK-NEXT: mov x8, x1
-; CHECK-NEXT: mul x12, x1, x2
-; CHECK-NEXT: umulh x9, x1, x2
-; CHECK-NEXT: mul x10, x10, x2
-; CHECK-NEXT: adds x11, x12, x11
-; CHECK-NEXT: mul x15, x0, x3
-; CHECK-NEXT: umulh x13, x0, x3
-; CHECK-NEXT: adc x9, x9, x10
-; CHECK-NEXT: mul x14, x0, x14
-; CHECK-NEXT: mul x16, x1, x3
-; CHECK-NEXT: adds x1, x15, x11
-; CHECK-NEXT: asr x11, x9, #63
-; CHECK-NEXT: smulh x8, x8, x3
-; CHECK-NEXT: adc x10, x13, x14
-; CHECK-NEXT: asr x12, x10, #63
-; CHECK-NEXT: adds x9, x9, x10
-; CHECK-NEXT: adc x10, x11, x12
-; CHECK-NEXT: adds x9, x16, x9
-; CHECK-NEXT: asr x11, x1, #63
-; CHECK-NEXT: adc x8, x8, x10
-; CHECK-NEXT: eor x8, x8, x11
-; CHECK-NEXT: eor x9, x9, x11
-; CHECK-NEXT: orr x8, x9, x8
-; CHECK-NEXT: cbz x8, .LBB5_2
-; CHECK-NEXT: // %bb.1: // %if.then
+; CHECK-NEXT: eor x9, x2, x8
+; CHECK-NEXT: eor x11, x3, x8
+; CHECK-NEXT: eor x12, x0, x10
+; CHECK-NEXT: subs x9, x9, x8
+; CHECK-NEXT: sbc x8, x11, x8
+; CHECK-NEXT: cmp x3, #0
+; CHECK-NEXT: eor x11, x1, x10
+; CHECK-NEXT: csel x8, x8, x3, lt
+; CHECK-NEXT: csel x9, x9, x2, lt
+; CHECK-NEXT: cset w13, lt
+; CHECK-NEXT: subs x12, x12, x10
+; CHECK-NEXT: sbc x10, x11, x10
+; CHECK-NEXT: cmp x1, #0
+; CHECK-NEXT: csel x11, x12, x0, lt
+; CHECK-NEXT: csel x10, x10, x1, lt
+; CHECK-NEXT: umulh x12, x9, x11
+; CHECK-NEXT: mul x14, x9, x11
+; CHECK-NEXT: mul x15, x8, x10
+; CHECK-NEXT: madd x8, x8, x11, x12
+; CHECK-NEXT: cset w12, lt
+; CHECK-NEXT: mul x11, x9, x10
+; CHECK-NEXT: umulh x9, x9, x10
+; CHECK-NEXT: eor w10, w13, w12
+; CHECK-NEXT: .LBB5_6: // %overflow.res
+; CHECK-NEXT: sbfx x12, x10, #0, #1
+; CHECK-NEXT: adds x8, x8, x11
+; CHECK-NEXT: adc x9, x9, x15
+; CHECK-NEXT: eor x13, x14, x12
+; CHECK-NEXT: eor x8, x8, x12
+; CHECK-NEXT: add x0, x13, x10
+; CHECK-NEXT: cmp x0, x10
+; CHECK-NEXT: cset w10, lo
+; CHECK-NEXT: cinc x1, x8, lo
+; CHECK-NEXT: eor x8, x9, x12
+; CHECK-NEXT: cmp x1, x10
+; CHECK-NEXT: cinc x8, x8, lo
+; CHECK-NEXT: cmp x8, #0
+; CHECK-NEXT: .LBB5_7: // %overflow.res
+; CHECK-NEXT: cset w8, ne
+; CHECK-NEXT: tbz w8, #0, .LBB5_9
+; CHECK-NEXT: .LBB5_8: // %if.then
; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: .cfi_offset w30, -16
@@ -309,10 +421,16 @@ define i128 @test_smul_i128(i128 noundef %x, i128 noundef %y) {
; CHECK-NEXT: sxtw x0, w0
; CHECK-NEXT: asr x1, x0, #63
; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: .LBB5_9: // %cleanup
; CHECK-NEXT: ret
-; CHECK-NEXT: .LBB5_2: // %if.end
+; CHECK-NEXT: .LBB5_10: // %overflow.no
+; CHECK-NEXT: umulh x8, x0, x2
+; CHECK-NEXT: madd x8, x0, x3, x8
; CHECK-NEXT: mul x0, x0, x2
-; CHECK-NEXT: ret
+; CHECK-NEXT: madd x1, x1, x2, x8
+; CHECK-NEXT: mov w8, wzr
+; CHECK-NEXT: tbnz w8, #0, .LBB5_8
+; CHECK-NEXT: b .LBB5_9
entry:
%0 = tail call { i128, i1 } @llvm.smul.with.overflow.i128(i128 %x, i128 %y)
%1 = extractvalue { i128, i1 } %0, 1
diff --git a/llvm/test/CodeGen/AArch64/umulo-128-legalisation-lowering.ll b/llvm/test/CodeGen/AArch64/umulo-128-legalisation-lowering.ll
index edfd80b4f2706..a240055b3f655 100644
--- a/llvm/test/CodeGen/AArch64/umulo-128-legalisation-lowering.ll
+++ b/llvm/test/CodeGen/AArch64/umulo-128-legalisation-lowering.ll
@@ -3,20 +3,54 @@
define { i128, i8 } @muloti_test(i128 %l, i128 %r) unnamed_addr #0 {
; AARCH-LABEL: muloti_test:
-; AARCH: // %bb.0: // %start
+; AARCH: // %bb.0: // %overflow.entry
+; AARCH-NEXT: cbz x1, .LBB0_3
+; AARCH-NEXT: // %bb.1: // %overflow.lhs
+; AARCH-NEXT: cbz x3, .LBB0_5
+; AARCH-NEXT: // %bb.2: // %overflow
; AARCH-NEXT: mul x9, x3, x0
; AARCH-NEXT: cmp x1, #0
; AARCH-NEXT: ccmp x3, #0, #4, ne
-; AARCH-NEXT: umulh x8, x1, x2
-; AARCH-NEXT: umulh x10, x3, x0
+; AARCH-NEXT: umulh x10, x1, x2
+; AARCH-NEXT: umulh x8, x3, x0
; AARCH-NEXT: madd x9, x1, x2, x9
-; AARCH-NEXT: ccmp xzr, x8, #0, eq
-; AARCH-NEXT: umulh x11, x0, x2
; AARCH-NEXT: ccmp xzr, x10, #0, eq
+; AARCH-NEXT: umulh x11, x0, x2
+; AARCH-NEXT: ccmp xzr, x8, #0, eq
; AARCH-NEXT: mul x0, x0, x2
; AARCH-NEXT: cset w8, ne
; AARCH-NEXT: adds x1, x11, x9
-; AARCH-NEXT: csinc w2, w8, wzr, lo
+; AARCH-NEXT: csinc w8, w8, wzr, lo
+; AARCH-NEXT: and w2, w8, #0x1
+; AARCH-NEXT: ret
+; AARCH-NEXT: .LBB0_3: // %overflow.no.lhs
+; AARCH-NEXT: umulh x8, x0, x2
+; AARCH-NEXT: cbz x3, .LBB0_7
+; AARCH-NEXT: // %bb.4: // %overflow.no.lhs.only
+; AARCH-NEXT: madd x8, x1, x2, x8
+; AARCH-NEXT: umulh x9, x0, x3
+; AARCH-NEXT: mul x10, x0, x3
+; AARCH-NEXT: mul x11, x1, x3
+; AARCH-NEXT: mul x0, x0, x2
+; AARCH-NEXT: b .LBB0_6
+; AARCH-NEXT: .LBB0_5: // %overflow.no.rhs.only
+; AARCH-NEXT: umulh x8, x2, x0
+; AARCH-NEXT: umulh x9, x2, x1
+; AARCH-NEXT: madd x8, x3, x0, x8
+; AARCH-NEXT: mul x10, x2, x1
+; AARCH-NEXT: mul x11, x3, x1
+; AARCH-NEXT: mul x0, x2, x0
+; AARCH-NEXT: .LBB0_6: // %overflow.res
+; AARCH-NEXT: adds x1, x8, x10
+; AARCH-NEXT: adcs xzr, x9, x11
+; AARCH-NEXT: cset w8, ne
+; AARCH-NEXT: and w2, w8, #0x1
+; AARCH-NEXT: ret
+; AARCH-NEXT: .LBB0_7: // %overflow.no
+; AARCH-NEXT: madd x8, x0, x3, x8
+; AARCH-NEXT: mul x0, x0, x2
+; AARCH-NEXT: madd x1, x1, x2, x8
+; AARCH-NEXT: and w2, wzr, #0x1
; AARCH-NEXT: ret
start:
%0 = tail call { i128, i1 } @llvm.umul.with.overflow.i128(i128 %l, i128 %r) #2
@@ -34,46 +68,133 @@ start:
define i128 @__muloti4(i128 %0, i128 %1, ptr nocapture nonnull writeonly align 4 %2) #2 {
; AARCH-LABEL: __muloti4:
-; AARCH: // %bb.0: // %Entry
-; AARCH-NEXT: asr x11, x1, #63
-; AARCH-NEXT: asr x9, x3, #63
-; AARCH-NEXT: umulh x12, x0, x2
-; AARCH-NEXT: mov x8, x1
+; AARCH: // %bb.0: // %overflow.entry
+; AARCH-NEXT: asr x8, x2, #63
+; AARCH-NEXT: cmp x1, x0, asr #63
; AARCH-NEXT: str wzr, [x4]
-; AARCH-NEXT: mul x13, x1, x2
-; AARCH-NEXT: umulh x10, x1, x2
-; AARCH-NEXT: mul x11, x11, x2
-; AARCH-NEXT: adds x12, x13, x12
-; AARCH-NEXT: mul x15, x0, x3
-; AARCH-NEXT: umulh x14, x0, x3
-; AARCH-NEXT: adc x10, x10, x11
-; AARCH-NEXT: mul x9, x0, x9
-; AARCH-NEXT: mul x16, x1, x3
-; AARCH-NEXT: adds x1, x15, x12
-; AARCH-NEXT: asr x12, x10, #63
-; AARCH-NEXT: smulh x11, x8, x3
-; AARCH-NEXT: adc x9, x14, x9
-; AARCH-NEXT: asr x13, x9, #63
-; AARCH-NEXT: adds x9, x10, x9
-; AARCH-NEXT: asr x10, x1, #63
+; AARCH-NEXT: b.eq .LBB1_3
+; AARCH-NEXT: // %bb.1: // %overflow.lhs
+; AARCH-NEXT: cmp x3, x8
+; AARCH-NEXT: b.eq .LBB1_5
+; AARCH-NEXT: // %bb.2: // %overflow
+; AARCH-NEXT: asr x9, x1, #63
+; AARCH-NEXT: umulh x10, x0, x2
+; AARCH-NEXT: asr x13, x3, #63
+; AARCH-NEXT: mul x11, x1, x2
+; AARCH-NEXT: umulh x8, x1, x2
+; AARCH-NEXT: mul x9, x9, x2
+; AARCH-NEXT: adds x10, x11, x10
+; AARCH-NEXT: mul x14, x0, x3
+; AARCH-NEXT: umulh x12, x0, x3
+; AARCH-NEXT: adc x9, x8, x9
+; AARCH-NEXT: mul x13, x0, x13
+; AARCH-NEXT: adds x8, x14, x10
+; AARCH-NEXT: mul x15, x1, x3
+; AARCH-NEXT: smulh x10, x1, x3
+; AARCH-NEXT: adc x11, x12, x13
+; AARCH-NEXT: asr x12, x9, #63
+; AARCH-NEXT: asr x13, x11, #63
; AARCH-NEXT: mul x0, x0, x2
+; AARCH-NEXT: adds x9, x9, x11
+; AARCH-NEXT: asr x11, x8, #63
; AARCH-NEXT: adc x12, x12, x13
-; AARCH-NEXT: adds x9, x16, x9
-; AARCH-NEXT: adc x11, x11, x12
-; AARCH-NEXT: cmp x9, x10
-; AARCH-NEXT: ccmp x11, x10, #0, eq
+; AARCH-NEXT: adds x9, x15, x9
+; AARCH-NEXT: adc x10, x10, x12
+; AARCH-NEXT: cmp x9, x11
+; AARCH-NEXT: ccmp x10, x11, #0, eq
+; AARCH-NEXT: b .LBB1_7
+; AARCH-NEXT: .LBB1_3: // %overflow.no.lhs
+; AARCH-NEXT: cmp x3, x8
+; AARCH-NEXT: b.eq .LBB1_8
+; AARCH-NEXT: // %bb.4: // %overflow.no.lhs.only
+; AARCH-NEXT: asr x8, x1, #63
+; AARCH-NEXT: asr x10, x3, #63
+; AARCH-NEXT: eor x9, x0, x8
+; AARCH-NEXT: eor x11, x1, x8
+; AARCH-NEXT: eor x12, x2, x10
+; AARCH-NEXT: subs x9, x9, x8
+; AARCH-NEXT: sbc x8, x11, x8
+; AARCH-NEXT: cmp x1, #0
+; AARCH-NEXT: eor x11, x3, x10
+; AARCH-NEXT: cset w13, lt
+; AARCH-NEXT: csel x8, x8, x1, lt
+; AARCH-NEXT: csel x9, x9, x0, lt
+; AARCH-NEXT: subs x12, x12, x10
+; AARCH-NEXT: sbc x10, x11, x10
+; AARCH-NEXT: cmp x3, #0
+; AARCH-NEXT: csel x11, x12, x2, lt
+; AARCH-NEXT: csel x10, x10, x3, lt
+; AARCH-NEXT: umulh x12, x9, x11
+; AARCH-NEXT: mul x15, x8, x10
+; AARCH-NEXT: madd x8, x8, x11, x12
+; AARCH-NEXT: cset w12, lt
+; AARCH-NEXT: mul x14, x9, x11
+; AARCH-NEXT: mul x11, x9, x10
+; AARCH-NEXT: umulh x9, x9, x10
+; AARCH-NEXT: eor w10, w12, w13
+; AARCH-NEXT: b .LBB1_6
+; AARCH-NEXT: .LBB1_5: // %overflow.no.rhs.only
+; AARCH-NEXT: asr x8, x3, #63
+; AARCH-NEXT: asr x10, x1, #63
+; AARCH-NEXT: eor x9, x2, x8
+; AARCH-NEXT: eor x11, x3, x8
+; AARCH-NEXT: eor x12, x0, x10
+; AARCH-NEXT: subs x9, x9, x8
+; AARCH-NEXT: sbc x8, x11, x8
+; AARCH-NEXT: cmp x3, #0
+; AARCH-NEXT: eor x11, x1, x10
+; AARCH-NEXT: cset w13, lt
+; AARCH-NEXT: csel x8, x8, x3, lt
+; AARCH-NEXT: csel x9, x9, x2, lt
+; AARCH-NEXT: subs x12, x12, x10
+; AARCH-NEXT: sbc x10, x11, x10
+; AARCH-NEXT: cmp x1, #0
+; AARCH-NEXT: csel x11, x12, x0, lt
+; AARCH-NEXT: csel x10, x10, x1, lt
+; AARCH-NEXT: umulh x12, x9, x11
+; AARCH-NEXT: mul x14, x9, x11
+; AARCH-NEXT: mul x15, x8, x10
+; AARCH-NEXT: madd x8, x8, x11, x12
+; AARCH-NEXT: cset w12, lt
+; AARCH-NEXT: mul x11, x9, x10
+; AARCH-NEXT: umulh x9, x9, x10
+; AARCH-NEXT: eor w10, w13, w12
+; AARCH-NEXT: .LBB1_6: // %overflow.res
+; AARCH-NEXT: sbfx x12, x10, #0, #1
+; AARCH-NEXT: adds x8, x8, x11
+; AARCH-NEXT: adc x9, x9, x15
+; AARCH-NEXT: eor x13, x14, x12
+; AARCH-NEXT: eor x8, x8, x12
+; AARCH-NEXT: eor x9, x9, x12
+; AARCH-NEXT: add x0, x13, x10
+; AARCH-NEXT: cmp x0, x10
+; AARCH-NEXT: cset w10, lo
+; AARCH-NEXT: cinc x8, x8, lo
+; AARCH-NEXT: cmp x8, x10
+; AARCH-NEXT: cinc x9, x9, lo
+; AARCH-NEXT: cmp x9, #0
+; AARCH-NEXT: .LBB1_7: // %overflow.res
; AARCH-NEXT: cset w9, ne
-; AARCH-NEXT: tbz x8, #63, .LBB1_2
-; AARCH-NEXT: // %bb.1: // %Entry
-; AARCH-NEXT: eor x8, x3, #0x8000000000000000
-; AARCH-NEXT: orr x8, x2, x8
-; AARCH-NEXT: cbz x8, .LBB1_3
-; AARCH-NEXT: .LBB1_2: // %Else2
-; AARCH-NEXT: cbz w9, .LBB1_4
-; AARCH-NEXT: .LBB1_3: // %Then7
-; AARCH-NEXT: mov w8, #1 // =0x1
-; AARCH-NEXT: str w8, [x4]
-; AARCH-NEXT: .LBB1_4: // %Block9
+; AARCH-NEXT: tbnz x1, #63, .LBB1_9
+; AARCH-NEXT: b .LBB1_10
+; AARCH-NEXT: .LBB1_8: // %overflow.no
+; AARCH-NEXT: umulh x8, x0, x2
+; AARCH-NEXT: mov w9, wzr
+; AARCH-NEXT: madd x8, x0, x3, x8
+; AARCH-NEXT: mul x0, x0, x2
+; AARCH-NEXT: madd x8, x1, x2, x8
+; AARCH-NEXT: tbz x1, #63, .LBB1_10
+; AARCH-NEXT: .LBB1_9: // %overflow.res
+; AARCH-NEXT: eor x10, x3, #0x8000000000000000
+; AARCH-NEXT: orr x10, x2, x10
+; AARCH-NEXT: cbz x10, .LBB1_11
+; AARCH-NEXT: .LBB1_10: // %Else2
+; AARCH-NEXT: tbz w9, #0, .LBB1_12
+; AARCH-NEXT: .LBB1_11: // %Then7
+; AARCH-NEXT: mov w9, #1 // =0x1
+; AARCH-NEXT: str w9, [x4]
+; AARCH-NEXT: .LBB1_12: // %Block9
+; AARCH-NEXT: mov x1, x8
; AARCH-NEXT: ret
Entry:
store i32 0, ptr %2, align 4
diff --git a/llvm/test/CodeGen/ARM/umulo-128-legalisation-lowering.ll b/llvm/test/CodeGen/ARM/umulo-128-legalisation-lowering.ll
index 4eb82c80e2bff..8f35b6df7a937 100644
--- a/llvm/test/CodeGen/ARM/umulo-128-legalisation-lowering.ll
+++ b/llvm/test/CodeGen/ARM/umulo-128-legalisation-lowering.ll
@@ -4,212 +4,425 @@
define { i128, i8 } @muloti_test(i128 %l, i128 %r) unnamed_addr #0 {
; ARMV6-LABEL: muloti_test:
-; ARMV6: @ %bb.0: @ %start
+; ARMV6: @ %bb.0: @ %overflow.entry
; ARMV6-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
; ARMV6-NEXT: sub sp, sp, #28
-; ARMV6-NEXT: ldr r4, [sp, #72]
-; ARMV6-NEXT: mov r7, r0
-; ARMV6-NEXT: str r0, [sp, #4] @ 4-byte Spill
-; ARMV6-NEXT: ldr r12, [sp, #64]
-; ARMV6-NEXT: umull r1, r0, r2, r4
+; ARMV6-NEXT: add lr, sp, #76
; ARMV6-NEXT: ldr r5, [sp, #68]
-; ARMV6-NEXT: str r1, [r7]
-; ARMV6-NEXT: ldr r1, [sp, #76]
-; ARMV6-NEXT: umull r7, r6, r1, r12
-; ARMV6-NEXT: str r6, [sp, #8] @ 4-byte Spill
-; ARMV6-NEXT: umull r6, r9, r5, r4
-; ARMV6-NEXT: add r7, r6, r7
-; ARMV6-NEXT: umull r4, r6, r12, r4
-; ARMV6-NEXT: str r4, [sp, #16] @ 4-byte Spill
-; ARMV6-NEXT: mov r4, #0
-; ARMV6-NEXT: adds r8, r6, r7
-; ARMV6-NEXT: ldr r6, [sp, #80]
-; ARMV6-NEXT: adc r7, r4, #0
-; ARMV6-NEXT: ldr r4, [sp, #84]
-; ARMV6-NEXT: str r7, [sp, #24] @ 4-byte Spill
-; ARMV6-NEXT: umull r12, lr, r3, r6
-; ARMV6-NEXT: umull r11, r7, r4, r2
-; ARMV6-NEXT: add r12, r11, r12
-; ARMV6-NEXT: umull r11, r10, r6, r2
-; ARMV6-NEXT: adds r12, r10, r12
-; ARMV6-NEXT: mov r10, #0
-; ARMV6-NEXT: adc r6, r10, #0
-; ARMV6-NEXT: str r6, [sp, #20] @ 4-byte Spill
-; ARMV6-NEXT: ldr r6, [sp, #16] @ 4-byte Reload
-; ARMV6-NEXT: adds r6, r6, r11
-; ARMV6-NEXT: str r6, [sp, #12] @ 4-byte Spill
-; ARMV6-NEXT: adc r6, r8, r12
-; ARMV6-NEXT: str r6, [sp, #16] @ 4-byte Spill
-; ARMV6-NEXT: ldr r6, [sp, #72]
-; ARMV6-NEXT: mov r12, #0
-; ARMV6-NEXT: umull r2, r8, r2, r1
-; ARMV6-NEXT: umlal r0, r12, r3, r6
-; ARMV6-NEXT: adds r0, r2, r0
-; ARMV6-NEXT: ldr r2, [sp, #4] @ 4-byte Reload
-; ARMV6-NEXT: adcs r8, r12, r8
-; ARMV6-NEXT: adc r12, r10, #0
-; ARMV6-NEXT: cmp lr, #0
-; ARMV6-NEXT: str r0, [r2, #4]
-; ARMV6-NEXT: movne lr, #1
-; ARMV6-NEXT: ldr r11, [sp, #8] @ 4-byte Reload
-; ARMV6-NEXT: cmp r7, #0
-; ARMV6-NEXT: movne r7, #1
-; ARMV6-NEXT: ldr r0, [sp, #64]
-; ARMV6-NEXT: cmp r11, #0
-; ARMV6-NEXT: umlal r8, r12, r3, r1
-; ARMV6-NEXT: movne r11, #1
-; ARMV6-NEXT: cmp r9, #0
-; ARMV6-NEXT: movne r9, #1
-; ARMV6-NEXT: orrs r10, r0, r5
-; ARMV6-NEXT: ldr r0, [sp, #80]
+; ARMV6-NEXT: ldr r6, [sp, #64]
+; ARMV6-NEXT: mov r9, r0
+; ARMV6-NEXT: ldr r11, [sp, #72]
+; ARMV6-NEXT: orrs r10, r6, r5
+; ARMV6-NEXT: ldm lr, {r1, r12, lr}
+; ARMV6-NEXT: beq .LBB0_3
+; ARMV6-NEXT: @ %bb.1: @ %overflow.lhs
+; ARMV6-NEXT: orrs r8, r12, lr
+; ARMV6-NEXT: beq .LBB0_5
+; ARMV6-NEXT: @ %bb.2: @ %overflow
+; ARMV6-NEXT: umull r4, r0, r3, r12
+; ARMV6-NEXT: str r0, [sp, #20] @ 4-byte Spill
+; ARMV6-NEXT: umull r7, r0, lr, r2
+; ARMV6-NEXT: str r0, [sp, #12] @ 4-byte Spill
+; ARMV6-NEXT: umull r0, r12, r12, r2
+; ARMV6-NEXT: add r4, r7, r4
+; ARMV6-NEXT: str r0, [sp, #24] @ 4-byte Spill
+; ARMV6-NEXT: mov r0, #0
+; ARMV6-NEXT: adds r7, r12, r4
+; ARMV6-NEXT: str r7, [sp] @ 4-byte Spill
+; ARMV6-NEXT: adc r0, r0, #0
+; ARMV6-NEXT: str r0, [sp, #16] @ 4-byte Spill
+; ARMV6-NEXT: mov r0, r11
+; ARMV6-NEXT: umull r11, r12, r1, r6
+; ARMV6-NEXT: umull r7, r4, r5, r0
+; ARMV6-NEXT: add r7, r7, r11
+; ARMV6-NEXT: umull r11, r6, r6, r0
+; ARMV6-NEXT: adds r6, r6, r7
+; ARMV6-NEXT: mov r7, #0
+; ARMV6-NEXT: adc r7, r7, #0
+; ARMV6-NEXT: str r7, [sp, #4] @ 4-byte Spill
+; ARMV6-NEXT: ldr r7, [sp, #24] @ 4-byte Reload
+; ARMV6-NEXT: adds r7, r11, r7
+; ARMV6-NEXT: str r7, [sp, #8] @ 4-byte Spill
+; ARMV6-NEXT: ldr r7, [sp] @ 4-byte Reload
+; ARMV6-NEXT: adc r6, r6, r7
+; ARMV6-NEXT: str r6, [sp] @ 4-byte Spill
+; ARMV6-NEXT: umull r11, r6, r2, r0
+; ARMV6-NEXT: mov r7, #0
+; ARMV6-NEXT: umlal r6, r7, r3, r0
+; ARMV6-NEXT: umull r2, r0, r2, r1
+; ARMV6-NEXT: adds r2, r2, r6
+; ARMV6-NEXT: str r2, [sp, #24] @ 4-byte Spill
+; ARMV6-NEXT: adcs r0, r7, r0
+; ARMV6-NEXT: mov r7, #0
+; ARMV6-NEXT: adc r6, r7, #0
+; ARMV6-NEXT: ldr r2, [sp, #8] @ 4-byte Reload
+; ARMV6-NEXT: umlal r0, r6, r3, r1
+; ARMV6-NEXT: adds r2, r0, r2
+; ARMV6-NEXT: ldr r0, [sp] @ 4-byte Reload
+; ARMV6-NEXT: adcs r0, r6, r0
+; ARMV6-NEXT: adc r6, r7, #0
+; ARMV6-NEXT: cmp r8, #0
+; ARMV6-NEXT: movne r8, #1
+; ARMV6-NEXT: cmp r10, #0
; ARMV6-NEXT: movne r10, #1
-; ARMV6-NEXT: ldr r6, [sp, #12] @ 4-byte Reload
-; ARMV6-NEXT: orrs r0, r0, r4
-; ARMV6-NEXT: movne r0, #1
; ARMV6-NEXT: cmp r4, #0
; ARMV6-NEXT: movne r4, #1
-; ARMV6-NEXT: cmp r3, #0
-; ARMV6-NEXT: movne r3, #1
-; ARMV6-NEXT: cmp r5, #0
-; ARMV6-NEXT: movne r5, #1
; ARMV6-NEXT: cmp r1, #0
; ARMV6-NEXT: movne r1, #1
-; ARMV6-NEXT: adds r6, r8, r6
-; ARMV6-NEXT: str r6, [r2, #8]
+; ARMV6-NEXT: cmp r5, #0
+; ARMV6-NEXT: movne r5, #1
; ARMV6-NEXT: and r1, r5, r1
+; ARMV6-NEXT: cmp r12, #0
+; ARMV6-NEXT: orr r1, r1, r4
+; ARMV6-NEXT: ldr r5, [sp, #4] @ 4-byte Reload
+; ARMV6-NEXT: movne r12, #1
+; ARMV6-NEXT: orr r1, r1, r12
+; ARMV6-NEXT: str r6, [sp, #8] @ 4-byte Spill
+; ARMV6-NEXT: and r6, r10, r8
+; ARMV6-NEXT: orr r1, r1, r5
+; ARMV6-NEXT: orr r1, r6, r1
+; ARMV6-NEXT: ldr r6, [sp, #12] @ 4-byte Reload
+; ARMV6-NEXT: ldr r7, [sp, #24] @ 4-byte Reload
+; ARMV6-NEXT: cmp r6, #0
+; ARMV6-NEXT: movne r6, #1
+; ARMV6-NEXT: cmp r3, #0
+; ARMV6-NEXT: movne r3, #1
+; ARMV6-NEXT: cmp lr, #0
+; ARMV6-NEXT: movne lr, #1
+; ARMV6-NEXT: and r3, lr, r3
+; ARMV6-NEXT: orr r3, r3, r6
+; ARMV6-NEXT: ldr r6, [sp, #20] @ 4-byte Reload
+; ARMV6-NEXT: cmp r6, #0
+; ARMV6-NEXT: movne r6, #1
+; ARMV6-NEXT: orr r3, r3, r6
; ARMV6-NEXT: ldr r6, [sp, #16] @ 4-byte Reload
-; ARMV6-NEXT: orr r1, r1, r9
-; ARMV6-NEXT: orr r1, r1, r11
-; ARMV6-NEXT: and r0, r10, r0
-; ARMV6-NEXT: adcs r6, r12, r6
-; ARMV6-NEXT: str r6, [r2, #12]
-; ARMV6-NEXT: ldr r6, [sp, #24] @ 4-byte Reload
-; ARMV6-NEXT: orr r1, r1, r6
-; ARMV6-NEXT: orr r0, r0, r1
-; ARMV6-NEXT: and r1, r4, r3
-; ARMV6-NEXT: orr r1, r1, r7
-; ARMV6-NEXT: ldr r3, [sp, #20] @ 4-byte Reload
-; ARMV6-NEXT: orr r1, r1, lr
+; ARMV6-NEXT: orr r3, r3, r6
; ARMV6-NEXT: orr r1, r1, r3
-; ARMV6-NEXT: orr r0, r0, r1
-; ARMV6-NEXT: mov r1, #0
-; ARMV6-NEXT: adc r1, r1, #0
-; ARMV6-NEXT: orr r0, r0, r1
-; ARMV6-NEXT: and r0, r0, #1
-; ARMV6-NEXT: strb r0, [r2, #16]
+; ARMV6-NEXT: ldr r3, [sp, #8] @ 4-byte Reload
+; ARMV6-NEXT: orr r6, r1, r3
+; ARMV6-NEXT: b .LBB0_8
+; ARMV6-NEXT: .LBB0_3: @ %overflow.no.lhs
+; ARMV6-NEXT: orrs r6, r12, lr
+; ARMV6-NEXT: beq .LBB0_7
+; ARMV6-NEXT: @ %bb.4: @ %overflow.no.lhs.only
+; ARMV6-NEXT: umull r0, r4, r2, r12
+; ARMV6-NEXT: mov r7, #0
+; ARMV6-NEXT: mov r10, #0
+; ARMV6-NEXT: umlal r4, r7, r3, r12
+; ARMV6-NEXT: str r0, [sp, #24] @ 4-byte Spill
+; ARMV6-NEXT: umull r6, r8, r2, lr
+; ARMV6-NEXT: adds r0, r6, r4
+; ARMV6-NEXT: str r0, [sp, #20] @ 4-byte Spill
+; ARMV6-NEXT: adcs r6, r7, r8
+; ARMV6-NEXT: adc r7, r10, #0
+; ARMV6-NEXT: ldr r10, [sp, #64]
+; ARMV6-NEXT: umlal r6, r7, r3, lr
+; ARMV6-NEXT: umull r0, r8, r12, r10
+; ARMV6-NEXT: mla r4, r12, r5, r8
+; ARMV6-NEXT: mov r8, r11
+; ARMV6-NEXT: adds r12, r6, r0
+; ARMV6-NEXT: mov r6, #0
+; ARMV6-NEXT: mla r4, lr, r10, r4
+; ARMV6-NEXT: adc lr, r7, r4
+; ARMV6-NEXT: umull r11, r4, r2, r11
+; ARMV6-NEXT: umlal r4, r6, r3, r8
+; ARMV6-NEXT: umull r2, r0, r2, r1
+; ARMV6-NEXT: adds r7, r2, r4
+; ARMV6-NEXT: adcs r2, r6, r0
+; ARMV6-NEXT: mov r0, #0
+; ARMV6-NEXT: adc r4, r0, #0
+; ARMV6-NEXT: ldr r0, [sp, #24] @ 4-byte Reload
+; ARMV6-NEXT: umlal r2, r4, r3, r1
+; ARMV6-NEXT: umull r3, r6, r8, r10
+; ARMV6-NEXT: mla r5, r8, r5, r6
+; ARMV6-NEXT: adds r2, r2, r3
+; ARMV6-NEXT: mla r1, r1, r10, r5
+; ARMV6-NEXT: adc r1, r4, r1
+; ARMV6-NEXT: adds r2, r2, r0
+; ARMV6-NEXT: ldr r0, [sp, #20] @ 4-byte Reload
+; ARMV6-NEXT: adcs r0, r1, r0
+; ARMV6-NEXT: adcs r1, r12, #0
+; ARMV6-NEXT: adc r3, lr, #0
+; ARMV6-NEXT: b .LBB0_6
+; ARMV6-NEXT: .LBB0_5: @ %overflow.no.rhs.only
+; ARMV6-NEXT: mov r10, r6
+; ARMV6-NEXT: umull r0, r6, r11, r6
+; ARMV6-NEXT: mov r7, #0
+; ARMV6-NEXT: umlal r6, r7, r1, r10
+; ARMV6-NEXT: str r0, [sp, #24] @ 4-byte Spill
+; ARMV6-NEXT: umull r4, r8, r11, r5
+; ARMV6-NEXT: adds r0, r4, r6
+; ARMV6-NEXT: str r0, [sp, #20] @ 4-byte Spill
+; ARMV6-NEXT: adcs r6, r7, r8
+; ARMV6-NEXT: mov r0, #0
+; ARMV6-NEXT: adc r7, r0, #0
+; ARMV6-NEXT: umull r0, r8, r10, r12
+; ARMV6-NEXT: mla r4, r10, lr, r8
+; ARMV6-NEXT: umlal r6, r7, r1, r5
+; ARMV6-NEXT: mla r4, r5, r12, r4
+; ARMV6-NEXT: adds r10, r6, r0
+; ARMV6-NEXT: adc r0, r7, r4
+; ARMV6-NEXT: str r0, [sp, #16] @ 4-byte Spill
+; ARMV6-NEXT: mov r0, r11
+; ARMV6-NEXT: umull r11, r6, r11, r2
+; ARMV6-NEXT: mov r7, #0
+; ARMV6-NEXT: umull r4, r5, r0, r3
+; ARMV6-NEXT: mov r0, #0
+; ARMV6-NEXT: umlal r6, r7, r1, r2
+; ARMV6-NEXT: adds r8, r4, r6
+; ARMV6-NEXT: adcs r4, r7, r5
+; ARMV6-NEXT: adc r5, r0, #0
+; ARMV6-NEXT: ldr r0, [sp, #24] @ 4-byte Reload
+; ARMV6-NEXT: umlal r4, r5, r1, r3
+; ARMV6-NEXT: mov r7, r8
+; ARMV6-NEXT: umull r1, r6, r2, r12
+; ARMV6-NEXT: mla r2, r2, lr, r6
+; ARMV6-NEXT: adds r1, r4, r1
+; ARMV6-NEXT: mla r2, r3, r12, r2
+; ARMV6-NEXT: adc r3, r5, r2
+; ARMV6-NEXT: adds r2, r1, r0
+; ARMV6-NEXT: ldr r0, [sp, #20] @ 4-byte Reload
+; ARMV6-NEXT: adcs r0, r3, r0
+; ARMV6-NEXT: ldr r3, [sp, #16] @ 4-byte Reload
+; ARMV6-NEXT: adcs r1, r10, #0
+; ARMV6-NEXT: adc r3, r3, #0
+; ARMV6-NEXT: .LBB0_6: @ %overflow.res
+; ARMV6-NEXT: orrs r6, r1, r3
+; ARMV6-NEXT: movne r6, #1
+; ARMV6-NEXT: b .LBB0_8
+; ARMV6-NEXT: .LBB0_7: @ %overflow.no
+; ARMV6-NEXT: mov r0, r11
+; ARMV6-NEXT: umull r11, r8, r2, r11
+; ARMV6-NEXT: mov r7, #0
+; ARMV6-NEXT: mov r6, #0
+; ARMV6-NEXT: umlal r8, r7, r3, r0
+; ARMV6-NEXT: umull r4, r10, r2, r1
+; ARMV6-NEXT: adds r0, r4, r8
+; ARMV6-NEXT: ldr r4, [sp, #64]
+; ARMV6-NEXT: adcs r10, r7, r10
+; ARMV6-NEXT: ldr r7, [sp, #72]
+; ARMV6-NEXT: str r0, [sp, #24] @ 4-byte Spill
+; ARMV6-NEXT: adc r0, r6, #0
+; ARMV6-NEXT: umlal r10, r0, r3, r1
+; ARMV6-NEXT: umull r8, r4, r7, r4
+; ARMV6-NEXT: mla r4, r7, r5, r4
+; ARMV6-NEXT: ldr r5, [sp, #64]
+; ARMV6-NEXT: ldr r7, [sp, #24] @ 4-byte Reload
+; ARMV6-NEXT: mla r1, r1, r5, r4
+; ARMV6-NEXT: umull r4, r5, r12, r2
+; ARMV6-NEXT: mla r3, r12, r3, r5
+; ARMV6-NEXT: mla r2, lr, r2, r3
+; ARMV6-NEXT: adds r3, r4, r8
+; ARMV6-NEXT: adc r1, r2, r1
+; ARMV6-NEXT: adds r2, r10, r3
+; ARMV6-NEXT: adc r0, r0, r1
+; ARMV6-NEXT: .LBB0_8: @ %overflow.res
+; ARMV6-NEXT: str r11, [r9]
+; ARMV6-NEXT: str r7, [r9, #4]
+; ARMV6-NEXT: str r2, [r9, #8]
+; ARMV6-NEXT: str r0, [r9, #12]
+; ARMV6-NEXT: and r0, r6, #1
+; ARMV6-NEXT: strb r0, [r9, #16]
; ARMV6-NEXT: add sp, sp, #28
; ARMV6-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
;
; ARMV7-LABEL: muloti_test:
-; ARMV7: @ %bb.0: @ %start
+; ARMV7: @ %bb.0: @ %overflow.entry
; ARMV7-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; ARMV7-NEXT: sub sp, sp, #44
-; ARMV7-NEXT: ldr r8, [sp, #88]
-; ARMV7-NEXT: mov r9, r0
-; ARMV7-NEXT: ldr r7, [sp, #96]
-; ARMV7-NEXT: ldr lr, [sp, #100]
-; ARMV7-NEXT: umull r0, r5, r2, r8
-; ARMV7-NEXT: ldr r4, [sp, #80]
-; ARMV7-NEXT: str r0, [sp, #32] @ 4-byte Spill
-; ARMV7-NEXT: umull r1, r0, r3, r7
-; ARMV7-NEXT: str r0, [sp, #4] @ 4-byte Spill
-; ARMV7-NEXT: umull r0, r11, lr, r2
-; ARMV7-NEXT: str r1, [sp, #20] @ 4-byte Spill
-; ARMV7-NEXT: ldr r1, [sp, #92]
-; ARMV7-NEXT: str r0, [sp] @ 4-byte Spill
-; ARMV7-NEXT: umull r0, r10, r7, r2
-; ARMV7-NEXT: mov r7, r1
-; ARMV7-NEXT: umull r6, r12, r1, r4
-; ARMV7-NEXT: str r0, [sp, #40] @ 4-byte Spill
-; ARMV7-NEXT: ldr r0, [sp, #84]
-; ARMV7-NEXT: str r6, [sp, #24] @ 4-byte Spill
-; ARMV7-NEXT: umull r6, r1, r0, r8
-; ARMV7-NEXT: str r6, [sp, #16] @ 4-byte Spill
-; ARMV7-NEXT: umull r6, r2, r2, r7
-; ARMV7-NEXT: mov r7, r4
-; ARMV7-NEXT: str r6, [sp, #8] @ 4-byte Spill
-; ARMV7-NEXT: str r2, [sp, #12] @ 4-byte Spill
-; ARMV7-NEXT: umull r2, r6, r4, r8
-; ARMV7-NEXT: str r2, [sp, #36] @ 4-byte Spill
-; ARMV7-NEXT: ldr r2, [sp, #32] @ 4-byte Reload
-; ARMV7-NEXT: str r6, [sp, #28] @ 4-byte Spill
-; ARMV7-NEXT: mov r6, #0
-; ARMV7-NEXT: str r2, [r9]
-; ARMV7-NEXT: umlal r5, r6, r3, r8
-; ARMV7-NEXT: ldr r2, [sp, #20] @ 4-byte Reload
-; ARMV7-NEXT: ldr r4, [sp] @ 4-byte Reload
-; ARMV7-NEXT: add r4, r4, r2
-; ARMV7-NEXT: adds r2, r10, r4
-; ARMV7-NEXT: str r2, [sp, #20] @ 4-byte Spill
-; ARMV7-NEXT: mov r2, #0
-; ARMV7-NEXT: adc r2, r2, #0
-; ARMV7-NEXT: cmp r12, #0
-; ARMV7-NEXT: str r2, [sp, #32] @ 4-byte Spill
-; ARMV7-NEXT: movwne r12, #1
+; ARMV7-NEXT: sub sp, sp, #12
+; ARMV7-NEXT: ldr r7, [sp, #52]
+; ARMV7-NEXT: ldr r10, [sp, #48]
+; ARMV7-NEXT: ldr r4, [sp, #68]
+; ARMV7-NEXT: ldr r9, [sp, #64]
+; ARMV7-NEXT: orrs r1, r10, r7
+; ARMV7-NEXT: ldr r12, [sp, #60]
+; ARMV7-NEXT: ldr lr, [sp, #56]
+; ARMV7-NEXT: beq .LBB0_3
+; ARMV7-NEXT: @ %bb.1: @ %overflow.lhs
+; ARMV7-NEXT: orr r5, r9, r4
+; ARMV7-NEXT: cmp r5, #0
+; ARMV7-NEXT: beq .LBB0_5
+; ARMV7-NEXT: @ %bb.2: @ %overflow
+; ARMV7-NEXT: movwne r5, #1
; ARMV7-NEXT: cmp r1, #0
-; ARMV7-NEXT: ldr r2, [sp, #96]
+; ARMV7-NEXT: mov r6, r12
; ARMV7-NEXT: movwne r1, #1
-; ARMV7-NEXT: orrs r10, r7, r0
-; ARMV7-NEXT: movwne r10, #1
-; ARMV7-NEXT: orrs r7, r2, lr
-; ARMV7-NEXT: ldr r2, [sp, #92]
+; ARMV7-NEXT: and r12, r1, r5
+; ARMV7-NEXT: cmp r6, #0
+; ARMV7-NEXT: mov r1, r6
+; ARMV7-NEXT: mov r8, r6
+; ARMV7-NEXT: umull r6, r5, r7, lr
+; ARMV7-NEXT: movwne r1, #1
+; ARMV7-NEXT: cmp r7, #0
; ARMV7-NEXT: movwne r7, #1
-; ARMV7-NEXT: cmp r0, #0
-; ARMV7-NEXT: movwne r0, #1
-; ARMV7-NEXT: cmp r2, #0
-; ARMV7-NEXT: mov r4, r2
-; ARMV7-NEXT: mov r8, r2
-; ARMV7-NEXT: ldr r2, [sp, #8] @ 4-byte Reload
+; ARMV7-NEXT: and r1, r7, r1
+; ARMV7-NEXT: mov r11, #0
+; ARMV7-NEXT: cmp r5, #0
+; ARMV7-NEXT: movwne r5, #1
+; ARMV7-NEXT: orr r1, r1, r5
+; ARMV7-NEXT: umull r5, r7, r8, r10
+; ARMV7-NEXT: cmp r7, #0
+; ARMV7-NEXT: movwne r7, #1
+; ARMV7-NEXT: orr r7, r1, r7
+; ARMV7-NEXT: add r1, r6, r5
+; ARMV7-NEXT: umull r8, r6, r10, lr
+; ARMV7-NEXT: adds r10, r6, r1
+; ARMV7-NEXT: umull r6, r1, r4, r2
+; ARMV7-NEXT: adc r5, r11, #0
+; ARMV7-NEXT: orr r5, r7, r5
+; ARMV7-NEXT: orr r7, r12, r5
+; ARMV7-NEXT: cmp r3, #0
+; ARMV7-NEXT: mov r5, r3
+; ARMV7-NEXT: movwne r5, #1
+; ARMV7-NEXT: cmp r4, #0
; ARMV7-NEXT: movwne r4, #1
-; ARMV7-NEXT: and r0, r0, r4
-; ARMV7-NEXT: mov r4, #0
-; ARMV7-NEXT: adds r5, r2, r5
-; ARMV7-NEXT: str r5, [r9, #4]
-; ARMV7-NEXT: orr r0, r0, r1
-; ARMV7-NEXT: ldr r1, [sp, #24] @ 4-byte Reload
-; ARMV7-NEXT: ldr r2, [sp, #16] @ 4-byte Reload
-; ARMV7-NEXT: and r5, r10, r7
-; ARMV7-NEXT: orr r0, r0, r12
-; ARMV7-NEXT: mov r12, #0
-; ARMV7-NEXT: add r1, r2, r1
-; ARMV7-NEXT: ldr r2, [sp, #12] @ 4-byte Reload
-; ARMV7-NEXT: adcs r2, r6, r2
-; ARMV7-NEXT: ldr r6, [sp, #28] @ 4-byte Reload
-; ARMV7-NEXT: adc r7, r4, #0
-; ARMV7-NEXT: adds r1, r6, r1
-; ARMV7-NEXT: umlal r2, r7, r3, r8
-; ARMV7-NEXT: adc r4, r4, #0
-; ARMV7-NEXT: orr r0, r0, r4
-; ARMV7-NEXT: orr r0, r5, r0
-; ARMV7-NEXT: ldr r4, [sp, #40] @ 4-byte Reload
-; ARMV7-NEXT: ldr r5, [sp, #36] @ 4-byte Reload
-; ARMV7-NEXT: adds r5, r5, r4
-; ARMV7-NEXT: ldr r4, [sp, #20] @ 4-byte Reload
-; ARMV7-NEXT: adc r1, r1, r4
-; ARMV7-NEXT: ldr r4, [sp, #4] @ 4-byte Reload
+; ARMV7-NEXT: cmp r1, #0
+; ARMV7-NEXT: and r5, r4, r5
+; ARMV7-NEXT: movwne r1, #1
+; ARMV7-NEXT: orr r1, r5, r1
+; ARMV7-NEXT: umull r5, r4, r3, r9
; ARMV7-NEXT: cmp r4, #0
+; ARMV7-NEXT: add r6, r6, r5
; ARMV7-NEXT: movwne r4, #1
-; ARMV7-NEXT: cmp r3, #0
-; ARMV7-NEXT: movwne r3, #1
-; ARMV7-NEXT: cmp lr, #0
-; ARMV7-NEXT: movwne lr, #1
-; ARMV7-NEXT: cmp r11, #0
-; ARMV7-NEXT: movwne r11, #1
-; ARMV7-NEXT: adds r2, r2, r5
-; ARMV7-NEXT: and r3, lr, r3
-; ARMV7-NEXT: str r2, [r9, #8]
-; ARMV7-NEXT: adcs r1, r7, r1
-; ARMV7-NEXT: str r1, [r9, #12]
-; ARMV7-NEXT: orr r1, r3, r11
-; ARMV7-NEXT: ldr r2, [sp, #32] @ 4-byte Reload
; ARMV7-NEXT: orr r1, r1, r4
-; ARMV7-NEXT: orr r1, r1, r2
-; ARMV7-NEXT: orr r0, r0, r1
-; ARMV7-NEXT: adc r1, r12, #0
-; ARMV7-NEXT: orr r0, r0, r1
-; ARMV7-NEXT: and r0, r0, #1
-; ARMV7-NEXT: strb r0, [r9, #16]
-; ARMV7-NEXT: add sp, sp, #44
+; ARMV7-NEXT: umull r5, r4, r9, r2
+; ARMV7-NEXT: adds r6, r4, r6
+; ARMV7-NEXT: adc r4, r11, #0
+; ARMV7-NEXT: orr r1, r1, r4
+; ARMV7-NEXT: mov r4, #0
+; ARMV7-NEXT: orr r12, r7, r1
+; ARMV7-NEXT: adds r7, r8, r5
+; ARMV7-NEXT: umull r8, r5, r2, lr
+; ARMV7-NEXT: adc r6, r10, r6
+; ARMV7-NEXT: umlal r5, r4, r3, lr
+; ARMV7-NEXT: ldr lr, [sp, #60]
+; ARMV7-NEXT: umull r2, r1, r2, lr
+; ARMV7-NEXT: adds r5, r2, r5
+; ARMV7-NEXT: adcs r1, r4, r1
+; ARMV7-NEXT: adc r4, r11, #0
+; ARMV7-NEXT: umlal r1, r4, r3, lr
+; ARMV7-NEXT: adds r2, r1, r7
+; ARMV7-NEXT: adcs r3, r4, r6
+; ARMV7-NEXT: adc r1, r11, #0
+; ARMV7-NEXT: orr r1, r12, r1
+; ARMV7-NEXT: b .LBB0_8
+; ARMV7-NEXT: .LBB0_3: @ %overflow.no.lhs
+; ARMV7-NEXT: orrs r1, r9, r4
+; ARMV7-NEXT: beq .LBB0_7
+; ARMV7-NEXT: @ %bb.4: @ %overflow.no.lhs.only
+; ARMV7-NEXT: umull r1, r5, r2, r9
+; ARMV7-NEXT: mov r6, #0
+; ARMV7-NEXT: mov r11, #0
+; ARMV7-NEXT: umlal r5, r6, r3, r9
+; ARMV7-NEXT: str r1, [sp, #8] @ 4-byte Spill
+; ARMV7-NEXT: umull r1, r8, r2, r4
+; ARMV7-NEXT: adds r1, r1, r5
+; ARMV7-NEXT: str r1, [sp, #4] @ 4-byte Spill
+; ARMV7-NEXT: adcs r5, r6, r8
+; ARMV7-NEXT: adc r6, r11, #0
+; ARMV7-NEXT: umull r8, r11, r9, r10
+; ARMV7-NEXT: mla r1, r9, r7, r11
+; ARMV7-NEXT: umlal r5, r6, r3, r4
+; ARMV7-NEXT: mla r1, r4, r10, r1
+; ARMV7-NEXT: adds r4, r5, r8
+; ARMV7-NEXT: umull r8, r5, r2, lr
+; ARMV7-NEXT: adc r9, r6, r1
+; ARMV7-NEXT: mov r6, #0
+; ARMV7-NEXT: umlal r5, r6, r3, lr
+; ARMV7-NEXT: umull r2, r1, r2, r12
+; ARMV7-NEXT: adds r5, r2, r5
+; ARMV7-NEXT: mov r2, #0
+; ARMV7-NEXT: adcs r1, r6, r1
+; ARMV7-NEXT: adc r2, r2, #0
+; ARMV7-NEXT: umlal r1, r2, r3, r12
+; ARMV7-NEXT: umull r3, r6, lr, r10
+; ARMV7-NEXT: mla r7, lr, r7, r6
+; ARMV7-NEXT: adds r1, r1, r3
+; ARMV7-NEXT: mla r7, r12, r10, r7
+; ARMV7-NEXT: adc r3, r2, r7
+; ARMV7-NEXT: ldr r2, [sp, #8] @ 4-byte Reload
+; ARMV7-NEXT: adds r2, r1, r2
+; ARMV7-NEXT: ldr r1, [sp, #4] @ 4-byte Reload
+; ARMV7-NEXT: adcs r3, r3, r1
+; ARMV7-NEXT: adcs r1, r4, #0
+; ARMV7-NEXT: adc r7, r9, #0
+; ARMV7-NEXT: b .LBB0_6
+; ARMV7-NEXT: .LBB0_5: @ %overflow.no.rhs.only
+; ARMV7-NEXT: umull r1, r5, lr, r10
+; ARMV7-NEXT: mov r11, #0
+; ARMV7-NEXT: umull r6, r8, lr, r7
+; ARMV7-NEXT: str r1, [sp, #8] @ 4-byte Spill
+; ARMV7-NEXT: mov r1, #0
+; ARMV7-NEXT: umlal r5, r1, r12, r10
+; ARMV7-NEXT: adds r5, r6, r5
+; ARMV7-NEXT: str r5, [sp, #4] @ 4-byte Spill
+; ARMV7-NEXT: adcs r1, r1, r8
+; ARMV7-NEXT: adc r5, r11, #0
+; ARMV7-NEXT: umull r8, r11, r10, r9
+; ARMV7-NEXT: mla r6, r10, r4, r11
+; ARMV7-NEXT: umlal r1, r5, r12, r7
+; ARMV7-NEXT: mla r6, r7, r9, r6
+; ARMV7-NEXT: mov r7, #0
+; ARMV7-NEXT: adds r10, r1, r8
+; ARMV7-NEXT: adc r11, r5, r6
+; ARMV7-NEXT: umull r8, r5, lr, r2
+; ARMV7-NEXT: umlal r5, r7, r12, r2
+; ARMV7-NEXT: umull r1, r6, lr, r3
+; ARMV7-NEXT: adds r5, r1, r5
+; ARMV7-NEXT: adcs r1, r7, r6
+; ARMV7-NEXT: mov r7, #0
+; ARMV7-NEXT: adc r7, r7, #0
+; ARMV7-NEXT: umlal r1, r7, r12, r3
+; ARMV7-NEXT: umull r12, r6, r2, r9
+; ARMV7-NEXT: mla r2, r2, r4, r6
+; ARMV7-NEXT: adds r1, r1, r12
+; ARMV7-NEXT: mla r2, r3, r9, r2
+; ARMV7-NEXT: adc r3, r7, r2
+; ARMV7-NEXT: ldr r2, [sp, #8] @ 4-byte Reload
+; ARMV7-NEXT: adds r2, r1, r2
+; ARMV7-NEXT: ldr r1, [sp, #4] @ 4-byte Reload
+; ARMV7-NEXT: adcs r3, r3, r1
+; ARMV7-NEXT: adcs r1, r10, #0
+; ARMV7-NEXT: adc r7, r11, #0
+; ARMV7-NEXT: .LBB0_6: @ %overflow.res
+; ARMV7-NEXT: orrs r1, r1, r7
+; ARMV7-NEXT: movwne r1, #1
+; ARMV7-NEXT: b .LBB0_8
+; ARMV7-NEXT: .LBB0_7: @ %overflow.no
+; ARMV7-NEXT: umull r1, r11, r2, lr
+; ARMV7-NEXT: mov r6, #0
+; ARMV7-NEXT: umlal r11, r6, r3, lr
+; ARMV7-NEXT: str r1, [sp, #4] @ 4-byte Spill
+; ARMV7-NEXT: mov r1, #0
+; ARMV7-NEXT: umull r5, r8, r2, r12
+; ARMV7-NEXT: adds r5, r5, r11
+; ARMV7-NEXT: adcs r6, r6, r8
+; ARMV7-NEXT: adc r11, r1, #0
+; ARMV7-NEXT: umlal r6, r11, r3, r12
+; ARMV7-NEXT: umull r8, r12, lr, r10
+; ARMV7-NEXT: str r6, [sp] @ 4-byte Spill
+; ARMV7-NEXT: ldr r6, [sp, #60]
+; ARMV7-NEXT: mla r7, lr, r7, r12
+; ARMV7-NEXT: str r8, [sp, #8] @ 4-byte Spill
+; ARMV7-NEXT: ldr r8, [sp, #4] @ 4-byte Reload
+; ARMV7-NEXT: mla r12, r6, r10, r7
+; ARMV7-NEXT: umull lr, r7, r9, r2
+; ARMV7-NEXT: mla r3, r9, r3, r7
+; ARMV7-NEXT: mla r2, r4, r2, r3
+; ARMV7-NEXT: ldr r3, [sp, #8] @ 4-byte Reload
+; ARMV7-NEXT: adds r3, lr, r3
+; ARMV7-NEXT: adc r7, r2, r12
+; ARMV7-NEXT: ldr r2, [sp] @ 4-byte Reload
+; ARMV7-NEXT: adds r2, r2, r3
+; ARMV7-NEXT: adc r3, r11, r7
+; ARMV7-NEXT: .LBB0_8: @ %overflow.res
+; ARMV7-NEXT: str r8, [r0]
+; ARMV7-NEXT: and r1, r1, #1
+; ARMV7-NEXT: str r5, [r0, #4]
+; ARMV7-NEXT: str r2, [r0, #8]
+; ARMV7-NEXT: str r3, [r0, #12]
+; ARMV7-NEXT: strb r1, [r0, #16]
+; ARMV7-NEXT: add sp, sp, #12
; ARMV7-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
start:
%0 = tail call { i128, i1 } @llvm.umul.with.overflow.i128(i128 %l, i128 %r) #2
diff --git a/llvm/test/CodeGen/ARM/umulo-64-legalisation-lowering.ll b/llvm/test/CodeGen/ARM/umulo-64-legalisation-lowering.ll
index 64d9831442970..91ea1a1ad75e9 100644
--- a/llvm/test/CodeGen/ARM/umulo-64-legalisation-lowering.ll
+++ b/llvm/test/CodeGen/ARM/umulo-64-legalisation-lowering.ll
@@ -4,12 +4,18 @@
define { i64, i8 } @mulodi_test(i64 %l, i64 %r) unnamed_addr #0 {
; ARMV6-LABEL: mulodi_test:
-; ARMV6: @ %bb.0: @ %start
+; ARMV6: @ %bb.0: @ %overflow.entry
; ARMV6-NEXT: push {r4, r5, r11, lr}
-; ARMV6-NEXT: umull r12, lr, r1, r2
-; ARMV6-NEXT: umull r4, r5, r3, r0
-; ARMV6-NEXT: cmp lr, #0
-; ARMV6-NEXT: movne lr, #1
+; ARMV6-NEXT: cmp r1, #0
+; ARMV6-NEXT: beq .LBB0_3
+; ARMV6-NEXT: @ %bb.1: @ %overflow.lhs
+; ARMV6-NEXT: cmp r3, #0
+; ARMV6-NEXT: beq .LBB0_5
+; ARMV6-NEXT: @ %bb.2: @ %overflow
+; ARMV6-NEXT: umull r12, r4, r1, r2
+; ARMV6-NEXT: umull lr, r5, r3, r0
+; ARMV6-NEXT: cmp r4, #0
+; ARMV6-NEXT: movne r4, #1
; ARMV6-NEXT: cmp r3, #0
; ARMV6-NEXT: movne r3, #1
; ARMV6-NEXT: cmp r1, #0
@@ -17,38 +23,105 @@ define { i64, i8 } @mulodi_test(i64 %l, i64 %r) unnamed_addr #0 {
; ARMV6-NEXT: movne r1, #1
; ARMV6-NEXT: and r1, r1, r3
; ARMV6-NEXT: cmp r5, #0
-; ARMV6-NEXT: orr r1, r1, lr
+; ARMV6-NEXT: orr r1, r1, r4
; ARMV6-NEXT: movne r5, #1
; ARMV6-NEXT: orr r3, r1, r5
-; ARMV6-NEXT: add r1, r12, r4
+; ARMV6-NEXT: add r1, r12, lr
; ARMV6-NEXT: adds r1, r2, r1
; ARMV6-NEXT: mov r5, #0
; ARMV6-NEXT: adc r2, r5, #0
-; ARMV6-NEXT: orr r2, r3, r2
+; ARMV6-NEXT: orr r12, r3, r2
+; ARMV6-NEXT: and r2, r12, #1
+; ARMV6-NEXT: pop {r4, r5, r11, pc}
+; ARMV6-NEXT: .LBB0_3: @ %overflow.no.lhs
+; ARMV6-NEXT: cmp r3, #0
+; ARMV6-NEXT: beq .LBB0_7
+; ARMV6-NEXT: @ %bb.4: @ %overflow.no.lhs.only
+; ARMV6-NEXT: mov lr, r0
+; ARMV6-NEXT: umull r0, r4, r0, r2
+; ARMV6-NEXT: mov r12, r1
+; ARMV6-NEXT: mla r1, r1, r2, r4
+; ARMV6-NEXT: mul r12, r12, r3
+; ARMV6-NEXT: umlal r1, r12, lr, r3
+; ARMV6-NEXT: b .LBB0_6
+; ARMV6-NEXT: .LBB0_5: @ %overflow.no.rhs.only
+; ARMV6-NEXT: mov r12, r0
+; ARMV6-NEXT: umull r0, lr, r2, r0
+; ARMV6-NEXT: mov r4, r1
+; ARMV6-NEXT: mla r1, r3, r12, lr
+; ARMV6-NEXT: mul r12, r3, r4
+; ARMV6-NEXT: umlal r1, r12, r2, r4
+; ARMV6-NEXT: .LBB0_6: @ %overflow.res
+; ARMV6-NEXT: cmp r12, #0
+; ARMV6-NEXT: movne r12, #1
+; ARMV6-NEXT: and r2, r12, #1
+; ARMV6-NEXT: pop {r4, r5, r11, pc}
+; ARMV6-NEXT: .LBB0_7: @ %overflow.no
+; ARMV6-NEXT: mov r12, r0
+; ARMV6-NEXT: umull r0, r4, r0, r2
+; ARMV6-NEXT: mla r3, r12, r3, r4
+; ARMV6-NEXT: mov r12, #0
+; ARMV6-NEXT: mla r1, r1, r2, r3
+; ARMV6-NEXT: and r2, r12, #1
; ARMV6-NEXT: pop {r4, r5, r11, pc}
;
; ARMV7-LABEL: mulodi_test:
-; ARMV7: @ %bb.0: @ %start
+; ARMV7: @ %bb.0: @ %overflow.entry
; ARMV7-NEXT: push {r4, r5, r11, lr}
-; ARMV7-NEXT: umull r12, lr, r3, r0
+; ARMV7-NEXT: cmp r1, #0
+; ARMV7-NEXT: beq .LBB0_3
+; ARMV7-NEXT: @ %bb.1: @ %overflow.lhs
+; ARMV7-NEXT: cmp r3, #0
+; ARMV7-NEXT: beq .LBB0_5
+; ARMV7-NEXT: @ %bb.2: @ %overflow
+; ARMV7-NEXT: umull lr, r4, r3, r0
; ARMV7-NEXT: cmp r3, #0
; ARMV7-NEXT: movwne r3, #1
; ARMV7-NEXT: cmp r1, #0
-; ARMV7-NEXT: umull r0, r4, r0, r2
+; ARMV7-NEXT: umull r0, r12, r0, r2
; ARMV7-NEXT: umull r2, r5, r1, r2
; ARMV7-NEXT: movwne r1, #1
; ARMV7-NEXT: and r1, r1, r3
; ARMV7-NEXT: cmp r5, #0
; ARMV7-NEXT: movwne r5, #1
-; ARMV7-NEXT: cmp lr, #0
+; ARMV7-NEXT: cmp r4, #0
; ARMV7-NEXT: orr r1, r1, r5
-; ARMV7-NEXT: movwne lr, #1
-; ARMV7-NEXT: orr r3, r1, lr
-; ARMV7-NEXT: add r1, r2, r12
+; ARMV7-NEXT: movwne r4, #1
+; ARMV7-NEXT: orr r3, r1, r4
+; ARMV7-NEXT: add r1, r2, lr
; ARMV7-NEXT: mov r2, #0
-; ARMV7-NEXT: adds r1, r4, r1
+; ARMV7-NEXT: adds r1, r12, r1
; ARMV7-NEXT: adc r2, r2, #0
-; ARMV7-NEXT: orr r2, r3, r2
+; ARMV7-NEXT: orr r12, r3, r2
+; ARMV7-NEXT: and r2, r12, #1
+; ARMV7-NEXT: pop {r4, r5, r11, pc}
+; ARMV7-NEXT: .LBB0_3: @ %overflow.no.lhs
+; ARMV7-NEXT: mov r5, r0
+; ARMV7-NEXT: umull r0, r4, r0, r2
+; ARMV7-NEXT: cmp r3, #0
+; ARMV7-NEXT: beq .LBB0_7
+; ARMV7-NEXT: @ %bb.4: @ %overflow.no.lhs.only
+; ARMV7-NEXT: mul r12, r1, r3
+; ARMV7-NEXT: mla r1, r1, r2, r4
+; ARMV7-NEXT: umlal r1, r12, r5, r3
+; ARMV7-NEXT: b .LBB0_6
+; ARMV7-NEXT: .LBB0_5: @ %overflow.no.rhs.only
+; ARMV7-NEXT: mov lr, r0
+; ARMV7-NEXT: umull r0, r4, r2, r0
+; ARMV7-NEXT: mov r5, r1
+; ARMV7-NEXT: mul r12, r3, r1
+; ARMV7-NEXT: mla r1, r3, lr, r4
+; ARMV7-NEXT: umlal r1, r12, r2, r5
+; ARMV7-NEXT: .LBB0_6: @ %overflow.res
+; ARMV7-NEXT: cmp r12, #0
+; ARMV7-NEXT: movwne r12, #1
+; ARMV7-NEXT: and r2, r12, #1
+; ARMV7-NEXT: pop {r4, r5, r11, pc}
+; ARMV7-NEXT: .LBB0_7: @ %overflow.no
+; ARMV7-NEXT: mla r3, r5, r3, r4
+; ARMV7-NEXT: mov r12, #0
+; ARMV7-NEXT: mla r1, r1, r2, r3
+; ARMV7-NEXT: and r2, r12, #1
; ARMV7-NEXT: pop {r4, r5, r11, pc}
start:
%0 = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 %l, i64 %r) #2
diff --git a/llvm/test/CodeGen/LoongArch/smul-with-overflow.ll b/llvm/test/CodeGen/LoongArch/smul-with-overflow.ll
index 968c06136225d..5498a0741bc23 100644
--- a/llvm/test/CodeGen/LoongArch/smul-with-overflow.ll
+++ b/llvm/test/CodeGen/LoongArch/smul-with-overflow.ll
@@ -4,7 +4,13 @@
define zeroext i1 @smuloi64(i64 %v1, i64 %v2, ptr %res) {
; LA32-LABEL: smuloi64:
-; LA32: # %bb.0:
+; LA32: # %bb.0: # %overflow.entry
+; LA32-NEXT: srai.w $a6, $a0, 31
+; LA32-NEXT: srai.w $a5, $a2, 31
+; LA32-NEXT: beq $a1, $a6, .LBB0_3
+; LA32-NEXT: # %bb.1: # %overflow.lhs
+; LA32-NEXT: beq $a3, $a5, .LBB0_6
+; LA32-NEXT: # %bb.2: # %overflow
; LA32-NEXT: mulh.wu $a5, $a0, $a2
; LA32-NEXT: mul.w $a6, $a1, $a2
; LA32-NEXT: add.w $a5, $a6, $a5
@@ -38,11 +44,138 @@ define zeroext i1 @smuloi64(i64 %v1, i64 %v2, ptr %res) {
; LA32-NEXT: xor $a1, $a1, $a6
; LA32-NEXT: xor $a3, $a3, $a6
; LA32-NEXT: or $a1, $a3, $a1
-; LA32-NEXT: sltu $a1, $zero, $a1
+; LA32-NEXT: sltu $a6, $zero, $a1
+; LA32-NEXT: b .LBB0_9
+; LA32-NEXT: .LBB0_3: # %overflow.no.lhs
+; LA32-NEXT: beq $a3, $a5, .LBB0_8
+; LA32-NEXT: # %bb.4: # %overflow.no.lhs.only
+; LA32-NEXT: bltz $a1, .LBB0_10
+; LA32-NEXT: # %bb.5: # %overflow.no.lhs.only
+; LA32-NEXT: move $a5, $a0
+; LA32-NEXT: move $a6, $a1
+; LA32-NEXT: bgez $a1, .LBB0_11
+; LA32-NEXT: b .LBB0_12
+; LA32-NEXT: .LBB0_6: # %overflow.no.rhs.only
+; LA32-NEXT: bltz $a3, .LBB0_14
+; LA32-NEXT: # %bb.7: # %overflow.no.rhs.only
+; LA32-NEXT: move $a5, $a2
+; LA32-NEXT: move $a6, $a3
+; LA32-NEXT: bgez $a3, .LBB0_15
+; LA32-NEXT: b .LBB0_16
+; LA32-NEXT: .LBB0_8: # %overflow.no
+; LA32-NEXT: move $a6, $zero
+; LA32-NEXT: mulh.wu $a5, $a0, $a2
+; LA32-NEXT: mul.w $a3, $a0, $a3
+; LA32-NEXT: add.w $a3, $a5, $a3
+; LA32-NEXT: mul.w $a1, $a1, $a2
+; LA32-NEXT: add.w $a5, $a3, $a1
+; LA32-NEXT: .LBB0_9: # %overflow.res
; LA32-NEXT: mul.w $a0, $a0, $a2
+; LA32-NEXT: b .LBB0_27
+; LA32-NEXT: .LBB0_10:
+; LA32-NEXT: sub.w $a5, $zero, $a0
+; LA32-NEXT: sltu $a6, $zero, $a0
+; LA32-NEXT: add.w $a6, $a1, $a6
+; LA32-NEXT: sub.w $a6, $zero, $a6
+; LA32-NEXT: bltz $a1, .LBB0_12
+; LA32-NEXT: .LBB0_11: # %overflow.no.lhs.only
+; LA32-NEXT: move $a6, $a1
+; LA32-NEXT: move $a5, $a0
+; LA32-NEXT: .LBB0_12: # %overflow.no.lhs.only
+; LA32-NEXT: bltz $a3, .LBB0_18
+; LA32-NEXT: # %bb.13: # %overflow.no.lhs.only
+; LA32-NEXT: move $a7, $a2
+; LA32-NEXT: move $a0, $a3
+; LA32-NEXT: b .LBB0_19
+; LA32-NEXT: .LBB0_14:
+; LA32-NEXT: sub.w $a5, $zero, $a2
+; LA32-NEXT: sltu $a6, $zero, $a2
+; LA32-NEXT: add.w $a6, $a3, $a6
+; LA32-NEXT: sub.w $a6, $zero, $a6
+; LA32-NEXT: bltz $a3, .LBB0_16
+; LA32-NEXT: .LBB0_15: # %overflow.no.rhs.only
+; LA32-NEXT: move $a6, $a3
+; LA32-NEXT: move $a5, $a2
+; LA32-NEXT: .LBB0_16: # %overflow.no.rhs.only
+; LA32-NEXT: bltz $a1, .LBB0_22
+; LA32-NEXT: # %bb.17: # %overflow.no.rhs.only
+; LA32-NEXT: move $a7, $a0
+; LA32-NEXT: move $a2, $a1
+; LA32-NEXT: b .LBB0_23
+; LA32-NEXT: .LBB0_18:
+; LA32-NEXT: sub.w $a7, $zero, $a2
+; LA32-NEXT: sltu $a0, $zero, $a2
+; LA32-NEXT: add.w $a0, $a3, $a0
+; LA32-NEXT: sub.w $a0, $zero, $a0
+; LA32-NEXT: .LBB0_19: # %overflow.no.lhs.only
+; LA32-NEXT: slti $a1, $a1, 0
+; LA32-NEXT: slti $t0, $a3, 0
+; LA32-NEXT: bltz $a3, .LBB0_21
+; LA32-NEXT: # %bb.20: # %overflow.no.lhs.only
+; LA32-NEXT: move $a0, $a3
+; LA32-NEXT: move $a7, $a2
+; LA32-NEXT: .LBB0_21: # %overflow.no.lhs.only
+; LA32-NEXT: mulh.wu $a2, $a5, $a7
+; LA32-NEXT: mul.w $a3, $a6, $a7
+; LA32-NEXT: add.w $a2, $a2, $a3
+; LA32-NEXT: mul.w $a3, $a5, $a7
+; LA32-NEXT: mul.w $a6, $a6, $a0
+; LA32-NEXT: mulh.wu $a7, $a5, $a0
+; LA32-NEXT: add.w $a6, $a7, $a6
+; LA32-NEXT: mul.w $a0, $a5, $a0
+; LA32-NEXT: add.w $a5, $a2, $a0
+; LA32-NEXT: sltu $a0, $a5, $a2
+; LA32-NEXT: add.w $a2, $a6, $a0
+; LA32-NEXT: xor $a1, $t0, $a1
+; LA32-NEXT: sub.w $a6, $zero, $a1
+; LA32-NEXT: xor $a0, $a3, $a6
+; LA32-NEXT: add.w $a0, $a0, $a1
+; LA32-NEXT: sltu $a1, $a0, $a1
+; LA32-NEXT: xor $a3, $a5, $a6
+; LA32-NEXT: add.w $a5, $a3, $a1
+; LA32-NEXT: sltu $a1, $a5, $a1
+; LA32-NEXT: xor $a2, $a2, $a6
+; LA32-NEXT: b .LBB0_26
+; LA32-NEXT: .LBB0_22:
+; LA32-NEXT: sub.w $a7, $zero, $a0
+; LA32-NEXT: sltu $a2, $zero, $a0
+; LA32-NEXT: add.w $a2, $a1, $a2
+; LA32-NEXT: sub.w $a2, $zero, $a2
+; LA32-NEXT: .LBB0_23: # %overflow.no.rhs.only
+; LA32-NEXT: slti $a3, $a3, 0
+; LA32-NEXT: slti $t0, $a1, 0
+; LA32-NEXT: bltz $a1, .LBB0_25
+; LA32-NEXT: # %bb.24: # %overflow.no.rhs.only
+; LA32-NEXT: move $a2, $a1
+; LA32-NEXT: move $a7, $a0
+; LA32-NEXT: .LBB0_25: # %overflow.no.rhs.only
+; LA32-NEXT: mulh.wu $a0, $a5, $a7
+; LA32-NEXT: mul.w $a1, $a6, $a7
+; LA32-NEXT: add.w $a0, $a0, $a1
+; LA32-NEXT: mul.w $a1, $a5, $a7
+; LA32-NEXT: mul.w $a6, $a6, $a2
+; LA32-NEXT: mulh.wu $a7, $a5, $a2
+; LA32-NEXT: add.w $a6, $a7, $a6
+; LA32-NEXT: mul.w $a2, $a5, $a2
+; LA32-NEXT: add.w $a2, $a0, $a2
+; LA32-NEXT: sltu $a0, $a2, $a0
+; LA32-NEXT: add.w $a6, $a6, $a0
+; LA32-NEXT: xor $a3, $a3, $t0
+; LA32-NEXT: sub.w $a7, $zero, $a3
+; LA32-NEXT: xor $a0, $a1, $a7
+; LA32-NEXT: add.w $a0, $a0, $a3
+; LA32-NEXT: sltu $a1, $a0, $a3
+; LA32-NEXT: xor $a2, $a2, $a7
+; LA32-NEXT: add.w $a5, $a2, $a1
+; LA32-NEXT: sltu $a1, $a5, $a1
+; LA32-NEXT: xor $a2, $a6, $a7
+; LA32-NEXT: .LBB0_26: # %overflow.res
+; LA32-NEXT: add.w $a1, $a2, $a1
+; LA32-NEXT: sltu $a6, $zero, $a1
+; LA32-NEXT: .LBB0_27: # %overflow.res
; LA32-NEXT: st.w $a0, $a4, 0
+; LA32-NEXT: andi $a0, $a6, 1
; LA32-NEXT: st.w $a5, $a4, 4
-; LA32-NEXT: move $a0, $a1
; LA32-NEXT: ret
;
; LA64-LABEL: smuloi64:
@@ -63,7 +196,7 @@ define zeroext i1 @smuloi64(i64 %v1, i64 %v2, ptr %res) {
define zeroext i1 @smuloi128(i128 %v1, i128 %v2, ptr %res) {
; LA32-LABEL: smuloi128:
-; LA32: # %bb.0:
+; LA32: # %bb.0: # %overflow.entry
; LA32-NEXT: addi.w $sp, $sp, -48
; LA32-NEXT: .cfi_def_cfa_offset 48
; LA32-NEXT: st.w $ra, $sp, 44 # 4-byte Folded Spill
@@ -88,198 +221,608 @@ define zeroext i1 @smuloi128(i128 %v1, i128 %v2, ptr %res) {
; LA32-NEXT: .cfi_offset 29, -36
; LA32-NEXT: .cfi_offset 30, -40
; LA32-NEXT: .cfi_offset 31, -44
-; LA32-NEXT: ld.w $a5, $a1, 12
-; LA32-NEXT: ld.w $a6, $a1, 8
-; LA32-NEXT: ld.w $t1, $a0, 4
-; LA32-NEXT: ld.w $a3, $a1, 0
-; LA32-NEXT: ld.w $a7, $a0, 8
-; LA32-NEXT: ld.w $t0, $a0, 12
-; LA32-NEXT: ld.w $a4, $a0, 0
-; LA32-NEXT: ld.w $t4, $a1, 4
-; LA32-NEXT: mulh.wu $a0, $a7, $a3
-; LA32-NEXT: mul.w $a1, $t0, $a3
-; LA32-NEXT: add.w $a0, $a1, $a0
-; LA32-NEXT: sltu $a1, $a0, $a1
-; LA32-NEXT: mulh.wu $t2, $t0, $a3
-; LA32-NEXT: add.w $a1, $t2, $a1
-; LA32-NEXT: mul.w $t3, $a7, $t4
-; LA32-NEXT: add.w $t2, $t3, $a0
-; LA32-NEXT: sltu $a0, $t2, $t3
-; LA32-NEXT: mulh.wu $t3, $a7, $t4
-; LA32-NEXT: add.w $a0, $t3, $a0
-; LA32-NEXT: add.w $t5, $a1, $a0
-; LA32-NEXT: mul.w $t6, $t0, $t4
-; LA32-NEXT: add.w $t7, $t6, $t5
-; LA32-NEXT: srai.w $a0, $t0, 31
-; LA32-NEXT: mul.w $t8, $a3, $a0
-; LA32-NEXT: add.w $t3, $t7, $t8
-; LA32-NEXT: sltu $fp, $t3, $t7
+; LA32-NEXT: ld.w $a3, $a1, 12
+; LA32-NEXT: ld.w $a7, $a1, 8
+; LA32-NEXT: ld.w $a5, $a1, 0
+; LA32-NEXT: ld.w $a6, $a0, 0
+; LA32-NEXT: ld.w $t0, $a0, 4
+; LA32-NEXT: ld.w $a4, $a0, 12
+; LA32-NEXT: ld.w $a0, $a0, 8
+; LA32-NEXT: ld.w $a1, $a1, 4
+; LA32-NEXT: srai.w $t1, $t0, 31
+; LA32-NEXT: xor $t2, $a4, $t1
+; LA32-NEXT: xor $t1, $a0, $t1
+; LA32-NEXT: or $t2, $t1, $t2
+; LA32-NEXT: srai.w $t1, $a1, 31
+; LA32-NEXT: beq $t2, $zero, .LBB1_11
+; LA32-NEXT: # %bb.1: # %overflow.lhs
+; LA32-NEXT: xor $t2, $a7, $t1
+; LA32-NEXT: xor $t1, $a3, $t1
+; LA32-NEXT: or $t1, $t2, $t1
+; LA32-NEXT: beq $t1, $zero, .LBB1_14
+; LA32-NEXT: # %bb.2: # %overflow
+; LA32-NEXT: mulh.wu $t1, $a0, $a5
+; LA32-NEXT: mul.w $t2, $a4, $a5
+; LA32-NEXT: add.w $t1, $t2, $t1
+; LA32-NEXT: sltu $t2, $t1, $t2
+; LA32-NEXT: mulh.wu $t3, $a4, $a5
+; LA32-NEXT: add.w $t5, $t3, $t2
+; LA32-NEXT: mul.w $t3, $a0, $a1
+; LA32-NEXT: add.w $t2, $t3, $t1
+; LA32-NEXT: sltu $t1, $t2, $t3
+; LA32-NEXT: mulh.wu $t3, $a0, $a1
+; LA32-NEXT: add.w $t1, $t3, $t1
+; LA32-NEXT: add.w $t1, $t5, $t1
+; LA32-NEXT: mul.w $t6, $a4, $a1
+; LA32-NEXT: add.w $t7, $t6, $t1
+; LA32-NEXT: srai.w $t3, $a4, 31
+; LA32-NEXT: mul.w $t8, $a5, $t3
+; LA32-NEXT: add.w $t4, $t7, $t8
+; LA32-NEXT: sltu $fp, $t4, $t7
; LA32-NEXT: sltu $t6, $t7, $t6
-; LA32-NEXT: sltu $a1, $t5, $a1
-; LA32-NEXT: mulh.wu $t5, $t0, $t4
-; LA32-NEXT: add.w $a1, $t5, $a1
-; LA32-NEXT: add.w $a1, $a1, $t6
-; LA32-NEXT: mulh.wu $t5, $a3, $a0
+; LA32-NEXT: sltu $t1, $t1, $t5
+; LA32-NEXT: mulh.wu $t5, $a4, $a1
+; LA32-NEXT: add.w $t1, $t5, $t1
+; LA32-NEXT: add.w $t1, $t1, $t6
+; LA32-NEXT: mulh.wu $t5, $a5, $t3
; LA32-NEXT: add.w $t5, $t5, $t8
-; LA32-NEXT: mul.w $t6, $t4, $a0
+; LA32-NEXT: mul.w $t6, $a1, $t3
; LA32-NEXT: add.w $t5, $t5, $t6
-; LA32-NEXT: add.w $t8, $a1, $t5
-; LA32-NEXT: mulh.wu $a1, $a4, $a3
-; LA32-NEXT: mul.w $t5, $t1, $a3
-; LA32-NEXT: add.w $a1, $t5, $a1
-; LA32-NEXT: sltu $t5, $a1, $t5
-; LA32-NEXT: mulh.wu $t6, $t1, $a3
-; LA32-NEXT: add.w $t5, $t6, $t5
-; LA32-NEXT: mul.w $t6, $a4, $t4
-; LA32-NEXT: add.w $a1, $t6, $a1
-; LA32-NEXT: sltu $t6, $a1, $t6
-; LA32-NEXT: mulh.wu $t7, $a4, $t4
-; LA32-NEXT: add.w $t6, $t7, $t6
-; LA32-NEXT: add.w $t6, $t5, $t6
-; LA32-NEXT: mul.w $t7, $t1, $t4
-; LA32-NEXT: sltu $t5, $t6, $t5
+; LA32-NEXT: add.w $t5, $t1, $t5
+; LA32-NEXT: mulh.wu $t1, $a6, $a5
+; LA32-NEXT: mul.w $t6, $t0, $a5
+; LA32-NEXT: add.w $t1, $t6, $t1
+; LA32-NEXT: sltu $t6, $t1, $t6
+; LA32-NEXT: mulh.wu $t7, $t0, $a5
; LA32-NEXT: add.w $t6, $t7, $t6
-; LA32-NEXT: sltu $t7, $t6, $t7
-; LA32-NEXT: mulh.wu $t4, $t1, $t4
-; LA32-NEXT: add.w $t4, $t4, $t5
-; LA32-NEXT: add.w $t4, $t4, $t7
-; LA32-NEXT: add.w $t4, $t2, $t4
-; LA32-NEXT: mul.w $t5, $a7, $a3
-; LA32-NEXT: add.w $t6, $t5, $t6
-; LA32-NEXT: sltu $t5, $t6, $t5
-; LA32-NEXT: add.w $t7, $t4, $t5
-; LA32-NEXT: add.w $t4, $t8, $fp
-; LA32-NEXT: beq $t7, $t2, .LBB1_2
-; LA32-NEXT: # %bb.1:
-; LA32-NEXT: sltu $t5, $t7, $t2
-; LA32-NEXT: .LBB1_2:
-; LA32-NEXT: add.w $t5, $t3, $t5
-; LA32-NEXT: sltu $t2, $t5, $t3
-; LA32-NEXT: add.w $t4, $t4, $t2
-; LA32-NEXT: mulh.wu $t2, $a4, $a6
-; LA32-NEXT: mul.w $t3, $t1, $a6
-; LA32-NEXT: add.w $t2, $t3, $t2
-; LA32-NEXT: sltu $t3, $t2, $t3
-; LA32-NEXT: mulh.wu $t8, $t1, $a6
-; LA32-NEXT: add.w $s0, $t8, $t3
-; LA32-NEXT: mul.w $t3, $a4, $a5
-; LA32-NEXT: add.w $t8, $t3, $t2
-; LA32-NEXT: sltu $t2, $t8, $t3
-; LA32-NEXT: mulh.wu $t3, $a4, $a5
-; LA32-NEXT: add.w $t2, $t3, $t2
+; LA32-NEXT: mul.w $t7, $a6, $a1
+; LA32-NEXT: add.w $t1, $t7, $t1
+; LA32-NEXT: sltu $t7, $t1, $t7
+; LA32-NEXT: mulh.wu $t8, $a6, $a1
+; LA32-NEXT: add.w $t7, $t8, $t7
+; LA32-NEXT: add.w $t7, $t6, $t7
+; LA32-NEXT: mul.w $t8, $t0, $a1
+; LA32-NEXT: sltu $t6, $t7, $t6
+; LA32-NEXT: add.w $t7, $t8, $t7
+; LA32-NEXT: sltu $t8, $t7, $t8
+; LA32-NEXT: mulh.wu $a1, $t0, $a1
+; LA32-NEXT: add.w $a1, $a1, $t6
+; LA32-NEXT: add.w $a1, $a1, $t8
+; LA32-NEXT: add.w $t8, $t2, $a1
+; LA32-NEXT: mul.w $t6, $a0, $a5
+; LA32-NEXT: add.w $a1, $t6, $t7
+; LA32-NEXT: sltu $t6, $a1, $t6
+; LA32-NEXT: add.w $t7, $t8, $t6
+; LA32-NEXT: add.w $t5, $t5, $fp
+; LA32-NEXT: beq $t7, $t2, .LBB1_4
+; LA32-NEXT: # %bb.3: # %overflow
+; LA32-NEXT: sltu $t6, $t7, $t2
+; LA32-NEXT: .LBB1_4: # %overflow
+; LA32-NEXT: add.w $t6, $t4, $t6
+; LA32-NEXT: sltu $t2, $t6, $t4
+; LA32-NEXT: add.w $t5, $t5, $t2
+; LA32-NEXT: mulh.wu $t2, $a6, $a7
+; LA32-NEXT: mul.w $t4, $t0, $a7
+; LA32-NEXT: add.w $t2, $t4, $t2
+; LA32-NEXT: sltu $t4, $t2, $t4
+; LA32-NEXT: mulh.wu $t8, $t0, $a7
+; LA32-NEXT: add.w $s0, $t8, $t4
+; LA32-NEXT: mul.w $t4, $a6, $a3
+; LA32-NEXT: add.w $t8, $t4, $t2
+; LA32-NEXT: sltu $t2, $t8, $t4
+; LA32-NEXT: mulh.wu $t4, $a6, $a3
+; LA32-NEXT: add.w $t2, $t4, $t2
; LA32-NEXT: add.w $t2, $s0, $t2
-; LA32-NEXT: mul.w $s1, $t1, $a5
+; LA32-NEXT: mul.w $s1, $t0, $a3
; LA32-NEXT: add.w $s2, $s1, $t2
-; LA32-NEXT: srai.w $t3, $a5, 31
-; LA32-NEXT: mul.w $s3, $t3, $a4
+; LA32-NEXT: srai.w $t4, $a3, 31
+; LA32-NEXT: mul.w $s3, $t4, $a6
; LA32-NEXT: add.w $fp, $s2, $s3
; LA32-NEXT: sltu $s4, $fp, $s2
; LA32-NEXT: sltu $s1, $s2, $s1
; LA32-NEXT: sltu $t2, $t2, $s0
-; LA32-NEXT: mulh.wu $s0, $t1, $a5
+; LA32-NEXT: mulh.wu $s0, $t0, $a3
; LA32-NEXT: add.w $t2, $s0, $t2
; LA32-NEXT: add.w $t2, $t2, $s1
-; LA32-NEXT: mul.w $t1, $t3, $t1
-; LA32-NEXT: mulh.wu $s0, $t3, $a4
-; LA32-NEXT: add.w $t1, $s0, $t1
-; LA32-NEXT: add.w $t1, $t1, $s3
-; LA32-NEXT: add.w $s0, $t2, $t1
-; LA32-NEXT: add.w $t2, $t8, $t7
-; LA32-NEXT: mul.w $t7, $a4, $a6
-; LA32-NEXT: add.w $t1, $t7, $t6
-; LA32-NEXT: sltu $t7, $t1, $t7
-; LA32-NEXT: add.w $t2, $t2, $t7
-; LA32-NEXT: add.w $t6, $s0, $s4
-; LA32-NEXT: beq $t2, $t8, .LBB1_4
-; LA32-NEXT: # %bb.3:
-; LA32-NEXT: sltu $t7, $t2, $t8
-; LA32-NEXT: .LBB1_4:
+; LA32-NEXT: mul.w $t0, $t4, $t0
+; LA32-NEXT: mulh.wu $s0, $t4, $a6
+; LA32-NEXT: add.w $t0, $s0, $t0
+; LA32-NEXT: add.w $t0, $t0, $s3
+; LA32-NEXT: add.w $t0, $t2, $t0
+; LA32-NEXT: add.w $s0, $t8, $t7
+; LA32-NEXT: mul.w $t7, $a6, $a7
+; LA32-NEXT: add.w $t2, $t7, $a1
+; LA32-NEXT: sltu $t7, $t2, $t7
+; LA32-NEXT: add.w $a1, $s0, $t7
+; LA32-NEXT: add.w $t0, $t0, $s4
+; LA32-NEXT: beq $a1, $t8, .LBB1_6
+; LA32-NEXT: # %bb.5: # %overflow
+; LA32-NEXT: sltu $t7, $a1, $t8
+; LA32-NEXT: .LBB1_6: # %overflow
; LA32-NEXT: add.w $t7, $fp, $t7
; LA32-NEXT: sltu $t8, $t7, $fp
-; LA32-NEXT: add.w $t8, $t6, $t8
-; LA32-NEXT: add.w $t6, $t4, $t8
-; LA32-NEXT: add.w $t7, $t5, $t7
-; LA32-NEXT: sltu $s0, $t7, $t5
-; LA32-NEXT: add.w $s4, $t6, $s0
-; LA32-NEXT: mulh.wu $t5, $a7, $a6
-; LA32-NEXT: mul.w $s1, $t0, $a6
-; LA32-NEXT: add.w $s3, $s1, $t5
-; LA32-NEXT: mul.w $fp, $a7, $a5
+; LA32-NEXT: add.w $t8, $t0, $t8
+; LA32-NEXT: add.w $t0, $t5, $t8
+; LA32-NEXT: add.w $t7, $t6, $t7
+; LA32-NEXT: sltu $s0, $t7, $t6
+; LA32-NEXT: add.w $s4, $t0, $s0
+; LA32-NEXT: mulh.wu $t0, $a0, $a7
+; LA32-NEXT: mul.w $s1, $a4, $a7
+; LA32-NEXT: add.w $s3, $s1, $t0
+; LA32-NEXT: mul.w $fp, $a0, $a3
; LA32-NEXT: add.w $s2, $fp, $s3
; LA32-NEXT: add.w $t6, $s2, $s4
-; LA32-NEXT: mul.w $s5, $a7, $a6
-; LA32-NEXT: add.w $t5, $s5, $t7
-; LA32-NEXT: sltu $t7, $t5, $s5
+; LA32-NEXT: mul.w $s5, $a0, $a7
+; LA32-NEXT: add.w $t0, $s5, $t7
+; LA32-NEXT: sltu $t7, $t0, $s5
; LA32-NEXT: add.w $t6, $t6, $t7
-; LA32-NEXT: beq $t6, $s2, .LBB1_6
-; LA32-NEXT: # %bb.5:
+; LA32-NEXT: beq $t6, $s2, .LBB1_8
+; LA32-NEXT: # %bb.7: # %overflow
; LA32-NEXT: sltu $t7, $t6, $s2
-; LA32-NEXT: .LBB1_6:
-; LA32-NEXT: beq $s4, $t4, .LBB1_8
-; LA32-NEXT: # %bb.7:
-; LA32-NEXT: sltu $s0, $s4, $t4
-; LA32-NEXT: .LBB1_8:
-; LA32-NEXT: srai.w $t4, $t4, 31
+; LA32-NEXT: .LBB1_8: # %overflow
+; LA32-NEXT: beq $s4, $t5, .LBB1_10
+; LA32-NEXT: # %bb.9: # %overflow
+; LA32-NEXT: sltu $s0, $s4, $t5
+; LA32-NEXT: .LBB1_10: # %overflow
+; LA32-NEXT: srai.w $t5, $t5, 31
; LA32-NEXT: srai.w $t8, $t8, 31
-; LA32-NEXT: add.w $t8, $t4, $t8
+; LA32-NEXT: add.w $t8, $t5, $t8
; LA32-NEXT: add.w $s0, $t8, $s0
; LA32-NEXT: sltu $s1, $s3, $s1
-; LA32-NEXT: mulh.wu $s3, $t0, $a6
+; LA32-NEXT: mulh.wu $s3, $a4, $a7
; LA32-NEXT: add.w $s1, $s3, $s1
; LA32-NEXT: sltu $fp, $s2, $fp
-; LA32-NEXT: mulh.wu $s2, $a7, $a5
+; LA32-NEXT: mulh.wu $s2, $a0, $a3
; LA32-NEXT: add.w $fp, $s2, $fp
; LA32-NEXT: add.w $fp, $s1, $fp
-; LA32-NEXT: mul.w $s2, $t0, $a5
+; LA32-NEXT: mul.w $s2, $a4, $a3
; LA32-NEXT: add.w $s3, $s2, $fp
-; LA32-NEXT: mul.w $s4, $a6, $a0
-; LA32-NEXT: mul.w $s5, $t3, $a7
+; LA32-NEXT: mul.w $s4, $a7, $t3
+; LA32-NEXT: mul.w $s5, $t4, $a0
; LA32-NEXT: add.w $s6, $s5, $s4
; LA32-NEXT: add.w $s7, $s3, $s6
; LA32-NEXT: add.w $s8, $s7, $s0
; LA32-NEXT: add.w $t7, $s8, $t7
; LA32-NEXT: sltu $ra, $t7, $s8
-; LA32-NEXT: sltu $t4, $t8, $t4
-; LA32-NEXT: add.w $t4, $t8, $t4
+; LA32-NEXT: sltu $t5, $t8, $t5
+; LA32-NEXT: add.w $t5, $t8, $t5
; LA32-NEXT: sltu $t8, $s0, $t8
-; LA32-NEXT: add.w $t4, $t4, $t8
+; LA32-NEXT: add.w $t5, $t5, $t8
; LA32-NEXT: sltu $t8, $s7, $s3
; LA32-NEXT: sltu $s0, $s3, $s2
; LA32-NEXT: sltu $fp, $fp, $s1
-; LA32-NEXT: mulh.wu $s1, $t0, $a5
+; LA32-NEXT: mulh.wu $s1, $a4, $a3
; LA32-NEXT: add.w $fp, $s1, $fp
; LA32-NEXT: add.w $fp, $fp, $s0
-; LA32-NEXT: mulh.wu $a6, $a6, $a0
-; LA32-NEXT: add.w $a6, $a6, $s4
-; LA32-NEXT: mul.w $a0, $a5, $a0
-; LA32-NEXT: add.w $a0, $a6, $a0
-; LA32-NEXT: mul.w $a5, $t3, $t0
-; LA32-NEXT: mulh.wu $a6, $t3, $a7
-; LA32-NEXT: add.w $a5, $a6, $a5
-; LA32-NEXT: add.w $a5, $a5, $s5
-; LA32-NEXT: add.w $a0, $a5, $a0
-; LA32-NEXT: sltu $a5, $s6, $s5
-; LA32-NEXT: add.w $a0, $a0, $a5
+; LA32-NEXT: mulh.wu $a7, $a7, $t3
+; LA32-NEXT: add.w $a7, $a7, $s4
+; LA32-NEXT: mul.w $a3, $a3, $t3
+; LA32-NEXT: add.w $a3, $a7, $a3
+; LA32-NEXT: mul.w $a4, $t4, $a4
+; LA32-NEXT: mulh.wu $a0, $t4, $a0
+; LA32-NEXT: add.w $a0, $a0, $a4
+; LA32-NEXT: add.w $a0, $a0, $s5
+; LA32-NEXT: add.w $a0, $a0, $a3
+; LA32-NEXT: sltu $a3, $s6, $s5
+; LA32-NEXT: add.w $a0, $a0, $a3
; LA32-NEXT: add.w $a0, $fp, $a0
; LA32-NEXT: add.w $a0, $a0, $t8
-; LA32-NEXT: add.w $a0, $a0, $t4
-; LA32-NEXT: sltu $a5, $s8, $s7
-; LA32-NEXT: add.w $a0, $a0, $a5
+; LA32-NEXT: add.w $a0, $a0, $t5
+; LA32-NEXT: sltu $a3, $s8, $s7
+; LA32-NEXT: add.w $a0, $a0, $a3
; LA32-NEXT: add.w $a0, $a0, $ra
-; LA32-NEXT: srai.w $a5, $t2, 31
-; LA32-NEXT: xor $a0, $a0, $a5
-; LA32-NEXT: xor $a6, $t6, $a5
-; LA32-NEXT: or $a0, $a6, $a0
-; LA32-NEXT: xor $a6, $t7, $a5
-; LA32-NEXT: xor $a5, $t5, $a5
-; LA32-NEXT: or $a5, $a5, $a6
-; LA32-NEXT: or $a0, $a5, $a0
-; LA32-NEXT: sltu $a0, $zero, $a0
-; LA32-NEXT: mul.w $a3, $a4, $a3
-; LA32-NEXT: st.w $a3, $a2, 0
-; LA32-NEXT: st.w $a1, $a2, 4
-; LA32-NEXT: st.w $t1, $a2, 8
-; LA32-NEXT: st.w $t2, $a2, 12
+; LA32-NEXT: srai.w $a3, $a1, 31
+; LA32-NEXT: xor $a0, $a0, $a3
+; LA32-NEXT: xor $a4, $t6, $a3
+; LA32-NEXT: or $a0, $a4, $a0
+; LA32-NEXT: xor $a4, $t7, $a3
+; LA32-NEXT: xor $a3, $t0, $a3
+; LA32-NEXT: or $a3, $a3, $a4
+; LA32-NEXT: or $a0, $a3, $a0
+; LA32-NEXT: sltu $t3, $zero, $a0
+; LA32-NEXT: b .LBB1_17
+; LA32-NEXT: .LBB1_11: # %overflow.no.lhs
+; LA32-NEXT: xor $t2, $a7, $t1
+; LA32-NEXT: xor $t1, $a3, $t1
+; LA32-NEXT: or $t1, $t2, $t1
+; LA32-NEXT: beq $t1, $zero, .LBB1_16
+; LA32-NEXT: # %bb.12: # %overflow.no.lhs.only
+; LA32-NEXT: bltz $a4, .LBB1_18
+; LA32-NEXT: # %bb.13: # %overflow.no.lhs.only
+; LA32-NEXT: move $t1, $a0
+; LA32-NEXT: move $t3, $a4
+; LA32-NEXT: move $t2, $a6
+; LA32-NEXT: move $t4, $t0
+; LA32-NEXT: bgez $a4, .LBB1_19
+; LA32-NEXT: b .LBB1_20
+; LA32-NEXT: .LBB1_14: # %overflow.no.rhs.only
+; LA32-NEXT: bltz $a3, .LBB1_35
+; LA32-NEXT: # %bb.15: # %overflow.no.rhs.only
+; LA32-NEXT: move $t1, $a7
+; LA32-NEXT: move $t3, $a3
+; LA32-NEXT: move $t2, $a5
+; LA32-NEXT: move $t4, $a1
+; LA32-NEXT: bgez $a3, .LBB1_36
+; LA32-NEXT: b .LBB1_37
+; LA32-NEXT: .LBB1_16: # %overflow.no
+; LA32-NEXT: move $t3, $zero
+; LA32-NEXT: mulh.wu $t1, $a6, $a5
+; LA32-NEXT: mul.w $t2, $t0, $a5
+; LA32-NEXT: add.w $t1, $t2, $t1
+; LA32-NEXT: sltu $t2, $t1, $t2
+; LA32-NEXT: mulh.wu $t4, $t0, $a5
+; LA32-NEXT: add.w $t4, $t4, $t2
+; LA32-NEXT: mul.w $t2, $a6, $a1
+; LA32-NEXT: add.w $t1, $t2, $t1
+; LA32-NEXT: sltu $t2, $t1, $t2
+; LA32-NEXT: mulh.wu $t5, $a6, $a1
+; LA32-NEXT: add.w $t2, $t5, $t2
+; LA32-NEXT: add.w $t5, $t4, $t2
+; LA32-NEXT: mul.w $t6, $t0, $a1
+; LA32-NEXT: add.w $t7, $t6, $t5
+; LA32-NEXT: mul.w $t2, $a5, $a0
+; LA32-NEXT: mul.w $t8, $a7, $a6
+; LA32-NEXT: add.w $fp, $t8, $t2
+; LA32-NEXT: add.w $t2, $t7, $fp
+; LA32-NEXT: sltu $t6, $t7, $t6
+; LA32-NEXT: sltu $t7, $t2, $t7
+; LA32-NEXT: sltu $t4, $t5, $t4
+; LA32-NEXT: mulh.wu $t5, $t0, $a1
+; LA32-NEXT: add.w $t4, $t5, $t4
+; LA32-NEXT: add.w $t4, $t4, $t6
+; LA32-NEXT: mul.w $t0, $a7, $t0
+; LA32-NEXT: mulh.wu $a7, $a7, $a6
+; LA32-NEXT: add.w $a7, $a7, $t0
+; LA32-NEXT: mul.w $a3, $a3, $a6
+; LA32-NEXT: add.w $a3, $a7, $a3
+; LA32-NEXT: mulh.wu $a7, $a5, $a0
+; LA32-NEXT: mul.w $a4, $a5, $a4
+; LA32-NEXT: add.w $a4, $a7, $a4
+; LA32-NEXT: mul.w $a0, $a1, $a0
+; LA32-NEXT: add.w $a0, $a4, $a0
+; LA32-NEXT: add.w $a0, $a3, $a0
+; LA32-NEXT: sltu $a1, $fp, $t8
+; LA32-NEXT: add.w $a0, $a0, $a1
+; LA32-NEXT: add.w $a0, $t4, $a0
+; LA32-NEXT: add.w $a1, $a0, $t7
+; LA32-NEXT: .LBB1_17: # %overflow.res
+; LA32-NEXT: mul.w $a0, $a6, $a5
+; LA32-NEXT: b .LBB1_53
+; LA32-NEXT: .LBB1_18:
+; LA32-NEXT: sub.w $t2, $zero, $a0
+; LA32-NEXT: or $t1, $a6, $t0
+; LA32-NEXT: sltu $t3, $zero, $t1
+; LA32-NEXT: sub.w $t1, $t2, $t3
+; LA32-NEXT: sltu $t2, $t2, $t3
+; LA32-NEXT: sltu $t3, $zero, $a0
+; LA32-NEXT: add.w $t3, $a4, $t3
+; LA32-NEXT: add.w $t2, $t3, $t2
+; LA32-NEXT: sub.w $t3, $zero, $t2
+; LA32-NEXT: sub.w $t2, $zero, $a6
+; LA32-NEXT: sltu $t4, $zero, $a6
+; LA32-NEXT: add.w $t4, $t0, $t4
+; LA32-NEXT: sub.w $t4, $zero, $t4
+; LA32-NEXT: bltz $a4, .LBB1_20
+; LA32-NEXT: .LBB1_19: # %overflow.no.lhs.only
+; LA32-NEXT: move $t3, $a4
+; LA32-NEXT: move $t1, $a0
+; LA32-NEXT: .LBB1_20: # %overflow.no.lhs.only
+; LA32-NEXT: bltz $a4, .LBB1_24
+; LA32-NEXT: # %bb.21: # %overflow.no.lhs.only
+; LA32-NEXT: move $t4, $t0
+; LA32-NEXT: bgez $a4, .LBB1_25
+; LA32-NEXT: .LBB1_22: # %overflow.no.lhs.only
+; LA32-NEXT: bltz $a3, .LBB1_26
+; LA32-NEXT: .LBB1_23: # %overflow.no.lhs.only
+; LA32-NEXT: move $a0, $a7
+; LA32-NEXT: move $a6, $a3
+; LA32-NEXT: move $t0, $a5
+; LA32-NEXT: move $t5, $a1
+; LA32-NEXT: bgez $a3, .LBB1_27
+; LA32-NEXT: b .LBB1_28
+; LA32-NEXT: .LBB1_24: # %overflow.no.lhs.only
+; LA32-NEXT: bltz $a4, .LBB1_22
+; LA32-NEXT: .LBB1_25: # %overflow.no.lhs.only
+; LA32-NEXT: move $t2, $a6
+; LA32-NEXT: bgez $a3, .LBB1_23
+; LA32-NEXT: .LBB1_26:
+; LA32-NEXT: sub.w $a6, $zero, $a7
+; LA32-NEXT: or $a0, $a5, $a1
+; LA32-NEXT: sltu $t0, $zero, $a0
+; LA32-NEXT: sub.w $a0, $a6, $t0
+; LA32-NEXT: sltu $a6, $a6, $t0
+; LA32-NEXT: sltu $t0, $zero, $a7
+; LA32-NEXT: add.w $t0, $a3, $t0
+; LA32-NEXT: add.w $a6, $t0, $a6
+; LA32-NEXT: sub.w $a6, $zero, $a6
+; LA32-NEXT: sub.w $t0, $zero, $a5
+; LA32-NEXT: sltu $t5, $zero, $a5
+; LA32-NEXT: add.w $t5, $a1, $t5
+; LA32-NEXT: sub.w $t5, $zero, $t5
+; LA32-NEXT: bltz $a3, .LBB1_28
+; LA32-NEXT: .LBB1_27: # %overflow.no.lhs.only
+; LA32-NEXT: move $a6, $a3
+; LA32-NEXT: move $a0, $a7
+; LA32-NEXT: .LBB1_28: # %overflow.no.lhs.only
+; LA32-NEXT: bltz $a3, .LBB1_30
+; LA32-NEXT: # %bb.29: # %overflow.no.lhs.only
+; LA32-NEXT: move $t5, $a1
+; LA32-NEXT: bgez $a3, .LBB1_31
+; LA32-NEXT: b .LBB1_32
+; LA32-NEXT: .LBB1_30: # %overflow.no.lhs.only
+; LA32-NEXT: bltz $a3, .LBB1_32
+; LA32-NEXT: .LBB1_31: # %overflow.no.lhs.only
+; LA32-NEXT: move $t0, $a5
+; LA32-NEXT: .LBB1_32: # %overflow.no.lhs.only
+; LA32-NEXT: slti $a1, $a4, 0
+; LA32-NEXT: slti $a3, $a3, 0
+; LA32-NEXT: mulh.wu $a4, $t2, $t0
+; LA32-NEXT: mul.w $a5, $t4, $t0
+; LA32-NEXT: add.w $a4, $a5, $a4
+; LA32-NEXT: sltu $a5, $a4, $a5
+; LA32-NEXT: mulh.wu $a7, $t4, $t0
+; LA32-NEXT: add.w $a5, $a7, $a5
+; LA32-NEXT: mul.w $a7, $t2, $t5
+; LA32-NEXT: add.w $a4, $a7, $a4
+; LA32-NEXT: sltu $a7, $a4, $a7
+; LA32-NEXT: mulh.wu $t6, $t2, $t5
+; LA32-NEXT: add.w $a7, $t6, $a7
+; LA32-NEXT: add.w $a7, $a5, $a7
+; LA32-NEXT: mul.w $t6, $t4, $t5
+; LA32-NEXT: add.w $t7, $t6, $a7
+; LA32-NEXT: mul.w $t8, $t0, $t1
+; LA32-NEXT: add.w $t8, $t7, $t8
+; LA32-NEXT: sltu $fp, $t8, $t7
+; LA32-NEXT: sltu $t6, $t7, $t6
+; LA32-NEXT: sltu $a5, $a7, $a5
+; LA32-NEXT: mulh.wu $a7, $t4, $t5
+; LA32-NEXT: add.w $a5, $a7, $a5
+; LA32-NEXT: add.w $a5, $a5, $t6
+; LA32-NEXT: mulh.wu $a7, $t0, $t1
+; LA32-NEXT: mul.w $t6, $t0, $t3
+; LA32-NEXT: add.w $a7, $a7, $t6
+; LA32-NEXT: mul.w $t5, $t5, $t1
+; LA32-NEXT: add.w $a7, $a7, $t5
+; LA32-NEXT: add.w $a5, $a5, $a7
+; LA32-NEXT: add.w $a7, $a5, $fp
+; LA32-NEXT: mul.w $a5, $t2, $t0
+; LA32-NEXT: mulh.wu $t0, $t2, $a0
+; LA32-NEXT: mul.w $t5, $t4, $a0
+; LA32-NEXT: add.w $t0, $t5, $t0
+; LA32-NEXT: sltu $t5, $t0, $t5
+; LA32-NEXT: mulh.wu $t6, $t4, $a0
+; LA32-NEXT: add.w $t5, $t6, $t5
+; LA32-NEXT: mul.w $t6, $t2, $a6
+; LA32-NEXT: add.w $t7, $t6, $t0
+; LA32-NEXT: sltu $t0, $t7, $t6
+; LA32-NEXT: mulh.wu $t6, $t2, $a6
+; LA32-NEXT: add.w $t0, $t6, $t0
+; LA32-NEXT: add.w $t6, $t5, $t0
+; LA32-NEXT: mul.w $fp, $t4, $a6
+; LA32-NEXT: add.w $s0, $fp, $t6
+; LA32-NEXT: mul.w $t0, $a0, $t1
+; LA32-NEXT: add.w $t0, $s0, $t0
+; LA32-NEXT: sltu $s1, $t0, $s0
+; LA32-NEXT: sltu $fp, $s0, $fp
+; LA32-NEXT: sltu $t5, $t6, $t5
+; LA32-NEXT: mulh.wu $t4, $t4, $a6
+; LA32-NEXT: add.w $t4, $t4, $t5
+; LA32-NEXT: add.w $t4, $t4, $fp
+; LA32-NEXT: mulh.wu $t5, $a0, $t1
+; LA32-NEXT: mul.w $t3, $a0, $t3
+; LA32-NEXT: add.w $t3, $t5, $t3
+; LA32-NEXT: mul.w $a6, $a6, $t1
+; LA32-NEXT: add.w $a6, $t3, $a6
+; LA32-NEXT: add.w $t3, $t4, $a6
+; LA32-NEXT: mul.w $a0, $t2, $a0
+; LA32-NEXT: add.w $t2, $a7, $t7
+; LA32-NEXT: add.w $a6, $t8, $a0
+; LA32-NEXT: sltu $t1, $a6, $t8
+; LA32-NEXT: add.w $t2, $t2, $t1
+; LA32-NEXT: add.w $a0, $t3, $s1
+; LA32-NEXT: beq $t2, $a7, .LBB1_34
+; LA32-NEXT: # %bb.33: # %overflow.no.lhs.only
+; LA32-NEXT: sltu $t1, $t2, $a7
+; LA32-NEXT: .LBB1_34: # %overflow.no.lhs.only
+; LA32-NEXT: add.w $a7, $t0, $t1
+; LA32-NEXT: sltu $t0, $a7, $t0
+; LA32-NEXT: add.w $t0, $a0, $t0
+; LA32-NEXT: xor $a1, $a3, $a1
+; LA32-NEXT: sub.w $a3, $zero, $a1
+; LA32-NEXT: xor $a4, $a4, $a3
+; LA32-NEXT: xor $a5, $a5, $a3
+; LA32-NEXT: add.w $a0, $a5, $a1
+; LA32-NEXT: sltu $a5, $a0, $a5
+; LA32-NEXT: add.w $t1, $a4, $a5
+; LA32-NEXT: sltui $a4, $t1, 1
+; LA32-NEXT: sltu $a1, $a0, $a1
+; LA32-NEXT: and $a4, $a4, $a1
+; LA32-NEXT: xor $a1, $t2, $a3
+; LA32-NEXT: xor $a5, $a6, $a3
+; LA32-NEXT: add.w $t2, $a5, $a4
+; LA32-NEXT: sltu $a5, $t2, $a5
+; LA32-NEXT: add.w $a1, $a1, $a5
+; LA32-NEXT: sltui $a5, $a1, 1
+; LA32-NEXT: sltu $a4, $t2, $a4
+; LA32-NEXT: and $a4, $a5, $a4
+; LA32-NEXT: xor $a5, $t0, $a3
+; LA32-NEXT: xor $a3, $a7, $a3
+; LA32-NEXT: add.w $a4, $a3, $a4
+; LA32-NEXT: sltu $a3, $a4, $a3
+; LA32-NEXT: add.w $a3, $a5, $a3
+; LA32-NEXT: or $a3, $a4, $a3
+; LA32-NEXT: b .LBB1_52
+; LA32-NEXT: .LBB1_35:
+; LA32-NEXT: sub.w $t2, $zero, $a7
+; LA32-NEXT: or $t1, $a5, $a1
+; LA32-NEXT: sltu $t3, $zero, $t1
+; LA32-NEXT: sub.w $t1, $t2, $t3
+; LA32-NEXT: sltu $t2, $t2, $t3
+; LA32-NEXT: sltu $t3, $zero, $a7
+; LA32-NEXT: add.w $t3, $a3, $t3
+; LA32-NEXT: add.w $t2, $t3, $t2
+; LA32-NEXT: sub.w $t3, $zero, $t2
+; LA32-NEXT: sub.w $t2, $zero, $a5
+; LA32-NEXT: sltu $t4, $zero, $a5
+; LA32-NEXT: add.w $t4, $a1, $t4
+; LA32-NEXT: sub.w $t4, $zero, $t4
+; LA32-NEXT: bltz $a3, .LBB1_37
+; LA32-NEXT: .LBB1_36: # %overflow.no.rhs.only
+; LA32-NEXT: move $t3, $a3
+; LA32-NEXT: move $t1, $a7
+; LA32-NEXT: .LBB1_37: # %overflow.no.rhs.only
+; LA32-NEXT: bltz $a3, .LBB1_41
+; LA32-NEXT: # %bb.38: # %overflow.no.rhs.only
+; LA32-NEXT: move $t4, $a1
+; LA32-NEXT: bgez $a3, .LBB1_42
+; LA32-NEXT: .LBB1_39: # %overflow.no.rhs.only
+; LA32-NEXT: bltz $a4, .LBB1_43
+; LA32-NEXT: .LBB1_40: # %overflow.no.rhs.only
+; LA32-NEXT: move $a1, $a0
+; LA32-NEXT: move $a5, $a4
+; LA32-NEXT: move $a7, $a6
+; LA32-NEXT: move $t5, $t0
+; LA32-NEXT: bgez $a4, .LBB1_44
+; LA32-NEXT: b .LBB1_45
+; LA32-NEXT: .LBB1_41: # %overflow.no.rhs.only
+; LA32-NEXT: bltz $a3, .LBB1_39
+; LA32-NEXT: .LBB1_42: # %overflow.no.rhs.only
+; LA32-NEXT: move $t2, $a5
+; LA32-NEXT: bgez $a4, .LBB1_40
+; LA32-NEXT: .LBB1_43:
+; LA32-NEXT: sub.w $a5, $zero, $a0
+; LA32-NEXT: or $a1, $a6, $t0
+; LA32-NEXT: sltu $a7, $zero, $a1
+; LA32-NEXT: sub.w $a1, $a5, $a7
+; LA32-NEXT: sltu $a5, $a5, $a7
+; LA32-NEXT: sltu $a7, $zero, $a0
+; LA32-NEXT: add.w $a7, $a4, $a7
+; LA32-NEXT: add.w $a5, $a7, $a5
+; LA32-NEXT: sub.w $a5, $zero, $a5
+; LA32-NEXT: sub.w $a7, $zero, $a6
+; LA32-NEXT: sltu $t5, $zero, $a6
+; LA32-NEXT: add.w $t5, $t0, $t5
+; LA32-NEXT: sub.w $t5, $zero, $t5
+; LA32-NEXT: bltz $a4, .LBB1_45
+; LA32-NEXT: .LBB1_44: # %overflow.no.rhs.only
+; LA32-NEXT: move $a5, $a4
+; LA32-NEXT: move $a1, $a0
+; LA32-NEXT: .LBB1_45: # %overflow.no.rhs.only
+; LA32-NEXT: bltz $a4, .LBB1_47
+; LA32-NEXT: # %bb.46: # %overflow.no.rhs.only
+; LA32-NEXT: move $t5, $t0
+; LA32-NEXT: bgez $a4, .LBB1_48
+; LA32-NEXT: b .LBB1_49
+; LA32-NEXT: .LBB1_47: # %overflow.no.rhs.only
+; LA32-NEXT: bltz $a4, .LBB1_49
+; LA32-NEXT: .LBB1_48: # %overflow.no.rhs.only
+; LA32-NEXT: move $a7, $a6
+; LA32-NEXT: .LBB1_49: # %overflow.no.rhs.only
+; LA32-NEXT: slti $a0, $a3, 0
+; LA32-NEXT: slti $a3, $a4, 0
+; LA32-NEXT: mulh.wu $a4, $t2, $a7
+; LA32-NEXT: mul.w $a6, $t4, $a7
+; LA32-NEXT: add.w $a4, $a6, $a4
+; LA32-NEXT: sltu $a6, $a4, $a6
+; LA32-NEXT: mulh.wu $t0, $t4, $a7
+; LA32-NEXT: add.w $a6, $t0, $a6
+; LA32-NEXT: mul.w $t0, $t2, $t5
+; LA32-NEXT: add.w $a4, $t0, $a4
+; LA32-NEXT: sltu $t0, $a4, $t0
+; LA32-NEXT: mulh.wu $t6, $t2, $t5
+; LA32-NEXT: add.w $t0, $t6, $t0
+; LA32-NEXT: add.w $t0, $a6, $t0
+; LA32-NEXT: mul.w $t6, $t4, $t5
+; LA32-NEXT: add.w $t7, $t6, $t0
+; LA32-NEXT: mul.w $t8, $a7, $t1
+; LA32-NEXT: add.w $t8, $t7, $t8
+; LA32-NEXT: sltu $fp, $t8, $t7
+; LA32-NEXT: sltu $t6, $t7, $t6
+; LA32-NEXT: sltu $a6, $t0, $a6
+; LA32-NEXT: mulh.wu $t0, $t4, $t5
+; LA32-NEXT: add.w $a6, $t0, $a6
+; LA32-NEXT: add.w $a6, $a6, $t6
+; LA32-NEXT: mulh.wu $t0, $a7, $t1
+; LA32-NEXT: mul.w $t6, $a7, $t3
+; LA32-NEXT: add.w $t0, $t0, $t6
+; LA32-NEXT: mul.w $t5, $t5, $t1
+; LA32-NEXT: add.w $t0, $t0, $t5
+; LA32-NEXT: add.w $a6, $a6, $t0
+; LA32-NEXT: add.w $t0, $a6, $fp
+; LA32-NEXT: mul.w $a6, $t2, $a7
+; LA32-NEXT: mulh.wu $a7, $t2, $a1
+; LA32-NEXT: mul.w $t5, $t4, $a1
+; LA32-NEXT: add.w $a7, $t5, $a7
+; LA32-NEXT: sltu $t5, $a7, $t5
+; LA32-NEXT: mulh.wu $t6, $t4, $a1
+; LA32-NEXT: add.w $t5, $t6, $t5
+; LA32-NEXT: mul.w $t6, $t2, $a5
+; LA32-NEXT: add.w $t7, $t6, $a7
+; LA32-NEXT: sltu $a7, $t7, $t6
+; LA32-NEXT: mulh.wu $t6, $t2, $a5
+; LA32-NEXT: add.w $a7, $t6, $a7
+; LA32-NEXT: add.w $t6, $t5, $a7
+; LA32-NEXT: mul.w $fp, $t4, $a5
+; LA32-NEXT: add.w $s0, $fp, $t6
+; LA32-NEXT: mul.w $a7, $a1, $t1
+; LA32-NEXT: add.w $a7, $s0, $a7
+; LA32-NEXT: sltu $s1, $a7, $s0
+; LA32-NEXT: sltu $fp, $s0, $fp
+; LA32-NEXT: sltu $t5, $t6, $t5
+; LA32-NEXT: mulh.wu $t4, $t4, $a5
+; LA32-NEXT: add.w $t4, $t4, $t5
+; LA32-NEXT: add.w $t4, $t4, $fp
+; LA32-NEXT: mulh.wu $t5, $a1, $t1
+; LA32-NEXT: mul.w $t3, $a1, $t3
+; LA32-NEXT: add.w $t3, $t5, $t3
+; LA32-NEXT: mul.w $a5, $a5, $t1
+; LA32-NEXT: add.w $a5, $t3, $a5
+; LA32-NEXT: add.w $t1, $t4, $a5
+; LA32-NEXT: mul.w $a1, $t2, $a1
+; LA32-NEXT: add.w $a5, $t0, $t7
+; LA32-NEXT: add.w $a1, $t8, $a1
+; LA32-NEXT: sltu $t2, $a1, $t8
+; LA32-NEXT: add.w $a5, $a5, $t2
+; LA32-NEXT: add.w $t1, $t1, $s1
+; LA32-NEXT: beq $a5, $t0, .LBB1_51
+; LA32-NEXT: # %bb.50: # %overflow.no.rhs.only
+; LA32-NEXT: sltu $t2, $a5, $t0
+; LA32-NEXT: .LBB1_51: # %overflow.no.rhs.only
+; LA32-NEXT: add.w $t0, $a7, $t2
+; LA32-NEXT: sltu $a7, $t0, $a7
+; LA32-NEXT: add.w $a7, $t1, $a7
+; LA32-NEXT: xor $a3, $a0, $a3
+; LA32-NEXT: sub.w $t3, $zero, $a3
+; LA32-NEXT: xor $a4, $a4, $t3
+; LA32-NEXT: xor $a6, $a6, $t3
+; LA32-NEXT: add.w $a0, $a6, $a3
+; LA32-NEXT: sltu $a6, $a0, $a6
+; LA32-NEXT: add.w $t1, $a4, $a6
+; LA32-NEXT: sltui $a4, $t1, 1
+; LA32-NEXT: sltu $a3, $a0, $a3
+; LA32-NEXT: and $a3, $a4, $a3
+; LA32-NEXT: xor $a4, $a5, $t3
+; LA32-NEXT: xor $a1, $a1, $t3
+; LA32-NEXT: add.w $t2, $a1, $a3
+; LA32-NEXT: sltu $a1, $t2, $a1
+; LA32-NEXT: add.w $a1, $a4, $a1
+; LA32-NEXT: sltui $a4, $a1, 1
+; LA32-NEXT: sltu $a3, $t2, $a3
+; LA32-NEXT: and $a3, $a4, $a3
+; LA32-NEXT: xor $a4, $a7, $t3
+; LA32-NEXT: xor $a5, $t0, $t3
+; LA32-NEXT: add.w $a3, $a5, $a3
+; LA32-NEXT: sltu $a5, $a3, $a5
+; LA32-NEXT: add.w $a4, $a4, $a5
+; LA32-NEXT: or $a3, $a3, $a4
+; LA32-NEXT: .LBB1_52: # %overflow.res
+; LA32-NEXT: sltu $t3, $zero, $a3
+; LA32-NEXT: .LBB1_53: # %overflow.res
+; LA32-NEXT: st.w $a0, $a2, 0
+; LA32-NEXT: st.w $t1, $a2, 4
+; LA32-NEXT: st.w $t2, $a2, 8
+; LA32-NEXT: andi $a0, $t3, 1
+; LA32-NEXT: st.w $a1, $a2, 12
; LA32-NEXT: ld.w $s8, $sp, 4 # 4-byte Folded Reload
; LA32-NEXT: ld.w $s7, $sp, 8 # 4-byte Folded Reload
; LA32-NEXT: ld.w $s6, $sp, 12 # 4-byte Folded Reload
@@ -295,7 +838,13 @@ define zeroext i1 @smuloi128(i128 %v1, i128 %v2, ptr %res) {
; LA32-NEXT: ret
;
; LA64-LABEL: smuloi128:
-; LA64: # %bb.0:
+; LA64: # %bb.0: # %overflow.entry
+; LA64-NEXT: srai.d $a6, $a0, 63
+; LA64-NEXT: srai.d $a5, $a2, 63
+; LA64-NEXT: beq $a1, $a6, .LBB1_3
+; LA64-NEXT: # %bb.1: # %overflow.lhs
+; LA64-NEXT: beq $a3, $a5, .LBB1_5
+; LA64-NEXT: # %bb.2: # %overflow
; LA64-NEXT: mulh.du $a5, $a0, $a2
; LA64-NEXT: mul.d $a6, $a1, $a2
; LA64-NEXT: add.d $a5, $a6, $a5
@@ -329,11 +878,129 @@ define zeroext i1 @smuloi128(i128 %v1, i128 %v2, ptr %res) {
; LA64-NEXT: xor $a1, $a1, $a6
; LA64-NEXT: xor $a3, $a3, $a6
; LA64-NEXT: or $a1, $a3, $a1
-; LA64-NEXT: sltu $a1, $zero, $a1
+; LA64-NEXT: sltu $a6, $zero, $a1
+; LA64-NEXT: b .LBB1_8
+; LA64-NEXT: .LBB1_3: # %overflow.no.lhs
+; LA64-NEXT: beq $a3, $a5, .LBB1_7
+; LA64-NEXT: # %bb.4: # %overflow.no.lhs.only
+; LA64-NEXT: slti $a5, $a1, 0
+; LA64-NEXT: masknez $a6, $a0, $a5
+; LA64-NEXT: sub.d $a7, $zero, $a0
+; LA64-NEXT: maskeqz $a7, $a7, $a5
+; LA64-NEXT: or $a7, $a7, $a6
+; LA64-NEXT: masknez $t0, $a1, $a5
+; LA64-NEXT: sltu $a0, $zero, $a0
+; LA64-NEXT: add.d $a0, $a1, $a0
+; LA64-NEXT: sub.d $a0, $zero, $a0
+; LA64-NEXT: maskeqz $a0, $a0, $a5
+; LA64-NEXT: or $a0, $a0, $t0
+; LA64-NEXT: maskeqz $a0, $a0, $a5
+; LA64-NEXT: or $a0, $a0, $t0
+; LA64-NEXT: maskeqz $a1, $a7, $a5
+; LA64-NEXT: or $a1, $a1, $a6
+; LA64-NEXT: slti $a6, $a3, 0
+; LA64-NEXT: masknez $a7, $a2, $a6
+; LA64-NEXT: sub.d $t0, $zero, $a2
+; LA64-NEXT: maskeqz $t0, $t0, $a6
+; LA64-NEXT: or $t0, $t0, $a7
+; LA64-NEXT: masknez $t1, $a3, $a6
+; LA64-NEXT: sltu $a2, $zero, $a2
+; LA64-NEXT: add.d $a2, $a3, $a2
+; LA64-NEXT: sub.d $a2, $zero, $a2
+; LA64-NEXT: maskeqz $a2, $a2, $a6
+; LA64-NEXT: or $a2, $a2, $t1
+; LA64-NEXT: maskeqz $a2, $a2, $a6
+; LA64-NEXT: or $a2, $a2, $t1
+; LA64-NEXT: maskeqz $a3, $t0, $a6
+; LA64-NEXT: or $a3, $a3, $a7
+; LA64-NEXT: mulh.du $a7, $a1, $a3
+; LA64-NEXT: mul.d $t0, $a0, $a3
+; LA64-NEXT: add.d $a7, $a7, $t0
+; LA64-NEXT: mul.d $a3, $a1, $a3
+; LA64-NEXT: mul.d $a0, $a0, $a2
+; LA64-NEXT: mulh.du $t0, $a1, $a2
+; LA64-NEXT: add.d $a0, $t0, $a0
+; LA64-NEXT: mul.d $a1, $a1, $a2
+; LA64-NEXT: add.d $a1, $a7, $a1
+; LA64-NEXT: sltu $a2, $a1, $a7
+; LA64-NEXT: add.d $a2, $a0, $a2
+; LA64-NEXT: xor $a5, $a6, $a5
+; LA64-NEXT: sub.d $a6, $zero, $a5
+; LA64-NEXT: xor $a0, $a3, $a6
+; LA64-NEXT: add.d $a0, $a0, $a5
+; LA64-NEXT: sltu $a3, $a0, $a5
+; LA64-NEXT: xor $a1, $a1, $a6
+; LA64-NEXT: add.d $a5, $a1, $a3
+; LA64-NEXT: sltu $a1, $a5, $a3
+; LA64-NEXT: b .LBB1_6
+; LA64-NEXT: .LBB1_5: # %overflow.no.rhs.only
+; LA64-NEXT: slti $a5, $a3, 0
+; LA64-NEXT: masknez $a6, $a2, $a5
+; LA64-NEXT: sub.d $a7, $zero, $a2
+; LA64-NEXT: maskeqz $a7, $a7, $a5
+; LA64-NEXT: or $a7, $a7, $a6
+; LA64-NEXT: masknez $t0, $a3, $a5
+; LA64-NEXT: sltu $a2, $zero, $a2
+; LA64-NEXT: add.d $a2, $a3, $a2
+; LA64-NEXT: sub.d $a2, $zero, $a2
+; LA64-NEXT: maskeqz $a2, $a2, $a5
+; LA64-NEXT: or $a2, $a2, $t0
+; LA64-NEXT: maskeqz $a2, $a2, $a5
+; LA64-NEXT: or $a2, $a2, $t0
+; LA64-NEXT: maskeqz $a3, $a7, $a5
+; LA64-NEXT: or $a3, $a3, $a6
+; LA64-NEXT: slti $a6, $a1, 0
+; LA64-NEXT: masknez $a7, $a0, $a6
+; LA64-NEXT: sub.d $t0, $zero, $a0
+; LA64-NEXT: maskeqz $t0, $t0, $a6
+; LA64-NEXT: or $t0, $t0, $a7
+; LA64-NEXT: masknez $t1, $a1, $a6
+; LA64-NEXT: sltu $a0, $zero, $a0
+; LA64-NEXT: add.d $a0, $a1, $a0
+; LA64-NEXT: sub.d $a0, $zero, $a0
+; LA64-NEXT: maskeqz $a0, $a0, $a6
+; LA64-NEXT: or $a0, $a0, $t1
+; LA64-NEXT: maskeqz $a0, $a0, $a6
+; LA64-NEXT: or $a0, $a0, $t1
+; LA64-NEXT: maskeqz $a1, $t0, $a6
+; LA64-NEXT: or $a1, $a1, $a7
+; LA64-NEXT: mulh.du $a7, $a3, $a1
+; LA64-NEXT: mul.d $t0, $a2, $a1
+; LA64-NEXT: add.d $a7, $a7, $t0
+; LA64-NEXT: mul.d $a1, $a3, $a1
+; LA64-NEXT: mul.d $a2, $a2, $a0
+; LA64-NEXT: mulh.du $t0, $a3, $a0
+; LA64-NEXT: add.d $a2, $t0, $a2
+; LA64-NEXT: mul.d $a0, $a3, $a0
+; LA64-NEXT: add.d $a3, $a7, $a0
+; LA64-NEXT: sltu $a0, $a3, $a7
+; LA64-NEXT: add.d $a2, $a2, $a0
+; LA64-NEXT: xor $a5, $a5, $a6
+; LA64-NEXT: sub.d $a6, $zero, $a5
+; LA64-NEXT: xor $a0, $a1, $a6
+; LA64-NEXT: add.d $a0, $a0, $a5
+; LA64-NEXT: sltu $a1, $a0, $a5
+; LA64-NEXT: xor $a3, $a3, $a6
+; LA64-NEXT: add.d $a5, $a3, $a1
+; LA64-NEXT: sltu $a1, $a5, $a1
+; LA64-NEXT: .LBB1_6: # %overflow.res
+; LA64-NEXT: xor $a2, $a2, $a6
+; LA64-NEXT: add.d $a1, $a2, $a1
+; LA64-NEXT: sltu $a6, $zero, $a1
+; LA64-NEXT: b .LBB1_9
+; LA64-NEXT: .LBB1_7: # %overflow.no
+; LA64-NEXT: move $a6, $zero
+; LA64-NEXT: mulh.du $a5, $a0, $a2
+; LA64-NEXT: mul.d $a3, $a0, $a3
+; LA64-NEXT: add.d $a3, $a5, $a3
+; LA64-NEXT: mul.d $a1, $a1, $a2
+; LA64-NEXT: add.d $a5, $a3, $a1
+; LA64-NEXT: .LBB1_8: # %overflow.res
; LA64-NEXT: mul.d $a0, $a0, $a2
+; LA64-NEXT: .LBB1_9: # %overflow.res
; LA64-NEXT: st.d $a0, $a4, 0
+; LA64-NEXT: andi $a0, $a6, 1
; LA64-NEXT: st.d $a5, $a4, 8
-; LA64-NEXT: move $a0, $a1
; LA64-NEXT: ret
%t = call {i128, i1} @llvm.smul.with.overflow.i128(i128 %v1, i128 %v2)
%val = extractvalue {i128, i1} %t, 0
diff --git a/llvm/test/CodeGen/PowerPC/umulo-128-legalisation-lowering.ll b/llvm/test/CodeGen/PowerPC/umulo-128-legalisation-lowering.ll
index f573fdab1b153..5bebf54c3c1a0 100644
--- a/llvm/test/CodeGen/PowerPC/umulo-128-legalisation-lowering.ll
+++ b/llvm/test/CodeGen/PowerPC/umulo-128-legalisation-lowering.ll
@@ -4,136 +4,343 @@
define { i128, i8 } @muloti_test(i128 %l, i128 %r) unnamed_addr #0 {
; PPC64-LABEL: muloti_test:
-; PPC64: # %bb.0: # %start
-; PPC64-NEXT: addic 9, 5, -1
-; PPC64-NEXT: mulld 10, 5, 4
-; PPC64-NEXT: mulld 11, 3, 6
-; PPC64-NEXT: subfe 9, 9, 5
-; PPC64-NEXT: add 10, 11, 10
-; PPC64-NEXT: addic 11, 3, -1
-; PPC64-NEXT: mulhdu 8, 3, 6
-; PPC64-NEXT: subfe 3, 11, 3
-; PPC64-NEXT: and 3, 3, 9
-; PPC64-NEXT: addic 9, 8, -1
-; PPC64-NEXT: subfe 8, 9, 8
-; PPC64-NEXT: or 3, 3, 8
-; PPC64-NEXT: mulhdu 5, 5, 4
-; PPC64-NEXT: addic 8, 5, -1
-; PPC64-NEXT: subfe 5, 8, 5
-; PPC64-NEXT: li 7, 0
-; PPC64-NEXT: or 5, 3, 5
-; PPC64-NEXT: mulhdu 8, 4, 6
-; PPC64-NEXT: addc 3, 8, 10
-; PPC64-NEXT: addze 7, 7
-; PPC64-NEXT: addic 8, 7, -1
-; PPC64-NEXT: subfe 7, 8, 7
-; PPC64-NEXT: or 5, 5, 7
+; PPC64: # %bb.0: # %overflow.entry
+; PPC64-NEXT: cmpldi 3, 0
+; PPC64-NEXT: beq 0, .LBB0_3
+; PPC64-NEXT: # %bb.1: # %overflow.lhs
+; PPC64-NEXT: cmpldi 5, 0
+; PPC64-NEXT: beq 0, .LBB0_5
+; PPC64-NEXT: # %bb.2: # %overflow
+; PPC64-NEXT: mulhdu. 7, 3, 6
+; PPC64-NEXT: mcrf 5, 0
+; PPC64-NEXT: cmpdi 6, 5, 0
+; PPC64-NEXT: mulhdu. 7, 5, 4
+; PPC64-NEXT: mcrf 1, 0
+; PPC64-NEXT: cmpdi 3, 0
+; PPC64-NEXT: mulld 5, 5, 4
+; PPC64-NEXT: mulld 3, 3, 6
+; PPC64-NEXT: crnor 20, 26, 2
+; PPC64-NEXT: add 3, 3, 5
+; PPC64-NEXT: crorc 20, 20, 22
+; PPC64-NEXT: mulhdu 7, 4, 6
+; PPC64-NEXT: addc 3, 7, 3
+; PPC64-NEXT: li 5, 0
+; PPC64-NEXT: addze. 5, 5
+; PPC64-NEXT: crorc 20, 20, 6
+; PPC64-NEXT: crorc 20, 20, 2
; PPC64-NEXT: mulld 4, 4, 6
+; PPC64-NEXT: b .LBB0_7
+; PPC64-NEXT: .LBB0_3: # %overflow.no.lhs
+; PPC64-NEXT: cmpldi 5, 0
+; PPC64-NEXT: beq 0, .LBB0_6
+; PPC64-NEXT: # %bb.4: # %overflow.no.lhs.only
+; PPC64-NEXT: mulhdu 7, 4, 6
+; PPC64-NEXT: mulld 8, 3, 6
+; PPC64-NEXT: mulld 9, 3, 5
+; PPC64-NEXT: add 3, 7, 8
+; PPC64-NEXT: mulhdu 7, 4, 5
+; PPC64-NEXT: mulld 5, 4, 5
+; PPC64-NEXT: mulld 4, 4, 6
+; PPC64-NEXT: addc 3, 3, 5
+; PPC64-NEXT: adde. 5, 7, 9
+; PPC64-NEXT: crnot 20, 2
+; PPC64-NEXT: b .LBB0_7
+; PPC64-NEXT: .LBB0_5: # %overflow.no.rhs.only
+; PPC64-NEXT: mulhdu 7, 6, 4
+; PPC64-NEXT: mulld 8, 5, 4
+; PPC64-NEXT: mulld 5, 5, 3
+; PPC64-NEXT: mulld 4, 6, 4
+; PPC64-NEXT: add 7, 7, 8
+; PPC64-NEXT: mulhdu 8, 6, 3
+; PPC64-NEXT: mulld 3, 6, 3
+; PPC64-NEXT: addc 3, 7, 3
+; PPC64-NEXT: adde. 5, 8, 5
+; PPC64-NEXT: crnot 20, 2
+; PPC64-NEXT: b .LBB0_7
+; PPC64-NEXT: .LBB0_6: # %overflow.no
+; PPC64-NEXT: mulld 5, 4, 5
+; PPC64-NEXT: mulhdu 7, 4, 6
+; PPC64-NEXT: mulld 3, 3, 6
+; PPC64-NEXT: add 5, 7, 5
+; PPC64-NEXT: mulld 4, 4, 6
+; PPC64-NEXT: add 3, 5, 3
+; PPC64-NEXT: crxor 20, 20, 20
+; PPC64-NEXT: .LBB0_7: # %overflow.res
+; PPC64-NEXT: li 5, 1
+; PPC64-NEXT: bclr 12, 20, 0
+; PPC64-NEXT: # %bb.8: # %overflow.res
+; PPC64-NEXT: li 5, 0
; PPC64-NEXT: blr
;
; PPC32-LABEL: muloti_test:
-; PPC32: # %bb.0: # %start
-; PPC32-NEXT: stwu 1, -64(1)
-; PPC32-NEXT: stw 26, 40(1) # 4-byte Folded Spill
+; PPC32: # %bb.0: # %overflow.entry
+; PPC32-NEXT: stwu 1, -80(1)
+; PPC32-NEXT: stw 30, 72(1) # 4-byte Folded Spill
; PPC32-NEXT: mfcr 12
-; PPC32-NEXT: stw 27, 44(1) # 4-byte Folded Spill
-; PPC32-NEXT: mullw 27, 9, 4
-; PPC32-NEXT: stw 21, 20(1) # 4-byte Folded Spill
-; PPC32-NEXT: mr 11, 7
-; PPC32-NEXT: stw 22, 24(1) # 4-byte Folded Spill
-; PPC32-NEXT: li 7, 0
-; PPC32-NEXT: mullw 26, 3, 10
-; PPC32-NEXT: stw 23, 28(1) # 4-byte Folded Spill
-; PPC32-NEXT: add 27, 26, 27
-; PPC32-NEXT: stw 24, 32(1) # 4-byte Folded Spill
-; PPC32-NEXT: cmpwi 7, 11, 0
-; PPC32-NEXT: stw 25, 36(1) # 4-byte Folded Spill
-; PPC32-NEXT: mullw 24, 11, 6
-; PPC32-NEXT: stw 28, 48(1) # 4-byte Folded Spill
-; PPC32-NEXT: stw 29, 52(1) # 4-byte Folded Spill
-; PPC32-NEXT: stw 30, 56(1) # 4-byte Folded Spill
-; PPC32-NEXT: mulhwu 0, 8, 6
-; PPC32-NEXT: stw 12, 16(1)
-; PPC32-NEXT: mr 12, 5
-; PPC32-NEXT: mulhwu 5, 4, 10
-; PPC32-NEXT: addc 5, 5, 27
-; PPC32-NEXT: addze 27, 7
-; PPC32-NEXT: cmpwi 2, 27, 0
-; PPC32-NEXT: mullw 25, 12, 8
-; PPC32-NEXT: add 26, 24, 25
-; PPC32-NEXT: addc 0, 0, 26
-; PPC32-NEXT: addze 26, 7
-; PPC32-NEXT: mullw 23, 8, 6
-; PPC32-NEXT: mullw 22, 4, 10
-; PPC32-NEXT: addc 24, 22, 23
-; PPC32-NEXT: adde 22, 5, 0
-; PPC32-NEXT: mulhwu 29, 6, 10
-; PPC32-NEXT: mullw 21, 12, 10
-; PPC32-NEXT: addc 5, 21, 29
-; PPC32-NEXT: mulhwu 30, 12, 10
-; PPC32-NEXT: addze 0, 30
-; PPC32-NEXT: mullw 23, 6, 9
-; PPC32-NEXT: addc 5, 23, 5
-; PPC32-NEXT: mulhwu 28, 6, 9
-; PPC32-NEXT: addze 29, 28
-; PPC32-NEXT: addc 0, 0, 29
-; PPC32-NEXT: addze 29, 7
-; PPC32-NEXT: mullw 30, 12, 9
-; PPC32-NEXT: addc 0, 30, 0
-; PPC32-NEXT: mulhwu 25, 12, 9
-; PPC32-NEXT: adde 30, 25, 29
-; PPC32-NEXT: addc 0, 0, 24
-; PPC32-NEXT: adde 30, 30, 22
-; PPC32-NEXT: addze. 29, 7
+; PPC32-NEXT: or. 30, 4, 3
+; PPC32-NEXT: stw 18, 24(1) # 4-byte Folded Spill
+; PPC32-NEXT: stw 19, 28(1) # 4-byte Folded Spill
+; PPC32-NEXT: stw 20, 32(1) # 4-byte Folded Spill
+; PPC32-NEXT: stw 21, 36(1) # 4-byte Folded Spill
+; PPC32-NEXT: stw 22, 40(1) # 4-byte Folded Spill
+; PPC32-NEXT: stw 23, 44(1) # 4-byte Folded Spill
+; PPC32-NEXT: stw 24, 48(1) # 4-byte Folded Spill
+; PPC32-NEXT: stw 25, 52(1) # 4-byte Folded Spill
+; PPC32-NEXT: stw 26, 56(1) # 4-byte Folded Spill
+; PPC32-NEXT: stw 27, 60(1) # 4-byte Folded Spill
+; PPC32-NEXT: stw 28, 64(1) # 4-byte Folded Spill
+; PPC32-NEXT: stw 29, 68(1) # 4-byte Folded Spill
+; PPC32-NEXT: stw 12, 20(1)
+; PPC32-NEXT: beq 0, .LBB0_3
+; PPC32-NEXT: # %bb.1: # %overflow.lhs
+; PPC32-NEXT: or. 29, 8, 7
+; PPC32-NEXT: beq 0, .LBB0_5
+; PPC32-NEXT: # %bb.2: # %overflow
+; PPC32-NEXT: mullw 28, 9, 4
+; PPC32-NEXT: li 19, 0
+; PPC32-NEXT: cmpwi 2, 7, 0
+; PPC32-NEXT: cmpwi 3, 5, 0
+; PPC32-NEXT: cmpwi 7, 3, 0
+; PPC32-NEXT: mullw 27, 3, 10
+; PPC32-NEXT: add 28, 27, 28
+; PPC32-NEXT: mulhwu 11, 4, 10
+; PPC32-NEXT: addc 11, 11, 28
+; PPC32-NEXT: addze 28, 19
+; PPC32-NEXT: mullw 24, 5, 8
+; PPC32-NEXT: mullw 23, 7, 6
+; PPC32-NEXT: add 27, 23, 24
+; PPC32-NEXT: mulhwu 12, 8, 6
+; PPC32-NEXT: addc 12, 12, 27
+; PPC32-NEXT: addze 27, 19
+; PPC32-NEXT: mullw 22, 8, 6
+; PPC32-NEXT: mullw 21, 4, 10
+; PPC32-NEXT: addc 23, 21, 22
+; PPC32-NEXT: adde 21, 11, 12
+; PPC32-NEXT: mulhwu 26, 6, 10
+; PPC32-NEXT: mullw 20, 5, 10
+; PPC32-NEXT: addc 11, 20, 26
+; PPC32-NEXT: mulhwu 0, 5, 10
+; PPC32-NEXT: addze 12, 0
+; PPC32-NEXT: mullw 22, 6, 9
+; PPC32-NEXT: addc 11, 22, 11
+; PPC32-NEXT: mulhwu 25, 6, 9
+; PPC32-NEXT: addze 26, 25
+; PPC32-NEXT: addc 12, 12, 26
+; PPC32-NEXT: addze 26, 19
+; PPC32-NEXT: mullw 0, 5, 9
+; PPC32-NEXT: addc 12, 0, 12
+; PPC32-NEXT: mulhwu 24, 5, 9
+; PPC32-NEXT: adde 0, 24, 26
+; PPC32-NEXT: addc 12, 12, 23
+; PPC32-NEXT: adde 0, 0, 21
+; PPC32-NEXT: addze. 26, 19
; PPC32-NEXT: mcrf 1, 0
-; PPC32-NEXT: mulhwu. 29, 11, 6
-; PPC32-NEXT: mcrf 6, 0
-; PPC32-NEXT: mulhwu. 29, 12, 8
+; PPC32-NEXT: mulhwu. 26, 7, 6
; PPC32-NEXT: mcrf 5, 0
-; PPC32-NEXT: cmpwi 12, 0
-; PPC32-NEXT: crnor 20, 2, 30
-; PPC32-NEXT: cmpwi 3, 0
-; PPC32-NEXT: cmpwi 7, 9, 0
-; PPC32-NEXT: crnor 24, 30, 2
-; PPC32-NEXT: mulhwu. 12, 3, 10
-; PPC32-NEXT: crorc 20, 20, 26
-; PPC32-NEXT: mcrf 7, 0
+; PPC32-NEXT: crnor 20, 14, 10
; PPC32-NEXT: crorc 20, 20, 22
-; PPC32-NEXT: cmpwi 26, 0
-; PPC32-NEXT: crorc 28, 20, 2
-; PPC32-NEXT: mulhwu. 9, 9, 4
-; PPC32-NEXT: mcrf 5, 0
-; PPC32-NEXT: crorc 20, 24, 30
-; PPC32-NEXT: or. 3, 4, 3
+; PPC32-NEXT: cmpwi 2, 30, 0
+; PPC32-NEXT: cmpwi 3, 29, 0
+; PPC32-NEXT: mulhwu. 5, 5, 8
; PPC32-NEXT: mcrf 6, 0
-; PPC32-NEXT: crorc 20, 20, 22
-; PPC32-NEXT: or. 3, 8, 11
-; PPC32-NEXT: crorc 20, 20, 10
-; PPC32-NEXT: crnor 21, 2, 26
+; PPC32-NEXT: cmpwi 9, 0
+; PPC32-NEXT: crnor 21, 2, 30
+; PPC32-NEXT: crorc 20, 20, 26
+; PPC32-NEXT: crnor 23, 14, 10
+; PPC32-NEXT: mulhwu. 3, 3, 10
+; PPC32-NEXT: mcrf 7, 0
+; PPC32-NEXT: cmpwi 27, 0
+; PPC32-NEXT: crorc 20, 20, 2
+; PPC32-NEXT: crorc 21, 21, 30
+; PPC32-NEXT: mulhwu. 3, 9, 4
+; PPC32-NEXT: crorc 21, 21, 2
+; PPC32-NEXT: cmpwi 28, 0
+; PPC32-NEXT: crorc 21, 21, 2
+; PPC32-NEXT: cror 21, 23, 21
; PPC32-NEXT: cror 20, 21, 20
-; PPC32-NEXT: cror 20, 20, 28
-; PPC32-NEXT: crandc 20, 6, 20
+; PPC32-NEXT: crorc 20, 20, 6
; PPC32-NEXT: mullw 6, 6, 10
-; PPC32-NEXT: bc 12, 20, .LBB0_2
-; PPC32-NEXT: # %bb.1: # %start
; PPC32-NEXT: li 7, 1
-; PPC32-NEXT: .LBB0_2: # %start
-; PPC32-NEXT: lwz 12, 16(1)
-; PPC32-NEXT: mr 3, 30
-; PPC32-NEXT: mr 4, 0
-; PPC32-NEXT: lwz 30, 56(1) # 4-byte Folded Reload
+; PPC32-NEXT: bc 4, 20, .LBB0_7
+; PPC32-NEXT: b .LBB0_8
+; PPC32-NEXT: .LBB0_3: # %overflow.no.lhs
+; PPC32-NEXT: or. 11, 8, 7
+; PPC32-NEXT: beq 0, .LBB0_9
+; PPC32-NEXT: # %bb.4: # %overflow.no.lhs.only
+; PPC32-NEXT: mulhwu 29, 10, 4
+; PPC32-NEXT: mullw 20, 10, 3
+; PPC32-NEXT: add 29, 29, 20
+; PPC32-NEXT: mulhwu 12, 6, 10
+; PPC32-NEXT: mulhwu 0, 6, 9
+; PPC32-NEXT: mulhwu 30, 5, 9
+; PPC32-NEXT: mulhwu 24, 8, 4
+; PPC32-NEXT: mullw 23, 5, 10
+; PPC32-NEXT: addc 12, 23, 12
+; PPC32-NEXT: mullw 22, 6, 9
+; PPC32-NEXT: mullw 21, 5, 9
+; PPC32-NEXT: mullw 9, 9, 4
+; PPC32-NEXT: add 9, 29, 9
+; PPC32-NEXT: mullw 3, 8, 3
+; PPC32-NEXT: add 3, 24, 3
+; PPC32-NEXT: mulhwu 11, 5, 10
+; PPC32-NEXT: mullw 29, 7, 4
+; PPC32-NEXT: add 3, 3, 29
+; PPC32-NEXT: addze 29, 11
+; PPC32-NEXT: addc 11, 22, 12
+; PPC32-NEXT: addze 0, 0
+; PPC32-NEXT: li 12, 0
+; PPC32-NEXT: addc 0, 29, 0
+; PPC32-NEXT: addze 29, 12
+; PPC32-NEXT: addc 0, 21, 0
+; PPC32-NEXT: mullw 19, 10, 4
+; PPC32-NEXT: adde 30, 30, 29
+; PPC32-NEXT: addc 0, 0, 19
+; PPC32-NEXT: adde 9, 30, 9
+; PPC32-NEXT: mulhwu 27, 6, 8
+; PPC32-NEXT: mullw 18, 5, 8
+; PPC32-NEXT: addc 30, 18, 27
+; PPC32-NEXT: mulhwu 28, 5, 8
+; PPC32-NEXT: addze 29, 28
+; PPC32-NEXT: mulhwu 26, 6, 7
+; PPC32-NEXT: mulhwu 25, 5, 7
+; PPC32-NEXT: mullw 5, 5, 7
+; PPC32-NEXT: mullw 7, 6, 7
+; PPC32-NEXT: addc 7, 7, 30
+; PPC32-NEXT: addze 30, 26
+; PPC32-NEXT: addc 30, 29, 30
+; PPC32-NEXT: addze 12, 12
+; PPC32-NEXT: addc 5, 5, 30
+; PPC32-NEXT: mullw 4, 8, 4
+; PPC32-NEXT: adde 12, 25, 12
+; PPC32-NEXT: addc 4, 5, 4
+; PPC32-NEXT: adde 3, 12, 3
+; PPC32-NEXT: mullw 5, 6, 8
+; PPC32-NEXT: addc 12, 0, 5
+; PPC32-NEXT: adde 0, 9, 7
+; PPC32-NEXT: addze 4, 4
+; PPC32-NEXT: addze 3, 3
+; PPC32-NEXT: or. 3, 4, 3
+; PPC32-NEXT: mullw 6, 6, 10
+; PPC32-NEXT: b .LBB0_6
+; PPC32-NEXT: .LBB0_5: # %overflow.no.rhs.only
+; PPC32-NEXT: mulhwu 29, 6, 8
+; PPC32-NEXT: mullw 20, 6, 7
+; PPC32-NEXT: add 29, 29, 20
+; PPC32-NEXT: mulhwu 12, 10, 6
+; PPC32-NEXT: mulhwu 0, 10, 5
+; PPC32-NEXT: mulhwu 30, 9, 5
+; PPC32-NEXT: mulhwu 24, 4, 8
+; PPC32-NEXT: mullw 23, 9, 6
+; PPC32-NEXT: addc 12, 23, 12
+; PPC32-NEXT: mullw 22, 10, 5
+; PPC32-NEXT: mullw 21, 9, 5
+; PPC32-NEXT: mullw 5, 5, 8
+; PPC32-NEXT: add 5, 29, 5
+; PPC32-NEXT: mullw 7, 4, 7
+; PPC32-NEXT: add 7, 24, 7
+; PPC32-NEXT: mulhwu 11, 9, 6
+; PPC32-NEXT: mullw 29, 3, 8
+; PPC32-NEXT: add 7, 7, 29
+; PPC32-NEXT: addze 29, 11
+; PPC32-NEXT: addc 11, 22, 12
+; PPC32-NEXT: addze 0, 0
+; PPC32-NEXT: li 12, 0
+; PPC32-NEXT: addc 0, 29, 0
+; PPC32-NEXT: addze 29, 12
+; PPC32-NEXT: addc 0, 21, 0
+; PPC32-NEXT: mullw 19, 6, 8
+; PPC32-NEXT: adde 30, 30, 29
+; PPC32-NEXT: addc 0, 0, 19
+; PPC32-NEXT: adde 5, 30, 5
+; PPC32-NEXT: mulhwu 27, 10, 4
+; PPC32-NEXT: mullw 18, 9, 4
+; PPC32-NEXT: addc 30, 18, 27
+; PPC32-NEXT: mulhwu 28, 9, 4
+; PPC32-NEXT: addze 29, 28
+; PPC32-NEXT: mulhwu 26, 10, 3
+; PPC32-NEXT: mulhwu 25, 9, 3
+; PPC32-NEXT: mullw 9, 9, 3
+; PPC32-NEXT: mullw 3, 10, 3
+; PPC32-NEXT: addc 3, 3, 30
+; PPC32-NEXT: addze 30, 26
+; PPC32-NEXT: addc 30, 29, 30
+; PPC32-NEXT: addze 12, 12
+; PPC32-NEXT: addc 9, 9, 30
+; PPC32-NEXT: mullw 8, 4, 8
+; PPC32-NEXT: adde 12, 25, 12
+; PPC32-NEXT: addc 8, 9, 8
+; PPC32-NEXT: adde 7, 12, 7
+; PPC32-NEXT: mullw 4, 10, 4
+; PPC32-NEXT: addc 12, 0, 4
+; PPC32-NEXT: adde 0, 5, 3
+; PPC32-NEXT: addze 3, 8
+; PPC32-NEXT: addze 4, 7
+; PPC32-NEXT: or. 3, 3, 4
+; PPC32-NEXT: mullw 6, 10, 6
+; PPC32-NEXT: .LBB0_6: # %overflow.no.rhs.only
+; PPC32-NEXT: crnot 20, 2
+; PPC32-NEXT: li 7, 1
+; PPC32-NEXT: bc 12, 20, .LBB0_8
+; PPC32-NEXT: .LBB0_7: # %overflow.res
+; PPC32-NEXT: li 7, 0
+; PPC32-NEXT: .LBB0_8: # %overflow.res
+; PPC32-NEXT: mr 4, 12
+; PPC32-NEXT: lwz 12, 20(1)
+; PPC32-NEXT: mr 3, 0
+; PPC32-NEXT: mr 5, 11
+; PPC32-NEXT: lwz 30, 72(1) # 4-byte Folded Reload
; PPC32-NEXT: mtcrf 32, 12 # cr2
-; PPC32-NEXT: lwz 29, 52(1) # 4-byte Folded Reload
-; PPC32-NEXT: lwz 28, 48(1) # 4-byte Folded Reload
-; PPC32-NEXT: lwz 27, 44(1) # 4-byte Folded Reload
-; PPC32-NEXT: lwz 26, 40(1) # 4-byte Folded Reload
-; PPC32-NEXT: lwz 25, 36(1) # 4-byte Folded Reload
-; PPC32-NEXT: lwz 24, 32(1) # 4-byte Folded Reload
-; PPC32-NEXT: lwz 23, 28(1) # 4-byte Folded Reload
-; PPC32-NEXT: lwz 22, 24(1) # 4-byte Folded Reload
-; PPC32-NEXT: lwz 21, 20(1) # 4-byte Folded Reload
-; PPC32-NEXT: addi 1, 1, 64
+; PPC32-NEXT: mtcrf 16, 12 # cr3
+; PPC32-NEXT: lwz 29, 68(1) # 4-byte Folded Reload
+; PPC32-NEXT: lwz 28, 64(1) # 4-byte Folded Reload
+; PPC32-NEXT: lwz 27, 60(1) # 4-byte Folded Reload
+; PPC32-NEXT: lwz 26, 56(1) # 4-byte Folded Reload
+; PPC32-NEXT: lwz 25, 52(1) # 4-byte Folded Reload
+; PPC32-NEXT: lwz 24, 48(1) # 4-byte Folded Reload
+; PPC32-NEXT: lwz 23, 44(1) # 4-byte Folded Reload
+; PPC32-NEXT: lwz 22, 40(1) # 4-byte Folded Reload
+; PPC32-NEXT: lwz 21, 36(1) # 4-byte Folded Reload
+; PPC32-NEXT: lwz 20, 32(1) # 4-byte Folded Reload
+; PPC32-NEXT: lwz 19, 28(1) # 4-byte Folded Reload
+; PPC32-NEXT: lwz 18, 24(1) # 4-byte Folded Reload
+; PPC32-NEXT: addi 1, 1, 80
; PPC32-NEXT: blr
+; PPC32-NEXT: .LBB0_9: # %overflow.no
+; PPC32-NEXT: mulhwu 11, 10, 4
+; PPC32-NEXT: mulhwu 12, 8, 6
+; PPC32-NEXT: mullw 3, 10, 3
+; PPC32-NEXT: add 3, 11, 3
+; PPC32-NEXT: mullw 26, 8, 5
+; PPC32-NEXT: mulhwu 0, 5, 10
+; PPC32-NEXT: mulhwu 30, 6, 10
+; PPC32-NEXT: mulhwu 29, 6, 9
+; PPC32-NEXT: mulhwu 28, 5, 9
+; PPC32-NEXT: mullw 27, 9, 4
+; PPC32-NEXT: add 3, 3, 27
+; PPC32-NEXT: mullw 7, 7, 6
+; PPC32-NEXT: mullw 4, 10, 4
+; PPC32-NEXT: mullw 8, 8, 6
+; PPC32-NEXT: addc 4, 8, 4
+; PPC32-NEXT: li 8, 0
+; PPC32-NEXT: mullw 25, 5, 10
+; PPC32-NEXT: mullw 5, 5, 9
+; PPC32-NEXT: mullw 9, 6, 9
+; PPC32-NEXT: mullw 6, 6, 10
+; PPC32-NEXT: add 10, 12, 26
+; PPC32-NEXT: add 7, 10, 7
+; PPC32-NEXT: adde 3, 7, 3
+; PPC32-NEXT: addc 7, 25, 30
+; PPC32-NEXT: addze 10, 0
+; PPC32-NEXT: addc 11, 9, 7
+; PPC32-NEXT: addze 7, 29
+; PPC32-NEXT: addc 7, 10, 7
+; PPC32-NEXT: addze 8, 8
+; PPC32-NEXT: addc 5, 5, 7
+; PPC32-NEXT: adde 7, 28, 8
+; PPC32-NEXT: addc 12, 5, 4
+; PPC32-NEXT: adde 0, 7, 3
+; PPC32-NEXT: li 7, 1
+; PPC32-NEXT: b .LBB0_7
start:
%0 = tail call { i128, i1 } @llvm.umul.with.overflow.i128(i128 %l, i128 %r) #2
%1 = extractvalue { i128, i1 } %0, 0
diff --git a/llvm/test/CodeGen/RISCV/umulo-128-legalisation-lowering.ll b/llvm/test/CodeGen/RISCV/umulo-128-legalisation-lowering.ll
index d6fd4f15c4e53..4c9aeaa3ba5a1 100644
--- a/llvm/test/CodeGen/RISCV/umulo-128-legalisation-lowering.ll
+++ b/llvm/test/CodeGen/RISCV/umulo-128-legalisation-lowering.ll
@@ -3,7 +3,7 @@
define { i128, i8 } @muloti_test(i128 %l, i128 %r) #0 {
; RISCV32-LABEL: muloti_test:
-; RISCV32: # %bb.0: # %start
+; RISCV32: # %bb.0: # %overflow.entry
; RISCV32-NEXT: addi sp, sp, -32
; RISCV32-NEXT: sw s0, 28(sp) # 4-byte Folded Spill
; RISCV32-NEXT: sw s1, 24(sp) # 4-byte Folded Spill
@@ -11,100 +11,301 @@ define { i128, i8 } @muloti_test(i128 %l, i128 %r) #0 {
; RISCV32-NEXT: sw s3, 16(sp) # 4-byte Folded Spill
; RISCV32-NEXT: sw s4, 12(sp) # 4-byte Folded Spill
; RISCV32-NEXT: sw s5, 8(sp) # 4-byte Folded Spill
-; RISCV32-NEXT: lw a4, 0(a1)
+; RISCV32-NEXT: sw s6, 4(sp) # 4-byte Folded Spill
+; RISCV32-NEXT: sw s7, 0(sp) # 4-byte Folded Spill
+; RISCV32-NEXT: lw a3, 0(a1)
; RISCV32-NEXT: lw t0, 4(a1)
-; RISCV32-NEXT: lw a3, 8(a1)
-; RISCV32-NEXT: lw a1, 12(a1)
-; RISCV32-NEXT: lw a6, 0(a2)
-; RISCV32-NEXT: lw a5, 4(a2)
-; RISCV32-NEXT: lw a7, 8(a2)
+; RISCV32-NEXT: lw a4, 8(a1)
+; RISCV32-NEXT: lw a6, 12(a1)
+; RISCV32-NEXT: lw a1, 0(a2)
+; RISCV32-NEXT: lw a7, 4(a2)
+; RISCV32-NEXT: lw a5, 8(a2)
; RISCV32-NEXT: lw a2, 12(a2)
-; RISCV32-NEXT: mulhu t1, a4, a6
-; RISCV32-NEXT: mul t2, t0, a6
-; RISCV32-NEXT: mulhu t3, t0, a6
-; RISCV32-NEXT: mul t4, a4, a5
-; RISCV32-NEXT: mulhu t5, a4, a5
+; RISCV32-NEXT: or t4, a4, a6
+; RISCV32-NEXT: beqz t4, .LBB0_5
+; RISCV32-NEXT: # %bb.1: # %overflow.lhs
+; RISCV32-NEXT: or t5, a5, a2
+; RISCV32-NEXT: beqz t5, .LBB0_9
+; RISCV32-NEXT: # %bb.2: # %overflow
+; RISCV32-NEXT: mulhu t1, a3, a1
+; RISCV32-NEXT: mul t2, t0, a1
+; RISCV32-NEXT: mulhu t3, t0, a1
+; RISCV32-NEXT: mul t6, a3, a7
+; RISCV32-NEXT: mulhu s0, a3, a7
+; RISCV32-NEXT: mul s4, t0, a7
+; RISCV32-NEXT: mul s1, a5, a3
+; RISCV32-NEXT: mul s5, a4, a1
; RISCV32-NEXT: mul s2, t0, a5
-; RISCV32-NEXT: mul t6, a7, a4
-; RISCV32-NEXT: mul s3, a3, a6
-; RISCV32-NEXT: mul s0, t0, a7
-; RISCV32-NEXT: mul s1, a2, a4
-; RISCV32-NEXT: mul s4, a5, a3
-; RISCV32-NEXT: add s1, s1, s0
-; RISCV32-NEXT: mul s0, a1, a6
-; RISCV32-NEXT: add s4, s0, s4
-; RISCV32-NEXT: mulhu s5, t0, a5
+; RISCV32-NEXT: mul s3, a2, a3
+; RISCV32-NEXT: mul s6, a7, a4
+; RISCV32-NEXT: add s3, s3, s2
+; RISCV32-NEXT: mul s2, a6, a1
+; RISCV32-NEXT: add s6, s2, s6
+; RISCV32-NEXT: mulhu s7, t0, a7
; RISCV32-NEXT: add t1, t2, t1
; RISCV32-NEXT: sltu t2, t1, t2
; RISCV32-NEXT: add t2, t3, t2
-; RISCV32-NEXT: mulhu s0, a7, a4
-; RISCV32-NEXT: add t1, t4, t1
-; RISCV32-NEXT: sltu t3, t1, t4
-; RISCV32-NEXT: add t3, t5, t3
-; RISCV32-NEXT: mulhu t5, a3, a6
-; RISCV32-NEXT: add t4, s3, t6
-; RISCV32-NEXT: add s1, s0, s1
-; RISCV32-NEXT: add t6, t5, s4
-; RISCV32-NEXT: sltu s3, t4, s3
+; RISCV32-NEXT: mulhu s2, a5, a3
+; RISCV32-NEXT: add t1, t6, t1
+; RISCV32-NEXT: sltu t3, t1, t6
+; RISCV32-NEXT: add t3, s0, t3
+; RISCV32-NEXT: mulhu s0, a4, a1
+; RISCV32-NEXT: add t6, s5, s1
+; RISCV32-NEXT: add s3, s2, s3
+; RISCV32-NEXT: add s1, s0, s6
+; RISCV32-NEXT: sltu s5, t6, s5
; RISCV32-NEXT: add t3, t2, t3
; RISCV32-NEXT: sltu t2, t3, t2
-; RISCV32-NEXT: add s5, s5, t2
-; RISCV32-NEXT: add s4, t6, s1
-; RISCV32-NEXT: add t3, s2, t3
-; RISCV32-NEXT: add t2, t3, t4
-; RISCV32-NEXT: sltu s2, t3, s2
-; RISCV32-NEXT: sltu t4, t2, t3
-; RISCV32-NEXT: add s2, s5, s2
-; RISCV32-NEXT: add s3, s4, s3
-; RISCV32-NEXT: add t3, s2, s3
-; RISCV32-NEXT: add t3, t3, t4
-; RISCV32-NEXT: beq t3, s2, .LBB0_2
-; RISCV32-NEXT: # %bb.1: # %start
-; RISCV32-NEXT: sltu t4, t3, s2
-; RISCV32-NEXT: .LBB0_2: # %start
-; RISCV32-NEXT: sltu s0, s1, s0
-; RISCV32-NEXT: snez s1, t0
-; RISCV32-NEXT: snez s2, a2
-; RISCV32-NEXT: sltu t5, t6, t5
-; RISCV32-NEXT: mulhu t6, a2, a4
-; RISCV32-NEXT: mulhu t0, t0, a7
-; RISCV32-NEXT: or a2, a7, a2
-; RISCV32-NEXT: snez a7, a5
-; RISCV32-NEXT: mul a4, a4, a6
-; RISCV32-NEXT: mulhu a6, a1, a6
-; RISCV32-NEXT: mulhu a5, a5, a3
-; RISCV32-NEXT: or a3, a3, a1
-; RISCV32-NEXT: snez a1, a1
-; RISCV32-NEXT: and s1, s2, s1
-; RISCV32-NEXT: snez t6, t6
-; RISCV32-NEXT: snez t0, t0
-; RISCV32-NEXT: and a1, a1, a7
-; RISCV32-NEXT: snez a6, a6
-; RISCV32-NEXT: snez a5, a5
+; RISCV32-NEXT: add s7, s7, t2
+; RISCV32-NEXT: add s6, s1, s3
+; RISCV32-NEXT: add t3, s4, t3
+; RISCV32-NEXT: add t2, t3, t6
+; RISCV32-NEXT: sltu s4, t3, s4
+; RISCV32-NEXT: sltu t6, t2, t3
+; RISCV32-NEXT: add s4, s7, s4
+; RISCV32-NEXT: add s5, s6, s5
+; RISCV32-NEXT: add t3, s4, s5
+; RISCV32-NEXT: add t3, t3, t6
+; RISCV32-NEXT: beq t3, s4, .LBB0_4
+; RISCV32-NEXT: # %bb.3: # %overflow
+; RISCV32-NEXT: sltu t6, t3, s4
+; RISCV32-NEXT: .LBB0_4: # %overflow
+; RISCV32-NEXT: sltu s2, s3, s2
+; RISCV32-NEXT: snez s3, t0
+; RISCV32-NEXT: snez s4, a2
+; RISCV32-NEXT: mulhu a2, a2, a3
+; RISCV32-NEXT: mulhu a5, t0, a5
+; RISCV32-NEXT: sltu t0, s1, s0
+; RISCV32-NEXT: snez s0, a7
+; RISCV32-NEXT: snez s1, a6
+; RISCV32-NEXT: mulhu a6, a6, a1
+; RISCV32-NEXT: mulhu a4, a7, a4
+; RISCV32-NEXT: snez a7, t5
+; RISCV32-NEXT: snez t4, t4
+; RISCV32-NEXT: and t5, s4, s3
; RISCV32-NEXT: snez a2, a2
-; RISCV32-NEXT: snez a3, a3
-; RISCV32-NEXT: or a7, s1, t6
-; RISCV32-NEXT: or a1, a1, a6
-; RISCV32-NEXT: and a2, a3, a2
-; RISCV32-NEXT: or a3, a7, t0
-; RISCV32-NEXT: or a1, a1, a5
-; RISCV32-NEXT: or a3, a3, s0
-; RISCV32-NEXT: or a1, a1, t5
-; RISCV32-NEXT: or a1, a2, a1
-; RISCV32-NEXT: or a1, a1, a3
-; RISCV32-NEXT: or a1, a1, t4
-; RISCV32-NEXT: andi a1, a1, 1
-; RISCV32-NEXT: sw a4, 0(a0)
+; RISCV32-NEXT: snez a5, a5
+; RISCV32-NEXT: and s0, s1, s0
+; RISCV32-NEXT: snez a6, a6
+; RISCV32-NEXT: snez a4, a4
+; RISCV32-NEXT: and a7, t4, a7
+; RISCV32-NEXT: or a2, t5, a2
+; RISCV32-NEXT: or a6, s0, a6
+; RISCV32-NEXT: or a2, a2, a5
+; RISCV32-NEXT: or a4, a6, a4
+; RISCV32-NEXT: or a2, a2, s2
+; RISCV32-NEXT: or a4, a4, t0
+; RISCV32-NEXT: or a4, a7, a4
+; RISCV32-NEXT: or a2, a4, a2
+; RISCV32-NEXT: or t4, a2, t6
+; RISCV32-NEXT: j .LBB0_14
+; RISCV32-NEXT: .LBB0_5: # %overflow.no.lhs
+; RISCV32-NEXT: or t1, a5, a2
+; RISCV32-NEXT: beqz t1, .LBB0_13
+; RISCV32-NEXT: # %bb.6: # %overflow.no.lhs.only
+; RISCV32-NEXT: mulhu t1, a3, a1
+; RISCV32-NEXT: mul t6, t0, a1
+; RISCV32-NEXT: mulhu s0, t0, a1
+; RISCV32-NEXT: mul t4, a3, a7
+; RISCV32-NEXT: mulhu t5, a3, a7
+; RISCV32-NEXT: mul t2, t0, a7
+; RISCV32-NEXT: mulhu t3, t0, a7
+; RISCV32-NEXT: mulhu s1, a1, a4
+; RISCV32-NEXT: mul s2, a1, a6
+; RISCV32-NEXT: mul a7, a7, a4
+; RISCV32-NEXT: add s1, s1, s2
+; RISCV32-NEXT: mulhu s2, a5, a4
+; RISCV32-NEXT: mul a6, a5, a6
+; RISCV32-NEXT: add a6, s2, a6
+; RISCV32-NEXT: mulhu s2, a3, a5
+; RISCV32-NEXT: add a7, s1, a7
+; RISCV32-NEXT: mul s1, a2, a4
+; RISCV32-NEXT: add a6, a6, s1
+; RISCV32-NEXT: mul s1, t0, a5
+; RISCV32-NEXT: add t1, t6, t1
+; RISCV32-NEXT: sltu t6, t1, t6
+; RISCV32-NEXT: add t6, s0, t6
+; RISCV32-NEXT: mulhu s0, t0, a5
+; RISCV32-NEXT: add s2, s1, s2
+; RISCV32-NEXT: sltu s1, s2, s1
+; RISCV32-NEXT: add s0, s0, s1
+; RISCV32-NEXT: mul s1, a3, a2
+; RISCV32-NEXT: add t1, t4, t1
+; RISCV32-NEXT: sltu t4, t1, t4
+; RISCV32-NEXT: add t4, t5, t4
+; RISCV32-NEXT: mul t5, t0, a2
+; RISCV32-NEXT: mulhu t0, t0, a2
+; RISCV32-NEXT: mulhu a2, a3, a2
+; RISCV32-NEXT: add s2, s1, s2
+; RISCV32-NEXT: sltu s1, s2, s1
+; RISCV32-NEXT: add a2, a2, s1
+; RISCV32-NEXT: mul s1, a1, a4
+; RISCV32-NEXT: mul a4, a5, a4
+; RISCV32-NEXT: mul a5, a3, a5
+; RISCV32-NEXT: add t4, t6, t4
+; RISCV32-NEXT: add a2, s0, a2
+; RISCV32-NEXT: sltu t6, t4, t6
+; RISCV32-NEXT: add t4, t2, t4
+; RISCV32-NEXT: sltu s0, a2, s0
+; RISCV32-NEXT: add s3, t5, a2
+; RISCV32-NEXT: add s1, t4, s1
+; RISCV32-NEXT: sltu t2, t4, t2
+; RISCV32-NEXT: add t3, t3, t6
+; RISCV32-NEXT: add a2, s3, a4
+; RISCV32-NEXT: sltu a4, s3, t5
+; RISCV32-NEXT: add t0, t0, s0
+; RISCV32-NEXT: sltu t4, s1, t4
+; RISCV32-NEXT: add t3, t3, t2
+; RISCV32-NEXT: sltu t5, a2, s3
+; RISCV32-NEXT: add a4, t0, a4
+; RISCV32-NEXT: add t2, s1, a5
+; RISCV32-NEXT: add a7, t3, a7
+; RISCV32-NEXT: add a5, a4, a6
+; RISCV32-NEXT: sltu a4, t2, s1
+; RISCV32-NEXT: add a6, a7, t4
+; RISCV32-NEXT: add t3, s2, a4
+; RISCV32-NEXT: add t3, a6, t3
+; RISCV32-NEXT: add a5, a5, t5
+; RISCV32-NEXT: beq t3, a6, .LBB0_8
+; RISCV32-NEXT: # %bb.7: # %overflow.no.lhs.only
+; RISCV32-NEXT: sltu a4, t3, a6
+; RISCV32-NEXT: .LBB0_8: # %overflow.no.lhs.only
+; RISCV32-NEXT: mul a1, a3, a1
+; RISCV32-NEXT: j .LBB0_12
+; RISCV32-NEXT: .LBB0_9: # %overflow.no.rhs.only
+; RISCV32-NEXT: mulhu t1, a1, a3
+; RISCV32-NEXT: mul t6, a7, a3
+; RISCV32-NEXT: mulhu s0, a7, a3
+; RISCV32-NEXT: mul t4, a1, t0
+; RISCV32-NEXT: mulhu t5, a1, t0
+; RISCV32-NEXT: mul t2, a7, t0
+; RISCV32-NEXT: mulhu t3, a7, t0
+; RISCV32-NEXT: mulhu s1, a3, a5
+; RISCV32-NEXT: mul s2, a3, a2
+; RISCV32-NEXT: mul t0, t0, a5
+; RISCV32-NEXT: add s1, s1, s2
+; RISCV32-NEXT: mulhu s2, a4, a5
+; RISCV32-NEXT: mul a2, a4, a2
+; RISCV32-NEXT: add a2, s2, a2
+; RISCV32-NEXT: mulhu s2, a1, a4
+; RISCV32-NEXT: add t0, s1, t0
+; RISCV32-NEXT: mul s1, a6, a5
+; RISCV32-NEXT: add s1, a2, s1
+; RISCV32-NEXT: mul a2, a7, a4
+; RISCV32-NEXT: add t1, t6, t1
+; RISCV32-NEXT: sltu t6, t1, t6
+; RISCV32-NEXT: add t6, s0, t6
+; RISCV32-NEXT: mulhu s0, a7, a4
+; RISCV32-NEXT: add s2, a2, s2
+; RISCV32-NEXT: sltu a2, s2, a2
+; RISCV32-NEXT: add a2, s0, a2
+; RISCV32-NEXT: mul s0, a1, a6
+; RISCV32-NEXT: add t1, t4, t1
+; RISCV32-NEXT: sltu t4, t1, t4
+; RISCV32-NEXT: add t4, t5, t4
+; RISCV32-NEXT: mul t5, a7, a6
+; RISCV32-NEXT: mulhu a7, a7, a6
+; RISCV32-NEXT: mulhu a6, a1, a6
+; RISCV32-NEXT: add s2, s0, s2
+; RISCV32-NEXT: sltu s0, s2, s0
+; RISCV32-NEXT: add a6, a6, s0
+; RISCV32-NEXT: mul s0, a3, a5
+; RISCV32-NEXT: mul a5, a4, a5
+; RISCV32-NEXT: mul a4, a1, a4
+; RISCV32-NEXT: add t4, t6, t4
+; RISCV32-NEXT: add a6, a2, a6
+; RISCV32-NEXT: sltu t6, t4, t6
+; RISCV32-NEXT: add t4, t2, t4
+; RISCV32-NEXT: sltu s3, a6, a2
+; RISCV32-NEXT: add a6, t5, a6
+; RISCV32-NEXT: add s0, t4, s0
+; RISCV32-NEXT: sltu t2, t4, t2
+; RISCV32-NEXT: add t3, t3, t6
+; RISCV32-NEXT: add a2, a6, a5
+; RISCV32-NEXT: sltu a5, a6, t5
+; RISCV32-NEXT: add a7, a7, s3
+; RISCV32-NEXT: sltu t4, s0, t4
+; RISCV32-NEXT: add t3, t3, t2
+; RISCV32-NEXT: sltu t5, a2, a6
+; RISCV32-NEXT: add a5, a7, a5
+; RISCV32-NEXT: add t2, s0, a4
+; RISCV32-NEXT: add a6, t3, t0
+; RISCV32-NEXT: add a5, a5, s1
+; RISCV32-NEXT: sltu a4, t2, s0
+; RISCV32-NEXT: add a6, a6, t4
+; RISCV32-NEXT: add t3, s2, a4
+; RISCV32-NEXT: add t3, a6, t3
+; RISCV32-NEXT: add a5, a5, t5
+; RISCV32-NEXT: beq t3, a6, .LBB0_11
+; RISCV32-NEXT: # %bb.10: # %overflow.no.rhs.only
+; RISCV32-NEXT: sltu a4, t3, a6
+; RISCV32-NEXT: .LBB0_11: # %overflow.no.rhs.only
+; RISCV32-NEXT: mul a1, a1, a3
+; RISCV32-NEXT: .LBB0_12: # %overflow.res
+; RISCV32-NEXT: add a4, a2, a4
+; RISCV32-NEXT: sltu a2, a4, a2
+; RISCV32-NEXT: add a2, a5, a2
+; RISCV32-NEXT: or a2, a4, a2
+; RISCV32-NEXT: snez t4, a2
+; RISCV32-NEXT: j .LBB0_15
+; RISCV32-NEXT: .LBB0_13: # %overflow.no
+; RISCV32-NEXT: li t4, 0
+; RISCV32-NEXT: mulhu t1, a3, a1
+; RISCV32-NEXT: mul t2, t0, a1
+; RISCV32-NEXT: mulhu t3, t0, a1
+; RISCV32-NEXT: mul t5, a3, a7
+; RISCV32-NEXT: mulhu t6, a3, a7
+; RISCV32-NEXT: mul s0, t0, a7
+; RISCV32-NEXT: mul s1, a5, t0
+; RISCV32-NEXT: mulhu s2, a5, a3
+; RISCV32-NEXT: add s1, s2, s1
+; RISCV32-NEXT: mul s2, a1, a4
+; RISCV32-NEXT: mul a5, a5, a3
+; RISCV32-NEXT: mulhu t0, t0, a7
+; RISCV32-NEXT: mul a2, a2, a3
+; RISCV32-NEXT: mul a7, a7, a4
+; RISCV32-NEXT: mulhu a4, a1, a4
+; RISCV32-NEXT: mul a6, a1, a6
+; RISCV32-NEXT: add t1, t2, t1
+; RISCV32-NEXT: add s2, a5, s2
+; RISCV32-NEXT: add a4, a4, a6
+; RISCV32-NEXT: sltu a6, t1, t2
+; RISCV32-NEXT: add t1, t5, t1
+; RISCV32-NEXT: add a2, s1, a2
+; RISCV32-NEXT: add a4, a4, a7
+; RISCV32-NEXT: sltu a5, s2, a5
+; RISCV32-NEXT: add a6, t3, a6
+; RISCV32-NEXT: sltu a7, t1, t5
+; RISCV32-NEXT: add a2, a2, a4
+; RISCV32-NEXT: add a7, t6, a7
+; RISCV32-NEXT: add a2, a2, a5
+; RISCV32-NEXT: add a7, a6, a7
+; RISCV32-NEXT: add a4, s0, a7
+; RISCV32-NEXT: sltu a5, a7, a6
+; RISCV32-NEXT: add t2, a4, s2
+; RISCV32-NEXT: sltu a6, a4, s0
+; RISCV32-NEXT: add a5, t0, a5
+; RISCV32-NEXT: sltu t3, t2, a4
+; RISCV32-NEXT: add a5, a5, a6
+; RISCV32-NEXT: add a2, a5, a2
+; RISCV32-NEXT: add t3, a2, t3
+; RISCV32-NEXT: .LBB0_14: # %overflow.res
+; RISCV32-NEXT: mul a1, a3, a1
+; RISCV32-NEXT: .LBB0_15: # %overflow.res
+; RISCV32-NEXT: andi a2, t4, 1
+; RISCV32-NEXT: sw a1, 0(a0)
; RISCV32-NEXT: sw t1, 4(a0)
; RISCV32-NEXT: sw t2, 8(a0)
; RISCV32-NEXT: sw t3, 12(a0)
-; RISCV32-NEXT: sb a1, 16(a0)
+; RISCV32-NEXT: sb a2, 16(a0)
; RISCV32-NEXT: lw s0, 28(sp) # 4-byte Folded Reload
; RISCV32-NEXT: lw s1, 24(sp) # 4-byte Folded Reload
; RISCV32-NEXT: lw s2, 20(sp) # 4-byte Folded Reload
; RISCV32-NEXT: lw s3, 16(sp) # 4-byte Folded Reload
; RISCV32-NEXT: lw s4, 12(sp) # 4-byte Folded Reload
; RISCV32-NEXT: lw s5, 8(sp) # 4-byte Folded Reload
+; RISCV32-NEXT: lw s6, 4(sp) # 4-byte Folded Reload
+; RISCV32-NEXT: lw s7, 0(sp) # 4-byte Folded Reload
; RISCV32-NEXT: addi sp, sp, 32
; RISCV32-NEXT: ret
start:
diff --git a/llvm/test/CodeGen/RISCV/xaluo.ll b/llvm/test/CodeGen/RISCV/xaluo.ll
index a30593d7d7afb..0dac74355d2e9 100644
--- a/llvm/test/CodeGen/RISCV/xaluo.ll
+++ b/llvm/test/CodeGen/RISCV/xaluo.ll
@@ -1314,38 +1314,173 @@ entry:
define zeroext i1 @smulo.i64(i64 %v1, i64 %v2, ptr %res) {
; RV32-LABEL: smulo.i64:
-; RV32: # %bb.0: # %entry
+; RV32: # %bb.0: # %overflow.entry
+; RV32-NEXT: srai a6, a0, 31
+; RV32-NEXT: srai a5, a2, 31
+; RV32-NEXT: beq a1, a6, .LBB21_3
+; RV32-NEXT: # %bb.1: # %overflow.lhs
+; RV32-NEXT: beq a3, a5, .LBB21_6
+; RV32-NEXT: # %bb.2: # %overflow
; RV32-NEXT: mulhu a5, a0, a2
; RV32-NEXT: mul a6, a1, a2
; RV32-NEXT: mulhsu a7, a1, a2
; RV32-NEXT: mul t0, a3, a0
; RV32-NEXT: mulh t1, a1, a3
-; RV32-NEXT: mul a1, a1, a3
+; RV32-NEXT: mul t2, a1, a3
; RV32-NEXT: mulhsu a3, a3, a0
-; RV32-NEXT: mul a2, a0, a2
-; RV32-NEXT: add a5, a6, a5
-; RV32-NEXT: sltu a0, a5, a6
-; RV32-NEXT: add a5, t0, a5
-; RV32-NEXT: add a0, a7, a0
-; RV32-NEXT: sltu a6, a5, t0
-; RV32-NEXT: srai a7, a5, 31
+; RV32-NEXT: add a1, a6, a5
+; RV32-NEXT: sltu a5, a1, a6
+; RV32-NEXT: add a1, t0, a1
+; RV32-NEXT: add a5, a7, a5
+; RV32-NEXT: sltu a6, a1, t0
; RV32-NEXT: add a3, a3, a6
-; RV32-NEXT: srai a6, a0, 31
-; RV32-NEXT: add t0, a0, a3
-; RV32-NEXT: srai a3, a3, 31
-; RV32-NEXT: sltu a0, t0, a0
+; RV32-NEXT: srai a6, a5, 31
+; RV32-NEXT: srai a7, a3, 31
+; RV32-NEXT: add a6, a6, a7
+; RV32-NEXT: srai a7, a1, 31
+; RV32-NEXT: add a3, a5, a3
+; RV32-NEXT: sltu a5, a3, a5
+; RV32-NEXT: add a3, t2, a3
+; RV32-NEXT: add a5, a6, a5
+; RV32-NEXT: sltu a6, a3, t2
+; RV32-NEXT: xor a3, a3, a7
+; RV32-NEXT: add a5, t1, a5
+; RV32-NEXT: add a5, a5, a6
+; RV32-NEXT: xor a5, a5, a7
+; RV32-NEXT: or a3, a3, a5
+; RV32-NEXT: snez a5, a3
+; RV32-NEXT: j .LBB21_9
+; RV32-NEXT: .LBB21_3: # %overflow.no.lhs
+; RV32-NEXT: beq a3, a5, .LBB21_8
+; RV32-NEXT: # %bb.4: # %overflow.no.lhs.only
+; RV32-NEXT: bltz a1, .LBB21_10
+; RV32-NEXT: # %bb.5: # %overflow.no.lhs.only
+; RV32-NEXT: mv a5, a0
+; RV32-NEXT: mv a6, a1
+; RV32-NEXT: bgez a1, .LBB21_11
+; RV32-NEXT: j .LBB21_12
+; RV32-NEXT: .LBB21_6: # %overflow.no.rhs.only
+; RV32-NEXT: bltz a3, .LBB21_14
+; RV32-NEXT: # %bb.7: # %overflow.no.rhs.only
+; RV32-NEXT: mv a5, a2
+; RV32-NEXT: mv a6, a3
+; RV32-NEXT: bgez a3, .LBB21_15
+; RV32-NEXT: j .LBB21_16
+; RV32-NEXT: .LBB21_8: # %overflow.no
+; RV32-NEXT: li a5, 0
+; RV32-NEXT: mulhu a6, a0, a2
+; RV32-NEXT: mul a3, a0, a3
; RV32-NEXT: add a3, a6, a3
-; RV32-NEXT: add t0, a1, t0
+; RV32-NEXT: mul a1, a1, a2
+; RV32-NEXT: add a1, a3, a1
+; RV32-NEXT: .LBB21_9: # %overflow.res
+; RV32-NEXT: mul a2, a0, a2
+; RV32-NEXT: j .LBB21_27
+; RV32-NEXT: .LBB21_10:
+; RV32-NEXT: neg a5, a0
+; RV32-NEXT: snez a6, a0
+; RV32-NEXT: neg a7, a1
+; RV32-NEXT: sub a6, a7, a6
+; RV32-NEXT: bltz a1, .LBB21_12
+; RV32-NEXT: .LBB21_11: # %overflow.no.lhs.only
+; RV32-NEXT: mv a6, a1
+; RV32-NEXT: mv a5, a0
+; RV32-NEXT: .LBB21_12: # %overflow.no.lhs.only
+; RV32-NEXT: bltz a3, .LBB21_18
+; RV32-NEXT: # %bb.13: # %overflow.no.lhs.only
+; RV32-NEXT: mv a7, a2
+; RV32-NEXT: mv a0, a3
+; RV32-NEXT: j .LBB21_19
+; RV32-NEXT: .LBB21_14:
+; RV32-NEXT: neg a5, a2
+; RV32-NEXT: snez a6, a2
+; RV32-NEXT: neg a7, a3
+; RV32-NEXT: sub a6, a7, a6
+; RV32-NEXT: bltz a3, .LBB21_16
+; RV32-NEXT: .LBB21_15: # %overflow.no.rhs.only
+; RV32-NEXT: mv a6, a3
+; RV32-NEXT: mv a5, a2
+; RV32-NEXT: .LBB21_16: # %overflow.no.rhs.only
+; RV32-NEXT: bltz a1, .LBB21_22
+; RV32-NEXT: # %bb.17: # %overflow.no.rhs.only
+; RV32-NEXT: mv a7, a0
+; RV32-NEXT: mv a2, a1
+; RV32-NEXT: j .LBB21_23
+; RV32-NEXT: .LBB21_18:
+; RV32-NEXT: neg a7, a2
+; RV32-NEXT: snez a0, a2
+; RV32-NEXT: neg t0, a3
+; RV32-NEXT: sub a0, t0, a0
+; RV32-NEXT: .LBB21_19: # %overflow.no.lhs.only
+; RV32-NEXT: slti a1, a1, 0
+; RV32-NEXT: slti t0, a3, 0
+; RV32-NEXT: bltz a3, .LBB21_21
+; RV32-NEXT: # %bb.20: # %overflow.no.lhs.only
+; RV32-NEXT: mv a0, a3
+; RV32-NEXT: mv a7, a2
+; RV32-NEXT: .LBB21_21: # %overflow.no.lhs.only
+; RV32-NEXT: mulhu a2, a5, a7
+; RV32-NEXT: mul a3, a6, a7
+; RV32-NEXT: mul a7, a5, a7
+; RV32-NEXT: mul a6, a6, a0
+; RV32-NEXT: mulhu t1, a5, a0
+; RV32-NEXT: mul a0, a5, a0
+; RV32-NEXT: xor a1, t0, a1
+; RV32-NEXT: add a2, a2, a3
+; RV32-NEXT: add a6, t1, a6
+; RV32-NEXT: neg a3, a1
+; RV32-NEXT: add a0, a2, a0
+; RV32-NEXT: xor a5, a7, a3
+; RV32-NEXT: sltu a7, a0, a2
+; RV32-NEXT: add a2, a5, a1
+; RV32-NEXT: xor a0, a0, a3
+; RV32-NEXT: add a6, a6, a7
+; RV32-NEXT: sltu a5, a2, a1
+; RV32-NEXT: add a1, a0, a5
+; RV32-NEXT: sltu a0, a1, a5
+; RV32-NEXT: xor a3, a6, a3
; RV32-NEXT: add a0, a3, a0
-; RV32-NEXT: sltu a1, t0, a1
-; RV32-NEXT: xor a3, t0, a7
-; RV32-NEXT: add a0, t1, a0
+; RV32-NEXT: j .LBB21_26
+; RV32-NEXT: .LBB21_22:
+; RV32-NEXT: neg a7, a0
+; RV32-NEXT: snez a2, a0
+; RV32-NEXT: neg t0, a1
+; RV32-NEXT: sub a2, t0, a2
+; RV32-NEXT: .LBB21_23: # %overflow.no.rhs.only
+; RV32-NEXT: slti a3, a3, 0
+; RV32-NEXT: slti t0, a1, 0
+; RV32-NEXT: bltz a1, .LBB21_25
+; RV32-NEXT: # %bb.24: # %overflow.no.rhs.only
+; RV32-NEXT: mv a2, a1
+; RV32-NEXT: mv a7, a0
+; RV32-NEXT: .LBB21_25: # %overflow.no.rhs.only
+; RV32-NEXT: mulhu a0, a5, a7
+; RV32-NEXT: mul a1, a6, a7
+; RV32-NEXT: mul a7, a5, a7
+; RV32-NEXT: mul a6, a6, a2
+; RV32-NEXT: mulhu t1, a5, a2
+; RV32-NEXT: mul a2, a5, a2
+; RV32-NEXT: xor a3, a3, t0
; RV32-NEXT: add a0, a0, a1
-; RV32-NEXT: xor a0, a0, a7
-; RV32-NEXT: or a0, a3, a0
-; RV32-NEXT: snez a0, a0
+; RV32-NEXT: add a6, t1, a6
+; RV32-NEXT: neg a5, a3
+; RV32-NEXT: add a1, a0, a2
+; RV32-NEXT: xor a2, a7, a5
+; RV32-NEXT: sltu a0, a1, a0
+; RV32-NEXT: add a2, a2, a3
+; RV32-NEXT: xor a1, a1, a5
+; RV32-NEXT: add a0, a6, a0
+; RV32-NEXT: sltu a3, a2, a3
+; RV32-NEXT: add a1, a1, a3
+; RV32-NEXT: sltu a3, a1, a3
+; RV32-NEXT: xor a0, a0, a5
+; RV32-NEXT: add a0, a0, a3
+; RV32-NEXT: .LBB21_26: # %overflow.res
+; RV32-NEXT: snez a5, a0
+; RV32-NEXT: .LBB21_27: # %overflow.res
+; RV32-NEXT: andi a0, a5, 1
; RV32-NEXT: sw a2, 0(a4)
-; RV32-NEXT: sw a5, 4(a4)
+; RV32-NEXT: sw a1, 4(a4)
; RV32-NEXT: ret
;
; RV64-LABEL: smulo.i64:
@@ -1359,38 +1494,173 @@ define zeroext i1 @smulo.i64(i64 %v1, i64 %v2, ptr %res) {
; RV64-NEXT: ret
;
; RV32ZBA-LABEL: smulo.i64:
-; RV32ZBA: # %bb.0: # %entry
+; RV32ZBA: # %bb.0: # %overflow.entry
+; RV32ZBA-NEXT: srai a6, a0, 31
+; RV32ZBA-NEXT: srai a5, a2, 31
+; RV32ZBA-NEXT: beq a1, a6, .LBB21_3
+; RV32ZBA-NEXT: # %bb.1: # %overflow.lhs
+; RV32ZBA-NEXT: beq a3, a5, .LBB21_6
+; RV32ZBA-NEXT: # %bb.2: # %overflow
; RV32ZBA-NEXT: mulhu a5, a0, a2
; RV32ZBA-NEXT: mul a6, a1, a2
; RV32ZBA-NEXT: mulhsu a7, a1, a2
; RV32ZBA-NEXT: mul t0, a3, a0
; RV32ZBA-NEXT: mulh t1, a1, a3
-; RV32ZBA-NEXT: mul a1, a1, a3
+; RV32ZBA-NEXT: mul t2, a1, a3
; RV32ZBA-NEXT: mulhsu a3, a3, a0
-; RV32ZBA-NEXT: mul a2, a0, a2
-; RV32ZBA-NEXT: add a5, a6, a5
-; RV32ZBA-NEXT: sltu a0, a5, a6
-; RV32ZBA-NEXT: add a5, t0, a5
-; RV32ZBA-NEXT: add a0, a7, a0
-; RV32ZBA-NEXT: sltu a6, a5, t0
-; RV32ZBA-NEXT: srai a7, a5, 31
+; RV32ZBA-NEXT: add a1, a6, a5
+; RV32ZBA-NEXT: sltu a5, a1, a6
+; RV32ZBA-NEXT: add a1, t0, a1
+; RV32ZBA-NEXT: add a5, a7, a5
+; RV32ZBA-NEXT: sltu a6, a1, t0
; RV32ZBA-NEXT: add a3, a3, a6
-; RV32ZBA-NEXT: srai a6, a0, 31
-; RV32ZBA-NEXT: add t0, a0, a3
-; RV32ZBA-NEXT: srai a3, a3, 31
-; RV32ZBA-NEXT: sltu a0, t0, a0
+; RV32ZBA-NEXT: srai a6, a5, 31
+; RV32ZBA-NEXT: srai a7, a3, 31
+; RV32ZBA-NEXT: add a6, a6, a7
+; RV32ZBA-NEXT: srai a7, a1, 31
+; RV32ZBA-NEXT: add a3, a5, a3
+; RV32ZBA-NEXT: sltu a5, a3, a5
+; RV32ZBA-NEXT: add a3, t2, a3
+; RV32ZBA-NEXT: add a5, a6, a5
+; RV32ZBA-NEXT: sltu a6, a3, t2
+; RV32ZBA-NEXT: xor a3, a3, a7
+; RV32ZBA-NEXT: add a5, t1, a5
+; RV32ZBA-NEXT: add a5, a5, a6
+; RV32ZBA-NEXT: xor a5, a5, a7
+; RV32ZBA-NEXT: or a3, a3, a5
+; RV32ZBA-NEXT: snez a5, a3
+; RV32ZBA-NEXT: j .LBB21_9
+; RV32ZBA-NEXT: .LBB21_3: # %overflow.no.lhs
+; RV32ZBA-NEXT: beq a3, a5, .LBB21_8
+; RV32ZBA-NEXT: # %bb.4: # %overflow.no.lhs.only
+; RV32ZBA-NEXT: bltz a1, .LBB21_10
+; RV32ZBA-NEXT: # %bb.5: # %overflow.no.lhs.only
+; RV32ZBA-NEXT: mv a5, a0
+; RV32ZBA-NEXT: mv a6, a1
+; RV32ZBA-NEXT: bgez a1, .LBB21_11
+; RV32ZBA-NEXT: j .LBB21_12
+; RV32ZBA-NEXT: .LBB21_6: # %overflow.no.rhs.only
+; RV32ZBA-NEXT: bltz a3, .LBB21_14
+; RV32ZBA-NEXT: # %bb.7: # %overflow.no.rhs.only
+; RV32ZBA-NEXT: mv a5, a2
+; RV32ZBA-NEXT: mv a6, a3
+; RV32ZBA-NEXT: bgez a3, .LBB21_15
+; RV32ZBA-NEXT: j .LBB21_16
+; RV32ZBA-NEXT: .LBB21_8: # %overflow.no
+; RV32ZBA-NEXT: li a5, 0
+; RV32ZBA-NEXT: mulhu a6, a0, a2
+; RV32ZBA-NEXT: mul a3, a0, a3
; RV32ZBA-NEXT: add a3, a6, a3
-; RV32ZBA-NEXT: add t0, a1, t0
+; RV32ZBA-NEXT: mul a1, a1, a2
+; RV32ZBA-NEXT: add a1, a3, a1
+; RV32ZBA-NEXT: .LBB21_9: # %overflow.res
+; RV32ZBA-NEXT: mul a2, a0, a2
+; RV32ZBA-NEXT: j .LBB21_27
+; RV32ZBA-NEXT: .LBB21_10:
+; RV32ZBA-NEXT: neg a5, a0
+; RV32ZBA-NEXT: snez a6, a0
+; RV32ZBA-NEXT: neg a7, a1
+; RV32ZBA-NEXT: sub a6, a7, a6
+; RV32ZBA-NEXT: bltz a1, .LBB21_12
+; RV32ZBA-NEXT: .LBB21_11: # %overflow.no.lhs.only
+; RV32ZBA-NEXT: mv a6, a1
+; RV32ZBA-NEXT: mv a5, a0
+; RV32ZBA-NEXT: .LBB21_12: # %overflow.no.lhs.only
+; RV32ZBA-NEXT: bltz a3, .LBB21_18
+; RV32ZBA-NEXT: # %bb.13: # %overflow.no.lhs.only
+; RV32ZBA-NEXT: mv a7, a2
+; RV32ZBA-NEXT: mv a0, a3
+; RV32ZBA-NEXT: j .LBB21_19
+; RV32ZBA-NEXT: .LBB21_14:
+; RV32ZBA-NEXT: neg a5, a2
+; RV32ZBA-NEXT: snez a6, a2
+; RV32ZBA-NEXT: neg a7, a3
+; RV32ZBA-NEXT: sub a6, a7, a6
+; RV32ZBA-NEXT: bltz a3, .LBB21_16
+; RV32ZBA-NEXT: .LBB21_15: # %overflow.no.rhs.only
+; RV32ZBA-NEXT: mv a6, a3
+; RV32ZBA-NEXT: mv a5, a2
+; RV32ZBA-NEXT: .LBB21_16: # %overflow.no.rhs.only
+; RV32ZBA-NEXT: bltz a1, .LBB21_22
+; RV32ZBA-NEXT: # %bb.17: # %overflow.no.rhs.only
+; RV32ZBA-NEXT: mv a7, a0
+; RV32ZBA-NEXT: mv a2, a1
+; RV32ZBA-NEXT: j .LBB21_23
+; RV32ZBA-NEXT: .LBB21_18:
+; RV32ZBA-NEXT: neg a7, a2
+; RV32ZBA-NEXT: snez a0, a2
+; RV32ZBA-NEXT: neg t0, a3
+; RV32ZBA-NEXT: sub a0, t0, a0
+; RV32ZBA-NEXT: .LBB21_19: # %overflow.no.lhs.only
+; RV32ZBA-NEXT: slti a1, a1, 0
+; RV32ZBA-NEXT: slti t0, a3, 0
+; RV32ZBA-NEXT: bltz a3, .LBB21_21
+; RV32ZBA-NEXT: # %bb.20: # %overflow.no.lhs.only
+; RV32ZBA-NEXT: mv a0, a3
+; RV32ZBA-NEXT: mv a7, a2
+; RV32ZBA-NEXT: .LBB21_21: # %overflow.no.lhs.only
+; RV32ZBA-NEXT: mulhu a2, a5, a7
+; RV32ZBA-NEXT: mul a3, a6, a7
+; RV32ZBA-NEXT: mul a7, a5, a7
+; RV32ZBA-NEXT: mul a6, a6, a0
+; RV32ZBA-NEXT: mulhu t1, a5, a0
+; RV32ZBA-NEXT: mul a0, a5, a0
+; RV32ZBA-NEXT: xor a1, t0, a1
+; RV32ZBA-NEXT: add a2, a2, a3
+; RV32ZBA-NEXT: add a6, t1, a6
+; RV32ZBA-NEXT: neg a3, a1
+; RV32ZBA-NEXT: add a0, a2, a0
+; RV32ZBA-NEXT: xor a5, a7, a3
+; RV32ZBA-NEXT: sltu a7, a0, a2
+; RV32ZBA-NEXT: add a2, a5, a1
+; RV32ZBA-NEXT: xor a0, a0, a3
+; RV32ZBA-NEXT: add a6, a6, a7
+; RV32ZBA-NEXT: sltu a5, a2, a1
+; RV32ZBA-NEXT: add a1, a0, a5
+; RV32ZBA-NEXT: sltu a0, a1, a5
+; RV32ZBA-NEXT: xor a3, a6, a3
; RV32ZBA-NEXT: add a0, a3, a0
-; RV32ZBA-NEXT: sltu a1, t0, a1
-; RV32ZBA-NEXT: xor a3, t0, a7
-; RV32ZBA-NEXT: add a0, t1, a0
+; RV32ZBA-NEXT: j .LBB21_26
+; RV32ZBA-NEXT: .LBB21_22:
+; RV32ZBA-NEXT: neg a7, a0
+; RV32ZBA-NEXT: snez a2, a0
+; RV32ZBA-NEXT: neg t0, a1
+; RV32ZBA-NEXT: sub a2, t0, a2
+; RV32ZBA-NEXT: .LBB21_23: # %overflow.no.rhs.only
+; RV32ZBA-NEXT: slti a3, a3, 0
+; RV32ZBA-NEXT: slti t0, a1, 0
+; RV32ZBA-NEXT: bltz a1, .LBB21_25
+; RV32ZBA-NEXT: # %bb.24: # %overflow.no.rhs.only
+; RV32ZBA-NEXT: mv a2, a1
+; RV32ZBA-NEXT: mv a7, a0
+; RV32ZBA-NEXT: .LBB21_25: # %overflow.no.rhs.only
+; RV32ZBA-NEXT: mulhu a0, a5, a7
+; RV32ZBA-NEXT: mul a1, a6, a7
+; RV32ZBA-NEXT: mul a7, a5, a7
+; RV32ZBA-NEXT: mul a6, a6, a2
+; RV32ZBA-NEXT: mulhu t1, a5, a2
+; RV32ZBA-NEXT: mul a2, a5, a2
+; RV32ZBA-NEXT: xor a3, a3, t0
; RV32ZBA-NEXT: add a0, a0, a1
-; RV32ZBA-NEXT: xor a0, a0, a7
-; RV32ZBA-NEXT: or a0, a3, a0
-; RV32ZBA-NEXT: snez a0, a0
+; RV32ZBA-NEXT: add a6, t1, a6
+; RV32ZBA-NEXT: neg a5, a3
+; RV32ZBA-NEXT: add a1, a0, a2
+; RV32ZBA-NEXT: xor a2, a7, a5
+; RV32ZBA-NEXT: sltu a0, a1, a0
+; RV32ZBA-NEXT: add a2, a2, a3
+; RV32ZBA-NEXT: xor a1, a1, a5
+; RV32ZBA-NEXT: add a0, a6, a0
+; RV32ZBA-NEXT: sltu a3, a2, a3
+; RV32ZBA-NEXT: add a1, a1, a3
+; RV32ZBA-NEXT: sltu a3, a1, a3
+; RV32ZBA-NEXT: xor a0, a0, a5
+; RV32ZBA-NEXT: add a0, a0, a3
+; RV32ZBA-NEXT: .LBB21_26: # %overflow.res
+; RV32ZBA-NEXT: snez a5, a0
+; RV32ZBA-NEXT: .LBB21_27: # %overflow.res
+; RV32ZBA-NEXT: andi a0, a5, 1
; RV32ZBA-NEXT: sw a2, 0(a4)
-; RV32ZBA-NEXT: sw a5, 4(a4)
+; RV32ZBA-NEXT: sw a1, 4(a4)
; RV32ZBA-NEXT: ret
;
; RV64ZBA-LABEL: smulo.i64:
@@ -1404,38 +1674,165 @@ define zeroext i1 @smulo.i64(i64 %v1, i64 %v2, ptr %res) {
; RV64ZBA-NEXT: ret
;
; RV32ZICOND-LABEL: smulo.i64:
-; RV32ZICOND: # %bb.0: # %entry
+; RV32ZICOND: # %bb.0: # %overflow.entry
+; RV32ZICOND-NEXT: srai a6, a0, 31
+; RV32ZICOND-NEXT: srai a5, a2, 31
+; RV32ZICOND-NEXT: beq a1, a6, .LBB21_3
+; RV32ZICOND-NEXT: # %bb.1: # %overflow.lhs
+; RV32ZICOND-NEXT: beq a3, a5, .LBB21_5
+; RV32ZICOND-NEXT: # %bb.2: # %overflow
; RV32ZICOND-NEXT: mulhu a5, a0, a2
; RV32ZICOND-NEXT: mul a6, a1, a2
; RV32ZICOND-NEXT: mulhsu a7, a1, a2
; RV32ZICOND-NEXT: mul t0, a3, a0
; RV32ZICOND-NEXT: mulh t1, a1, a3
-; RV32ZICOND-NEXT: mul a1, a1, a3
+; RV32ZICOND-NEXT: mul t2, a1, a3
; RV32ZICOND-NEXT: mulhsu a3, a3, a0
-; RV32ZICOND-NEXT: mul a2, a0, a2
-; RV32ZICOND-NEXT: add a5, a6, a5
-; RV32ZICOND-NEXT: sltu a0, a5, a6
-; RV32ZICOND-NEXT: add a5, t0, a5
-; RV32ZICOND-NEXT: add a0, a7, a0
-; RV32ZICOND-NEXT: sltu a6, a5, t0
-; RV32ZICOND-NEXT: srai a7, a5, 31
+; RV32ZICOND-NEXT: add a1, a6, a5
+; RV32ZICOND-NEXT: sltu a5, a1, a6
+; RV32ZICOND-NEXT: add a1, t0, a1
+; RV32ZICOND-NEXT: add a5, a7, a5
+; RV32ZICOND-NEXT: sltu a6, a1, t0
; RV32ZICOND-NEXT: add a3, a3, a6
-; RV32ZICOND-NEXT: srai a6, a0, 31
-; RV32ZICOND-NEXT: add t0, a0, a3
-; RV32ZICOND-NEXT: srai a3, a3, 31
-; RV32ZICOND-NEXT: sltu a0, t0, a0
-; RV32ZICOND-NEXT: add a3, a6, a3
-; RV32ZICOND-NEXT: add t0, a1, t0
+; RV32ZICOND-NEXT: srai a6, a5, 31
+; RV32ZICOND-NEXT: srai a7, a3, 31
+; RV32ZICOND-NEXT: add a6, a6, a7
+; RV32ZICOND-NEXT: srai a7, a1, 31
+; RV32ZICOND-NEXT: add a3, a5, a3
+; RV32ZICOND-NEXT: sltu a5, a3, a5
+; RV32ZICOND-NEXT: add a3, t2, a3
+; RV32ZICOND-NEXT: add a5, a6, a5
+; RV32ZICOND-NEXT: sltu a6, a3, t2
+; RV32ZICOND-NEXT: xor a3, a3, a7
+; RV32ZICOND-NEXT: add a5, t1, a5
+; RV32ZICOND-NEXT: add a5, a5, a6
+; RV32ZICOND-NEXT: xor a5, a5, a7
+; RV32ZICOND-NEXT: or a3, a3, a5
+; RV32ZICOND-NEXT: snez a5, a3
+; RV32ZICOND-NEXT: j .LBB21_7
+; RV32ZICOND-NEXT: .LBB21_3: # %overflow.no.lhs
+; RV32ZICOND-NEXT: beq a3, a5, .LBB21_6
+; RV32ZICOND-NEXT: # %bb.4: # %overflow.no.lhs.only
+; RV32ZICOND-NEXT: slti a5, a1, 0
+; RV32ZICOND-NEXT: neg a6, a0
+; RV32ZICOND-NEXT: snez a7, a0
+; RV32ZICOND-NEXT: neg t0, a1
+; RV32ZICOND-NEXT: snez t1, a2
+; RV32ZICOND-NEXT: sub a7, t0, a7
+; RV32ZICOND-NEXT: neg t0, a3
+; RV32ZICOND-NEXT: sub t0, t0, t1
+; RV32ZICOND-NEXT: slti t1, a3, 0
+; RV32ZICOND-NEXT: czero.eqz a6, a6, a5
+; RV32ZICOND-NEXT: czero.nez a0, a0, a5
+; RV32ZICOND-NEXT: or a6, a6, a0
+; RV32ZICOND-NEXT: czero.eqz a6, a6, a5
+; RV32ZICOND-NEXT: or a0, a6, a0
+; RV32ZICOND-NEXT: neg a6, a2
+; RV32ZICOND-NEXT: czero.nez a1, a1, a5
+; RV32ZICOND-NEXT: czero.eqz a6, a6, t1
+; RV32ZICOND-NEXT: czero.nez a2, a2, t1
+; RV32ZICOND-NEXT: czero.nez a3, a3, t1
+; RV32ZICOND-NEXT: czero.eqz a7, a7, a5
+; RV32ZICOND-NEXT: or a7, a7, a1
+; RV32ZICOND-NEXT: czero.eqz a7, a7, a5
+; RV32ZICOND-NEXT: xor a5, t1, a5
+; RV32ZICOND-NEXT: or a6, a6, a2
+; RV32ZICOND-NEXT: czero.eqz t0, t0, t1
+; RV32ZICOND-NEXT: or t0, t0, a3
+; RV32ZICOND-NEXT: czero.eqz a6, a6, t1
+; RV32ZICOND-NEXT: czero.eqz t0, t0, t1
+; RV32ZICOND-NEXT: neg t1, a5
+; RV32ZICOND-NEXT: or a2, a6, a2
+; RV32ZICOND-NEXT: or a1, a7, a1
+; RV32ZICOND-NEXT: or a3, t0, a3
+; RV32ZICOND-NEXT: mulhu a6, a0, a2
+; RV32ZICOND-NEXT: mul a7, a0, a2
+; RV32ZICOND-NEXT: mul a2, a1, a2
+; RV32ZICOND-NEXT: mul a1, a1, a3
+; RV32ZICOND-NEXT: mulhu t0, a0, a3
+; RV32ZICOND-NEXT: mul a0, a0, a3
+; RV32ZICOND-NEXT: xor a3, a7, t1
+; RV32ZICOND-NEXT: add a6, a6, a2
+; RV32ZICOND-NEXT: add a1, t0, a1
+; RV32ZICOND-NEXT: add a2, a3, a5
+; RV32ZICOND-NEXT: add a0, a6, a0
+; RV32ZICOND-NEXT: sltu a3, a2, a5
+; RV32ZICOND-NEXT: sltu a5, a0, a6
+; RV32ZICOND-NEXT: xor a0, a0, t1
+; RV32ZICOND-NEXT: add a5, a1, a5
+; RV32ZICOND-NEXT: add a1, a0, a3
+; RV32ZICOND-NEXT: sltu a0, a1, a3
+; RV32ZICOND-NEXT: xor a3, a5, t1
; RV32ZICOND-NEXT: add a0, a3, a0
-; RV32ZICOND-NEXT: sltu a1, t0, a1
-; RV32ZICOND-NEXT: xor a3, t0, a7
-; RV32ZICOND-NEXT: add a0, t1, a0
-; RV32ZICOND-NEXT: add a0, a0, a1
-; RV32ZICOND-NEXT: xor a0, a0, a7
-; RV32ZICOND-NEXT: or a0, a3, a0
-; RV32ZICOND-NEXT: snez a0, a0
+; RV32ZICOND-NEXT: snez a5, a0
+; RV32ZICOND-NEXT: j .LBB21_8
+; RV32ZICOND-NEXT: .LBB21_5: # %overflow.no.rhs.only
+; RV32ZICOND-NEXT: slti a5, a3, 0
+; RV32ZICOND-NEXT: neg a6, a2
+; RV32ZICOND-NEXT: snez a7, a2
+; RV32ZICOND-NEXT: neg t0, a3
+; RV32ZICOND-NEXT: snez t1, a0
+; RV32ZICOND-NEXT: sub a7, t0, a7
+; RV32ZICOND-NEXT: neg t0, a1
+; RV32ZICOND-NEXT: sub t0, t0, t1
+; RV32ZICOND-NEXT: slti t1, a1, 0
+; RV32ZICOND-NEXT: czero.eqz a6, a6, a5
+; RV32ZICOND-NEXT: czero.nez a2, a2, a5
+; RV32ZICOND-NEXT: or a6, a6, a2
+; RV32ZICOND-NEXT: czero.eqz a6, a6, a5
+; RV32ZICOND-NEXT: or a2, a6, a2
+; RV32ZICOND-NEXT: neg a6, a0
+; RV32ZICOND-NEXT: czero.nez a3, a3, a5
+; RV32ZICOND-NEXT: czero.eqz a6, a6, t1
+; RV32ZICOND-NEXT: czero.nez a0, a0, t1
+; RV32ZICOND-NEXT: czero.nez a1, a1, t1
+; RV32ZICOND-NEXT: czero.eqz a7, a7, a5
+; RV32ZICOND-NEXT: or a7, a7, a3
+; RV32ZICOND-NEXT: czero.eqz a7, a7, a5
+; RV32ZICOND-NEXT: xor a5, a5, t1
+; RV32ZICOND-NEXT: or a6, a6, a0
+; RV32ZICOND-NEXT: czero.eqz t0, t0, t1
+; RV32ZICOND-NEXT: or t0, t0, a1
+; RV32ZICOND-NEXT: czero.eqz a6, a6, t1
+; RV32ZICOND-NEXT: czero.eqz t0, t0, t1
+; RV32ZICOND-NEXT: neg t1, a5
+; RV32ZICOND-NEXT: or a0, a6, a0
+; RV32ZICOND-NEXT: or a3, a7, a3
+; RV32ZICOND-NEXT: or a1, t0, a1
+; RV32ZICOND-NEXT: mulhu a6, a2, a0
+; RV32ZICOND-NEXT: mul a7, a2, a0
+; RV32ZICOND-NEXT: mul a0, a3, a0
+; RV32ZICOND-NEXT: mul a3, a3, a1
+; RV32ZICOND-NEXT: mulhu t0, a2, a1
+; RV32ZICOND-NEXT: mul a1, a2, a1
+; RV32ZICOND-NEXT: xor a2, a7, t1
+; RV32ZICOND-NEXT: add a0, a6, a0
+; RV32ZICOND-NEXT: add a3, t0, a3
+; RV32ZICOND-NEXT: add a2, a2, a5
+; RV32ZICOND-NEXT: add a1, a0, a1
+; RV32ZICOND-NEXT: sltu a5, a2, a5
+; RV32ZICOND-NEXT: sltu a0, a1, a0
+; RV32ZICOND-NEXT: xor a1, a1, t1
+; RV32ZICOND-NEXT: add a0, a3, a0
+; RV32ZICOND-NEXT: add a1, a1, a5
+; RV32ZICOND-NEXT: sltu a3, a1, a5
+; RV32ZICOND-NEXT: xor a0, a0, t1
+; RV32ZICOND-NEXT: add a0, a0, a3
+; RV32ZICOND-NEXT: snez a5, a0
+; RV32ZICOND-NEXT: j .LBB21_8
+; RV32ZICOND-NEXT: .LBB21_6: # %overflow.no
+; RV32ZICOND-NEXT: li a5, 0
+; RV32ZICOND-NEXT: mulhu a6, a0, a2
+; RV32ZICOND-NEXT: mul a3, a0, a3
+; RV32ZICOND-NEXT: add a3, a6, a3
+; RV32ZICOND-NEXT: mul a1, a1, a2
+; RV32ZICOND-NEXT: add a1, a3, a1
+; RV32ZICOND-NEXT: .LBB21_7: # %overflow.res
+; RV32ZICOND-NEXT: mul a2, a0, a2
+; RV32ZICOND-NEXT: .LBB21_8: # %overflow.res
+; RV32ZICOND-NEXT: andi a0, a5, 1
; RV32ZICOND-NEXT: sw a2, 0(a4)
-; RV32ZICOND-NEXT: sw a5, 4(a4)
+; RV32ZICOND-NEXT: sw a1, 4(a4)
; RV32ZICOND-NEXT: ret
;
; RV64ZICOND-LABEL: smulo.i64:
@@ -1457,23 +1854,57 @@ entry:
define zeroext i1 @smulo2.i64(i64 %v1, ptr %res) {
; RV32-LABEL: smulo2.i64:
-; RV32: # %bb.0: # %entry
-; RV32-NEXT: li a3, 13
-; RV32-NEXT: mulhu a4, a0, a3
-; RV32-NEXT: mul a5, a1, a3
-; RV32-NEXT: mulh a1, a1, a3
-; RV32-NEXT: mul a3, a0, a3
-; RV32-NEXT: add a4, a5, a4
-; RV32-NEXT: sltu a0, a4, a5
-; RV32-NEXT: srai a5, a4, 31
-; RV32-NEXT: add a0, a1, a0
-; RV32-NEXT: xor a1, a0, a5
-; RV32-NEXT: srai a0, a0, 31
-; RV32-NEXT: xor a0, a0, a5
-; RV32-NEXT: or a0, a1, a0
-; RV32-NEXT: snez a0, a0
-; RV32-NEXT: sw a3, 0(a2)
-; RV32-NEXT: sw a4, 4(a2)
+; RV32: # %bb.0: # %overflow.entry
+; RV32-NEXT: srai a3, a0, 31
+; RV32-NEXT: beq a1, a3, .LBB22_3
+; RV32-NEXT: # %bb.1: # %overflow.lhs
+; RV32-NEXT: bltz a1, .LBB22_4
+; RV32-NEXT: # %bb.2: # %overflow.lhs
+; RV32-NEXT: mv a3, a0
+; RV32-NEXT: mv a4, a1
+; RV32-NEXT: bgez a1, .LBB22_5
+; RV32-NEXT: j .LBB22_6
+; RV32-NEXT: .LBB22_3: # %overflow.no.lhs
+; RV32-NEXT: li a4, 0
+; RV32-NEXT: li a5, 13
+; RV32-NEXT: mulhu a3, a0, a5
+; RV32-NEXT: mul a1, a1, a5
+; RV32-NEXT: add a3, a3, a1
+; RV32-NEXT: mul a1, a0, a5
+; RV32-NEXT: j .LBB22_7
+; RV32-NEXT: .LBB22_4:
+; RV32-NEXT: neg a3, a0
+; RV32-NEXT: snez a4, a0
+; RV32-NEXT: neg a5, a1
+; RV32-NEXT: sub a4, a5, a4
+; RV32-NEXT: bltz a1, .LBB22_6
+; RV32-NEXT: .LBB22_5: # %overflow.lhs
+; RV32-NEXT: mv a4, a1
+; RV32-NEXT: mv a3, a0
+; RV32-NEXT: .LBB22_6: # %overflow.lhs
+; RV32-NEXT: li a0, 13
+; RV32-NEXT: mul a5, a3, a0
+; RV32-NEXT: mulhu a3, a3, a0
+; RV32-NEXT: mulhu a6, a4, a0
+; RV32-NEXT: mul a0, a4, a0
+; RV32-NEXT: srai a4, a1, 31
+; RV32-NEXT: srli a7, a1, 31
+; RV32-NEXT: add a0, a3, a0
+; RV32-NEXT: xor a1, a5, a4
+; RV32-NEXT: sltu a3, a0, a3
+; RV32-NEXT: add a1, a1, a7
+; RV32-NEXT: xor a0, a0, a4
+; RV32-NEXT: add a6, a6, a3
+; RV32-NEXT: sltu a5, a1, a7
+; RV32-NEXT: add a3, a0, a5
+; RV32-NEXT: sltu a0, a3, a5
+; RV32-NEXT: xor a4, a6, a4
+; RV32-NEXT: add a0, a4, a0
+; RV32-NEXT: snez a4, a0
+; RV32-NEXT: .LBB22_7: # %overflow.res
+; RV32-NEXT: andi a0, a4, 1
+; RV32-NEXT: sw a1, 0(a2)
+; RV32-NEXT: sw a3, 4(a2)
; RV32-NEXT: ret
;
; RV64-LABEL: smulo2.i64:
@@ -1488,25 +1919,61 @@ define zeroext i1 @smulo2.i64(i64 %v1, ptr %res) {
; RV64-NEXT: ret
;
; RV32ZBA-LABEL: smulo2.i64:
-; RV32ZBA: # %bb.0: # %entry
-; RV32ZBA-NEXT: li a3, 13
+; RV32ZBA: # %bb.0: # %overflow.entry
+; RV32ZBA-NEXT: srai a3, a0, 31
+; RV32ZBA-NEXT: beq a1, a3, .LBB22_3
+; RV32ZBA-NEXT: # %bb.1: # %overflow.lhs
+; RV32ZBA-NEXT: bltz a1, .LBB22_4
+; RV32ZBA-NEXT: # %bb.2: # %overflow.lhs
+; RV32ZBA-NEXT: mv a3, a0
+; RV32ZBA-NEXT: mv a4, a1
+; RV32ZBA-NEXT: bgez a1, .LBB22_5
+; RV32ZBA-NEXT: j .LBB22_6
+; RV32ZBA-NEXT: .LBB22_3: # %overflow.no.lhs
+; RV32ZBA-NEXT: li a3, 0
; RV32ZBA-NEXT: sh1add a4, a1, a1
-; RV32ZBA-NEXT: sh1add a5, a0, a0
; RV32ZBA-NEXT: sh2add a4, a4, a1
-; RV32ZBA-NEXT: mulh a1, a1, a3
-; RV32ZBA-NEXT: mulhu a3, a0, a3
-; RV32ZBA-NEXT: sh2add a5, a5, a0
+; RV32ZBA-NEXT: li a1, 13
+; RV32ZBA-NEXT: mulhu a1, a0, a1
+; RV32ZBA-NEXT: add a4, a1, a4
+; RV32ZBA-NEXT: sh1add a1, a0, a0
+; RV32ZBA-NEXT: sh2add a1, a1, a0
+; RV32ZBA-NEXT: j .LBB22_7
+; RV32ZBA-NEXT: .LBB22_4:
+; RV32ZBA-NEXT: neg a3, a0
+; RV32ZBA-NEXT: snez a4, a0
+; RV32ZBA-NEXT: neg a5, a1
+; RV32ZBA-NEXT: sub a4, a5, a4
+; RV32ZBA-NEXT: bltz a1, .LBB22_6
+; RV32ZBA-NEXT: .LBB22_5: # %overflow.lhs
+; RV32ZBA-NEXT: mv a4, a1
+; RV32ZBA-NEXT: mv a3, a0
+; RV32ZBA-NEXT: .LBB22_6: # %overflow.lhs
+; RV32ZBA-NEXT: sh1add a0, a3, a3
+; RV32ZBA-NEXT: li a5, 13
+; RV32ZBA-NEXT: sh1add a6, a4, a4
+; RV32ZBA-NEXT: sh2add a0, a0, a3
+; RV32ZBA-NEXT: mulhu a3, a3, a5
+; RV32ZBA-NEXT: sh2add a6, a6, a4
+; RV32ZBA-NEXT: mulhu a4, a4, a5
+; RV32ZBA-NEXT: srai a5, a1, 31
+; RV32ZBA-NEXT: srli a7, a1, 31
+; RV32ZBA-NEXT: add a6, a3, a6
+; RV32ZBA-NEXT: xor a0, a0, a5
+; RV32ZBA-NEXT: sltu a3, a6, a3
+; RV32ZBA-NEXT: add a1, a0, a7
+; RV32ZBA-NEXT: xor a0, a6, a5
; RV32ZBA-NEXT: add a3, a4, a3
-; RV32ZBA-NEXT: sltu a0, a3, a4
-; RV32ZBA-NEXT: srai a4, a3, 31
-; RV32ZBA-NEXT: add a0, a1, a0
-; RV32ZBA-NEXT: xor a1, a0, a4
-; RV32ZBA-NEXT: srai a0, a0, 31
-; RV32ZBA-NEXT: xor a0, a0, a4
-; RV32ZBA-NEXT: or a0, a1, a0
-; RV32ZBA-NEXT: snez a0, a0
-; RV32ZBA-NEXT: sw a5, 0(a2)
-; RV32ZBA-NEXT: sw a3, 4(a2)
+; RV32ZBA-NEXT: sltu a6, a1, a7
+; RV32ZBA-NEXT: add a4, a0, a6
+; RV32ZBA-NEXT: sltu a0, a4, a6
+; RV32ZBA-NEXT: xor a3, a3, a5
+; RV32ZBA-NEXT: add a0, a3, a0
+; RV32ZBA-NEXT: snez a3, a0
+; RV32ZBA-NEXT: .LBB22_7: # %overflow.res
+; RV32ZBA-NEXT: andi a0, a3, 1
+; RV32ZBA-NEXT: sw a1, 0(a2)
+; RV32ZBA-NEXT: sw a4, 4(a2)
; RV32ZBA-NEXT: ret
;
; RV64ZBA-LABEL: smulo2.i64:
@@ -1522,23 +1989,56 @@ define zeroext i1 @smulo2.i64(i64 %v1, ptr %res) {
; RV64ZBA-NEXT: ret
;
; RV32ZICOND-LABEL: smulo2.i64:
-; RV32ZICOND: # %bb.0: # %entry
-; RV32ZICOND-NEXT: li a3, 13
-; RV32ZICOND-NEXT: mulhu a4, a0, a3
-; RV32ZICOND-NEXT: mul a5, a1, a3
-; RV32ZICOND-NEXT: mulh a1, a1, a3
-; RV32ZICOND-NEXT: mul a3, a0, a3
-; RV32ZICOND-NEXT: add a4, a5, a4
-; RV32ZICOND-NEXT: sltu a0, a4, a5
-; RV32ZICOND-NEXT: srai a5, a4, 31
-; RV32ZICOND-NEXT: add a0, a1, a0
-; RV32ZICOND-NEXT: xor a1, a0, a5
-; RV32ZICOND-NEXT: srai a0, a0, 31
+; RV32ZICOND: # %bb.0: # %overflow.entry
+; RV32ZICOND-NEXT: srai a3, a0, 31
+; RV32ZICOND-NEXT: beq a1, a3, .LBB22_2
+; RV32ZICOND-NEXT: # %bb.1: # %overflow.lhs
+; RV32ZICOND-NEXT: slti a3, a1, 0
+; RV32ZICOND-NEXT: neg a4, a0
+; RV32ZICOND-NEXT: snez a5, a0
+; RV32ZICOND-NEXT: neg a6, a1
+; RV32ZICOND-NEXT: czero.eqz a4, a4, a3
+; RV32ZICOND-NEXT: czero.nez a0, a0, a3
+; RV32ZICOND-NEXT: sub a5, a6, a5
+; RV32ZICOND-NEXT: czero.nez a6, a1, a3
+; RV32ZICOND-NEXT: or a4, a4, a0
+; RV32ZICOND-NEXT: czero.eqz a5, a5, a3
+; RV32ZICOND-NEXT: or a5, a5, a6
+; RV32ZICOND-NEXT: czero.eqz a4, a4, a3
+; RV32ZICOND-NEXT: czero.eqz a3, a5, a3
+; RV32ZICOND-NEXT: li a5, 13
+; RV32ZICOND-NEXT: or a0, a4, a0
+; RV32ZICOND-NEXT: or a3, a3, a6
+; RV32ZICOND-NEXT: mul a4, a0, a5
+; RV32ZICOND-NEXT: mulhu a0, a0, a5
+; RV32ZICOND-NEXT: mulhu a6, a3, a5
+; RV32ZICOND-NEXT: mul a3, a3, a5
+; RV32ZICOND-NEXT: srai a5, a1, 31
+; RV32ZICOND-NEXT: srli a7, a1, 31
+; RV32ZICOND-NEXT: xor a1, a4, a5
+; RV32ZICOND-NEXT: add a3, a0, a3
+; RV32ZICOND-NEXT: add a1, a1, a7
+; RV32ZICOND-NEXT: sltu a0, a3, a0
+; RV32ZICOND-NEXT: sltu a4, a1, a7
+; RV32ZICOND-NEXT: xor a3, a3, a5
+; RV32ZICOND-NEXT: add a0, a6, a0
+; RV32ZICOND-NEXT: add a3, a3, a4
+; RV32ZICOND-NEXT: sltu a4, a3, a4
; RV32ZICOND-NEXT: xor a0, a0, a5
-; RV32ZICOND-NEXT: or a0, a1, a0
-; RV32ZICOND-NEXT: snez a0, a0
-; RV32ZICOND-NEXT: sw a3, 0(a2)
-; RV32ZICOND-NEXT: sw a4, 4(a2)
+; RV32ZICOND-NEXT: add a0, a0, a4
+; RV32ZICOND-NEXT: snez a4, a0
+; RV32ZICOND-NEXT: j .LBB22_3
+; RV32ZICOND-NEXT: .LBB22_2: # %overflow.no.lhs
+; RV32ZICOND-NEXT: li a4, 0
+; RV32ZICOND-NEXT: li a5, 13
+; RV32ZICOND-NEXT: mulhu a3, a0, a5
+; RV32ZICOND-NEXT: mul a1, a1, a5
+; RV32ZICOND-NEXT: add a3, a3, a1
+; RV32ZICOND-NEXT: mul a1, a0, a5
+; RV32ZICOND-NEXT: .LBB22_3: # %overflow.res
+; RV32ZICOND-NEXT: andi a0, a4, 1
+; RV32ZICOND-NEXT: sw a1, 0(a2)
+; RV32ZICOND-NEXT: sw a3, 4(a2)
; RV32ZICOND-NEXT: ret
;
; RV64ZICOND-LABEL: smulo2.i64:
@@ -1766,26 +2266,71 @@ define signext i32 @umulo3.i32(i32 signext %0, i32 signext %1, ptr %2) {
define zeroext i1 @umulo.i64(i64 %v1, i64 %v2, ptr %res) {
; RV32-LABEL: umulo.i64:
-; RV32: # %bb.0: # %entry
+; RV32: # %bb.0: # %overflow.entry
+; RV32-NEXT: beqz a1, .LBB26_3
+; RV32-NEXT: # %bb.1: # %overflow.lhs
+; RV32-NEXT: beqz a3, .LBB26_5
+; RV32-NEXT: # %bb.2: # %overflow
; RV32-NEXT: mul a5, a3, a0
; RV32-NEXT: mul a6, a1, a2
; RV32-NEXT: mulhu a7, a0, a2
; RV32-NEXT: snez t0, a3
-; RV32-NEXT: mulhu a3, a3, a0
-; RV32-NEXT: mul t1, a0, a2
-; RV32-NEXT: mulhu a0, a1, a2
-; RV32-NEXT: snez a1, a1
; RV32-NEXT: add a5, a6, a5
-; RV32-NEXT: and a1, a1, t0
-; RV32-NEXT: snez a0, a0
-; RV32-NEXT: snez a2, a3
-; RV32-NEXT: add a5, a7, a5
-; RV32-NEXT: or a0, a1, a0
-; RV32-NEXT: sltu a1, a5, a7
-; RV32-NEXT: or a0, a0, a2
-; RV32-NEXT: or a0, a0, a1
-; RV32-NEXT: sw t1, 0(a4)
-; RV32-NEXT: sw a5, 4(a4)
+; RV32-NEXT: snez a6, a1
+; RV32-NEXT: mulhu a1, a1, a2
+; RV32-NEXT: mulhu a3, a3, a0
+; RV32-NEXT: and a6, a6, t0
+; RV32-NEXT: snez t0, a1
+; RV32-NEXT: snez a3, a3
+; RV32-NEXT: add a1, a7, a5
+; RV32-NEXT: or a5, a6, t0
+; RV32-NEXT: sltu a6, a1, a7
+; RV32-NEXT: or a3, a5, a3
+; RV32-NEXT: or a6, a3, a6
+; RV32-NEXT: j .LBB26_7
+; RV32-NEXT: .LBB26_3: # %overflow.no.lhs
+; RV32-NEXT: beqz a3, .LBB26_6
+; RV32-NEXT: # %bb.4: # %overflow.no.lhs.only
+; RV32-NEXT: mulhu a6, a0, a2
+; RV32-NEXT: mul a7, a1, a2
+; RV32-NEXT: mul a5, a0, a2
+; RV32-NEXT: add a6, a6, a7
+; RV32-NEXT: mulhu a2, a0, a3
+; RV32-NEXT: mul a1, a1, a3
+; RV32-NEXT: add a2, a2, a1
+; RV32-NEXT: mul a1, a0, a3
+; RV32-NEXT: add a1, a6, a1
+; RV32-NEXT: sltu a0, a1, a6
+; RV32-NEXT: add a0, a2, a0
+; RV32-NEXT: snez a6, a0
+; RV32-NEXT: j .LBB26_8
+; RV32-NEXT: .LBB26_5: # %overflow.no.rhs.only
+; RV32-NEXT: mulhu a6, a2, a0
+; RV32-NEXT: mul a7, a3, a0
+; RV32-NEXT: mul a5, a2, a0
+; RV32-NEXT: add a6, a6, a7
+; RV32-NEXT: mulhu a0, a2, a1
+; RV32-NEXT: mul a3, a3, a1
+; RV32-NEXT: add a0, a0, a3
+; RV32-NEXT: mul a1, a2, a1
+; RV32-NEXT: add a1, a6, a1
+; RV32-NEXT: sltu a2, a1, a6
+; RV32-NEXT: add a0, a0, a2
+; RV32-NEXT: snez a6, a0
+; RV32-NEXT: j .LBB26_8
+; RV32-NEXT: .LBB26_6: # %overflow.no
+; RV32-NEXT: li a6, 0
+; RV32-NEXT: mulhu a5, a0, a2
+; RV32-NEXT: mul a3, a0, a3
+; RV32-NEXT: add a3, a5, a3
+; RV32-NEXT: mul a1, a1, a2
+; RV32-NEXT: add a1, a3, a1
+; RV32-NEXT: .LBB26_7: # %overflow.res
+; RV32-NEXT: mul a5, a0, a2
+; RV32-NEXT: .LBB26_8: # %overflow.res
+; RV32-NEXT: andi a0, a6, 1
+; RV32-NEXT: sw a5, 0(a4)
+; RV32-NEXT: sw a1, 4(a4)
; RV32-NEXT: ret
;
; RV64-LABEL: umulo.i64:
@@ -1798,26 +2343,71 @@ define zeroext i1 @umulo.i64(i64 %v1, i64 %v2, ptr %res) {
; RV64-NEXT: ret
;
; RV32ZBA-LABEL: umulo.i64:
-; RV32ZBA: # %bb.0: # %entry
+; RV32ZBA: # %bb.0: # %overflow.entry
+; RV32ZBA-NEXT: beqz a1, .LBB26_3
+; RV32ZBA-NEXT: # %bb.1: # %overflow.lhs
+; RV32ZBA-NEXT: beqz a3, .LBB26_5
+; RV32ZBA-NEXT: # %bb.2: # %overflow
; RV32ZBA-NEXT: mul a5, a3, a0
; RV32ZBA-NEXT: mul a6, a1, a2
; RV32ZBA-NEXT: mulhu a7, a0, a2
; RV32ZBA-NEXT: snez t0, a3
-; RV32ZBA-NEXT: mulhu a3, a3, a0
-; RV32ZBA-NEXT: mul t1, a0, a2
-; RV32ZBA-NEXT: mulhu a0, a1, a2
-; RV32ZBA-NEXT: snez a1, a1
; RV32ZBA-NEXT: add a5, a6, a5
-; RV32ZBA-NEXT: and a1, a1, t0
-; RV32ZBA-NEXT: snez a0, a0
-; RV32ZBA-NEXT: snez a2, a3
-; RV32ZBA-NEXT: add a5, a7, a5
-; RV32ZBA-NEXT: or a0, a1, a0
-; RV32ZBA-NEXT: sltu a1, a5, a7
-; RV32ZBA-NEXT: or a0, a0, a2
-; RV32ZBA-NEXT: or a0, a0, a1
-; RV32ZBA-NEXT: sw t1, 0(a4)
-; RV32ZBA-NEXT: sw a5, 4(a4)
+; RV32ZBA-NEXT: snez a6, a1
+; RV32ZBA-NEXT: mulhu a1, a1, a2
+; RV32ZBA-NEXT: mulhu a3, a3, a0
+; RV32ZBA-NEXT: and a6, a6, t0
+; RV32ZBA-NEXT: snez t0, a1
+; RV32ZBA-NEXT: snez a3, a3
+; RV32ZBA-NEXT: add a1, a7, a5
+; RV32ZBA-NEXT: or a5, a6, t0
+; RV32ZBA-NEXT: sltu a6, a1, a7
+; RV32ZBA-NEXT: or a3, a5, a3
+; RV32ZBA-NEXT: or a6, a3, a6
+; RV32ZBA-NEXT: j .LBB26_7
+; RV32ZBA-NEXT: .LBB26_3: # %overflow.no.lhs
+; RV32ZBA-NEXT: beqz a3, .LBB26_6
+; RV32ZBA-NEXT: # %bb.4: # %overflow.no.lhs.only
+; RV32ZBA-NEXT: mulhu a6, a0, a2
+; RV32ZBA-NEXT: mul a7, a1, a2
+; RV32ZBA-NEXT: mul a5, a0, a2
+; RV32ZBA-NEXT: add a6, a6, a7
+; RV32ZBA-NEXT: mulhu a2, a0, a3
+; RV32ZBA-NEXT: mul a1, a1, a3
+; RV32ZBA-NEXT: add a2, a2, a1
+; RV32ZBA-NEXT: mul a1, a0, a3
+; RV32ZBA-NEXT: add a1, a6, a1
+; RV32ZBA-NEXT: sltu a0, a1, a6
+; RV32ZBA-NEXT: add a0, a2, a0
+; RV32ZBA-NEXT: snez a6, a0
+; RV32ZBA-NEXT: j .LBB26_8
+; RV32ZBA-NEXT: .LBB26_5: # %overflow.no.rhs.only
+; RV32ZBA-NEXT: mulhu a6, a2, a0
+; RV32ZBA-NEXT: mul a7, a3, a0
+; RV32ZBA-NEXT: mul a5, a2, a0
+; RV32ZBA-NEXT: add a6, a6, a7
+; RV32ZBA-NEXT: mulhu a0, a2, a1
+; RV32ZBA-NEXT: mul a3, a3, a1
+; RV32ZBA-NEXT: add a0, a0, a3
+; RV32ZBA-NEXT: mul a1, a2, a1
+; RV32ZBA-NEXT: add a1, a6, a1
+; RV32ZBA-NEXT: sltu a2, a1, a6
+; RV32ZBA-NEXT: add a0, a0, a2
+; RV32ZBA-NEXT: snez a6, a0
+; RV32ZBA-NEXT: j .LBB26_8
+; RV32ZBA-NEXT: .LBB26_6: # %overflow.no
+; RV32ZBA-NEXT: li a6, 0
+; RV32ZBA-NEXT: mulhu a5, a0, a2
+; RV32ZBA-NEXT: mul a3, a0, a3
+; RV32ZBA-NEXT: add a3, a5, a3
+; RV32ZBA-NEXT: mul a1, a1, a2
+; RV32ZBA-NEXT: add a1, a3, a1
+; RV32ZBA-NEXT: .LBB26_7: # %overflow.res
+; RV32ZBA-NEXT: mul a5, a0, a2
+; RV32ZBA-NEXT: .LBB26_8: # %overflow.res
+; RV32ZBA-NEXT: andi a0, a6, 1
+; RV32ZBA-NEXT: sw a5, 0(a4)
+; RV32ZBA-NEXT: sw a1, 4(a4)
; RV32ZBA-NEXT: ret
;
; RV64ZBA-LABEL: umulo.i64:
@@ -1830,26 +2420,71 @@ define zeroext i1 @umulo.i64(i64 %v1, i64 %v2, ptr %res) {
; RV64ZBA-NEXT: ret
;
; RV32ZICOND-LABEL: umulo.i64:
-; RV32ZICOND: # %bb.0: # %entry
+; RV32ZICOND: # %bb.0: # %overflow.entry
+; RV32ZICOND-NEXT: beqz a1, .LBB26_3
+; RV32ZICOND-NEXT: # %bb.1: # %overflow.lhs
+; RV32ZICOND-NEXT: beqz a3, .LBB26_5
+; RV32ZICOND-NEXT: # %bb.2: # %overflow
; RV32ZICOND-NEXT: mul a5, a3, a0
; RV32ZICOND-NEXT: mul a6, a1, a2
; RV32ZICOND-NEXT: mulhu a7, a0, a2
; RV32ZICOND-NEXT: snez t0, a3
-; RV32ZICOND-NEXT: mulhu a3, a3, a0
-; RV32ZICOND-NEXT: mul t1, a0, a2
-; RV32ZICOND-NEXT: mulhu a0, a1, a2
-; RV32ZICOND-NEXT: snez a1, a1
; RV32ZICOND-NEXT: add a5, a6, a5
-; RV32ZICOND-NEXT: and a1, a1, t0
-; RV32ZICOND-NEXT: snez a0, a0
-; RV32ZICOND-NEXT: snez a2, a3
-; RV32ZICOND-NEXT: add a5, a7, a5
-; RV32ZICOND-NEXT: or a0, a1, a0
-; RV32ZICOND-NEXT: sltu a1, a5, a7
-; RV32ZICOND-NEXT: or a0, a0, a2
-; RV32ZICOND-NEXT: or a0, a0, a1
-; RV32ZICOND-NEXT: sw t1, 0(a4)
-; RV32ZICOND-NEXT: sw a5, 4(a4)
+; RV32ZICOND-NEXT: snez a6, a1
+; RV32ZICOND-NEXT: mulhu a1, a1, a2
+; RV32ZICOND-NEXT: mulhu a3, a3, a0
+; RV32ZICOND-NEXT: and a6, a6, t0
+; RV32ZICOND-NEXT: snez t0, a1
+; RV32ZICOND-NEXT: snez a3, a3
+; RV32ZICOND-NEXT: add a1, a7, a5
+; RV32ZICOND-NEXT: or a5, a6, t0
+; RV32ZICOND-NEXT: sltu a6, a1, a7
+; RV32ZICOND-NEXT: or a3, a5, a3
+; RV32ZICOND-NEXT: or a6, a3, a6
+; RV32ZICOND-NEXT: j .LBB26_7
+; RV32ZICOND-NEXT: .LBB26_3: # %overflow.no.lhs
+; RV32ZICOND-NEXT: beqz a3, .LBB26_6
+; RV32ZICOND-NEXT: # %bb.4: # %overflow.no.lhs.only
+; RV32ZICOND-NEXT: mulhu a6, a0, a2
+; RV32ZICOND-NEXT: mul a7, a1, a2
+; RV32ZICOND-NEXT: mul a5, a0, a2
+; RV32ZICOND-NEXT: add a6, a6, a7
+; RV32ZICOND-NEXT: mulhu a2, a0, a3
+; RV32ZICOND-NEXT: mul a1, a1, a3
+; RV32ZICOND-NEXT: add a2, a2, a1
+; RV32ZICOND-NEXT: mul a1, a0, a3
+; RV32ZICOND-NEXT: add a1, a6, a1
+; RV32ZICOND-NEXT: sltu a0, a1, a6
+; RV32ZICOND-NEXT: add a0, a2, a0
+; RV32ZICOND-NEXT: snez a6, a0
+; RV32ZICOND-NEXT: j .LBB26_8
+; RV32ZICOND-NEXT: .LBB26_5: # %overflow.no.rhs.only
+; RV32ZICOND-NEXT: mulhu a6, a2, a0
+; RV32ZICOND-NEXT: mul a7, a3, a0
+; RV32ZICOND-NEXT: mul a5, a2, a0
+; RV32ZICOND-NEXT: add a6, a6, a7
+; RV32ZICOND-NEXT: mulhu a0, a2, a1
+; RV32ZICOND-NEXT: mul a3, a3, a1
+; RV32ZICOND-NEXT: add a0, a0, a3
+; RV32ZICOND-NEXT: mul a1, a2, a1
+; RV32ZICOND-NEXT: add a1, a6, a1
+; RV32ZICOND-NEXT: sltu a2, a1, a6
+; RV32ZICOND-NEXT: add a0, a0, a2
+; RV32ZICOND-NEXT: snez a6, a0
+; RV32ZICOND-NEXT: j .LBB26_8
+; RV32ZICOND-NEXT: .LBB26_6: # %overflow.no
+; RV32ZICOND-NEXT: li a6, 0
+; RV32ZICOND-NEXT: mulhu a5, a0, a2
+; RV32ZICOND-NEXT: mul a3, a0, a3
+; RV32ZICOND-NEXT: add a3, a5, a3
+; RV32ZICOND-NEXT: mul a1, a1, a2
+; RV32ZICOND-NEXT: add a1, a3, a1
+; RV32ZICOND-NEXT: .LBB26_7: # %overflow.res
+; RV32ZICOND-NEXT: mul a5, a0, a2
+; RV32ZICOND-NEXT: .LBB26_8: # %overflow.res
+; RV32ZICOND-NEXT: andi a0, a6, 1
+; RV32ZICOND-NEXT: sw a5, 0(a4)
+; RV32ZICOND-NEXT: sw a1, 4(a4)
; RV32ZICOND-NEXT: ret
;
; RV64ZICOND-LABEL: umulo.i64:
@@ -1870,18 +2505,30 @@ entry:
define zeroext i1 @umulo2.i64(i64 %v1, ptr %res) {
; RV32-LABEL: umulo2.i64:
-; RV32: # %bb.0: # %entry
+; RV32: # %bb.0: # %overflow.entry
+; RV32-NEXT: beqz a1, .LBB27_2
+; RV32-NEXT: # %bb.1: # %overflow.lhs
+; RV32-NEXT: li a4, 13
+; RV32-NEXT: mul a3, a0, a4
+; RV32-NEXT: mulhu a0, a0, a4
+; RV32-NEXT: mulhu a5, a1, a4
+; RV32-NEXT: mul a1, a1, a4
+; RV32-NEXT: add a1, a0, a1
+; RV32-NEXT: sltu a0, a1, a0
+; RV32-NEXT: add a0, a5, a0
+; RV32-NEXT: snez a4, a0
+; RV32-NEXT: j .LBB27_3
+; RV32-NEXT: .LBB27_2: # %overflow.no.lhs
+; RV32-NEXT: li a4, 0
; RV32-NEXT: li a3, 13
-; RV32-NEXT: mul a4, a1, a3
; RV32-NEXT: mulhu a5, a0, a3
-; RV32-NEXT: mulhu a1, a1, a3
+; RV32-NEXT: mul a1, a1, a3
+; RV32-NEXT: add a1, a5, a1
; RV32-NEXT: mul a3, a0, a3
-; RV32-NEXT: add a4, a5, a4
-; RV32-NEXT: snez a0, a1
-; RV32-NEXT: sltu a1, a4, a5
-; RV32-NEXT: or a0, a0, a1
+; RV32-NEXT: .LBB27_3: # %overflow.res
+; RV32-NEXT: andi a0, a4, 1
; RV32-NEXT: sw a3, 0(a2)
-; RV32-NEXT: sw a4, 4(a2)
+; RV32-NEXT: sw a1, 4(a2)
; RV32-NEXT: ret
;
; RV64-LABEL: umulo2.i64:
@@ -1895,20 +2542,34 @@ define zeroext i1 @umulo2.i64(i64 %v1, ptr %res) {
; RV64-NEXT: ret
;
; RV32ZBA-LABEL: umulo2.i64:
-; RV32ZBA: # %bb.0: # %entry
-; RV32ZBA-NEXT: li a3, 13
+; RV32ZBA: # %bb.0: # %overflow.entry
+; RV32ZBA-NEXT: beqz a1, .LBB27_2
+; RV32ZBA-NEXT: # %bb.1: # %overflow.lhs
+; RV32ZBA-NEXT: sh1add a3, a0, a0
+; RV32ZBA-NEXT: li a5, 13
+; RV32ZBA-NEXT: sh1add a6, a1, a1
+; RV32ZBA-NEXT: sh2add a4, a3, a0
+; RV32ZBA-NEXT: mulhu a0, a0, a5
+; RV32ZBA-NEXT: mulhu a3, a1, a5
+; RV32ZBA-NEXT: sh2add a1, a6, a1
+; RV32ZBA-NEXT: add a1, a0, a1
+; RV32ZBA-NEXT: sltu a0, a1, a0
+; RV32ZBA-NEXT: add a0, a3, a0
+; RV32ZBA-NEXT: snez a3, a0
+; RV32ZBA-NEXT: j .LBB27_3
+; RV32ZBA-NEXT: .LBB27_2: # %overflow.no.lhs
+; RV32ZBA-NEXT: li a3, 0
; RV32ZBA-NEXT: sh1add a4, a1, a1
-; RV32ZBA-NEXT: sh1add a5, a0, a0
-; RV32ZBA-NEXT: sh2add a4, a4, a1
-; RV32ZBA-NEXT: mulhu a1, a1, a3
-; RV32ZBA-NEXT: mulhu a3, a0, a3
-; RV32ZBA-NEXT: sh2add a5, a5, a0
-; RV32ZBA-NEXT: add a4, a3, a4
-; RV32ZBA-NEXT: snez a0, a1
-; RV32ZBA-NEXT: sltu a1, a4, a3
-; RV32ZBA-NEXT: or a0, a0, a1
-; RV32ZBA-NEXT: sw a5, 0(a2)
-; RV32ZBA-NEXT: sw a4, 4(a2)
+; RV32ZBA-NEXT: sh2add a1, a4, a1
+; RV32ZBA-NEXT: li a4, 13
+; RV32ZBA-NEXT: mulhu a4, a0, a4
+; RV32ZBA-NEXT: add a1, a4, a1
+; RV32ZBA-NEXT: sh1add a4, a0, a0
+; RV32ZBA-NEXT: sh2add a4, a4, a0
+; RV32ZBA-NEXT: .LBB27_3: # %overflow.res
+; RV32ZBA-NEXT: andi a0, a3, 1
+; RV32ZBA-NEXT: sw a4, 0(a2)
+; RV32ZBA-NEXT: sw a1, 4(a2)
; RV32ZBA-NEXT: ret
;
; RV64ZBA-LABEL: umulo2.i64:
@@ -1923,18 +2584,30 @@ define zeroext i1 @umulo2.i64(i64 %v1, ptr %res) {
; RV64ZBA-NEXT: ret
;
; RV32ZICOND-LABEL: umulo2.i64:
-; RV32ZICOND: # %bb.0: # %entry
+; RV32ZICOND: # %bb.0: # %overflow.entry
+; RV32ZICOND-NEXT: beqz a1, .LBB27_2
+; RV32ZICOND-NEXT: # %bb.1: # %overflow.lhs
+; RV32ZICOND-NEXT: li a4, 13
+; RV32ZICOND-NEXT: mul a3, a0, a4
+; RV32ZICOND-NEXT: mulhu a0, a0, a4
+; RV32ZICOND-NEXT: mulhu a5, a1, a4
+; RV32ZICOND-NEXT: mul a1, a1, a4
+; RV32ZICOND-NEXT: add a1, a0, a1
+; RV32ZICOND-NEXT: sltu a0, a1, a0
+; RV32ZICOND-NEXT: add a0, a5, a0
+; RV32ZICOND-NEXT: snez a4, a0
+; RV32ZICOND-NEXT: j .LBB27_3
+; RV32ZICOND-NEXT: .LBB27_2: # %overflow.no.lhs
+; RV32ZICOND-NEXT: li a4, 0
; RV32ZICOND-NEXT: li a3, 13
-; RV32ZICOND-NEXT: mul a4, a1, a3
; RV32ZICOND-NEXT: mulhu a5, a0, a3
-; RV32ZICOND-NEXT: mulhu a1, a1, a3
+; RV32ZICOND-NEXT: mul a1, a1, a3
+; RV32ZICOND-NEXT: add a1, a5, a1
; RV32ZICOND-NEXT: mul a3, a0, a3
-; RV32ZICOND-NEXT: add a4, a5, a4
-; RV32ZICOND-NEXT: snez a0, a1
-; RV32ZICOND-NEXT: sltu a1, a4, a5
-; RV32ZICOND-NEXT: or a0, a0, a1
+; RV32ZICOND-NEXT: .LBB27_3: # %overflow.res
+; RV32ZICOND-NEXT: andi a0, a4, 1
; RV32ZICOND-NEXT: sw a3, 0(a2)
-; RV32ZICOND-NEXT: sw a4, 4(a2)
+; RV32ZICOND-NEXT: sw a1, 4(a2)
; RV32ZICOND-NEXT: ret
;
; RV64ZICOND-LABEL: umulo2.i64:
@@ -3218,7 +3891,13 @@ entry:
define i64 @smulo.select.i64(i64 %v1, i64 %v2) {
; RV32-LABEL: smulo.select.i64:
-; RV32: # %bb.0: # %entry
+; RV32: # %bb.0: # %overflow.entry
+; RV32-NEXT: srai a5, a0, 31
+; RV32-NEXT: srai a4, a2, 31
+; RV32-NEXT: beq a1, a5, .LBB46_3
+; RV32-NEXT: # %bb.1: # %overflow.lhs
+; RV32-NEXT: beq a3, a4, .LBB46_6
+; RV32-NEXT: # %bb.2: # %overflow
; RV32-NEXT: mulhu a4, a0, a2
; RV32-NEXT: mul a5, a1, a2
; RV32-NEXT: mulhsu a6, a1, a2
@@ -3246,11 +3925,119 @@ define i64 @smulo.select.i64(i64 %v1, i64 %v2) {
; RV32-NEXT: xor a5, a5, a4
; RV32-NEXT: xor a4, a6, a4
; RV32-NEXT: or a4, a4, a5
-; RV32-NEXT: bnez a4, .LBB46_2
-; RV32-NEXT: # %bb.1: # %entry
+; RV32-NEXT: j .LBB46_26
+; RV32-NEXT: .LBB46_3: # %overflow.no.lhs
+; RV32-NEXT: beq a3, a4, .LBB46_8
+; RV32-NEXT: # %bb.4: # %overflow.no.lhs.only
+; RV32-NEXT: bltz a1, .LBB46_9
+; RV32-NEXT: # %bb.5: # %overflow.no.lhs.only
+; RV32-NEXT: mv a4, a0
+; RV32-NEXT: mv a5, a1
+; RV32-NEXT: bgez a1, .LBB46_10
+; RV32-NEXT: j .LBB46_11
+; RV32-NEXT: .LBB46_6: # %overflow.no.rhs.only
+; RV32-NEXT: bltz a3, .LBB46_13
+; RV32-NEXT: # %bb.7: # %overflow.no.rhs.only
+; RV32-NEXT: mv a4, a2
+; RV32-NEXT: mv a5, a3
+; RV32-NEXT: bgez a3, .LBB46_14
+; RV32-NEXT: j .LBB46_15
+; RV32-NEXT: .LBB46_8: # %overflow.no
+; RV32-NEXT: j .LBB46_27
+; RV32-NEXT: .LBB46_9:
+; RV32-NEXT: neg a4, a0
+; RV32-NEXT: snez a5, a0
+; RV32-NEXT: neg a6, a1
+; RV32-NEXT: sub a5, a6, a5
+; RV32-NEXT: bltz a1, .LBB46_11
+; RV32-NEXT: .LBB46_10: # %overflow.no.lhs.only
+; RV32-NEXT: mv a5, a1
+; RV32-NEXT: mv a4, a0
+; RV32-NEXT: .LBB46_11: # %overflow.no.lhs.only
+; RV32-NEXT: bltz a3, .LBB46_17
+; RV32-NEXT: # %bb.12: # %overflow.no.lhs.only
+; RV32-NEXT: mv a7, a2
+; RV32-NEXT: mv a6, a3
+; RV32-NEXT: j .LBB46_18
+; RV32-NEXT: .LBB46_13:
+; RV32-NEXT: neg a4, a2
+; RV32-NEXT: snez a5, a2
+; RV32-NEXT: neg a6, a3
+; RV32-NEXT: sub a5, a6, a5
+; RV32-NEXT: bltz a3, .LBB46_15
+; RV32-NEXT: .LBB46_14: # %overflow.no.rhs.only
+; RV32-NEXT: mv a5, a3
+; RV32-NEXT: mv a4, a2
+; RV32-NEXT: .LBB46_15: # %overflow.no.rhs.only
+; RV32-NEXT: bltz a1, .LBB46_21
+; RV32-NEXT: # %bb.16: # %overflow.no.rhs.only
+; RV32-NEXT: mv a7, a0
+; RV32-NEXT: mv a6, a1
+; RV32-NEXT: j .LBB46_22
+; RV32-NEXT: .LBB46_17:
+; RV32-NEXT: neg a7, a2
+; RV32-NEXT: snez a6, a2
+; RV32-NEXT: neg t0, a3
+; RV32-NEXT: sub a6, t0, a6
+; RV32-NEXT: .LBB46_18: # %overflow.no.lhs.only
+; RV32-NEXT: slti t0, a1, 0
+; RV32-NEXT: slti t1, a3, 0
+; RV32-NEXT: bltz a3, .LBB46_20
+; RV32-NEXT: # %bb.19: # %overflow.no.lhs.only
+; RV32-NEXT: mv a6, a3
+; RV32-NEXT: mv a7, a2
+; RV32-NEXT: .LBB46_20: # %overflow.no.lhs.only
+; RV32-NEXT: mulhu t2, a4, a7
+; RV32-NEXT: mul t3, a5, a7
+; RV32-NEXT: mul a7, a4, a7
+; RV32-NEXT: mul a5, a5, a6
+; RV32-NEXT: mulhu t4, a4, a6
+; RV32-NEXT: mul a4, a4, a6
+; RV32-NEXT: xor a6, t1, t0
+; RV32-NEXT: j .LBB46_25
+; RV32-NEXT: .LBB46_21:
+; RV32-NEXT: neg a7, a0
+; RV32-NEXT: snez a6, a0
+; RV32-NEXT: neg t0, a1
+; RV32-NEXT: sub a6, t0, a6
+; RV32-NEXT: .LBB46_22: # %overflow.no.rhs.only
+; RV32-NEXT: slti t0, a3, 0
+; RV32-NEXT: slti t1, a1, 0
+; RV32-NEXT: bltz a1, .LBB46_24
+; RV32-NEXT: # %bb.23: # %overflow.no.rhs.only
+; RV32-NEXT: mv a6, a1
+; RV32-NEXT: mv a7, a0
+; RV32-NEXT: .LBB46_24: # %overflow.no.rhs.only
+; RV32-NEXT: mulhu t2, a4, a7
+; RV32-NEXT: mul t3, a5, a7
+; RV32-NEXT: mul a7, a4, a7
+; RV32-NEXT: mul a5, a5, a6
+; RV32-NEXT: mulhu t4, a4, a6
+; RV32-NEXT: mul a4, a4, a6
+; RV32-NEXT: xor a6, t0, t1
+; RV32-NEXT: .LBB46_25: # %overflow.res
+; RV32-NEXT: add t2, t2, t3
+; RV32-NEXT: add a5, t4, a5
+; RV32-NEXT: neg t0, a6
+; RV32-NEXT: add a4, t2, a4
+; RV32-NEXT: xor a7, a7, t0
+; RV32-NEXT: sltu t1, a4, t2
+; RV32-NEXT: add a7, a7, a6
+; RV32-NEXT: xor a4, a4, t0
+; RV32-NEXT: add a5, a5, t1
+; RV32-NEXT: sltu a6, a7, a6
+; RV32-NEXT: add a4, a4, a6
+; RV32-NEXT: sltu a4, a4, a6
+; RV32-NEXT: xor a5, a5, t0
+; RV32-NEXT: add a4, a5, a4
+; RV32-NEXT: .LBB46_26: # %overflow.res
+; RV32-NEXT: snez a4, a4
+; RV32-NEXT: andi a4, a4, 1
+; RV32-NEXT: bnez a4, .LBB46_28
+; RV32-NEXT: .LBB46_27: # %overflow.res
; RV32-NEXT: mv a0, a2
; RV32-NEXT: mv a1, a3
-; RV32-NEXT: .LBB46_2: # %entry
+; RV32-NEXT: .LBB46_28: # %overflow.res
; RV32-NEXT: ret
;
; RV64-LABEL: smulo.select.i64:
@@ -3265,7 +4052,13 @@ define i64 @smulo.select.i64(i64 %v1, i64 %v2) {
; RV64-NEXT: ret
;
; RV32ZBA-LABEL: smulo.select.i64:
-; RV32ZBA: # %bb.0: # %entry
+; RV32ZBA: # %bb.0: # %overflow.entry
+; RV32ZBA-NEXT: srai a5, a0, 31
+; RV32ZBA-NEXT: srai a4, a2, 31
+; RV32ZBA-NEXT: beq a1, a5, .LBB46_3
+; RV32ZBA-NEXT: # %bb.1: # %overflow.lhs
+; RV32ZBA-NEXT: beq a3, a4, .LBB46_6
+; RV32ZBA-NEXT: # %bb.2: # %overflow
; RV32ZBA-NEXT: mulhu a4, a0, a2
; RV32ZBA-NEXT: mul a5, a1, a2
; RV32ZBA-NEXT: mulhsu a6, a1, a2
@@ -3293,11 +4086,119 @@ define i64 @smulo.select.i64(i64 %v1, i64 %v2) {
; RV32ZBA-NEXT: xor a5, a5, a4
; RV32ZBA-NEXT: xor a4, a6, a4
; RV32ZBA-NEXT: or a4, a4, a5
-; RV32ZBA-NEXT: bnez a4, .LBB46_2
-; RV32ZBA-NEXT: # %bb.1: # %entry
+; RV32ZBA-NEXT: j .LBB46_26
+; RV32ZBA-NEXT: .LBB46_3: # %overflow.no.lhs
+; RV32ZBA-NEXT: beq a3, a4, .LBB46_8
+; RV32ZBA-NEXT: # %bb.4: # %overflow.no.lhs.only
+; RV32ZBA-NEXT: bltz a1, .LBB46_9
+; RV32ZBA-NEXT: # %bb.5: # %overflow.no.lhs.only
+; RV32ZBA-NEXT: mv a4, a0
+; RV32ZBA-NEXT: mv a5, a1
+; RV32ZBA-NEXT: bgez a1, .LBB46_10
+; RV32ZBA-NEXT: j .LBB46_11
+; RV32ZBA-NEXT: .LBB46_6: # %overflow.no.rhs.only
+; RV32ZBA-NEXT: bltz a3, .LBB46_13
+; RV32ZBA-NEXT: # %bb.7: # %overflow.no.rhs.only
+; RV32ZBA-NEXT: mv a4, a2
+; RV32ZBA-NEXT: mv a5, a3
+; RV32ZBA-NEXT: bgez a3, .LBB46_14
+; RV32ZBA-NEXT: j .LBB46_15
+; RV32ZBA-NEXT: .LBB46_8: # %overflow.no
+; RV32ZBA-NEXT: j .LBB46_27
+; RV32ZBA-NEXT: .LBB46_9:
+; RV32ZBA-NEXT: neg a4, a0
+; RV32ZBA-NEXT: snez a5, a0
+; RV32ZBA-NEXT: neg a6, a1
+; RV32ZBA-NEXT: sub a5, a6, a5
+; RV32ZBA-NEXT: bltz a1, .LBB46_11
+; RV32ZBA-NEXT: .LBB46_10: # %overflow.no.lhs.only
+; RV32ZBA-NEXT: mv a5, a1
+; RV32ZBA-NEXT: mv a4, a0
+; RV32ZBA-NEXT: .LBB46_11: # %overflow.no.lhs.only
+; RV32ZBA-NEXT: bltz a3, .LBB46_17
+; RV32ZBA-NEXT: # %bb.12: # %overflow.no.lhs.only
+; RV32ZBA-NEXT: mv a7, a2
+; RV32ZBA-NEXT: mv a6, a3
+; RV32ZBA-NEXT: j .LBB46_18
+; RV32ZBA-NEXT: .LBB46_13:
+; RV32ZBA-NEXT: neg a4, a2
+; RV32ZBA-NEXT: snez a5, a2
+; RV32ZBA-NEXT: neg a6, a3
+; RV32ZBA-NEXT: sub a5, a6, a5
+; RV32ZBA-NEXT: bltz a3, .LBB46_15
+; RV32ZBA-NEXT: .LBB46_14: # %overflow.no.rhs.only
+; RV32ZBA-NEXT: mv a5, a3
+; RV32ZBA-NEXT: mv a4, a2
+; RV32ZBA-NEXT: .LBB46_15: # %overflow.no.rhs.only
+; RV32ZBA-NEXT: bltz a1, .LBB46_21
+; RV32ZBA-NEXT: # %bb.16: # %overflow.no.rhs.only
+; RV32ZBA-NEXT: mv a7, a0
+; RV32ZBA-NEXT: mv a6, a1
+; RV32ZBA-NEXT: j .LBB46_22
+; RV32ZBA-NEXT: .LBB46_17:
+; RV32ZBA-NEXT: neg a7, a2
+; RV32ZBA-NEXT: snez a6, a2
+; RV32ZBA-NEXT: neg t0, a3
+; RV32ZBA-NEXT: sub a6, t0, a6
+; RV32ZBA-NEXT: .LBB46_18: # %overflow.no.lhs.only
+; RV32ZBA-NEXT: slti t0, a1, 0
+; RV32ZBA-NEXT: slti t1, a3, 0
+; RV32ZBA-NEXT: bltz a3, .LBB46_20
+; RV32ZBA-NEXT: # %bb.19: # %overflow.no.lhs.only
+; RV32ZBA-NEXT: mv a6, a3
+; RV32ZBA-NEXT: mv a7, a2
+; RV32ZBA-NEXT: .LBB46_20: # %overflow.no.lhs.only
+; RV32ZBA-NEXT: mulhu t2, a4, a7
+; RV32ZBA-NEXT: mul t3, a5, a7
+; RV32ZBA-NEXT: mul a7, a4, a7
+; RV32ZBA-NEXT: mul a5, a5, a6
+; RV32ZBA-NEXT: mulhu t4, a4, a6
+; RV32ZBA-NEXT: mul a4, a4, a6
+; RV32ZBA-NEXT: xor a6, t1, t0
+; RV32ZBA-NEXT: j .LBB46_25
+; RV32ZBA-NEXT: .LBB46_21:
+; RV32ZBA-NEXT: neg a7, a0
+; RV32ZBA-NEXT: snez a6, a0
+; RV32ZBA-NEXT: neg t0, a1
+; RV32ZBA-NEXT: sub a6, t0, a6
+; RV32ZBA-NEXT: .LBB46_22: # %overflow.no.rhs.only
+; RV32ZBA-NEXT: slti t0, a3, 0
+; RV32ZBA-NEXT: slti t1, a1, 0
+; RV32ZBA-NEXT: bltz a1, .LBB46_24
+; RV32ZBA-NEXT: # %bb.23: # %overflow.no.rhs.only
+; RV32ZBA-NEXT: mv a6, a1
+; RV32ZBA-NEXT: mv a7, a0
+; RV32ZBA-NEXT: .LBB46_24: # %overflow.no.rhs.only
+; RV32ZBA-NEXT: mulhu t2, a4, a7
+; RV32ZBA-NEXT: mul t3, a5, a7
+; RV32ZBA-NEXT: mul a7, a4, a7
+; RV32ZBA-NEXT: mul a5, a5, a6
+; RV32ZBA-NEXT: mulhu t4, a4, a6
+; RV32ZBA-NEXT: mul a4, a4, a6
+; RV32ZBA-NEXT: xor a6, t0, t1
+; RV32ZBA-NEXT: .LBB46_25: # %overflow.res
+; RV32ZBA-NEXT: add t2, t2, t3
+; RV32ZBA-NEXT: add a5, t4, a5
+; RV32ZBA-NEXT: neg t0, a6
+; RV32ZBA-NEXT: add a4, t2, a4
+; RV32ZBA-NEXT: xor a7, a7, t0
+; RV32ZBA-NEXT: sltu t1, a4, t2
+; RV32ZBA-NEXT: add a7, a7, a6
+; RV32ZBA-NEXT: xor a4, a4, t0
+; RV32ZBA-NEXT: add a5, a5, t1
+; RV32ZBA-NEXT: sltu a6, a7, a6
+; RV32ZBA-NEXT: add a4, a4, a6
+; RV32ZBA-NEXT: sltu a4, a4, a6
+; RV32ZBA-NEXT: xor a5, a5, t0
+; RV32ZBA-NEXT: add a4, a5, a4
+; RV32ZBA-NEXT: .LBB46_26: # %overflow.res
+; RV32ZBA-NEXT: snez a4, a4
+; RV32ZBA-NEXT: andi a4, a4, 1
+; RV32ZBA-NEXT: bnez a4, .LBB46_28
+; RV32ZBA-NEXT: .LBB46_27: # %overflow.res
; RV32ZBA-NEXT: mv a0, a2
; RV32ZBA-NEXT: mv a1, a3
-; RV32ZBA-NEXT: .LBB46_2: # %entry
+; RV32ZBA-NEXT: .LBB46_28: # %overflow.res
; RV32ZBA-NEXT: ret
;
; RV64ZBA-LABEL: smulo.select.i64:
@@ -3312,7 +4213,13 @@ define i64 @smulo.select.i64(i64 %v1, i64 %v2) {
; RV64ZBA-NEXT: ret
;
; RV32ZICOND-LABEL: smulo.select.i64:
-; RV32ZICOND: # %bb.0: # %entry
+; RV32ZICOND: # %bb.0: # %overflow.entry
+; RV32ZICOND-NEXT: srai a5, a0, 31
+; RV32ZICOND-NEXT: srai a4, a2, 31
+; RV32ZICOND-NEXT: beq a1, a5, .LBB46_3
+; RV32ZICOND-NEXT: # %bb.1: # %overflow.lhs
+; RV32ZICOND-NEXT: beq a3, a4, .LBB46_5
+; RV32ZICOND-NEXT: # %bb.2: # %overflow
; RV32ZICOND-NEXT: mulhu a4, a0, a2
; RV32ZICOND-NEXT: mul a5, a1, a2
; RV32ZICOND-NEXT: mulhsu a6, a1, a2
@@ -3335,11 +4242,99 @@ define i64 @smulo.select.i64(i64 %v1, i64 %v2) {
; RV32ZICOND-NEXT: srai a4, a4, 31
; RV32ZICOND-NEXT: add a6, a7, a6
; RV32ZICOND-NEXT: sltu a7, a6, a7
-; RV32ZICOND-NEXT: xor a6, a6, a4
; RV32ZICOND-NEXT: add a5, t0, a5
; RV32ZICOND-NEXT: add a5, a5, a7
-; RV32ZICOND-NEXT: xor a4, a5, a4
-; RV32ZICOND-NEXT: or a4, a6, a4
+; RV32ZICOND-NEXT: xor a5, a5, a4
+; RV32ZICOND-NEXT: xor a4, a6, a4
+; RV32ZICOND-NEXT: or a4, a4, a5
+; RV32ZICOND-NEXT: j .LBB46_7
+; RV32ZICOND-NEXT: .LBB46_3: # %overflow.no.lhs
+; RV32ZICOND-NEXT: beq a3, a4, .LBB46_8
+; RV32ZICOND-NEXT: # %bb.4: # %overflow.no.lhs.only
+; RV32ZICOND-NEXT: slti a4, a1, 0
+; RV32ZICOND-NEXT: neg a5, a0
+; RV32ZICOND-NEXT: snez a6, a0
+; RV32ZICOND-NEXT: neg a7, a1
+; RV32ZICOND-NEXT: slti t0, a3, 0
+; RV32ZICOND-NEXT: neg t1, a2
+; RV32ZICOND-NEXT: snez t2, a2
+; RV32ZICOND-NEXT: neg t3, a3
+; RV32ZICOND-NEXT: czero.eqz a5, a5, a4
+; RV32ZICOND-NEXT: czero.nez t4, a0, a4
+; RV32ZICOND-NEXT: sub a6, a7, a6
+; RV32ZICOND-NEXT: czero.nez a7, a1, a4
+; RV32ZICOND-NEXT: czero.eqz t1, t1, t0
+; RV32ZICOND-NEXT: sub t2, t3, t2
+; RV32ZICOND-NEXT: czero.nez t3, a2, t0
+; RV32ZICOND-NEXT: or a5, a5, t4
+; RV32ZICOND-NEXT: czero.eqz a5, a5, a4
+; RV32ZICOND-NEXT: or a5, a5, t4
+; RV32ZICOND-NEXT: czero.nez t4, a3, t0
+; RV32ZICOND-NEXT: czero.eqz a6, a6, a4
+; RV32ZICOND-NEXT: or a6, a6, a7
+; RV32ZICOND-NEXT: czero.eqz a6, a6, a4
+; RV32ZICOND-NEXT: xor a4, t0, a4
+; RV32ZICOND-NEXT: j .LBB46_6
+; RV32ZICOND-NEXT: .LBB46_5: # %overflow.no.rhs.only
+; RV32ZICOND-NEXT: slti a4, a3, 0
+; RV32ZICOND-NEXT: neg a5, a2
+; RV32ZICOND-NEXT: snez a6, a2
+; RV32ZICOND-NEXT: neg a7, a3
+; RV32ZICOND-NEXT: slti t0, a1, 0
+; RV32ZICOND-NEXT: neg t1, a0
+; RV32ZICOND-NEXT: snez t2, a0
+; RV32ZICOND-NEXT: neg t3, a1
+; RV32ZICOND-NEXT: czero.eqz a5, a5, a4
+; RV32ZICOND-NEXT: czero.nez t4, a2, a4
+; RV32ZICOND-NEXT: sub a6, a7, a6
+; RV32ZICOND-NEXT: czero.nez a7, a3, a4
+; RV32ZICOND-NEXT: czero.eqz t1, t1, t0
+; RV32ZICOND-NEXT: sub t2, t3, t2
+; RV32ZICOND-NEXT: czero.nez t3, a0, t0
+; RV32ZICOND-NEXT: or a5, a5, t4
+; RV32ZICOND-NEXT: czero.eqz a5, a5, a4
+; RV32ZICOND-NEXT: or a5, a5, t4
+; RV32ZICOND-NEXT: czero.nez t4, a1, t0
+; RV32ZICOND-NEXT: czero.eqz a6, a6, a4
+; RV32ZICOND-NEXT: or a6, a6, a7
+; RV32ZICOND-NEXT: czero.eqz a6, a6, a4
+; RV32ZICOND-NEXT: xor a4, a4, t0
+; RV32ZICOND-NEXT: .LBB46_6: # %overflow.res
+; RV32ZICOND-NEXT: or t1, t1, t3
+; RV32ZICOND-NEXT: czero.eqz t2, t2, t0
+; RV32ZICOND-NEXT: or t2, t2, t4
+; RV32ZICOND-NEXT: czero.eqz t1, t1, t0
+; RV32ZICOND-NEXT: czero.eqz t0, t2, t0
+; RV32ZICOND-NEXT: or t1, t1, t3
+; RV32ZICOND-NEXT: or a6, a6, a7
+; RV32ZICOND-NEXT: or a7, t0, t4
+; RV32ZICOND-NEXT: mulhu t0, a5, t1
+; RV32ZICOND-NEXT: mul t2, a5, t1
+; RV32ZICOND-NEXT: mul t1, a6, t1
+; RV32ZICOND-NEXT: mul a6, a6, a7
+; RV32ZICOND-NEXT: mulhu t3, a5, a7
+; RV32ZICOND-NEXT: mul a5, a5, a7
+; RV32ZICOND-NEXT: neg a7, a4
+; RV32ZICOND-NEXT: xor t2, t2, a7
+; RV32ZICOND-NEXT: add t0, t0, t1
+; RV32ZICOND-NEXT: add a6, t3, a6
+; RV32ZICOND-NEXT: add t2, t2, a4
+; RV32ZICOND-NEXT: add a5, t0, a5
+; RV32ZICOND-NEXT: sltu a4, t2, a4
+; RV32ZICOND-NEXT: sltu t0, a5, t0
+; RV32ZICOND-NEXT: xor a5, a5, a7
+; RV32ZICOND-NEXT: add a6, a6, t0
+; RV32ZICOND-NEXT: add a5, a5, a4
+; RV32ZICOND-NEXT: sltu a4, a5, a4
+; RV32ZICOND-NEXT: xor a5, a6, a7
+; RV32ZICOND-NEXT: add a4, a5, a4
+; RV32ZICOND-NEXT: .LBB46_7: # %overflow.res
+; RV32ZICOND-NEXT: snez a4, a4
+; RV32ZICOND-NEXT: j .LBB46_9
+; RV32ZICOND-NEXT: .LBB46_8: # %overflow.no
+; RV32ZICOND-NEXT: li a4, 0
+; RV32ZICOND-NEXT: .LBB46_9: # %overflow.res
+; RV32ZICOND-NEXT: andi a4, a4, 1
; RV32ZICOND-NEXT: czero.nez a2, a2, a4
; RV32ZICOND-NEXT: czero.eqz a0, a0, a4
; RV32ZICOND-NEXT: czero.nez a3, a3, a4
@@ -3367,7 +4362,13 @@ entry:
define i1 @smulo.not.i64(i64 %v1, i64 %v2) {
; RV32-LABEL: smulo.not.i64:
-; RV32: # %bb.0: # %entry
+; RV32: # %bb.0: # %overflow.entry
+; RV32-NEXT: srai a5, a0, 31
+; RV32-NEXT: srai a4, a2, 31
+; RV32-NEXT: beq a1, a5, .LBB47_3
+; RV32-NEXT: # %bb.1: # %overflow.lhs
+; RV32-NEXT: beq a3, a4, .LBB47_6
+; RV32-NEXT: # %bb.2: # %overflow
; RV32-NEXT: mulhu a4, a0, a2
; RV32-NEXT: mul a5, a1, a2
; RV32-NEXT: mulhsu a2, a1, a2
@@ -3395,27 +4396,154 @@ define i1 @smulo.not.i64(i64 %v1, i64 %v2) {
; RV32-NEXT: xor a0, a0, a4
; RV32-NEXT: xor a4, a5, a4
; RV32-NEXT: or a0, a4, a0
-; RV32-NEXT: seqz a0, a0
+; RV32-NEXT: j .LBB47_25
+; RV32-NEXT: .LBB47_3: # %overflow.no.lhs
+; RV32-NEXT: beq a3, a4, .LBB47_8
+; RV32-NEXT: # %bb.4: # %overflow.no.lhs.only
+; RV32-NEXT: bltz a1, .LBB47_9
+; RV32-NEXT: # %bb.5: # %overflow.no.lhs.only
+; RV32-NEXT: mv a4, a0
+; RV32-NEXT: mv a5, a1
+; RV32-NEXT: bgez a1, .LBB47_10
+; RV32-NEXT: j .LBB47_11
+; RV32-NEXT: .LBB47_6: # %overflow.no.rhs.only
+; RV32-NEXT: bltz a3, .LBB47_13
+; RV32-NEXT: # %bb.7: # %overflow.no.rhs.only
+; RV32-NEXT: mv a4, a2
+; RV32-NEXT: mv a5, a3
+; RV32-NEXT: bgez a3, .LBB47_14
+; RV32-NEXT: j .LBB47_15
+; RV32-NEXT: .LBB47_8: # %overflow.no
+; RV32-NEXT: li a0, 1
; RV32-NEXT: ret
-;
-; RV64-LABEL: smulo.not.i64:
-; RV64: # %bb.0: # %entry
-; RV64-NEXT: mulh a2, a0, a1
-; RV64-NEXT: mul a0, a0, a1
-; RV64-NEXT: srai a0, a0, 63
-; RV64-NEXT: xor a0, a2, a0
-; RV64-NEXT: seqz a0, a0
-; RV64-NEXT: ret
-;
-; RV32ZBA-LABEL: smulo.not.i64:
-; RV32ZBA: # %bb.0: # %entry
-; RV32ZBA-NEXT: mulhu a4, a0, a2
-; RV32ZBA-NEXT: mul a5, a1, a2
-; RV32ZBA-NEXT: mulhsu a2, a1, a2
-; RV32ZBA-NEXT: mul a6, a3, a0
-; RV32ZBA-NEXT: mulhsu a0, a3, a0
-; RV32ZBA-NEXT: mulh a7, a1, a3
-; RV32ZBA-NEXT: mul a1, a1, a3
+; RV32-NEXT: .LBB47_9:
+; RV32-NEXT: neg a4, a0
+; RV32-NEXT: snez a5, a0
+; RV32-NEXT: neg a6, a1
+; RV32-NEXT: sub a5, a6, a5
+; RV32-NEXT: bltz a1, .LBB47_11
+; RV32-NEXT: .LBB47_10: # %overflow.no.lhs.only
+; RV32-NEXT: mv a5, a1
+; RV32-NEXT: mv a4, a0
+; RV32-NEXT: .LBB47_11: # %overflow.no.lhs.only
+; RV32-NEXT: bltz a3, .LBB47_17
+; RV32-NEXT: # %bb.12: # %overflow.no.lhs.only
+; RV32-NEXT: mv a6, a2
+; RV32-NEXT: mv a0, a3
+; RV32-NEXT: j .LBB47_18
+; RV32-NEXT: .LBB47_13:
+; RV32-NEXT: neg a4, a2
+; RV32-NEXT: snez a5, a2
+; RV32-NEXT: neg a6, a3
+; RV32-NEXT: sub a5, a6, a5
+; RV32-NEXT: bltz a3, .LBB47_15
+; RV32-NEXT: .LBB47_14: # %overflow.no.rhs.only
+; RV32-NEXT: mv a5, a3
+; RV32-NEXT: mv a4, a2
+; RV32-NEXT: .LBB47_15: # %overflow.no.rhs.only
+; RV32-NEXT: bltz a1, .LBB47_21
+; RV32-NEXT: # %bb.16: # %overflow.no.rhs.only
+; RV32-NEXT: mv a6, a0
+; RV32-NEXT: mv a2, a1
+; RV32-NEXT: j .LBB47_22
+; RV32-NEXT: .LBB47_17:
+; RV32-NEXT: neg a6, a2
+; RV32-NEXT: snez a0, a2
+; RV32-NEXT: neg a7, a3
+; RV32-NEXT: sub a0, a7, a0
+; RV32-NEXT: .LBB47_18: # %overflow.no.lhs.only
+; RV32-NEXT: slti a1, a1, 0
+; RV32-NEXT: slti a7, a3, 0
+; RV32-NEXT: bltz a3, .LBB47_20
+; RV32-NEXT: # %bb.19: # %overflow.no.lhs.only
+; RV32-NEXT: mv a0, a3
+; RV32-NEXT: mv a6, a2
+; RV32-NEXT: .LBB47_20: # %overflow.no.lhs.only
+; RV32-NEXT: mulhu a2, a4, a6
+; RV32-NEXT: mul a3, a5, a6
+; RV32-NEXT: mul a6, a4, a6
+; RV32-NEXT: mul a5, a5, a0
+; RV32-NEXT: mulhu t0, a4, a0
+; RV32-NEXT: mul a0, a4, a0
+; RV32-NEXT: xor a1, a7, a1
+; RV32-NEXT: add a2, a2, a3
+; RV32-NEXT: add a5, t0, a5
+; RV32-NEXT: neg a3, a1
+; RV32-NEXT: add a0, a2, a0
+; RV32-NEXT: xor a4, a6, a3
+; RV32-NEXT: sltu a2, a0, a2
+; RV32-NEXT: add a4, a4, a1
+; RV32-NEXT: xor a0, a0, a3
+; RV32-NEXT: add a2, a5, a2
+; RV32-NEXT: sltu a1, a4, a1
+; RV32-NEXT: add a0, a0, a1
+; RV32-NEXT: sltu a0, a0, a1
+; RV32-NEXT: xor a2, a2, a3
+; RV32-NEXT: add a0, a2, a0
+; RV32-NEXT: j .LBB47_25
+; RV32-NEXT: .LBB47_21:
+; RV32-NEXT: neg a6, a0
+; RV32-NEXT: snez a2, a0
+; RV32-NEXT: neg a7, a1
+; RV32-NEXT: sub a2, a7, a2
+; RV32-NEXT: .LBB47_22: # %overflow.no.rhs.only
+; RV32-NEXT: slti a3, a3, 0
+; RV32-NEXT: slti a7, a1, 0
+; RV32-NEXT: bltz a1, .LBB47_24
+; RV32-NEXT: # %bb.23: # %overflow.no.rhs.only
+; RV32-NEXT: mv a2, a1
+; RV32-NEXT: mv a6, a0
+; RV32-NEXT: .LBB47_24: # %overflow.no.rhs.only
+; RV32-NEXT: mulhu a0, a4, a6
+; RV32-NEXT: mul a1, a5, a6
+; RV32-NEXT: mul a6, a4, a6
+; RV32-NEXT: mul a5, a5, a2
+; RV32-NEXT: mulhu t0, a4, a2
+; RV32-NEXT: mul a2, a4, a2
+; RV32-NEXT: xor a3, a3, a7
+; RV32-NEXT: add a0, a0, a1
+; RV32-NEXT: add a5, t0, a5
+; RV32-NEXT: neg a1, a3
+; RV32-NEXT: add a2, a0, a2
+; RV32-NEXT: xor a4, a6, a1
+; RV32-NEXT: sltu a0, a2, a0
+; RV32-NEXT: add a4, a4, a3
+; RV32-NEXT: xor a2, a2, a1
+; RV32-NEXT: add a0, a5, a0
+; RV32-NEXT: sltu a3, a4, a3
+; RV32-NEXT: add a2, a2, a3
+; RV32-NEXT: sltu a2, a2, a3
+; RV32-NEXT: xor a0, a0, a1
+; RV32-NEXT: add a0, a0, a2
+; RV32-NEXT: .LBB47_25: # %overflow.res
+; RV32-NEXT: snez a0, a0
+; RV32-NEXT: xori a0, a0, 1
+; RV32-NEXT: ret
+;
+; RV64-LABEL: smulo.not.i64:
+; RV64: # %bb.0: # %entry
+; RV64-NEXT: mulh a2, a0, a1
+; RV64-NEXT: mul a0, a0, a1
+; RV64-NEXT: srai a0, a0, 63
+; RV64-NEXT: xor a0, a2, a0
+; RV64-NEXT: seqz a0, a0
+; RV64-NEXT: ret
+;
+; RV32ZBA-LABEL: smulo.not.i64:
+; RV32ZBA: # %bb.0: # %overflow.entry
+; RV32ZBA-NEXT: srai a5, a0, 31
+; RV32ZBA-NEXT: srai a4, a2, 31
+; RV32ZBA-NEXT: beq a1, a5, .LBB47_3
+; RV32ZBA-NEXT: # %bb.1: # %overflow.lhs
+; RV32ZBA-NEXT: beq a3, a4, .LBB47_6
+; RV32ZBA-NEXT: # %bb.2: # %overflow
+; RV32ZBA-NEXT: mulhu a4, a0, a2
+; RV32ZBA-NEXT: mul a5, a1, a2
+; RV32ZBA-NEXT: mulhsu a2, a1, a2
+; RV32ZBA-NEXT: mul a6, a3, a0
+; RV32ZBA-NEXT: mulhsu a0, a3, a0
+; RV32ZBA-NEXT: mulh a7, a1, a3
+; RV32ZBA-NEXT: mul a1, a1, a3
; RV32ZBA-NEXT: add a4, a5, a4
; RV32ZBA-NEXT: sltu a3, a4, a5
; RV32ZBA-NEXT: add a4, a6, a4
@@ -3436,7 +4564,128 @@ define i1 @smulo.not.i64(i64 %v1, i64 %v2) {
; RV32ZBA-NEXT: xor a0, a0, a4
; RV32ZBA-NEXT: xor a4, a5, a4
; RV32ZBA-NEXT: or a0, a4, a0
-; RV32ZBA-NEXT: seqz a0, a0
+; RV32ZBA-NEXT: j .LBB47_25
+; RV32ZBA-NEXT: .LBB47_3: # %overflow.no.lhs
+; RV32ZBA-NEXT: beq a3, a4, .LBB47_8
+; RV32ZBA-NEXT: # %bb.4: # %overflow.no.lhs.only
+; RV32ZBA-NEXT: bltz a1, .LBB47_9
+; RV32ZBA-NEXT: # %bb.5: # %overflow.no.lhs.only
+; RV32ZBA-NEXT: mv a4, a0
+; RV32ZBA-NEXT: mv a5, a1
+; RV32ZBA-NEXT: bgez a1, .LBB47_10
+; RV32ZBA-NEXT: j .LBB47_11
+; RV32ZBA-NEXT: .LBB47_6: # %overflow.no.rhs.only
+; RV32ZBA-NEXT: bltz a3, .LBB47_13
+; RV32ZBA-NEXT: # %bb.7: # %overflow.no.rhs.only
+; RV32ZBA-NEXT: mv a4, a2
+; RV32ZBA-NEXT: mv a5, a3
+; RV32ZBA-NEXT: bgez a3, .LBB47_14
+; RV32ZBA-NEXT: j .LBB47_15
+; RV32ZBA-NEXT: .LBB47_8: # %overflow.no
+; RV32ZBA-NEXT: li a0, 1
+; RV32ZBA-NEXT: ret
+; RV32ZBA-NEXT: .LBB47_9:
+; RV32ZBA-NEXT: neg a4, a0
+; RV32ZBA-NEXT: snez a5, a0
+; RV32ZBA-NEXT: neg a6, a1
+; RV32ZBA-NEXT: sub a5, a6, a5
+; RV32ZBA-NEXT: bltz a1, .LBB47_11
+; RV32ZBA-NEXT: .LBB47_10: # %overflow.no.lhs.only
+; RV32ZBA-NEXT: mv a5, a1
+; RV32ZBA-NEXT: mv a4, a0
+; RV32ZBA-NEXT: .LBB47_11: # %overflow.no.lhs.only
+; RV32ZBA-NEXT: bltz a3, .LBB47_17
+; RV32ZBA-NEXT: # %bb.12: # %overflow.no.lhs.only
+; RV32ZBA-NEXT: mv a6, a2
+; RV32ZBA-NEXT: mv a0, a3
+; RV32ZBA-NEXT: j .LBB47_18
+; RV32ZBA-NEXT: .LBB47_13:
+; RV32ZBA-NEXT: neg a4, a2
+; RV32ZBA-NEXT: snez a5, a2
+; RV32ZBA-NEXT: neg a6, a3
+; RV32ZBA-NEXT: sub a5, a6, a5
+; RV32ZBA-NEXT: bltz a3, .LBB47_15
+; RV32ZBA-NEXT: .LBB47_14: # %overflow.no.rhs.only
+; RV32ZBA-NEXT: mv a5, a3
+; RV32ZBA-NEXT: mv a4, a2
+; RV32ZBA-NEXT: .LBB47_15: # %overflow.no.rhs.only
+; RV32ZBA-NEXT: bltz a1, .LBB47_21
+; RV32ZBA-NEXT: # %bb.16: # %overflow.no.rhs.only
+; RV32ZBA-NEXT: mv a6, a0
+; RV32ZBA-NEXT: mv a2, a1
+; RV32ZBA-NEXT: j .LBB47_22
+; RV32ZBA-NEXT: .LBB47_17:
+; RV32ZBA-NEXT: neg a6, a2
+; RV32ZBA-NEXT: snez a0, a2
+; RV32ZBA-NEXT: neg a7, a3
+; RV32ZBA-NEXT: sub a0, a7, a0
+; RV32ZBA-NEXT: .LBB47_18: # %overflow.no.lhs.only
+; RV32ZBA-NEXT: slti a1, a1, 0
+; RV32ZBA-NEXT: slti a7, a3, 0
+; RV32ZBA-NEXT: bltz a3, .LBB47_20
+; RV32ZBA-NEXT: # %bb.19: # %overflow.no.lhs.only
+; RV32ZBA-NEXT: mv a0, a3
+; RV32ZBA-NEXT: mv a6, a2
+; RV32ZBA-NEXT: .LBB47_20: # %overflow.no.lhs.only
+; RV32ZBA-NEXT: mulhu a2, a4, a6
+; RV32ZBA-NEXT: mul a3, a5, a6
+; RV32ZBA-NEXT: mul a6, a4, a6
+; RV32ZBA-NEXT: mul a5, a5, a0
+; RV32ZBA-NEXT: mulhu t0, a4, a0
+; RV32ZBA-NEXT: mul a0, a4, a0
+; RV32ZBA-NEXT: xor a1, a7, a1
+; RV32ZBA-NEXT: add a2, a2, a3
+; RV32ZBA-NEXT: add a5, t0, a5
+; RV32ZBA-NEXT: neg a3, a1
+; RV32ZBA-NEXT: add a0, a2, a0
+; RV32ZBA-NEXT: xor a4, a6, a3
+; RV32ZBA-NEXT: sltu a2, a0, a2
+; RV32ZBA-NEXT: add a4, a4, a1
+; RV32ZBA-NEXT: xor a0, a0, a3
+; RV32ZBA-NEXT: add a2, a5, a2
+; RV32ZBA-NEXT: sltu a1, a4, a1
+; RV32ZBA-NEXT: add a0, a0, a1
+; RV32ZBA-NEXT: sltu a0, a0, a1
+; RV32ZBA-NEXT: xor a2, a2, a3
+; RV32ZBA-NEXT: add a0, a2, a0
+; RV32ZBA-NEXT: j .LBB47_25
+; RV32ZBA-NEXT: .LBB47_21:
+; RV32ZBA-NEXT: neg a6, a0
+; RV32ZBA-NEXT: snez a2, a0
+; RV32ZBA-NEXT: neg a7, a1
+; RV32ZBA-NEXT: sub a2, a7, a2
+; RV32ZBA-NEXT: .LBB47_22: # %overflow.no.rhs.only
+; RV32ZBA-NEXT: slti a3, a3, 0
+; RV32ZBA-NEXT: slti a7, a1, 0
+; RV32ZBA-NEXT: bltz a1, .LBB47_24
+; RV32ZBA-NEXT: # %bb.23: # %overflow.no.rhs.only
+; RV32ZBA-NEXT: mv a2, a1
+; RV32ZBA-NEXT: mv a6, a0
+; RV32ZBA-NEXT: .LBB47_24: # %overflow.no.rhs.only
+; RV32ZBA-NEXT: mulhu a0, a4, a6
+; RV32ZBA-NEXT: mul a1, a5, a6
+; RV32ZBA-NEXT: mul a6, a4, a6
+; RV32ZBA-NEXT: mul a5, a5, a2
+; RV32ZBA-NEXT: mulhu t0, a4, a2
+; RV32ZBA-NEXT: mul a2, a4, a2
+; RV32ZBA-NEXT: xor a3, a3, a7
+; RV32ZBA-NEXT: add a0, a0, a1
+; RV32ZBA-NEXT: add a5, t0, a5
+; RV32ZBA-NEXT: neg a1, a3
+; RV32ZBA-NEXT: add a2, a0, a2
+; RV32ZBA-NEXT: xor a4, a6, a1
+; RV32ZBA-NEXT: sltu a0, a2, a0
+; RV32ZBA-NEXT: add a4, a4, a3
+; RV32ZBA-NEXT: xor a2, a2, a1
+; RV32ZBA-NEXT: add a0, a5, a0
+; RV32ZBA-NEXT: sltu a3, a4, a3
+; RV32ZBA-NEXT: add a2, a2, a3
+; RV32ZBA-NEXT: sltu a2, a2, a3
+; RV32ZBA-NEXT: xor a0, a0, a1
+; RV32ZBA-NEXT: add a0, a0, a2
+; RV32ZBA-NEXT: .LBB47_25: # %overflow.res
+; RV32ZBA-NEXT: snez a0, a0
+; RV32ZBA-NEXT: xori a0, a0, 1
; RV32ZBA-NEXT: ret
;
; RV64ZBA-LABEL: smulo.not.i64:
@@ -3449,7 +4698,13 @@ define i1 @smulo.not.i64(i64 %v1, i64 %v2) {
; RV64ZBA-NEXT: ret
;
; RV32ZICOND-LABEL: smulo.not.i64:
-; RV32ZICOND: # %bb.0: # %entry
+; RV32ZICOND: # %bb.0: # %overflow.entry
+; RV32ZICOND-NEXT: srai a5, a0, 31
+; RV32ZICOND-NEXT: srai a4, a2, 31
+; RV32ZICOND-NEXT: beq a1, a5, .LBB47_3
+; RV32ZICOND-NEXT: # %bb.1: # %overflow.lhs
+; RV32ZICOND-NEXT: beq a3, a4, .LBB47_5
+; RV32ZICOND-NEXT: # %bb.2: # %overflow
; RV32ZICOND-NEXT: mulhu a4, a0, a2
; RV32ZICOND-NEXT: mul a5, a1, a2
; RV32ZICOND-NEXT: mulhsu a2, a1, a2
@@ -3477,7 +4732,120 @@ define i1 @smulo.not.i64(i64 %v1, i64 %v2) {
; RV32ZICOND-NEXT: xor a0, a0, a4
; RV32ZICOND-NEXT: xor a4, a5, a4
; RV32ZICOND-NEXT: or a0, a4, a0
-; RV32ZICOND-NEXT: seqz a0, a0
+; RV32ZICOND-NEXT: j .LBB47_6
+; RV32ZICOND-NEXT: .LBB47_3: # %overflow.no.lhs
+; RV32ZICOND-NEXT: beq a3, a4, .LBB47_7
+; RV32ZICOND-NEXT: # %bb.4: # %overflow.no.lhs.only
+; RV32ZICOND-NEXT: slti a4, a1, 0
+; RV32ZICOND-NEXT: neg a5, a0
+; RV32ZICOND-NEXT: snez a6, a0
+; RV32ZICOND-NEXT: neg a7, a1
+; RV32ZICOND-NEXT: snez t0, a2
+; RV32ZICOND-NEXT: sub a6, a7, a6
+; RV32ZICOND-NEXT: neg a7, a3
+; RV32ZICOND-NEXT: sub a7, a7, t0
+; RV32ZICOND-NEXT: slti t0, a3, 0
+; RV32ZICOND-NEXT: czero.eqz a5, a5, a4
+; RV32ZICOND-NEXT: czero.nez a0, a0, a4
+; RV32ZICOND-NEXT: or a5, a5, a0
+; RV32ZICOND-NEXT: czero.eqz a5, a5, a4
+; RV32ZICOND-NEXT: or a0, a5, a0
+; RV32ZICOND-NEXT: neg a5, a2
+; RV32ZICOND-NEXT: czero.nez a1, a1, a4
+; RV32ZICOND-NEXT: czero.eqz a5, a5, t0
+; RV32ZICOND-NEXT: czero.nez a2, a2, t0
+; RV32ZICOND-NEXT: czero.nez a3, a3, t0
+; RV32ZICOND-NEXT: czero.eqz a6, a6, a4
+; RV32ZICOND-NEXT: or a6, a6, a1
+; RV32ZICOND-NEXT: czero.eqz a6, a6, a4
+; RV32ZICOND-NEXT: xor a4, t0, a4
+; RV32ZICOND-NEXT: or a5, a5, a2
+; RV32ZICOND-NEXT: czero.eqz a7, a7, t0
+; RV32ZICOND-NEXT: or a7, a7, a3
+; RV32ZICOND-NEXT: czero.eqz a5, a5, t0
+; RV32ZICOND-NEXT: czero.eqz a7, a7, t0
+; RV32ZICOND-NEXT: neg t0, a4
+; RV32ZICOND-NEXT: or a2, a5, a2
+; RV32ZICOND-NEXT: or a1, a6, a1
+; RV32ZICOND-NEXT: or a3, a7, a3
+; RV32ZICOND-NEXT: mulhu a5, a0, a2
+; RV32ZICOND-NEXT: mul a6, a0, a2
+; RV32ZICOND-NEXT: mul a2, a1, a2
+; RV32ZICOND-NEXT: mul a1, a1, a3
+; RV32ZICOND-NEXT: mulhu a7, a0, a3
+; RV32ZICOND-NEXT: mul a0, a0, a3
+; RV32ZICOND-NEXT: xor a3, a6, t0
+; RV32ZICOND-NEXT: add a2, a5, a2
+; RV32ZICOND-NEXT: add a1, a7, a1
+; RV32ZICOND-NEXT: add a3, a3, a4
+; RV32ZICOND-NEXT: add a0, a2, a0
+; RV32ZICOND-NEXT: sltu a3, a3, a4
+; RV32ZICOND-NEXT: sltu a2, a0, a2
+; RV32ZICOND-NEXT: xor a0, a0, t0
+; RV32ZICOND-NEXT: add a1, a1, a2
+; RV32ZICOND-NEXT: add a0, a0, a3
+; RV32ZICOND-NEXT: sltu a0, a0, a3
+; RV32ZICOND-NEXT: xor a1, a1, t0
+; RV32ZICOND-NEXT: add a0, a1, a0
+; RV32ZICOND-NEXT: j .LBB47_6
+; RV32ZICOND-NEXT: .LBB47_5: # %overflow.no.rhs.only
+; RV32ZICOND-NEXT: slti a4, a3, 0
+; RV32ZICOND-NEXT: neg a5, a2
+; RV32ZICOND-NEXT: snez a6, a2
+; RV32ZICOND-NEXT: neg a7, a3
+; RV32ZICOND-NEXT: snez t0, a0
+; RV32ZICOND-NEXT: sub a6, a7, a6
+; RV32ZICOND-NEXT: neg a7, a1
+; RV32ZICOND-NEXT: sub a7, a7, t0
+; RV32ZICOND-NEXT: slti t0, a1, 0
+; RV32ZICOND-NEXT: czero.eqz a5, a5, a4
+; RV32ZICOND-NEXT: czero.nez a2, a2, a4
+; RV32ZICOND-NEXT: or a5, a5, a2
+; RV32ZICOND-NEXT: czero.eqz a5, a5, a4
+; RV32ZICOND-NEXT: or a2, a5, a2
+; RV32ZICOND-NEXT: neg a5, a0
+; RV32ZICOND-NEXT: czero.nez a3, a3, a4
+; RV32ZICOND-NEXT: czero.eqz a5, a5, t0
+; RV32ZICOND-NEXT: czero.nez a0, a0, t0
+; RV32ZICOND-NEXT: czero.nez a1, a1, t0
+; RV32ZICOND-NEXT: czero.eqz a6, a6, a4
+; RV32ZICOND-NEXT: or a6, a6, a3
+; RV32ZICOND-NEXT: czero.eqz a6, a6, a4
+; RV32ZICOND-NEXT: xor a4, a4, t0
+; RV32ZICOND-NEXT: or a5, a5, a0
+; RV32ZICOND-NEXT: czero.eqz a7, a7, t0
+; RV32ZICOND-NEXT: or a7, a7, a1
+; RV32ZICOND-NEXT: czero.eqz a5, a5, t0
+; RV32ZICOND-NEXT: czero.eqz a7, a7, t0
+; RV32ZICOND-NEXT: neg t0, a4
+; RV32ZICOND-NEXT: or a0, a5, a0
+; RV32ZICOND-NEXT: or a3, a6, a3
+; RV32ZICOND-NEXT: or a1, a7, a1
+; RV32ZICOND-NEXT: mulhu a5, a2, a0
+; RV32ZICOND-NEXT: mul a6, a2, a0
+; RV32ZICOND-NEXT: mul a0, a3, a0
+; RV32ZICOND-NEXT: mul a3, a3, a1
+; RV32ZICOND-NEXT: mulhu a7, a2, a1
+; RV32ZICOND-NEXT: mul a1, a2, a1
+; RV32ZICOND-NEXT: xor a2, a6, t0
+; RV32ZICOND-NEXT: add a0, a5, a0
+; RV32ZICOND-NEXT: add a3, a7, a3
+; RV32ZICOND-NEXT: add a2, a2, a4
+; RV32ZICOND-NEXT: add a1, a0, a1
+; RV32ZICOND-NEXT: sltu a2, a2, a4
+; RV32ZICOND-NEXT: sltu a0, a1, a0
+; RV32ZICOND-NEXT: xor a1, a1, t0
+; RV32ZICOND-NEXT: add a0, a3, a0
+; RV32ZICOND-NEXT: add a1, a1, a2
+; RV32ZICOND-NEXT: sltu a1, a1, a2
+; RV32ZICOND-NEXT: xor a0, a0, t0
+; RV32ZICOND-NEXT: add a0, a0, a1
+; RV32ZICOND-NEXT: .LBB47_6: # %overflow.res
+; RV32ZICOND-NEXT: snez a0, a0
+; RV32ZICOND-NEXT: xori a0, a0, 1
+; RV32ZICOND-NEXT: ret
+; RV32ZICOND-NEXT: .LBB47_7: # %overflow.no
+; RV32ZICOND-NEXT: li a0, 1
; RV32ZICOND-NEXT: ret
;
; RV64ZICOND-LABEL: smulo.not.i64:
@@ -3617,7 +4985,11 @@ entry:
define i64 @umulo.select.i64(i64 %v1, i64 %v2) {
; RV32-LABEL: umulo.select.i64:
-; RV32: # %bb.0: # %entry
+; RV32: # %bb.0: # %overflow.entry
+; RV32-NEXT: beqz a1, .LBB50_3
+; RV32-NEXT: # %bb.1: # %overflow.lhs
+; RV32-NEXT: beqz a3, .LBB50_5
+; RV32-NEXT: # %bb.2: # %overflow
; RV32-NEXT: mul a4, a3, a0
; RV32-NEXT: mul a5, a1, a2
; RV32-NEXT: snez a6, a3
@@ -3634,12 +5006,42 @@ define i64 @umulo.select.i64(i64 %v1, i64 %v2) {
; RV32-NEXT: snez a6, a6
; RV32-NEXT: or a5, a5, a6
; RV32-NEXT: or a4, a5, a4
-; RV32-NEXT: bnez a4, .LBB50_2
-; RV32-NEXT: # %bb.1: # %entry
+; RV32-NEXT: andi a4, a4, 1
+; RV32-NEXT: beqz a4, .LBB50_7
+; RV32-NEXT: j .LBB50_8
+; RV32-NEXT: .LBB50_3: # %overflow.no.lhs
+; RV32-NEXT: beqz a3, .LBB50_9
+; RV32-NEXT: # %bb.4: # %overflow.no.lhs.only
+; RV32-NEXT: mulhu a4, a0, a2
+; RV32-NEXT: mul a5, a1, a2
+; RV32-NEXT: mulhu a6, a0, a3
+; RV32-NEXT: add a4, a4, a5
+; RV32-NEXT: mul a5, a1, a3
+; RV32-NEXT: add a5, a6, a5
+; RV32-NEXT: mul a6, a0, a3
+; RV32-NEXT: j .LBB50_6
+; RV32-NEXT: .LBB50_5: # %overflow.no.rhs.only
+; RV32-NEXT: mulhu a4, a2, a0
+; RV32-NEXT: mul a5, a3, a0
+; RV32-NEXT: mulhu a6, a2, a1
+; RV32-NEXT: add a4, a4, a5
+; RV32-NEXT: mul a5, a3, a1
+; RV32-NEXT: add a5, a6, a5
+; RV32-NEXT: mul a6, a2, a1
+; RV32-NEXT: .LBB50_6: # %overflow.res
+; RV32-NEXT: add a6, a4, a6
+; RV32-NEXT: sltu a4, a6, a4
+; RV32-NEXT: add a4, a5, a4
+; RV32-NEXT: snez a4, a4
+; RV32-NEXT: andi a4, a4, 1
+; RV32-NEXT: bnez a4, .LBB50_8
+; RV32-NEXT: .LBB50_7: # %overflow.res
; RV32-NEXT: mv a0, a2
; RV32-NEXT: mv a1, a3
-; RV32-NEXT: .LBB50_2: # %entry
+; RV32-NEXT: .LBB50_8: # %overflow.res
; RV32-NEXT: ret
+; RV32-NEXT: .LBB50_9: # %overflow.no
+; RV32-NEXT: j .LBB50_7
;
; RV64-LABEL: umulo.select.i64:
; RV64: # %bb.0: # %entry
@@ -3651,7 +5053,11 @@ define i64 @umulo.select.i64(i64 %v1, i64 %v2) {
; RV64-NEXT: ret
;
; RV32ZBA-LABEL: umulo.select.i64:
-; RV32ZBA: # %bb.0: # %entry
+; RV32ZBA: # %bb.0: # %overflow.entry
+; RV32ZBA-NEXT: beqz a1, .LBB50_3
+; RV32ZBA-NEXT: # %bb.1: # %overflow.lhs
+; RV32ZBA-NEXT: beqz a3, .LBB50_5
+; RV32ZBA-NEXT: # %bb.2: # %overflow
; RV32ZBA-NEXT: mul a4, a3, a0
; RV32ZBA-NEXT: mul a5, a1, a2
; RV32ZBA-NEXT: snez a6, a3
@@ -3668,12 +5074,42 @@ define i64 @umulo.select.i64(i64 %v1, i64 %v2) {
; RV32ZBA-NEXT: snez a6, a6
; RV32ZBA-NEXT: or a5, a5, a6
; RV32ZBA-NEXT: or a4, a5, a4
-; RV32ZBA-NEXT: bnez a4, .LBB50_2
-; RV32ZBA-NEXT: # %bb.1: # %entry
+; RV32ZBA-NEXT: andi a4, a4, 1
+; RV32ZBA-NEXT: beqz a4, .LBB50_7
+; RV32ZBA-NEXT: j .LBB50_8
+; RV32ZBA-NEXT: .LBB50_3: # %overflow.no.lhs
+; RV32ZBA-NEXT: beqz a3, .LBB50_9
+; RV32ZBA-NEXT: # %bb.4: # %overflow.no.lhs.only
+; RV32ZBA-NEXT: mulhu a4, a0, a2
+; RV32ZBA-NEXT: mul a5, a1, a2
+; RV32ZBA-NEXT: mulhu a6, a0, a3
+; RV32ZBA-NEXT: add a4, a4, a5
+; RV32ZBA-NEXT: mul a5, a1, a3
+; RV32ZBA-NEXT: add a5, a6, a5
+; RV32ZBA-NEXT: mul a6, a0, a3
+; RV32ZBA-NEXT: j .LBB50_6
+; RV32ZBA-NEXT: .LBB50_5: # %overflow.no.rhs.only
+; RV32ZBA-NEXT: mulhu a4, a2, a0
+; RV32ZBA-NEXT: mul a5, a3, a0
+; RV32ZBA-NEXT: mulhu a6, a2, a1
+; RV32ZBA-NEXT: add a4, a4, a5
+; RV32ZBA-NEXT: mul a5, a3, a1
+; RV32ZBA-NEXT: add a5, a6, a5
+; RV32ZBA-NEXT: mul a6, a2, a1
+; RV32ZBA-NEXT: .LBB50_6: # %overflow.res
+; RV32ZBA-NEXT: add a6, a4, a6
+; RV32ZBA-NEXT: sltu a4, a6, a4
+; RV32ZBA-NEXT: add a4, a5, a4
+; RV32ZBA-NEXT: snez a4, a4
+; RV32ZBA-NEXT: andi a4, a4, 1
+; RV32ZBA-NEXT: bnez a4, .LBB50_8
+; RV32ZBA-NEXT: .LBB50_7: # %overflow.res
; RV32ZBA-NEXT: mv a0, a2
; RV32ZBA-NEXT: mv a1, a3
-; RV32ZBA-NEXT: .LBB50_2: # %entry
+; RV32ZBA-NEXT: .LBB50_8: # %overflow.res
; RV32ZBA-NEXT: ret
+; RV32ZBA-NEXT: .LBB50_9: # %overflow.no
+; RV32ZBA-NEXT: j .LBB50_7
;
; RV64ZBA-LABEL: umulo.select.i64:
; RV64ZBA: # %bb.0: # %entry
@@ -3685,7 +5121,11 @@ define i64 @umulo.select.i64(i64 %v1, i64 %v2) {
; RV64ZBA-NEXT: ret
;
; RV32ZICOND-LABEL: umulo.select.i64:
-; RV32ZICOND: # %bb.0: # %entry
+; RV32ZICOND: # %bb.0: # %overflow.entry
+; RV32ZICOND-NEXT: beqz a1, .LBB50_3
+; RV32ZICOND-NEXT: # %bb.1: # %overflow.lhs
+; RV32ZICOND-NEXT: beqz a3, .LBB50_5
+; RV32ZICOND-NEXT: # %bb.2: # %overflow
; RV32ZICOND-NEXT: mul a4, a3, a0
; RV32ZICOND-NEXT: mul a5, a1, a2
; RV32ZICOND-NEXT: snez a6, a3
@@ -3702,6 +5142,36 @@ define i64 @umulo.select.i64(i64 %v1, i64 %v2) {
; RV32ZICOND-NEXT: snez a6, a6
; RV32ZICOND-NEXT: or a5, a5, a6
; RV32ZICOND-NEXT: or a4, a5, a4
+; RV32ZICOND-NEXT: j .LBB50_8
+; RV32ZICOND-NEXT: .LBB50_3: # %overflow.no.lhs
+; RV32ZICOND-NEXT: beqz a3, .LBB50_7
+; RV32ZICOND-NEXT: # %bb.4: # %overflow.no.lhs.only
+; RV32ZICOND-NEXT: mulhu a4, a0, a2
+; RV32ZICOND-NEXT: mul a5, a1, a2
+; RV32ZICOND-NEXT: mulhu a6, a0, a3
+; RV32ZICOND-NEXT: add a4, a4, a5
+; RV32ZICOND-NEXT: mul a5, a1, a3
+; RV32ZICOND-NEXT: add a5, a6, a5
+; RV32ZICOND-NEXT: mul a6, a0, a3
+; RV32ZICOND-NEXT: j .LBB50_6
+; RV32ZICOND-NEXT: .LBB50_5: # %overflow.no.rhs.only
+; RV32ZICOND-NEXT: mulhu a4, a2, a0
+; RV32ZICOND-NEXT: mul a5, a3, a0
+; RV32ZICOND-NEXT: mulhu a6, a2, a1
+; RV32ZICOND-NEXT: add a4, a4, a5
+; RV32ZICOND-NEXT: mul a5, a3, a1
+; RV32ZICOND-NEXT: add a5, a6, a5
+; RV32ZICOND-NEXT: mul a6, a2, a1
+; RV32ZICOND-NEXT: .LBB50_6: # %overflow.res
+; RV32ZICOND-NEXT: add a6, a4, a6
+; RV32ZICOND-NEXT: sltu a4, a6, a4
+; RV32ZICOND-NEXT: add a4, a5, a4
+; RV32ZICOND-NEXT: snez a4, a4
+; RV32ZICOND-NEXT: j .LBB50_8
+; RV32ZICOND-NEXT: .LBB50_7: # %overflow.no
+; RV32ZICOND-NEXT: li a4, 0
+; RV32ZICOND-NEXT: .LBB50_8: # %overflow.res
+; RV32ZICOND-NEXT: andi a4, a4, 1
; RV32ZICOND-NEXT: czero.nez a2, a2, a4
; RV32ZICOND-NEXT: czero.eqz a0, a0, a4
; RV32ZICOND-NEXT: czero.nez a3, a3, a4
@@ -3726,7 +5196,11 @@ entry:
define i1 @umulo.not.i64(i64 %v1, i64 %v2) {
; RV32-LABEL: umulo.not.i64:
-; RV32: # %bb.0: # %entry
+; RV32: # %bb.0: # %overflow.entry
+; RV32-NEXT: beqz a1, .LBB51_3
+; RV32-NEXT: # %bb.1: # %overflow.lhs
+; RV32-NEXT: beqz a3, .LBB51_5
+; RV32-NEXT: # %bb.2: # %overflow
; RV32-NEXT: mul a4, a3, a0
; RV32-NEXT: mul a5, a1, a2
; RV32-NEXT: mulhu a6, a0, a2
@@ -3745,6 +5219,38 @@ define i1 @umulo.not.i64(i64 %v1, i64 %v2) {
; RV32-NEXT: or a0, a0, a2
; RV32-NEXT: xori a0, a0, 1
; RV32-NEXT: ret
+; RV32-NEXT: .LBB51_3: # %overflow.no.lhs
+; RV32-NEXT: beqz a3, .LBB51_7
+; RV32-NEXT: # %bb.4: # %overflow.no.lhs.only
+; RV32-NEXT: mulhu a4, a0, a2
+; RV32-NEXT: mul a2, a1, a2
+; RV32-NEXT: add a2, a4, a2
+; RV32-NEXT: mulhu a4, a0, a3
+; RV32-NEXT: mul a1, a1, a3
+; RV32-NEXT: add a1, a4, a1
+; RV32-NEXT: mul a0, a0, a3
+; RV32-NEXT: add a0, a2, a0
+; RV32-NEXT: sltu a0, a0, a2
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: j .LBB51_6
+; RV32-NEXT: .LBB51_5: # %overflow.no.rhs.only
+; RV32-NEXT: mulhu a4, a2, a0
+; RV32-NEXT: mul a0, a3, a0
+; RV32-NEXT: add a0, a4, a0
+; RV32-NEXT: mulhu a4, a2, a1
+; RV32-NEXT: mul a3, a3, a1
+; RV32-NEXT: add a3, a4, a3
+; RV32-NEXT: mul a1, a2, a1
+; RV32-NEXT: add a1, a0, a1
+; RV32-NEXT: sltu a0, a1, a0
+; RV32-NEXT: add a0, a3, a0
+; RV32-NEXT: .LBB51_6: # %overflow.no.rhs.only
+; RV32-NEXT: snez a0, a0
+; RV32-NEXT: xori a0, a0, 1
+; RV32-NEXT: ret
+; RV32-NEXT: .LBB51_7: # %overflow.no
+; RV32-NEXT: li a0, 1
+; RV32-NEXT: ret
;
; RV64-LABEL: umulo.not.i64:
; RV64: # %bb.0: # %entry
@@ -3753,7 +5259,11 @@ define i1 @umulo.not.i64(i64 %v1, i64 %v2) {
; RV64-NEXT: ret
;
; RV32ZBA-LABEL: umulo.not.i64:
-; RV32ZBA: # %bb.0: # %entry
+; RV32ZBA: # %bb.0: # %overflow.entry
+; RV32ZBA-NEXT: beqz a1, .LBB51_3
+; RV32ZBA-NEXT: # %bb.1: # %overflow.lhs
+; RV32ZBA-NEXT: beqz a3, .LBB51_5
+; RV32ZBA-NEXT: # %bb.2: # %overflow
; RV32ZBA-NEXT: mul a4, a3, a0
; RV32ZBA-NEXT: mul a5, a1, a2
; RV32ZBA-NEXT: mulhu a6, a0, a2
@@ -3772,6 +5282,38 @@ define i1 @umulo.not.i64(i64 %v1, i64 %v2) {
; RV32ZBA-NEXT: or a0, a0, a2
; RV32ZBA-NEXT: xori a0, a0, 1
; RV32ZBA-NEXT: ret
+; RV32ZBA-NEXT: .LBB51_3: # %overflow.no.lhs
+; RV32ZBA-NEXT: beqz a3, .LBB51_7
+; RV32ZBA-NEXT: # %bb.4: # %overflow.no.lhs.only
+; RV32ZBA-NEXT: mulhu a4, a0, a2
+; RV32ZBA-NEXT: mul a2, a1, a2
+; RV32ZBA-NEXT: add a2, a4, a2
+; RV32ZBA-NEXT: mulhu a4, a0, a3
+; RV32ZBA-NEXT: mul a1, a1, a3
+; RV32ZBA-NEXT: add a1, a4, a1
+; RV32ZBA-NEXT: mul a0, a0, a3
+; RV32ZBA-NEXT: add a0, a2, a0
+; RV32ZBA-NEXT: sltu a0, a0, a2
+; RV32ZBA-NEXT: add a0, a1, a0
+; RV32ZBA-NEXT: j .LBB51_6
+; RV32ZBA-NEXT: .LBB51_5: # %overflow.no.rhs.only
+; RV32ZBA-NEXT: mulhu a4, a2, a0
+; RV32ZBA-NEXT: mul a0, a3, a0
+; RV32ZBA-NEXT: add a0, a4, a0
+; RV32ZBA-NEXT: mulhu a4, a2, a1
+; RV32ZBA-NEXT: mul a3, a3, a1
+; RV32ZBA-NEXT: add a3, a4, a3
+; RV32ZBA-NEXT: mul a1, a2, a1
+; RV32ZBA-NEXT: add a1, a0, a1
+; RV32ZBA-NEXT: sltu a0, a1, a0
+; RV32ZBA-NEXT: add a0, a3, a0
+; RV32ZBA-NEXT: .LBB51_6: # %overflow.no.rhs.only
+; RV32ZBA-NEXT: snez a0, a0
+; RV32ZBA-NEXT: xori a0, a0, 1
+; RV32ZBA-NEXT: ret
+; RV32ZBA-NEXT: .LBB51_7: # %overflow.no
+; RV32ZBA-NEXT: li a0, 1
+; RV32ZBA-NEXT: ret
;
; RV64ZBA-LABEL: umulo.not.i64:
; RV64ZBA: # %bb.0: # %entry
@@ -3780,7 +5322,11 @@ define i1 @umulo.not.i64(i64 %v1, i64 %v2) {
; RV64ZBA-NEXT: ret
;
; RV32ZICOND-LABEL: umulo.not.i64:
-; RV32ZICOND: # %bb.0: # %entry
+; RV32ZICOND: # %bb.0: # %overflow.entry
+; RV32ZICOND-NEXT: beqz a1, .LBB51_3
+; RV32ZICOND-NEXT: # %bb.1: # %overflow.lhs
+; RV32ZICOND-NEXT: beqz a3, .LBB51_5
+; RV32ZICOND-NEXT: # %bb.2: # %overflow
; RV32ZICOND-NEXT: mul a4, a3, a0
; RV32ZICOND-NEXT: mul a5, a1, a2
; RV32ZICOND-NEXT: mulhu a6, a0, a2
@@ -3799,6 +5345,38 @@ define i1 @umulo.not.i64(i64 %v1, i64 %v2) {
; RV32ZICOND-NEXT: or a0, a0, a2
; RV32ZICOND-NEXT: xori a0, a0, 1
; RV32ZICOND-NEXT: ret
+; RV32ZICOND-NEXT: .LBB51_3: # %overflow.no.lhs
+; RV32ZICOND-NEXT: beqz a3, .LBB51_7
+; RV32ZICOND-NEXT: # %bb.4: # %overflow.no.lhs.only
+; RV32ZICOND-NEXT: mulhu a4, a0, a2
+; RV32ZICOND-NEXT: mul a2, a1, a2
+; RV32ZICOND-NEXT: add a2, a4, a2
+; RV32ZICOND-NEXT: mulhu a4, a0, a3
+; RV32ZICOND-NEXT: mul a1, a1, a3
+; RV32ZICOND-NEXT: add a1, a4, a1
+; RV32ZICOND-NEXT: mul a0, a0, a3
+; RV32ZICOND-NEXT: add a0, a2, a0
+; RV32ZICOND-NEXT: sltu a0, a0, a2
+; RV32ZICOND-NEXT: add a0, a1, a0
+; RV32ZICOND-NEXT: j .LBB51_6
+; RV32ZICOND-NEXT: .LBB51_5: # %overflow.no.rhs.only
+; RV32ZICOND-NEXT: mulhu a4, a2, a0
+; RV32ZICOND-NEXT: mul a0, a3, a0
+; RV32ZICOND-NEXT: add a0, a4, a0
+; RV32ZICOND-NEXT: mulhu a4, a2, a1
+; RV32ZICOND-NEXT: mul a3, a3, a1
+; RV32ZICOND-NEXT: add a3, a4, a3
+; RV32ZICOND-NEXT: mul a1, a2, a1
+; RV32ZICOND-NEXT: add a1, a0, a1
+; RV32ZICOND-NEXT: sltu a0, a1, a0
+; RV32ZICOND-NEXT: add a0, a3, a0
+; RV32ZICOND-NEXT: .LBB51_6: # %overflow.no.rhs.only
+; RV32ZICOND-NEXT: snez a0, a0
+; RV32ZICOND-NEXT: xori a0, a0, 1
+; RV32ZICOND-NEXT: ret
+; RV32ZICOND-NEXT: .LBB51_7: # %overflow.no
+; RV32ZICOND-NEXT: li a0, 1
+; RV32ZICOND-NEXT: ret
;
; RV64ZICOND-LABEL: umulo.not.i64:
; RV64ZICOND: # %bb.0: # %entry
@@ -4656,7 +6234,13 @@ continue:
define zeroext i1 @smulo.br.i64(i64 %v1, i64 %v2) {
; RV32-LABEL: smulo.br.i64:
-; RV32: # %bb.0: # %entry
+; RV32: # %bb.0: # %overflow.entry
+; RV32-NEXT: srai a5, a0, 31
+; RV32-NEXT: srai a4, a2, 31
+; RV32-NEXT: beq a1, a5, .LBB61_3
+; RV32-NEXT: # %bb.1: # %overflow.lhs
+; RV32-NEXT: beq a3, a4, .LBB61_6
+; RV32-NEXT: # %bb.2: # %overflow1
; RV32-NEXT: mulhu a4, a0, a2
; RV32-NEXT: mul a5, a1, a2
; RV32-NEXT: mulhsu a2, a1, a2
@@ -4684,13 +6268,133 @@ define zeroext i1 @smulo.br.i64(i64 %v1, i64 %v2) {
; RV32-NEXT: xor a0, a0, a4
; RV32-NEXT: xor a4, a5, a4
; RV32-NEXT: or a0, a4, a0
-; RV32-NEXT: beqz a0, .LBB61_2
-; RV32-NEXT: # %bb.1: # %overflow
-; RV32-NEXT: li a0, 0
-; RV32-NEXT: ret
-; RV32-NEXT: .LBB61_2: # %continue
+; RV32-NEXT: j .LBB61_26
+; RV32-NEXT: .LBB61_3: # %overflow.no.lhs
+; RV32-NEXT: beq a3, a4, .LBB61_8
+; RV32-NEXT: # %bb.4: # %overflow.no.lhs.only
+; RV32-NEXT: bltz a1, .LBB61_10
+; RV32-NEXT: # %bb.5: # %overflow.no.lhs.only
+; RV32-NEXT: mv a4, a0
+; RV32-NEXT: mv a5, a1
+; RV32-NEXT: bgez a1, .LBB61_11
+; RV32-NEXT: j .LBB61_12
+; RV32-NEXT: .LBB61_6: # %overflow.no.rhs.only
+; RV32-NEXT: bltz a3, .LBB61_14
+; RV32-NEXT: # %bb.7: # %overflow.no.rhs.only
+; RV32-NEXT: mv a4, a2
+; RV32-NEXT: mv a5, a3
+; RV32-NEXT: bgez a3, .LBB61_15
+; RV32-NEXT: j .LBB61_16
+; RV32-NEXT: .LBB61_8: # %overflow.no
+; RV32-NEXT: .LBB61_9: # %continue
; RV32-NEXT: li a0, 1
; RV32-NEXT: ret
+; RV32-NEXT: .LBB61_10:
+; RV32-NEXT: neg a4, a0
+; RV32-NEXT: snez a5, a0
+; RV32-NEXT: neg a6, a1
+; RV32-NEXT: sub a5, a6, a5
+; RV32-NEXT: bltz a1, .LBB61_12
+; RV32-NEXT: .LBB61_11: # %overflow.no.lhs.only
+; RV32-NEXT: mv a5, a1
+; RV32-NEXT: mv a4, a0
+; RV32-NEXT: .LBB61_12: # %overflow.no.lhs.only
+; RV32-NEXT: bltz a3, .LBB61_18
+; RV32-NEXT: # %bb.13: # %overflow.no.lhs.only
+; RV32-NEXT: mv a6, a2
+; RV32-NEXT: mv a0, a3
+; RV32-NEXT: j .LBB61_19
+; RV32-NEXT: .LBB61_14:
+; RV32-NEXT: neg a4, a2
+; RV32-NEXT: snez a5, a2
+; RV32-NEXT: neg a6, a3
+; RV32-NEXT: sub a5, a6, a5
+; RV32-NEXT: bltz a3, .LBB61_16
+; RV32-NEXT: .LBB61_15: # %overflow.no.rhs.only
+; RV32-NEXT: mv a5, a3
+; RV32-NEXT: mv a4, a2
+; RV32-NEXT: .LBB61_16: # %overflow.no.rhs.only
+; RV32-NEXT: bltz a1, .LBB61_22
+; RV32-NEXT: # %bb.17: # %overflow.no.rhs.only
+; RV32-NEXT: mv a6, a0
+; RV32-NEXT: mv a2, a1
+; RV32-NEXT: j .LBB61_23
+; RV32-NEXT: .LBB61_18:
+; RV32-NEXT: neg a6, a2
+; RV32-NEXT: snez a0, a2
+; RV32-NEXT: neg a7, a3
+; RV32-NEXT: sub a0, a7, a0
+; RV32-NEXT: .LBB61_19: # %overflow.no.lhs.only
+; RV32-NEXT: slti a1, a1, 0
+; RV32-NEXT: slti a7, a3, 0
+; RV32-NEXT: bltz a3, .LBB61_21
+; RV32-NEXT: # %bb.20: # %overflow.no.lhs.only
+; RV32-NEXT: mv a0, a3
+; RV32-NEXT: mv a6, a2
+; RV32-NEXT: .LBB61_21: # %overflow.no.lhs.only
+; RV32-NEXT: mulhu a2, a4, a6
+; RV32-NEXT: mul a3, a5, a6
+; RV32-NEXT: mul a6, a4, a6
+; RV32-NEXT: mul a5, a5, a0
+; RV32-NEXT: mulhu t0, a4, a0
+; RV32-NEXT: mul a0, a4, a0
+; RV32-NEXT: xor a1, a7, a1
+; RV32-NEXT: add a2, a2, a3
+; RV32-NEXT: add a5, t0, a5
+; RV32-NEXT: neg a3, a1
+; RV32-NEXT: add a0, a2, a0
+; RV32-NEXT: xor a4, a6, a3
+; RV32-NEXT: sltu a2, a0, a2
+; RV32-NEXT: add a4, a4, a1
+; RV32-NEXT: xor a0, a0, a3
+; RV32-NEXT: add a2, a5, a2
+; RV32-NEXT: sltu a1, a4, a1
+; RV32-NEXT: add a0, a0, a1
+; RV32-NEXT: sltu a0, a0, a1
+; RV32-NEXT: xor a2, a2, a3
+; RV32-NEXT: add a0, a2, a0
+; RV32-NEXT: j .LBB61_26
+; RV32-NEXT: .LBB61_22:
+; RV32-NEXT: neg a6, a0
+; RV32-NEXT: snez a2, a0
+; RV32-NEXT: neg a7, a1
+; RV32-NEXT: sub a2, a7, a2
+; RV32-NEXT: .LBB61_23: # %overflow.no.rhs.only
+; RV32-NEXT: slti a3, a3, 0
+; RV32-NEXT: slti a7, a1, 0
+; RV32-NEXT: bltz a1, .LBB61_25
+; RV32-NEXT: # %bb.24: # %overflow.no.rhs.only
+; RV32-NEXT: mv a2, a1
+; RV32-NEXT: mv a6, a0
+; RV32-NEXT: .LBB61_25: # %overflow.no.rhs.only
+; RV32-NEXT: mulhu a0, a4, a6
+; RV32-NEXT: mul a1, a5, a6
+; RV32-NEXT: mul a6, a4, a6
+; RV32-NEXT: mul a5, a5, a2
+; RV32-NEXT: mulhu t0, a4, a2
+; RV32-NEXT: mul a2, a4, a2
+; RV32-NEXT: xor a3, a3, a7
+; RV32-NEXT: add a0, a0, a1
+; RV32-NEXT: add a5, t0, a5
+; RV32-NEXT: neg a1, a3
+; RV32-NEXT: add a2, a0, a2
+; RV32-NEXT: xor a4, a6, a1
+; RV32-NEXT: sltu a0, a2, a0
+; RV32-NEXT: add a4, a4, a3
+; RV32-NEXT: xor a2, a2, a1
+; RV32-NEXT: add a0, a5, a0
+; RV32-NEXT: sltu a3, a4, a3
+; RV32-NEXT: add a2, a2, a3
+; RV32-NEXT: sltu a2, a2, a3
+; RV32-NEXT: xor a0, a0, a1
+; RV32-NEXT: add a0, a0, a2
+; RV32-NEXT: .LBB61_26: # %overflow.res
+; RV32-NEXT: snez a0, a0
+; RV32-NEXT: andi a0, a0, 1
+; RV32-NEXT: beqz a0, .LBB61_9
+; RV32-NEXT: # %bb.27: # %overflow
+; RV32-NEXT: li a0, 0
+; RV32-NEXT: ret
;
; RV64-LABEL: smulo.br.i64:
; RV64: # %bb.0: # %entry
@@ -4706,7 +6410,13 @@ define zeroext i1 @smulo.br.i64(i64 %v1, i64 %v2) {
; RV64-NEXT: ret
;
; RV32ZBA-LABEL: smulo.br.i64:
-; RV32ZBA: # %bb.0: # %entry
+; RV32ZBA: # %bb.0: # %overflow.entry
+; RV32ZBA-NEXT: srai a5, a0, 31
+; RV32ZBA-NEXT: srai a4, a2, 31
+; RV32ZBA-NEXT: beq a1, a5, .LBB61_3
+; RV32ZBA-NEXT: # %bb.1: # %overflow.lhs
+; RV32ZBA-NEXT: beq a3, a4, .LBB61_6
+; RV32ZBA-NEXT: # %bb.2: # %overflow1
; RV32ZBA-NEXT: mulhu a4, a0, a2
; RV32ZBA-NEXT: mul a5, a1, a2
; RV32ZBA-NEXT: mulhsu a2, a1, a2
@@ -4734,13 +6444,133 @@ define zeroext i1 @smulo.br.i64(i64 %v1, i64 %v2) {
; RV32ZBA-NEXT: xor a0, a0, a4
; RV32ZBA-NEXT: xor a4, a5, a4
; RV32ZBA-NEXT: or a0, a4, a0
-; RV32ZBA-NEXT: beqz a0, .LBB61_2
-; RV32ZBA-NEXT: # %bb.1: # %overflow
-; RV32ZBA-NEXT: li a0, 0
-; RV32ZBA-NEXT: ret
-; RV32ZBA-NEXT: .LBB61_2: # %continue
+; RV32ZBA-NEXT: j .LBB61_26
+; RV32ZBA-NEXT: .LBB61_3: # %overflow.no.lhs
+; RV32ZBA-NEXT: beq a3, a4, .LBB61_8
+; RV32ZBA-NEXT: # %bb.4: # %overflow.no.lhs.only
+; RV32ZBA-NEXT: bltz a1, .LBB61_10
+; RV32ZBA-NEXT: # %bb.5: # %overflow.no.lhs.only
+; RV32ZBA-NEXT: mv a4, a0
+; RV32ZBA-NEXT: mv a5, a1
+; RV32ZBA-NEXT: bgez a1, .LBB61_11
+; RV32ZBA-NEXT: j .LBB61_12
+; RV32ZBA-NEXT: .LBB61_6: # %overflow.no.rhs.only
+; RV32ZBA-NEXT: bltz a3, .LBB61_14
+; RV32ZBA-NEXT: # %bb.7: # %overflow.no.rhs.only
+; RV32ZBA-NEXT: mv a4, a2
+; RV32ZBA-NEXT: mv a5, a3
+; RV32ZBA-NEXT: bgez a3, .LBB61_15
+; RV32ZBA-NEXT: j .LBB61_16
+; RV32ZBA-NEXT: .LBB61_8: # %overflow.no
+; RV32ZBA-NEXT: .LBB61_9: # %continue
; RV32ZBA-NEXT: li a0, 1
; RV32ZBA-NEXT: ret
+; RV32ZBA-NEXT: .LBB61_10:
+; RV32ZBA-NEXT: neg a4, a0
+; RV32ZBA-NEXT: snez a5, a0
+; RV32ZBA-NEXT: neg a6, a1
+; RV32ZBA-NEXT: sub a5, a6, a5
+; RV32ZBA-NEXT: bltz a1, .LBB61_12
+; RV32ZBA-NEXT: .LBB61_11: # %overflow.no.lhs.only
+; RV32ZBA-NEXT: mv a5, a1
+; RV32ZBA-NEXT: mv a4, a0
+; RV32ZBA-NEXT: .LBB61_12: # %overflow.no.lhs.only
+; RV32ZBA-NEXT: bltz a3, .LBB61_18
+; RV32ZBA-NEXT: # %bb.13: # %overflow.no.lhs.only
+; RV32ZBA-NEXT: mv a6, a2
+; RV32ZBA-NEXT: mv a0, a3
+; RV32ZBA-NEXT: j .LBB61_19
+; RV32ZBA-NEXT: .LBB61_14:
+; RV32ZBA-NEXT: neg a4, a2
+; RV32ZBA-NEXT: snez a5, a2
+; RV32ZBA-NEXT: neg a6, a3
+; RV32ZBA-NEXT: sub a5, a6, a5
+; RV32ZBA-NEXT: bltz a3, .LBB61_16
+; RV32ZBA-NEXT: .LBB61_15: # %overflow.no.rhs.only
+; RV32ZBA-NEXT: mv a5, a3
+; RV32ZBA-NEXT: mv a4, a2
+; RV32ZBA-NEXT: .LBB61_16: # %overflow.no.rhs.only
+; RV32ZBA-NEXT: bltz a1, .LBB61_22
+; RV32ZBA-NEXT: # %bb.17: # %overflow.no.rhs.only
+; RV32ZBA-NEXT: mv a6, a0
+; RV32ZBA-NEXT: mv a2, a1
+; RV32ZBA-NEXT: j .LBB61_23
+; RV32ZBA-NEXT: .LBB61_18:
+; RV32ZBA-NEXT: neg a6, a2
+; RV32ZBA-NEXT: snez a0, a2
+; RV32ZBA-NEXT: neg a7, a3
+; RV32ZBA-NEXT: sub a0, a7, a0
+; RV32ZBA-NEXT: .LBB61_19: # %overflow.no.lhs.only
+; RV32ZBA-NEXT: slti a1, a1, 0
+; RV32ZBA-NEXT: slti a7, a3, 0
+; RV32ZBA-NEXT: bltz a3, .LBB61_21
+; RV32ZBA-NEXT: # %bb.20: # %overflow.no.lhs.only
+; RV32ZBA-NEXT: mv a0, a3
+; RV32ZBA-NEXT: mv a6, a2
+; RV32ZBA-NEXT: .LBB61_21: # %overflow.no.lhs.only
+; RV32ZBA-NEXT: mulhu a2, a4, a6
+; RV32ZBA-NEXT: mul a3, a5, a6
+; RV32ZBA-NEXT: mul a6, a4, a6
+; RV32ZBA-NEXT: mul a5, a5, a0
+; RV32ZBA-NEXT: mulhu t0, a4, a0
+; RV32ZBA-NEXT: mul a0, a4, a0
+; RV32ZBA-NEXT: xor a1, a7, a1
+; RV32ZBA-NEXT: add a2, a2, a3
+; RV32ZBA-NEXT: add a5, t0, a5
+; RV32ZBA-NEXT: neg a3, a1
+; RV32ZBA-NEXT: add a0, a2, a0
+; RV32ZBA-NEXT: xor a4, a6, a3
+; RV32ZBA-NEXT: sltu a2, a0, a2
+; RV32ZBA-NEXT: add a4, a4, a1
+; RV32ZBA-NEXT: xor a0, a0, a3
+; RV32ZBA-NEXT: add a2, a5, a2
+; RV32ZBA-NEXT: sltu a1, a4, a1
+; RV32ZBA-NEXT: add a0, a0, a1
+; RV32ZBA-NEXT: sltu a0, a0, a1
+; RV32ZBA-NEXT: xor a2, a2, a3
+; RV32ZBA-NEXT: add a0, a2, a0
+; RV32ZBA-NEXT: j .LBB61_26
+; RV32ZBA-NEXT: .LBB61_22:
+; RV32ZBA-NEXT: neg a6, a0
+; RV32ZBA-NEXT: snez a2, a0
+; RV32ZBA-NEXT: neg a7, a1
+; RV32ZBA-NEXT: sub a2, a7, a2
+; RV32ZBA-NEXT: .LBB61_23: # %overflow.no.rhs.only
+; RV32ZBA-NEXT: slti a3, a3, 0
+; RV32ZBA-NEXT: slti a7, a1, 0
+; RV32ZBA-NEXT: bltz a1, .LBB61_25
+; RV32ZBA-NEXT: # %bb.24: # %overflow.no.rhs.only
+; RV32ZBA-NEXT: mv a2, a1
+; RV32ZBA-NEXT: mv a6, a0
+; RV32ZBA-NEXT: .LBB61_25: # %overflow.no.rhs.only
+; RV32ZBA-NEXT: mulhu a0, a4, a6
+; RV32ZBA-NEXT: mul a1, a5, a6
+; RV32ZBA-NEXT: mul a6, a4, a6
+; RV32ZBA-NEXT: mul a5, a5, a2
+; RV32ZBA-NEXT: mulhu t0, a4, a2
+; RV32ZBA-NEXT: mul a2, a4, a2
+; RV32ZBA-NEXT: xor a3, a3, a7
+; RV32ZBA-NEXT: add a0, a0, a1
+; RV32ZBA-NEXT: add a5, t0, a5
+; RV32ZBA-NEXT: neg a1, a3
+; RV32ZBA-NEXT: add a2, a0, a2
+; RV32ZBA-NEXT: xor a4, a6, a1
+; RV32ZBA-NEXT: sltu a0, a2, a0
+; RV32ZBA-NEXT: add a4, a4, a3
+; RV32ZBA-NEXT: xor a2, a2, a1
+; RV32ZBA-NEXT: add a0, a5, a0
+; RV32ZBA-NEXT: sltu a3, a4, a3
+; RV32ZBA-NEXT: add a2, a2, a3
+; RV32ZBA-NEXT: sltu a2, a2, a3
+; RV32ZBA-NEXT: xor a0, a0, a1
+; RV32ZBA-NEXT: add a0, a0, a2
+; RV32ZBA-NEXT: .LBB61_26: # %overflow.res
+; RV32ZBA-NEXT: snez a0, a0
+; RV32ZBA-NEXT: andi a0, a0, 1
+; RV32ZBA-NEXT: beqz a0, .LBB61_9
+; RV32ZBA-NEXT: # %bb.27: # %overflow
+; RV32ZBA-NEXT: li a0, 0
+; RV32ZBA-NEXT: ret
;
; RV64ZBA-LABEL: smulo.br.i64:
; RV64ZBA: # %bb.0: # %entry
@@ -4756,7 +6586,13 @@ define zeroext i1 @smulo.br.i64(i64 %v1, i64 %v2) {
; RV64ZBA-NEXT: ret
;
; RV32ZICOND-LABEL: smulo.br.i64:
-; RV32ZICOND: # %bb.0: # %entry
+; RV32ZICOND: # %bb.0: # %overflow.entry
+; RV32ZICOND-NEXT: srai a5, a0, 31
+; RV32ZICOND-NEXT: srai a4, a2, 31
+; RV32ZICOND-NEXT: beq a1, a5, .LBB61_3
+; RV32ZICOND-NEXT: # %bb.1: # %overflow.lhs
+; RV32ZICOND-NEXT: beq a3, a4, .LBB61_5
+; RV32ZICOND-NEXT: # %bb.2: # %overflow1
; RV32ZICOND-NEXT: mulhu a4, a0, a2
; RV32ZICOND-NEXT: mul a5, a1, a2
; RV32ZICOND-NEXT: mulhsu a2, a1, a2
@@ -4784,11 +6620,123 @@ define zeroext i1 @smulo.br.i64(i64 %v1, i64 %v2) {
; RV32ZICOND-NEXT: xor a0, a0, a4
; RV32ZICOND-NEXT: xor a4, a5, a4
; RV32ZICOND-NEXT: or a0, a4, a0
-; RV32ZICOND-NEXT: beqz a0, .LBB61_2
-; RV32ZICOND-NEXT: # %bb.1: # %overflow
+; RV32ZICOND-NEXT: j .LBB61_6
+; RV32ZICOND-NEXT: .LBB61_3: # %overflow.no.lhs
+; RV32ZICOND-NEXT: beq a3, a4, .LBB61_8
+; RV32ZICOND-NEXT: # %bb.4: # %overflow.no.lhs.only
+; RV32ZICOND-NEXT: slti a4, a1, 0
+; RV32ZICOND-NEXT: neg a5, a0
+; RV32ZICOND-NEXT: snez a6, a0
+; RV32ZICOND-NEXT: neg a7, a1
+; RV32ZICOND-NEXT: snez t0, a2
+; RV32ZICOND-NEXT: sub a6, a7, a6
+; RV32ZICOND-NEXT: neg a7, a3
+; RV32ZICOND-NEXT: sub a7, a7, t0
+; RV32ZICOND-NEXT: slti t0, a3, 0
+; RV32ZICOND-NEXT: czero.eqz a5, a5, a4
+; RV32ZICOND-NEXT: czero.nez a0, a0, a4
+; RV32ZICOND-NEXT: or a5, a5, a0
+; RV32ZICOND-NEXT: czero.eqz a5, a5, a4
+; RV32ZICOND-NEXT: or a0, a5, a0
+; RV32ZICOND-NEXT: neg a5, a2
+; RV32ZICOND-NEXT: czero.nez a1, a1, a4
+; RV32ZICOND-NEXT: czero.eqz a5, a5, t0
+; RV32ZICOND-NEXT: czero.nez a2, a2, t0
+; RV32ZICOND-NEXT: czero.nez a3, a3, t0
+; RV32ZICOND-NEXT: czero.eqz a6, a6, a4
+; RV32ZICOND-NEXT: or a6, a6, a1
+; RV32ZICOND-NEXT: czero.eqz a6, a6, a4
+; RV32ZICOND-NEXT: xor a4, t0, a4
+; RV32ZICOND-NEXT: or a5, a5, a2
+; RV32ZICOND-NEXT: czero.eqz a7, a7, t0
+; RV32ZICOND-NEXT: or a7, a7, a3
+; RV32ZICOND-NEXT: czero.eqz a5, a5, t0
+; RV32ZICOND-NEXT: czero.eqz a7, a7, t0
+; RV32ZICOND-NEXT: neg t0, a4
+; RV32ZICOND-NEXT: or a2, a5, a2
+; RV32ZICOND-NEXT: or a1, a6, a1
+; RV32ZICOND-NEXT: or a3, a7, a3
+; RV32ZICOND-NEXT: mulhu a5, a0, a2
+; RV32ZICOND-NEXT: mul a6, a0, a2
+; RV32ZICOND-NEXT: mul a2, a1, a2
+; RV32ZICOND-NEXT: mul a1, a1, a3
+; RV32ZICOND-NEXT: mulhu a7, a0, a3
+; RV32ZICOND-NEXT: mul a0, a0, a3
+; RV32ZICOND-NEXT: xor a3, a6, t0
+; RV32ZICOND-NEXT: add a2, a5, a2
+; RV32ZICOND-NEXT: add a1, a7, a1
+; RV32ZICOND-NEXT: add a3, a3, a4
+; RV32ZICOND-NEXT: add a0, a2, a0
+; RV32ZICOND-NEXT: sltu a3, a3, a4
+; RV32ZICOND-NEXT: sltu a2, a0, a2
+; RV32ZICOND-NEXT: xor a0, a0, t0
+; RV32ZICOND-NEXT: add a1, a1, a2
+; RV32ZICOND-NEXT: add a0, a0, a3
+; RV32ZICOND-NEXT: sltu a0, a0, a3
+; RV32ZICOND-NEXT: xor a1, a1, t0
+; RV32ZICOND-NEXT: add a0, a1, a0
+; RV32ZICOND-NEXT: j .LBB61_6
+; RV32ZICOND-NEXT: .LBB61_5: # %overflow.no.rhs.only
+; RV32ZICOND-NEXT: slti a4, a3, 0
+; RV32ZICOND-NEXT: neg a5, a2
+; RV32ZICOND-NEXT: snez a6, a2
+; RV32ZICOND-NEXT: neg a7, a3
+; RV32ZICOND-NEXT: snez t0, a0
+; RV32ZICOND-NEXT: sub a6, a7, a6
+; RV32ZICOND-NEXT: neg a7, a1
+; RV32ZICOND-NEXT: sub a7, a7, t0
+; RV32ZICOND-NEXT: slti t0, a1, 0
+; RV32ZICOND-NEXT: czero.eqz a5, a5, a4
+; RV32ZICOND-NEXT: czero.nez a2, a2, a4
+; RV32ZICOND-NEXT: or a5, a5, a2
+; RV32ZICOND-NEXT: czero.eqz a5, a5, a4
+; RV32ZICOND-NEXT: or a2, a5, a2
+; RV32ZICOND-NEXT: neg a5, a0
+; RV32ZICOND-NEXT: czero.nez a3, a3, a4
+; RV32ZICOND-NEXT: czero.eqz a5, a5, t0
+; RV32ZICOND-NEXT: czero.nez a0, a0, t0
+; RV32ZICOND-NEXT: czero.nez a1, a1, t0
+; RV32ZICOND-NEXT: czero.eqz a6, a6, a4
+; RV32ZICOND-NEXT: or a6, a6, a3
+; RV32ZICOND-NEXT: czero.eqz a6, a6, a4
+; RV32ZICOND-NEXT: xor a4, a4, t0
+; RV32ZICOND-NEXT: or a5, a5, a0
+; RV32ZICOND-NEXT: czero.eqz a7, a7, t0
+; RV32ZICOND-NEXT: or a7, a7, a1
+; RV32ZICOND-NEXT: czero.eqz a5, a5, t0
+; RV32ZICOND-NEXT: czero.eqz a7, a7, t0
+; RV32ZICOND-NEXT: neg t0, a4
+; RV32ZICOND-NEXT: or a0, a5, a0
+; RV32ZICOND-NEXT: or a3, a6, a3
+; RV32ZICOND-NEXT: or a1, a7, a1
+; RV32ZICOND-NEXT: mulhu a5, a2, a0
+; RV32ZICOND-NEXT: mul a6, a2, a0
+; RV32ZICOND-NEXT: mul a0, a3, a0
+; RV32ZICOND-NEXT: mul a3, a3, a1
+; RV32ZICOND-NEXT: mulhu a7, a2, a1
+; RV32ZICOND-NEXT: mul a1, a2, a1
+; RV32ZICOND-NEXT: xor a2, a6, t0
+; RV32ZICOND-NEXT: add a0, a5, a0
+; RV32ZICOND-NEXT: add a3, a7, a3
+; RV32ZICOND-NEXT: add a2, a2, a4
+; RV32ZICOND-NEXT: add a1, a0, a1
+; RV32ZICOND-NEXT: sltu a2, a2, a4
+; RV32ZICOND-NEXT: sltu a0, a1, a0
+; RV32ZICOND-NEXT: xor a1, a1, t0
+; RV32ZICOND-NEXT: add a0, a3, a0
+; RV32ZICOND-NEXT: add a1, a1, a2
+; RV32ZICOND-NEXT: sltu a1, a1, a2
+; RV32ZICOND-NEXT: xor a0, a0, t0
+; RV32ZICOND-NEXT: add a0, a0, a1
+; RV32ZICOND-NEXT: .LBB61_6: # %overflow.res
+; RV32ZICOND-NEXT: snez a0, a0
+; RV32ZICOND-NEXT: andi a0, a0, 1
+; RV32ZICOND-NEXT: beqz a0, .LBB61_9
+; RV32ZICOND-NEXT: # %bb.7: # %overflow
; RV32ZICOND-NEXT: li a0, 0
; RV32ZICOND-NEXT: ret
-; RV32ZICOND-NEXT: .LBB61_2: # %continue
+; RV32ZICOND-NEXT: .LBB61_8: # %overflow.no
+; RV32ZICOND-NEXT: .LBB61_9: # %continue
; RV32ZICOND-NEXT: li a0, 1
; RV32ZICOND-NEXT: ret
;
@@ -4819,43 +6767,56 @@ continue:
define zeroext i1 @smulo2.br.i64(i64 %v1) {
; RV32-LABEL: smulo2.br.i64:
-; RV32: # %bb.0: # %entry
-; RV32-NEXT: li a2, -13
-; RV32-NEXT: neg a3, a0
-; RV32-NEXT: li a4, -1
-; RV32-NEXT: mulhu a5, a0, a2
-; RV32-NEXT: mul a6, a1, a2
-; RV32-NEXT: mulhsu a2, a1, a2
-; RV32-NEXT: add a5, a6, a5
-; RV32-NEXT: sltu a6, a5, a6
-; RV32-NEXT: sub a5, a5, a0
-; RV32-NEXT: mulhsu a0, a4, a0
-; RV32-NEXT: add a2, a2, a6
-; RV32-NEXT: sltu a3, a5, a3
-; RV32-NEXT: add a0, a0, a3
-; RV32-NEXT: srai a3, a2, 31
-; RV32-NEXT: srai a6, a0, 31
-; RV32-NEXT: add a3, a3, a6
-; RV32-NEXT: neg a6, a1
-; RV32-NEXT: mulh a4, a1, a4
-; RV32-NEXT: srai a5, a5, 31
+; RV32: # %bb.0: # %overflow.entry
+; RV32-NEXT: srai a2, a0, 31
+; RV32-NEXT: beq a1, a2, .LBB62_3
+; RV32-NEXT: # %bb.1: # %overflow.lhs
+; RV32-NEXT: slti a2, a1, 0
+; RV32-NEXT: bltz a1, .LBB62_5
+; RV32-NEXT: # %bb.2: # %overflow.lhs
+; RV32-NEXT: mv a4, a0
+; RV32-NEXT: mv a5, a1
+; RV32-NEXT: xori a3, a2, 1
+; RV32-NEXT: bgez a1, .LBB62_6
+; RV32-NEXT: j .LBB62_7
+; RV32-NEXT: .LBB62_3: # %overflow.no.lhs
+; RV32-NEXT: .LBB62_4: # %continue
+; RV32-NEXT: li a0, 1
+; RV32-NEXT: ret
+; RV32-NEXT: .LBB62_5:
+; RV32-NEXT: neg a4, a0
+; RV32-NEXT: snez a3, a0
+; RV32-NEXT: neg a5, a1
+; RV32-NEXT: sub a5, a5, a3
+; RV32-NEXT: xori a3, a2, 1
+; RV32-NEXT: bltz a1, .LBB62_7
+; RV32-NEXT: .LBB62_6: # %overflow.lhs
+; RV32-NEXT: mv a5, a1
+; RV32-NEXT: mv a4, a0
+; RV32-NEXT: .LBB62_7: # %overflow.lhs
+; RV32-NEXT: li a0, 13
+; RV32-NEXT: addi a2, a2, -1
+; RV32-NEXT: mul a1, a4, a0
+; RV32-NEXT: mulhu a4, a4, a0
+; RV32-NEXT: mulhu a6, a5, a0
+; RV32-NEXT: mul a0, a5, a0
+; RV32-NEXT: add a0, a4, a0
+; RV32-NEXT: xor a1, a1, a2
+; RV32-NEXT: sltu a4, a0, a4
+; RV32-NEXT: add a1, a1, a3
+; RV32-NEXT: xor a0, a0, a2
+; RV32-NEXT: add a4, a6, a4
+; RV32-NEXT: sltu a1, a1, a3
+; RV32-NEXT: add a0, a0, a1
+; RV32-NEXT: sltu a0, a0, a1
+; RV32-NEXT: xor a2, a4, a2
; RV32-NEXT: add a0, a2, a0
-; RV32-NEXT: sltu a2, a0, a2
-; RV32-NEXT: sub a0, a0, a1
-; RV32-NEXT: add a2, a3, a2
-; RV32-NEXT: sltu a1, a0, a6
-; RV32-NEXT: add a2, a4, a2
-; RV32-NEXT: add a1, a2, a1
-; RV32-NEXT: xor a1, a1, a5
-; RV32-NEXT: xor a0, a0, a5
-; RV32-NEXT: or a0, a0, a1
-; RV32-NEXT: beqz a0, .LBB62_2
-; RV32-NEXT: # %bb.1: # %overflow
+; RV32-NEXT: snez a0, a0
+; RV32-NEXT: andi a0, a0, 1
+; RV32-NEXT: beqz a0, .LBB62_4
+; RV32-NEXT: # %bb.8: # %overflow
; RV32-NEXT: li a0, 0
; RV32-NEXT: ret
-; RV32-NEXT: .LBB62_2: # %continue
-; RV32-NEXT: li a0, 1
-; RV32-NEXT: ret
;
; RV64-LABEL: smulo2.br.i64:
; RV64: # %bb.0: # %entry
@@ -4872,43 +6833,58 @@ define zeroext i1 @smulo2.br.i64(i64 %v1) {
; RV64-NEXT: ret
;
; RV32ZBA-LABEL: smulo2.br.i64:
-; RV32ZBA: # %bb.0: # %entry
-; RV32ZBA-NEXT: li a2, -13
-; RV32ZBA-NEXT: neg a3, a0
-; RV32ZBA-NEXT: li a4, -1
-; RV32ZBA-NEXT: mulhu a5, a0, a2
-; RV32ZBA-NEXT: mul a6, a1, a2
-; RV32ZBA-NEXT: mulhsu a2, a1, a2
-; RV32ZBA-NEXT: add a5, a6, a5
-; RV32ZBA-NEXT: sltu a6, a5, a6
-; RV32ZBA-NEXT: sub a5, a5, a0
-; RV32ZBA-NEXT: mulhsu a0, a4, a0
-; RV32ZBA-NEXT: add a2, a2, a6
-; RV32ZBA-NEXT: sltu a3, a5, a3
+; RV32ZBA: # %bb.0: # %overflow.entry
+; RV32ZBA-NEXT: srai a2, a0, 31
+; RV32ZBA-NEXT: beq a1, a2, .LBB62_3
+; RV32ZBA-NEXT: # %bb.1: # %overflow.lhs
+; RV32ZBA-NEXT: slti a2, a1, 0
+; RV32ZBA-NEXT: bltz a1, .LBB62_5
+; RV32ZBA-NEXT: # %bb.2: # %overflow.lhs
+; RV32ZBA-NEXT: mv a4, a0
+; RV32ZBA-NEXT: mv a5, a1
+; RV32ZBA-NEXT: xori a3, a2, 1
+; RV32ZBA-NEXT: bgez a1, .LBB62_6
+; RV32ZBA-NEXT: j .LBB62_7
+; RV32ZBA-NEXT: .LBB62_3: # %overflow.no.lhs
+; RV32ZBA-NEXT: .LBB62_4: # %continue
+; RV32ZBA-NEXT: li a0, 1
+; RV32ZBA-NEXT: ret
+; RV32ZBA-NEXT: .LBB62_5:
+; RV32ZBA-NEXT: neg a4, a0
+; RV32ZBA-NEXT: snez a3, a0
+; RV32ZBA-NEXT: neg a5, a1
+; RV32ZBA-NEXT: sub a5, a5, a3
+; RV32ZBA-NEXT: xori a3, a2, 1
+; RV32ZBA-NEXT: bltz a1, .LBB62_7
+; RV32ZBA-NEXT: .LBB62_6: # %overflow.lhs
+; RV32ZBA-NEXT: mv a5, a1
+; RV32ZBA-NEXT: mv a4, a0
+; RV32ZBA-NEXT: .LBB62_7: # %overflow.lhs
+; RV32ZBA-NEXT: sh1add a0, a4, a4
+; RV32ZBA-NEXT: li a1, 13
+; RV32ZBA-NEXT: sh1add a6, a5, a5
+; RV32ZBA-NEXT: addi a2, a2, -1
+; RV32ZBA-NEXT: sh2add a0, a0, a4
+; RV32ZBA-NEXT: mulhu a4, a4, a1
+; RV32ZBA-NEXT: sh2add a6, a6, a5
+; RV32ZBA-NEXT: mulhu a1, a5, a1
+; RV32ZBA-NEXT: add a6, a4, a6
+; RV32ZBA-NEXT: xor a0, a0, a2
+; RV32ZBA-NEXT: sltu a4, a6, a4
; RV32ZBA-NEXT: add a0, a0, a3
-; RV32ZBA-NEXT: srai a3, a2, 31
-; RV32ZBA-NEXT: srai a6, a0, 31
-; RV32ZBA-NEXT: add a3, a3, a6
-; RV32ZBA-NEXT: neg a6, a1
-; RV32ZBA-NEXT: mulh a4, a1, a4
-; RV32ZBA-NEXT: srai a5, a5, 31
-; RV32ZBA-NEXT: add a0, a2, a0
-; RV32ZBA-NEXT: sltu a2, a0, a2
-; RV32ZBA-NEXT: sub a0, a0, a1
-; RV32ZBA-NEXT: add a2, a3, a2
-; RV32ZBA-NEXT: sltu a1, a0, a6
-; RV32ZBA-NEXT: add a2, a4, a2
-; RV32ZBA-NEXT: add a1, a2, a1
-; RV32ZBA-NEXT: xor a1, a1, a5
-; RV32ZBA-NEXT: xor a0, a0, a5
-; RV32ZBA-NEXT: or a0, a0, a1
-; RV32ZBA-NEXT: beqz a0, .LBB62_2
-; RV32ZBA-NEXT: # %bb.1: # %overflow
+; RV32ZBA-NEXT: xor a5, a6, a2
+; RV32ZBA-NEXT: add a1, a1, a4
+; RV32ZBA-NEXT: sltu a0, a0, a3
+; RV32ZBA-NEXT: add a5, a5, a0
+; RV32ZBA-NEXT: sltu a0, a5, a0
+; RV32ZBA-NEXT: xor a1, a1, a2
+; RV32ZBA-NEXT: add a0, a1, a0
+; RV32ZBA-NEXT: snez a0, a0
+; RV32ZBA-NEXT: andi a0, a0, 1
+; RV32ZBA-NEXT: beqz a0, .LBB62_4
+; RV32ZBA-NEXT: # %bb.8: # %overflow
; RV32ZBA-NEXT: li a0, 0
; RV32ZBA-NEXT: ret
-; RV32ZBA-NEXT: .LBB62_2: # %continue
-; RV32ZBA-NEXT: li a0, 1
-; RV32ZBA-NEXT: ret
;
; RV64ZBA-LABEL: smulo2.br.i64:
; RV64ZBA: # %bb.0: # %entry
@@ -4925,43 +6901,54 @@ define zeroext i1 @smulo2.br.i64(i64 %v1) {
; RV64ZBA-NEXT: ret
;
; RV32ZICOND-LABEL: smulo2.br.i64:
-; RV32ZICOND: # %bb.0: # %entry
-; RV32ZICOND-NEXT: li a2, -13
+; RV32ZICOND: # %bb.0: # %overflow.entry
+; RV32ZICOND-NEXT: srai a2, a0, 31
+; RV32ZICOND-NEXT: beq a1, a2, .LBB62_3
+; RV32ZICOND-NEXT: # %bb.1: # %overflow.lhs
+; RV32ZICOND-NEXT: slti a2, a1, 0
; RV32ZICOND-NEXT: neg a3, a0
-; RV32ZICOND-NEXT: li a4, -1
-; RV32ZICOND-NEXT: mulhu a5, a0, a2
-; RV32ZICOND-NEXT: mul a6, a1, a2
-; RV32ZICOND-NEXT: mulhsu a2, a1, a2
-; RV32ZICOND-NEXT: add a5, a6, a5
-; RV32ZICOND-NEXT: sltu a6, a5, a6
-; RV32ZICOND-NEXT: sub a5, a5, a0
-; RV32ZICOND-NEXT: mulhsu a0, a4, a0
-; RV32ZICOND-NEXT: add a2, a2, a6
-; RV32ZICOND-NEXT: sltu a3, a5, a3
-; RV32ZICOND-NEXT: add a0, a0, a3
-; RV32ZICOND-NEXT: srai a3, a2, 31
-; RV32ZICOND-NEXT: srai a6, a0, 31
-; RV32ZICOND-NEXT: add a3, a3, a6
-; RV32ZICOND-NEXT: neg a6, a1
-; RV32ZICOND-NEXT: mulh a4, a1, a4
-; RV32ZICOND-NEXT: srai a5, a5, 31
-; RV32ZICOND-NEXT: add a0, a2, a0
-; RV32ZICOND-NEXT: sltu a2, a0, a2
-; RV32ZICOND-NEXT: sub a0, a0, a1
-; RV32ZICOND-NEXT: add a2, a3, a2
-; RV32ZICOND-NEXT: sltu a1, a0, a6
-; RV32ZICOND-NEXT: add a2, a4, a2
-; RV32ZICOND-NEXT: add a1, a2, a1
-; RV32ZICOND-NEXT: xor a1, a1, a5
-; RV32ZICOND-NEXT: xor a0, a0, a5
-; RV32ZICOND-NEXT: or a0, a0, a1
-; RV32ZICOND-NEXT: beqz a0, .LBB62_2
-; RV32ZICOND-NEXT: # %bb.1: # %overflow
-; RV32ZICOND-NEXT: li a0, 0
-; RV32ZICOND-NEXT: ret
+; RV32ZICOND-NEXT: snez a4, a0
+; RV32ZICOND-NEXT: neg a5, a1
+; RV32ZICOND-NEXT: li a6, 13
+; RV32ZICOND-NEXT: sub a5, a5, a4
+; RV32ZICOND-NEXT: xori a4, a2, 1
+; RV32ZICOND-NEXT: czero.eqz a3, a3, a2
+; RV32ZICOND-NEXT: czero.nez a0, a0, a2
+; RV32ZICOND-NEXT: czero.nez a1, a1, a2
+; RV32ZICOND-NEXT: or a3, a3, a0
+; RV32ZICOND-NEXT: czero.eqz a5, a5, a2
+; RV32ZICOND-NEXT: or a5, a5, a1
+; RV32ZICOND-NEXT: czero.eqz a3, a3, a2
+; RV32ZICOND-NEXT: czero.eqz a5, a5, a2
+; RV32ZICOND-NEXT: addi a2, a2, -1
+; RV32ZICOND-NEXT: or a0, a3, a0
+; RV32ZICOND-NEXT: or a1, a5, a1
+; RV32ZICOND-NEXT: mul a3, a0, a6
+; RV32ZICOND-NEXT: mulhu a0, a0, a6
+; RV32ZICOND-NEXT: mulhu a5, a1, a6
+; RV32ZICOND-NEXT: mul a1, a1, a6
+; RV32ZICOND-NEXT: xor a3, a3, a2
+; RV32ZICOND-NEXT: add a1, a0, a1
+; RV32ZICOND-NEXT: add a3, a3, a4
+; RV32ZICOND-NEXT: sltu a0, a1, a0
+; RV32ZICOND-NEXT: sltu a3, a3, a4
+; RV32ZICOND-NEXT: xor a1, a1, a2
+; RV32ZICOND-NEXT: add a0, a5, a0
+; RV32ZICOND-NEXT: add a1, a1, a3
+; RV32ZICOND-NEXT: sltu a1, a1, a3
+; RV32ZICOND-NEXT: xor a0, a0, a2
+; RV32ZICOND-NEXT: add a0, a0, a1
+; RV32ZICOND-NEXT: snez a0, a0
+; RV32ZICOND-NEXT: andi a0, a0, 1
+; RV32ZICOND-NEXT: bnez a0, .LBB62_4
; RV32ZICOND-NEXT: .LBB62_2: # %continue
; RV32ZICOND-NEXT: li a0, 1
; RV32ZICOND-NEXT: ret
+; RV32ZICOND-NEXT: .LBB62_3: # %overflow.no.lhs
+; RV32ZICOND-NEXT: j .LBB62_2
+; RV32ZICOND-NEXT: .LBB62_4: # %overflow
+; RV32ZICOND-NEXT: li a0, 0
+; RV32ZICOND-NEXT: ret
;
; RV64ZICOND-LABEL: smulo2.br.i64:
; RV64ZICOND: # %bb.0: # %entry
@@ -5079,7 +7066,11 @@ continue:
define zeroext i1 @umulo.br.i64(i64 %v1, i64 %v2) {
; RV32-LABEL: umulo.br.i64:
-; RV32: # %bb.0: # %entry
+; RV32: # %bb.0: # %overflow.entry
+; RV32-NEXT: beqz a1, .LBB64_4
+; RV32-NEXT: # %bb.1: # %overflow.lhs
+; RV32-NEXT: beqz a3, .LBB64_6
+; RV32-NEXT: # %bb.2: # %overflow1
; RV32-NEXT: mul a4, a3, a0
; RV32-NEXT: mul a5, a1, a2
; RV32-NEXT: mulhu a6, a0, a2
@@ -5096,13 +7087,45 @@ define zeroext i1 @umulo.br.i64(i64 %v1, i64 %v2) {
; RV32-NEXT: sltu a2, a4, a6
; RV32-NEXT: or a0, a1, a0
; RV32-NEXT: or a0, a0, a2
-; RV32-NEXT: beqz a0, .LBB64_2
-; RV32-NEXT: # %bb.1: # %overflow
-; RV32-NEXT: li a0, 0
-; RV32-NEXT: ret
-; RV32-NEXT: .LBB64_2: # %continue
+; RV32-NEXT: andi a0, a0, 1
+; RV32-NEXT: bnez a0, .LBB64_8
+; RV32-NEXT: .LBB64_3: # %continue
; RV32-NEXT: li a0, 1
; RV32-NEXT: ret
+; RV32-NEXT: .LBB64_4: # %overflow.no.lhs
+; RV32-NEXT: beqz a3, .LBB64_9
+; RV32-NEXT: # %bb.5: # %overflow.no.lhs.only
+; RV32-NEXT: mulhu a4, a0, a2
+; RV32-NEXT: mul a2, a1, a2
+; RV32-NEXT: add a2, a4, a2
+; RV32-NEXT: mulhu a4, a0, a3
+; RV32-NEXT: mul a1, a1, a3
+; RV32-NEXT: add a1, a4, a1
+; RV32-NEXT: mul a0, a0, a3
+; RV32-NEXT: add a0, a2, a0
+; RV32-NEXT: sltu a0, a0, a2
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: j .LBB64_7
+; RV32-NEXT: .LBB64_6: # %overflow.no.rhs.only
+; RV32-NEXT: mulhu a4, a2, a0
+; RV32-NEXT: mul a0, a3, a0
+; RV32-NEXT: add a0, a4, a0
+; RV32-NEXT: mulhu a4, a2, a1
+; RV32-NEXT: mul a3, a3, a1
+; RV32-NEXT: add a3, a4, a3
+; RV32-NEXT: mul a1, a2, a1
+; RV32-NEXT: add a1, a0, a1
+; RV32-NEXT: sltu a0, a1, a0
+; RV32-NEXT: add a0, a3, a0
+; RV32-NEXT: .LBB64_7: # %overflow.no.rhs.only
+; RV32-NEXT: snez a0, a0
+; RV32-NEXT: andi a0, a0, 1
+; RV32-NEXT: beqz a0, .LBB64_3
+; RV32-NEXT: .LBB64_8: # %overflow
+; RV32-NEXT: li a0, 0
+; RV32-NEXT: ret
+; RV32-NEXT: .LBB64_9: # %overflow.no
+; RV32-NEXT: j .LBB64_3
;
; RV64-LABEL: umulo.br.i64:
; RV64: # %bb.0: # %entry
@@ -5116,7 +7139,11 @@ define zeroext i1 @umulo.br.i64(i64 %v1, i64 %v2) {
; RV64-NEXT: ret
;
; RV32ZBA-LABEL: umulo.br.i64:
-; RV32ZBA: # %bb.0: # %entry
+; RV32ZBA: # %bb.0: # %overflow.entry
+; RV32ZBA-NEXT: beqz a1, .LBB64_4
+; RV32ZBA-NEXT: # %bb.1: # %overflow.lhs
+; RV32ZBA-NEXT: beqz a3, .LBB64_6
+; RV32ZBA-NEXT: # %bb.2: # %overflow1
; RV32ZBA-NEXT: mul a4, a3, a0
; RV32ZBA-NEXT: mul a5, a1, a2
; RV32ZBA-NEXT: mulhu a6, a0, a2
@@ -5133,13 +7160,45 @@ define zeroext i1 @umulo.br.i64(i64 %v1, i64 %v2) {
; RV32ZBA-NEXT: sltu a2, a4, a6
; RV32ZBA-NEXT: or a0, a1, a0
; RV32ZBA-NEXT: or a0, a0, a2
-; RV32ZBA-NEXT: beqz a0, .LBB64_2
-; RV32ZBA-NEXT: # %bb.1: # %overflow
-; RV32ZBA-NEXT: li a0, 0
-; RV32ZBA-NEXT: ret
-; RV32ZBA-NEXT: .LBB64_2: # %continue
+; RV32ZBA-NEXT: andi a0, a0, 1
+; RV32ZBA-NEXT: bnez a0, .LBB64_8
+; RV32ZBA-NEXT: .LBB64_3: # %continue
; RV32ZBA-NEXT: li a0, 1
; RV32ZBA-NEXT: ret
+; RV32ZBA-NEXT: .LBB64_4: # %overflow.no.lhs
+; RV32ZBA-NEXT: beqz a3, .LBB64_9
+; RV32ZBA-NEXT: # %bb.5: # %overflow.no.lhs.only
+; RV32ZBA-NEXT: mulhu a4, a0, a2
+; RV32ZBA-NEXT: mul a2, a1, a2
+; RV32ZBA-NEXT: add a2, a4, a2
+; RV32ZBA-NEXT: mulhu a4, a0, a3
+; RV32ZBA-NEXT: mul a1, a1, a3
+; RV32ZBA-NEXT: add a1, a4, a1
+; RV32ZBA-NEXT: mul a0, a0, a3
+; RV32ZBA-NEXT: add a0, a2, a0
+; RV32ZBA-NEXT: sltu a0, a0, a2
+; RV32ZBA-NEXT: add a0, a1, a0
+; RV32ZBA-NEXT: j .LBB64_7
+; RV32ZBA-NEXT: .LBB64_6: # %overflow.no.rhs.only
+; RV32ZBA-NEXT: mulhu a4, a2, a0
+; RV32ZBA-NEXT: mul a0, a3, a0
+; RV32ZBA-NEXT: add a0, a4, a0
+; RV32ZBA-NEXT: mulhu a4, a2, a1
+; RV32ZBA-NEXT: mul a3, a3, a1
+; RV32ZBA-NEXT: add a3, a4, a3
+; RV32ZBA-NEXT: mul a1, a2, a1
+; RV32ZBA-NEXT: add a1, a0, a1
+; RV32ZBA-NEXT: sltu a0, a1, a0
+; RV32ZBA-NEXT: add a0, a3, a0
+; RV32ZBA-NEXT: .LBB64_7: # %overflow.no.rhs.only
+; RV32ZBA-NEXT: snez a0, a0
+; RV32ZBA-NEXT: andi a0, a0, 1
+; RV32ZBA-NEXT: beqz a0, .LBB64_3
+; RV32ZBA-NEXT: .LBB64_8: # %overflow
+; RV32ZBA-NEXT: li a0, 0
+; RV32ZBA-NEXT: ret
+; RV32ZBA-NEXT: .LBB64_9: # %overflow.no
+; RV32ZBA-NEXT: j .LBB64_3
;
; RV64ZBA-LABEL: umulo.br.i64:
; RV64ZBA: # %bb.0: # %entry
@@ -5153,7 +7212,11 @@ define zeroext i1 @umulo.br.i64(i64 %v1, i64 %v2) {
; RV64ZBA-NEXT: ret
;
; RV32ZICOND-LABEL: umulo.br.i64:
-; RV32ZICOND: # %bb.0: # %entry
+; RV32ZICOND: # %bb.0: # %overflow.entry
+; RV32ZICOND-NEXT: beqz a1, .LBB64_4
+; RV32ZICOND-NEXT: # %bb.1: # %overflow.lhs
+; RV32ZICOND-NEXT: beqz a3, .LBB64_6
+; RV32ZICOND-NEXT: # %bb.2: # %overflow1
; RV32ZICOND-NEXT: mul a4, a3, a0
; RV32ZICOND-NEXT: mul a5, a1, a2
; RV32ZICOND-NEXT: mulhu a6, a0, a2
@@ -5170,13 +7233,45 @@ define zeroext i1 @umulo.br.i64(i64 %v1, i64 %v2) {
; RV32ZICOND-NEXT: sltu a2, a4, a6
; RV32ZICOND-NEXT: or a0, a1, a0
; RV32ZICOND-NEXT: or a0, a0, a2
-; RV32ZICOND-NEXT: beqz a0, .LBB64_2
-; RV32ZICOND-NEXT: # %bb.1: # %overflow
-; RV32ZICOND-NEXT: li a0, 0
-; RV32ZICOND-NEXT: ret
-; RV32ZICOND-NEXT: .LBB64_2: # %continue
+; RV32ZICOND-NEXT: andi a0, a0, 1
+; RV32ZICOND-NEXT: bnez a0, .LBB64_8
+; RV32ZICOND-NEXT: .LBB64_3: # %continue
; RV32ZICOND-NEXT: li a0, 1
; RV32ZICOND-NEXT: ret
+; RV32ZICOND-NEXT: .LBB64_4: # %overflow.no.lhs
+; RV32ZICOND-NEXT: beqz a3, .LBB64_9
+; RV32ZICOND-NEXT: # %bb.5: # %overflow.no.lhs.only
+; RV32ZICOND-NEXT: mulhu a4, a0, a2
+; RV32ZICOND-NEXT: mul a2, a1, a2
+; RV32ZICOND-NEXT: add a2, a4, a2
+; RV32ZICOND-NEXT: mulhu a4, a0, a3
+; RV32ZICOND-NEXT: mul a1, a1, a3
+; RV32ZICOND-NEXT: add a1, a4, a1
+; RV32ZICOND-NEXT: mul a0, a0, a3
+; RV32ZICOND-NEXT: add a0, a2, a0
+; RV32ZICOND-NEXT: sltu a0, a0, a2
+; RV32ZICOND-NEXT: add a0, a1, a0
+; RV32ZICOND-NEXT: j .LBB64_7
+; RV32ZICOND-NEXT: .LBB64_6: # %overflow.no.rhs.only
+; RV32ZICOND-NEXT: mulhu a4, a2, a0
+; RV32ZICOND-NEXT: mul a0, a3, a0
+; RV32ZICOND-NEXT: add a0, a4, a0
+; RV32ZICOND-NEXT: mulhu a4, a2, a1
+; RV32ZICOND-NEXT: mul a3, a3, a1
+; RV32ZICOND-NEXT: add a3, a4, a3
+; RV32ZICOND-NEXT: mul a1, a2, a1
+; RV32ZICOND-NEXT: add a1, a0, a1
+; RV32ZICOND-NEXT: sltu a0, a1, a0
+; RV32ZICOND-NEXT: add a0, a3, a0
+; RV32ZICOND-NEXT: .LBB64_7: # %overflow.no.rhs.only
+; RV32ZICOND-NEXT: snez a0, a0
+; RV32ZICOND-NEXT: andi a0, a0, 1
+; RV32ZICOND-NEXT: beqz a0, .LBB64_3
+; RV32ZICOND-NEXT: .LBB64_8: # %overflow
+; RV32ZICOND-NEXT: li a0, 0
+; RV32ZICOND-NEXT: ret
+; RV32ZICOND-NEXT: .LBB64_9: # %overflow.no
+; RV32ZICOND-NEXT: j .LBB64_3
;
; RV64ZICOND-LABEL: umulo.br.i64:
; RV64ZICOND: # %bb.0: # %entry
@@ -5203,16 +7298,13 @@ continue:
define zeroext i1 @umulo2.br.i64(i64 %v1) {
; RV32-LABEL: umulo2.br.i64:
-; RV32: # %bb.0: # %entry
-; RV32-NEXT: add a2, a0, a0
-; RV32-NEXT: sltu a0, a2, a0
-; RV32-NEXT: add a2, a1, a1
-; RV32-NEXT: add a2, a2, a0
-; RV32-NEXT: beq a2, a1, .LBB65_2
-; RV32-NEXT: # %bb.1: # %entry
-; RV32-NEXT: sltu a0, a2, a1
-; RV32-NEXT: .LBB65_2: # %entry
-; RV32-NEXT: beqz a0, .LBB65_4
+; RV32: # %bb.0: # %overflow.entry
+; RV32-NEXT: beqz a1, .LBB65_2
+; RV32-NEXT: # %bb.1: # %overflow.lhs
+; RV32-NEXT: srli a1, a1, 31
+; RV32-NEXT: .LBB65_2: # %overflow.res
+; RV32-NEXT: andi a1, a1, 1
+; RV32-NEXT: beqz a1, .LBB65_4
; RV32-NEXT: # %bb.3: # %overflow
; RV32-NEXT: li a0, 0
; RV32-NEXT: ret
@@ -5232,16 +7324,13 @@ define zeroext i1 @umulo2.br.i64(i64 %v1) {
; RV64-NEXT: ret
;
; RV32ZBA-LABEL: umulo2.br.i64:
-; RV32ZBA: # %bb.0: # %entry
-; RV32ZBA-NEXT: add a2, a0, a0
-; RV32ZBA-NEXT: sltu a0, a2, a0
-; RV32ZBA-NEXT: add a2, a1, a1
-; RV32ZBA-NEXT: add a2, a2, a0
-; RV32ZBA-NEXT: beq a2, a1, .LBB65_2
-; RV32ZBA-NEXT: # %bb.1: # %entry
-; RV32ZBA-NEXT: sltu a0, a2, a1
-; RV32ZBA-NEXT: .LBB65_2: # %entry
-; RV32ZBA-NEXT: beqz a0, .LBB65_4
+; RV32ZBA: # %bb.0: # %overflow.entry
+; RV32ZBA-NEXT: beqz a1, .LBB65_2
+; RV32ZBA-NEXT: # %bb.1: # %overflow.lhs
+; RV32ZBA-NEXT: srli a1, a1, 31
+; RV32ZBA-NEXT: .LBB65_2: # %overflow.res
+; RV32ZBA-NEXT: andi a1, a1, 1
+; RV32ZBA-NEXT: beqz a1, .LBB65_4
; RV32ZBA-NEXT: # %bb.3: # %overflow
; RV32ZBA-NEXT: li a0, 0
; RV32ZBA-NEXT: ret
@@ -5261,21 +7350,17 @@ define zeroext i1 @umulo2.br.i64(i64 %v1) {
; RV64ZBA-NEXT: ret
;
; RV32ZICOND-LABEL: umulo2.br.i64:
-; RV32ZICOND: # %bb.0: # %entry
-; RV32ZICOND-NEXT: add a2, a0, a0
-; RV32ZICOND-NEXT: add a3, a1, a1
-; RV32ZICOND-NEXT: sltu a0, a2, a0
-; RV32ZICOND-NEXT: add a3, a3, a0
-; RV32ZICOND-NEXT: xor a2, a3, a1
-; RV32ZICOND-NEXT: sltu a1, a3, a1
-; RV32ZICOND-NEXT: czero.eqz a1, a1, a2
-; RV32ZICOND-NEXT: czero.nez a0, a0, a2
-; RV32ZICOND-NEXT: or a0, a0, a1
-; RV32ZICOND-NEXT: beqz a0, .LBB65_2
-; RV32ZICOND-NEXT: # %bb.1: # %overflow
+; RV32ZICOND: # %bb.0: # %overflow.entry
+; RV32ZICOND-NEXT: beqz a1, .LBB65_2
+; RV32ZICOND-NEXT: # %bb.1: # %overflow.lhs
+; RV32ZICOND-NEXT: srli a1, a1, 31
+; RV32ZICOND-NEXT: .LBB65_2: # %overflow.res
+; RV32ZICOND-NEXT: andi a1, a1, 1
+; RV32ZICOND-NEXT: beqz a1, .LBB65_4
+; RV32ZICOND-NEXT: # %bb.3: # %overflow
; RV32ZICOND-NEXT: li a0, 0
; RV32ZICOND-NEXT: ret
-; RV32ZICOND-NEXT: .LBB65_2: # %continue
+; RV32ZICOND-NEXT: .LBB65_4: # %continue
; RV32ZICOND-NEXT: li a0, 1
; RV32ZICOND-NEXT: ret
;
diff --git a/llvm/test/CodeGen/SPARC/smulo-128-legalisation-lowering.ll b/llvm/test/CodeGen/SPARC/smulo-128-legalisation-lowering.ll
index 1e5ab7922de08..ff846adf7e138 100644
--- a/llvm/test/CodeGen/SPARC/smulo-128-legalisation-lowering.ll
+++ b/llvm/test/CodeGen/SPARC/smulo-128-legalisation-lowering.ll
@@ -5,93 +5,106 @@
define { i128, i8 } @muloti_test(i128 %l, i128 %r) nounwind {
; SPARC-LABEL: muloti_test:
-; SPARC: ! %bb.0: ! %start
+; SPARC: ! %bb.0: ! %overflow.entry
; SPARC-NEXT: save %sp, -96, %sp
-; SPARC-NEXT: ld [%fp+96], %l2
-; SPARC-NEXT: mov %i3, %g2
-; SPARC-NEXT: mov %i2, %g3
-; SPARC-NEXT: umul %i1, %l2, %l0
-; SPARC-NEXT: rd %y, %i2
-; SPARC-NEXT: ld [%fp+92], %l1
-; SPARC-NEXT: umul %i0, %l2, %i3
-; SPARC-NEXT: rd %y, %g4
-; SPARC-NEXT: addcc %i3, %i2, %i2
-; SPARC-NEXT: addxcc %g4, 0, %i3
-; SPARC-NEXT: umul %i1, %l1, %g4
-; SPARC-NEXT: rd %y, %l3
-; SPARC-NEXT: addcc %g4, %i2, %l4
-; SPARC-NEXT: addxcc %l3, 0, %i2
-; SPARC-NEXT: addcc %i3, %i2, %i2
-; SPARC-NEXT: addxcc %g0, 0, %i3
-; SPARC-NEXT: umul %i0, %l1, %g4
+; SPARC-NEXT: ld [%fp+96], %g3
+; SPARC-NEXT: ld [%fp+92], %l0
+; SPARC-NEXT: sra %i2, 31, %g2
+; SPARC-NEXT: xor %i0, %g2, %g4
+; SPARC-NEXT: xor %i1, %g2, %g2
+; SPARC-NEXT: or %g2, %g4, %g2
+; SPARC-NEXT: cmp %g2, 0
+; SPARC-NEXT: sra %l0, 31, %g2
+; SPARC-NEXT: xor %i4, %g2, %g4
+; SPARC-NEXT: xor %i5, %g2, %g2
+; SPARC-NEXT: be .LBB0_4
+; SPARC-NEXT: or %g2, %g4, %g2
+; SPARC-NEXT: ! %bb.1: ! %overflow.lhs
+; SPARC-NEXT: cmp %g2, 0
+; SPARC-NEXT: be .LBB0_15
+; SPARC-NEXT: nop
+; SPARC-NEXT: ! %bb.2: ! %overflow
+; SPARC-NEXT: umul %i1, %g3, %l1
+; SPARC-NEXT: rd %y, %g2
+; SPARC-NEXT: umul %i0, %g3, %g4
+; SPARC-NEXT: rd %y, %l2
+; SPARC-NEXT: addcc %g4, %g2, %g2
+; SPARC-NEXT: addxcc %l2, 0, %g4
+; SPARC-NEXT: umul %i1, %l0, %l2
; SPARC-NEXT: rd %y, %l3
-; SPARC-NEXT: addcc %g4, %i2, %i2
+; SPARC-NEXT: addcc %l2, %g2, %l2
+; SPARC-NEXT: addxcc %l3, 0, %g2
+; SPARC-NEXT: addcc %g4, %g2, %g2
+; SPARC-NEXT: addxcc %g0, 0, %l3
+; SPARC-NEXT: umul %i0, %l0, %g4
+; SPARC-NEXT: rd %y, %l4
+; SPARC-NEXT: addcc %g4, %g2, %g2
; SPARC-NEXT: sra %i0, 31, %g4
-; SPARC-NEXT: smul %l1, %g4, %l5
-; SPARC-NEXT: umul %l2, %g4, %l6
+; SPARC-NEXT: smul %l0, %g4, %l5
+; SPARC-NEXT: umul %g3, %g4, %l6
; SPARC-NEXT: rd %y, %l7
-; SPARC-NEXT: addxcc %l3, %i3, %l3
-; SPARC-NEXT: add %l7, %l6, %i3
-; SPARC-NEXT: add %i3, %l5, %l5
-; SPARC-NEXT: addcc %i2, %l6, %l6
-; SPARC-NEXT: umul %g2, %l2, %i3
-; SPARC-NEXT: rd %y, %i2
-; SPARC-NEXT: addxcc %l3, %l5, %l3
-; SPARC-NEXT: umul %g3, %l2, %l2
-; SPARC-NEXT: rd %y, %l5
-; SPARC-NEXT: addcc %l2, %i2, %i2
-; SPARC-NEXT: addxcc %l5, 0, %l2
-; SPARC-NEXT: umul %g2, %l1, %l5
+; SPARC-NEXT: addxcc %l4, %l3, %l3
+; SPARC-NEXT: add %l7, %l6, %l4
+; SPARC-NEXT: add %l4, %l5, %l4
+; SPARC-NEXT: addcc %g2, %l6, %l5
+; SPARC-NEXT: umul %i3, %g3, %g2
+; SPARC-NEXT: rd %y, %l6
+; SPARC-NEXT: addxcc %l3, %l4, %l3
+; SPARC-NEXT: umul %i2, %g3, %g3
+; SPARC-NEXT: rd %y, %l4
+; SPARC-NEXT: addcc %g3, %l6, %g3
+; SPARC-NEXT: addxcc %l4, 0, %l4
+; SPARC-NEXT: umul %i3, %l0, %l6
; SPARC-NEXT: rd %y, %l7
-; SPARC-NEXT: addcc %l5, %i2, %i2
-; SPARC-NEXT: addxcc %l7, 0, %l5
-; SPARC-NEXT: addcc %l2, %l5, %l2
-; SPARC-NEXT: addxcc %g0, 0, %l5
-; SPARC-NEXT: umul %g3, %l1, %l1
+; SPARC-NEXT: addcc %l6, %g3, %g3
+; SPARC-NEXT: addxcc %l7, 0, %l6
+; SPARC-NEXT: addcc %l4, %l6, %l4
+; SPARC-NEXT: addxcc %g0, 0, %l6
+; SPARC-NEXT: umul %i2, %l0, %l0
; SPARC-NEXT: rd %y, %l7
-; SPARC-NEXT: addcc %l1, %l2, %l1
-; SPARC-NEXT: addxcc %l7, %l5, %l2
-; SPARC-NEXT: addcc %l0, %l1, %l0
-; SPARC-NEXT: addxcc %l4, %l2, %l1
-; SPARC-NEXT: addxcc %l6, 0, %l2
-; SPARC-NEXT: addxcc %l3, 0, %l3
-; SPARC-NEXT: umul %g2, %i5, %l4
+; SPARC-NEXT: addcc %l0, %l4, %l0
+; SPARC-NEXT: addxcc %l7, %l6, %l4
+; SPARC-NEXT: addcc %l1, %l0, %l0
+; SPARC-NEXT: addxcc %l2, %l4, %l1
+; SPARC-NEXT: addxcc %l5, 0, %l2
+; SPARC-NEXT: umul %i2, %i5, %l4
; SPARC-NEXT: rd %y, %l5
-; SPARC-NEXT: sra %l3, 31, %l6
-; SPARC-NEXT: umul %g3, %i5, %l7
-; SPARC-NEXT: rd %y, %o0
-; SPARC-NEXT: addcc %l7, %l5, %l5
-; SPARC-NEXT: addxcc %o0, 0, %l7
-; SPARC-NEXT: umul %g2, %i4, %o0
+; SPARC-NEXT: addxcc %l3, 0, %l3
+; SPARC-NEXT: umul %i3, %i5, %l6
+; SPARC-NEXT: rd %y, %l7
+; SPARC-NEXT: sra %l3, 31, %o0
+; SPARC-NEXT: addcc %l4, %l7, %l4
+; SPARC-NEXT: addxcc %l5, 0, %l5
+; SPARC-NEXT: umul %i3, %i4, %l7
; SPARC-NEXT: rd %y, %o1
-; SPARC-NEXT: addcc %o0, %l5, %l5
-; SPARC-NEXT: addxcc %o1, 0, %o0
-; SPARC-NEXT: addcc %l7, %o0, %l7
-; SPARC-NEXT: addxcc %g0, 0, %o0
-; SPARC-NEXT: umul %g3, %i4, %o1
+; SPARC-NEXT: addcc %l7, %l4, %l4
+; SPARC-NEXT: addxcc %o1, 0, %l7
+; SPARC-NEXT: addcc %l5, %l7, %l5
+; SPARC-NEXT: addxcc %g0, 0, %l7
+; SPARC-NEXT: umul %i2, %i4, %o1
; SPARC-NEXT: rd %y, %o2
-; SPARC-NEXT: addcc %o1, %l7, %l7
+; SPARC-NEXT: addcc %o1, %l5, %l5
; SPARC-NEXT: sra %i4, 31, %o1
-; SPARC-NEXT: smul %o1, %g3, %g3
-; SPARC-NEXT: umul %o1, %g2, %g2
+; SPARC-NEXT: smul %o1, %i2, %i2
+; SPARC-NEXT: umul %o1, %i3, %i3
; SPARC-NEXT: rd %y, %o3
-; SPARC-NEXT: addxcc %o2, %o0, %o0
-; SPARC-NEXT: add %o3, %g3, %g3
-; SPARC-NEXT: add %g3, %g2, %g3
-; SPARC-NEXT: addcc %l7, %g2, %l7
-; SPARC-NEXT: addxcc %o0, %g3, %o0
-; SPARC-NEXT: addcc %l4, %l0, %g2
-; SPARC-NEXT: addxcc %l5, %l1, %g3
-; SPARC-NEXT: addxcc %l7, 0, %l0
-; SPARC-NEXT: addxcc %o0, 0, %l1
+; SPARC-NEXT: addxcc %o2, %l7, %l7
+; SPARC-NEXT: add %o3, %i2, %i2
+; SPARC-NEXT: add %i2, %i3, %i2
+; SPARC-NEXT: addcc %l5, %i3, %i3
+; SPARC-NEXT: addxcc %l7, %i2, %l5
+; SPARC-NEXT: addcc %l6, %l0, %i2
+; SPARC-NEXT: addxcc %l4, %l1, %l0
+; SPARC-NEXT: addxcc %i3, 0, %i3
+; SPARC-NEXT: addxcc %l5, 0, %l1
; SPARC-NEXT: sra %l1, 31, %l4
-; SPARC-NEXT: addcc %l2, %l0, %l0
+; SPARC-NEXT: addcc %l2, %i3, %i3
; SPARC-NEXT: addxcc %l3, %l1, %l1
-; SPARC-NEXT: addxcc %l6, %l4, %l2
+; SPARC-NEXT: addxcc %o0, %l4, %l2
; SPARC-NEXT: smul %i4, %g4, %l3
; SPARC-NEXT: umul %i5, %g4, %g4
; SPARC-NEXT: rd %y, %l5
-; SPARC-NEXT: addxcc %l6, %l4, %l4
+; SPARC-NEXT: addxcc %o0, %l4, %l4
; SPARC-NEXT: add %l5, %g4, %l5
; SPARC-NEXT: smul %o1, %i0, %l6
; SPARC-NEXT: umul %o1, %i1, %l7
@@ -113,150 +126,1050 @@ define { i128, i8 } @muloti_test(i128 %l, i128 %r) nounwind {
; SPARC-NEXT: addxcc %l7, 0, %i5
; SPARC-NEXT: addcc %l5, %i5, %i5
; SPARC-NEXT: addxcc %g0, 0, %l5
-; SPARC-NEXT: umul %i0, %i4, %i0
-; SPARC-NEXT: rd %y, %i4
-; SPARC-NEXT: addcc %i0, %i5, %i0
-; SPARC-NEXT: addxcc %i4, %l5, %i4
-; SPARC-NEXT: addcc %i0, %g4, %i0
-; SPARC-NEXT: addxcc %i4, %l3, %i4
-; SPARC-NEXT: addcc %l6, %l0, %i5
+; SPARC-NEXT: umul %i0, %i4, %i4
+; SPARC-NEXT: mov %l0, %i0
+; SPARC-NEXT: rd %y, %l0
+; SPARC-NEXT: addcc %i4, %i5, %i4
+; SPARC-NEXT: addxcc %l0, %l5, %i5
+; SPARC-NEXT: addcc %i4, %g4, %i4
+; SPARC-NEXT: addxcc %i5, %l3, %i5
+; SPARC-NEXT: addcc %l6, %i3, %i3
; SPARC-NEXT: addxcc %i1, %l1, %i1
-; SPARC-NEXT: addxcc %i0, %l2, %i0
-; SPARC-NEXT: addxcc %i4, %l4, %i4
-; SPARC-NEXT: sra %g3, 31, %g4
-; SPARC-NEXT: xor %i4, %g4, %i4
+; SPARC-NEXT: addxcc %i4, %l2, %i4
+; SPARC-NEXT: addxcc %i5, %l4, %i5
+; SPARC-NEXT: sra %i0, 31, %g4
+; SPARC-NEXT: xor %i5, %g4, %i5
; SPARC-NEXT: xor %i1, %g4, %i1
-; SPARC-NEXT: or %i1, %i4, %i1
-; SPARC-NEXT: xor %i0, %g4, %i0
-; SPARC-NEXT: xor %i5, %g4, %i4
-; SPARC-NEXT: or %i4, %i0, %i0
-; SPARC-NEXT: or %i0, %i1, %i0
+; SPARC-NEXT: or %i1, %i5, %i1
+; SPARC-NEXT: xor %i4, %g4, %i4
+; SPARC-NEXT: xor %i3, %g4, %i3
+; SPARC-NEXT: or %i3, %i4, %i3
+; SPARC-NEXT: or %i3, %i1, %i1
+; SPARC-NEXT: cmp %i1, 0
+; SPARC-NEXT: bne .LBB0_110
+; SPARC-NEXT: nop
+; SPARC-NEXT: ! %bb.3: ! %overflow
+; SPARC-NEXT: ba .LBB0_111
+; SPARC-NEXT: mov %g0, %g4
+; SPARC-NEXT: .LBB0_4: ! %overflow.no.lhs
+; SPARC-NEXT: cmp %g2, 0
+; SPARC-NEXT: be .LBB0_25
+; SPARC-NEXT: nop
+; SPARC-NEXT: ! %bb.5: ! %overflow.no.lhs.only
+; SPARC-NEXT: mov 1, %g4
+; SPARC-NEXT: cmp %i0, 0
+; SPARC-NEXT: bl .LBB0_7
+; SPARC-NEXT: mov %g4, %g2
+; SPARC-NEXT: ! %bb.6: ! %overflow.no.lhs.only
+; SPARC-NEXT: mov %g0, %g2
+; SPARC-NEXT: .LBB0_7: ! %overflow.no.lhs.only
+; SPARC-NEXT: subcc %g0, %i3, %l4
+; SPARC-NEXT: subxcc %g0, %i2, %l3
+; SPARC-NEXT: subxcc %g0, %i1, %l1
+; SPARC-NEXT: subxcc %g0, %i0, %l2
+; SPARC-NEXT: cmp %i0, 0
+; SPARC-NEXT: bl .LBB0_26
+; SPARC-NEXT: nop
+; SPARC-NEXT: ! %bb.8: ! %overflow.no.lhs.only
+; SPARC-NEXT: mov %i3, %l4
+; SPARC-NEXT: cmp %i0, 0
+; SPARC-NEXT: bge .LBB0_27
+; SPARC-NEXT: nop
+; SPARC-NEXT: .LBB0_9: ! %overflow.no.lhs.only
+; SPARC-NEXT: cmp %i0, 0
+; SPARC-NEXT: bl .LBB0_28
+; SPARC-NEXT: nop
+; SPARC-NEXT: .LBB0_10: ! %overflow.no.lhs.only
+; SPARC-NEXT: mov %i0, %l2
+; SPARC-NEXT: cmp %i0, 0
+; SPARC-NEXT: bge .LBB0_29
+; SPARC-NEXT: nop
+; SPARC-NEXT: .LBB0_11: ! %overflow.no.lhs.only
+; SPARC-NEXT: cmp %i0, 0
+; SPARC-NEXT: bl .LBB0_30
+; SPARC-NEXT: nop
+; SPARC-NEXT: .LBB0_12: ! %overflow.no.lhs.only
+; SPARC-NEXT: mov %i1, %l1
+; SPARC-NEXT: cmp %i0, 0
+; SPARC-NEXT: bge .LBB0_31
+; SPARC-NEXT: nop
+; SPARC-NEXT: .LBB0_13: ! %overflow.no.lhs.only
+; SPARC-NEXT: cmp %i0, 0
+; SPARC-NEXT: bl .LBB0_32
+; SPARC-NEXT: nop
+; SPARC-NEXT: .LBB0_14: ! %overflow.no.lhs.only
+; SPARC-NEXT: mov %i2, %l3
+; SPARC-NEXT: cmp %i0, 0
+; SPARC-NEXT: bge .LBB0_33
+; SPARC-NEXT: nop
+; SPARC-NEXT: ba .LBB0_34
+; SPARC-NEXT: nop
+; SPARC-NEXT: .LBB0_15: ! %overflow.no.rhs.only
+; SPARC-NEXT: mov 1, %g4
+; SPARC-NEXT: cmp %i4, 0
+; SPARC-NEXT: bl .LBB0_17
+; SPARC-NEXT: mov %g4, %g2
+; SPARC-NEXT: ! %bb.16: ! %overflow.no.rhs.only
+; SPARC-NEXT: mov %g0, %g2
+; SPARC-NEXT: .LBB0_17: ! %overflow.no.rhs.only
+; SPARC-NEXT: subcc %g0, %g3, %l4
+; SPARC-NEXT: subxcc %g0, %l0, %l3
+; SPARC-NEXT: subxcc %g0, %i5, %l1
+; SPARC-NEXT: subxcc %g0, %i4, %l2
+; SPARC-NEXT: cmp %i4, 0
+; SPARC-NEXT: bl .LBB0_44
+; SPARC-NEXT: nop
+; SPARC-NEXT: ! %bb.18: ! %overflow.no.rhs.only
+; SPARC-NEXT: mov %g3, %l4
+; SPARC-NEXT: cmp %i4, 0
+; SPARC-NEXT: bge .LBB0_45
+; SPARC-NEXT: nop
+; SPARC-NEXT: .LBB0_19: ! %overflow.no.rhs.only
+; SPARC-NEXT: cmp %i4, 0
+; SPARC-NEXT: bl .LBB0_46
+; SPARC-NEXT: nop
+; SPARC-NEXT: .LBB0_20: ! %overflow.no.rhs.only
+; SPARC-NEXT: mov %i4, %l2
+; SPARC-NEXT: cmp %i4, 0
+; SPARC-NEXT: bge .LBB0_47
+; SPARC-NEXT: nop
+; SPARC-NEXT: .LBB0_21: ! %overflow.no.rhs.only
+; SPARC-NEXT: cmp %i4, 0
+; SPARC-NEXT: bl .LBB0_48
+; SPARC-NEXT: nop
+; SPARC-NEXT: .LBB0_22: ! %overflow.no.rhs.only
+; SPARC-NEXT: mov %i5, %l1
+; SPARC-NEXT: cmp %i4, 0
+; SPARC-NEXT: bge .LBB0_49
+; SPARC-NEXT: nop
+; SPARC-NEXT: .LBB0_23: ! %overflow.no.rhs.only
+; SPARC-NEXT: cmp %i4, 0
+; SPARC-NEXT: bl .LBB0_50
+; SPARC-NEXT: nop
+; SPARC-NEXT: .LBB0_24: ! %overflow.no.rhs.only
+; SPARC-NEXT: mov %l0, %l3
+; SPARC-NEXT: cmp %i4, 0
+; SPARC-NEXT: bge .LBB0_51
+; SPARC-NEXT: nop
+; SPARC-NEXT: ba .LBB0_52
+; SPARC-NEXT: nop
+; SPARC-NEXT: .LBB0_25: ! %overflow.no
+; SPARC-NEXT: smul %g3, %i0, %g2
+; SPARC-NEXT: umul %g3, %i1, %i0
+; SPARC-NEXT: rd %y, %l1
+; SPARC-NEXT: mov %g0, %g4
+; SPARC-NEXT: add %l1, %g2, %g2
+; SPARC-NEXT: smul %l0, %i1, %i1
+; SPARC-NEXT: smul %i5, %i2, %l1
+; SPARC-NEXT: umul %i5, %i3, %i5
+; SPARC-NEXT: rd %y, %l2
+; SPARC-NEXT: add %g2, %i1, %i1
+; SPARC-NEXT: add %l2, %l1, %g2
+; SPARC-NEXT: smul %i4, %i3, %i4
+; SPARC-NEXT: add %g2, %i4, %i4
+; SPARC-NEXT: addcc %i5, %i0, %i0
+; SPARC-NEXT: umul %i3, %g3, %g2
+; SPARC-NEXT: rd %y, %i5
+; SPARC-NEXT: addxcc %i4, %i1, %i4
+; SPARC-NEXT: umul %i2, %g3, %i1
+; SPARC-NEXT: rd %y, %g3
+; SPARC-NEXT: addcc %i1, %i5, %i1
+; SPARC-NEXT: addxcc %g3, 0, %i5
+; SPARC-NEXT: umul %i3, %l0, %i3
+; SPARC-NEXT: rd %y, %l1
+; SPARC-NEXT: addcc %i3, %i1, %g3
+; SPARC-NEXT: addxcc %l1, 0, %i1
+; SPARC-NEXT: addcc %i5, %i1, %i1
+; SPARC-NEXT: addxcc %g0, 0, %i3
+; SPARC-NEXT: umul %i2, %l0, %i2
+; SPARC-NEXT: rd %y, %i5
+; SPARC-NEXT: addcc %i2, %i1, %i1
+; SPARC-NEXT: addxcc %i5, %i3, %i2
+; SPARC-NEXT: addcc %i1, %i0, %i1
+; SPARC-NEXT: ba .LBB0_112
+; SPARC-NEXT: addxcc %i2, %i4, %i0
+; SPARC-NEXT: .LBB0_26: ! %overflow.no.lhs.only
+; SPARC-NEXT: cmp %i0, 0
+; SPARC-NEXT: bl .LBB0_9
+; SPARC-NEXT: nop
+; SPARC-NEXT: .LBB0_27: ! %overflow.no.lhs.only
+; SPARC-NEXT: mov %i2, %l3
+; SPARC-NEXT: cmp %i0, 0
+; SPARC-NEXT: bge .LBB0_10
+; SPARC-NEXT: nop
+; SPARC-NEXT: .LBB0_28: ! %overflow.no.lhs.only
+; SPARC-NEXT: cmp %i0, 0
+; SPARC-NEXT: bl .LBB0_11
+; SPARC-NEXT: nop
+; SPARC-NEXT: .LBB0_29: ! %overflow.no.lhs.only
+; SPARC-NEXT: mov %i1, %l1
+; SPARC-NEXT: cmp %i0, 0
+; SPARC-NEXT: bge .LBB0_12
+; SPARC-NEXT: nop
+; SPARC-NEXT: .LBB0_30: ! %overflow.no.lhs.only
+; SPARC-NEXT: cmp %i0, 0
+; SPARC-NEXT: bl .LBB0_13
+; SPARC-NEXT: nop
+; SPARC-NEXT: .LBB0_31: ! %overflow.no.lhs.only
+; SPARC-NEXT: mov %i0, %l2
+; SPARC-NEXT: cmp %i0, 0
+; SPARC-NEXT: bge .LBB0_14
+; SPARC-NEXT: nop
+; SPARC-NEXT: .LBB0_32: ! %overflow.no.lhs.only
; SPARC-NEXT: cmp %i0, 0
-; SPARC-NEXT: bne .LBB0_2
+; SPARC-NEXT: bl .LBB0_34
+; SPARC-NEXT: nop
+; SPARC-NEXT: .LBB0_33: ! %overflow.no.lhs.only
+; SPARC-NEXT: mov %i3, %l4
+; SPARC-NEXT: .LBB0_34: ! %overflow.no.lhs.only
+; SPARC-NEXT: cmp %i4, 0
+; SPARC-NEXT: bl .LBB0_36
+; SPARC-NEXT: mov %g4, %i0
+; SPARC-NEXT: ! %bb.35: ! %overflow.no.lhs.only
+; SPARC-NEXT: mov %g0, %i0
+; SPARC-NEXT: .LBB0_36: ! %overflow.no.lhs.only
+; SPARC-NEXT: subcc %g0, %g3, %l6
+; SPARC-NEXT: subxcc %g0, %l0, %l5
+; SPARC-NEXT: subxcc %g0, %i5, %i2
+; SPARC-NEXT: subxcc %g0, %i4, %i1
+; SPARC-NEXT: cmp %i4, 0
+; SPARC-NEXT: bl .LBB0_62
+; SPARC-NEXT: nop
+; SPARC-NEXT: ! %bb.37: ! %overflow.no.lhs.only
+; SPARC-NEXT: mov %g3, %l6
+; SPARC-NEXT: cmp %i4, 0
+; SPARC-NEXT: bge .LBB0_63
+; SPARC-NEXT: nop
+; SPARC-NEXT: .LBB0_38: ! %overflow.no.lhs.only
+; SPARC-NEXT: cmp %i4, 0
+; SPARC-NEXT: bl .LBB0_64
+; SPARC-NEXT: nop
+; SPARC-NEXT: .LBB0_39: ! %overflow.no.lhs.only
+; SPARC-NEXT: mov %i5, %i2
+; SPARC-NEXT: cmp %i4, 0
+; SPARC-NEXT: bge .LBB0_65
+; SPARC-NEXT: nop
+; SPARC-NEXT: .LBB0_40: ! %overflow.no.lhs.only
+; SPARC-NEXT: cmp %i4, 0
+; SPARC-NEXT: bl .LBB0_66
+; SPARC-NEXT: nop
+; SPARC-NEXT: .LBB0_41: ! %overflow.no.lhs.only
+; SPARC-NEXT: mov %i4, %i1
+; SPARC-NEXT: cmp %i4, 0
+; SPARC-NEXT: bge .LBB0_67
+; SPARC-NEXT: nop
+; SPARC-NEXT: .LBB0_42: ! %overflow.no.lhs.only
+; SPARC-NEXT: cmp %i4, 0
+; SPARC-NEXT: bl .LBB0_68
+; SPARC-NEXT: nop
+; SPARC-NEXT: .LBB0_43: ! %overflow.no.lhs.only
+; SPARC-NEXT: mov %l0, %l5
+; SPARC-NEXT: cmp %i4, 0
+; SPARC-NEXT: bge .LBB0_69
+; SPARC-NEXT: nop
+; SPARC-NEXT: ba .LBB0_70
; SPARC-NEXT: nop
-; SPARC-NEXT: ! %bb.1: ! %start
-; SPARC-NEXT: ba .LBB0_3
+; SPARC-NEXT: .LBB0_44: ! %overflow.no.rhs.only
+; SPARC-NEXT: cmp %i4, 0
+; SPARC-NEXT: bl .LBB0_19
+; SPARC-NEXT: nop
+; SPARC-NEXT: .LBB0_45: ! %overflow.no.rhs.only
+; SPARC-NEXT: mov %l0, %l3
+; SPARC-NEXT: cmp %i4, 0
+; SPARC-NEXT: bge .LBB0_20
+; SPARC-NEXT: nop
+; SPARC-NEXT: .LBB0_46: ! %overflow.no.rhs.only
+; SPARC-NEXT: cmp %i4, 0
+; SPARC-NEXT: bl .LBB0_21
+; SPARC-NEXT: nop
+; SPARC-NEXT: .LBB0_47: ! %overflow.no.rhs.only
+; SPARC-NEXT: mov %i5, %l1
+; SPARC-NEXT: cmp %i4, 0
+; SPARC-NEXT: bge .LBB0_22
+; SPARC-NEXT: nop
+; SPARC-NEXT: .LBB0_48: ! %overflow.no.rhs.only
+; SPARC-NEXT: cmp %i4, 0
+; SPARC-NEXT: bl .LBB0_23
+; SPARC-NEXT: nop
+; SPARC-NEXT: .LBB0_49: ! %overflow.no.rhs.only
+; SPARC-NEXT: mov %i4, %l2
+; SPARC-NEXT: cmp %i4, 0
+; SPARC-NEXT: bge .LBB0_24
+; SPARC-NEXT: nop
+; SPARC-NEXT: .LBB0_50: ! %overflow.no.rhs.only
+; SPARC-NEXT: cmp %i4, 0
+; SPARC-NEXT: bl .LBB0_52
+; SPARC-NEXT: nop
+; SPARC-NEXT: .LBB0_51: ! %overflow.no.rhs.only
+; SPARC-NEXT: mov %g3, %l4
+; SPARC-NEXT: .LBB0_52: ! %overflow.no.rhs.only
+; SPARC-NEXT: cmp %i0, 0
+; SPARC-NEXT: bl .LBB0_54
+; SPARC-NEXT: mov %g4, %i4
+; SPARC-NEXT: ! %bb.53: ! %overflow.no.rhs.only
; SPARC-NEXT: mov %g0, %i4
-; SPARC-NEXT: .LBB0_2:
-; SPARC-NEXT: mov 1, %i4
-; SPARC-NEXT: .LBB0_3: ! %start
-; SPARC-NEXT: mov %g3, %i0
+; SPARC-NEXT: .LBB0_54: ! %overflow.no.rhs.only
+; SPARC-NEXT: subcc %g0, %i3, %l5
+; SPARC-NEXT: subxcc %g0, %i2, %l0
+; SPARC-NEXT: subxcc %g0, %i1, %g3
+; SPARC-NEXT: subxcc %g0, %i0, %i5
+; SPARC-NEXT: cmp %i0, 0
+; SPARC-NEXT: bl .LBB0_85
+; SPARC-NEXT: nop
+; SPARC-NEXT: ! %bb.55: ! %overflow.no.rhs.only
+; SPARC-NEXT: mov %i3, %l5
+; SPARC-NEXT: cmp %i0, 0
+; SPARC-NEXT: bge .LBB0_86
+; SPARC-NEXT: nop
+; SPARC-NEXT: .LBB0_56: ! %overflow.no.rhs.only
+; SPARC-NEXT: cmp %i0, 0
+; SPARC-NEXT: bl .LBB0_87
+; SPARC-NEXT: nop
+; SPARC-NEXT: .LBB0_57: ! %overflow.no.rhs.only
+; SPARC-NEXT: mov %i1, %g3
+; SPARC-NEXT: cmp %i0, 0
+; SPARC-NEXT: bge .LBB0_88
+; SPARC-NEXT: nop
+; SPARC-NEXT: .LBB0_58: ! %overflow.no.rhs.only
+; SPARC-NEXT: cmp %i0, 0
+; SPARC-NEXT: bl .LBB0_89
+; SPARC-NEXT: nop
+; SPARC-NEXT: .LBB0_59: ! %overflow.no.rhs.only
+; SPARC-NEXT: mov %i0, %i5
+; SPARC-NEXT: cmp %i0, 0
+; SPARC-NEXT: bge .LBB0_90
+; SPARC-NEXT: nop
+; SPARC-NEXT: .LBB0_60: ! %overflow.no.rhs.only
+; SPARC-NEXT: cmp %i0, 0
+; SPARC-NEXT: bl .LBB0_91
+; SPARC-NEXT: nop
+; SPARC-NEXT: .LBB0_61: ! %overflow.no.rhs.only
+; SPARC-NEXT: mov %i2, %l0
+; SPARC-NEXT: cmp %i0, 0
+; SPARC-NEXT: bge .LBB0_92
+; SPARC-NEXT: nop
+; SPARC-NEXT: ba .LBB0_93
+; SPARC-NEXT: nop
+; SPARC-NEXT: .LBB0_62: ! %overflow.no.lhs.only
+; SPARC-NEXT: cmp %i4, 0
+; SPARC-NEXT: bl .LBB0_38
+; SPARC-NEXT: nop
+; SPARC-NEXT: .LBB0_63: ! %overflow.no.lhs.only
+; SPARC-NEXT: mov %l0, %l5
+; SPARC-NEXT: cmp %i4, 0
+; SPARC-NEXT: bge .LBB0_39
+; SPARC-NEXT: nop
+; SPARC-NEXT: .LBB0_64: ! %overflow.no.lhs.only
+; SPARC-NEXT: cmp %i4, 0
+; SPARC-NEXT: bl .LBB0_40
+; SPARC-NEXT: nop
+; SPARC-NEXT: .LBB0_65: ! %overflow.no.lhs.only
+; SPARC-NEXT: mov %i4, %i1
+; SPARC-NEXT: cmp %i4, 0
+; SPARC-NEXT: bge .LBB0_41
+; SPARC-NEXT: nop
+; SPARC-NEXT: .LBB0_66: ! %overflow.no.lhs.only
+; SPARC-NEXT: cmp %i4, 0
+; SPARC-NEXT: bl .LBB0_42
+; SPARC-NEXT: nop
+; SPARC-NEXT: .LBB0_67: ! %overflow.no.lhs.only
+; SPARC-NEXT: mov %i5, %i2
+; SPARC-NEXT: cmp %i4, 0
+; SPARC-NEXT: bge .LBB0_43
+; SPARC-NEXT: nop
+; SPARC-NEXT: .LBB0_68: ! %overflow.no.lhs.only
+; SPARC-NEXT: cmp %i4, 0
+; SPARC-NEXT: bl .LBB0_70
+; SPARC-NEXT: nop
+; SPARC-NEXT: .LBB0_69: ! %overflow.no.lhs.only
+; SPARC-NEXT: mov %g3, %l6
+; SPARC-NEXT: .LBB0_70: ! %overflow.no.lhs.only
+; SPARC-NEXT: umul %l4, %l6, %i3
+; SPARC-NEXT: rd %y, %i4
+; SPARC-NEXT: umul %l3, %l6, %i5
+; SPARC-NEXT: rd %y, %g3
+; SPARC-NEXT: addcc %i5, %i4, %i4
+; SPARC-NEXT: addxcc %g3, 0, %i5
+; SPARC-NEXT: umul %l4, %l5, %g3
+; SPARC-NEXT: rd %y, %l0
+; SPARC-NEXT: addcc %g3, %i4, %i4
+; SPARC-NEXT: addxcc %l0, 0, %g3
+; SPARC-NEXT: addcc %i5, %g3, %i5
+; SPARC-NEXT: addxcc %g0, 0, %g3
+; SPARC-NEXT: umul %l3, %l5, %l0
+; SPARC-NEXT: rd %y, %l7
+; SPARC-NEXT: addcc %l0, %i5, %i5
+; SPARC-NEXT: smul %l6, %l2, %l0
+; SPARC-NEXT: umul %l6, %l1, %l6
+; SPARC-NEXT: rd %y, %o0
+; SPARC-NEXT: addxcc %l7, %g3, %l7
+; SPARC-NEXT: add %o0, %l0, %g3
+; SPARC-NEXT: smul %l5, %l1, %l0
+; SPARC-NEXT: add %g3, %l0, %l0
+; SPARC-NEXT: addcc %i5, %l6, %g3
+; SPARC-NEXT: umul %l4, %i2, %l5
+; SPARC-NEXT: rd %y, %l6
+; SPARC-NEXT: addxcc %l7, %l0, %i5
+; SPARC-NEXT: umul %l3, %i2, %l0
+; SPARC-NEXT: rd %y, %l7
+; SPARC-NEXT: addcc %l0, %l6, %l0
+; SPARC-NEXT: addxcc %l7, 0, %l6
+; SPARC-NEXT: umul %l4, %i1, %l4
+; SPARC-NEXT: rd %y, %l7
+; SPARC-NEXT: addcc %l4, %l0, %l4
+; SPARC-NEXT: addxcc %l7, 0, %l0
+; SPARC-NEXT: addcc %l6, %l0, %l0
+; SPARC-NEXT: addxcc %g0, 0, %l6
+; SPARC-NEXT: umul %l3, %i1, %l3
+; SPARC-NEXT: rd %y, %l7
+; SPARC-NEXT: addcc %l3, %l0, %l0
+; SPARC-NEXT: smul %i2, %l2, %l2
+; SPARC-NEXT: umul %i2, %l1, %i2
+; SPARC-NEXT: rd %y, %l3
+; SPARC-NEXT: addxcc %l7, %l6, %l6
+; SPARC-NEXT: add %l3, %l2, %l2
+; SPARC-NEXT: smul %i1, %l1, %i1
+; SPARC-NEXT: add %l2, %i1, %i1
+; SPARC-NEXT: addcc %l0, %i2, %l0
+; SPARC-NEXT: addxcc %l6, %i1, %l1
+; SPARC-NEXT: addcc %g3, %l5, %i1
+; SPARC-NEXT: addxcc %i5, %l4, %i2
+; SPARC-NEXT: cmp %i2, %i5
+; SPARC-NEXT: bcs .LBB0_72
+; SPARC-NEXT: mov %g4, %l2
+; SPARC-NEXT: ! %bb.71: ! %overflow.no.lhs.only
+; SPARC-NEXT: mov %g0, %l2
+; SPARC-NEXT: .LBB0_72: ! %overflow.no.lhs.only
+; SPARC-NEXT: cmp %i1, %g3
+; SPARC-NEXT: bcs .LBB0_74
+; SPARC-NEXT: mov %g4, %g3
+; SPARC-NEXT: ! %bb.73: ! %overflow.no.lhs.only
+; SPARC-NEXT: mov %g0, %g3
+; SPARC-NEXT: .LBB0_74: ! %overflow.no.lhs.only
+; SPARC-NEXT: cmp %i2, %i5
+; SPARC-NEXT: be .LBB0_76
+; SPARC-NEXT: nop
+; SPARC-NEXT: ! %bb.75: ! %overflow.no.lhs.only
+; SPARC-NEXT: mov %l2, %g3
+; SPARC-NEXT: .LBB0_76: ! %overflow.no.lhs.only
+; SPARC-NEXT: addcc %l0, %g3, %i5
+; SPARC-NEXT: addxcc %l1, 0, %l0
+; SPARC-NEXT: xor %i0, %g2, %i0
+; SPARC-NEXT: sub %g0, %i0, %l1
+; SPARC-NEXT: xor %i4, %l1, %i4
+; SPARC-NEXT: xor %i3, %l1, %i3
+; SPARC-NEXT: addcc %i3, %i0, %g2
+; SPARC-NEXT: addxcc %i4, 0, %g3
+; SPARC-NEXT: cmp %g2, %i0
+; SPARC-NEXT: bcs .LBB0_78
+; SPARC-NEXT: mov %g4, %i3
+; SPARC-NEXT: ! %bb.77: ! %overflow.no.lhs.only
+; SPARC-NEXT: mov %g0, %i3
+; SPARC-NEXT: .LBB0_78: ! %overflow.no.lhs.only
+; SPARC-NEXT: cmp %g3, 0
+; SPARC-NEXT: be .LBB0_80
+; SPARC-NEXT: nop
+; SPARC-NEXT: ! %bb.79: ! %overflow.no.lhs.only
+; SPARC-NEXT: mov %g0, %i3
+; SPARC-NEXT: .LBB0_80: ! %overflow.no.lhs.only
+; SPARC-NEXT: xor %i1, %l1, %i0
+; SPARC-NEXT: xor %i2, %l1, %i2
+; SPARC-NEXT: addcc %i0, %i3, %i1
+; SPARC-NEXT: addxcc %i2, 0, %i0
+; SPARC-NEXT: cmp %i1, %i3
+; SPARC-NEXT: bcs .LBB0_82
+; SPARC-NEXT: mov %g4, %i2
+; SPARC-NEXT: ! %bb.81: ! %overflow.no.lhs.only
+; SPARC-NEXT: mov %g0, %i2
+; SPARC-NEXT: .LBB0_82: ! %overflow.no.lhs.only
+; SPARC-NEXT: cmp %i0, 0
+; SPARC-NEXT: be .LBB0_84
+; SPARC-NEXT: nop
+; SPARC-NEXT: ! %bb.83: ! %overflow.no.lhs.only
+; SPARC-NEXT: mov %g0, %i2
+; SPARC-NEXT: .LBB0_84: ! %overflow.no.lhs.only
+; SPARC-NEXT: xor %i5, %l1, %i3
+; SPARC-NEXT: xor %l0, %l1, %i4
+; SPARC-NEXT: addcc %i3, %i2, %i2
+; SPARC-NEXT: ba .LBB0_108
+; SPARC-NEXT: addxcc %i4, 0, %i3
+; SPARC-NEXT: .LBB0_85: ! %overflow.no.rhs.only
+; SPARC-NEXT: cmp %i0, 0
+; SPARC-NEXT: bl .LBB0_56
+; SPARC-NEXT: nop
+; SPARC-NEXT: .LBB0_86: ! %overflow.no.rhs.only
+; SPARC-NEXT: mov %i2, %l0
+; SPARC-NEXT: cmp %i0, 0
+; SPARC-NEXT: bge .LBB0_57
+; SPARC-NEXT: nop
+; SPARC-NEXT: .LBB0_87: ! %overflow.no.rhs.only
+; SPARC-NEXT: cmp %i0, 0
+; SPARC-NEXT: bl .LBB0_58
+; SPARC-NEXT: nop
+; SPARC-NEXT: .LBB0_88: ! %overflow.no.rhs.only
+; SPARC-NEXT: mov %i0, %i5
+; SPARC-NEXT: cmp %i0, 0
+; SPARC-NEXT: bge .LBB0_59
+; SPARC-NEXT: nop
+; SPARC-NEXT: .LBB0_89: ! %overflow.no.rhs.only
+; SPARC-NEXT: cmp %i0, 0
+; SPARC-NEXT: bl .LBB0_60
+; SPARC-NEXT: nop
+; SPARC-NEXT: .LBB0_90: ! %overflow.no.rhs.only
+; SPARC-NEXT: mov %i1, %g3
+; SPARC-NEXT: cmp %i0, 0
+; SPARC-NEXT: bge .LBB0_61
+; SPARC-NEXT: nop
+; SPARC-NEXT: .LBB0_91: ! %overflow.no.rhs.only
+; SPARC-NEXT: cmp %i0, 0
+; SPARC-NEXT: bl .LBB0_93
+; SPARC-NEXT: nop
+; SPARC-NEXT: .LBB0_92: ! %overflow.no.rhs.only
+; SPARC-NEXT: mov %i3, %l5
+; SPARC-NEXT: .LBB0_93: ! %overflow.no.rhs.only
+; SPARC-NEXT: umul %l4, %l5, %i0
+; SPARC-NEXT: rd %y, %i1
+; SPARC-NEXT: umul %l3, %l5, %i2
+; SPARC-NEXT: rd %y, %i3
+; SPARC-NEXT: addcc %i2, %i1, %i1
+; SPARC-NEXT: addxcc %i3, 0, %i2
+; SPARC-NEXT: umul %l4, %l0, %i3
+; SPARC-NEXT: rd %y, %l6
+; SPARC-NEXT: addcc %i3, %i1, %i1
+; SPARC-NEXT: addxcc %l6, 0, %i3
+; SPARC-NEXT: addcc %i2, %i3, %i2
+; SPARC-NEXT: addxcc %g0, 0, %i3
+; SPARC-NEXT: umul %l3, %l0, %l6
+; SPARC-NEXT: rd %y, %l7
+; SPARC-NEXT: addcc %l6, %i2, %i2
+; SPARC-NEXT: smul %l5, %l2, %l6
+; SPARC-NEXT: umul %l5, %l1, %l5
+; SPARC-NEXT: rd %y, %o0
+; SPARC-NEXT: addxcc %l7, %i3, %l7
+; SPARC-NEXT: add %o0, %l6, %i3
+; SPARC-NEXT: smul %l0, %l1, %l0
+; SPARC-NEXT: add %i3, %l0, %l0
+; SPARC-NEXT: addcc %i2, %l5, %i3
+; SPARC-NEXT: umul %l4, %g3, %l5
+; SPARC-NEXT: rd %y, %l6
+; SPARC-NEXT: addxcc %l7, %l0, %i2
+; SPARC-NEXT: umul %l3, %g3, %l0
+; SPARC-NEXT: rd %y, %l7
+; SPARC-NEXT: addcc %l0, %l6, %l0
+; SPARC-NEXT: addxcc %l7, 0, %l6
+; SPARC-NEXT: umul %l4, %i5, %l4
+; SPARC-NEXT: rd %y, %l7
+; SPARC-NEXT: addcc %l4, %l0, %l0
+; SPARC-NEXT: addxcc %l7, 0, %l4
+; SPARC-NEXT: addcc %l6, %l4, %l4
+; SPARC-NEXT: addxcc %g0, 0, %l6
+; SPARC-NEXT: umul %l3, %i5, %l3
+; SPARC-NEXT: rd %y, %l7
+; SPARC-NEXT: addcc %l3, %l4, %l3
+; SPARC-NEXT: smul %g3, %l2, %l2
+; SPARC-NEXT: umul %g3, %l1, %g3
+; SPARC-NEXT: rd %y, %l4
+; SPARC-NEXT: addxcc %l7, %l6, %l6
+; SPARC-NEXT: add %l4, %l2, %l2
+; SPARC-NEXT: smul %i5, %l1, %i5
+; SPARC-NEXT: add %l2, %i5, %i5
+; SPARC-NEXT: addcc %l3, %g3, %g3
+; SPARC-NEXT: addxcc %l6, %i5, %l1
+; SPARC-NEXT: addcc %i3, %l5, %i5
+; SPARC-NEXT: addxcc %i2, %l0, %l0
+; SPARC-NEXT: cmp %l0, %i2
+; SPARC-NEXT: bcs .LBB0_95
+; SPARC-NEXT: mov %g4, %l2
+; SPARC-NEXT: ! %bb.94: ! %overflow.no.rhs.only
+; SPARC-NEXT: mov %g0, %l2
+; SPARC-NEXT: .LBB0_95: ! %overflow.no.rhs.only
+; SPARC-NEXT: cmp %i5, %i3
+; SPARC-NEXT: bcs .LBB0_97
+; SPARC-NEXT: mov %g4, %i3
+; SPARC-NEXT: ! %bb.96: ! %overflow.no.rhs.only
+; SPARC-NEXT: mov %g0, %i3
+; SPARC-NEXT: .LBB0_97: ! %overflow.no.rhs.only
+; SPARC-NEXT: cmp %l0, %i2
+; SPARC-NEXT: be .LBB0_99
+; SPARC-NEXT: nop
+; SPARC-NEXT: ! %bb.98: ! %overflow.no.rhs.only
+; SPARC-NEXT: mov %l2, %i3
+; SPARC-NEXT: .LBB0_99: ! %overflow.no.rhs.only
+; SPARC-NEXT: addcc %g3, %i3, %i2
+; SPARC-NEXT: addxcc %l1, 0, %i3
+; SPARC-NEXT: xor %g2, %i4, %l1
+; SPARC-NEXT: sub %g0, %l1, %i4
+; SPARC-NEXT: xor %i1, %i4, %i1
+; SPARC-NEXT: xor %i0, %i4, %i0
+; SPARC-NEXT: addcc %i0, %l1, %g2
+; SPARC-NEXT: addxcc %i1, 0, %g3
+; SPARC-NEXT: cmp %g2, %l1
+; SPARC-NEXT: bcs .LBB0_101
+; SPARC-NEXT: mov %g4, %l1
+; SPARC-NEXT: ! %bb.100: ! %overflow.no.rhs.only
+; SPARC-NEXT: mov %g0, %l1
+; SPARC-NEXT: .LBB0_101: ! %overflow.no.rhs.only
+; SPARC-NEXT: cmp %g3, 0
+; SPARC-NEXT: be .LBB0_103
+; SPARC-NEXT: nop
+; SPARC-NEXT: ! %bb.102: ! %overflow.no.rhs.only
+; SPARC-NEXT: mov %g0, %l1
+; SPARC-NEXT: .LBB0_103: ! %overflow.no.rhs.only
+; SPARC-NEXT: xor %i5, %i4, %i0
+; SPARC-NEXT: xor %l0, %i4, %i5
+; SPARC-NEXT: addcc %i0, %l1, %i1
+; SPARC-NEXT: addxcc %i5, 0, %i0
+; SPARC-NEXT: cmp %i1, %l1
+; SPARC-NEXT: bcs .LBB0_105
+; SPARC-NEXT: mov %g4, %i5
+; SPARC-NEXT: ! %bb.104: ! %overflow.no.rhs.only
+; SPARC-NEXT: mov %g0, %i5
+; SPARC-NEXT: .LBB0_105: ! %overflow.no.rhs.only
+; SPARC-NEXT: cmp %i0, 0
+; SPARC-NEXT: be .LBB0_107
+; SPARC-NEXT: nop
+; SPARC-NEXT: ! %bb.106: ! %overflow.no.rhs.only
+; SPARC-NEXT: mov %g0, %i5
+; SPARC-NEXT: .LBB0_107: ! %overflow.no.rhs.only
+; SPARC-NEXT: xor %i2, %i4, %i2
+; SPARC-NEXT: xor %i3, %i4, %i3
+; SPARC-NEXT: addcc %i2, %i5, %i2
+; SPARC-NEXT: addxcc %i3, 0, %i3
+; SPARC-NEXT: .LBB0_108: ! %overflow.no.rhs.only
+; SPARC-NEXT: or %i2, %i3, %i2
+; SPARC-NEXT: cmp %i2, 0
+; SPARC-NEXT: bne .LBB0_112
+; SPARC-NEXT: nop
+; SPARC-NEXT: ! %bb.109: ! %overflow.no.rhs.only
+; SPARC-NEXT: ba .LBB0_112
+; SPARC-NEXT: mov %g0, %g4
+; SPARC-NEXT: .LBB0_110:
+; SPARC-NEXT: mov 1, %g4
+; SPARC-NEXT: .LBB0_111: ! %overflow
+; SPARC-NEXT: mov %i2, %i1
+; SPARC-NEXT: .LBB0_112: ! %overflow.res
+; SPARC-NEXT: and %g4, 1, %i4
+; SPARC-NEXT: mov %g3, %i2
; SPARC-NEXT: ret
-; SPARC-NEXT: restore %g0, %g2, %o1
+; SPARC-NEXT: restore %g0, %g2, %o3
;
; SPARC64-LABEL: muloti_test:
; SPARC64: .register %g2, #scratch
; SPARC64-NEXT: .register %g3, #scratch
-; SPARC64-NEXT: ! %bb.0: ! %start
+; SPARC64-NEXT: ! %bb.0: ! %overflow.entry
; SPARC64-NEXT: save %sp, -176, %sp
-; SPARC64-NEXT: mov %i3, %i4
-; SPARC64-NEXT: mov %i1, %i5
-; SPARC64-NEXT: mov %i0, %l2
-; SPARC64-NEXT: srax %i0, 63, %i3
-; SPARC64-NEXT: mov %i3, %o0
+; SPARC64-NEXT: mov %i1, %i4
+; SPARC64-NEXT: srax %i1, 63, %i1
+; SPARC64-NEXT: cmp %i0, %i1
+; SPARC64-NEXT: be %xcc, .LBB0_3
+; SPARC64-NEXT: srax %i3, 63, %i1
+; SPARC64-NEXT: ! %bb.1: ! %overflow.lhs
+; SPARC64-NEXT: cmp %i2, %i1
+; SPARC64-NEXT: be %xcc, .LBB0_5
+; SPARC64-NEXT: nop
+; SPARC64-NEXT: ! %bb.2: ! %overflow
+; SPARC64-NEXT: srax %i0, 63, %i5
+; SPARC64-NEXT: mov %i5, %o0
; SPARC64-NEXT: mov %i0, %o1
; SPARC64-NEXT: mov %g0, %o2
; SPARC64-NEXT: call __multi3
-; SPARC64-NEXT: mov %i4, %o3
+; SPARC64-NEXT: mov %i3, %o3
; SPARC64-NEXT: mov %o0, %l0
; SPARC64-NEXT: mov %o1, %l1
; SPARC64-NEXT: mov %g0, %o0
-; SPARC64-NEXT: mov %i1, %o1
+; SPARC64-NEXT: mov %i4, %o1
; SPARC64-NEXT: mov %g0, %o2
; SPARC64-NEXT: call __multi3
-; SPARC64-NEXT: mov %i4, %o3
+; SPARC64-NEXT: mov %i3, %o3
; SPARC64-NEXT: mov %o1, %i1
-; SPARC64-NEXT: mov %g0, %i0
-; SPARC64-NEXT: add %l1, %o0, %l3
-; SPARC64-NEXT: cmp %l3, %l1
-; SPARC64-NEXT: movcs %xcc, 1, %i0
-; SPARC64-NEXT: srl %i0, 0, %i0
-; SPARC64-NEXT: add %l0, %i0, %l0
+; SPARC64-NEXT: mov %g0, %i3
+; SPARC64-NEXT: add %l1, %o0, %l2
+; SPARC64-NEXT: cmp %l2, %l1
+; SPARC64-NEXT: movcs %xcc, 1, %i3
+; SPARC64-NEXT: srl %i3, 0, %i3
+; SPARC64-NEXT: add %l0, %i3, %l0
; SPARC64-NEXT: srax %l0, 63, %l1
-; SPARC64-NEXT: srax %i2, 63, %i4
+; SPARC64-NEXT: srax %i2, 63, %i3
; SPARC64-NEXT: mov %g0, %o0
-; SPARC64-NEXT: mov %i5, %o1
-; SPARC64-NEXT: mov %i4, %o2
+; SPARC64-NEXT: mov %i4, %o1
+; SPARC64-NEXT: mov %i3, %o2
; SPARC64-NEXT: call __multi3
; SPARC64-NEXT: mov %i2, %o3
-; SPARC64-NEXT: mov %g0, %i5
+; SPARC64-NEXT: mov %g0, %i4
; SPARC64-NEXT: mov %g0, %g2
-; SPARC64-NEXT: add %o1, %l3, %i0
-; SPARC64-NEXT: cmp %i0, %o1
-; SPARC64-NEXT: movcs %xcc, 1, %i5
-; SPARC64-NEXT: srl %i5, 0, %i5
-; SPARC64-NEXT: add %o0, %i5, %i5
-; SPARC64-NEXT: srax %i5, 63, %g3
-; SPARC64-NEXT: add %l1, %g3, %g3
-; SPARC64-NEXT: add %l0, %i5, %i5
-; SPARC64-NEXT: cmp %i5, %l0
+; SPARC64-NEXT: add %o1, %l2, %g3
+; SPARC64-NEXT: cmp %g3, %o1
+; SPARC64-NEXT: movcs %xcc, 1, %i4
+; SPARC64-NEXT: srl %i4, 0, %i4
+; SPARC64-NEXT: add %o0, %i4, %i4
+; SPARC64-NEXT: srax %i4, 63, %g4
+; SPARC64-NEXT: add %l1, %g4, %g4
+; SPARC64-NEXT: add %l0, %i4, %i4
+; SPARC64-NEXT: cmp %i4, %l0
; SPARC64-NEXT: movcs %xcc, 1, %g2
; SPARC64-NEXT: srl %g2, 0, %g2
-; SPARC64-NEXT: add %g3, %g2, %l0
-; SPARC64-NEXT: mov %i3, %o0
-; SPARC64-NEXT: mov %l2, %o1
-; SPARC64-NEXT: mov %i4, %o2
+; SPARC64-NEXT: add %g4, %g2, %l0
+; SPARC64-NEXT: mov %i5, %o0
+; SPARC64-NEXT: mov %i0, %o1
+; SPARC64-NEXT: mov %g3, %i0
+; SPARC64-NEXT: mov %i3, %o2
+; SPARC64-NEXT: call __multi3
+; SPARC64-NEXT: mov %i2, %o3
+; SPARC64-NEXT: mov %g0, %i3
+; SPARC64-NEXT: mov %g0, %i2
+; SPARC64-NEXT: add %o0, %l0, %i5
+; SPARC64-NEXT: add %o1, %i4, %i4
+; SPARC64-NEXT: cmp %i4, %o1
+; SPARC64-NEXT: movcs %xcc, 1, %i3
+; SPARC64-NEXT: srl %i3, 0, %i3
+; SPARC64-NEXT: add %i5, %i3, %i3
+; SPARC64-NEXT: srax %i0, 63, %i5
+; SPARC64-NEXT: xor %i3, %i5, %i3
+; SPARC64-NEXT: xor %i4, %i5, %i4
+; SPARC64-NEXT: ba .LBB0_7
+; SPARC64-NEXT: or %i4, %i3, %i3
+; SPARC64-NEXT: .LBB0_3: ! %overflow.no.lhs
+; SPARC64-NEXT: cmp %i2, %i1
+; SPARC64-NEXT: be %xcc, .LBB0_8
+; SPARC64-NEXT: nop
+; SPARC64-NEXT: ! %bb.4: ! %overflow.no.lhs.only
+; SPARC64-NEXT: mov %g0, %i5
+; SPARC64-NEXT: mov %g0, %i1
+; SPARC64-NEXT: mov %g0, %l0
+; SPARC64-NEXT: mov %g0, %g2
+; SPARC64-NEXT: movrnz %i4, 1, %i1
+; SPARC64-NEXT: srl %i1, 0, %i1
+; SPARC64-NEXT: add %i0, %i1, %i1
+; SPARC64-NEXT: sub %g0, %i1, %i1
+; SPARC64-NEXT: mov %i0, %g3
+; SPARC64-NEXT: movrlz %i0, %i1, %g3
+; SPARC64-NEXT: sub %g0, %i4, %i1
+; SPARC64-NEXT: mov %i4, %g4
+; SPARC64-NEXT: movrlz %i0, %i1, %g4
+; SPARC64-NEXT: movrlz %i0, 1, %i5
+; SPARC64-NEXT: movrlz %i0, %g4, %i4
+; SPARC64-NEXT: movrlz %i0, %g3, %i0
+; SPARC64-NEXT: movrlz %i2, 1, %l0
+; SPARC64-NEXT: sub %g0, %i3, %i1
+; SPARC64-NEXT: mov %i3, %g3
+; SPARC64-NEXT: movrlz %i2, %i1, %g3
+; SPARC64-NEXT: movrnz %i3, 1, %g2
+; SPARC64-NEXT: srl %g2, 0, %i1
+; SPARC64-NEXT: add %i2, %i1, %i1
+; SPARC64-NEXT: sub %g0, %i1, %i1
+; SPARC64-NEXT: mov %i2, %g2
+; SPARC64-NEXT: movrlz %i2, %i1, %g2
+; SPARC64-NEXT: movrlz %i2, %g3, %i3
+; SPARC64-NEXT: movrlz %i2, %g2, %i2
+; SPARC64-NEXT: mov %i0, %o0
+; SPARC64-NEXT: mov %i4, %o1
+; SPARC64-NEXT: mov %g0, %o2
+; SPARC64-NEXT: call __multi3
+; SPARC64-NEXT: mov %i3, %o3
+; SPARC64-NEXT: mov %o0, %i1
+; SPARC64-NEXT: mov %o1, %i3
+; SPARC64-NEXT: mov %i0, %o0
+; SPARC64-NEXT: mov %i4, %o1
+; SPARC64-NEXT: mov %g0, %o2
; SPARC64-NEXT: call __multi3
; SPARC64-NEXT: mov %i2, %o3
+; SPARC64-NEXT: mov %g0, %i0
+; SPARC64-NEXT: mov %g0, %i4
+; SPARC64-NEXT: mov %g0, %g2
; SPARC64-NEXT: mov %g0, %i2
+; SPARC64-NEXT: add %i1, %o1, %g3
+; SPARC64-NEXT: cmp %g3, %i1
+; SPARC64-NEXT: movcs %xcc, 1, %i0
+; SPARC64-NEXT: srl %i0, 0, %i0
+; SPARC64-NEXT: add %o0, %i0, %g4
+; SPARC64-NEXT: xor %l0, %i5, %i0
+; SPARC64-NEXT: and %i0, 1, %i1
+; SPARC64-NEXT: sub %g0, %i1, %i5
+; SPARC64-NEXT: srl %i0, 0, %i0
+; SPARC64-NEXT: xor %i3, %i5, %i1
+; SPARC64-NEXT: add %i1, %i0, %i1
+; SPARC64-NEXT: cmp %i1, %i0
+; SPARC64-NEXT: movcs %xcc, 1, %i4
+; SPARC64-NEXT: ba .LBB0_6
+; SPARC64-NEXT: srl %i4, 0, %i3
+; SPARC64-NEXT: .LBB0_5: ! %overflow.no.rhs.only
+; SPARC64-NEXT: mov %g0, %i5
+; SPARC64-NEXT: mov %g0, %i1
+; SPARC64-NEXT: mov %g0, %l0
+; SPARC64-NEXT: mov %g0, %g2
+; SPARC64-NEXT: movrnz %i3, 1, %i1
+; SPARC64-NEXT: srl %i1, 0, %i1
+; SPARC64-NEXT: add %i2, %i1, %i1
+; SPARC64-NEXT: sub %g0, %i1, %i1
+; SPARC64-NEXT: mov %i2, %g3
+; SPARC64-NEXT: movrlz %i2, %i1, %g3
+; SPARC64-NEXT: sub %g0, %i3, %i1
+; SPARC64-NEXT: mov %i3, %g4
+; SPARC64-NEXT: movrlz %i2, %i1, %g4
+; SPARC64-NEXT: movrlz %i2, 1, %i5
+; SPARC64-NEXT: movrlz %i2, %g4, %i3
+; SPARC64-NEXT: movrlz %i2, %g3, %i2
+; SPARC64-NEXT: movrlz %i0, 1, %l0
+; SPARC64-NEXT: sub %g0, %i4, %i1
+; SPARC64-NEXT: mov %i4, %g3
+; SPARC64-NEXT: movrlz %i0, %i1, %g3
+; SPARC64-NEXT: movrnz %i4, 1, %g2
+; SPARC64-NEXT: srl %g2, 0, %i1
+; SPARC64-NEXT: add %i0, %i1, %i1
+; SPARC64-NEXT: sub %g0, %i1, %i1
+; SPARC64-NEXT: mov %i0, %g2
+; SPARC64-NEXT: movrlz %i0, %i1, %g2
+; SPARC64-NEXT: movrlz %i0, %g3, %i4
+; SPARC64-NEXT: movrlz %i0, %g2, %i0
+; SPARC64-NEXT: mov %i2, %o0
+; SPARC64-NEXT: mov %i3, %o1
+; SPARC64-NEXT: mov %g0, %o2
+; SPARC64-NEXT: call __multi3
+; SPARC64-NEXT: mov %i4, %o3
+; SPARC64-NEXT: mov %o0, %i1
+; SPARC64-NEXT: mov %o1, %i4
+; SPARC64-NEXT: mov %i2, %o0
+; SPARC64-NEXT: mov %i3, %o1
+; SPARC64-NEXT: mov %g0, %o2
+; SPARC64-NEXT: call __multi3
+; SPARC64-NEXT: mov %i0, %o3
+; SPARC64-NEXT: mov %g0, %i0
; SPARC64-NEXT: mov %g0, %i3
-; SPARC64-NEXT: add %o0, %l0, %i4
-; SPARC64-NEXT: add %o1, %i5, %i5
-; SPARC64-NEXT: cmp %i5, %o1
-; SPARC64-NEXT: movcs %xcc, 1, %i2
-; SPARC64-NEXT: srl %i2, 0, %i2
-; SPARC64-NEXT: add %i4, %i2, %i2
-; SPARC64-NEXT: srax %i0, 63, %i4
-; SPARC64-NEXT: xor %i2, %i4, %i2
-; SPARC64-NEXT: xor %i5, %i4, %i4
-; SPARC64-NEXT: or %i4, %i2, %i2
-; SPARC64-NEXT: movrnz %i2, 1, %i3
-; SPARC64-NEXT: srl %i3, 0, %i2
+; SPARC64-NEXT: mov %g0, %g2
+; SPARC64-NEXT: mov %g0, %i2
+; SPARC64-NEXT: add %i1, %o1, %g3
+; SPARC64-NEXT: cmp %g3, %i1
+; SPARC64-NEXT: movcs %xcc, 1, %i0
+; SPARC64-NEXT: srl %i0, 0, %i0
+; SPARC64-NEXT: add %o0, %i0, %g4
+; SPARC64-NEXT: xor %i5, %l0, %i0
+; SPARC64-NEXT: and %i0, 1, %i1
+; SPARC64-NEXT: sub %g0, %i1, %i5
+; SPARC64-NEXT: srl %i0, 0, %i0
+; SPARC64-NEXT: xor %i4, %i5, %i1
+; SPARC64-NEXT: add %i1, %i0, %i1
+; SPARC64-NEXT: cmp %i1, %i0
+; SPARC64-NEXT: movcs %xcc, 1, %i3
+; SPARC64-NEXT: srl %i3, 0, %i3
+; SPARC64-NEXT: .LBB0_6: ! %overflow.res
+; SPARC64-NEXT: xor %g3, %i5, %i0
+; SPARC64-NEXT: add %i0, %i3, %i0
+; SPARC64-NEXT: cmp %i0, %i3
+; SPARC64-NEXT: movcs %xcc, 1, %g2
+; SPARC64-NEXT: srl %g2, 0, %i3
+; SPARC64-NEXT: xor %g4, %i5, %i4
+; SPARC64-NEXT: add %i4, %i3, %i3
+; SPARC64-NEXT: .LBB0_7: ! %overflow.res
+; SPARC64-NEXT: ba .LBB0_9
+; SPARC64-NEXT: movrnz %i3, 1, %i2
+; SPARC64-NEXT: .LBB0_8: ! %overflow.no
+; SPARC64-NEXT: mov %i0, %o0
+; SPARC64-NEXT: mov %i4, %o1
+; SPARC64-NEXT: mov %i2, %o2
+; SPARC64-NEXT: call __multi3
+; SPARC64-NEXT: mov %i3, %o3
+; SPARC64-NEXT: mov %o0, %i0
+; SPARC64-NEXT: mov %o1, %i1
+; SPARC64-NEXT: mov %g0, %i2
+; SPARC64-NEXT: .LBB0_9: ! %overflow.res
+; SPARC64-NEXT: and %i2, 1, %i2
; SPARC64-NEXT: ret
; SPARC64-NEXT: restore
;
; SPARC64-VIS3-LABEL: muloti_test:
; SPARC64-VIS3: .register %g2, #scratch
; SPARC64-VIS3-NEXT: .register %g3, #scratch
-; SPARC64-VIS3-NEXT: ! %bb.0: ! %start
+; SPARC64-VIS3-NEXT: ! %bb.0: ! %overflow.entry
; SPARC64-VIS3-NEXT: save %sp, -128, %sp
-; SPARC64-VIS3-NEXT: mov %g0, %i5
-; SPARC64-VIS3-NEXT: umulxhi %i0, %i3, %i4
-; SPARC64-VIS3-NEXT: srax %i0, 63, %g2
-; SPARC64-VIS3-NEXT: mulx %g2, %i3, %g3
-; SPARC64-VIS3-NEXT: add %i4, %g3, %i4
+; SPARC64-VIS3-NEXT: srax %i1, 63, %i4
+; SPARC64-VIS3-NEXT: cmp %i0, %i4
+; SPARC64-VIS3-NEXT: be %xcc, .LBB0_3
+; SPARC64-VIS3-NEXT: srax %i3, 63, %i4
+; SPARC64-VIS3-NEXT: ! %bb.1: ! %overflow.lhs
+; SPARC64-VIS3-NEXT: cmp %i2, %i4
+; SPARC64-VIS3-NEXT: be %xcc, .LBB0_5
+; SPARC64-VIS3-NEXT: nop
+; SPARC64-VIS3-NEXT: ! %bb.2: ! %overflow
+; SPARC64-VIS3-NEXT: mov %g0, %i4
+; SPARC64-VIS3-NEXT: srax %i0, 63, %i5
+; SPARC64-VIS3-NEXT: mulx %i5, %i3, %g2
+; SPARC64-VIS3-NEXT: umulxhi %i0, %i3, %g3
+; SPARC64-VIS3-NEXT: add %g3, %g2, %g2
; SPARC64-VIS3-NEXT: umulxhi %i1, %i3, %g3
; SPARC64-VIS3-NEXT: mulx %i0, %i3, %g4
; SPARC64-VIS3-NEXT: addcc %g4, %g3, %g3
-; SPARC64-VIS3-NEXT: addxccc %i4, %g0, %g4
-; SPARC64-VIS3-NEXT: umulxhi %i1, %i2, %i4
-; SPARC64-VIS3-NEXT: srax %i2, 63, %g5
-; SPARC64-VIS3-NEXT: mulx %i1, %g5, %l0
-; SPARC64-VIS3-NEXT: add %i4, %l0, %l0
-; SPARC64-VIS3-NEXT: mulx %i1, %i2, %i4
-; SPARC64-VIS3-NEXT: addcc %i4, %g3, %i4
-; SPARC64-VIS3-NEXT: addxccc %l0, %g0, %g3
-; SPARC64-VIS3-NEXT: srax %g3, 63, %l0
-; SPARC64-VIS3-NEXT: addcc %g4, %g3, %g3
-; SPARC64-VIS3-NEXT: srax %g4, 63, %g4
-; SPARC64-VIS3-NEXT: addxccc %g4, %l0, %g4
-; SPARC64-VIS3-NEXT: and %g5, %i0, %g5
-; SPARC64-VIS3-NEXT: and %g2, %i2, %g2
-; SPARC64-VIS3-NEXT: add %g2, %g5, %g2
-; SPARC64-VIS3-NEXT: umulxhi %i0, %i2, %g5
-; SPARC64-VIS3-NEXT: sub %g5, %g2, %g2
-; SPARC64-VIS3-NEXT: mulx %i0, %i2, %i0
-; SPARC64-VIS3-NEXT: addcc %i0, %g3, %i0
-; SPARC64-VIS3-NEXT: addxccc %g2, %g4, %i2
-; SPARC64-VIS3-NEXT: srax %i4, 63, %g2
+; SPARC64-VIS3-NEXT: addxccc %g2, %g0, %g2
+; SPARC64-VIS3-NEXT: srax %i2, 63, %g4
+; SPARC64-VIS3-NEXT: mulx %i1, %g4, %g5
+; SPARC64-VIS3-NEXT: umulxhi %i1, %i2, %l0
+; SPARC64-VIS3-NEXT: add %l0, %g5, %g5
+; SPARC64-VIS3-NEXT: mulx %i1, %i2, %l0
+; SPARC64-VIS3-NEXT: addcc %l0, %g3, %g3
+; SPARC64-VIS3-NEXT: addxccc %g5, %g0, %g5
+; SPARC64-VIS3-NEXT: srax %g5, 63, %l0
+; SPARC64-VIS3-NEXT: addcc %g2, %g5, %g5
+; SPARC64-VIS3-NEXT: srax %g2, 63, %g2
+; SPARC64-VIS3-NEXT: addxccc %g2, %l0, %g2
+; SPARC64-VIS3-NEXT: and %g4, %i0, %g4
+; SPARC64-VIS3-NEXT: and %i5, %i2, %i5
+; SPARC64-VIS3-NEXT: add %i5, %g4, %i5
+; SPARC64-VIS3-NEXT: umulxhi %i0, %i2, %g4
+; SPARC64-VIS3-NEXT: sub %g4, %i5, %i5
+; SPARC64-VIS3-NEXT: mulx %i0, %i2, %i2
+; SPARC64-VIS3-NEXT: mov %g3, %i0
+; SPARC64-VIS3-NEXT: addcc %i2, %g5, %i2
+; SPARC64-VIS3-NEXT: addxccc %i5, %g2, %i5
+; SPARC64-VIS3-NEXT: srax %g3, 63, %g2
+; SPARC64-VIS3-NEXT: xor %i5, %g2, %i5
; SPARC64-VIS3-NEXT: xor %i2, %g2, %i2
-; SPARC64-VIS3-NEXT: xor %i0, %g2, %i0
-; SPARC64-VIS3-NEXT: or %i0, %i2, %i0
-; SPARC64-VIS3-NEXT: movrnz %i0, 1, %i5
+; SPARC64-VIS3-NEXT: or %i2, %i5, %i2
+; SPARC64-VIS3-NEXT: ba .LBB0_7
+; SPARC64-VIS3-NEXT: movrnz %i2, 1, %i4
+; SPARC64-VIS3-NEXT: .LBB0_3: ! %overflow.no.lhs
+; SPARC64-VIS3-NEXT: cmp %i2, %i4
+; SPARC64-VIS3-NEXT: be %xcc, .LBB0_6
+; SPARC64-VIS3-NEXT: nop
+; SPARC64-VIS3-NEXT: ! %bb.4: ! %overflow.no.lhs.only
+; SPARC64-VIS3-NEXT: mov %g0, %i5
+; SPARC64-VIS3-NEXT: mov %g0, %g3
+; SPARC64-VIS3-NEXT: mov %g0, %g2
+; SPARC64-VIS3-NEXT: mov %g0, %g4
+; SPARC64-VIS3-NEXT: mov %g0, %g5
+; SPARC64-VIS3-NEXT: mov %g0, %l0
+; SPARC64-VIS3-NEXT: mov %g0, %l1
+; SPARC64-VIS3-NEXT: mov %g0, %i4
+; SPARC64-VIS3-NEXT: sub %g0, %i1, %l2
+; SPARC64-VIS3-NEXT: mov %i1, %l3
+; SPARC64-VIS3-NEXT: movrlz %i0, %l2, %l3
+; SPARC64-VIS3-NEXT: movrnz %i1, 1, %g3
+; SPARC64-VIS3-NEXT: srl %g3, 0, %g3
+; SPARC64-VIS3-NEXT: add %i0, %g3, %g3
+; SPARC64-VIS3-NEXT: sub %g0, %g3, %g3
+; SPARC64-VIS3-NEXT: mov %i0, %l2
+; SPARC64-VIS3-NEXT: movrlz %i0, %g3, %l2
+; SPARC64-VIS3-NEXT: movrlz %i0, 1, %i5
+; SPARC64-VIS3-NEXT: movrlz %i0, %l3, %i1
+; SPARC64-VIS3-NEXT: movrlz %i0, %l2, %i0
+; SPARC64-VIS3-NEXT: sub %g0, %i3, %g3
+; SPARC64-VIS3-NEXT: mov %i3, %l2
+; SPARC64-VIS3-NEXT: movrlz %i2, %g3, %l2
+; SPARC64-VIS3-NEXT: movrnz %i3, 1, %g4
+; SPARC64-VIS3-NEXT: srl %g4, 0, %g3
+; SPARC64-VIS3-NEXT: add %i2, %g3, %g3
+; SPARC64-VIS3-NEXT: sub %g0, %g3, %g3
+; SPARC64-VIS3-NEXT: mov %i2, %g4
+; SPARC64-VIS3-NEXT: movrlz %i2, %g3, %g4
+; SPARC64-VIS3-NEXT: movrlz %i2, 1, %g2
+; SPARC64-VIS3-NEXT: movrlz %i2, %l2, %i3
+; SPARC64-VIS3-NEXT: movrlz %i2, %g4, %i2
+; SPARC64-VIS3-NEXT: umulxhi %i1, %i3, %g3
+; SPARC64-VIS3-NEXT: mulx %i0, %i3, %g4
+; SPARC64-VIS3-NEXT: add %g3, %g4, %g3
+; SPARC64-VIS3-NEXT: mulx %i0, %i2, %i0
+; SPARC64-VIS3-NEXT: umulxhi %i1, %i2, %g4
+; SPARC64-VIS3-NEXT: add %g4, %i0, %i0
+; SPARC64-VIS3-NEXT: mulx %i1, %i3, %i3
+; SPARC64-VIS3-NEXT: mulx %i1, %i2, %i1
+; SPARC64-VIS3-NEXT: add %g3, %i1, %i2
+; SPARC64-VIS3-NEXT: cmp %i2, %g3
+; SPARC64-VIS3-NEXT: movcs %xcc, 1, %g5
+; SPARC64-VIS3-NEXT: srl %g5, 0, %i1
+; SPARC64-VIS3-NEXT: add %i0, %i1, %g3
+; SPARC64-VIS3-NEXT: xor %g2, %i5, %i0
+; SPARC64-VIS3-NEXT: and %i0, 1, %i1
+; SPARC64-VIS3-NEXT: sub %g0, %i1, %i5
+; SPARC64-VIS3-NEXT: srl %i0, 0, %i0
+; SPARC64-VIS3-NEXT: xor %i3, %i5, %i1
+; SPARC64-VIS3-NEXT: add %i1, %i0, %i1
+; SPARC64-VIS3-NEXT: cmp %i1, %i0
+; SPARC64-VIS3-NEXT: movcs %xcc, 1, %l0
+; SPARC64-VIS3-NEXT: srl %l0, 0, %i3
+; SPARC64-VIS3-NEXT: xor %i2, %i5, %i0
+; SPARC64-VIS3-NEXT: add %i0, %i3, %i0
+; SPARC64-VIS3-NEXT: cmp %i0, %i3
+; SPARC64-VIS3-NEXT: movcs %xcc, 1, %l1
+; SPARC64-VIS3-NEXT: srl %l1, 0, %i2
+; SPARC64-VIS3-NEXT: xor %g3, %i5, %i3
+; SPARC64-VIS3-NEXT: add %i3, %i2, %i2
+; SPARC64-VIS3-NEXT: ba .LBB0_8
+; SPARC64-VIS3-NEXT: movrnz %i2, 1, %i4
+; SPARC64-VIS3-NEXT: .LBB0_5: ! %overflow.no.rhs.only
+; SPARC64-VIS3-NEXT: mov %g0, %i5
+; SPARC64-VIS3-NEXT: mov %g0, %g3
+; SPARC64-VIS3-NEXT: mov %g0, %g2
+; SPARC64-VIS3-NEXT: mov %g0, %g4
+; SPARC64-VIS3-NEXT: mov %g0, %g5
+; SPARC64-VIS3-NEXT: mov %g0, %l0
+; SPARC64-VIS3-NEXT: mov %g0, %l1
+; SPARC64-VIS3-NEXT: mov %g0, %i4
+; SPARC64-VIS3-NEXT: sub %g0, %i3, %l2
+; SPARC64-VIS3-NEXT: mov %i3, %l3
+; SPARC64-VIS3-NEXT: movrlz %i2, %l2, %l3
+; SPARC64-VIS3-NEXT: movrnz %i3, 1, %g3
+; SPARC64-VIS3-NEXT: srl %g3, 0, %g3
+; SPARC64-VIS3-NEXT: add %i2, %g3, %g3
+; SPARC64-VIS3-NEXT: sub %g0, %g3, %g3
+; SPARC64-VIS3-NEXT: mov %i2, %l2
+; SPARC64-VIS3-NEXT: movrlz %i2, %g3, %l2
+; SPARC64-VIS3-NEXT: movrlz %i2, 1, %i5
+; SPARC64-VIS3-NEXT: movrlz %i2, %l3, %i3
+; SPARC64-VIS3-NEXT: movrlz %i2, %l2, %i2
+; SPARC64-VIS3-NEXT: sub %g0, %i1, %g3
+; SPARC64-VIS3-NEXT: mov %i1, %l2
+; SPARC64-VIS3-NEXT: movrlz %i0, %g3, %l2
+; SPARC64-VIS3-NEXT: movrnz %i1, 1, %g4
+; SPARC64-VIS3-NEXT: srl %g4, 0, %g3
+; SPARC64-VIS3-NEXT: add %i0, %g3, %g3
+; SPARC64-VIS3-NEXT: sub %g0, %g3, %g3
+; SPARC64-VIS3-NEXT: mov %i0, %g4
+; SPARC64-VIS3-NEXT: movrlz %i0, %g3, %g4
+; SPARC64-VIS3-NEXT: movrlz %i0, 1, %g2
+; SPARC64-VIS3-NEXT: movrlz %i0, %l2, %i1
+; SPARC64-VIS3-NEXT: movrlz %i0, %g4, %i0
+; SPARC64-VIS3-NEXT: umulxhi %i3, %i1, %g3
+; SPARC64-VIS3-NEXT: mulx %i2, %i1, %g4
+; SPARC64-VIS3-NEXT: add %g3, %g4, %g3
+; SPARC64-VIS3-NEXT: mulx %i2, %i0, %i2
+; SPARC64-VIS3-NEXT: umulxhi %i3, %i0, %g4
+; SPARC64-VIS3-NEXT: add %g4, %i2, %i2
+; SPARC64-VIS3-NEXT: mulx %i3, %i1, %i1
+; SPARC64-VIS3-NEXT: mulx %i3, %i0, %i0
+; SPARC64-VIS3-NEXT: add %g3, %i0, %i0
+; SPARC64-VIS3-NEXT: cmp %i0, %g3
+; SPARC64-VIS3-NEXT: movcs %xcc, 1, %g5
+; SPARC64-VIS3-NEXT: srl %g5, 0, %i3
+; SPARC64-VIS3-NEXT: add %i2, %i3, %i2
+; SPARC64-VIS3-NEXT: xor %i5, %g2, %i3
+; SPARC64-VIS3-NEXT: and %i3, 1, %i5
+; SPARC64-VIS3-NEXT: sub %g0, %i5, %i5
+; SPARC64-VIS3-NEXT: srl %i3, 0, %i3
+; SPARC64-VIS3-NEXT: xor %i1, %i5, %i1
+; SPARC64-VIS3-NEXT: add %i1, %i3, %i1
+; SPARC64-VIS3-NEXT: cmp %i1, %i3
+; SPARC64-VIS3-NEXT: movcs %xcc, 1, %l0
+; SPARC64-VIS3-NEXT: srl %l0, 0, %i3
+; SPARC64-VIS3-NEXT: xor %i0, %i5, %i0
+; SPARC64-VIS3-NEXT: add %i0, %i3, %i0
+; SPARC64-VIS3-NEXT: cmp %i0, %i3
+; SPARC64-VIS3-NEXT: movcs %xcc, 1, %l1
+; SPARC64-VIS3-NEXT: srl %l1, 0, %i3
+; SPARC64-VIS3-NEXT: xor %i2, %i5, %i2
+; SPARC64-VIS3-NEXT: add %i2, %i3, %i2
+; SPARC64-VIS3-NEXT: ba .LBB0_8
+; SPARC64-VIS3-NEXT: movrnz %i2, 1, %i4
+; SPARC64-VIS3-NEXT: .LBB0_6: ! %overflow.no
+; SPARC64-VIS3-NEXT: mov %g0, %i4
+; SPARC64-VIS3-NEXT: mulx %i1, %i2, %i2
+; SPARC64-VIS3-NEXT: umulxhi %i1, %i3, %i5
+; SPARC64-VIS3-NEXT: add %i5, %i2, %i2
+; SPARC64-VIS3-NEXT: mulx %i0, %i3, %i0
+; SPARC64-VIS3-NEXT: add %i2, %i0, %i0
+; SPARC64-VIS3-NEXT: .LBB0_7: ! %overflow.res
; SPARC64-VIS3-NEXT: mulx %i1, %i3, %i1
-; SPARC64-VIS3-NEXT: srl %i5, 0, %i2
+; SPARC64-VIS3-NEXT: .LBB0_8: ! %overflow.res
+; SPARC64-VIS3-NEXT: and %i4, 1, %i2
; SPARC64-VIS3-NEXT: ret
-; SPARC64-VIS3-NEXT: restore %g0, %i4, %o0
+; SPARC64-VIS3-NEXT: restore
start:
%0 = tail call { i128, i1 } @llvm.smul.with.overflow.i128(i128 %l, i128 %r)
%1 = extractvalue { i128, i1 } %0, 0
diff --git a/llvm/test/CodeGen/SPARC/umulo-128-legalisation-lowering.ll b/llvm/test/CodeGen/SPARC/umulo-128-legalisation-lowering.ll
index 6d197c88bfecd..4533523f97d74 100644
--- a/llvm/test/CodeGen/SPARC/umulo-128-legalisation-lowering.ll
+++ b/llvm/test/CodeGen/SPARC/umulo-128-legalisation-lowering.ll
@@ -5,207 +5,470 @@
define { i128, i8 } @muloti_test(i128 %l, i128 %r) nounwind {
; SPARC-LABEL: muloti_test:
-; SPARC: ! %bb.0: ! %start
+; SPARC: ! %bb.0: ! %overflow.entry
; SPARC-NEXT: save %sp, -96, %sp
+; SPARC-NEXT: ld [%fp+96], %l1
+; SPARC-NEXT: ld [%fp+92], %g4
+; SPARC-NEXT: or %i1, %i0, %l0
+; SPARC-NEXT: cmp %l0, 0
; SPARC-NEXT: mov %i3, %g2
-; SPARC-NEXT: mov %i2, %g4
-; SPARC-NEXT: umul %i2, %i5, %i2
+; SPARC-NEXT: be .LBB0_33
+; SPARC-NEXT: mov %i2, %g3
+; SPARC-NEXT: ! %bb.1: ! %overflow.lhs
+; SPARC-NEXT: or %i5, %i4, %l2
+; SPARC-NEXT: cmp %l2, 0
+; SPARC-NEXT: be .LBB0_40
+; SPARC-NEXT: nop
+; SPARC-NEXT: ! %bb.2: ! %overflow
+; SPARC-NEXT: umul %g3, %i5, %i2
; SPARC-NEXT: rd %y, %l7
-; SPARC-NEXT: ld [%fp+92], %l4
-; SPARC-NEXT: umul %i4, %i3, %i3
-; SPARC-NEXT: rd %y, %o1
-; SPARC-NEXT: ld [%fp+96], %g3
-; SPARC-NEXT: umul %i5, %g2, %l3
+; SPARC-NEXT: umul %i4, %g2, %i3
+; SPARC-NEXT: rd %y, %o2
+; SPARC-NEXT: umul %i5, %g2, %l5
; SPARC-NEXT: rd %y, %o0
-; SPARC-NEXT: umul %l4, %i1, %l2
-; SPARC-NEXT: rd %y, %l1
+; SPARC-NEXT: umul %g4, %i1, %l4
+; SPARC-NEXT: rd %y, %l3
; SPARC-NEXT: add %i3, %i2, %i2
-; SPARC-NEXT: umul %i0, %g3, %i3
+; SPARC-NEXT: umul %i0, %l1, %i3
; SPARC-NEXT: rd %y, %l6
-; SPARC-NEXT: add %o0, %i2, %o2
-; SPARC-NEXT: umul %i1, %g3, %i2
-; SPARC-NEXT: rd %y, %l0
-; SPARC-NEXT: add %i3, %l2, %i3
-; SPARC-NEXT: add %l0, %i3, %l2
-; SPARC-NEXT: addcc %i2, %l3, %l3
-; SPARC-NEXT: umul %g2, %g3, %i3
+; SPARC-NEXT: add %o0, %i2, %o1
+; SPARC-NEXT: umul %i1, %l1, %i1
+; SPARC-NEXT: rd %y, %i5
+; SPARC-NEXT: add %i3, %l4, %i2
+; SPARC-NEXT: add %i5, %i2, %l4
+; SPARC-NEXT: addcc %i1, %l5, %i1
+; SPARC-NEXT: umul %g2, %l1, %i3
; SPARC-NEXT: rd %y, %i2
-; SPARC-NEXT: addxcc %l2, %o2, %o4
-; SPARC-NEXT: umul %g4, %g3, %g3
+; SPARC-NEXT: addxcc %l4, %o1, %o4
+; SPARC-NEXT: umul %g3, %l1, %l1
; SPARC-NEXT: rd %y, %l5
-; SPARC-NEXT: addcc %g3, %i2, %i2
-; SPARC-NEXT: addxcc %l5, 0, %g3
-; SPARC-NEXT: umul %g2, %l4, %g2
+; SPARC-NEXT: addcc %l1, %i2, %i2
+; SPARC-NEXT: addxcc %l5, 0, %l1
+; SPARC-NEXT: umul %g2, %g4, %g2
; SPARC-NEXT: rd %y, %l5
; SPARC-NEXT: addcc %g2, %i2, %i2
; SPARC-NEXT: addxcc %l5, 0, %g2
-; SPARC-NEXT: addcc %g3, %g2, %g2
-; SPARC-NEXT: addxcc %g0, 0, %g3
-; SPARC-NEXT: umul %g4, %l4, %l5
+; SPARC-NEXT: addcc %l1, %g2, %g2
+; SPARC-NEXT: addxcc %g0, 0, %l1
+; SPARC-NEXT: umul %g3, %g4, %l5
; SPARC-NEXT: rd %y, %o3
; SPARC-NEXT: addcc %l5, %g2, %l5
-; SPARC-NEXT: addxcc %o3, %g3, %o3
-; SPARC-NEXT: addcc %l5, %l3, %g2
-; SPARC-NEXT: addxcc %o3, %o4, %g3
-; SPARC-NEXT: mov 1, %l3
-; SPARC-NEXT: cmp %g3, %o3
-; SPARC-NEXT: bcs .LBB0_2
-; SPARC-NEXT: mov %l3, %o4
-; SPARC-NEXT: ! %bb.1: ! %start
-; SPARC-NEXT: mov %g0, %o4
-; SPARC-NEXT: .LBB0_2: ! %start
-; SPARC-NEXT: cmp %g2, %l5
+; SPARC-NEXT: addxcc %o3, %l1, %o3
+; SPARC-NEXT: addcc %l5, %i1, %i1
+; SPARC-NEXT: addxcc %o3, %o4, %g2
+; SPARC-NEXT: mov 1, %l1
+; SPARC-NEXT: cmp %g2, %o3
; SPARC-NEXT: bcs .LBB0_4
-; SPARC-NEXT: mov %l3, %l5
-; SPARC-NEXT: ! %bb.3: ! %start
+; SPARC-NEXT: mov %l1, %o4
+; SPARC-NEXT: ! %bb.3: ! %overflow
+; SPARC-NEXT: mov %g0, %o4
+; SPARC-NEXT: .LBB0_4: ! %overflow
+; SPARC-NEXT: cmp %i1, %l5
+; SPARC-NEXT: bcs .LBB0_6
+; SPARC-NEXT: mov %l1, %l5
+; SPARC-NEXT: ! %bb.5: ! %overflow
; SPARC-NEXT: mov %g0, %l5
-; SPARC-NEXT: .LBB0_4: ! %start
-; SPARC-NEXT: cmp %g3, %o3
-; SPARC-NEXT: be .LBB0_6
+; SPARC-NEXT: .LBB0_6: ! %overflow
+; SPARC-NEXT: cmp %g2, %o3
+; SPARC-NEXT: be .LBB0_8
; SPARC-NEXT: nop
-; SPARC-NEXT: ! %bb.5: ! %start
+; SPARC-NEXT: ! %bb.7: ! %overflow
; SPARC-NEXT: mov %o4, %l5
-; SPARC-NEXT: .LBB0_6: ! %start
-; SPARC-NEXT: cmp %g4, 0
-; SPARC-NEXT: bne .LBB0_8
-; SPARC-NEXT: mov %l3, %o3
-; SPARC-NEXT: ! %bb.7: ! %start
-; SPARC-NEXT: mov %g0, %o3
-; SPARC-NEXT: .LBB0_8: ! %start
+; SPARC-NEXT: .LBB0_8: ! %overflow
; SPARC-NEXT: cmp %i4, 0
; SPARC-NEXT: bne .LBB0_10
-; SPARC-NEXT: mov %l3, %o4
-; SPARC-NEXT: ! %bb.9: ! %start
-; SPARC-NEXT: mov %g0, %o4
-; SPARC-NEXT: .LBB0_10: ! %start
-; SPARC-NEXT: cmp %o1, 0
+; SPARC-NEXT: mov %l1, %o3
+; SPARC-NEXT: ! %bb.9: ! %overflow
+; SPARC-NEXT: mov %g0, %o3
+; SPARC-NEXT: .LBB0_10: ! %overflow
+; SPARC-NEXT: cmp %g3, 0
; SPARC-NEXT: bne .LBB0_12
-; SPARC-NEXT: mov %l3, %o1
-; SPARC-NEXT: ! %bb.11: ! %start
-; SPARC-NEXT: mov %g0, %o1
-; SPARC-NEXT: .LBB0_12: ! %start
-; SPARC-NEXT: cmp %l7, 0
+; SPARC-NEXT: mov %l1, %o4
+; SPARC-NEXT: ! %bb.11: ! %overflow
+; SPARC-NEXT: mov %g0, %o4
+; SPARC-NEXT: .LBB0_12: ! %overflow
+; SPARC-NEXT: cmp %o2, 0
; SPARC-NEXT: bne .LBB0_14
-; SPARC-NEXT: mov %l3, %l7
-; SPARC-NEXT: ! %bb.13: ! %start
-; SPARC-NEXT: mov %g0, %l7
-; SPARC-NEXT: .LBB0_14: ! %start
-; SPARC-NEXT: cmp %o2, %o0
-; SPARC-NEXT: bcs .LBB0_16
-; SPARC-NEXT: mov %l3, %g4
-; SPARC-NEXT: ! %bb.15: ! %start
-; SPARC-NEXT: mov %g0, %g4
-; SPARC-NEXT: .LBB0_16: ! %start
-; SPARC-NEXT: cmp %l4, 0
-; SPARC-NEXT: bne .LBB0_18
-; SPARC-NEXT: mov %l3, %l4
-; SPARC-NEXT: ! %bb.17: ! %start
-; SPARC-NEXT: mov %g0, %l4
-; SPARC-NEXT: .LBB0_18: ! %start
+; SPARC-NEXT: mov %l1, %o2
+; SPARC-NEXT: ! %bb.13: ! %overflow
+; SPARC-NEXT: mov %g0, %o2
+; SPARC-NEXT: .LBB0_14: ! %overflow
+; SPARC-NEXT: cmp %l7, 0
+; SPARC-NEXT: bne .LBB0_16
+; SPARC-NEXT: mov %l1, %g3
+; SPARC-NEXT: ! %bb.15: ! %overflow
+; SPARC-NEXT: mov %g0, %g3
+; SPARC-NEXT: .LBB0_16: ! %overflow
+; SPARC-NEXT: cmp %o1, %o0
+; SPARC-NEXT: bcs .LBB0_18
+; SPARC-NEXT: mov %l1, %i4
+; SPARC-NEXT: ! %bb.17: ! %overflow
+; SPARC-NEXT: mov %g0, %i4
+; SPARC-NEXT: .LBB0_18: ! %overflow
; SPARC-NEXT: cmp %i0, 0
; SPARC-NEXT: bne .LBB0_20
-; SPARC-NEXT: mov %l3, %o0
-; SPARC-NEXT: ! %bb.19: ! %start
-; SPARC-NEXT: mov %g0, %o0
-; SPARC-NEXT: .LBB0_20: ! %start
-; SPARC-NEXT: cmp %l6, 0
+; SPARC-NEXT: mov %l1, %i0
+; SPARC-NEXT: ! %bb.19: ! %overflow
+; SPARC-NEXT: mov %g0, %i0
+; SPARC-NEXT: .LBB0_20: ! %overflow
+; SPARC-NEXT: cmp %g4, 0
; SPARC-NEXT: bne .LBB0_22
-; SPARC-NEXT: mov %l3, %l6
-; SPARC-NEXT: ! %bb.21: ! %start
-; SPARC-NEXT: mov %g0, %l6
-; SPARC-NEXT: .LBB0_22: ! %start
-; SPARC-NEXT: and %o4, %o3, %o2
-; SPARC-NEXT: cmp %l1, 0
-; SPARC-NEXT: and %o0, %l4, %l4
+; SPARC-NEXT: mov %l1, %l7
+; SPARC-NEXT: ! %bb.21: ! %overflow
+; SPARC-NEXT: mov %g0, %l7
+; SPARC-NEXT: .LBB0_22: ! %overflow
+; SPARC-NEXT: cmp %l6, 0
; SPARC-NEXT: bne .LBB0_24
-; SPARC-NEXT: mov %l3, %l1
-; SPARC-NEXT: ! %bb.23: ! %start
-; SPARC-NEXT: mov %g0, %l1
-; SPARC-NEXT: .LBB0_24: ! %start
-; SPARC-NEXT: or %o2, %o1, %o0
-; SPARC-NEXT: cmp %l2, %l0
-; SPARC-NEXT: or %l4, %l6, %l4
-; SPARC-NEXT: bcs .LBB0_26
-; SPARC-NEXT: mov %l3, %l0
-; SPARC-NEXT: ! %bb.25: ! %start
-; SPARC-NEXT: mov %g0, %l0
-; SPARC-NEXT: .LBB0_26: ! %start
-; SPARC-NEXT: or %o0, %l7, %l2
-; SPARC-NEXT: or %i5, %i4, %i4
-; SPARC-NEXT: cmp %i4, 0
-; SPARC-NEXT: or %l4, %l1, %l1
-; SPARC-NEXT: bne .LBB0_28
-; SPARC-NEXT: mov %l3, %i4
-; SPARC-NEXT: ! %bb.27: ! %start
-; SPARC-NEXT: mov %g0, %i4
-; SPARC-NEXT: .LBB0_28: ! %start
-; SPARC-NEXT: or %l2, %g4, %i5
-; SPARC-NEXT: or %i1, %i0, %i0
-; SPARC-NEXT: cmp %i0, 0
+; SPARC-NEXT: mov %l1, %g4
+; SPARC-NEXT: ! %bb.23: ! %overflow
+; SPARC-NEXT: mov %g0, %g4
+; SPARC-NEXT: .LBB0_24: ! %overflow
+; SPARC-NEXT: and %o3, %o4, %l6
+; SPARC-NEXT: cmp %l3, 0
+; SPARC-NEXT: and %i0, %l7, %l7
+; SPARC-NEXT: bne .LBB0_26
+; SPARC-NEXT: mov %l1, %i0
+; SPARC-NEXT: ! %bb.25: ! %overflow
+; SPARC-NEXT: mov %g0, %i0
+; SPARC-NEXT: .LBB0_26: ! %overflow
+; SPARC-NEXT: or %l6, %o2, %l3
+; SPARC-NEXT: cmp %l4, %i5
+; SPARC-NEXT: or %l7, %g4, %g4
+; SPARC-NEXT: bcs .LBB0_28
+; SPARC-NEXT: mov %l1, %i5
+; SPARC-NEXT: ! %bb.27: ! %overflow
+; SPARC-NEXT: mov %g0, %i5
+; SPARC-NEXT: .LBB0_28: ! %overflow
+; SPARC-NEXT: or %l3, %g3, %g3
+; SPARC-NEXT: cmp %l2, 0
+; SPARC-NEXT: or %g4, %i0, %g4
; SPARC-NEXT: bne .LBB0_30
-; SPARC-NEXT: or %l1, %l0, %i0
-; SPARC-NEXT: ! %bb.29: ! %start
-; SPARC-NEXT: mov %g0, %l3
-; SPARC-NEXT: .LBB0_30: ! %start
-; SPARC-NEXT: and %l3, %i4, %i1
-; SPARC-NEXT: or %i1, %i0, %i0
+; SPARC-NEXT: mov %l1, %i0
+; SPARC-NEXT: ! %bb.29: ! %overflow
+; SPARC-NEXT: mov %g0, %i0
+; SPARC-NEXT: .LBB0_30: ! %overflow
+; SPARC-NEXT: or %g3, %i4, %i4
+; SPARC-NEXT: cmp %l0, 0
+; SPARC-NEXT: bne .LBB0_32
+; SPARC-NEXT: or %g4, %i5, %i5
+; SPARC-NEXT: ! %bb.31: ! %overflow
+; SPARC-NEXT: mov %g0, %l1
+; SPARC-NEXT: .LBB0_32: ! %overflow
+; SPARC-NEXT: and %l1, %i0, %i0
; SPARC-NEXT: or %i0, %i5, %i0
+; SPARC-NEXT: or %i0, %i4, %i0
+; SPARC-NEXT: ba .LBB0_49
; SPARC-NEXT: or %i0, %l5, %i0
+; SPARC-NEXT: .LBB0_33: ! %overflow.no.lhs
+; SPARC-NEXT: or %i5, %i4, %i2
+; SPARC-NEXT: cmp %i2, 0
+; SPARC-NEXT: be .LBB0_48
+; SPARC-NEXT: nop
+; SPARC-NEXT: ! %bb.34: ! %overflow.no.lhs.only
+; SPARC-NEXT: umul %g3, %l1, %i2
+; SPARC-NEXT: rd %y, %l0
+; SPARC-NEXT: umul %g2, %l1, %i3
+; SPARC-NEXT: rd %y, %l2
+; SPARC-NEXT: addcc %i2, %l2, %i2
+; SPARC-NEXT: addxcc %l0, 0, %l0
+; SPARC-NEXT: umul %g2, %g4, %l2
+; SPARC-NEXT: rd %y, %l3
+; SPARC-NEXT: addcc %l2, %i2, %i2
+; SPARC-NEXT: addxcc %l3, 0, %l2
+; SPARC-NEXT: addcc %l0, %l2, %l0
+; SPARC-NEXT: addxcc %g0, 0, %l2
+; SPARC-NEXT: umul %g3, %g4, %l3
+; SPARC-NEXT: rd %y, %l4
+; SPARC-NEXT: addcc %l3, %l0, %l0
+; SPARC-NEXT: smul %l1, %i0, %l3
+; SPARC-NEXT: umul %l1, %i1, %l1
+; SPARC-NEXT: rd %y, %l5
+; SPARC-NEXT: addxcc %l4, %l2, %l2
+; SPARC-NEXT: add %l5, %l3, %l3
+; SPARC-NEXT: smul %g4, %i1, %g4
+; SPARC-NEXT: add %l3, %g4, %g4
+; SPARC-NEXT: addcc %l0, %l1, %l0
+; SPARC-NEXT: umul %g2, %i5, %l1
+; SPARC-NEXT: rd %y, %l3
+; SPARC-NEXT: addxcc %l2, %g4, %g4
+; SPARC-NEXT: umul %g3, %i5, %l2
+; SPARC-NEXT: rd %y, %l4
+; SPARC-NEXT: addcc %l2, %l3, %l2
+; SPARC-NEXT: addxcc %l4, 0, %l3
+; SPARC-NEXT: umul %g2, %i4, %g2
+; SPARC-NEXT: rd %y, %l4
+; SPARC-NEXT: addcc %g2, %l2, %g2
+; SPARC-NEXT: addxcc %l4, 0, %l2
+; SPARC-NEXT: addcc %l3, %l2, %l2
+; SPARC-NEXT: addxcc %g0, 0, %l3
+; SPARC-NEXT: umul %g3, %i4, %g3
+; SPARC-NEXT: rd %y, %l4
+; SPARC-NEXT: addcc %g3, %l2, %g3
+; SPARC-NEXT: smul %i5, %i0, %i0
+; SPARC-NEXT: umul %i5, %i1, %i5
+; SPARC-NEXT: rd %y, %l2
+; SPARC-NEXT: addxcc %l4, %l3, %l3
+; SPARC-NEXT: add %l2, %i0, %i0
+; SPARC-NEXT: smul %i4, %i1, %i1
+; SPARC-NEXT: add %i0, %i1, %i0
+; SPARC-NEXT: addcc %g3, %i5, %i4
+; SPARC-NEXT: addxcc %l3, %i0, %i5
+; SPARC-NEXT: addcc %l0, %l1, %i1
+; SPARC-NEXT: addxcc %g4, %g2, %g2
+; SPARC-NEXT: mov 1, %i0
+; SPARC-NEXT: cmp %g2, %g4
+; SPARC-NEXT: bcs .LBB0_36
+; SPARC-NEXT: mov %i0, %g3
+; SPARC-NEXT: ! %bb.35: ! %overflow.no.lhs.only
+; SPARC-NEXT: mov %g0, %g3
+; SPARC-NEXT: .LBB0_36: ! %overflow.no.lhs.only
+; SPARC-NEXT: cmp %i1, %l0
+; SPARC-NEXT: bcs .LBB0_38
+; SPARC-NEXT: mov %i0, %l0
+; SPARC-NEXT: ! %bb.37: ! %overflow.no.lhs.only
+; SPARC-NEXT: mov %g0, %l0
+; SPARC-NEXT: .LBB0_38: ! %overflow.no.lhs.only
+; SPARC-NEXT: cmp %g2, %g4
+; SPARC-NEXT: be .LBB0_46
+; SPARC-NEXT: nop
+; SPARC-NEXT: ! %bb.39: ! %overflow.no.lhs.only
+; SPARC-NEXT: ba .LBB0_46
+; SPARC-NEXT: mov %g3, %l0
+; SPARC-NEXT: .LBB0_40: ! %overflow.no.rhs.only
+; SPARC-NEXT: umul %g4, %g2, %i2
+; SPARC-NEXT: rd %y, %l0
+; SPARC-NEXT: umul %l1, %g2, %i3
+; SPARC-NEXT: rd %y, %l2
+; SPARC-NEXT: addcc %i2, %l2, %i2
+; SPARC-NEXT: addxcc %l0, 0, %l0
+; SPARC-NEXT: umul %l1, %g3, %l2
+; SPARC-NEXT: rd %y, %l3
+; SPARC-NEXT: addcc %l2, %i2, %i2
+; SPARC-NEXT: addxcc %l3, 0, %l2
+; SPARC-NEXT: addcc %l0, %l2, %l0
+; SPARC-NEXT: addxcc %g0, 0, %l2
+; SPARC-NEXT: umul %g4, %g3, %l3
+; SPARC-NEXT: rd %y, %l4
+; SPARC-NEXT: addcc %l3, %l0, %l0
+; SPARC-NEXT: smul %g2, %i4, %l3
+; SPARC-NEXT: umul %g2, %i5, %g2
+; SPARC-NEXT: rd %y, %l5
+; SPARC-NEXT: addxcc %l4, %l2, %l2
+; SPARC-NEXT: add %l5, %l3, %l3
+; SPARC-NEXT: smul %g3, %i5, %g3
+; SPARC-NEXT: add %l3, %g3, %g3
+; SPARC-NEXT: addcc %l0, %g2, %l0
+; SPARC-NEXT: umul %l1, %i1, %g2
+; SPARC-NEXT: rd %y, %l3
+; SPARC-NEXT: addxcc %l2, %g3, %g3
+; SPARC-NEXT: umul %g4, %i1, %l2
+; SPARC-NEXT: rd %y, %l4
+; SPARC-NEXT: addcc %l2, %l3, %l2
+; SPARC-NEXT: addxcc %l4, 0, %l3
+; SPARC-NEXT: umul %l1, %i0, %l1
+; SPARC-NEXT: rd %y, %l4
+; SPARC-NEXT: addcc %l1, %l2, %l1
+; SPARC-NEXT: addxcc %l4, 0, %l2
+; SPARC-NEXT: addcc %l3, %l2, %l2
+; SPARC-NEXT: addxcc %g0, 0, %l3
+; SPARC-NEXT: umul %g4, %i0, %g4
+; SPARC-NEXT: rd %y, %l4
+; SPARC-NEXT: addcc %g4, %l2, %g4
+; SPARC-NEXT: smul %i1, %i4, %i4
+; SPARC-NEXT: umul %i1, %i5, %i1
+; SPARC-NEXT: rd %y, %l2
+; SPARC-NEXT: addxcc %l4, %l3, %l3
+; SPARC-NEXT: add %l2, %i4, %i4
+; SPARC-NEXT: smul %i0, %i5, %i0
+; SPARC-NEXT: add %i4, %i0, %i0
+; SPARC-NEXT: addcc %g4, %i1, %i4
+; SPARC-NEXT: addxcc %l3, %i0, %i5
+; SPARC-NEXT: addcc %l0, %g2, %i1
+; SPARC-NEXT: addxcc %g3, %l1, %g2
+; SPARC-NEXT: mov 1, %i0
+; SPARC-NEXT: cmp %g2, %g3
+; SPARC-NEXT: bcs .LBB0_42
+; SPARC-NEXT: mov %i0, %g4
+; SPARC-NEXT: ! %bb.41: ! %overflow.no.rhs.only
+; SPARC-NEXT: mov %g0, %g4
+; SPARC-NEXT: .LBB0_42: ! %overflow.no.rhs.only
+; SPARC-NEXT: cmp %i1, %l0
+; SPARC-NEXT: bcs .LBB0_44
+; SPARC-NEXT: mov %i0, %l0
+; SPARC-NEXT: ! %bb.43: ! %overflow.no.rhs.only
+; SPARC-NEXT: mov %g0, %l0
+; SPARC-NEXT: .LBB0_44: ! %overflow.no.rhs.only
+; SPARC-NEXT: cmp %g2, %g3
+; SPARC-NEXT: be .LBB0_46
+; SPARC-NEXT: nop
+; SPARC-NEXT: ! %bb.45: ! %overflow.no.rhs.only
+; SPARC-NEXT: mov %g4, %l0
+; SPARC-NEXT: .LBB0_46: ! %overflow.no.rhs.only
+; SPARC-NEXT: addcc %i4, %l0, %i4
+; SPARC-NEXT: addxcc %i5, 0, %i5
+; SPARC-NEXT: or %i4, %i5, %i4
+; SPARC-NEXT: cmp %i4, 0
+; SPARC-NEXT: bne .LBB0_49
+; SPARC-NEXT: nop
+; SPARC-NEXT: ! %bb.47: ! %overflow.no.rhs.only
+; SPARC-NEXT: ba .LBB0_49
+; SPARC-NEXT: mov %g0, %i0
+; SPARC-NEXT: .LBB0_48: ! %overflow.no
+; SPARC-NEXT: smul %l1, %i0, %i3
+; SPARC-NEXT: umul %l1, %i1, %i2
+; SPARC-NEXT: rd %y, %l0
+; SPARC-NEXT: mov %g0, %i0
+; SPARC-NEXT: add %l0, %i3, %i3
+; SPARC-NEXT: smul %g4, %i1, %i1
+; SPARC-NEXT: smul %i5, %g3, %l0
+; SPARC-NEXT: umul %i5, %g2, %i5
+; SPARC-NEXT: rd %y, %l2
+; SPARC-NEXT: add %i3, %i1, %i1
+; SPARC-NEXT: add %l2, %l0, %i3
+; SPARC-NEXT: smul %i4, %g2, %i4
+; SPARC-NEXT: add %i3, %i4, %i4
+; SPARC-NEXT: addcc %i5, %i2, %i5
+; SPARC-NEXT: umul %g2, %l1, %i3
+; SPARC-NEXT: rd %y, %i2
+; SPARC-NEXT: addxcc %i4, %i1, %i4
+; SPARC-NEXT: umul %g3, %l1, %i1
+; SPARC-NEXT: rd %y, %l0
+; SPARC-NEXT: addcc %i1, %i2, %i1
+; SPARC-NEXT: addxcc %l0, 0, %l0
+; SPARC-NEXT: umul %g2, %g4, %i2
+; SPARC-NEXT: rd %y, %g2
+; SPARC-NEXT: addcc %i2, %i1, %i2
+; SPARC-NEXT: addxcc %g2, 0, %i1
+; SPARC-NEXT: addcc %l0, %i1, %i1
+; SPARC-NEXT: addxcc %g0, 0, %g2
+; SPARC-NEXT: umul %g3, %g4, %g3
+; SPARC-NEXT: rd %y, %g4
+; SPARC-NEXT: addcc %g3, %i1, %i1
+; SPARC-NEXT: addxcc %g4, %g2, %g2
+; SPARC-NEXT: addcc %i1, %i5, %i1
+; SPARC-NEXT: addxcc %g2, %i4, %g2
+; SPARC-NEXT: .LBB0_49: ! %overflow.res
; SPARC-NEXT: and %i0, 1, %i4
-; SPARC-NEXT: mov %g3, %i0
; SPARC-NEXT: ret
-; SPARC-NEXT: restore %g0, %g2, %o1
+; SPARC-NEXT: restore %g0, %g2, %o0
;
; SPARC64-LABEL: muloti_test:
; SPARC64: .register %g2, #scratch
; SPARC64-NEXT: .register %g3, #scratch
-; SPARC64-NEXT: ! %bb.0: ! %start
+; SPARC64-NEXT: ! %bb.0: ! %overflow.entry
; SPARC64-NEXT: save %sp, -176, %sp
-; SPARC64-NEXT: mov %i0, %l1
+; SPARC64-NEXT: brz %i0, .LBB0_3
+; SPARC64-NEXT: mov %i1, %i4
+; SPARC64-NEXT: ! %bb.1: ! %overflow.lhs
+; SPARC64-NEXT: brz %i2, .LBB0_5
+; SPARC64-NEXT: nop
+; SPARC64-NEXT: ! %bb.2: ! %overflow
; SPARC64-NEXT: mov %g0, %o0
; SPARC64-NEXT: mov %i2, %o1
; SPARC64-NEXT: mov %g0, %o2
; SPARC64-NEXT: call __multi3
-; SPARC64-NEXT: mov %i1, %o3
-; SPARC64-NEXT: mov %o0, %i4
-; SPARC64-NEXT: mov %o1, %i5
+; SPARC64-NEXT: mov %i4, %o3
+; SPARC64-NEXT: mov %o0, %i5
+; SPARC64-NEXT: mov %o1, %i1
; SPARC64-NEXT: mov %g0, %o0
; SPARC64-NEXT: mov %i0, %o1
; SPARC64-NEXT: mov %g0, %o2
; SPARC64-NEXT: call __multi3
; SPARC64-NEXT: mov %i3, %o3
; SPARC64-NEXT: mov %o0, %l0
-; SPARC64-NEXT: add %o1, %i5, %i0
+; SPARC64-NEXT: add %o1, %i1, %l1
; SPARC64-NEXT: mov %g0, %o0
-; SPARC64-NEXT: mov %i1, %o1
+; SPARC64-NEXT: mov %i4, %o1
; SPARC64-NEXT: mov %g0, %o2
; SPARC64-NEXT: call __multi3
; SPARC64-NEXT: mov %i3, %o3
-; SPARC64-NEXT: mov %g0, %i1
-; SPARC64-NEXT: mov %g0, %i3
-; SPARC64-NEXT: mov %g0, %i5
+; SPARC64-NEXT: mov %o1, %i1
+; SPARC64-NEXT: mov %g0, %i4
; SPARC64-NEXT: mov %g0, %g2
; SPARC64-NEXT: mov %g0, %g3
-; SPARC64-NEXT: add %o0, %i0, %i0
-; SPARC64-NEXT: cmp %i0, %o0
-; SPARC64-NEXT: movrnz %l0, 1, %i3
-; SPARC64-NEXT: movrnz %i2, 1, %i5
-; SPARC64-NEXT: movrnz %l1, 1, %g2
-; SPARC64-NEXT: movcs %xcc, 1, %i1
-; SPARC64-NEXT: and %g2, %i5, %i2
-; SPARC64-NEXT: or %i2, %i3, %i2
-; SPARC64-NEXT: movrnz %i4, 1, %g3
-; SPARC64-NEXT: or %i2, %g3, %i2
-; SPARC64-NEXT: or %i2, %i1, %i1
-; SPARC64-NEXT: srl %i1, 0, %i2
+; SPARC64-NEXT: mov %g0, %g4
+; SPARC64-NEXT: mov %g0, %g5
+; SPARC64-NEXT: add %o0, %l1, %i3
+; SPARC64-NEXT: cmp %i3, %o0
+; SPARC64-NEXT: movrnz %i2, 1, %g2
+; SPARC64-NEXT: movrnz %i0, 1, %g3
+; SPARC64-NEXT: and %g3, %g2, %i0
+; SPARC64-NEXT: movcs %xcc, 1, %i4
+; SPARC64-NEXT: movrnz %l0, 1, %g4
+; SPARC64-NEXT: or %i0, %g4, %i0
+; SPARC64-NEXT: movrnz %i5, 1, %g5
+; SPARC64-NEXT: or %i0, %g5, %i0
+; SPARC64-NEXT: ba .LBB0_8
+; SPARC64-NEXT: or %i0, %i4, %i0
+; SPARC64-NEXT: .LBB0_3: ! %overflow.no.lhs
+; SPARC64-NEXT: brz %i2, .LBB0_7
+; SPARC64-NEXT: nop
+; SPARC64-NEXT: ! %bb.4: ! %overflow.no.lhs.only
+; SPARC64-NEXT: mov %i0, %o0
+; SPARC64-NEXT: mov %i4, %o1
+; SPARC64-NEXT: mov %g0, %o2
+; SPARC64-NEXT: call __multi3
+; SPARC64-NEXT: mov %i3, %o3
+; SPARC64-NEXT: mov %o0, %i5
+; SPARC64-NEXT: mov %o1, %i1
+; SPARC64-NEXT: mov %i0, %o0
+; SPARC64-NEXT: mov %i4, %o1
+; SPARC64-NEXT: mov %g0, %o2
+; SPARC64-NEXT: call __multi3
+; SPARC64-NEXT: mov %i2, %o3
+; SPARC64-NEXT: mov %g0, %i2
+; SPARC64-NEXT: mov %g0, %i0
+; SPARC64-NEXT: add %i5, %o1, %i3
+; SPARC64-NEXT: ba .LBB0_6
+; SPARC64-NEXT: cmp %i3, %i5
+; SPARC64-NEXT: .LBB0_5: ! %overflow.no.rhs.only
+; SPARC64-NEXT: mov %i2, %o0
+; SPARC64-NEXT: mov %i3, %o1
+; SPARC64-NEXT: mov %g0, %o2
+; SPARC64-NEXT: call __multi3
+; SPARC64-NEXT: mov %i4, %o3
+; SPARC64-NEXT: mov %o0, %i4
+; SPARC64-NEXT: mov %o1, %i1
+; SPARC64-NEXT: mov %i2, %o0
+; SPARC64-NEXT: mov %i3, %o1
+; SPARC64-NEXT: mov %g0, %o2
+; SPARC64-NEXT: call __multi3
+; SPARC64-NEXT: mov %i0, %o3
+; SPARC64-NEXT: mov %g0, %i2
+; SPARC64-NEXT: mov %g0, %i0
+; SPARC64-NEXT: add %i4, %o1, %i3
+; SPARC64-NEXT: cmp %i3, %i4
+; SPARC64-NEXT: .LBB0_6: ! %overflow.res
+; SPARC64-NEXT: movcs %xcc, 1, %i2
+; SPARC64-NEXT: srl %i2, 0, %i2
+; SPARC64-NEXT: add %o0, %i2, %i2
+; SPARC64-NEXT: ba .LBB0_8
+; SPARC64-NEXT: movrnz %i2, 1, %i0
+; SPARC64-NEXT: .LBB0_7: ! %overflow.no
+; SPARC64-NEXT: mov %i0, %o0
+; SPARC64-NEXT: mov %i4, %o1
+; SPARC64-NEXT: mov %i2, %o2
+; SPARC64-NEXT: call __multi3
+; SPARC64-NEXT: mov %i3, %o3
+; SPARC64-NEXT: mov %o0, %i3
+; SPARC64-NEXT: mov %o1, %i1
+; SPARC64-NEXT: mov %g0, %i0
+; SPARC64-NEXT: .LBB0_8: ! %overflow.res
+; SPARC64-NEXT: and %i0, 1, %i2
; SPARC64-NEXT: ret
-; SPARC64-NEXT: restore %g0, %o1, %o1
+; SPARC64-NEXT: restore %g0, %i3, %o0
;
; SPARC64-VIS3-LABEL: muloti_test:
; SPARC64-VIS3: .register %g2, #scratch
; SPARC64-VIS3-NEXT: .register %g3, #scratch
-; SPARC64-VIS3-NEXT: ! %bb.0: ! %start
+; SPARC64-VIS3-NEXT: ! %bb.0: ! %overflow.entry
; SPARC64-VIS3-NEXT: save %sp, -128, %sp
+; SPARC64-VIS3-NEXT: brz %i0, .LBB0_3
+; SPARC64-VIS3-NEXT: nop
+; SPARC64-VIS3-NEXT: ! %bb.1: ! %overflow.lhs
+; SPARC64-VIS3-NEXT: brz %i2, .LBB0_5
+; SPARC64-VIS3-NEXT: nop
+; SPARC64-VIS3-NEXT: ! %bb.2: ! %overflow
; SPARC64-VIS3-NEXT: mov %g0, %i5
; SPARC64-VIS3-NEXT: mov %g0, %g2
; SPARC64-VIS3-NEXT: mov %g0, %g3
@@ -227,9 +490,59 @@ define { i128, i8 } @muloti_test(i128 %l, i128 %r) nounwind {
; SPARC64-VIS3-NEXT: umulxhi %i2, %i1, %i2
; SPARC64-VIS3-NEXT: movrnz %i2, 1, %g5
; SPARC64-VIS3-NEXT: or %i0, %g5, %i0
-; SPARC64-VIS3-NEXT: or %i0, %i5, %i0
+; SPARC64-VIS3-NEXT: ba .LBB0_7
+; SPARC64-VIS3-NEXT: or %i0, %i5, %i5
+; SPARC64-VIS3-NEXT: .LBB0_3: ! %overflow.no.lhs
+; SPARC64-VIS3-NEXT: brz %i2, .LBB0_6
+; SPARC64-VIS3-NEXT: nop
+; SPARC64-VIS3-NEXT: ! %bb.4: ! %overflow.no.lhs.only
+; SPARC64-VIS3-NEXT: mov %g0, %g2
+; SPARC64-VIS3-NEXT: mov %g0, %i5
+; SPARC64-VIS3-NEXT: mulx %i0, %i3, %i4
+; SPARC64-VIS3-NEXT: umulxhi %i1, %i3, %g3
+; SPARC64-VIS3-NEXT: add %g3, %i4, %g3
+; SPARC64-VIS3-NEXT: mulx %i0, %i2, %i0
+; SPARC64-VIS3-NEXT: umulxhi %i1, %i2, %i4
+; SPARC64-VIS3-NEXT: add %i4, %i0, %i0
+; SPARC64-VIS3-NEXT: mulx %i1, %i3, %i3
+; SPARC64-VIS3-NEXT: mulx %i1, %i2, %i2
+; SPARC64-VIS3-NEXT: mov %i3, %i1
+; SPARC64-VIS3-NEXT: add %g3, %i2, %i4
+; SPARC64-VIS3-NEXT: cmp %i4, %g3
+; SPARC64-VIS3-NEXT: movcs %xcc, 1, %g2
+; SPARC64-VIS3-NEXT: srl %g2, 0, %i2
+; SPARC64-VIS3-NEXT: add %i0, %i2, %i0
+; SPARC64-VIS3-NEXT: ba .LBB0_8
+; SPARC64-VIS3-NEXT: movrnz %i0, 1, %i5
+; SPARC64-VIS3-NEXT: .LBB0_5: ! %overflow.no.rhs.only
+; SPARC64-VIS3-NEXT: mov %g0, %g2
+; SPARC64-VIS3-NEXT: mov %g0, %i5
+; SPARC64-VIS3-NEXT: mulx %i2, %i1, %i4
+; SPARC64-VIS3-NEXT: umulxhi %i3, %i1, %g3
+; SPARC64-VIS3-NEXT: add %g3, %i4, %g3
+; SPARC64-VIS3-NEXT: mulx %i2, %i0, %i2
+; SPARC64-VIS3-NEXT: umulxhi %i3, %i0, %i4
+; SPARC64-VIS3-NEXT: add %i4, %i2, %i2
+; SPARC64-VIS3-NEXT: mulx %i3, %i1, %i1
+; SPARC64-VIS3-NEXT: mulx %i3, %i0, %i0
+; SPARC64-VIS3-NEXT: add %g3, %i0, %i4
+; SPARC64-VIS3-NEXT: cmp %i4, %g3
+; SPARC64-VIS3-NEXT: movcs %xcc, 1, %g2
+; SPARC64-VIS3-NEXT: srl %g2, 0, %i0
+; SPARC64-VIS3-NEXT: add %i2, %i0, %i0
+; SPARC64-VIS3-NEXT: ba .LBB0_8
+; SPARC64-VIS3-NEXT: movrnz %i0, 1, %i5
+; SPARC64-VIS3-NEXT: .LBB0_6: ! %overflow.no
+; SPARC64-VIS3-NEXT: mov %g0, %i5
+; SPARC64-VIS3-NEXT: mulx %i1, %i2, %i2
+; SPARC64-VIS3-NEXT: umulxhi %i1, %i3, %i4
+; SPARC64-VIS3-NEXT: add %i4, %i2, %i2
+; SPARC64-VIS3-NEXT: mulx %i0, %i3, %i0
+; SPARC64-VIS3-NEXT: add %i2, %i0, %i4
+; SPARC64-VIS3-NEXT: .LBB0_7: ! %overflow.res
; SPARC64-VIS3-NEXT: mulx %i1, %i3, %i1
-; SPARC64-VIS3-NEXT: srl %i0, 0, %i2
+; SPARC64-VIS3-NEXT: .LBB0_8: ! %overflow.res
+; SPARC64-VIS3-NEXT: and %i5, 1, %i2
; SPARC64-VIS3-NEXT: ret
; SPARC64-VIS3-NEXT: restore %g0, %i4, %o0
start:
diff --git a/llvm/test/CodeGen/Thumb/umulo-128-legalisation-lowering.ll b/llvm/test/CodeGen/Thumb/umulo-128-legalisation-lowering.ll
index 9b5fa1c2bc811..c19ce3f34011e 100644
--- a/llvm/test/CodeGen/Thumb/umulo-128-legalisation-lowering.ll
+++ b/llvm/test/CodeGen/Thumb/umulo-128-legalisation-lowering.ll
@@ -3,200 +3,568 @@
define { i128, i8 } @muloti_test(i128 %l, i128 %r) unnamed_addr #0 {
; THUMBV6-LABEL: muloti_test:
-; THUMBV6: @ %bb.0: @ %start
+; THUMBV6: @ %bb.0: @ %overflow.entry
; THUMBV6-NEXT: .save {r4, r5, r6, r7, lr}
; THUMBV6-NEXT: push {r4, r5, r6, r7, lr}
-; THUMBV6-NEXT: .pad #60
-; THUMBV6-NEXT: sub sp, #60
+; THUMBV6-NEXT: .pad #84
+; THUMBV6-NEXT: sub sp, #84
; THUMBV6-NEXT: mov r6, r3
-; THUMBV6-NEXT: mov r1, r2
-; THUMBV6-NEXT: str r2, [sp, #52] @ 4-byte Spill
-; THUMBV6-NEXT: mov r4, r0
+; THUMBV6-NEXT: str r0, [sp, #48] @ 4-byte Spill
+; THUMBV6-NEXT: ldr r0, [sp, #108]
+; THUMBV6-NEXT: ldr r5, [sp, #104]
+; THUMBV6-NEXT: str r5, [sp, #56] @ 4-byte Spill
+; THUMBV6-NEXT: str r0, [sp, #52] @ 4-byte Spill
+; THUMBV6-NEXT: orrs r5, r0
+; THUMBV6-NEXT: ldr r1, [sp, #124]
+; THUMBV6-NEXT: ldr r4, [sp, #120]
+; THUMBV6-NEXT: ldr r0, [sp, #116]
+; THUMBV6-NEXT: str r0, [sp, #68] @ 4-byte Spill
+; THUMBV6-NEXT: ldr r3, [sp, #112]
+; THUMBV6-NEXT: str r4, [sp, #80] @ 4-byte Spill
+; THUMBV6-NEXT: str r1, [sp, #60] @ 4-byte Spill
+; THUMBV6-NEXT: str r2, [sp, #72] @ 4-byte Spill
+; THUMBV6-NEXT: str r6, [sp, #76] @ 4-byte Spill
+; THUMBV6-NEXT: str r3, [sp, #64] @ 4-byte Spill
+; THUMBV6-NEXT: bne .LBB0_1
+; THUMBV6-NEXT: b .LBB0_3
+; THUMBV6-NEXT: .LBB0_1: @ %overflow.lhs
+; THUMBV6-NEXT: orrs r4, r1
+; THUMBV6-NEXT: bne .LBB0_2
+; THUMBV6-NEXT: b .LBB0_5
+; THUMBV6-NEXT: .LBB0_2: @ %overflow
+; THUMBV6-NEXT: str r4, [sp, #36] @ 4-byte Spill
+; THUMBV6-NEXT: movs r4, #0
+; THUMBV6-NEXT: mov r0, r6
+; THUMBV6-NEXT: mov r1, r4
+; THUMBV6-NEXT: ldr r7, [sp, #80] @ 4-byte Reload
+; THUMBV6-NEXT: mov r2, r7
+; THUMBV6-NEXT: mov r3, r4
+; THUMBV6-NEXT: bl __aeabi_lmul
+; THUMBV6-NEXT: str r0, [sp, #44] @ 4-byte Spill
+; THUMBV6-NEXT: str r1, [sp, #32] @ 4-byte Spill
+; THUMBV6-NEXT: ldr r0, [sp, #60] @ 4-byte Reload
+; THUMBV6-NEXT: mov r1, r4
+; THUMBV6-NEXT: ldr r6, [sp, #72] @ 4-byte Reload
+; THUMBV6-NEXT: mov r2, r6
+; THUMBV6-NEXT: mov r3, r4
+; THUMBV6-NEXT: bl __aeabi_lmul
+; THUMBV6-NEXT: str r1, [sp, #24] @ 4-byte Spill
+; THUMBV6-NEXT: ldr r1, [sp, #44] @ 4-byte Reload
+; THUMBV6-NEXT: adds r0, r0, r1
; THUMBV6-NEXT: str r0, [sp, #40] @ 4-byte Spill
-; THUMBV6-NEXT: ldr r2, [sp, #88]
-; THUMBV6-NEXT: str r2, [sp, #48] @ 4-byte Spill
-; THUMBV6-NEXT: movs r5, #0
-; THUMBV6-NEXT: mov r0, r1
-; THUMBV6-NEXT: mov r1, r5
-; THUMBV6-NEXT: mov r3, r5
+; THUMBV6-NEXT: mov r0, r7
+; THUMBV6-NEXT: mov r1, r4
+; THUMBV6-NEXT: mov r2, r6
+; THUMBV6-NEXT: mov r3, r4
; THUMBV6-NEXT: bl __aeabi_lmul
+; THUMBV6-NEXT: str r0, [sp, #44] @ 4-byte Spill
+; THUMBV6-NEXT: ldr r0, [sp, #40] @ 4-byte Reload
+; THUMBV6-NEXT: adds r0, r1, r0
+; THUMBV6-NEXT: str r0, [sp, #40] @ 4-byte Spill
+; THUMBV6-NEXT: mov r1, r4
+; THUMBV6-NEXT: adcs r1, r4
; THUMBV6-NEXT: str r1, [sp, #28] @ 4-byte Spill
-; THUMBV6-NEXT: str r0, [r4]
-; THUMBV6-NEXT: ldr r2, [sp, #96]
-; THUMBV6-NEXT: str r2, [sp, #36] @ 4-byte Spill
-; THUMBV6-NEXT: mov r4, r6
-; THUMBV6-NEXT: str r6, [sp, #56] @ 4-byte Spill
+; THUMBV6-NEXT: ldr r0, [sp, #68] @ 4-byte Reload
+; THUMBV6-NEXT: mov r1, r4
+; THUMBV6-NEXT: ldr r7, [sp, #56] @ 4-byte Reload
+; THUMBV6-NEXT: mov r2, r7
+; THUMBV6-NEXT: mov r3, r4
+; THUMBV6-NEXT: bl __aeabi_lmul
+; THUMBV6-NEXT: str r0, [sp, #12] @ 4-byte Spill
+; THUMBV6-NEXT: str r1, [sp, #20] @ 4-byte Spill
+; THUMBV6-NEXT: str r5, [sp, #80] @ 4-byte Spill
+; THUMBV6-NEXT: ldr r5, [sp, #52] @ 4-byte Reload
+; THUMBV6-NEXT: mov r0, r5
+; THUMBV6-NEXT: mov r1, r4
+; THUMBV6-NEXT: ldr r6, [sp, #64] @ 4-byte Reload
+; THUMBV6-NEXT: mov r2, r6
+; THUMBV6-NEXT: mov r3, r4
+; THUMBV6-NEXT: bl __aeabi_lmul
+; THUMBV6-NEXT: str r1, [sp, #16] @ 4-byte Spill
+; THUMBV6-NEXT: ldr r1, [sp, #12] @ 4-byte Reload
+; THUMBV6-NEXT: adds r0, r0, r1
+; THUMBV6-NEXT: str r0, [sp, #12] @ 4-byte Spill
+; THUMBV6-NEXT: mov r0, r7
+; THUMBV6-NEXT: mov r1, r4
+; THUMBV6-NEXT: mov r2, r6
+; THUMBV6-NEXT: mov r7, r6
+; THUMBV6-NEXT: mov r3, r4
+; THUMBV6-NEXT: bl __aeabi_lmul
+; THUMBV6-NEXT: ldr r2, [sp, #12] @ 4-byte Reload
+; THUMBV6-NEXT: adds r2, r1, r2
+; THUMBV6-NEXT: mov r1, r4
+; THUMBV6-NEXT: adcs r1, r4
+; THUMBV6-NEXT: str r1, [sp, #56] @ 4-byte Spill
+; THUMBV6-NEXT: ldr r1, [sp, #44] @ 4-byte Reload
+; THUMBV6-NEXT: adds r0, r0, r1
+; THUMBV6-NEXT: str r0, [sp, #12] @ 4-byte Spill
+; THUMBV6-NEXT: ldr r0, [sp, #40] @ 4-byte Reload
+; THUMBV6-NEXT: adcs r2, r0
+; THUMBV6-NEXT: str r2, [sp, #8] @ 4-byte Spill
+; THUMBV6-NEXT: ldr r6, [sp, #72] @ 4-byte Reload
; THUMBV6-NEXT: mov r0, r6
-; THUMBV6-NEXT: mov r1, r5
-; THUMBV6-NEXT: mov r3, r5
+; THUMBV6-NEXT: mov r1, r4
+; THUMBV6-NEXT: mov r2, r7
+; THUMBV6-NEXT: mov r3, r4
; THUMBV6-NEXT: bl __aeabi_lmul
; THUMBV6-NEXT: str r0, [sp, #44] @ 4-byte Spill
+; THUMBV6-NEXT: str r1, [sp, #40] @ 4-byte Spill
+; THUMBV6-NEXT: ldr r0, [sp, #76] @ 4-byte Reload
+; THUMBV6-NEXT: mov r1, r4
+; THUMBV6-NEXT: mov r2, r7
+; THUMBV6-NEXT: mov r3, r4
+; THUMBV6-NEXT: bl __aeabi_lmul
; THUMBV6-NEXT: mov r7, r1
-; THUMBV6-NEXT: subs r0, r1, #1
-; THUMBV6-NEXT: sbcs r7, r0
-; THUMBV6-NEXT: ldr r0, [sp, #100]
-; THUMBV6-NEXT: str r0, [sp, #32] @ 4-byte Spill
-; THUMBV6-NEXT: mov r1, r5
-; THUMBV6-NEXT: ldr r6, [sp, #52] @ 4-byte Reload
+; THUMBV6-NEXT: ldr r1, [sp, #40] @ 4-byte Reload
+; THUMBV6-NEXT: adds r0, r0, r1
+; THUMBV6-NEXT: str r0, [sp, #40] @ 4-byte Spill
+; THUMBV6-NEXT: adcs r7, r4
+; THUMBV6-NEXT: mov r0, r6
+; THUMBV6-NEXT: mov r1, r4
+; THUMBV6-NEXT: ldr r6, [sp, #68] @ 4-byte Reload
; THUMBV6-NEXT: mov r2, r6
-; THUMBV6-NEXT: mov r3, r5
+; THUMBV6-NEXT: mov r3, r4
; THUMBV6-NEXT: bl __aeabi_lmul
-; THUMBV6-NEXT: str r0, [sp, #24] @ 4-byte Spill
-; THUMBV6-NEXT: subs r2, r1, #1
-; THUMBV6-NEXT: sbcs r1, r2
-; THUMBV6-NEXT: subs r2, r4, #1
+; THUMBV6-NEXT: ldr r2, [sp, #40] @ 4-byte Reload
+; THUMBV6-NEXT: adds r0, r0, r2
+; THUMBV6-NEXT: str r0, [sp, #40] @ 4-byte Spill
+; THUMBV6-NEXT: adcs r1, r4
+; THUMBV6-NEXT: adds r0, r7, r1
+; THUMBV6-NEXT: str r0, [sp, #4] @ 4-byte Spill
+; THUMBV6-NEXT: mov r7, r4
+; THUMBV6-NEXT: adcs r7, r4
+; THUMBV6-NEXT: ldr r0, [sp, #76] @ 4-byte Reload
+; THUMBV6-NEXT: mov r1, r4
+; THUMBV6-NEXT: mov r2, r6
+; THUMBV6-NEXT: mov r3, r4
+; THUMBV6-NEXT: bl __aeabi_lmul
+; THUMBV6-NEXT: ldr r2, [sp, #4] @ 4-byte Reload
+; THUMBV6-NEXT: adds r0, r0, r2
+; THUMBV6-NEXT: str r0, [sp, #4] @ 4-byte Spill
+; THUMBV6-NEXT: adcs r1, r7
+; THUMBV6-NEXT: str r1, [sp] @ 4-byte Spill
+; THUMBV6-NEXT: ldr r0, [sp, #64] @ 4-byte Reload
+; THUMBV6-NEXT: mov r1, r6
+; THUMBV6-NEXT: mov r2, r4
+; THUMBV6-NEXT: mov r3, r4
+; THUMBV6-NEXT: bl __aeabi_lmul
+; THUMBV6-NEXT: mov r6, r0
+; THUMBV6-NEXT: str r1, [sp, #64] @ 4-byte Spill
+; THUMBV6-NEXT: ldr r0, [sp, #72] @ 4-byte Reload
+; THUMBV6-NEXT: ldr r7, [sp, #76] @ 4-byte Reload
+; THUMBV6-NEXT: mov r1, r7
+; THUMBV6-NEXT: mov r2, r4
; THUMBV6-NEXT: mov r3, r4
+; THUMBV6-NEXT: bl __aeabi_lmul
+; THUMBV6-NEXT: adds r0, r0, r6
+; THUMBV6-NEXT: ldr r2, [sp, #64] @ 4-byte Reload
+; THUMBV6-NEXT: adcs r1, r2
+; THUMBV6-NEXT: ldr r2, [sp, #4] @ 4-byte Reload
+; THUMBV6-NEXT: adds r0, r2, r0
+; THUMBV6-NEXT: ldr r2, [sp] @ 4-byte Reload
+; THUMBV6-NEXT: adcs r1, r2
+; THUMBV6-NEXT: ldr r2, [sp, #12] @ 4-byte Reload
+; THUMBV6-NEXT: adds r0, r0, r2
+; THUMBV6-NEXT: str r0, [sp, #72] @ 4-byte Spill
+; THUMBV6-NEXT: ldr r0, [sp, #8] @ 4-byte Reload
+; THUMBV6-NEXT: adcs r1, r0
+; THUMBV6-NEXT: adcs r4, r4
+; THUMBV6-NEXT: ldr r3, [sp, #32] @ 4-byte Reload
+; THUMBV6-NEXT: subs r2, r3, #1
; THUMBV6-NEXT: sbcs r3, r2
-; THUMBV6-NEXT: ldr r4, [sp, #32] @ 4-byte Reload
-; THUMBV6-NEXT: subs r2, r4, #1
-; THUMBV6-NEXT: sbcs r4, r2
-; THUMBV6-NEXT: ands r4, r3
-; THUMBV6-NEXT: orrs r4, r1
-; THUMBV6-NEXT: orrs r4, r7
-; THUMBV6-NEXT: ldr r0, [sp, #44] @ 4-byte Reload
-; THUMBV6-NEXT: ldr r1, [sp, #24] @ 4-byte Reload
-; THUMBV6-NEXT: adds r7, r1, r0
+; THUMBV6-NEXT: ldr r0, [sp, #24] @ 4-byte Reload
+; THUMBV6-NEXT: subs r2, r0, #1
+; THUMBV6-NEXT: sbcs r0, r2
+; THUMBV6-NEXT: subs r2, r7, #1
+; THUMBV6-NEXT: sbcs r7, r2
+; THUMBV6-NEXT: mov r6, r7
+; THUMBV6-NEXT: ldr r7, [sp, #60] @ 4-byte Reload
+; THUMBV6-NEXT: subs r2, r7, #1
+; THUMBV6-NEXT: sbcs r7, r2
+; THUMBV6-NEXT: ands r7, r6
+; THUMBV6-NEXT: orrs r7, r0
+; THUMBV6-NEXT: orrs r7, r3
+; THUMBV6-NEXT: ldr r0, [sp, #28] @ 4-byte Reload
+; THUMBV6-NEXT: orrs r7, r0
+; THUMBV6-NEXT: ldr r0, [sp, #20] @ 4-byte Reload
+; THUMBV6-NEXT: subs r2, r0, #1
+; THUMBV6-NEXT: sbcs r0, r2
+; THUMBV6-NEXT: ldr r3, [sp, #16] @ 4-byte Reload
+; THUMBV6-NEXT: subs r2, r3, #1
+; THUMBV6-NEXT: sbcs r3, r2
+; THUMBV6-NEXT: mov r6, r3
+; THUMBV6-NEXT: ldr r3, [sp, #68] @ 4-byte Reload
+; THUMBV6-NEXT: subs r2, r3, #1
+; THUMBV6-NEXT: sbcs r3, r2
+; THUMBV6-NEXT: subs r2, r5, #1
+; THUMBV6-NEXT: sbcs r5, r2
+; THUMBV6-NEXT: ands r5, r3
+; THUMBV6-NEXT: orrs r5, r6
+; THUMBV6-NEXT: orrs r5, r0
+; THUMBV6-NEXT: ldr r0, [sp, #72] @ 4-byte Reload
+; THUMBV6-NEXT: ldr r2, [sp, #56] @ 4-byte Reload
+; THUMBV6-NEXT: orrs r5, r2
+; THUMBV6-NEXT: ldr r3, [sp, #36] @ 4-byte Reload
+; THUMBV6-NEXT: subs r2, r3, #1
+; THUMBV6-NEXT: sbcs r3, r2
+; THUMBV6-NEXT: mov r6, r3
+; THUMBV6-NEXT: ldr r2, [sp, #80] @ 4-byte Reload
+; THUMBV6-NEXT: subs r2, r2, #1
+; THUMBV6-NEXT: ldr r3, [sp, #80] @ 4-byte Reload
+; THUMBV6-NEXT: sbcs r3, r2
+; THUMBV6-NEXT: str r3, [sp, #80] @ 4-byte Spill
+; THUMBV6-NEXT: ldr r2, [sp, #80] @ 4-byte Reload
+; THUMBV6-NEXT: ands r2, r6
+; THUMBV6-NEXT: str r2, [sp, #80] @ 4-byte Spill
+; THUMBV6-NEXT: ldr r2, [sp, #80] @ 4-byte Reload
+; THUMBV6-NEXT: orrs r2, r5
+; THUMBV6-NEXT: str r2, [sp, #80] @ 4-byte Spill
+; THUMBV6-NEXT: ldr r5, [sp, #80] @ 4-byte Reload
+; THUMBV6-NEXT: orrs r5, r7
+; THUMBV6-NEXT: orrs r5, r4
+; THUMBV6-NEXT: b .LBB0_8
+; THUMBV6-NEXT: .LBB0_3: @ %overflow.no.lhs
+; THUMBV6-NEXT: mov r0, r4
+; THUMBV6-NEXT: orrs r0, r1
+; THUMBV6-NEXT: bne .LBB0_4
+; THUMBV6-NEXT: b .LBB0_7
+; THUMBV6-NEXT: .LBB0_4: @ %overflow.no.lhs.only
+; THUMBV6-NEXT: mov r5, r4
+; THUMBV6-NEXT: movs r4, #0
+; THUMBV6-NEXT: mov r0, r2
+; THUMBV6-NEXT: mov r1, r4
+; THUMBV6-NEXT: mov r7, r2
+; THUMBV6-NEXT: mov r2, r5
+; THUMBV6-NEXT: str r5, [sp, #36] @ 4-byte Spill
+; THUMBV6-NEXT: mov r3, r4
+; THUMBV6-NEXT: bl __aeabi_lmul
+; THUMBV6-NEXT: str r0, [sp, #32] @ 4-byte Spill
+; THUMBV6-NEXT: str r1, [sp, #80] @ 4-byte Spill
+; THUMBV6-NEXT: mov r0, r6
+; THUMBV6-NEXT: mov r1, r4
+; THUMBV6-NEXT: mov r2, r5
+; THUMBV6-NEXT: mov r3, r4
+; THUMBV6-NEXT: bl __aeabi_lmul
+; THUMBV6-NEXT: mov r6, r1
+; THUMBV6-NEXT: ldr r1, [sp, #80] @ 4-byte Reload
+; THUMBV6-NEXT: adds r0, r0, r1
+; THUMBV6-NEXT: str r0, [sp, #80] @ 4-byte Spill
+; THUMBV6-NEXT: adcs r6, r4
+; THUMBV6-NEXT: mov r0, r7
+; THUMBV6-NEXT: mov r1, r4
+; THUMBV6-NEXT: ldr r5, [sp, #60] @ 4-byte Reload
+; THUMBV6-NEXT: mov r2, r5
+; THUMBV6-NEXT: mov r3, r4
+; THUMBV6-NEXT: bl __aeabi_lmul
+; THUMBV6-NEXT: ldr r2, [sp, #80] @ 4-byte Reload
+; THUMBV6-NEXT: adds r0, r0, r2
+; THUMBV6-NEXT: str r0, [sp, #28] @ 4-byte Spill
+; THUMBV6-NEXT: adcs r1, r4
+; THUMBV6-NEXT: adds r0, r6, r1
+; THUMBV6-NEXT: str r0, [sp, #80] @ 4-byte Spill
+; THUMBV6-NEXT: mov r7, r4
+; THUMBV6-NEXT: adcs r7, r4
+; THUMBV6-NEXT: ldr r0, [sp, #76] @ 4-byte Reload
+; THUMBV6-NEXT: mov r1, r4
+; THUMBV6-NEXT: mov r2, r5
+; THUMBV6-NEXT: mov r3, r4
+; THUMBV6-NEXT: bl __aeabi_lmul
+; THUMBV6-NEXT: mov r6, r1
+; THUMBV6-NEXT: ldr r1, [sp, #80] @ 4-byte Reload
+; THUMBV6-NEXT: adds r0, r0, r1
+; THUMBV6-NEXT: str r0, [sp, #44] @ 4-byte Spill
+; THUMBV6-NEXT: adcs r6, r7
; THUMBV6-NEXT: ldr r0, [sp, #36] @ 4-byte Reload
; THUMBV6-NEXT: mov r1, r5
-; THUMBV6-NEXT: mov r2, r6
-; THUMBV6-NEXT: mov r3, r5
+; THUMBV6-NEXT: ldr r2, [sp, #56] @ 4-byte Reload
+; THUMBV6-NEXT: ldr r3, [sp, #52] @ 4-byte Reload
+; THUMBV6-NEXT: bl __aeabi_lmul
+; THUMBV6-NEXT: str r0, [sp, #60] @ 4-byte Spill
+; THUMBV6-NEXT: str r1, [sp, #80] @ 4-byte Spill
+; THUMBV6-NEXT: ldr r7, [sp, #72] @ 4-byte Reload
+; THUMBV6-NEXT: mov r0, r7
+; THUMBV6-NEXT: ldr r5, [sp, #76] @ 4-byte Reload
+; THUMBV6-NEXT: mov r1, r5
+; THUMBV6-NEXT: mov r2, r4
+; THUMBV6-NEXT: mov r3, r4
; THUMBV6-NEXT: bl __aeabi_lmul
; THUMBV6-NEXT: str r0, [sp, #24] @ 4-byte Spill
-; THUMBV6-NEXT: adds r0, r1, r7
-; THUMBV6-NEXT: str r0, [sp, #20] @ 4-byte Spill
-; THUMBV6-NEXT: mov r0, r5
-; THUMBV6-NEXT: adcs r0, r5
-; THUMBV6-NEXT: orrs r0, r4
-; THUMBV6-NEXT: str r0, [sp, #16] @ 4-byte Spill
-; THUMBV6-NEXT: ldr r0, [sp, #92]
+; THUMBV6-NEXT: str r1, [sp, #36] @ 4-byte Spill
+; THUMBV6-NEXT: ldr r2, [sp, #60] @ 4-byte Reload
+; THUMBV6-NEXT: adds r3, r0, r2
+; THUMBV6-NEXT: ldr r2, [sp, #80] @ 4-byte Reload
+; THUMBV6-NEXT: adcs r2, r1
+; THUMBV6-NEXT: ldr r0, [sp, #44] @ 4-byte Reload
+; THUMBV6-NEXT: adds r0, r0, r3
+; THUMBV6-NEXT: str r0, [sp, #60] @ 4-byte Spill
+; THUMBV6-NEXT: adcs r2, r6
+; THUMBV6-NEXT: str r2, [sp, #80] @ 4-byte Spill
+; THUMBV6-NEXT: mov r0, r7
+; THUMBV6-NEXT: mov r1, r4
+; THUMBV6-NEXT: ldr r7, [sp, #64] @ 4-byte Reload
+; THUMBV6-NEXT: mov r2, r7
+; THUMBV6-NEXT: mov r3, r4
+; THUMBV6-NEXT: bl __aeabi_lmul
; THUMBV6-NEXT: str r0, [sp, #44] @ 4-byte Spill
-; THUMBV6-NEXT: ldr r7, [sp, #80]
+; THUMBV6-NEXT: mov r6, r1
+; THUMBV6-NEXT: mov r0, r5
+; THUMBV6-NEXT: mov r1, r4
+; THUMBV6-NEXT: mov r2, r7
+; THUMBV6-NEXT: mov r3, r4
+; THUMBV6-NEXT: bl __aeabi_lmul
+; THUMBV6-NEXT: mov r7, r1
+; THUMBV6-NEXT: adds r6, r0, r6
+; THUMBV6-NEXT: adcs r7, r4
+; THUMBV6-NEXT: ldr r0, [sp, #72] @ 4-byte Reload
+; THUMBV6-NEXT: mov r1, r4
+; THUMBV6-NEXT: ldr r5, [sp, #68] @ 4-byte Reload
+; THUMBV6-NEXT: mov r2, r5
+; THUMBV6-NEXT: mov r3, r4
+; THUMBV6-NEXT: bl __aeabi_lmul
+; THUMBV6-NEXT: adds r0, r0, r6
+; THUMBV6-NEXT: str r0, [sp, #40] @ 4-byte Spill
+; THUMBV6-NEXT: adcs r1, r4
+; THUMBV6-NEXT: adds r0, r7, r1
+; THUMBV6-NEXT: str r0, [sp, #72] @ 4-byte Spill
+; THUMBV6-NEXT: mov r7, r4
+; THUMBV6-NEXT: adcs r7, r4
+; THUMBV6-NEXT: ldr r0, [sp, #76] @ 4-byte Reload
+; THUMBV6-NEXT: mov r1, r4
+; THUMBV6-NEXT: mov r2, r5
+; THUMBV6-NEXT: mov r3, r4
+; THUMBV6-NEXT: bl __aeabi_lmul
+; THUMBV6-NEXT: mov r6, r1
+; THUMBV6-NEXT: ldr r1, [sp, #72] @ 4-byte Reload
+; THUMBV6-NEXT: adds r0, r0, r1
+; THUMBV6-NEXT: str r0, [sp, #76] @ 4-byte Spill
+; THUMBV6-NEXT: adcs r6, r7
+; THUMBV6-NEXT: ldr r0, [sp, #64] @ 4-byte Reload
; THUMBV6-NEXT: mov r1, r5
+; THUMBV6-NEXT: ldr r2, [sp, #56] @ 4-byte Reload
+; THUMBV6-NEXT: ldr r3, [sp, #52] @ 4-byte Reload
+; THUMBV6-NEXT: ldr r5, [sp, #80] @ 4-byte Reload
+; THUMBV6-NEXT: bl __aeabi_lmul
+; THUMBV6-NEXT: ldr r2, [sp, #24] @ 4-byte Reload
+; THUMBV6-NEXT: adds r0, r2, r0
+; THUMBV6-NEXT: ldr r2, [sp, #36] @ 4-byte Reload
+; THUMBV6-NEXT: adcs r1, r2
+; THUMBV6-NEXT: ldr r2, [sp, #76] @ 4-byte Reload
+; THUMBV6-NEXT: adds r0, r2, r0
+; THUMBV6-NEXT: adcs r1, r6
+; THUMBV6-NEXT: ldr r2, [sp, #32] @ 4-byte Reload
+; THUMBV6-NEXT: adds r0, r0, r2
+; THUMBV6-NEXT: ldr r2, [sp, #28] @ 4-byte Reload
+; THUMBV6-NEXT: adcs r1, r2
+; THUMBV6-NEXT: ldr r2, [sp, #60] @ 4-byte Reload
+; THUMBV6-NEXT: b .LBB0_6
+; THUMBV6-NEXT: .LBB0_5: @ %overflow.no.rhs.only
+; THUMBV6-NEXT: movs r4, #0
+; THUMBV6-NEXT: mov r0, r3
+; THUMBV6-NEXT: mov r1, r4
+; THUMBV6-NEXT: ldr r7, [sp, #56] @ 4-byte Reload
; THUMBV6-NEXT: mov r2, r7
-; THUMBV6-NEXT: mov r3, r5
+; THUMBV6-NEXT: mov r5, r3
+; THUMBV6-NEXT: mov r3, r4
; THUMBV6-NEXT: bl __aeabi_lmul
-; THUMBV6-NEXT: str r0, [sp, #12] @ 4-byte Spill
-; THUMBV6-NEXT: mov r4, r1
-; THUMBV6-NEXT: subs r0, r1, #1
-; THUMBV6-NEXT: sbcs r4, r0
-; THUMBV6-NEXT: ldr r6, [sp, #84]
+; THUMBV6-NEXT: str r0, [sp, #36] @ 4-byte Spill
+; THUMBV6-NEXT: str r1, [sp, #44] @ 4-byte Spill
+; THUMBV6-NEXT: ldr r0, [sp, #68] @ 4-byte Reload
+; THUMBV6-NEXT: mov r1, r4
+; THUMBV6-NEXT: mov r2, r7
+; THUMBV6-NEXT: mov r3, r4
+; THUMBV6-NEXT: bl __aeabi_lmul
+; THUMBV6-NEXT: mov r6, r1
+; THUMBV6-NEXT: ldr r1, [sp, #44] @ 4-byte Reload
+; THUMBV6-NEXT: adds r7, r0, r1
+; THUMBV6-NEXT: adcs r6, r4
+; THUMBV6-NEXT: mov r0, r5
+; THUMBV6-NEXT: mov r1, r4
+; THUMBV6-NEXT: ldr r5, [sp, #52] @ 4-byte Reload
+; THUMBV6-NEXT: mov r2, r5
+; THUMBV6-NEXT: mov r3, r4
+; THUMBV6-NEXT: bl __aeabi_lmul
+; THUMBV6-NEXT: adds r0, r0, r7
+; THUMBV6-NEXT: str r0, [sp, #32] @ 4-byte Spill
+; THUMBV6-NEXT: adcs r1, r4
+; THUMBV6-NEXT: adds r0, r6, r1
+; THUMBV6-NEXT: str r0, [sp, #44] @ 4-byte Spill
+; THUMBV6-NEXT: ldr r6, [sp, #68] @ 4-byte Reload
+; THUMBV6-NEXT: mov r7, r4
+; THUMBV6-NEXT: adcs r7, r4
; THUMBV6-NEXT: mov r0, r6
+; THUMBV6-NEXT: mov r1, r4
+; THUMBV6-NEXT: mov r2, r5
+; THUMBV6-NEXT: mov r3, r4
+; THUMBV6-NEXT: bl __aeabi_lmul
+; THUMBV6-NEXT: ldr r2, [sp, #44] @ 4-byte Reload
+; THUMBV6-NEXT: adds r0, r0, r2
+; THUMBV6-NEXT: str r0, [sp, #44] @ 4-byte Spill
+; THUMBV6-NEXT: adcs r1, r7
+; THUMBV6-NEXT: str r1, [sp, #40] @ 4-byte Spill
+; THUMBV6-NEXT: ldr r0, [sp, #56] @ 4-byte Reload
; THUMBV6-NEXT: mov r1, r5
-; THUMBV6-NEXT: ldr r2, [sp, #48] @ 4-byte Reload
-; THUMBV6-NEXT: mov r3, r5
+; THUMBV6-NEXT: ldr r2, [sp, #80] @ 4-byte Reload
+; THUMBV6-NEXT: ldr r3, [sp, #60] @ 4-byte Reload
; THUMBV6-NEXT: bl __aeabi_lmul
-; THUMBV6-NEXT: str r0, [sp, #4] @ 4-byte Spill
-; THUMBV6-NEXT: subs r2, r1, #1
-; THUMBV6-NEXT: sbcs r1, r2
-; THUMBV6-NEXT: ldr r3, [sp, #44] @ 4-byte Reload
-; THUMBV6-NEXT: subs r2, r3, #1
-; THUMBV6-NEXT: sbcs r3, r2
-; THUMBV6-NEXT: str r6, [sp, #8] @ 4-byte Spill
-; THUMBV6-NEXT: subs r2, r6, #1
-; THUMBV6-NEXT: sbcs r6, r2
-; THUMBV6-NEXT: ands r6, r3
-; THUMBV6-NEXT: orrs r6, r1
-; THUMBV6-NEXT: orrs r6, r4
-; THUMBV6-NEXT: ldr r0, [sp, #12] @ 4-byte Reload
-; THUMBV6-NEXT: ldr r1, [sp, #4] @ 4-byte Reload
-; THUMBV6-NEXT: adds r0, r1, r0
-; THUMBV6-NEXT: str r0, [sp, #4] @ 4-byte Spill
+; THUMBV6-NEXT: str r0, [sp, #56] @ 4-byte Spill
+; THUMBV6-NEXT: mov r5, r1
+; THUMBV6-NEXT: ldr r7, [sp, #64] @ 4-byte Reload
; THUMBV6-NEXT: mov r0, r7
-; THUMBV6-NEXT: mov r1, r5
-; THUMBV6-NEXT: ldr r4, [sp, #48] @ 4-byte Reload
+; THUMBV6-NEXT: mov r1, r6
; THUMBV6-NEXT: mov r2, r4
-; THUMBV6-NEXT: mov r3, r5
+; THUMBV6-NEXT: mov r3, r4
; THUMBV6-NEXT: bl __aeabi_lmul
-; THUMBV6-NEXT: str r0, [sp, #12] @ 4-byte Spill
-; THUMBV6-NEXT: ldr r0, [sp, #4] @ 4-byte Reload
-; THUMBV6-NEXT: adds r0, r1, r0
-; THUMBV6-NEXT: mov r1, r5
-; THUMBV6-NEXT: adcs r1, r5
-; THUMBV6-NEXT: orrs r1, r6
-; THUMBV6-NEXT: ldr r3, [sp, #36] @ 4-byte Reload
+; THUMBV6-NEXT: str r0, [sp, #28] @ 4-byte Spill
+; THUMBV6-NEXT: str r1, [sp, #52] @ 4-byte Spill
+; THUMBV6-NEXT: ldr r2, [sp, #56] @ 4-byte Reload
+; THUMBV6-NEXT: adds r2, r0, r2
+; THUMBV6-NEXT: adcs r5, r1
+; THUMBV6-NEXT: ldr r0, [sp, #44] @ 4-byte Reload
+; THUMBV6-NEXT: adds r0, r0, r2
+; THUMBV6-NEXT: str r0, [sp, #56] @ 4-byte Spill
+; THUMBV6-NEXT: ldr r0, [sp, #40] @ 4-byte Reload
+; THUMBV6-NEXT: adcs r5, r0
+; THUMBV6-NEXT: mov r0, r7
+; THUMBV6-NEXT: mov r1, r4
+; THUMBV6-NEXT: ldr r7, [sp, #72] @ 4-byte Reload
+; THUMBV6-NEXT: mov r2, r7
+; THUMBV6-NEXT: mov r3, r4
+; THUMBV6-NEXT: bl __aeabi_lmul
+; THUMBV6-NEXT: str r0, [sp, #44] @ 4-byte Spill
+; THUMBV6-NEXT: str r1, [sp, #40] @ 4-byte Spill
+; THUMBV6-NEXT: mov r0, r6
+; THUMBV6-NEXT: mov r1, r4
+; THUMBV6-NEXT: mov r2, r7
+; THUMBV6-NEXT: mov r3, r4
+; THUMBV6-NEXT: bl __aeabi_lmul
+; THUMBV6-NEXT: mov r7, r1
+; THUMBV6-NEXT: ldr r1, [sp, #40] @ 4-byte Reload
+; THUMBV6-NEXT: adds r0, r0, r1
+; THUMBV6-NEXT: str r0, [sp, #40] @ 4-byte Spill
+; THUMBV6-NEXT: adcs r7, r4
+; THUMBV6-NEXT: ldr r0, [sp, #64] @ 4-byte Reload
+; THUMBV6-NEXT: mov r1, r4
+; THUMBV6-NEXT: ldr r6, [sp, #76] @ 4-byte Reload
+; THUMBV6-NEXT: mov r2, r6
+; THUMBV6-NEXT: mov r3, r4
+; THUMBV6-NEXT: bl __aeabi_lmul
+; THUMBV6-NEXT: ldr r2, [sp, #40] @ 4-byte Reload
+; THUMBV6-NEXT: adds r0, r0, r2
+; THUMBV6-NEXT: str r0, [sp, #40] @ 4-byte Spill
+; THUMBV6-NEXT: adcs r1, r4
+; THUMBV6-NEXT: adds r0, r7, r1
+; THUMBV6-NEXT: str r0, [sp, #64] @ 4-byte Spill
+; THUMBV6-NEXT: mov r7, r4
+; THUMBV6-NEXT: adcs r7, r4
+; THUMBV6-NEXT: ldr r0, [sp, #68] @ 4-byte Reload
+; THUMBV6-NEXT: mov r1, r4
+; THUMBV6-NEXT: mov r2, r6
+; THUMBV6-NEXT: mov r3, r4
+; THUMBV6-NEXT: bl __aeabi_lmul
+; THUMBV6-NEXT: mov r6, r1
+; THUMBV6-NEXT: ldr r1, [sp, #64] @ 4-byte Reload
+; THUMBV6-NEXT: adds r0, r0, r1
+; THUMBV6-NEXT: str r0, [sp, #68] @ 4-byte Spill
+; THUMBV6-NEXT: adcs r6, r7
+; THUMBV6-NEXT: add r2, sp, #72
+; THUMBV6-NEXT: ldm r2, {r0, r1, r2} @ 12-byte Folded Reload
+; THUMBV6-NEXT: ldr r3, [sp, #60] @ 4-byte Reload
+; THUMBV6-NEXT: bl __aeabi_lmul
+; THUMBV6-NEXT: ldr r2, [sp, #28] @ 4-byte Reload
+; THUMBV6-NEXT: adds r0, r2, r0
+; THUMBV6-NEXT: ldr r2, [sp, #52] @ 4-byte Reload
+; THUMBV6-NEXT: adcs r1, r2
+; THUMBV6-NEXT: ldr r2, [sp, #68] @ 4-byte Reload
+; THUMBV6-NEXT: adds r0, r2, r0
+; THUMBV6-NEXT: adcs r1, r6
+; THUMBV6-NEXT: ldr r2, [sp, #36] @ 4-byte Reload
+; THUMBV6-NEXT: adds r0, r0, r2
; THUMBV6-NEXT: ldr r2, [sp, #32] @ 4-byte Reload
-; THUMBV6-NEXT: orrs r3, r2
-; THUMBV6-NEXT: subs r2, r3, #1
-; THUMBV6-NEXT: sbcs r3, r2
-; THUMBV6-NEXT: ldr r2, [sp, #8] @ 4-byte Reload
-; THUMBV6-NEXT: orrs r7, r2
-; THUMBV6-NEXT: subs r2, r7, #1
-; THUMBV6-NEXT: sbcs r7, r2
-; THUMBV6-NEXT: ands r7, r3
-; THUMBV6-NEXT: orrs r7, r1
-; THUMBV6-NEXT: ldr r1, [sp, #16] @ 4-byte Reload
-; THUMBV6-NEXT: orrs r7, r1
-; THUMBV6-NEXT: ldr r1, [sp, #24] @ 4-byte Reload
-; THUMBV6-NEXT: ldr r2, [sp, #12] @ 4-byte Reload
-; THUMBV6-NEXT: adds r1, r2, r1
-; THUMBV6-NEXT: str r1, [sp, #32] @ 4-byte Spill
-; THUMBV6-NEXT: ldr r1, [sp, #20] @ 4-byte Reload
-; THUMBV6-NEXT: adcs r0, r1
-; THUMBV6-NEXT: str r0, [sp, #36] @ 4-byte Spill
-; THUMBV6-NEXT: ldr r0, [sp, #56] @ 4-byte Reload
+; THUMBV6-NEXT: adcs r1, r2
+; THUMBV6-NEXT: ldr r2, [sp, #56] @ 4-byte Reload
+; THUMBV6-NEXT: .LBB0_6: @ %overflow.res
+; THUMBV6-NEXT: adcs r2, r4
+; THUMBV6-NEXT: adcs r5, r4
+; THUMBV6-NEXT: orrs r5, r2
+; THUMBV6-NEXT: subs r2, r5, #1
+; THUMBV6-NEXT: sbcs r5, r2
+; THUMBV6-NEXT: b .LBB0_8
+; THUMBV6-NEXT: .LBB0_7: @ %overflow.no
+; THUMBV6-NEXT: movs r5, #0
+; THUMBV6-NEXT: mov r0, r2
; THUMBV6-NEXT: mov r1, r5
-; THUMBV6-NEXT: mov r2, r4
+; THUMBV6-NEXT: mov r7, r2
+; THUMBV6-NEXT: mov r2, r3
+; THUMBV6-NEXT: mov r4, r3
; THUMBV6-NEXT: mov r3, r5
; THUMBV6-NEXT: bl __aeabi_lmul
-; THUMBV6-NEXT: mov r4, r1
-; THUMBV6-NEXT: ldr r1, [sp, #28] @ 4-byte Reload
-; THUMBV6-NEXT: adds r6, r0, r1
-; THUMBV6-NEXT: adcs r4, r5
-; THUMBV6-NEXT: ldr r0, [sp, #52] @ 4-byte Reload
+; THUMBV6-NEXT: str r0, [sp, #44] @ 4-byte Spill
+; THUMBV6-NEXT: str r1, [sp, #40] @ 4-byte Spill
+; THUMBV6-NEXT: mov r0, r6
; THUMBV6-NEXT: mov r1, r5
-; THUMBV6-NEXT: ldr r2, [sp, #44] @ 4-byte Reload
+; THUMBV6-NEXT: mov r2, r4
; THUMBV6-NEXT: mov r3, r5
; THUMBV6-NEXT: bl __aeabi_lmul
-; THUMBV6-NEXT: adds r0, r0, r6
-; THUMBV6-NEXT: ldr r2, [sp, #40] @ 4-byte Reload
-; THUMBV6-NEXT: str r0, [r2, #4]
-; THUMBV6-NEXT: adcs r1, r5
-; THUMBV6-NEXT: adds r0, r4, r1
-; THUMBV6-NEXT: str r0, [sp, #28] @ 4-byte Spill
-; THUMBV6-NEXT: mov r6, r5
+; THUMBV6-NEXT: mov r4, r6
+; THUMBV6-NEXT: mov r6, r1
+; THUMBV6-NEXT: ldr r1, [sp, #40] @ 4-byte Reload
+; THUMBV6-NEXT: adds r0, r0, r1
+; THUMBV6-NEXT: str r0, [sp, #40] @ 4-byte Spill
; THUMBV6-NEXT: adcs r6, r5
-; THUMBV6-NEXT: ldr r0, [sp, #56] @ 4-byte Reload
+; THUMBV6-NEXT: mov r0, r7
; THUMBV6-NEXT: mov r1, r5
-; THUMBV6-NEXT: ldr r4, [sp, #44] @ 4-byte Reload
-; THUMBV6-NEXT: mov r2, r4
+; THUMBV6-NEXT: ldr r7, [sp, #68] @ 4-byte Reload
+; THUMBV6-NEXT: mov r2, r7
; THUMBV6-NEXT: mov r3, r5
; THUMBV6-NEXT: bl __aeabi_lmul
-; THUMBV6-NEXT: ldr r2, [sp, #28] @ 4-byte Reload
+; THUMBV6-NEXT: ldr r2, [sp, #40] @ 4-byte Reload
; THUMBV6-NEXT: adds r0, r0, r2
-; THUMBV6-NEXT: str r0, [sp, #28] @ 4-byte Spill
-; THUMBV6-NEXT: adcs r1, r6
-; THUMBV6-NEXT: str r1, [sp, #24] @ 4-byte Spill
-; THUMBV6-NEXT: ldr r0, [sp, #48] @ 4-byte Reload
-; THUMBV6-NEXT: mov r1, r4
-; THUMBV6-NEXT: mov r2, r5
+; THUMBV6-NEXT: str r0, [sp, #40] @ 4-byte Spill
+; THUMBV6-NEXT: adcs r1, r5
+; THUMBV6-NEXT: adds r0, r6, r1
+; THUMBV6-NEXT: str r0, [sp, #36] @ 4-byte Spill
+; THUMBV6-NEXT: mov r6, r7
+; THUMBV6-NEXT: mov r7, r5
+; THUMBV6-NEXT: adcs r7, r5
+; THUMBV6-NEXT: mov r0, r4
+; THUMBV6-NEXT: mov r1, r5
+; THUMBV6-NEXT: mov r2, r6
; THUMBV6-NEXT: mov r3, r5
; THUMBV6-NEXT: bl __aeabi_lmul
-; THUMBV6-NEXT: mov r6, r0
; THUMBV6-NEXT: mov r4, r1
-; THUMBV6-NEXT: ldr r0, [sp, #52] @ 4-byte Reload
-; THUMBV6-NEXT: ldr r1, [sp, #56] @ 4-byte Reload
-; THUMBV6-NEXT: mov r2, r5
-; THUMBV6-NEXT: mov r3, r5
+; THUMBV6-NEXT: ldr r1, [sp, #36] @ 4-byte Reload
+; THUMBV6-NEXT: adds r0, r0, r1
+; THUMBV6-NEXT: str r0, [sp, #36] @ 4-byte Spill
+; THUMBV6-NEXT: adcs r4, r7
+; THUMBV6-NEXT: ldr r0, [sp, #64] @ 4-byte Reload
+; THUMBV6-NEXT: mov r1, r6
+; THUMBV6-NEXT: ldr r2, [sp, #56] @ 4-byte Reload
+; THUMBV6-NEXT: ldr r3, [sp, #52] @ 4-byte Reload
+; THUMBV6-NEXT: bl __aeabi_lmul
+; THUMBV6-NEXT: mov r6, r0
+; THUMBV6-NEXT: mov r7, r1
+; THUMBV6-NEXT: ldr r0, [sp, #80] @ 4-byte Reload
+; THUMBV6-NEXT: ldr r1, [sp, #60] @ 4-byte Reload
+; THUMBV6-NEXT: ldr r2, [sp, #72] @ 4-byte Reload
+; THUMBV6-NEXT: ldr r3, [sp, #76] @ 4-byte Reload
; THUMBV6-NEXT: bl __aeabi_lmul
; THUMBV6-NEXT: adds r0, r0, r6
-; THUMBV6-NEXT: adcs r1, r4
-; THUMBV6-NEXT: ldr r2, [sp, #28] @ 4-byte Reload
+; THUMBV6-NEXT: adcs r1, r7
+; THUMBV6-NEXT: ldr r2, [sp, #36] @ 4-byte Reload
; THUMBV6-NEXT: adds r0, r2, r0
-; THUMBV6-NEXT: ldr r2, [sp, #24] @ 4-byte Reload
-; THUMBV6-NEXT: adcs r1, r2
-; THUMBV6-NEXT: ldr r2, [sp, #32] @ 4-byte Reload
-; THUMBV6-NEXT: adds r0, r0, r2
-; THUMBV6-NEXT: ldr r2, [sp, #40] @ 4-byte Reload
+; THUMBV6-NEXT: adcs r1, r4
+; THUMBV6-NEXT: .LBB0_8: @ %overflow.res
+; THUMBV6-NEXT: ldr r2, [sp, #48] @ 4-byte Reload
+; THUMBV6-NEXT: ldr r3, [sp, #44] @ 4-byte Reload
+; THUMBV6-NEXT: str r3, [r2]
+; THUMBV6-NEXT: ldr r3, [sp, #40] @ 4-byte Reload
+; THUMBV6-NEXT: str r3, [r2, #4]
; THUMBV6-NEXT: str r0, [r2, #8]
-; THUMBV6-NEXT: ldr r0, [sp, #36] @ 4-byte Reload
-; THUMBV6-NEXT: adcs r1, r0
; THUMBV6-NEXT: str r1, [r2, #12]
-; THUMBV6-NEXT: adcs r5, r5
-; THUMBV6-NEXT: orrs r5, r7
; THUMBV6-NEXT: movs r0, #1
; THUMBV6-NEXT: ands r0, r5
; THUMBV6-NEXT: strb r0, [r2, #16]
-; THUMBV6-NEXT: add sp, #60
+; THUMBV6-NEXT: add sp, #84
; THUMBV6-NEXT: pop {r4, r5, r6, r7, pc}
start:
%0 = tail call { i128, i1 } @llvm.umul.with.overflow.i128(i128 %l, i128 %r) #2
diff --git a/llvm/test/CodeGen/Thumb2/umulo-128-legalisation-lowering.ll b/llvm/test/CodeGen/Thumb2/umulo-128-legalisation-lowering.ll
index fe1d06cb39e16..07cd9788d91e1 100644
--- a/llvm/test/CodeGen/Thumb2/umulo-128-legalisation-lowering.ll
+++ b/llvm/test/CodeGen/Thumb2/umulo-128-legalisation-lowering.ll
@@ -3,125 +3,211 @@
define { i128, i8 } @muloti_test(i128 %l, i128 %r) unnamed_addr #0 {
; THUMBV7-LABEL: muloti_test:
-; THUMBV7: @ %bb.0: @ %start
+; THUMBV7: @ %bb.0: @ %overflow.entry
; THUMBV7-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
; THUMBV7-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; THUMBV7-NEXT: .pad #44
-; THUMBV7-NEXT: sub sp, #44
-; THUMBV7-NEXT: ldr.w r8, [sp, #88]
-; THUMBV7-NEXT: mov r9, r0
-; THUMBV7-NEXT: ldr r7, [sp, #96]
-; THUMBV7-NEXT: ldr.w lr, [sp, #100]
-; THUMBV7-NEXT: umull r0, r5, r2, r8
-; THUMBV7-NEXT: ldr r4, [sp, #80]
-; THUMBV7-NEXT: str r0, [sp, #32] @ 4-byte Spill
-; THUMBV7-NEXT: umull r1, r0, r3, r7
-; THUMBV7-NEXT: str r0, [sp, #4] @ 4-byte Spill
-; THUMBV7-NEXT: umull r0, r11, lr, r2
-; THUMBV7-NEXT: str r1, [sp, #20] @ 4-byte Spill
-; THUMBV7-NEXT: ldr r1, [sp, #92]
-; THUMBV7-NEXT: str r0, [sp] @ 4-byte Spill
-; THUMBV7-NEXT: umull r0, r10, r7, r2
-; THUMBV7-NEXT: mov r7, r1
-; THUMBV7-NEXT: umull r6, r12, r1, r4
-; THUMBV7-NEXT: str r0, [sp, #40] @ 4-byte Spill
-; THUMBV7-NEXT: ldr r0, [sp, #84]
-; THUMBV7-NEXT: str r6, [sp, #24] @ 4-byte Spill
-; THUMBV7-NEXT: umull r6, r1, r0, r8
-; THUMBV7-NEXT: str r6, [sp, #16] @ 4-byte Spill
-; THUMBV7-NEXT: umull r6, r2, r2, r7
-; THUMBV7-NEXT: mov r7, r4
-; THUMBV7-NEXT: strd r6, r2, [sp, #8] @ 8-byte Folded Spill
-; THUMBV7-NEXT: umull r2, r6, r4, r8
-; THUMBV7-NEXT: str r2, [sp, #36] @ 4-byte Spill
-; THUMBV7-NEXT: ldr r2, [sp, #32] @ 4-byte Reload
-; THUMBV7-NEXT: str r6, [sp, #28] @ 4-byte Spill
-; THUMBV7-NEXT: movs r6, #0
-; THUMBV7-NEXT: str.w r2, [r9]
-; THUMBV7-NEXT: umlal r5, r6, r3, r8
-; THUMBV7-NEXT: ldr r2, [sp, #20] @ 4-byte Reload
-; THUMBV7-NEXT: ldr r4, [sp] @ 4-byte Reload
-; THUMBV7-NEXT: add r4, r2
-; THUMBV7-NEXT: adds.w r2, r10, r4
-; THUMBV7-NEXT: str r2, [sp, #20] @ 4-byte Spill
-; THUMBV7-NEXT: mov.w r2, #0
-; THUMBV7-NEXT: adc r2, r2, #0
-; THUMBV7-NEXT: cmp.w r12, #0
-; THUMBV7-NEXT: str r2, [sp, #32] @ 4-byte Spill
+; THUMBV7-NEXT: .pad #12
+; THUMBV7-NEXT: sub sp, #12
+; THUMBV7-NEXT: ldrd r11, r6, [sp, #48]
+; THUMBV7-NEXT: ldrd r10, r5, [sp, #64]
+; THUMBV7-NEXT: ldrd r9, r12, [sp, #56]
+; THUMBV7-NEXT: orrs.w r1, r11, r6
+; THUMBV7-NEXT: beq .LBB0_3
+; THUMBV7-NEXT: @ %bb.1: @ %overflow.lhs
+; THUMBV7-NEXT: orr.w r4, r10, r5
+; THUMBV7-NEXT: cmp r4, #0
+; THUMBV7-NEXT: beq.w .LBB0_5
+; THUMBV7-NEXT: @ %bb.2: @ %overflow
; THUMBV7-NEXT: it ne
-; THUMBV7-NEXT: movne.w r12, #1
+; THUMBV7-NEXT: movne r4, #1
; THUMBV7-NEXT: cmp r1, #0
-; THUMBV7-NEXT: ldr r2, [sp, #96]
; THUMBV7-NEXT: it ne
; THUMBV7-NEXT: movne r1, #1
-; THUMBV7-NEXT: orrs.w r10, r7, r0
-; THUMBV7-NEXT: it ne
-; THUMBV7-NEXT: movne.w r10, #1
-; THUMBV7-NEXT: orrs.w r7, r2, lr
-; THUMBV7-NEXT: ldr r2, [sp, #92]
-; THUMBV7-NEXT: it ne
-; THUMBV7-NEXT: movne r7, #1
-; THUMBV7-NEXT: cmp r0, #0
+; THUMBV7-NEXT: and.w lr, r1, r4
+; THUMBV7-NEXT: umull r7, r4, r6, r9
+; THUMBV7-NEXT: cmp.w r12, #0
+; THUMBV7-NEXT: mov r1, r12
; THUMBV7-NEXT: it ne
-; THUMBV7-NEXT: movne r0, #1
-; THUMBV7-NEXT: cmp r2, #0
-; THUMBV7-NEXT: mov r4, r2
-; THUMBV7-NEXT: mov r8, r2
+; THUMBV7-NEXT: movne r1, #1
+; THUMBV7-NEXT: cmp r6, #0
; THUMBV7-NEXT: it ne
-; THUMBV7-NEXT: movne r4, #1
-; THUMBV7-NEXT: ldr r2, [sp, #8] @ 4-byte Reload
-; THUMBV7-NEXT: ands r0, r4
-; THUMBV7-NEXT: movs r4, #0
-; THUMBV7-NEXT: adds r5, r5, r2
-; THUMBV7-NEXT: str.w r5, [r9, #4]
-; THUMBV7-NEXT: orr.w r0, r0, r1
-; THUMBV7-NEXT: ldr r1, [sp, #24] @ 4-byte Reload
-; THUMBV7-NEXT: ldr r2, [sp, #16] @ 4-byte Reload
-; THUMBV7-NEXT: and.w r5, r10, r7
-; THUMBV7-NEXT: orr.w r0, r0, r12
-; THUMBV7-NEXT: mov.w r12, #0
-; THUMBV7-NEXT: add r1, r2
-; THUMBV7-NEXT: ldr r2, [sp, #12] @ 4-byte Reload
-; THUMBV7-NEXT: adcs r2, r6
-; THUMBV7-NEXT: ldr r6, [sp, #28] @ 4-byte Reload
-; THUMBV7-NEXT: adc r7, r4, #0
-; THUMBV7-NEXT: adds r1, r1, r6
-; THUMBV7-NEXT: umlal r2, r7, r3, r8
-; THUMBV7-NEXT: adc r4, r4, #0
-; THUMBV7-NEXT: orrs r0, r4
-; THUMBV7-NEXT: orrs r0, r5
-; THUMBV7-NEXT: ldrd r5, r4, [sp, #36] @ 8-byte Folded Reload
-; THUMBV7-NEXT: adds r5, r5, r4
-; THUMBV7-NEXT: ldr r4, [sp, #20] @ 4-byte Reload
-; THUMBV7-NEXT: adcs r1, r4
-; THUMBV7-NEXT: ldr r4, [sp, #4] @ 4-byte Reload
+; THUMBV7-NEXT: movne r6, #1
+; THUMBV7-NEXT: ands r1, r6
; THUMBV7-NEXT: cmp r4, #0
; THUMBV7-NEXT: it ne
; THUMBV7-NEXT: movne r4, #1
+; THUMBV7-NEXT: orrs r1, r4
+; THUMBV7-NEXT: umull r4, r6, r12, r11
+; THUMBV7-NEXT: cmp r6, #0
+; THUMBV7-NEXT: it ne
+; THUMBV7-NEXT: movne r6, #1
+; THUMBV7-NEXT: orrs r6, r1
+; THUMBV7-NEXT: adds r1, r7, r4
+; THUMBV7-NEXT: umull r11, r4, r11, r9
+; THUMBV7-NEXT: adds.w r8, r4, r1
+; THUMBV7-NEXT: mov.w r1, #0
+; THUMBV7-NEXT: adc r4, r1, #0
; THUMBV7-NEXT: cmp r3, #0
+; THUMBV7-NEXT: orr.w r4, r4, r6
+; THUMBV7-NEXT: umull r7, r6, r5, r2
+; THUMBV7-NEXT: orr.w lr, lr, r4
+; THUMBV7-NEXT: mov r4, r3
+; THUMBV7-NEXT: it ne
+; THUMBV7-NEXT: movne r4, #1
+; THUMBV7-NEXT: cmp r5, #0
+; THUMBV7-NEXT: it ne
+; THUMBV7-NEXT: movne r5, #1
+; THUMBV7-NEXT: ands r4, r5
+; THUMBV7-NEXT: cmp r6, #0
; THUMBV7-NEXT: it ne
-; THUMBV7-NEXT: movne r3, #1
-; THUMBV7-NEXT: cmp.w lr, #0
+; THUMBV7-NEXT: movne r6, #1
+; THUMBV7-NEXT: orrs r4, r6
+; THUMBV7-NEXT: umull r5, r6, r3, r10
+; THUMBV7-NEXT: cmp r6, #0
; THUMBV7-NEXT: it ne
-; THUMBV7-NEXT: movne.w lr, #1
-; THUMBV7-NEXT: cmp.w r11, #0
+; THUMBV7-NEXT: movne r6, #1
+; THUMBV7-NEXT: orrs r4, r6
+; THUMBV7-NEXT: add r5, r7
+; THUMBV7-NEXT: umull r6, r7, r10, r2
+; THUMBV7-NEXT: adds r5, r5, r7
+; THUMBV7-NEXT: adc r7, r1, #0
+; THUMBV7-NEXT: adds.w r6, r6, r11
+; THUMBV7-NEXT: orr.w r4, r4, r7
+; THUMBV7-NEXT: mov.w r7, #0
+; THUMBV7-NEXT: orr.w lr, lr, r4
+; THUMBV7-NEXT: umull r11, r4, r2, r9
+; THUMBV7-NEXT: adc.w r10, r8, r5
+; THUMBV7-NEXT: umlal r4, r7, r3, r9
+; THUMBV7-NEXT: umull r2, r5, r2, r12
+; THUMBV7-NEXT: adds.w r8, r2, r4
+; THUMBV7-NEXT: adcs.w r2, r7, r5
+; THUMBV7-NEXT: adc r4, r1, #0
+; THUMBV7-NEXT: umlal r2, r4, r3, r12
+; THUMBV7-NEXT: adds r2, r2, r6
+; THUMBV7-NEXT: adcs.w r3, r4, r10
+; THUMBV7-NEXT: adc r1, r1, #0
+; THUMBV7-NEXT: orr.w r1, r1, lr
+; THUMBV7-NEXT: b .LBB0_8
+; THUMBV7-NEXT: .LBB0_3: @ %overflow.no.lhs
+; THUMBV7-NEXT: orrs.w r1, r10, r5
+; THUMBV7-NEXT: beq.w .LBB0_7
+; THUMBV7-NEXT: @ %bb.4: @ %overflow.no.lhs.only
+; THUMBV7-NEXT: umull r1, lr, r2, r10
+; THUMBV7-NEXT: movs r7, #0
+; THUMBV7-NEXT: umlal lr, r7, r3, r10
+; THUMBV7-NEXT: str r1, [sp, #8] @ 4-byte Spill
+; THUMBV7-NEXT: umull r4, r8, r2, r5
+; THUMBV7-NEXT: adds.w r1, r4, lr
+; THUMBV7-NEXT: str r1, [sp, #4] @ 4-byte Spill
+; THUMBV7-NEXT: adcs.w r7, r7, r8
+; THUMBV7-NEXT: mov.w r1, #0
+; THUMBV7-NEXT: adc lr, r1, #0
+; THUMBV7-NEXT: umull r8, r1, r10, r11
+; THUMBV7-NEXT: mla r1, r10, r6, r1
+; THUMBV7-NEXT: umlal r7, lr, r3, r5
+; THUMBV7-NEXT: mla r1, r5, r11, r1
+; THUMBV7-NEXT: adds.w r5, r7, r8
+; THUMBV7-NEXT: umull r4, r7, r2, r9
+; THUMBV7-NEXT: adc.w r10, lr, r1
+; THUMBV7-NEXT: movs r1, #0
+; THUMBV7-NEXT: umlal r7, r1, r3, r9
+; THUMBV7-NEXT: umull r2, lr, r2, r12
+; THUMBV7-NEXT: adds.w r8, r2, r7
+; THUMBV7-NEXT: mov.w r2, #0
+; THUMBV7-NEXT: adcs.w r1, r1, lr
+; THUMBV7-NEXT: adc r2, r2, #0
+; THUMBV7-NEXT: umlal r1, r2, r3, r12
+; THUMBV7-NEXT: umull r3, r7, r9, r11
+; THUMBV7-NEXT: mla r7, r9, r6, r7
+; THUMBV7-NEXT: adds r1, r1, r3
+; THUMBV7-NEXT: mla r7, r12, r11, r7
+; THUMBV7-NEXT: mov r11, r4
+; THUMBV7-NEXT: adc.w r3, r2, r7
+; THUMBV7-NEXT: ldr r2, [sp, #8] @ 4-byte Reload
+; THUMBV7-NEXT: adds r2, r2, r1
+; THUMBV7-NEXT: ldr r1, [sp, #4] @ 4-byte Reload
+; THUMBV7-NEXT: adcs r3, r1
+; THUMBV7-NEXT: adcs r1, r5, #0
+; THUMBV7-NEXT: adc r7, r10, #0
+; THUMBV7-NEXT: b .LBB0_6
+; THUMBV7-NEXT: .LBB0_5: @ %overflow.no.rhs.only
+; THUMBV7-NEXT: umull r1, r4, r9, r11
+; THUMBV7-NEXT: movs r7, #0
+; THUMBV7-NEXT: mov.w r8, #0
+; THUMBV7-NEXT: umlal r4, r7, r12, r11
+; THUMBV7-NEXT: str r1, [sp, #8] @ 4-byte Spill
+; THUMBV7-NEXT: umull r1, lr, r9, r6
+; THUMBV7-NEXT: adds r1, r1, r4
+; THUMBV7-NEXT: str r1, [sp, #4] @ 4-byte Spill
+; THUMBV7-NEXT: adcs.w r7, r7, lr
+; THUMBV7-NEXT: umull lr, r1, r11, r10
+; THUMBV7-NEXT: adc r4, r8, #0
+; THUMBV7-NEXT: mla r1, r11, r5, r1
+; THUMBV7-NEXT: umlal r7, r4, r12, r6
+; THUMBV7-NEXT: mla r1, r6, r10, r1
+; THUMBV7-NEXT: adds.w r7, r7, lr
+; THUMBV7-NEXT: str r7, [sp] @ 4-byte Spill
+; THUMBV7-NEXT: mov.w r7, #0
+; THUMBV7-NEXT: adc.w r11, r4, r1
+; THUMBV7-NEXT: umull lr, r4, r9, r2
+; THUMBV7-NEXT: umlal r4, r7, r12, r2
+; THUMBV7-NEXT: umull r1, r9, r9, r3
+; THUMBV7-NEXT: adds.w r8, r1, r4
+; THUMBV7-NEXT: mov.w r4, #0
+; THUMBV7-NEXT: adcs.w r1, r7, r9
+; THUMBV7-NEXT: umull r7, r6, r2, r10
+; THUMBV7-NEXT: adc r4, r4, #0
+; THUMBV7-NEXT: mla r2, r2, r5, r6
+; THUMBV7-NEXT: umlal r1, r4, r12, r3
+; THUMBV7-NEXT: mla r2, r3, r10, r2
+; THUMBV7-NEXT: adds r1, r1, r7
+; THUMBV7-NEXT: adc.w r3, r4, r2
+; THUMBV7-NEXT: ldr r2, [sp, #8] @ 4-byte Reload
+; THUMBV7-NEXT: adds r2, r2, r1
+; THUMBV7-NEXT: ldr r1, [sp, #4] @ 4-byte Reload
+; THUMBV7-NEXT: adcs r3, r1
+; THUMBV7-NEXT: ldr r1, [sp] @ 4-byte Reload
+; THUMBV7-NEXT: adcs r1, r1, #0
+; THUMBV7-NEXT: adc r7, r11, #0
+; THUMBV7-NEXT: mov r11, lr
+; THUMBV7-NEXT: .LBB0_6: @ %overflow.res
+; THUMBV7-NEXT: orrs r1, r7
; THUMBV7-NEXT: it ne
-; THUMBV7-NEXT: movne.w r11, #1
-; THUMBV7-NEXT: adds r2, r2, r5
-; THUMBV7-NEXT: and.w r3, r3, lr
-; THUMBV7-NEXT: str.w r2, [r9, #8]
-; THUMBV7-NEXT: adcs r1, r7
-; THUMBV7-NEXT: str.w r1, [r9, #12]
-; THUMBV7-NEXT: orr.w r1, r3, r11
-; THUMBV7-NEXT: ldr r2, [sp, #32] @ 4-byte Reload
-; THUMBV7-NEXT: orr.w r1, r1, r4
-; THUMBV7-NEXT: orr.w r1, r1, r2
-; THUMBV7-NEXT: orr.w r0, r0, r1
-; THUMBV7-NEXT: adc r1, r12, #0
-; THUMBV7-NEXT: orrs r0, r1
-; THUMBV7-NEXT: and r0, r0, #1
-; THUMBV7-NEXT: strb.w r0, [r9, #16]
-; THUMBV7-NEXT: add sp, #44
+; THUMBV7-NEXT: movne r1, #1
+; THUMBV7-NEXT: b .LBB0_8
+; THUMBV7-NEXT: .LBB0_7: @ %overflow.no
+; THUMBV7-NEXT: umull r1, lr, r2, r9
+; THUMBV7-NEXT: movs r4, #0
+; THUMBV7-NEXT: umlal lr, r4, r3, r9
+; THUMBV7-NEXT: str r1, [sp, #8] @ 4-byte Spill
+; THUMBV7-NEXT: movs r1, #0
+; THUMBV7-NEXT: umull r7, r8, r2, r12
+; THUMBV7-NEXT: adds.w r7, r7, lr
+; THUMBV7-NEXT: str r7, [sp] @ 4-byte Spill
+; THUMBV7-NEXT: adcs.w r7, r4, r8
+; THUMBV7-NEXT: ldr r4, [sp, #60]
+; THUMBV7-NEXT: adc r8, r1, #0
+; THUMBV7-NEXT: umlal r7, r8, r3, r12
+; THUMBV7-NEXT: umull r12, lr, r9, r11
+; THUMBV7-NEXT: mla r6, r9, r6, lr
+; THUMBV7-NEXT: str.w r12, [sp, #4] @ 4-byte Spill
+; THUMBV7-NEXT: mla r12, r4, r11, r6
+; THUMBV7-NEXT: ldr.w r11, [sp, #8] @ 4-byte Reload
+; THUMBV7-NEXT: umull lr, r6, r10, r2
+; THUMBV7-NEXT: mla r3, r10, r3, r6
+; THUMBV7-NEXT: mla r2, r5, r2, r3
+; THUMBV7-NEXT: ldr r3, [sp, #4] @ 4-byte Reload
+; THUMBV7-NEXT: adds.w r3, r3, lr
+; THUMBV7-NEXT: adc.w r6, r2, r12
+; THUMBV7-NEXT: adds r2, r7, r3
+; THUMBV7-NEXT: adc.w r3, r8, r6
+; THUMBV7-NEXT: ldr.w r8, [sp] @ 4-byte Reload
+; THUMBV7-NEXT: .LBB0_8: @ %overflow.res
+; THUMBV7-NEXT: strd r11, r8, [r0]
+; THUMBV7-NEXT: and r1, r1, #1
+; THUMBV7-NEXT: strd r2, r3, [r0, #8]
+; THUMBV7-NEXT: strb r1, [r0, #16]
+; THUMBV7-NEXT: add sp, #12
; THUMBV7-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
start:
%0 = tail call { i128, i1 } @llvm.umul.with.overflow.i128(i128 %l, i128 %r) #2
diff --git a/llvm/test/CodeGen/Thumb2/umulo-64-legalisation-lowering.ll b/llvm/test/CodeGen/Thumb2/umulo-64-legalisation-lowering.ll
index 55e917159fce9..997868766d1dd 100644
--- a/llvm/test/CodeGen/Thumb2/umulo-64-legalisation-lowering.ll
+++ b/llvm/test/CodeGen/Thumb2/umulo-64-legalisation-lowering.ll
@@ -3,15 +3,19 @@
define { i64, i8 } @mulodi_test(i64 %l, i64 %r) unnamed_addr #0 {
; THUMBV7-LABEL: mulodi_test:
-; THUMBV7: @ %bb.0: @ %start
+; THUMBV7: @ %bb.0: @ %overflow.entry
; THUMBV7-NEXT: .save {r4, r5, r7, lr}
; THUMBV7-NEXT: push {r4, r5, r7, lr}
-; THUMBV7-NEXT: umull r12, lr, r3, r0
+; THUMBV7-NEXT: cbz r1, .LBB0_3
+; THUMBV7-NEXT: @ %bb.1: @ %overflow.lhs
+; THUMBV7-NEXT: cbz r3, .LBB0_5
+; THUMBV7-NEXT: @ %bb.2: @ %overflow
+; THUMBV7-NEXT: umull lr, r4, r3, r0
; THUMBV7-NEXT: cmp r3, #0
; THUMBV7-NEXT: it ne
; THUMBV7-NEXT: movne r3, #1
; THUMBV7-NEXT: cmp r1, #0
-; THUMBV7-NEXT: umull r0, r4, r0, r2
+; THUMBV7-NEXT: umull r0, r12, r0, r2
; THUMBV7-NEXT: umull r2, r5, r1, r2
; THUMBV7-NEXT: it ne
; THUMBV7-NEXT: movne r1, #1
@@ -20,15 +24,44 @@ define { i64, i8 } @mulodi_test(i64 %l, i64 %r) unnamed_addr #0 {
; THUMBV7-NEXT: it ne
; THUMBV7-NEXT: movne r5, #1
; THUMBV7-NEXT: orrs r1, r5
-; THUMBV7-NEXT: cmp.w lr, #0
+; THUMBV7-NEXT: cmp r4, #0
; THUMBV7-NEXT: it ne
-; THUMBV7-NEXT: movne.w lr, #1
-; THUMBV7-NEXT: orr.w r3, r1, lr
-; THUMBV7-NEXT: add.w r1, r2, r12
+; THUMBV7-NEXT: movne r4, #1
+; THUMBV7-NEXT: orr.w r3, r1, r4
+; THUMBV7-NEXT: add.w r1, r2, lr
; THUMBV7-NEXT: movs r2, #0
-; THUMBV7-NEXT: adds r1, r1, r4
+; THUMBV7-NEXT: adds.w r1, r1, r12
; THUMBV7-NEXT: adc r2, r2, #0
-; THUMBV7-NEXT: orrs r2, r3
+; THUMBV7-NEXT: orr.w r12, r3, r2
+; THUMBV7-NEXT: and r2, r12, #1
+; THUMBV7-NEXT: pop {r4, r5, r7, pc}
+; THUMBV7-NEXT: .LBB0_3: @ %overflow.no.lhs
+; THUMBV7-NEXT: mov r5, r0
+; THUMBV7-NEXT: umull r0, r4, r0, r2
+; THUMBV7-NEXT: cbz r3, .LBB0_7
+; THUMBV7-NEXT: @ %bb.4: @ %overflow.no.lhs.only
+; THUMBV7-NEXT: mul r12, r1, r3
+; THUMBV7-NEXT: mla r1, r1, r2, r4
+; THUMBV7-NEXT: umlal r1, r12, r5, r3
+; THUMBV7-NEXT: b .LBB0_6
+; THUMBV7-NEXT: .LBB0_5: @ %overflow.no.rhs.only
+; THUMBV7-NEXT: mov lr, r0
+; THUMBV7-NEXT: umull r0, r4, r2, r0
+; THUMBV7-NEXT: mov r5, r1
+; THUMBV7-NEXT: mul r12, r3, r1
+; THUMBV7-NEXT: mla r1, r3, lr, r4
+; THUMBV7-NEXT: umlal r1, r12, r2, r5
+; THUMBV7-NEXT: .LBB0_6: @ %overflow.res
+; THUMBV7-NEXT: cmp.w r12, #0
+; THUMBV7-NEXT: it ne
+; THUMBV7-NEXT: movne.w r12, #1
+; THUMBV7-NEXT: and r2, r12, #1
+; THUMBV7-NEXT: pop {r4, r5, r7, pc}
+; THUMBV7-NEXT: .LBB0_7: @ %overflow.no
+; THUMBV7-NEXT: mla r3, r5, r3, r4
+; THUMBV7-NEXT: mov.w r12, #0
+; THUMBV7-NEXT: mla r1, r1, r2, r3
+; THUMBV7-NEXT: and r2, r12, #1
; THUMBV7-NEXT: pop {r4, r5, r7, pc}
start:
%0 = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 %l, i64 %r) #2
diff --git a/llvm/test/CodeGen/X86/muloti.ll b/llvm/test/CodeGen/X86/muloti.ll
index e101c702e6409..2d236cce94c30 100644
--- a/llvm/test/CodeGen/X86/muloti.ll
+++ b/llvm/test/CodeGen/X86/muloti.ll
@@ -6,60 +6,181 @@
; This used to call muloti4, but that won't link with libgcc.
define %0 @x(i64 %a.coerce0, i64 %a.coerce1, i64 %b.coerce0, i64 %b.coerce1) nounwind uwtable ssp {
; CHECK-LABEL: x:
-; CHECK: ## %bb.0: ## %entry
+; CHECK: ## %bb.0: ## %overflow.entry
; CHECK-NEXT: pushq %r14
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: .cfi_def_cfa_offset 24
; CHECK-NEXT: .cfi_offset %rbx, -24
; CHECK-NEXT: .cfi_offset %r14, -16
-; CHECK-NEXT: movq %rdx, %r9
-; CHECK-NEXT: movq %rsi, %r8
+; CHECK-NEXT: movq %rdx, %rax
+; CHECK-NEXT: sarq $63, %rax
+; CHECK-NEXT: movq %rdi, %r8
+; CHECK-NEXT: sarq $63, %r8
+; CHECK-NEXT: cmpq %r8, %rsi
+; CHECK-NEXT: je LBB0_5
+; CHECK-NEXT: ## %bb.1: ## %overflow.lhs
+; CHECK-NEXT: cmpq %rax, %rcx
+; CHECK-NEXT: je LBB0_2
+; CHECK-NEXT: ## %bb.7: ## %overflow1
; CHECK-NEXT: movq %rsi, %rbx
; CHECK-NEXT: sarq $63, %rbx
; CHECK-NEXT: imulq %rdx, %rbx
; CHECK-NEXT: movq %rdi, %rax
-; CHECK-NEXT: mulq %rdx
; CHECK-NEXT: movq %rdx, %r10
-; CHECK-NEXT: movq %rax, %rsi
-; CHECK-NEXT: movq %r8, %rax
-; CHECK-NEXT: mulq %r9
+; CHECK-NEXT: mulq %rdx
; CHECK-NEXT: movq %rdx, %r9
+; CHECK-NEXT: movq %rax, %r8
+; CHECK-NEXT: movq %rsi, %rax
+; CHECK-NEXT: mulq %r10
+; CHECK-NEXT: movq %rdx, %r10
; CHECK-NEXT: movq %rax, %r11
-; CHECK-NEXT: addq %r10, %r11
-; CHECK-NEXT: adcq %rbx, %r9
-; CHECK-NEXT: movq %r9, %rbx
+; CHECK-NEXT: addq %r9, %r11
+; CHECK-NEXT: adcq %rbx, %r10
+; CHECK-NEXT: movq %r10, %rbx
; CHECK-NEXT: sarq $63, %rbx
-; CHECK-NEXT: movq %rcx, %r14
-; CHECK-NEXT: sarq $63, %r14
-; CHECK-NEXT: imulq %rdi, %r14
+; CHECK-NEXT: movq %rcx, %rax
+; CHECK-NEXT: sarq $63, %rax
+; CHECK-NEXT: movq %rdi, %r14
+; CHECK-NEXT: imulq %rax, %r14
; CHECK-NEXT: movq %rdi, %rax
; CHECK-NEXT: mulq %rcx
-; CHECK-NEXT: movq %rdx, %r10
+; CHECK-NEXT: movq %rdx, %r9
; CHECK-NEXT: movq %rax, %rdi
; CHECK-NEXT: addq %r11, %rdi
-; CHECK-NEXT: adcq %r14, %r10
-; CHECK-NEXT: movq %r10, %r11
+; CHECK-NEXT: adcq %r14, %r9
+; CHECK-NEXT: movq %r9, %r11
; CHECK-NEXT: sarq $63, %r11
-; CHECK-NEXT: addq %r9, %r10
+; CHECK-NEXT: addq %r10, %r9
; CHECK-NEXT: adcq %rbx, %r11
-; CHECK-NEXT: movq %r8, %rax
+; CHECK-NEXT: movq %rsi, %rax
; CHECK-NEXT: imulq %rcx
-; CHECK-NEXT: addq %r10, %rax
+; CHECK-NEXT: movq %rax, %rcx
+; CHECK-NEXT: movq %r8, %rax
+; CHECK-NEXT: addq %r9, %rcx
; CHECK-NEXT: adcq %r11, %rdx
-; CHECK-NEXT: movq %rdi, %rcx
-; CHECK-NEXT: sarq $63, %rcx
-; CHECK-NEXT: xorq %rcx, %rdx
-; CHECK-NEXT: xorq %rax, %rcx
-; CHECK-NEXT: orq %rdx, %rcx
-; CHECK-NEXT: jne LBB0_1
-; CHECK-NEXT: ## %bb.2: ## %nooverflow
+; CHECK-NEXT: movq %rdi, %rsi
+; CHECK-NEXT: sarq $63, %rdi
+; CHECK-NEXT: xorq %rdi, %rdx
+; CHECK-NEXT: xorq %rcx, %rdi
+; CHECK-NEXT: orq %rdx, %rdi
+; CHECK-NEXT: jmp LBB0_8
+; CHECK-NEXT: LBB0_5: ## %overflow.no.lhs
+; CHECK-NEXT: cmpq %rax, %rcx
+; CHECK-NEXT: je LBB0_6
+; CHECK-NEXT: ## %bb.4: ## %overflow.no.lhs.only
+; CHECK-NEXT: movq %rsi, %rax
+; CHECK-NEXT: sarq $63, %rax
+; CHECK-NEXT: movq %rsi, %r9
+; CHECK-NEXT: xorq %rax, %r9
+; CHECK-NEXT: movq %rdi, %r8
+; CHECK-NEXT: xorq %rax, %r8
+; CHECK-NEXT: subq %rax, %r8
+; CHECK-NEXT: sbbq %rax, %r9
+; CHECK-NEXT: testq %rsi, %rsi
+; CHECK-NEXT: sets %r10b
+; CHECK-NEXT: cmovnsq %rsi, %r9
+; CHECK-NEXT: cmovnsq %rdi, %r8
+; CHECK-NEXT: movq %rcx, %rax
+; CHECK-NEXT: sarq $63, %rax
+; CHECK-NEXT: movq %rcx, %rsi
+; CHECK-NEXT: xorq %rax, %rsi
+; CHECK-NEXT: movq %rdx, %rdi
+; CHECK-NEXT: xorq %rax, %rdi
+; CHECK-NEXT: subq %rax, %rdi
+; CHECK-NEXT: sbbq %rax, %rsi
+; CHECK-NEXT: testq %rcx, %rcx
+; CHECK-NEXT: sets %r11b
+; CHECK-NEXT: cmovnsq %rcx, %rsi
+; CHECK-NEXT: cmovnsq %rdx, %rdi
+; CHECK-NEXT: movq %r8, %rax
+; CHECK-NEXT: mulq %rdi
+; CHECK-NEXT: movq %rax, %rcx
+; CHECK-NEXT: imulq %r9, %rdi
+; CHECK-NEXT: addq %rdx, %rdi
+; CHECK-NEXT: imulq %rsi, %r9
+; CHECK-NEXT: movq %r8, %rax
+; CHECK-NEXT: mulq %rsi
+; CHECK-NEXT: movq %rax, %rsi
+; CHECK-NEXT: movq %rcx, %rax
+; CHECK-NEXT: addq %rdi, %rsi
+; CHECK-NEXT: adcq %r9, %rdx
+; CHECK-NEXT: xorb %r10b, %r11b
+; CHECK-NEXT: movzbl %r11b, %ecx
+; CHECK-NEXT: jmp LBB0_3
+; CHECK-NEXT: LBB0_2: ## %overflow.no.rhs.only
+; CHECK-NEXT: movq %rcx, %rax
+; CHECK-NEXT: sarq $63, %rax
+; CHECK-NEXT: movq %rcx, %r9
+; CHECK-NEXT: xorq %rax, %r9
+; CHECK-NEXT: movq %rdx, %r8
+; CHECK-NEXT: xorq %rax, %r8
+; CHECK-NEXT: subq %rax, %r8
+; CHECK-NEXT: sbbq %rax, %r9
+; CHECK-NEXT: testq %rcx, %rcx
+; CHECK-NEXT: sets %r10b
+; CHECK-NEXT: cmovnsq %rcx, %r9
+; CHECK-NEXT: cmovnsq %rdx, %r8
; CHECK-NEXT: movq %rsi, %rax
-; CHECK-NEXT: movq %rdi, %rdx
+; CHECK-NEXT: sarq $63, %rax
+; CHECK-NEXT: movq %rsi, %r14
+; CHECK-NEXT: xorq %rax, %r14
+; CHECK-NEXT: movq %rdi, %r11
+; CHECK-NEXT: xorq %rax, %r11
+; CHECK-NEXT: subq %rax, %r11
+; CHECK-NEXT: sbbq %rax, %r14
+; CHECK-NEXT: testq %rsi, %rsi
+; CHECK-NEXT: sets %bl
+; CHECK-NEXT: cmovnsq %rsi, %r14
+; CHECK-NEXT: cmovnsq %rdi, %r11
+; CHECK-NEXT: movq %r8, %rax
+; CHECK-NEXT: mulq %r11
+; CHECK-NEXT: movq %rax, %rcx
+; CHECK-NEXT: imulq %r9, %r11
+; CHECK-NEXT: addq %rdx, %r11
+; CHECK-NEXT: imulq %r14, %r9
+; CHECK-NEXT: movq %r8, %rax
+; CHECK-NEXT: mulq %r14
+; CHECK-NEXT: movq %rax, %rsi
+; CHECK-NEXT: movq %rcx, %rax
+; CHECK-NEXT: addq %r11, %rsi
+; CHECK-NEXT: adcq %r9, %rdx
+; CHECK-NEXT: xorb %r10b, %bl
+; CHECK-NEXT: movzbl %bl, %ecx
+; CHECK-NEXT: LBB0_3: ## %overflow.res
+; CHECK-NEXT: movq %rcx, %rdi
+; CHECK-NEXT: negq %rdi
+; CHECK-NEXT: xorq %rdi, %rax
+; CHECK-NEXT: addq %rcx, %rax
+; CHECK-NEXT: xorl %r8d, %r8d
+; CHECK-NEXT: cmpq %rcx, %rax
+; CHECK-NEXT: setb %r8b
+; CHECK-NEXT: xorq %rdi, %rsi
+; CHECK-NEXT: addq %r8, %rsi
+; CHECK-NEXT: xorq %rdx, %rdi
+; CHECK-NEXT: cmpq %r8, %rsi
+; CHECK-NEXT: adcq $0, %rdi
+; CHECK-NEXT: LBB0_8: ## %overflow.res
+; CHECK-NEXT: setne %cl
+; CHECK-NEXT: testb $1, %cl
+; CHECK-NEXT: jne LBB0_10
+; CHECK-NEXT: LBB0_11: ## %nooverflow
+; CHECK-NEXT: movq %rsi, %rdx
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: popq %r14
; CHECK-NEXT: retq
-; CHECK-NEXT: LBB0_1: ## %overflow
+; CHECK-NEXT: LBB0_6: ## %overflow.no
+; CHECK-NEXT: movq %rdi, %rax
+; CHECK-NEXT: movq %rdx, %r8
+; CHECK-NEXT: mulq %rdx
+; CHECK-NEXT: imulq %rcx, %rdi
+; CHECK-NEXT: addq %rdx, %rdi
+; CHECK-NEXT: imulq %r8, %rsi
+; CHECK-NEXT: addq %rdi, %rsi
+; CHECK-NEXT: xorl %ecx, %ecx
+; CHECK-NEXT: testb $1, %cl
+; CHECK-NEXT: je LBB0_11
+; CHECK-NEXT: LBB0_10: ## %overflow
; CHECK-NEXT: ud2
entry:
%tmp16 = zext i64 %a.coerce0 to i128
diff --git a/llvm/test/CodeGen/X86/smulo-128-legalisation-lowering.ll b/llvm/test/CodeGen/X86/smulo-128-legalisation-lowering.ll
index 13596e1b18768..1460a2564cc3e 100644
--- a/llvm/test/CodeGen/X86/smulo-128-legalisation-lowering.ll
+++ b/llvm/test/CodeGen/X86/smulo-128-legalisation-lowering.ll
@@ -4,64 +4,185 @@
define zeroext i1 @smuloi128(i128 %v1, i128 %v2, ptr %res) {
; X64-LABEL: smuloi128:
-; X64: ## %bb.0:
-; X64-NEXT: pushq %r15
+; X64: ## %bb.0: ## %overflow.entry
+; X64-NEXT: pushq %rbp
; X64-NEXT: .cfi_def_cfa_offset 16
-; X64-NEXT: pushq %r14
+; X64-NEXT: pushq %r15
; X64-NEXT: .cfi_def_cfa_offset 24
-; X64-NEXT: pushq %rbx
+; X64-NEXT: pushq %r14
; X64-NEXT: .cfi_def_cfa_offset 32
-; X64-NEXT: .cfi_offset %rbx, -32
-; X64-NEXT: .cfi_offset %r14, -24
-; X64-NEXT: .cfi_offset %r15, -16
-; X64-NEXT: movq %rdx, %r10
-; X64-NEXT: movq %rsi, %r9
+; X64-NEXT: pushq %rbx
+; X64-NEXT: .cfi_def_cfa_offset 40
+; X64-NEXT: .cfi_offset %rbx, -40
+; X64-NEXT: .cfi_offset %r14, -32
+; X64-NEXT: .cfi_offset %r15, -24
+; X64-NEXT: .cfi_offset %rbp, -16
+; X64-NEXT: movq %rdx, %rax
+; X64-NEXT: sarq $63, %rax
+; X64-NEXT: movq %rdi, %r9
+; X64-NEXT: sarq $63, %r9
+; X64-NEXT: cmpq %r9, %rsi
+; X64-NEXT: je LBB0_5
+; X64-NEXT: ## %bb.1: ## %overflow.lhs
+; X64-NEXT: cmpq %rax, %rcx
+; X64-NEXT: je LBB0_2
+; X64-NEXT: ## %bb.7: ## %overflow
; X64-NEXT: movq %rsi, %r14
; X64-NEXT: sarq $63, %r14
; X64-NEXT: imulq %rdx, %r14
; X64-NEXT: movq %rdi, %rax
-; X64-NEXT: mulq %rdx
; X64-NEXT: movq %rdx, %r11
-; X64-NEXT: movq %rax, %rsi
-; X64-NEXT: movq %r9, %rax
-; X64-NEXT: mulq %r10
+; X64-NEXT: mulq %rdx
; X64-NEXT: movq %rdx, %r10
+; X64-NEXT: movq %rax, %r9
+; X64-NEXT: movq %rsi, %rax
+; X64-NEXT: mulq %r11
+; X64-NEXT: movq %rdx, %r11
; X64-NEXT: movq %rax, %rbx
-; X64-NEXT: addq %r11, %rbx
-; X64-NEXT: adcq %r14, %r10
-; X64-NEXT: movq %r10, %r14
+; X64-NEXT: addq %r10, %rbx
+; X64-NEXT: adcq %r14, %r11
+; X64-NEXT: movq %r11, %r14
; X64-NEXT: sarq $63, %r14
-; X64-NEXT: movq %rcx, %r15
-; X64-NEXT: sarq $63, %r15
-; X64-NEXT: imulq %rdi, %r15
+; X64-NEXT: movq %rcx, %rax
+; X64-NEXT: sarq $63, %rax
+; X64-NEXT: movq %rdi, %r15
+; X64-NEXT: imulq %rax, %r15
; X64-NEXT: movq %rdi, %rax
; X64-NEXT: mulq %rcx
-; X64-NEXT: movq %rdx, %r11
+; X64-NEXT: movq %rdx, %r10
; X64-NEXT: movq %rax, %rdi
; X64-NEXT: addq %rbx, %rdi
-; X64-NEXT: adcq %r15, %r11
-; X64-NEXT: movq %r11, %rbx
+; X64-NEXT: adcq %r15, %r10
+; X64-NEXT: movq %r10, %rbx
; X64-NEXT: sarq $63, %rbx
-; X64-NEXT: addq %r10, %r11
+; X64-NEXT: addq %r11, %r10
; X64-NEXT: adcq %r14, %rbx
-; X64-NEXT: movq %r9, %rax
+; X64-NEXT: movq %rsi, %rax
; X64-NEXT: imulq %rcx
-; X64-NEXT: addq %r11, %rax
+; X64-NEXT: addq %r10, %rax
; X64-NEXT: adcq %rbx, %rdx
-; X64-NEXT: movq %rdi, 8(%r8)
+; X64-NEXT: movq %rdi, %rsi
; X64-NEXT: sarq $63, %rdi
; X64-NEXT: xorq %rdi, %rdx
; X64-NEXT: xorq %rax, %rdi
; X64-NEXT: orq %rdx, %rdi
+; X64-NEXT: jmp LBB0_8
+; X64-NEXT: LBB0_5: ## %overflow.no.lhs
+; X64-NEXT: cmpq %rax, %rcx
+; X64-NEXT: je LBB0_6
+; X64-NEXT: ## %bb.4: ## %overflow.no.lhs.only
+; X64-NEXT: movq %rsi, %rax
+; X64-NEXT: sarq $63, %rax
+; X64-NEXT: movq %rsi, %r11
+; X64-NEXT: xorq %rax, %r11
+; X64-NEXT: movq %rdi, %r10
+; X64-NEXT: xorq %rax, %r10
+; X64-NEXT: subq %rax, %r10
+; X64-NEXT: sbbq %rax, %r11
+; X64-NEXT: testq %rsi, %rsi
+; X64-NEXT: sets %bl
+; X64-NEXT: cmovnsq %rsi, %r11
+; X64-NEXT: cmovnsq %rdi, %r10
+; X64-NEXT: movq %rcx, %rax
+; X64-NEXT: sarq $63, %rax
+; X64-NEXT: movq %rcx, %rsi
+; X64-NEXT: xorq %rax, %rsi
+; X64-NEXT: movq %rdx, %rdi
+; X64-NEXT: xorq %rax, %rdi
+; X64-NEXT: subq %rax, %rdi
+; X64-NEXT: sbbq %rax, %rsi
+; X64-NEXT: testq %rcx, %rcx
+; X64-NEXT: sets %bpl
+; X64-NEXT: cmovnsq %rcx, %rsi
+; X64-NEXT: cmovnsq %rdx, %rdi
+; X64-NEXT: movq %r10, %rax
+; X64-NEXT: mulq %rdi
+; X64-NEXT: movq %rax, %r9
+; X64-NEXT: imulq %r11, %rdi
+; X64-NEXT: addq %rdx, %rdi
+; X64-NEXT: imulq %rsi, %r11
+; X64-NEXT: movq %r10, %rax
+; X64-NEXT: mulq %rsi
+; X64-NEXT: movq %rax, %rsi
+; X64-NEXT: addq %rdi, %rsi
+; X64-NEXT: jmp LBB0_3
+; X64-NEXT: LBB0_2: ## %overflow.no.rhs.only
+; X64-NEXT: movq %rcx, %rax
+; X64-NEXT: sarq $63, %rax
+; X64-NEXT: movq %rcx, %r11
+; X64-NEXT: xorq %rax, %r11
+; X64-NEXT: movq %rdx, %r10
+; X64-NEXT: xorq %rax, %r10
+; X64-NEXT: subq %rax, %r10
+; X64-NEXT: sbbq %rax, %r11
+; X64-NEXT: testq %rcx, %rcx
+; X64-NEXT: sets %bl
+; X64-NEXT: cmovnsq %rcx, %r11
+; X64-NEXT: cmovnsq %rdx, %r10
+; X64-NEXT: movq %rsi, %rax
+; X64-NEXT: sarq $63, %rax
+; X64-NEXT: movq %rsi, %r14
+; X64-NEXT: xorq %rax, %r14
+; X64-NEXT: movq %rdi, %rcx
+; X64-NEXT: xorq %rax, %rcx
+; X64-NEXT: subq %rax, %rcx
+; X64-NEXT: sbbq %rax, %r14
+; X64-NEXT: testq %rsi, %rsi
+; X64-NEXT: sets %bpl
+; X64-NEXT: cmovnsq %rsi, %r14
+; X64-NEXT: cmovnsq %rdi, %rcx
+; X64-NEXT: movq %r10, %rax
+; X64-NEXT: mulq %rcx
+; X64-NEXT: movq %rax, %r9
+; X64-NEXT: imulq %r11, %rcx
+; X64-NEXT: addq %rdx, %rcx
+; X64-NEXT: imulq %r14, %r11
+; X64-NEXT: movq %r10, %rax
+; X64-NEXT: mulq %r14
+; X64-NEXT: movq %rax, %rsi
+; X64-NEXT: addq %rcx, %rsi
+; X64-NEXT: LBB0_3: ## %overflow.res
+; X64-NEXT: adcq %r11, %rdx
+; X64-NEXT: xorb %bl, %bpl
+; X64-NEXT: movzbl %bpl, %eax
+; X64-NEXT: movq %rax, %rcx
+; X64-NEXT: negq %rcx
+; X64-NEXT: xorq %rcx, %r9
+; X64-NEXT: addq %rax, %r9
+; X64-NEXT: xorl %edi, %edi
+; X64-NEXT: cmpq %rax, %r9
+; X64-NEXT: setb %dil
+; X64-NEXT: xorq %rcx, %rsi
+; X64-NEXT: addq %rdi, %rsi
+; X64-NEXT: xorq %rdx, %rcx
+; X64-NEXT: cmpq %rdi, %rsi
+; X64-NEXT: adcq $0, %rcx
+; X64-NEXT: LBB0_8: ## %overflow.res
; X64-NEXT: setne %al
-; X64-NEXT: movq %rsi, (%r8)
+; X64-NEXT: jmp LBB0_9
+; X64-NEXT: LBB0_6: ## %overflow.no
+; X64-NEXT: movq %rdi, %rax
+; X64-NEXT: movq %rdx, %r10
+; X64-NEXT: mulq %rdx
+; X64-NEXT: movq %rax, %r9
+; X64-NEXT: imulq %rcx, %rdi
+; X64-NEXT: addq %rdx, %rdi
+; X64-NEXT: imulq %r10, %rsi
+; X64-NEXT: addq %rdi, %rsi
+; X64-NEXT: xorl %eax, %eax
+; X64-NEXT: LBB0_9: ## %overflow.res
+; X64-NEXT: movq %r9, (%r8)
+; X64-NEXT: movq %rsi, 8(%r8)
+; X64-NEXT: andb $1, %al
+; X64-NEXT: ## kill: def $al killed $al killed $eax
; X64-NEXT: popq %rbx
; X64-NEXT: popq %r14
; X64-NEXT: popq %r15
+; X64-NEXT: popq %rbp
; X64-NEXT: retq
;
; X86-LABEL: smuloi128:
-; X86: ## %bb.0:
+; X86: ## %bb.0: ## %overflow.entry
; X86-NEXT: pushl %ebp
; X86-NEXT: .cfi_def_cfa_offset 8
; X86-NEXT: pushl %ebx
@@ -70,196 +191,212 @@ define zeroext i1 @smuloi128(i128 %v1, i128 %v2, ptr %res) {
; X86-NEXT: .cfi_def_cfa_offset 16
; X86-NEXT: pushl %esi
; X86-NEXT: .cfi_def_cfa_offset 20
-; X86-NEXT: subl $44, %esp
-; X86-NEXT: .cfi_def_cfa_offset 64
+; X86-NEXT: subl $52, %esp
+; X86-NEXT: .cfi_def_cfa_offset 72
; X86-NEXT: .cfi_offset %esi, -20
; X86-NEXT: .cfi_offset %edi, -16
; X86-NEXT: .cfi_offset %ebx, -12
; X86-NEXT: .cfi_offset %ebp, -8
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT: sarl $31, %eax
+; X86-NEXT: movl %ebx, %ecx
+; X86-NEXT: sarl $31, %ecx
+; X86-NEXT: movl %esi, %edx
+; X86-NEXT: xorl %ecx, %edx
+; X86-NEXT: xorl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: orl %edx, %ecx
+; X86-NEXT: je LBB0_12
+; X86-NEXT: ## %bb.1: ## %overflow.lhs
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: xorl %eax, %ecx
+; X86-NEXT: xorl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: orl %ecx, %eax
+; X86-NEXT: je LBB0_2
+; X86-NEXT: ## %bb.14: ## %overflow
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl %ebp, %eax
-; X86-NEXT: mull %esi
-; X86-NEXT: movl %edx, %ebx
-; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
-; X86-NEXT: movl %ecx, %eax
-; X86-NEXT: mull %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: mull %edi
; X86-NEXT: movl %edx, %ecx
-; X86-NEXT: movl %eax, %edi
-; X86-NEXT: addl %ebx, %edi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: adcl $0, %ecx
-; X86-NEXT: movl %ebp, %eax
-; X86-NEXT: mull %esi
-; X86-NEXT: movl %edx, %ebp
-; X86-NEXT: addl %edi, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
-; X86-NEXT: adcl %ecx, %ebp
-; X86-NEXT: setb %cl
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: movl %ebx, %eax
-; X86-NEXT: mull %esi
+; X86-NEXT: movl %esi, %eax
+; X86-NEXT: mull %edi
; X86-NEXT: movl %edx, %edi
-; X86-NEXT: movl %eax, %esi
-; X86-NEXT: addl %ebp, %esi
-; X86-NEXT: movzbl %cl, %eax
-; X86-NEXT: adcl %eax, %edi
-; X86-NEXT: sarl $31, %ebx
-; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: movl %eax, %ebx
+; X86-NEXT: addl %ecx, %ebx
+; X86-NEXT: adcl $0, %edi
+; X86-NEXT: movl %ebp, %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X86-NEXT: imull %ebx, %ebp
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: mull %ebx
+; X86-NEXT: mull %ebp
; X86-NEXT: movl %edx, %ecx
-; X86-NEXT: addl %eax, %ecx
-; X86-NEXT: addl %ebp, %ecx
-; X86-NEXT: addl %eax, %esi
-; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
-; X86-NEXT: adcl %edi, %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: movl %ebx, %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: mull %esi
+; X86-NEXT: addl %ebx, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: adcl %edi, %ecx
+; X86-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) ## 1-byte Folded Spill
+; X86-NEXT: movl %esi, %edi
+; X86-NEXT: movl %esi, %eax
+; X86-NEXT: mull %ebp
+; X86-NEXT: movl %edx, %ebx
+; X86-NEXT: movl %eax, %esi
+; X86-NEXT: addl %ecx, %esi
+; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 1-byte Folded Reload
+; X86-NEXT: adcl %eax, %ebx
+; X86-NEXT: sarl $31, %edi
+; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: movl %ebp, %ecx
+; X86-NEXT: imull %edi, %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: mull %edi
+; X86-NEXT: addl %eax, %edx
+; X86-NEXT: addl %ecx, %edx
+; X86-NEXT: addl %eax, %esi
+; X86-NEXT: movl %esi, (%esp) ## 4-byte Spill
+; X86-NEXT: adcl %ebx, %edx
; X86-NEXT: movl %edx, %edi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: mull %esi
-; X86-NEXT: movl %edx, %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT: mull %ebx
+; X86-NEXT: movl %edx, %ecx
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: movl %esi, %eax
+; X86-NEXT: mull %ebx
+; X86-NEXT: movl %edx, %ebx
; X86-NEXT: movl %eax, %ebp
-; X86-NEXT: addl %edi, %ebp
-; X86-NEXT: adcl $0, %esi
-; X86-NEXT: movl %ebx, %eax
+; X86-NEXT: addl %ecx, %ebp
+; X86-NEXT: adcl $0, %ebx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: mull {{[0-9]+}}(%esp)
-; X86-NEXT: movl %edx, %edi
+; X86-NEXT: movl %edx, %ecx
; X86-NEXT: addl %ebp, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
-; X86-NEXT: adcl %esi, %edi
+; X86-NEXT: adcl %ebx, %ecx
; X86-NEXT: setb %bl
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl %esi, %eax
+; X86-NEXT: movl %esi, %ebp
; X86-NEXT: mull {{[0-9]+}}(%esp)
-; X86-NEXT: addl %edi, %eax
-; X86-NEXT: movzbl %bl, %edi
-; X86-NEXT: adcl %edi, %edx
+; X86-NEXT: addl %ecx, %eax
+; X86-NEXT: movzbl %bl, %ecx
+; X86-NEXT: adcl %ecx, %edx
; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Folded Reload
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Folded Reload
; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
-; X86-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill
-; X86-NEXT: adcl $0, %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
-; X86-NEXT: sarl $31, %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X86-NEXT: movl %ebp, %eax
-; X86-NEXT: mull %ecx
-; X86-NEXT: movl %edx, %ebx
+; X86-NEXT: adcl $0, (%esp) ## 4-byte Folded Spill
+; X86-NEXT: adcl $0, %edi
+; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl %edi, %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: mull %esi
+; X86-NEXT: movl %edx, %ecx
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
-; X86-NEXT: movl %esi, %eax
-; X86-NEXT: mull %ecx
+; X86-NEXT: movl %ebp, %eax
+; X86-NEXT: mull %esi
; X86-NEXT: movl %edx, %esi
-; X86-NEXT: movl %eax, %edi
-; X86-NEXT: addl %ebx, %edi
+; X86-NEXT: movl %eax, %ebx
+; X86-NEXT: addl %ecx, %ebx
; X86-NEXT: adcl $0, %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: movl %ebp, %eax
-; X86-NEXT: mull %ebx
-; X86-NEXT: movl %ebx, %ebp
-; X86-NEXT: movl %edx, %ebx
-; X86-NEXT: addl %edi, %eax
+; X86-NEXT: movl %edi, %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; X86-NEXT: mull %ebp
+; X86-NEXT: movl %edx, %ecx
+; X86-NEXT: addl %ebx, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
-; X86-NEXT: adcl %esi, %ebx
-; X86-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) ## 1-byte Folded Spill
+; X86-NEXT: adcl %esi, %ecx
+; X86-NEXT: setb %bl
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: mull %ebp
-; X86-NEXT: movl %edx, %edi
-; X86-NEXT: movl %eax, %ecx
-; X86-NEXT: addl %ebx, %ecx
-; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 1-byte Folded Reload
-; X86-NEXT: adcl %eax, %edi
-; X86-NEXT: movl %ebp, %esi
-; X86-NEXT: sarl $31, %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: imull %esi, %ebx
-; X86-NEXT: movl %esi, %eax
+; X86-NEXT: movl %edx, %esi
+; X86-NEXT: movl %eax, %edi
+; X86-NEXT: addl %ecx, %edi
+; X86-NEXT: movzbl %bl, %eax
+; X86-NEXT: adcl %eax, %esi
+; X86-NEXT: movl %ebp, %ebx
+; X86-NEXT: sarl $31, %ebx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: imull %ebx, %ecx
+; X86-NEXT: movl %ebx, %eax
; X86-NEXT: mull {{[0-9]+}}(%esp)
; X86-NEXT: movl %edx, %ebp
-; X86-NEXT: addl %ebx, %ebp
+; X86-NEXT: addl %ecx, %ebp
; X86-NEXT: addl %eax, %ebp
-; X86-NEXT: addl %eax, %ecx
-; X86-NEXT: adcl %edi, %ebp
+; X86-NEXT: addl %eax, %edi
+; X86-NEXT: adcl %esi, %ebp
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Reload
; X86-NEXT: addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Reload
; X86-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill
-; X86-NEXT: adcl $0, %ecx
+; X86-NEXT: adcl $0, %edi
; X86-NEXT: adcl $0, %ebp
; X86-NEXT: movl %ebp, %eax
; X86-NEXT: sarl $31, %eax
-; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Folded Reload
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
-; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp ## 4-byte Folded Reload
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Reload
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Reload
+; X86-NEXT: movl %ecx, %edx
+; X86-NEXT: sarl $31, %edx
+; X86-NEXT: addl (%esp), %edi ## 4-byte Folded Reload
+; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: adcl %ecx, %ebp
; X86-NEXT: movl %edx, %ecx
; X86-NEXT: adcl %eax, %ecx
; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
-; X86-NEXT: adcl %edx, %eax
-; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Reload
-; X86-NEXT: imull %ecx, %ebx
+; X86-NEXT: adcl %eax, %edx
+; X86-NEXT: movl %edx, (%esp) ## 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Reload
+; X86-NEXT: imull %edx, %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: mull %ecx
-; X86-NEXT: movl %edx, %ecx
+; X86-NEXT: mull %edx
+; X86-NEXT: movl %edx, %esi
; X86-NEXT: movl %eax, %edi
-; X86-NEXT: addl %eax, %ecx
-; X86-NEXT: addl %ebx, %ecx
-; X86-NEXT: movl %esi, %ebx
+; X86-NEXT: addl %eax, %esi
+; X86-NEXT: addl %ecx, %esi
+; X86-NEXT: movl %ebx, %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: imull %eax, %ebx
-; X86-NEXT: movl %esi, %eax
+; X86-NEXT: imull %eax, %ecx
+; X86-NEXT: movl %ebx, %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: mull %edx
-; X86-NEXT: movl %edx, %esi
-; X86-NEXT: addl %ebx, %esi
-; X86-NEXT: addl %eax, %esi
+; X86-NEXT: movl %edx, %ebx
+; X86-NEXT: addl %ecx, %ebx
+; X86-NEXT: addl %eax, %ebx
; X86-NEXT: addl %edi, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
-; X86-NEXT: adcl %ecx, %esi
+; X86-NEXT: adcl %esi, %ebx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: mull %edi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: mull %esi
; X86-NEXT: movl %edx, %ecx
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: mull %edi
-; X86-NEXT: movl %edx, %ebx
-; X86-NEXT: movl %eax, %edi
-; X86-NEXT: addl %ecx, %edi
-; X86-NEXT: adcl $0, %ebx
+; X86-NEXT: mull %esi
+; X86-NEXT: movl %edx, %edi
+; X86-NEXT: movl %eax, %esi
+; X86-NEXT: addl %ecx, %esi
+; X86-NEXT: adcl $0, %edi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: mull {{[0-9]+}}(%esp)
; X86-NEXT: movl %edx, %ecx
-; X86-NEXT: addl %edi, %eax
+; X86-NEXT: addl %esi, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
-; X86-NEXT: adcl %ebx, %ecx
-; X86-NEXT: setb %bl
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Reload
+; X86-NEXT: adcl %edi, %ecx
+; X86-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) ## 1-byte Folded Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: mull {{[0-9]+}}(%esp)
; X86-NEXT: addl %ecx, %eax
-; X86-NEXT: movzbl %bl, %ecx
+; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 1-byte Folded Reload
; X86-NEXT: adcl %ecx, %edx
; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Folded Reload
-; X86-NEXT: adcl %esi, %edx
+; X86-NEXT: adcl %ebx, %edx
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Reload
; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Folded Reload
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx ## 4-byte Reload
; X86-NEXT: adcl %ebp, %ebx
; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Folded Reload
-; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Folded Reload
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Reload
+; X86-NEXT: adcl (%esp), %edx ## 4-byte Folded Reload
; X86-NEXT: movl %esi, %ecx
; X86-NEXT: sarl $31, %ecx
; X86-NEXT: xorl %ecx, %eax
@@ -268,38 +405,435 @@ define zeroext i1 @smuloi128(i128 %v1, i128 %v2, ptr %res) {
; X86-NEXT: xorl %ecx, %edx
; X86-NEXT: xorl %ebx, %ecx
; X86-NEXT: orl %edx, %ecx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Reload
; X86-NEXT: orl %edi, %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl %esi, 12(%eax)
+; X86-NEXT: jmp LBB0_15
+; X86-NEXT: LBB0_12: ## %overflow.no.lhs
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: xorl %eax, %ecx
+; X86-NEXT: xorl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: orl %ecx, %eax
+; X86-NEXT: je LBB0_13
+; X86-NEXT: ## %bb.7: ## %overflow.no.lhs.only
+; X86-NEXT: movl %esi, %eax
+; X86-NEXT: sarl $31, %eax
+; X86-NEXT: movl %esi, %edx
+; X86-NEXT: xorl %eax, %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: xorl %eax, %edi
+; X86-NEXT: xorl %eax, %ebx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl %ecx, %ebp
+; X86-NEXT: xorl %eax, %ebp
+; X86-NEXT: subl %eax, %ebp
+; X86-NEXT: sbbl %eax, %ebx
+; X86-NEXT: sbbl %eax, %edi
+; X86-NEXT: sbbl %eax, %edx
+; X86-NEXT: testl %esi, %esi
+; X86-NEXT: sets {{[-0-9]+}}(%e{{[sb]}}p) ## 1-byte Folded Spill
+; X86-NEXT: js LBB0_9
+; X86-NEXT: ## %bb.8: ## %overflow.no.lhs.only
+; X86-NEXT: movl %esi, %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT: movl %ecx, %ebp
+; X86-NEXT: LBB0_9: ## %overflow.no.lhs.only
+; X86-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: movl %ebx, (%esp) ## 4-byte Spill
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl %edx, %eax
+; X86-NEXT: sarl $31, %eax
+; X86-NEXT: movl %edx, %ebp
+; X86-NEXT: xorl %eax, %ebp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: xorl %eax, %edi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: xorl %eax, %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: movl %esi, %ebx
+; X86-NEXT: xorl %eax, %ebx
+; X86-NEXT: subl %eax, %ebx
+; X86-NEXT: sbbl %eax, %ecx
+; X86-NEXT: sbbl %eax, %edi
+; X86-NEXT: sbbl %eax, %ebp
+; X86-NEXT: testl %edx, %edx
+; X86-NEXT: sets {{[-0-9]+}}(%e{{[sb]}}p) ## 1-byte Folded Spill
+; X86-NEXT: js LBB0_11
+; X86-NEXT: ## %bb.10: ## %overflow.no.lhs.only
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl %esi, %ebx
+; X86-NEXT: LBB0_11: ## %overflow.no.lhs.only
+; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Reload
+; X86-NEXT: movl %esi, %eax
+; X86-NEXT: mull %ebx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: movl (%esp), %eax ## 4-byte Reload
+; X86-NEXT: mull %ebx
+; X86-NEXT: movl %eax, %edi
+; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Folded Reload
+; X86-NEXT: adcl $0, %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: movl %esi, %eax
+; X86-NEXT: mull %ecx
+; X86-NEXT: movl %edx, %esi
+; X86-NEXT: addl %edi, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Folded Reload
+; X86-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) ## 1-byte Folded Spill
+; X86-NEXT: movl (%esp), %eax ## 4-byte Reload
+; X86-NEXT: mull %ecx
+; X86-NEXT: movl %eax, %edi
+; X86-NEXT: addl %esi, %edi
+; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 1-byte Folded Reload
+; X86-NEXT: adcl %eax, %edx
+; X86-NEXT: movl %edx, %esi
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Reload
+; X86-NEXT: imull %edx, %ecx
+; X86-NEXT: movl %ebx, %eax
+; X86-NEXT: mull %edx
+; X86-NEXT: addl %ecx, %edx
+; X86-NEXT: imull {{[-0-9]+}}(%e{{[sb]}}p), %ebx ## 4-byte Folded Reload
+; X86-NEXT: addl %ebx, %edx
+; X86-NEXT: addl %edi, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: adcl %esi, %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: movl (%esp), %eax ## 4-byte Reload
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Reload
+; X86-NEXT: mull %esi
+; X86-NEXT: movl %edx, %edi
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Reload
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: mull %esi
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: movl %edx, %ebx
+; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx ## 4-byte Folded Reload
+; X86-NEXT: adcl $0, %edi
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: mull %ebp
+; X86-NEXT: movl %edx, %ecx
+; X86-NEXT: movl %eax, %esi
+; X86-NEXT: addl %ebx, %esi
+; X86-NEXT: adcl %edi, %ecx
+; X86-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) ## 1-byte Folded Spill
+; X86-NEXT: movl (%esp), %eax ## 4-byte Reload
+; X86-NEXT: mull %ebp
+; X86-NEXT: movl %eax, %ebx
+; X86-NEXT: addl %ecx, %ebx
+; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 1-byte Folded Reload
+; X86-NEXT: adcl %eax, %edx
+; X86-NEXT: movl %edx, (%esp) ## 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Reload
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: imull {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Folded Reload
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Reload
+; X86-NEXT: mull %edi
+; X86-NEXT: addl %edx, %ecx
+; X86-NEXT: imull %edi, %ebp
+; X86-NEXT: addl %ecx, %ebp
+; X86-NEXT: addl %ebx, %eax
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx ## 4-byte Reload
+; X86-NEXT: adcl (%esp), %ebp ## 4-byte Folded Reload
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Reload
-; X86-NEXT: movl %ecx, (%eax)
+; X86-NEXT: addl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill
+; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Folded Reload
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Reload
+; X86-NEXT: adcl $0, %eax
+; X86-NEXT: adcl $0, %ebp
+; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 1-byte Folded Reload
+; X86-NEXT: xorb {{[-0-9]+}}(%e{{[sb]}}p), %cl ## 1-byte Folded Reload
+; X86-NEXT: movzbl %cl, %edx
+; X86-NEXT: movl %edx, %ecx
+; X86-NEXT: negl %ecx
+; X86-NEXT: xorl %ecx, %edi
+; X86-NEXT: xorl %ecx, %ebx
+; X86-NEXT: addl %edx, %ebx
+; X86-NEXT: adcl $0, %edi
+; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: cmpl %edx, %ebx
+; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: movl %edi, %edx
+; X86-NEXT: sbbl $0, %edx
+; X86-NEXT: setb %dl
+; X86-NEXT: xorl %ecx, %esi
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Reload
+; X86-NEXT: xorl %ecx, %edi
+; X86-NEXT: movzbl %dl, %edx
+; X86-NEXT: addl %edx, %edi
+; X86-NEXT: adcl $0, %esi
+; X86-NEXT: xorl %ecx, %ebp
+; X86-NEXT: xorl %eax, %ecx
+; X86-NEXT: cmpl %edx, %edi
+; X86-NEXT: movl %edi, %edx
+; X86-NEXT: movl %esi, %eax
+; X86-NEXT: sbbl $0, %eax
+; X86-NEXT: adcl $0, %ecx
+; X86-NEXT: adcl $0, %ebp
+; X86-NEXT: orl %ecx, %ebp
+; X86-NEXT: jmp LBB0_15
+; X86-NEXT: LBB0_2: ## %overflow.no.rhs.only
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl %edx, %eax
+; X86-NEXT: sarl $31, %eax
+; X86-NEXT: xorl %eax, %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: xorl %eax, %edi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT: xorl %eax, %ebx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl %ecx, %ebp
+; X86-NEXT: xorl %eax, %ebp
+; X86-NEXT: subl %eax, %ebp
+; X86-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: sbbl %eax, %ebx
+; X86-NEXT: movl %ebx, (%esp) ## 4-byte Spill
+; X86-NEXT: sbbl %eax, %edi
+; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: sbbl %eax, %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: cmpl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: sets {{[-0-9]+}}(%e{{[sb]}}p) ## 1-byte Folded Spill
+; X86-NEXT: js LBB0_4
+; X86-NEXT: ## %bb.3: ## %overflow.no.rhs.only
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, (%esp) ## 4-byte Spill
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: LBB0_4: ## %overflow.no.rhs.only
+; X86-NEXT: movl %esi, %eax
+; X86-NEXT: sarl $31, %eax
+; X86-NEXT: movl %esi, %edi
+; X86-NEXT: xorl %eax, %edi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; X86-NEXT: xorl %eax, %ebp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: xorl %eax, %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl %edx, %ebx
+; X86-NEXT: xorl %eax, %ebx
+; X86-NEXT: subl %eax, %ebx
+; X86-NEXT: sbbl %eax, %ecx
+; X86-NEXT: sbbl %eax, %ebp
+; X86-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: sbbl %eax, %edi
+; X86-NEXT: testl %esi, %esi
+; X86-NEXT: sets {{[-0-9]+}}(%e{{[sb]}}p) ## 1-byte Folded Spill
+; X86-NEXT: js LBB0_6
+; X86-NEXT: ## %bb.5: ## %overflow.no.rhs.only
+; X86-NEXT: movl %esi, %edi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl %edx, %ebx
+; X86-NEXT: LBB0_6: ## %overflow.no.rhs.only
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Reload
+; X86-NEXT: movl %esi, %eax
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: mull %ebx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: movl (%esp), %eax ## 4-byte Reload
+; X86-NEXT: mull %ebx
+; X86-NEXT: movl %eax, %ebp
+; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp ## 4-byte Folded Reload
+; X86-NEXT: adcl $0, %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: movl %esi, %eax
+; X86-NEXT: mull %ecx
+; X86-NEXT: movl %edx, %esi
+; X86-NEXT: addl %ebp, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Folded Reload
+; X86-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) ## 1-byte Folded Spill
+; X86-NEXT: movl (%esp), %eax ## 4-byte Reload
+; X86-NEXT: mull %ecx
+; X86-NEXT: movl %eax, %ebp
+; X86-NEXT: addl %esi, %ebp
+; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 1-byte Folded Reload
+; X86-NEXT: adcl %eax, %edx
+; X86-NEXT: movl %edx, %esi
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Reload
+; X86-NEXT: imull %edx, %ecx
+; X86-NEXT: movl %ebx, %eax
+; X86-NEXT: mull %edx
+; X86-NEXT: addl %ecx, %edx
+; X86-NEXT: imull {{[-0-9]+}}(%e{{[sb]}}p), %ebx ## 4-byte Folded Reload
+; X86-NEXT: addl %ebx, %edx
+; X86-NEXT: addl %ebp, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: adcl %esi, %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: movl (%esp), %eax ## 4-byte Reload
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx ## 4-byte Reload
+; X86-NEXT: mull %ebx
+; X86-NEXT: movl %edx, %ecx
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Reload
+; X86-NEXT: movl %esi, %eax
+; X86-NEXT: mull %ebx
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: movl %edx, %ebp
+; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp ## 4-byte Folded Reload
+; X86-NEXT: adcl $0, %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: movl %esi, %eax
+; X86-NEXT: mull %edi
+; X86-NEXT: movl %edx, %ecx
+; X86-NEXT: movl %eax, %esi
+; X86-NEXT: addl %ebp, %esi
+; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Folded Reload
+; X86-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) ## 1-byte Folded Spill
+; X86-NEXT: movl (%esp), %eax ## 4-byte Reload
+; X86-NEXT: mull %edi
+; X86-NEXT: movl %eax, %ebp
+; X86-NEXT: addl %ecx, %ebp
+; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 1-byte Folded Reload
+; X86-NEXT: adcl %eax, %edx
+; X86-NEXT: movl %edx, (%esp) ## 4-byte Spill
+; X86-NEXT: movl %ebx, %eax
+; X86-NEXT: imull {{[-0-9]+}}(%e{{[sb]}}p), %ebx ## 4-byte Folded Reload
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Reload
-; X86-NEXT: movl %ecx, 4(%eax)
+; X86-NEXT: mull %ecx
+; X86-NEXT: addl %edx, %ebx
+; X86-NEXT: imull %ecx, %edi
+; X86-NEXT: addl %ebx, %edi
+; X86-NEXT: addl %ebp, %eax
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp ## 4-byte Reload
+; X86-NEXT: adcl (%esp), %edi ## 4-byte Folded Reload
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx ## 4-byte Reload
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Reload
-; X86-NEXT: movl %ecx, 8(%eax)
+; X86-NEXT: addl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill
+; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Folded Reload
+; X86-NEXT: adcl $0, %eax
+; X86-NEXT: adcl $0, %edi
+; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 1-byte Folded Reload
+; X86-NEXT: xorb {{[-0-9]+}}(%e{{[sb]}}p), %cl ## 1-byte Folded Reload
+; X86-NEXT: movzbl %cl, %edx
+; X86-NEXT: movl %edx, %ecx
+; X86-NEXT: negl %ecx
+; X86-NEXT: xorl %ecx, %ebp
+; X86-NEXT: xorl %ecx, %ebx
+; X86-NEXT: addl %edx, %ebx
+; X86-NEXT: adcl $0, %ebp
+; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: cmpl %edx, %ebx
+; X86-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: movl %ebp, %edx
+; X86-NEXT: sbbl $0, %edx
+; X86-NEXT: setb %dl
+; X86-NEXT: xorl %ecx, %esi
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp ## 4-byte Reload
+; X86-NEXT: xorl %ecx, %ebp
+; X86-NEXT: movzbl %dl, %edx
+; X86-NEXT: addl %edx, %ebp
+; X86-NEXT: adcl $0, %esi
+; X86-NEXT: xorl %ecx, %edi
+; X86-NEXT: xorl %eax, %ecx
+; X86-NEXT: cmpl %edx, %ebp
+; X86-NEXT: movl %ebp, %edx
+; X86-NEXT: movl %esi, %eax
+; X86-NEXT: sbbl $0, %eax
+; X86-NEXT: adcl $0, %ecx
+; X86-NEXT: adcl $0, %edi
+; X86-NEXT: orl %ecx, %edi
+; X86-NEXT: LBB0_15: ## %overflow.res
; X86-NEXT: setne %al
-; X86-NEXT: addl $44, %esp
-; X86-NEXT: popl %esi
-; X86-NEXT: popl %edi
-; X86-NEXT: popl %ebx
-; X86-NEXT: popl %ebp
-; X86-NEXT: retl
- %t = call {i128, i1} @llvm.smul.with.overflow.i128(i128 %v1, i128 %v2)
- %val = extractvalue {i128, i1} %t, 0
- %obit = extractvalue {i128, i1} %t, 1
- store i128 %val, ptr %res
- ret i1 %obit
-}
-
-define zeroext i1 @smuloi256(i256 %v1, i256 %v2, ptr %res) {
-; X64-LABEL: smuloi256:
-; X64: ## %bb.0:
-; X64-NEXT: pushq %rbp
-; X64-NEXT: .cfi_def_cfa_offset 16
-; X64-NEXT: pushq %r15
-; X64-NEXT: .cfi_def_cfa_offset 24
-; X64-NEXT: pushq %r14
-; X64-NEXT: .cfi_def_cfa_offset 32
+; X86-NEXT: jmp LBB0_16
+; X86-NEXT: LBB0_13: ## %overflow.no
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %esi, %edi
+; X86-NEXT: movl %eax, %esi
+; X86-NEXT: imull %ebx, %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: mull %ecx
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: addl %esi, %edx
+; X86-NEXT: imull %ecx, %ebx
+; X86-NEXT: addl %edx, %ebx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; X86-NEXT: mull %ebp
+; X86-NEXT: imull %ecx, %edi
+; X86-NEXT: addl %edx, %edi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: imull %esi, %ebp
+; X86-NEXT: addl %edi, %ebp
+; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Folded Reload
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: adcl %ebx, %ebp
+; X86-NEXT: movl %ebp, (%esp) ## 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; X86-NEXT: movl %ebp, %eax
+; X86-NEXT: mull %ecx
+; X86-NEXT: movl %edx, %edi
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: mull %ecx
+; X86-NEXT: movl %edx, %ecx
+; X86-NEXT: movl %eax, %ebx
+; X86-NEXT: addl %edi, %ebx
+; X86-NEXT: adcl $0, %ecx
+; X86-NEXT: movl %ebp, %eax
+; X86-NEXT: mull %esi
+; X86-NEXT: movl %edx, %edi
+; X86-NEXT: addl %ebx, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: adcl %ecx, %edi
+; X86-NEXT: setb %cl
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: mull %esi
+; X86-NEXT: movl %edx, %esi
+; X86-NEXT: movl %eax, %edx
+; X86-NEXT: addl %edi, %edx
+; X86-NEXT: movzbl %cl, %eax
+; X86-NEXT: adcl %eax, %esi
+; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Folded Reload
+; X86-NEXT: adcl (%esp), %esi ## 4-byte Folded Reload
+; X86-NEXT: xorl %eax, %eax
+; X86-NEXT: LBB0_16: ## %overflow.res
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Reload
+; X86-NEXT: movl %edi, (%ecx)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Reload
+; X86-NEXT: movl %edi, 4(%ecx)
+; X86-NEXT: movl %edx, 8(%ecx)
+; X86-NEXT: movl %esi, 12(%ecx)
+; X86-NEXT: andb $1, %al
+; X86-NEXT: ## kill: def $al killed $al killed $eax
+; X86-NEXT: addl $52, %esp
+; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebx
+; X86-NEXT: popl %ebp
+; X86-NEXT: retl
+ %t = call {i128, i1} @llvm.smul.with.overflow.i128(i128 %v1, i128 %v2)
+ %val = extractvalue {i128, i1} %t, 0
+ %obit = extractvalue {i128, i1} %t, 1
+ store i128 %val, ptr %res
+ ret i1 %obit
+}
+
+define zeroext i1 @smuloi256(i256 %v1, i256 %v2, ptr %res) {
+; X64-LABEL: smuloi256:
+; X64: ## %bb.0: ## %overflow.entry
+; X64-NEXT: pushq %rbp
+; X64-NEXT: .cfi_def_cfa_offset 16
+; X64-NEXT: pushq %r15
+; X64-NEXT: .cfi_def_cfa_offset 24
+; X64-NEXT: pushq %r14
+; X64-NEXT: .cfi_def_cfa_offset 32
; X64-NEXT: pushq %r13
; X64-NEXT: .cfi_def_cfa_offset 40
; X64-NEXT: pushq %r12
@@ -312,199 +846,558 @@ define zeroext i1 @smuloi256(i256 %v1, i256 %v2, ptr %res) {
; X64-NEXT: .cfi_offset %r14, -32
; X64-NEXT: .cfi_offset %r15, -24
; X64-NEXT: .cfi_offset %rbp, -16
-; X64-NEXT: movq %r8, %r12
-; X64-NEXT: movq %rcx, %rbx
+; X64-NEXT: movq %r8, %r15
+; X64-NEXT: movq %rcx, %r12
; X64-NEXT: movq %rdx, %r8
-; X64-NEXT: movq %rsi, %r10
-; X64-NEXT: movq %rdi, %r11
-; X64-NEXT: movq %rdx, %rax
-; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
-; X64-NEXT: mulq %r12
-; X64-NEXT: movq %rdx, %rsi
-; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
-; X64-NEXT: movq %rcx, %rax
-; X64-NEXT: mulq %r12
+; X64-NEXT: movq %rsi, %r11
+; X64-NEXT: movq {{[0-9]+}}(%rsp), %rbx
+; X64-NEXT: movq {{[0-9]+}}(%rsp), %rax
+; X64-NEXT: movq {{[0-9]+}}(%rsp), %r13
+; X64-NEXT: movq %r9, %rsi
+; X64-NEXT: sarq $63, %rsi
+; X64-NEXT: movq %r11, %rcx
+; X64-NEXT: sarq $63, %rcx
+; X64-NEXT: movq %r12, %rdx
+; X64-NEXT: xorq %rcx, %rdx
+; X64-NEXT: xorq %r8, %rcx
+; X64-NEXT: orq %rdx, %rcx
+; X64-NEXT: je LBB1_4
+; X64-NEXT: ## %bb.1: ## %overflow.lhs
+; X64-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; X64-NEXT: movq %rax, %rcx
+; X64-NEXT: xorq %rsi, %rcx
+; X64-NEXT: xorq %rbx, %rsi
+; X64-NEXT: orq %rcx, %rsi
+; X64-NEXT: je LBB1_2
+; X64-NEXT: ## %bb.6: ## %overflow
+; X64-NEXT: movq %r8, %rax
+; X64-NEXT: mulq %r15
; X64-NEXT: movq %rdx, %rcx
-; X64-NEXT: movq %rax, %r14
-; X64-NEXT: addq %rsi, %r14
-; X64-NEXT: adcq $0, %rcx
+; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; X64-NEXT: movq %r12, %rax
+; X64-NEXT: mulq %r15
+; X64-NEXT: movq %rdx, %rsi
+; X64-NEXT: movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; X64-NEXT: movq %rax, %rdi
+; X64-NEXT: addq %rcx, %rdi
+; X64-NEXT: adcq $0, %rsi
; X64-NEXT: movq %r8, %rax
; X64-NEXT: mulq %r9
-; X64-NEXT: movq %rdx, %rsi
+; X64-NEXT: movq %rdx, %rcx
; X64-NEXT: movq %rax, %r13
-; X64-NEXT: addq %r14, %r13
-; X64-NEXT: adcq %rcx, %rsi
+; X64-NEXT: addq %rdi, %r13
+; X64-NEXT: adcq %rsi, %rcx
; X64-NEXT: setb %al
-; X64-NEXT: movzbl %al, %ecx
-; X64-NEXT: movq %rbx, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
-; X64-NEXT: movq %rbx, %rax
-; X64-NEXT: mulq %r9
-; X64-NEXT: movq %rdx, %r8
-; X64-NEXT: movq %rax, %r14
-; X64-NEXT: addq %rsi, %r14
-; X64-NEXT: adcq %rcx, %r8
-; X64-NEXT: movq %rbx, %rcx
-; X64-NEXT: sarq $63, %rcx
-; X64-NEXT: movq %r9, %rsi
-; X64-NEXT: imulq %rcx, %rsi
+; X64-NEXT: movzbl %al, %edi
; X64-NEXT: movq %r12, %rax
-; X64-NEXT: mulq %rcx
-; X64-NEXT: movq %rdx, %r15
-; X64-NEXT: addq %rax, %r15
-; X64-NEXT: addq %rsi, %r15
-; X64-NEXT: addq %rax, %r14
-; X64-NEXT: adcq %r8, %r15
-; X64-NEXT: movq %rdi, %rax
-; X64-NEXT: mulq %r12
+; X64-NEXT: mulq %r9
; X64-NEXT: movq %rdx, %rsi
+; X64-NEXT: movq %rax, %r10
+; X64-NEXT: addq %rcx, %r10
+; X64-NEXT: adcq %rdi, %rsi
+; X64-NEXT: movq %r12, %rdx
+; X64-NEXT: sarq $63, %rdx
+; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; X64-NEXT: movq %r9, %rcx
+; X64-NEXT: imulq %rdx, %rcx
+; X64-NEXT: movq %r15, %rax
+; X64-NEXT: mulq %rdx
+; X64-NEXT: movq %rdx, %rbp
+; X64-NEXT: addq %rax, %rbp
+; X64-NEXT: addq %rcx, %rbp
+; X64-NEXT: addq %rax, %r10
+; X64-NEXT: adcq %rsi, %rbp
+; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 ## 8-byte Reload
+; X64-NEXT: movq %r14, %rax
+; X64-NEXT: mulq %r15
+; X64-NEXT: movq %rdx, %rcx
; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
-; X64-NEXT: movq %r10, %rax
-; X64-NEXT: mulq %r12
-; X64-NEXT: movq %rdx, %rdi
-; X64-NEXT: movq %rax, %r12
-; X64-NEXT: addq %rsi, %r12
-; X64-NEXT: adcq $0, %rdi
; X64-NEXT: movq %r11, %rax
+; X64-NEXT: mulq %r15
+; X64-NEXT: movq %rdx, %rsi
+; X64-NEXT: movq %rax, %r8
+; X64-NEXT: addq %rcx, %r8
+; X64-NEXT: adcq $0, %rsi
+; X64-NEXT: movq %r14, %rax
; X64-NEXT: mulq %r9
-; X64-NEXT: movq %rdx, %rbx
-; X64-NEXT: addq %r12, %rax
+; X64-NEXT: movq %rdx, %rdi
+; X64-NEXT: addq %r8, %rax
; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
-; X64-NEXT: adcq %rdi, %rbx
-; X64-NEXT: setb %dil
-; X64-NEXT: movq %r10, %rax
+; X64-NEXT: adcq %rsi, %rdi
+; X64-NEXT: setb %sil
+; X64-NEXT: movq %r11, %rax
; X64-NEXT: mulq %r9
-; X64-NEXT: movq %rdx, %rbp
-; X64-NEXT: movq %rax, %rsi
-; X64-NEXT: addq %rbx, %rsi
+; X64-NEXT: movq %rdx, %rbx
+; X64-NEXT: movq %rax, %rcx
+; X64-NEXT: addq %rdi, %rcx
+; X64-NEXT: movzbl %sil, %eax
+; X64-NEXT: adcq %rax, %rbx
+; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %rcx ## 8-byte Folded Reload
+; X64-NEXT: adcq %r13, %rbx
+; X64-NEXT: adcq $0, %r10
+; X64-NEXT: movq %r10, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; X64-NEXT: adcq $0, %rbp
+; X64-NEXT: movq %r14, %rax
; X64-NEXT: movq {{[0-9]+}}(%rsp), %r8
-; X64-NEXT: movzbl %dil, %eax
-; X64-NEXT: adcq %rax, %rbp
-; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %rsi ## 8-byte Folded Reload
-; X64-NEXT: adcq %r13, %rbp
-; X64-NEXT: adcq $0, %r14
-; X64-NEXT: adcq $0, %r15
-; X64-NEXT: movq %r15, %r12
-; X64-NEXT: sarq $63, %r12
-; X64-NEXT: movq %r11, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
-; X64-NEXT: movq %r11, %rax
; X64-NEXT: mulq %r8
-; X64-NEXT: movq %rdx, %rdi
-; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
-; X64-NEXT: movq %r10, %rax
+; X64-NEXT: movq %rdx, %rsi
+; X64-NEXT: movq %rax, %rdi
+; X64-NEXT: movq %r11, %rax
; X64-NEXT: mulq %r8
-; X64-NEXT: movq %rdx, %r13
+; X64-NEXT: movq %rdx, %r8
; X64-NEXT: movq %rax, %r9
-; X64-NEXT: addq %rdi, %r9
-; X64-NEXT: adcq $0, %r13
-; X64-NEXT: movq {{[0-9]+}}(%rsp), %rdi
+; X64-NEXT: addq %rsi, %r9
+; X64-NEXT: adcq $0, %r8
+; X64-NEXT: movq %r14, %rax
+; X64-NEXT: movq {{[0-9]+}}(%rsp), %r15
+; X64-NEXT: mulq %r15
+; X64-NEXT: movq %rdx, %rsi
+; X64-NEXT: movq %rax, %r10
+; X64-NEXT: addq %r9, %r10
+; X64-NEXT: adcq %r8, %rsi
+; X64-NEXT: setb %r9b
; X64-NEXT: movq %r11, %rax
-; X64-NEXT: mulq %rdi
-; X64-NEXT: movq %rdi, %r11
-; X64-NEXT: movq %rdx, %rdi
-; X64-NEXT: addq %r9, %rax
+; X64-NEXT: mulq %r15
+; X64-NEXT: movq %rdx, %r13
+; X64-NEXT: movq %rax, %r8
+; X64-NEXT: addq %rsi, %r8
+; X64-NEXT: movzbl %r9b, %eax
+; X64-NEXT: adcq %rax, %r13
+; X64-NEXT: movq %r15, %rsi
+; X64-NEXT: sarq $63, %rsi
+; X64-NEXT: imulq %rsi, %r11
+; X64-NEXT: movq %rsi, %rax
+; X64-NEXT: mulq %r14
+; X64-NEXT: movq %rdx, %r9
+; X64-NEXT: addq %r11, %r9
+; X64-NEXT: addq %rax, %r9
+; X64-NEXT: addq %rax, %r8
+; X64-NEXT: adcq %r13, %r9
+; X64-NEXT: addq %rcx, %rdi
+; X64-NEXT: adcq %rbx, %r10
+; X64-NEXT: adcq $0, %r8
+; X64-NEXT: adcq $0, %r9
+; X64-NEXT: movq %r9, %rax
+; X64-NEXT: sarq $63, %rax
+; X64-NEXT: movq %rbp, %rcx
+; X64-NEXT: sarq $63, %rcx
+; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %r8 ## 8-byte Folded Reload
+; X64-NEXT: adcq %rbp, %r9
+; X64-NEXT: movq %rcx, %rdx
+; X64-NEXT: adcq %rax, %rdx
+; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; X64-NEXT: adcq %rax, %rcx
+; X64-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; X64-NEXT: movq %r15, %r11
+; X64-NEXT: movq %r15, %rbp
+; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx ## 8-byte Reload
+; X64-NEXT: imulq %rcx, %r11
+; X64-NEXT: movq {{[0-9]+}}(%rsp), %r13
+; X64-NEXT: movq %r13, %rax
+; X64-NEXT: mulq %rcx
+; X64-NEXT: movq %rdx, %rcx
+; X64-NEXT: movq %rax, %r14
+; X64-NEXT: addq %rax, %rcx
+; X64-NEXT: addq %r11, %rcx
+; X64-NEXT: movq %r12, %r15
+; X64-NEXT: imulq %rsi, %r12
+; X64-NEXT: movq %rsi, %rax
+; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi ## 8-byte Reload
+; X64-NEXT: mulq %rsi
; X64-NEXT: movq %rax, %rbx
-; X64-NEXT: adcq %r13, %rdi
-; X64-NEXT: setb %r8b
+; X64-NEXT: movq %rdx, %r11
+; X64-NEXT: addq %r12, %r11
+; X64-NEXT: addq %rax, %r11
+; X64-NEXT: addq %r14, %rbx
+; X64-NEXT: adcq %rcx, %r11
+; X64-NEXT: movq %rsi, %rax
+; X64-NEXT: movq %rsi, %r12
+; X64-NEXT: mulq %r13
+; X64-NEXT: movq %rdx, %rsi
+; X64-NEXT: movq %rax, %rcx
+; X64-NEXT: movq %r15, %rax
+; X64-NEXT: mulq %r13
+; X64-NEXT: movq %rdx, %r14
+; X64-NEXT: movq %rax, %r13
+; X64-NEXT: addq %rsi, %r13
+; X64-NEXT: adcq $0, %r14
+; X64-NEXT: movq %r12, %rax
+; X64-NEXT: mulq %rbp
+; X64-NEXT: movq %rdx, %r12
+; X64-NEXT: movq %rax, %rsi
+; X64-NEXT: addq %r13, %rsi
+; X64-NEXT: adcq %r14, %r12
+; X64-NEXT: setb %r14b
+; X64-NEXT: movq %r15, %rax
+; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 ## 8-byte Reload
+; X64-NEXT: mulq %rbp
+; X64-NEXT: addq %r12, %rax
+; X64-NEXT: movzbl %r14b, %r14d
+; X64-NEXT: adcq %r14, %rdx
+; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 ## 8-byte Reload
+; X64-NEXT: addq %rbx, %rax
+; X64-NEXT: adcq %r11, %rdx
+; X64-NEXT: addq %r8, %rcx
+; X64-NEXT: adcq %r9, %rsi
+; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rax ## 8-byte Folded Reload
+; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rdx ## 8-byte Folded Reload
+; X64-NEXT: movq %r10, %r8
+; X64-NEXT: sarq $63, %r8
+; X64-NEXT: xorq %r8, %rax
+; X64-NEXT: xorq %r8, %rcx
+; X64-NEXT: orq %rax, %rcx
+; X64-NEXT: xorq %r8, %rdx
+; X64-NEXT: xorq %rsi, %r8
+; X64-NEXT: orq %rdx, %r8
+; X64-NEXT: orq %rcx, %r8
+; X64-NEXT: jmp LBB1_7
+; X64-NEXT: LBB1_4: ## %overflow.no.lhs
+; X64-NEXT: movq %rax, %rcx
+; X64-NEXT: xorq %rsi, %rcx
+; X64-NEXT: xorq %rbx, %rsi
+; X64-NEXT: orq %rcx, %rsi
+; X64-NEXT: je LBB1_5
+; X64-NEXT: ## %bb.3: ## %overflow.no.lhs.only
+; X64-NEXT: movq %r12, %rsi
+; X64-NEXT: sarq $63, %rsi
+; X64-NEXT: movq %r12, %rcx
+; X64-NEXT: xorq %rsi, %rcx
+; X64-NEXT: movq %rcx, %rdx
+; X64-NEXT: movq %r8, %rbp
+; X64-NEXT: xorq %rsi, %rbp
+; X64-NEXT: movq %r9, %rcx
+; X64-NEXT: movq %r11, %r13
+; X64-NEXT: xorq %rsi, %r13
+; X64-NEXT: movq %rdi, %r10
+; X64-NEXT: xorq %rsi, %r10
+; X64-NEXT: subq %rsi, %r10
+; X64-NEXT: sbbq %rsi, %r13
+; X64-NEXT: sbbq %rsi, %rbp
+; X64-NEXT: sbbq %rsi, %rdx
+; X64-NEXT: testq %r12, %r12
+; X64-NEXT: sets {{[-0-9]+}}(%r{{[sb]}}p) ## 1-byte Folded Spill
+; X64-NEXT: cmovnsq %r12, %rdx
+; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; X64-NEXT: cmovnsq %r8, %rbp
+; X64-NEXT: cmovnsq %r11, %r13
+; X64-NEXT: cmovnsq %rdi, %r10
+; X64-NEXT: movq %rbx, %rdx
+; X64-NEXT: sarq $63, %rdx
+; X64-NEXT: movq %rbx, %r12
+; X64-NEXT: xorq %rdx, %r12
+; X64-NEXT: movq %rax, %r14
+; X64-NEXT: xorq %rdx, %r14
+; X64-NEXT: xorq %rdx, %r9
+; X64-NEXT: movq %r15, %r11
+; X64-NEXT: xorq %rdx, %r11
+; X64-NEXT: subq %rdx, %r11
+; X64-NEXT: sbbq %rdx, %r9
+; X64-NEXT: sbbq %rdx, %r14
+; X64-NEXT: sbbq %rdx, %r12
+; X64-NEXT: testq %rbx, %rbx
+; X64-NEXT: sets {{[-0-9]+}}(%r{{[sb]}}p) ## 1-byte Folded Spill
+; X64-NEXT: cmovnsq %rbx, %r12
+; X64-NEXT: cmovnsq %rax, %r14
+; X64-NEXT: cmovnsq %rcx, %r9
+; X64-NEXT: cmovnsq %r15, %r11
; X64-NEXT: movq %r10, %rax
; X64-NEXT: mulq %r11
+; X64-NEXT: movq %rdx, %rcx
+; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; X64-NEXT: movq %r13, %rax
+; X64-NEXT: mulq %r11
+; X64-NEXT: movq %rdx, %rsi
+; X64-NEXT: movq %rax, %rdi
+; X64-NEXT: addq %rcx, %rdi
+; X64-NEXT: adcq $0, %rsi
+; X64-NEXT: movq %r10, %rax
+; X64-NEXT: mulq %r9
+; X64-NEXT: movq %rdx, %rcx
+; X64-NEXT: addq %rdi, %rax
+; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; X64-NEXT: adcq %rsi, %rcx
+; X64-NEXT: setb %al
+; X64-NEXT: movzbl %al, %r8d
+; X64-NEXT: movq %r13, %rax
+; X64-NEXT: mulq %r9
+; X64-NEXT: movq %rdx, %rsi
+; X64-NEXT: movq %rax, %rdi
+; X64-NEXT: addq %rcx, %rdi
+; X64-NEXT: adcq %r8, %rsi
+; X64-NEXT: imulq %rbp, %r9
+; X64-NEXT: movq %r11, %rax
+; X64-NEXT: mulq %rbp
+; X64-NEXT: movq %rax, %rcx
+; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 ## 8-byte Reload
+; X64-NEXT: imulq %r15, %r11
+; X64-NEXT: addq %rdx, %r11
+; X64-NEXT: addq %r9, %r11
+; X64-NEXT: addq %rdi, %rcx
+; X64-NEXT: adcq %rsi, %r11
+; X64-NEXT: movq %r13, %rax
+; X64-NEXT: mulq %r14
+; X64-NEXT: movq %rdx, %rsi
+; X64-NEXT: movq %rax, %r8
+; X64-NEXT: movq %r10, %rax
+; X64-NEXT: mulq %r14
+; X64-NEXT: movq %rax, %rdi
; X64-NEXT: movq %rdx, %r9
-; X64-NEXT: movq %rax, %r13
-; X64-NEXT: addq %rdi, %r13
-; X64-NEXT: movzbl %r8b, %eax
-; X64-NEXT: adcq %rax, %r9
-; X64-NEXT: movq %r11, %rdi
-; X64-NEXT: movq %r11, %r8
-; X64-NEXT: sarq $63, %rdi
-; X64-NEXT: imulq %rdi, %r10
-; X64-NEXT: movq %rdi, %rax
-; X64-NEXT: mulq {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Folded Reload
-; X64-NEXT: movq %rdx, %r11
-; X64-NEXT: addq %r10, %r11
-; X64-NEXT: addq %rax, %r11
-; X64-NEXT: addq %rax, %r13
-; X64-NEXT: adcq %r9, %r11
-; X64-NEXT: addq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Folded Spill
-; X64-NEXT: adcq %rbp, %rbx
-; X64-NEXT: movq %rbx, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
-; X64-NEXT: adcq $0, %r13
-; X64-NEXT: adcq $0, %r11
-; X64-NEXT: movq %r11, %rbp
-; X64-NEXT: sarq $63, %rbp
-; X64-NEXT: addq %r14, %r13
-; X64-NEXT: adcq %r15, %r11
+; X64-NEXT: addq %r8, %r9
+; X64-NEXT: adcq $0, %rsi
+; X64-NEXT: movq %r10, %rax
+; X64-NEXT: mulq %r12
+; X64-NEXT: movq %rdx, %r8
+; X64-NEXT: movq %rax, %r10
+; X64-NEXT: addq %r9, %r10
+; X64-NEXT: adcq %rsi, %r8
+; X64-NEXT: setb %al
+; X64-NEXT: movzbl %al, %ebx
+; X64-NEXT: movq %r13, %rax
+; X64-NEXT: mulq %r12
+; X64-NEXT: movq %rdx, %rsi
+; X64-NEXT: movq %rax, %r9
+; X64-NEXT: addq %r8, %r9
+; X64-NEXT: adcq %rbx, %rsi
+; X64-NEXT: movq %r14, %rax
+; X64-NEXT: mulq %rbp
+; X64-NEXT: imulq %r15, %r14
+; X64-NEXT: addq %rdx, %r14
+; X64-NEXT: imulq %rbp, %r12
+; X64-NEXT: addq %r14, %r12
+; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 ## 8-byte Reload
+; X64-NEXT: addq %r9, %rax
+; X64-NEXT: movq {{[0-9]+}}(%rsp), %r13
+; X64-NEXT: adcq %rsi, %r12
+; X64-NEXT: addq %rcx, %rdi
+; X64-NEXT: adcq %r11, %r10
+; X64-NEXT: adcq $0, %rax
+; X64-NEXT: adcq $0, %r12
+; X64-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %ecx ## 1-byte Folded Reload
+; X64-NEXT: xorb {{[-0-9]+}}(%r{{[sb]}}p), %cl ## 1-byte Folded Reload
+; X64-NEXT: movzbl %cl, %edx
+; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 ## 8-byte Reload
+; X64-NEXT: movq %rdx, %rcx
+; X64-NEXT: negq %rcx
+; X64-NEXT: xorq %rcx, %r15
+; X64-NEXT: xorq %rcx, %r14
+; X64-NEXT: addq %rdx, %r14
+; X64-NEXT: adcq $0, %r15
+; X64-NEXT: cmpq %rdx, %r14
+; X64-NEXT: movq %r15, %rdx
+; X64-NEXT: sbbq $0, %rdx
+; X64-NEXT: setb %dl
+; X64-NEXT: movzbl %dl, %edx
+; X64-NEXT: xorq %rcx, %r10
+; X64-NEXT: xorq %rcx, %rdi
+; X64-NEXT: addq %rdx, %rdi
+; X64-NEXT: adcq $0, %r10
+; X64-NEXT: xorq %rcx, %r12
+; X64-NEXT: xorq %rax, %rcx
+; X64-NEXT: cmpq %rdx, %rdi
+; X64-NEXT: movq %r10, %rax
+; X64-NEXT: sbbq $0, %rax
+; X64-NEXT: adcq $0, %rcx
+; X64-NEXT: adcq $0, %r12
+; X64-NEXT: orq %rcx, %r12
+; X64-NEXT: setne %al
+; X64-NEXT: jmp LBB1_8
+; X64-NEXT: LBB1_2: ## %overflow.no.rhs.only
+; X64-NEXT: movq %rbx, %rdx
+; X64-NEXT: sarq $63, %rdx
+; X64-NEXT: movq %rbx, %rcx
+; X64-NEXT: xorq %rdx, %rcx
+; X64-NEXT: movq %rax, %r14
+; X64-NEXT: xorq %rdx, %r14
+; X64-NEXT: movq %r9, %r13
+; X64-NEXT: xorq %rdx, %r13
+; X64-NEXT: movq %r15, %r10
+; X64-NEXT: xorq %rdx, %r10
+; X64-NEXT: subq %rdx, %r10
+; X64-NEXT: sbbq %rdx, %r13
+; X64-NEXT: sbbq %rdx, %r14
+; X64-NEXT: sbbq %rdx, %rcx
+; X64-NEXT: testq %rbx, %rbx
+; X64-NEXT: sets {{[-0-9]+}}(%r{{[sb]}}p) ## 1-byte Folded Spill
+; X64-NEXT: cmovnsq %rbx, %rcx
+; X64-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; X64-NEXT: cmovnsq %rax, %r14
+; X64-NEXT: cmovnsq %r9, %r13
+; X64-NEXT: cmovnsq %r15, %r10
; X64-NEXT: movq %r12, %rax
-; X64-NEXT: adcq %rbp, %rax
+; X64-NEXT: sarq $63, %rax
+; X64-NEXT: movq %r12, %rbp
+; X64-NEXT: xorq %rax, %rbp
+; X64-NEXT: movq %r12, %rsi
+; X64-NEXT: movq %r8, %r12
+; X64-NEXT: xorq %rax, %r12
+; X64-NEXT: movq %r11, %rbx
+; X64-NEXT: xorq %rax, %rbx
+; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx ## 8-byte Reload
+; X64-NEXT: movq %rdx, %rdi
+; X64-NEXT: xorq %rax, %rdi
+; X64-NEXT: subq %rax, %rdi
+; X64-NEXT: sbbq %rax, %rbx
+; X64-NEXT: sbbq %rax, %r12
+; X64-NEXT: sbbq %rax, %rbp
+; X64-NEXT: testq %rsi, %rsi
+; X64-NEXT: sets {{[-0-9]+}}(%r{{[sb]}}p) ## 1-byte Folded Spill
+; X64-NEXT: cmovnsq %rsi, %rbp
+; X64-NEXT: cmovnsq %r8, %r12
+; X64-NEXT: cmovnsq %r11, %rbx
+; X64-NEXT: cmovnsq %rdx, %rdi
+; X64-NEXT: movq %r10, %rax
+; X64-NEXT: mulq %rdi
+; X64-NEXT: movq %rdx, %rcx
; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
-; X64-NEXT: adcq %r12, %rbp
-; X64-NEXT: movq %r8, %rbx
-; X64-NEXT: imulq %rcx, %r8
-; X64-NEXT: movq {{[0-9]+}}(%rsp), %r15
-; X64-NEXT: movq %r15, %rax
-; X64-NEXT: mulq %rcx
+; X64-NEXT: movq %r13, %rax
+; X64-NEXT: mulq %rdi
; X64-NEXT: movq %rdx, %rsi
; X64-NEXT: movq %rax, %r9
-; X64-NEXT: addq %rax, %rsi
-; X64-NEXT: addq %r8, %rsi
-; X64-NEXT: movq %rdi, %rcx
-; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r12 ## 8-byte Reload
-; X64-NEXT: imulq %r12, %rcx
+; X64-NEXT: addq %rcx, %r9
+; X64-NEXT: adcq $0, %rsi
+; X64-NEXT: movq %r10, %rax
+; X64-NEXT: mulq %rbx
+; X64-NEXT: movq %rdx, %rcx
+; X64-NEXT: movq %rax, %r8
+; X64-NEXT: addq %r9, %r8
+; X64-NEXT: adcq %rsi, %rcx
+; X64-NEXT: setb %al
+; X64-NEXT: movzbl %al, %r9d
+; X64-NEXT: movq %r13, %rax
+; X64-NEXT: mulq %rbx
+; X64-NEXT: movq %rdx, %rsi
+; X64-NEXT: movq %rax, %r11
+; X64-NEXT: addq %rcx, %r11
+; X64-NEXT: adcq %r9, %rsi
+; X64-NEXT: imulq %r14, %rbx
; X64-NEXT: movq %rdi, %rax
-; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi ## 8-byte Reload
+; X64-NEXT: mulq %r14
+; X64-NEXT: movq %rax, %rcx
+; X64-NEXT: movq %rdx, %r9
+; X64-NEXT: imulq {{[-0-9]+}}(%r{{[sb]}}p), %rdi ## 8-byte Folded Reload
+; X64-NEXT: addq %rbx, %r9
+; X64-NEXT: addq %rdi, %r9
+; X64-NEXT: addq %r11, %rcx
+; X64-NEXT: adcq %rsi, %r9
+; X64-NEXT: movq %r13, %rax
+; X64-NEXT: mulq %r12
+; X64-NEXT: movq %rdx, %rsi
+; X64-NEXT: movq %rax, %r11
+; X64-NEXT: movq %r10, %rax
+; X64-NEXT: mulq %r12
+; X64-NEXT: movq %rax, %rdi
+; X64-NEXT: movq %rdx, %rbx
+; X64-NEXT: addq %r11, %rbx
+; X64-NEXT: adcq $0, %rsi
+; X64-NEXT: movq %r10, %rax
+; X64-NEXT: mulq %rbp
+; X64-NEXT: movq %rdx, %r11
+; X64-NEXT: movq %rax, %r10
+; X64-NEXT: addq %rbx, %r10
+; X64-NEXT: adcq %rsi, %r11
+; X64-NEXT: setb %al
+; X64-NEXT: movzbl %al, %r15d
+; X64-NEXT: movq %r13, %rax
+; X64-NEXT: mulq %rbp
+; X64-NEXT: movq %rdx, %rsi
+; X64-NEXT: movq %rax, %rbx
+; X64-NEXT: addq %r11, %rbx
+; X64-NEXT: adcq %r15, %rsi
+; X64-NEXT: movq %r8, %r15
+; X64-NEXT: movq %r12, %rax
+; X64-NEXT: mulq %r14
+; X64-NEXT: imulq {{[-0-9]+}}(%r{{[sb]}}p), %r12 ## 8-byte Folded Reload
+; X64-NEXT: imulq %r14, %rbp
+; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 ## 8-byte Reload
+; X64-NEXT: addq %rdx, %rbp
+; X64-NEXT: addq %r12, %rbp
+; X64-NEXT: addq %rbx, %rax
+; X64-NEXT: adcq %rsi, %rbp
+; X64-NEXT: addq %rcx, %rdi
+; X64-NEXT: adcq %r9, %r10
+; X64-NEXT: adcq $0, %rax
+; X64-NEXT: adcq $0, %rbp
+; X64-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %ecx ## 1-byte Folded Reload
+; X64-NEXT: xorb {{[-0-9]+}}(%r{{[sb]}}p), %cl ## 1-byte Folded Reload
+; X64-NEXT: movzbl %cl, %edx
+; X64-NEXT: movq %rdx, %rcx
+; X64-NEXT: negq %rcx
+; X64-NEXT: xorq %rcx, %r15
+; X64-NEXT: xorq %rcx, %r14
+; X64-NEXT: addq %rdx, %r14
+; X64-NEXT: adcq $0, %r15
+; X64-NEXT: cmpq %rdx, %r14
+; X64-NEXT: movq %r15, %rdx
+; X64-NEXT: sbbq $0, %rdx
+; X64-NEXT: setb %dl
+; X64-NEXT: movzbl %dl, %edx
+; X64-NEXT: xorq %rcx, %r10
+; X64-NEXT: xorq %rcx, %rdi
+; X64-NEXT: addq %rdx, %rdi
+; X64-NEXT: adcq $0, %r10
+; X64-NEXT: xorq %rcx, %rbp
+; X64-NEXT: xorq %rax, %rcx
+; X64-NEXT: cmpq %rdx, %rdi
+; X64-NEXT: movq %r10, %rax
+; X64-NEXT: sbbq $0, %rax
+; X64-NEXT: adcq $0, %rcx
+; X64-NEXT: adcq $0, %rbp
+; X64-NEXT: orq %rcx, %rbp
+; X64-NEXT: LBB1_7: ## %overflow.res
+; X64-NEXT: setne %al
+; X64-NEXT: movq {{[0-9]+}}(%rsp), %r13
+; X64-NEXT: jmp LBB1_8
+; X64-NEXT: LBB1_5: ## %overflow.no
+; X64-NEXT: movq %rax, %rcx
+; X64-NEXT: imulq %r11, %rcx
; X64-NEXT: mulq %rdi
+; X64-NEXT: movq %rax, %rsi
+; X64-NEXT: addq %rcx, %rdx
+; X64-NEXT: imulq %rdi, %rbx
+; X64-NEXT: addq %rdx, %rbx
+; X64-NEXT: movq %r15, %rax
+; X64-NEXT: mulq %r8
+; X64-NEXT: movq %rax, %rcx
+; X64-NEXT: imulq %r15, %r12
+; X64-NEXT: addq %rdx, %r12
+; X64-NEXT: imulq %r9, %r8
+; X64-NEXT: addq %r12, %r8
+; X64-NEXT: addq %rsi, %rcx
+; X64-NEXT: adcq %rbx, %r8
+; X64-NEXT: movq %rdi, %rax
+; X64-NEXT: mulq %r15
+; X64-NEXT: movq %rdx, %rsi
; X64-NEXT: movq %rax, %r14
+; X64-NEXT: movq %r11, %rax
+; X64-NEXT: mulq %r15
; X64-NEXT: movq %rdx, %r10
-; X64-NEXT: addq %rcx, %r10
-; X64-NEXT: addq %rax, %r10
-; X64-NEXT: addq %r9, %r14
-; X64-NEXT: adcq %rsi, %r10
+; X64-NEXT: movq %rax, %rbx
+; X64-NEXT: addq %rsi, %rbx
+; X64-NEXT: adcq $0, %r10
; X64-NEXT: movq %rdi, %rax
-; X64-NEXT: movq %rdi, %rcx
-; X64-NEXT: mulq %r15
-; X64-NEXT: movq %rdx, %rdi
-; X64-NEXT: movq %rax, %rsi
-; X64-NEXT: movq %r12, %rax
-; X64-NEXT: mulq %r15
-; X64-NEXT: movq %rdx, %r9
+; X64-NEXT: mulq %r9
+; X64-NEXT: movq %rdx, %rsi
; X64-NEXT: movq %rax, %r15
-; X64-NEXT: addq %rdi, %r15
-; X64-NEXT: adcq $0, %r9
-; X64-NEXT: movq %rcx, %rax
-; X64-NEXT: mulq %rbx
-; X64-NEXT: movq %rdx, %r8
+; X64-NEXT: addq %rbx, %r15
+; X64-NEXT: adcq %r10, %rsi
+; X64-NEXT: setb %al
+; X64-NEXT: movzbl %al, %ebx
+; X64-NEXT: movq %r11, %rax
+; X64-NEXT: mulq %r9
+; X64-NEXT: movq %rdx, %r10
; X64-NEXT: movq %rax, %rdi
-; X64-NEXT: addq %r15, %rdi
-; X64-NEXT: adcq %r9, %r8
-; X64-NEXT: setb %cl
-; X64-NEXT: movq %r12, %rax
-; X64-NEXT: mulq %rbx
-; X64-NEXT: addq %r8, %rax
-; X64-NEXT: movzbl %cl, %ecx
-; X64-NEXT: adcq %rcx, %rdx
-; X64-NEXT: addq %r14, %rax
-; X64-NEXT: adcq %r10, %rdx
-; X64-NEXT: addq %r13, %rsi
-; X64-NEXT: adcq %r11, %rdi
-; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rax ## 8-byte Folded Reload
-; X64-NEXT: adcq %rbp, %rdx
-; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 ## 8-byte Reload
-; X64-NEXT: movq %r8, %rcx
-; X64-NEXT: sarq $63, %rcx
-; X64-NEXT: xorq %rcx, %rax
-; X64-NEXT: xorq %rcx, %rsi
-; X64-NEXT: orq %rax, %rsi
-; X64-NEXT: xorq %rcx, %rdx
-; X64-NEXT: xorq %rdi, %rcx
-; X64-NEXT: orq %rdx, %rcx
-; X64-NEXT: orq %rsi, %rcx
-; X64-NEXT: movq {{[0-9]+}}(%rsp), %rax
-; X64-NEXT: movq %r8, 24(%rax)
-; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx ## 8-byte Reload
-; X64-NEXT: movq %rcx, (%rax)
-; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx ## 8-byte Reload
-; X64-NEXT: movq %rcx, 8(%rax)
-; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx ## 8-byte Reload
-; X64-NEXT: movq %rcx, 16(%rax)
-; X64-NEXT: setne %al
+; X64-NEXT: addq %rsi, %rdi
+; X64-NEXT: adcq %rbx, %r10
+; X64-NEXT: addq %rcx, %rdi
+; X64-NEXT: adcq %r8, %r10
+; X64-NEXT: xorl %eax, %eax
+; X64-NEXT: LBB1_8: ## %overflow.res
+; X64-NEXT: movq %r14, (%r13)
+; X64-NEXT: movq %r15, 8(%r13)
+; X64-NEXT: movq %rdi, 16(%r13)
+; X64-NEXT: movq %r10, 24(%r13)
+; X64-NEXT: andb $1, %al
+; X64-NEXT: ## kill: def $al killed $al killed $eax
; X64-NEXT: popq %rbx
; X64-NEXT: popq %r12
; X64-NEXT: popq %r13
@@ -514,7 +1407,7 @@ define zeroext i1 @smuloi256(i256 %v1, i256 %v2, ptr %res) {
; X64-NEXT: retq
;
; X86-LABEL: smuloi256:
-; X86: ## %bb.0:
+; X86: ## %bb.0: ## %overflow.entry
; X86-NEXT: pushl %ebp
; X86-NEXT: .cfi_def_cfa_offset 8
; X86-NEXT: pushl %ebx
@@ -529,334 +1422,1687 @@ define zeroext i1 @smuloi256(i256 %v1, i256 %v2, ptr %res) {
; X86-NEXT: .cfi_offset %edi, -16
; X86-NEXT: .cfi_offset %ebx, -12
; X86-NEXT: .cfi_offset %ebp, -8
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: sarl $31, %ecx
+; X86-NEXT: xorl %ecx, %eax
+; X86-NEXT: xorl %ecx, %edx
+; X86-NEXT: orl %eax, %edx
+; X86-NEXT: movl %edi, %eax
+; X86-NEXT: xorl %ecx, %eax
+; X86-NEXT: xorl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: orl %eax, %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: sarl $31, %eax
+; X86-NEXT: orl %edx, %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; X86-NEXT: je LBB1_12
+; X86-NEXT: ## %bb.1: ## %overflow.lhs
+; X86-NEXT: xorl %eax, %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: xorl %eax, %edx
+; X86-NEXT: orl %ecx, %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: xorl %eax, %ecx
+; X86-NEXT: xorl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: orl %ecx, %eax
+; X86-NEXT: orl %edx, %eax
+; X86-NEXT: je LBB1_2
+; X86-NEXT: ## %bb.14: ## %overflow
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: mull %ebx
+; X86-NEXT: movl %edx, %edi
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: mull %ebx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; X86-NEXT: movl %edx, %esi
+; X86-NEXT: movl %eax, %ecx
+; X86-NEXT: addl %edi, %ecx
+; X86-NEXT: adcl $0, %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: mull %ebp
+; X86-NEXT: movl %edx, %edi
+; X86-NEXT: addl %ecx, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: adcl %esi, %edi
+; X86-NEXT: setb %cl
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: mull %ebp
+; X86-NEXT: addl %edi, %eax
+; X86-NEXT: movl %eax, (%esp) ## 4-byte Spill
+; X86-NEXT: movzbl %cl, %eax
+; X86-NEXT: adcl %eax, %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, %ebp
+; X86-NEXT: mull %ebx
+; X86-NEXT: movl %edx, %ecx
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: movl %ebx, %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT: movl %ebx, %eax
+; X86-NEXT: mull %edx
+; X86-NEXT: movl %edx, %esi
+; X86-NEXT: movl %eax, %edi
+; X86-NEXT: addl %ecx, %edi
+; X86-NEXT: adcl $0, %esi
+; X86-NEXT: movl %ebp, %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; X86-NEXT: mull %ebp
+; X86-NEXT: movl %edx, %ecx
+; X86-NEXT: addl %edi, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: adcl %esi, %ecx
+; X86-NEXT: movl %ebx, %eax
+; X86-NEXT: setb %bl
+; X86-NEXT: movl %eax, %esi
+; X86-NEXT: mull %ebp
+; X86-NEXT: movl %edx, %ebp
+; X86-NEXT: movl %eax, %edi
+; X86-NEXT: addl %ecx, %edi
+; X86-NEXT: movzbl %bl, %eax
+; X86-NEXT: adcl %eax, %ebp
+; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Folded Reload
+; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp ## 4-byte Folded Reload
+; X86-NEXT: adcl $0, (%esp) ## 4-byte Folded Spill
+; X86-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: mull %ecx
+; X86-NEXT: movl %edx, %ebx
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: movl %esi, %eax
+; X86-NEXT: mull %ecx
+; X86-NEXT: movl %edx, %ecx
+; X86-NEXT: movl %eax, %esi
+; X86-NEXT: addl %ebx, %esi
+; X86-NEXT: adcl $0, %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: mull %edx
+; X86-NEXT: movl %edx, %ebx
+; X86-NEXT: addl %esi, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: adcl %ecx, %ebx
+; X86-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) ## 1-byte Folded Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: mull {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, %ecx
+; X86-NEXT: addl %ebx, %ecx
+; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 1-byte Folded Reload
+; X86-NEXT: adcl %eax, %edx
+; X86-NEXT: addl %edi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill
+; X86-NEXT: adcl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill
+; X86-NEXT: adcl $0, %ecx
+; X86-NEXT: adcl $0, %edx
+; X86-NEXT: addl (%esp), %ecx ## 4-byte Folded Reload
+; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Folded Reload
+; X86-NEXT: movl %edx, (%esp) ## 4-byte Spill
+; X86-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) ## 1-byte Folded Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: mull %edi
+; X86-NEXT: movl %edx, %ebp
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: mull %edi
+; X86-NEXT: movl %edx, %edi
+; X86-NEXT: movl %eax, %ebx
+; X86-NEXT: addl %ebp, %ebx
+; X86-NEXT: adcl $0, %edi
+; X86-NEXT: movl %esi, %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: mull %edx
+; X86-NEXT: movl %edx, %ebp
+; X86-NEXT: movl %eax, %esi
+; X86-NEXT: addl %ebx, %esi
+; X86-NEXT: adcl %edi, %ebp
+; X86-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) ## 1-byte Folded Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl %edi, %eax
+; X86-NEXT: mull {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, %ebx
+; X86-NEXT: addl %ebp, %ebx
+; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 1-byte Folded Reload
+; X86-NEXT: adcl %eax, %edx
+; X86-NEXT: addl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill
+; X86-NEXT: adcl (%esp), %esi ## 4-byte Folded Reload
+; X86-NEXT: movl %esi, (%esp) ## 4-byte Spill
+; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 1-byte Folded Reload
+; X86-NEXT: adcl %eax, %ebx
+; X86-NEXT: adcl $0, %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: movl %edi, %ebp
+; X86-NEXT: sarl $31, %ebp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: mull %ebp
+; X86-NEXT: movl %edx, %edi
+; X86-NEXT: movl %eax, %ecx
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: mull %ebp
+; X86-NEXT: movl %ebp, %esi
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: movl %eax, %ebp
+; X86-NEXT: addl %edi, %eax
+; X86-NEXT: adcl $0, %edx
+; X86-NEXT: addl %ecx, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: adcl %edi, %edx
+; X86-NEXT: movl %edx, %ecx
+; X86-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) ## 1-byte Folded Spill
+; X86-NEXT: movl %esi, %eax
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: imull {{[0-9]+}}(%esp), %esi
+; X86-NEXT: mull {{[0-9]+}}(%esp)
+; X86-NEXT: addl %esi, %edx
+; X86-NEXT: addl %eax, %edx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Reload
+; X86-NEXT: addl %edi, %eax
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Reload
+; X86-NEXT: adcl %esi, %edx
+; X86-NEXT: addl %ebp, %ecx
+; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ebp ## 1-byte Folded Reload
+; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp ## 4-byte Folded Reload
+; X86-NEXT: addl %eax, %ecx
+; X86-NEXT: adcl %edx, %ebp
+; X86-NEXT: addl %edi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill
+; X86-NEXT: adcl (%esp), %esi ## 4-byte Folded Reload
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: adcl %ebx, %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp ## 4-byte Folded Reload
+; X86-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, %ebx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: mull %esi
+; X86-NEXT: movl %edx, %ecx
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: mull %esi
+; X86-NEXT: movl %edx, %esi
+; X86-NEXT: movl %eax, %edi
+; X86-NEXT: addl %ecx, %edi
+; X86-NEXT: adcl $0, %esi
+; X86-NEXT: movl %ebx, %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: mull %ecx
+; X86-NEXT: movl %edx, %ebp
+; X86-NEXT: addl %edi, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: adcl %esi, %ebp
+; X86-NEXT: setb %bl
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: mull %ecx
+; X86-NEXT: addl %ebp, %eax
+; X86-NEXT: movl %eax, (%esp) ## 4-byte Spill
+; X86-NEXT: movzbl %bl, %eax
+; X86-NEXT: adcl %eax, %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: mull %esi
+; X86-NEXT: movl %edx, %edi
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; X86-NEXT: movl %ebp, %eax
+; X86-NEXT: mull %esi
+; X86-NEXT: movl %edx, %esi
+; X86-NEXT: movl %eax, %ebx
+; X86-NEXT: addl %edi, %ebx
+; X86-NEXT: adcl $0, %esi
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: mull %ecx
+; X86-NEXT: movl %edx, %edi
+; X86-NEXT: addl %ebx, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: adcl %esi, %edi
+; X86-NEXT: setb %bl
+; X86-NEXT: movl %ebp, %eax
+; X86-NEXT: mull %ecx
+; X86-NEXT: movl %eax, %ebp
+; X86-NEXT: addl %edi, %ebp
+; X86-NEXT: movzbl %bl, %eax
+; X86-NEXT: adcl %eax, %edx
+; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp ## 4-byte Folded Reload
+; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Folded Reload
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: adcl $0, (%esp) ## 4-byte Folded Spill
+; X86-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: movl %esi, %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: mull %ecx
+; X86-NEXT: movl %edx, %ebx
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: mull %ecx
+; X86-NEXT: movl %edx, %ecx
+; X86-NEXT: movl %eax, %edi
+; X86-NEXT: addl %ebx, %edi
+; X86-NEXT: adcl $0, %ecx
+; X86-NEXT: movl %esi, %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: mull %edx
+; X86-NEXT: movl %edx, %ebx
+; X86-NEXT: movl %eax, %esi
+; X86-NEXT: addl %edi, %esi
+; X86-NEXT: adcl %ecx, %ebx
+; X86-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) ## 1-byte Folded Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: mull {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, %edi
+; X86-NEXT: movl %eax, %ecx
+; X86-NEXT: addl %ebx, %ecx
+; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 1-byte Folded Reload
+; X86-NEXT: adcl %eax, %edi
+; X86-NEXT: addl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill
+; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Folded Reload
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: adcl $0, %ecx
+; X86-NEXT: adcl $0, %edi
+; X86-NEXT: addl (%esp), %ecx ## 4-byte Folded Reload
+; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Folded Reload
+; X86-NEXT: setb (%esp) ## 1-byte Folded Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: mull %esi
+; X86-NEXT: movl %edx, %ebx
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: mull %esi
+; X86-NEXT: movl %edx, %esi
+; X86-NEXT: movl %eax, %ebp
+; X86-NEXT: addl %ebx, %ebp
+; X86-NEXT: adcl $0, %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: mull {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, %ebx
+; X86-NEXT: addl %ebp, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: adcl %esi, %ebx
+; X86-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) ## 1-byte Folded Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; X86-NEXT: movl %ebp, %eax
+; X86-NEXT: mull {{[0-9]+}}(%esp)
+; X86-NEXT: addl %ebx, %eax
+; X86-NEXT: movl %eax, %ebx
+; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 1-byte Folded Reload
+; X86-NEXT: movl %edx, %esi
+; X86-NEXT: adcl %eax, %esi
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Reload
+; X86-NEXT: addl %ecx, %edx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Reload
+; X86-NEXT: adcl %edi, %ecx
+; X86-NEXT: movzbl (%esp), %eax ## 1-byte Folded Reload
+; X86-NEXT: adcl %eax, %ebx
+; X86-NEXT: adcl $0, %esi
+; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Folded Reload
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Folded Reload
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx ## 4-byte Folded Reload
+; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Folded Reload
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill
+; X86-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill
+; X86-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill
+; X86-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT: movl %ebx, %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: mull %ecx
+; X86-NEXT: movl %edx, %esi
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: movl %ebp, %eax
+; X86-NEXT: mull %ecx
+; X86-NEXT: movl %edx, %ecx
+; X86-NEXT: movl %eax, %edi
+; X86-NEXT: addl %esi, %edi
+; X86-NEXT: adcl $0, %ecx
+; X86-NEXT: movl %ebx, %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT: mull %ebx
+; X86-NEXT: movl %edx, %esi
+; X86-NEXT: addl %edi, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: adcl %ecx, %esi
+; X86-NEXT: setb %cl
+; X86-NEXT: movl %ebp, %eax
+; X86-NEXT: mull %ebx
+; X86-NEXT: movl %ebx, %ebp
+; X86-NEXT: addl %esi, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: movzbl %cl, %eax
+; X86-NEXT: adcl %eax, %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT: movl %ebx, %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: mull %ecx
+; X86-NEXT: movl %edx, %edi
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: mull %ecx
+; X86-NEXT: movl %edx, %ecx
+; X86-NEXT: movl %eax, %esi
+; X86-NEXT: addl %edi, %esi
+; X86-NEXT: adcl $0, %ecx
+; X86-NEXT: movl %ebx, %eax
+; X86-NEXT: movl %ebp, %edi
+; X86-NEXT: mull %ebp
+; X86-NEXT: movl %edx, %ebp
+; X86-NEXT: addl %esi, %eax
+; X86-NEXT: movl %eax, (%esp) ## 4-byte Spill
+; X86-NEXT: adcl %ecx, %ebp
+; X86-NEXT: setb %bl
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: mull %edi
+; X86-NEXT: movl %edx, %ecx
+; X86-NEXT: movl %eax, %esi
+; X86-NEXT: addl %ebp, %esi
+; X86-NEXT: movzbl %bl, %eax
+; X86-NEXT: adcl %eax, %ecx
+; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Folded Reload
+; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Folded Reload
+; X86-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill
+; X86-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT: mull %ebx
+; X86-NEXT: movl %edx, %edi
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: mull %ebx
+; X86-NEXT: movl %edx, %ebx
+; X86-NEXT: movl %eax, %ebp
+; X86-NEXT: addl %edi, %ebp
+; X86-NEXT: adcl $0, %ebx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: mull %edx
+; X86-NEXT: movl %edx, %edi
+; X86-NEXT: addl %ebp, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: adcl %ebx, %edi
+; X86-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) ## 1-byte Folded Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: mull {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, %ebx
+; X86-NEXT: movl %eax, %ebp
+; X86-NEXT: addl %edi, %ebp
+; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 1-byte Folded Reload
+; X86-NEXT: adcl %eax, %ebx
+; X86-NEXT: addl %esi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill
+; X86-NEXT: adcl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill
+; X86-NEXT: adcl $0, %ebp
+; X86-NEXT: adcl $0, %ebx
+; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp ## 4-byte Folded Reload
+; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx ## 4-byte Folded Reload
+; X86-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) ## 1-byte Folded Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: mull %esi
+; X86-NEXT: movl %edx, %ecx
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: mull %esi
+; X86-NEXT: movl %edx, %esi
+; X86-NEXT: movl %eax, %edi
+; X86-NEXT: addl %ecx, %edi
+; X86-NEXT: adcl $0, %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: mull {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, %ecx
+; X86-NEXT: addl %edi, %eax
+; X86-NEXT: movl %eax, %edi
+; X86-NEXT: adcl %esi, %ecx
+; X86-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) ## 1-byte Folded Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: mull %esi
+; X86-NEXT: addl %ecx, %eax
+; X86-NEXT: movl %eax, %ecx
+; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 1-byte Folded Reload
+; X86-NEXT: adcl %eax, %edx
+; X86-NEXT: addl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill
+; X86-NEXT: adcl %ebx, %edi
+; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 1-byte Folded Reload
+; X86-NEXT: adcl %eax, %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: adcl $0, %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: movl %esi, %ebp
+; X86-NEXT: sarl $31, %ebp
+; X86-NEXT: movl %ebp, %eax
+; X86-NEXT: mull {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, %edi
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: addl %edx, %edi
+; X86-NEXT: adcl $0, %edx
+; X86-NEXT: movl %edx, %ecx
+; X86-NEXT: movl %ebp, %eax
+; X86-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: mull {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: movl %eax, %ebx
+; X86-NEXT: addl %eax, %edi
+; X86-NEXT: adcl %edx, %ecx
+; X86-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) ## 1-byte Folded Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: imull %ebp, %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: mull %ebp
+; X86-NEXT: addl %eax, %edx
+; X86-NEXT: addl %esi, %edx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Reload
+; X86-NEXT: addl %esi, %eax
+; X86-NEXT: adcl %edi, %edx
+; X86-NEXT: addl %ebx, %ecx
+; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ebp ## 1-byte Folded Reload
+; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp ## 4-byte Folded Reload
+; X86-NEXT: addl %eax, %ecx
+; X86-NEXT: adcl %edx, %ebp
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Reload
+; X86-NEXT: addl %esi, %eax
+; X86-NEXT: movl %edi, %esi
+; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Folded Reload
+; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Folded Reload
+; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp ## 4-byte Folded Reload
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Reload
+; X86-NEXT: addl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Reload
+; X86-NEXT: adcl %edx, (%esp) ## 4-byte Folded Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Reload
+; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Folded Reload
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Reload
+; X86-NEXT: adcl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill
+; X86-NEXT: adcl $0, %eax
+; X86-NEXT: movl %eax, %edi
+; X86-NEXT: adcl $0, %esi
+; X86-NEXT: movl %esi, %ebx
+; X86-NEXT: adcl $0, %ecx
+; X86-NEXT: movl %ecx, %esi
+; X86-NEXT: movl %ebp, %ecx
+; X86-NEXT: adcl $0, %ecx
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: movl %ecx, %ebp
+; X86-NEXT: sarl $31, %eax
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Reload
+; X86-NEXT: movl %ecx, %edx
+; X86-NEXT: sarl $31, %edx
+; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Folded Reload
+; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx ## 4-byte Folded Reload
+; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Folded Reload
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: adcl %ecx, %ebp
+; X86-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: movl %edx, %ecx
+; X86-NEXT: adcl %eax, %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: movl %edx, %ecx
+; X86-NEXT: adcl %eax, %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: movl %edx, %ecx
+; X86-NEXT: adcl %eax, %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: adcl %eax, %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; X86-NEXT: movl %ebp, %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: mull %ecx
+; X86-NEXT: movl %edx, %edi
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: mull %ecx
+; X86-NEXT: movl %edx, %esi
+; X86-NEXT: movl %eax, %ecx
+; X86-NEXT: addl %edi, %ecx
+; X86-NEXT: adcl $0, %esi
+; X86-NEXT: movl %ebp, %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; X86-NEXT: mull %ebp
+; X86-NEXT: movl %edx, %edi
+; X86-NEXT: addl %ecx, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: adcl %esi, %edi
+; X86-NEXT: setb %cl
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: mull %ebp
+; X86-NEXT: addl %edi, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: movzbl %cl, %eax
+; X86-NEXT: adcl %eax, %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT: movl %ebx, %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: mull %esi
+; X86-NEXT: movl %edx, %ecx
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: mull %esi
+; X86-NEXT: movl %edx, %esi
+; X86-NEXT: movl %eax, %edi
+; X86-NEXT: addl %ecx, %edi
+; X86-NEXT: adcl $0, %esi
+; X86-NEXT: movl %ebx, %eax
+; X86-NEXT: mull %ebp
+; X86-NEXT: movl %edx, %ecx
+; X86-NEXT: addl %edi, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: adcl %esi, %ecx
+; X86-NEXT: setb %bl
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: mull %ebp
+; X86-NEXT: movl %edx, %esi
+; X86-NEXT: movl %eax, %edi
+; X86-NEXT: addl %ecx, %edi
+; X86-NEXT: movzbl %bl, %eax
+; X86-NEXT: adcl %eax, %esi
+; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Folded Reload
+; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Folded Reload
+; X86-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill
+; X86-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: mull %ecx
+; X86-NEXT: movl %edx, %ebx
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: mull %ecx
+; X86-NEXT: movl %edx, %ecx
+; X86-NEXT: movl %eax, %ebp
+; X86-NEXT: addl %ebx, %ebp
+; X86-NEXT: adcl $0, %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: mull %edx
+; X86-NEXT: movl %edx, %ebx
+; X86-NEXT: addl %ebp, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: adcl %ecx, %ebx
+; X86-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) ## 1-byte Folded Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: mull {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, %ecx
+; X86-NEXT: movl %eax, %ebp
+; X86-NEXT: addl %ebx, %ebp
+; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 1-byte Folded Reload
+; X86-NEXT: adcl %eax, %ecx
+; X86-NEXT: addl %edi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill
+; X86-NEXT: adcl %esi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill
+; X86-NEXT: adcl $0, %ebp
+; X86-NEXT: adcl $0, %ecx
+; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp ## 4-byte Folded Reload
+; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Folded Reload
+; X86-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) ## 1-byte Folded Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: mull %esi
+; X86-NEXT: movl %edx, %edi
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: mull %esi
+; X86-NEXT: movl %edx, %esi
+; X86-NEXT: movl %eax, %ebx
+; X86-NEXT: addl %edi, %ebx
+; X86-NEXT: adcl $0, %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: mull %edx
+; X86-NEXT: movl %edx, %edi
+; X86-NEXT: addl %ebx, %eax
+; X86-NEXT: movl %eax, %ebx
+; X86-NEXT: adcl %esi, %edi
+; X86-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) ## 1-byte Folded Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: mull {{[0-9]+}}(%esp)
+; X86-NEXT: addl %edi, %eax
+; X86-NEXT: movl %eax, %esi
+; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 1-byte Folded Reload
+; X86-NEXT: adcl %eax, %edx
+; X86-NEXT: addl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill
+; X86-NEXT: adcl %ecx, %ebx
+; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 1-byte Folded Reload
+; X86-NEXT: adcl %eax, %esi
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: adcl $0, %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx ## 4-byte Reload
+; X86-NEXT: movl %ebx, %eax
+; X86-NEXT: mull {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, %edi
+; X86-NEXT: movl %eax, %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: mull %ebx
+; X86-NEXT: movl %edx, %ebp
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: movl %eax, %esi
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: addl %edi, %esi
+; X86-NEXT: adcl $0, %ebp
+; X86-NEXT: addl %ecx, %esi
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: adcl %edi, %ebp
+; X86-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) ## 1-byte Folded Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: imull %ebx, %edi
+; X86-NEXT: movl %ebx, %eax
+; X86-NEXT: mull {{[0-9]+}}(%esp)
+; X86-NEXT: addl %edi, %edx
+; X86-NEXT: addl %eax, %edx
+; X86-NEXT: addl %ecx, %eax
+; X86-NEXT: adcl %esi, %edx
+; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp ## 4-byte Folded Reload
+; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 1-byte Folded Reload
+; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Folded Reload
+; X86-NEXT: addl %eax, %ebp
+; X86-NEXT: adcl %edx, %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Reload
+; X86-NEXT: movl %edi, %eax
+; X86-NEXT: mull {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, %ecx
+; X86-NEXT: movl %eax, %ebx
+; X86-NEXT: movl %eax, %esi
+; X86-NEXT: addl %edx, %esi
+; X86-NEXT: adcl $0, %ecx
+; X86-NEXT: movl %edi, %eax
+; X86-NEXT: mull {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: addl %eax, %esi
+; X86-NEXT: adcl %edx, %ecx
+; X86-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) ## 1-byte Folded Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Reload
+; X86-NEXT: imull %edi, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: mull %edi
+; X86-NEXT: movl %eax, %edi
+; X86-NEXT: addl %eax, %edx
+; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Folded Reload
+; X86-NEXT: addl %ebx, %edi
+; X86-NEXT: adcl %esi, %edx
+; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Folded Reload
+; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 1-byte Folded Reload
+; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Folded Reload
+; X86-NEXT: addl %edi, %ecx
+; X86-NEXT: adcl %edx, %eax
+; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx ## 4-byte Folded Reload
+; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Folded Reload
+; X86-NEXT: adcl %ebp, %ecx
+; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Folded Reload
+; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx ## 4-byte Folded Reload
+; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Folded Reload
+; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Folded Reload
+; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Folded Reload
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Reload
+; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Folded Reload
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp ## 4-byte Reload
+; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp ## 4-byte Folded Reload
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Reload
+; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Folded Reload
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Reload
+; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Folded Reload
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx ## 4-byte Folded Reload
+; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Folded Reload
+; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Folded Reload
+; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Folded Reload
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Reload
+; X86-NEXT: sarl $31, %edx
+; X86-NEXT: xorl %edx, %ebx
+; X86-NEXT: xorl %edx, %edi
+; X86-NEXT: orl %ebx, %edi
+; X86-NEXT: movl %edi, %ebx
+; X86-NEXT: xorl %edx, %ecx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Reload
+; X86-NEXT: xorl %edx, %edi
+; X86-NEXT: orl %ecx, %edi
+; X86-NEXT: orl %ebx, %edi
+; X86-NEXT: xorl %edx, %esi
+; X86-NEXT: movl %ebp, %ecx
+; X86-NEXT: xorl %edx, %ecx
+; X86-NEXT: orl %esi, %ecx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Reload
+; X86-NEXT: xorl %edx, %eax
+; X86-NEXT: xorl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Folded Reload
+; X86-NEXT: orl %eax, %edx
+; X86-NEXT: orl %ecx, %edx
+; X86-NEXT: orl %edi, %edx
+; X86-NEXT: jmp LBB1_15
+; X86-NEXT: LBB1_12: ## %overflow.no.lhs
+; X86-NEXT: xorl %eax, %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: xorl %eax, %edx
+; X86-NEXT: orl %ecx, %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: xorl %eax, %ecx
+; X86-NEXT: xorl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: orl %ecx, %eax
+; X86-NEXT: orl %edx, %eax
+; X86-NEXT: je LBB1_13
+; X86-NEXT: ## %bb.7: ## %overflow.no.lhs.only
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: movl %esi, %eax
+; X86-NEXT: sarl $31, %eax
+; X86-NEXT: xorl %eax, %esi
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: xorl %eax, %edi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: xorl %eax, %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: xorl %eax, %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; X86-NEXT: xorl %eax, %ebp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT: xorl %eax, %ebx
+; X86-NEXT: xorl %eax, %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: xorl %eax, %esi
+; X86-NEXT: subl %eax, %esi
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: sbbl %eax, %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: sbbl %eax, %ebx
+; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: sbbl %eax, %ebp
+; X86-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: sbbl %eax, %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Reload
+; X86-NEXT: sbbl %eax, %ecx
+; X86-NEXT: sbbl %eax, %edi
+; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: sbbl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill
+; X86-NEXT: cmpl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: sets {{[-0-9]+}}(%e{{[sb]}}p) ## 1-byte Folded Spill
+; X86-NEXT: js LBB1_9
+; X86-NEXT: ## %bb.8: ## %overflow.no.lhs.only
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: LBB1_9: ## %overflow.no.lhs.only
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: sarl $31, %eax
+; X86-NEXT: xorl %eax, %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: xorl %eax, %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: xorl %eax, %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: xorl %eax, %edi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: xorl %eax, %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: xorl %eax, %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; X86-NEXT: xorl %eax, %ebp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT: movl %ebx, %edx
+; X86-NEXT: xorl %eax, %edx
+; X86-NEXT: subl %eax, %edx
+; X86-NEXT: sbbl %eax, %ebp
+; X86-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: sbbl %eax, %esi
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: sbbl %eax, %ecx
+; X86-NEXT: movl %ecx, (%esp) ## 4-byte Spill
+; X86-NEXT: sbbl %eax, %edi
+; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: sbbl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Reload
+; X86-NEXT: sbbl %eax, %ecx
+; X86-NEXT: sbbl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: testl %eax, %eax
+; X86-NEXT: sets {{[-0-9]+}}(%e{{[sb]}}p) ## 1-byte Folded Spill
+; X86-NEXT: js LBB1_11
+; X86-NEXT: ## %bb.10: ## %overflow.no.lhs.only
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, (%esp) ## 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: movl %ebx, %edx
+; X86-NEXT: LBB1_11: ## %overflow.no.lhs.only
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Reload
+; X86-NEXT: movl %edi, %eax
+; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: movl %edx, %ecx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: mull %edx
+; X86-NEXT: movl %edx, %ebp
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Reload
+; X86-NEXT: movl %esi, %eax
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: mull %ecx
+; X86-NEXT: movl %edx, %ecx
+; X86-NEXT: movl %eax, %ebx
+; X86-NEXT: addl %ebp, %ebx
+; X86-NEXT: adcl $0, %ecx
+; X86-NEXT: movl %edi, %eax
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp ## 4-byte Reload
+; X86-NEXT: mull %ebp
+; X86-NEXT: movl %edx, %edi
+; X86-NEXT: addl %ebx, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: adcl %ecx, %edi
+; X86-NEXT: setb %cl
+; X86-NEXT: movl %esi, %eax
+; X86-NEXT: mull %ebp
+; X86-NEXT: movl %ebp, %esi
+; X86-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: addl %edi, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: movzbl %cl, %eax
+; X86-NEXT: adcl %eax, %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx ## 4-byte Reload
+; X86-NEXT: movl %ebx, %eax
+; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Reload
+; X86-NEXT: mull %ecx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp ## 4-byte Reload
+; X86-NEXT: movl %ebp, %eax
+; X86-NEXT: mull %ecx
+; X86-NEXT: movl %edx, %ecx
+; X86-NEXT: movl %eax, %edi
+; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Folded Reload
+; X86-NEXT: adcl $0, %ecx
+; X86-NEXT: movl %ebx, %eax
+; X86-NEXT: mull %esi
+; X86-NEXT: movl %edx, %ebx
+; X86-NEXT: addl %edi, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: adcl %ecx, %ebx
+; X86-NEXT: setb %cl
+; X86-NEXT: movl %ebp, %eax
+; X86-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: mull %esi
+; X86-NEXT: movl %eax, %edi
+; X86-NEXT: addl %ebx, %edi
+; X86-NEXT: movzbl %cl, %eax
+; X86-NEXT: adcl %eax, %edx
+; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Folded Reload
+; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Folded Reload
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill
+; X86-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill
+; X86-NEXT: movl %ebp, %eax
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Reload
+; X86-NEXT: mull %ecx
+; X86-NEXT: movl %edx, %esi
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp ## 4-byte Reload
+; X86-NEXT: movl %ebp, %eax
+; X86-NEXT: mull %ecx
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: movl %edx, %ebx
+; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx ## 4-byte Folded Reload
+; X86-NEXT: adcl $0, %esi
+; X86-NEXT: movl %ebp, %eax
+; X86-NEXT: movl (%esp), %ecx ## 4-byte Reload
+; X86-NEXT: mull %ecx
+; X86-NEXT: movl %ecx, (%esp) ## 4-byte Spill
+; X86-NEXT: movl %edx, %ebp
+; X86-NEXT: addl %ebx, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: adcl %esi, %ebp
+; X86-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) ## 1-byte Folded Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Reload
+; X86-NEXT: mull %ecx
+; X86-NEXT: movl %eax, %ebx
+; X86-NEXT: addl %ebp, %ebx
+; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 1-byte Folded Reload
+; X86-NEXT: adcl %eax, %edx
+; X86-NEXT: addl %edi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Reload
+; X86-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill
+; X86-NEXT: adcl $0, %ebx
+; X86-NEXT: adcl $0, %edx
+; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx ## 4-byte Folded Reload
+; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Folded Reload
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) ## 1-byte Folded Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Reload
+; X86-NEXT: movl %esi, %eax
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Reload
+; X86-NEXT: mull %ecx
+; X86-NEXT: movl %edx, %ebp
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Reload
+; X86-NEXT: mull %ecx
+; X86-NEXT: movl %edx, %ecx
+; X86-NEXT: movl %eax, %edi
+; X86-NEXT: addl %ebp, %edi
+; X86-NEXT: adcl $0, %ecx
+; X86-NEXT: movl %esi, %eax
+; X86-NEXT: movl (%esp), %esi ## 4-byte Reload
+; X86-NEXT: mull %esi
+; X86-NEXT: movl %edx, %ebp
+; X86-NEXT: addl %edi, %eax
+; X86-NEXT: movl %eax, %edi
+; X86-NEXT: adcl %ecx, %ebp
+; X86-NEXT: setb %cl
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Reload
+; X86-NEXT: mull %esi
+; X86-NEXT: addl %ebp, %eax
+; X86-NEXT: movl %eax, %esi
+; X86-NEXT: movzbl %cl, %eax
+; X86-NEXT: adcl %eax, %edx
+; X86-NEXT: addl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill
+; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Folded Reload
+; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 1-byte Folded Reload
+; X86-NEXT: adcl %eax, %esi
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: adcl $0, %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Reload
+; X86-NEXT: movl %edi, %eax
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx ## 4-byte Reload
+; X86-NEXT: mull %ebx
+; X86-NEXT: movl %edx, %esi
+; X86-NEXT: movl %eax, %ebp
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Reload
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: mull %ebx
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: movl %edx, %ebx
+; X86-NEXT: addl %ebp, %ebx
+; X86-NEXT: adcl $0, %esi
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Reload
+; X86-NEXT: mull %ecx
+; X86-NEXT: movl %edx, %ebp
+; X86-NEXT: addl %ebx, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: adcl %esi, %ebp
+; X86-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) ## 1-byte Folded Spill
+; X86-NEXT: movl %edi, %eax
+; X86-NEXT: movl %ecx, %esi
+; X86-NEXT: mull %ecx
+; X86-NEXT: movl %edx, %edi
+; X86-NEXT: movl %eax, %ebx
+; X86-NEXT: addl %ebp, %ebx
+; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 1-byte Folded Reload
+; X86-NEXT: adcl %eax, %edi
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Reload
+; X86-NEXT: movl (%esp), %ebp ## 4-byte Reload
+; X86-NEXT: imull %eax, %ebp
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Reload
+; X86-NEXT: mull %ecx
+; X86-NEXT: movl %eax, (%esp) ## 4-byte Spill
+; X86-NEXT: addl %ebp, %edx
+; X86-NEXT: imull %esi, %ecx
+; X86-NEXT: addl %edx, %ecx
+; X86-NEXT: movl %ecx, %ebp
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Reload
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Reload
+; X86-NEXT: imull %eax, %ecx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Reload
+; X86-NEXT: mull %esi
+; X86-NEXT: addl %ecx, %edx
+; X86-NEXT: movl %esi, %ecx
+; X86-NEXT: imull {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Folded Reload
+; X86-NEXT: addl %edx, %ecx
+; X86-NEXT: addl (%esp), %eax ## 4-byte Folded Reload
+; X86-NEXT: adcl %ebp, %ecx
+; X86-NEXT: addl %ebx, %eax
+; X86-NEXT: adcl %edi, %ecx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Reload
+; X86-NEXT: addl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Reload
+; X86-NEXT: adcl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill
+; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Folded Reload
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Folded Reload
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp ## 4-byte Reload
+; X86-NEXT: movl %ebp, %eax
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Reload
+; X86-NEXT: mull %ecx
+; X86-NEXT: movl %edx, %esi
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx ## 4-byte Reload
+; X86-NEXT: movl %ebx, %eax
+; X86-NEXT: mull %ecx
+; X86-NEXT: movl %edx, %ecx
+; X86-NEXT: movl %eax, %edi
+; X86-NEXT: addl %esi, %edi
+; X86-NEXT: adcl $0, %ecx
+; X86-NEXT: movl %ebp, %eax
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp ## 4-byte Reload
+; X86-NEXT: mull %ebp
+; X86-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: movl %edx, %esi
+; X86-NEXT: addl %edi, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: adcl %ecx, %esi
+; X86-NEXT: setb %cl
+; X86-NEXT: movl %ebx, %eax
+; X86-NEXT: mull %ebp
+; X86-NEXT: addl %esi, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: movzbl %cl, %eax
+; X86-NEXT: adcl %eax, %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp ## 4-byte Reload
+; X86-NEXT: movl %ebp, %eax
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Reload
+; X86-NEXT: mull %ecx
+; X86-NEXT: movl %edx, %esi
+; X86-NEXT: movl %eax, %edi
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx ## 4-byte Reload
+; X86-NEXT: movl %ebx, %eax
+; X86-NEXT: mull %ecx
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: movl %edx, %ecx
+; X86-NEXT: addl %edi, %ecx
+; X86-NEXT: adcl $0, %esi
+; X86-NEXT: movl %ebx, %eax
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Reload
+; X86-NEXT: mull %edi
+; X86-NEXT: movl %edx, %ebx
+; X86-NEXT: addl %ecx, %eax
+; X86-NEXT: movl %eax, (%esp) ## 4-byte Spill
+; X86-NEXT: adcl %esi, %ebx
+; X86-NEXT: setb %cl
+; X86-NEXT: movl %ebp, %eax
+; X86-NEXT: mull %edi
+; X86-NEXT: movl %edx, %esi
+; X86-NEXT: movl %eax, %edi
+; X86-NEXT: addl %ebx, %edi
+; X86-NEXT: movzbl %cl, %eax
+; X86-NEXT: adcl %eax, %esi
+; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Folded Reload
+; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Folded Reload
+; X86-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill
+; X86-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill
+; X86-NEXT: movl %ebp, %eax
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp ## 4-byte Reload
+; X86-NEXT: mull %ebp
+; X86-NEXT: movl %edx, %ebx
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Reload
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: mull %ebp
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: movl %edx, %ebp
+; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp ## 4-byte Folded Reload
+; X86-NEXT: adcl $0, %ebx
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: mull {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Reload
+; X86-NEXT: movl %edx, %ecx
+; X86-NEXT: addl %ebp, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: adcl %ebx, %ecx
+; X86-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) ## 1-byte Folded Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Reload
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp ## 4-byte Reload
+; X86-NEXT: mull %ebp
+; X86-NEXT: movl %eax, %ebx
+; X86-NEXT: addl %ecx, %ebx
+; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 1-byte Folded Reload
+; X86-NEXT: adcl %eax, %edx
+; X86-NEXT: addl %edi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill
+; X86-NEXT: adcl %esi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill
+; X86-NEXT: adcl $0, %ebx
+; X86-NEXT: adcl $0, %edx
+; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx ## 4-byte Folded Reload
+; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Folded Reload
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) ## 1-byte Folded Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Reload
+; X86-NEXT: movl %edi, %eax
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Reload
+; X86-NEXT: mull %ecx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Reload
+; X86-NEXT: mull %ecx
+; X86-NEXT: movl %edx, %ecx
+; X86-NEXT: movl %eax, %esi
+; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Folded Reload
+; X86-NEXT: adcl $0, %ecx
+; X86-NEXT: movl %edi, %eax
+; X86-NEXT: mull %ebp
+; X86-NEXT: movl %edx, %edi
+; X86-NEXT: addl %esi, %eax
+; X86-NEXT: movl %eax, %esi
+; X86-NEXT: adcl %ecx, %edi
+; X86-NEXT: setb %cl
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Reload
+; X86-NEXT: mull %ebp
+; X86-NEXT: addl %edi, %eax
+; X86-NEXT: movl %eax, %edi
+; X86-NEXT: movzbl %cl, %eax
+; X86-NEXT: adcl %eax, %edx
+; X86-NEXT: addl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill
+; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Folded Reload
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 1-byte Folded Reload
+; X86-NEXT: adcl %eax, %edi
+; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: adcl $0, %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Reload
+; X86-NEXT: movl %eax, %ecx
+; X86-NEXT: imull {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Folded Reload
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Reload
+; X86-NEXT: mull %esi
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: addl %ecx, %edx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Reload
+; X86-NEXT: imull %esi, %edi
+; X86-NEXT: addl %edx, %edi
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Reload
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp ## 4-byte Reload
+; X86-NEXT: mull %ebp
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx ## 4-byte Reload
+; X86-NEXT: imull %ebx, %ebp
+; X86-NEXT: addl %edx, %ebp
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Reload
+; X86-NEXT: imull %ecx, %edx
+; X86-NEXT: addl %edx, %ebp
+; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Folded Reload
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: adcl %edi, %ebp
+; X86-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: movl %esi, %eax
+; X86-NEXT: mull %ecx
+; X86-NEXT: movl %ecx, %esi
+; X86-NEXT: movl %edx, %ecx
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp ## 4-byte Reload
+; X86-NEXT: movl %ebp, %eax
+; X86-NEXT: mull %esi
+; X86-NEXT: movl %edx, %edi
+; X86-NEXT: movl %eax, %esi
+; X86-NEXT: addl %ecx, %esi
+; X86-NEXT: adcl $0, %edi
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Reload
+; X86-NEXT: mull %ebx
+; X86-NEXT: movl %edx, %ecx
+; X86-NEXT: addl %esi, %eax
+; X86-NEXT: movl %eax, %esi
+; X86-NEXT: adcl %edi, %ecx
+; X86-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) ## 1-byte Folded Spill
+; X86-NEXT: movl %ebp, %eax
+; X86-NEXT: mull %ebx
+; X86-NEXT: movl %edx, %edi
+; X86-NEXT: movl %eax, %ebx
+; X86-NEXT: addl %ecx, %ebx
+; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 1-byte Folded Reload
+; X86-NEXT: adcl %ecx, %edi
+; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx ## 4-byte Folded Reload
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp ## 4-byte Reload
+; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Folded Reload
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Reload
+; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Folded Reload
+; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Folded Reload
+; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx ## 4-byte Folded Reload
+; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Folded Reload
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Reload
+; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Folded Reload
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: movl (%esp), %eax ## 4-byte Reload
+; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Folded Reload
+; X86-NEXT: movl %eax, (%esp) ## 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Reload
+; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Folded Reload
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Reload
+; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Folded Reload
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: adcl $0, %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: adcl $0, %esi
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: adcl $0, %ebx
+; X86-NEXT: adcl $0, %edi
+; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 1-byte Folded Reload
+; X86-NEXT: xorb {{[-0-9]+}}(%e{{[sb]}}p), %cl ## 1-byte Folded Reload
+; X86-NEXT: movzbl %cl, %esi
+; X86-NEXT: movl %esi, %ecx
+; X86-NEXT: negl %ecx
+; X86-NEXT: xorl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Reload
+; X86-NEXT: xorl %ecx, %edx
+; X86-NEXT: xorl %ecx, %ebp
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Reload
+; X86-NEXT: xorl %ecx, %eax
+; X86-NEXT: addl %esi, %eax
+; X86-NEXT: adcl $0, %ebp
+; X86-NEXT: adcl $0, %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Reload
+; X86-NEXT: adcl $0, %edx
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: cmpl %esi, %eax
+; X86-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: movl %ebp, %esi
+; X86-NEXT: sbbl $0, %esi
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Reload
+; X86-NEXT: sbbl $0, %esi
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: sbbl $0, %edx
+; X86-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) ## 1-byte Folded Spill
+; X86-NEXT: xorl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Reload
+; X86-NEXT: xorl %ecx, %edx
+; X86-NEXT: movl (%esp), %ebp ## 4-byte Reload
+; X86-NEXT: xorl %ecx, %ebp
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Reload
+; X86-NEXT: xorl %ecx, %eax
+; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 1-byte Folded Reload
+; X86-NEXT: addl %esi, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: adcl $0, %ebp
+; X86-NEXT: movl %ebp, (%esp) ## 4-byte Spill
+; X86-NEXT: adcl $0, %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Reload
+; X86-NEXT: adcl $0, %edx
+; X86-NEXT: xorl %ecx, %edi
+; X86-NEXT: xorl %ecx, %ebx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Reload
+; X86-NEXT: xorl %ecx, %eax
+; X86-NEXT: xorl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Folded Reload
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp ## 4-byte Reload
+; X86-NEXT: cmpl %esi, %ebp
+; X86-NEXT: movl (%esp), %esi ## 4-byte Reload
+; X86-NEXT: sbbl $0, %esi
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Reload
+; X86-NEXT: sbbl $0, %esi
+; X86-NEXT: movl %edx, %esi
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: sbbl $0, %esi
+; X86-NEXT: movl %ebp, %esi
+; X86-NEXT: adcl $0, %ecx
+; X86-NEXT: adcl $0, %eax
+; X86-NEXT: adcl $0, %ebx
+; X86-NEXT: adcl $0, %edi
+; X86-NEXT: orl %ecx, %ebx
+; X86-NEXT: orl %eax, %edi
+; X86-NEXT: orl %ebx, %edi
+; X86-NEXT: jmp LBB1_15
+; X86-NEXT: LBB1_2: ## %overflow.no.rhs.only
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: sarl $31, %eax
+; X86-NEXT: xorl %eax, %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: xorl %eax, %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: xorl %eax, %edi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: xorl %eax, %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT: xorl %eax, %ebx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: xorl %eax, %ecx
+; X86-NEXT: xorl %eax, %ebp
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl %ebx, %eax
-; X86-NEXT: mull %esi
-; X86-NEXT: movl %edx, %edi
+; X86-NEXT: xorl %eax, %esi
+; X86-NEXT: subl %eax, %esi
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: sbbl %eax, %ebp
+; X86-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: sbbl %eax, %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: sbbl %eax, %ebx
+; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: sbbl %eax, %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: sbbl %eax, %edi
+; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: sbbl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill
+; X86-NEXT: sbbl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: testl %eax, %eax
+; X86-NEXT: sets {{[-0-9]+}}(%e{{[sb]}}p) ## 1-byte Folded Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT: js LBB1_4
+; X86-NEXT: ## %bb.3: ## %overflow.no.rhs.only
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
-; X86-NEXT: movl %ecx, %eax
-; X86-NEXT: mull %esi
-; X86-NEXT: movl %edx, %esi
-; X86-NEXT: movl %eax, %ecx
-; X86-NEXT: addl %edi, %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: adcl $0, %esi
-; X86-NEXT: movl %ebx, %eax
-; X86-NEXT: mull %edi
-; X86-NEXT: movl %edx, %ebx
-; X86-NEXT: movl %eax, %ebp
-; X86-NEXT: addl %ecx, %ebp
-; X86-NEXT: adcl %esi, %ebx
-; X86-NEXT: setb %cl
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: mull %edi
-; X86-NEXT: addl %ebx, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
-; X86-NEXT: movzbl %cl, %eax
-; X86-NEXT: adcl %eax, %edx
-; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: movl %ebx, %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: mull %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
-; X86-NEXT: movl %edx, %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: mull %esi
-; X86-NEXT: movl %edx, %esi
-; X86-NEXT: movl %eax, %edi
-; X86-NEXT: addl %ecx, %edi
-; X86-NEXT: adcl $0, %esi
-; X86-NEXT: movl %ebx, %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: mull %ebx
-; X86-NEXT: movl %edx, %ecx
-; X86-NEXT: addl %edi, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
-; X86-NEXT: adcl %esi, %ecx
-; X86-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) ## 1-byte Folded Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: LBB1_4: ## %overflow.no.rhs.only
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl %esi, %eax
-; X86-NEXT: mull %ebx
-; X86-NEXT: movl %eax, %ebx
-; X86-NEXT: addl %ecx, %ebx
-; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 1-byte Folded Reload
-; X86-NEXT: adcl %eax, %edx
-; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx ## 4-byte Folded Reload
-; X86-NEXT: adcl %ebp, %edx
-; X86-NEXT: movl %edx, (%esp) ## 4-byte Spill
-; X86-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill
-; X86-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill
+; X86-NEXT: sarl $31, %eax
+; X86-NEXT: xorl %eax, %esi
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: xorl %eax, %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X86-NEXT: movl %ebp, %eax
-; X86-NEXT: mull %ecx
-; X86-NEXT: movl %edx, %edi
-; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
-; X86-NEXT: movl %esi, %eax
-; X86-NEXT: mull %ecx
-; X86-NEXT: movl %edx, %ecx
-; X86-NEXT: movl %eax, %esi
-; X86-NEXT: addl %edi, %esi
-; X86-NEXT: adcl $0, %ecx
+; X86-NEXT: xorl %eax, %ebp
+; X86-NEXT: xorl %eax, %ebx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: xorl %eax, %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl %ebp, %eax
-; X86-NEXT: mull %edi
-; X86-NEXT: movl %edi, %ebp
-; X86-NEXT: movl %edx, %edi
-; X86-NEXT: addl %esi, %eax
-; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
-; X86-NEXT: adcl %ecx, %edi
-; X86-NEXT: setb %cl
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: mull %ebp
-; X86-NEXT: movl %ebp, %esi
-; X86-NEXT: movl %eax, %ebp
-; X86-NEXT: addl %edi, %ebp
-; X86-NEXT: movzbl %cl, %eax
-; X86-NEXT: adcl %eax, %edx
-; X86-NEXT: addl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill
-; X86-NEXT: movl (%esp), %eax ## 4-byte Reload
-; X86-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill
-; X86-NEXT: adcl $0, %ebp
-; X86-NEXT: adcl $0, %edx
-; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp ## 4-byte Folded Reload
-; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Folded Reload
-; X86-NEXT: movl %edx, (%esp) ## 4-byte Spill
-; X86-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) ## 1-byte Folded Spill
+; X86-NEXT: xorl %eax, %edi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: xorl %eax, %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: xorl %eax, %edx
+; X86-NEXT: subl %eax, %edx
+; X86-NEXT: sbbl %eax, %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: sbbl %eax, %edi
+; X86-NEXT: movl %edi, (%esp) ## 4-byte Spill
+; X86-NEXT: sbbl %eax, %esi
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: sbbl %eax, %ebx
+; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: sbbl %eax, %ebp
+; X86-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Reload
+; X86-NEXT: sbbl %eax, %esi
+; X86-NEXT: sbbl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill
+; X86-NEXT: cmpl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: sets {{[-0-9]+}}(%e{{[sb]}}p) ## 1-byte Folded Spill
+; X86-NEXT: js LBB1_6
+; X86-NEXT: ## %bb.5: ## %overflow.no.rhs.only
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: mull %edi
-; X86-NEXT: movl %edx, %ecx
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: mull %edi
-; X86-NEXT: movl %edx, %edi
-; X86-NEXT: movl %eax, %ebx
-; X86-NEXT: addl %ecx, %ebx
-; X86-NEXT: adcl $0, %edi
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: mull %esi
-; X86-NEXT: movl %edx, %ecx
-; X86-NEXT: movl %eax, %esi
-; X86-NEXT: addl %ebx, %esi
-; X86-NEXT: adcl %edi, %ecx
-; X86-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) ## 1-byte Folded Spill
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl %edi, %eax
-; X86-NEXT: mull {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, %ebx
-; X86-NEXT: addl %ecx, %ebx
-; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 1-byte Folded Reload
-; X86-NEXT: adcl %eax, %edx
-; X86-NEXT: addl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill
-; X86-NEXT: adcl (%esp), %esi ## 4-byte Folded Reload
-; X86-NEXT: movl %esi, (%esp) ## 4-byte Spill
-; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 1-byte Folded Reload
-; X86-NEXT: adcl %eax, %ebx
-; X86-NEXT: adcl $0, %edx
-; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
-; X86-NEXT: movl %edi, %ebp
-; X86-NEXT: sarl $31, %ebp
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: mull %ebp
-; X86-NEXT: movl %edx, %edi
-; X86-NEXT: movl %eax, %ecx
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: mull %ebp
-; X86-NEXT: movl %ebp, %esi
-; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
-; X86-NEXT: movl %eax, %ebp
-; X86-NEXT: addl %edi, %eax
-; X86-NEXT: adcl $0, %edx
-; X86-NEXT: addl %ecx, %eax
+; X86-NEXT: movl %eax, (%esp) ## 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
-; X86-NEXT: adcl %edi, %edx
-; X86-NEXT: movl %edx, %ecx
-; X86-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) ## 1-byte Folded Spill
-; X86-NEXT: movl %esi, %eax
-; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
-; X86-NEXT: imull {{[0-9]+}}(%esp), %esi
-; X86-NEXT: mull {{[0-9]+}}(%esp)
-; X86-NEXT: addl %esi, %edx
-; X86-NEXT: addl %eax, %edx
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Reload
-; X86-NEXT: addl %edi, %eax
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Reload
-; X86-NEXT: adcl %esi, %edx
-; X86-NEXT: addl %ebp, %ecx
-; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ebp ## 1-byte Folded Reload
-; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp ## 4-byte Folded Reload
-; X86-NEXT: addl %eax, %ecx
-; X86-NEXT: adcl %edx, %ebp
-; X86-NEXT: addl %edi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill
-; X86-NEXT: adcl (%esp), %esi ## 4-byte Folded Reload
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: LBB1_6: ## %overflow.no.rhs.only
; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
-; X86-NEXT: adcl %ebx, %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
-; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp ## 4-byte Folded Reload
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp ## 4-byte Reload
+; X86-NEXT: movl %ebp, %eax
; X86-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: movl %ebx, %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: mull %esi
+; X86-NEXT: movl %edx, %edi
+; X86-NEXT: mull %edx
+; X86-NEXT: movl %edx, %ebx
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Reload
+; X86-NEXT: mull %edi
; X86-NEXT: movl %edx, %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: mull %esi
-; X86-NEXT: movl %edx, %edi
; X86-NEXT: movl %eax, %esi
-; X86-NEXT: addl %ecx, %esi
-; X86-NEXT: adcl $0, %edi
-; X86-NEXT: movl %ebx, %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X86-NEXT: mull %ebp
-; X86-NEXT: movl %edx, %ebx
+; X86-NEXT: addl %ebx, %esi
+; X86-NEXT: adcl $0, %ecx
+; X86-NEXT: movl %ebp, %eax
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx ## 4-byte Reload
+; X86-NEXT: mull %ebx
+; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: movl %edx, %ebp
; X86-NEXT: addl %esi, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
-; X86-NEXT: adcl %edi, %ebx
+; X86-NEXT: adcl %ecx, %ebp
; X86-NEXT: setb %cl
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: mull %ebp
-; X86-NEXT: addl %ebx, %eax
-; X86-NEXT: movl %eax, (%esp) ## 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Reload
+; X86-NEXT: mull %ebx
+; X86-NEXT: addl %ebp, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
; X86-NEXT: movzbl %cl, %eax
; X86-NEXT: adcl %eax, %edx
; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl %ecx, %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: mull %edi
-; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
-; X86-NEXT: movl %edx, %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp ## 4-byte Reload
; X86-NEXT: movl %ebp, %eax
+; X86-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
; X86-NEXT: mull %edi
-; X86-NEXT: movl %edx, %ebx
-; X86-NEXT: movl %eax, %edi
-; X86-NEXT: addl %esi, %edi
-; X86-NEXT: adcl $0, %ebx
-; X86-NEXT: movl %ecx, %eax
-; X86-NEXT: mull {{[0-9]+}}(%esp)
; X86-NEXT: movl %edx, %esi
-; X86-NEXT: addl %edi, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
-; X86-NEXT: adcl %ebx, %esi
-; X86-NEXT: setb %cl
-; X86-NEXT: movl %ebp, %eax
-; X86-NEXT: mull {{[0-9]+}}(%esp)
-; X86-NEXT: movl %edx, %ebp
-; X86-NEXT: movl %eax, %ebx
-; X86-NEXT: addl %esi, %ebx
-; X86-NEXT: movzbl %cl, %eax
-; X86-NEXT: adcl %eax, %ebp
-; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx ## 4-byte Folded Reload
-; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp ## 4-byte Folded Reload
-; X86-NEXT: adcl $0, (%esp) ## 4-byte Folded Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Reload
+; X86-NEXT: mull %edi
+; X86-NEXT: movl %edx, %ecx
+; X86-NEXT: movl %eax, %ebx
+; X86-NEXT: addl %esi, %ebx
+; X86-NEXT: adcl $0, %ecx
+; X86-NEXT: movl %ebp, %eax
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp ## 4-byte Reload
+; X86-NEXT: mull %ebp
+; X86-NEXT: movl %edx, %esi
+; X86-NEXT: addl %ebx, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: adcl %ecx, %esi
+; X86-NEXT: setb %bl
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Reload
+; X86-NEXT: movl %edi, %eax
+; X86-NEXT: mull %ebp
+; X86-NEXT: movl %eax, %ebp
+; X86-NEXT: addl %esi, %ebp
+; X86-NEXT: movzbl %bl, %eax
+; X86-NEXT: adcl %eax, %edx
+; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp ## 4-byte Folded Reload
+; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Folded Reload
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
; X86-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill
+; X86-NEXT: movl %edi, %eax
+; X86-NEXT: movl (%esp), %ecx ## 4-byte Reload
; X86-NEXT: mull %ecx
-; X86-NEXT: movl %edx, %edi
+; X86-NEXT: movl %edx, %esi
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx ## 4-byte Reload
+; X86-NEXT: movl %ebx, %eax
; X86-NEXT: mull %ecx
-; X86-NEXT: movl %edx, %ecx
-; X86-NEXT: movl %eax, %esi
-; X86-NEXT: addl %edi, %esi
-; X86-NEXT: adcl $0, %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: mull %edx
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
; X86-NEXT: movl %edx, %edi
-; X86-NEXT: addl %esi, %eax
+; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Folded Reload
+; X86-NEXT: adcl $0, %esi
+; X86-NEXT: movl %ebx, %eax
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Reload
+; X86-NEXT: mull %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: movl %edx, %ebx
+; X86-NEXT: addl %edi, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
-; X86-NEXT: adcl %ecx, %edi
+; X86-NEXT: adcl %esi, %ebx
; X86-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) ## 1-byte Folded Spill
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: mull {{[0-9]+}}(%esp)
-; X86-NEXT: movl %edx, %ecx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Reload
+; X86-NEXT: mull %ecx
; X86-NEXT: movl %eax, %esi
-; X86-NEXT: addl %edi, %esi
+; X86-NEXT: addl %ebx, %esi
; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 1-byte Folded Reload
-; X86-NEXT: adcl %eax, %ecx
-; X86-NEXT: addl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill
-; X86-NEXT: adcl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill
+; X86-NEXT: adcl %eax, %edx
+; X86-NEXT: addl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Reload
+; X86-NEXT: adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill
; X86-NEXT: adcl $0, %esi
-; X86-NEXT: adcl $0, %ecx
-; X86-NEXT: addl (%esp), %esi ## 4-byte Folded Reload
-; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Folded Reload
-; X86-NEXT: setb (%esp) ## 1-byte Folded Spill
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: mull %edi
+; X86-NEXT: adcl $0, %edx
+; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Folded Reload
+; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Folded Reload
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) ## 1-byte Folded Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Reload
+; X86-NEXT: movl %edi, %eax
+; X86-NEXT: movl (%esp), %ecx ## 4-byte Reload
+; X86-NEXT: mull %ecx
; X86-NEXT: movl %edx, %ebx
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: mull %edi
-; X86-NEXT: movl %edx, %edi
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Reload
+; X86-NEXT: mull %ecx
+; X86-NEXT: movl %edx, %ecx
; X86-NEXT: movl %eax, %ebp
; X86-NEXT: addl %ebx, %ebp
-; X86-NEXT: adcl $0, %edi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: mull {{[0-9]+}}(%esp)
+; X86-NEXT: adcl $0, %ecx
+; X86-NEXT: movl %edi, %eax
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Reload
+; X86-NEXT: mull %edi
; X86-NEXT: movl %edx, %ebx
; X86-NEXT: addl %ebp, %eax
; X86-NEXT: movl %eax, %ebp
-; X86-NEXT: adcl %edi, %ebx
-; X86-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) ## 1-byte Folded Spill
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: mull {{[0-9]+}}(%esp)
+; X86-NEXT: adcl %ecx, %ebx
+; X86-NEXT: setb %cl
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Reload
+; X86-NEXT: mull %edi
; X86-NEXT: addl %ebx, %eax
-; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ebx ## 1-byte Folded Reload
-; X86-NEXT: movl %edx, %edi
-; X86-NEXT: adcl %ebx, %edi
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Reload
-; X86-NEXT: addl %esi, %edx
-; X86-NEXT: movl %ebp, %esi
-; X86-NEXT: adcl %ecx, %esi
-; X86-NEXT: movzbl (%esp), %ecx ## 1-byte Folded Reload
-; X86-NEXT: adcl %ecx, %eax
-; X86-NEXT: adcl $0, %edi
-; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Folded Reload
+; X86-NEXT: movl %eax, %edi
+; X86-NEXT: movzbl %cl, %eax
+; X86-NEXT: adcl %eax, %edx
+; X86-NEXT: addl %esi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill
+; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp ## 4-byte Folded Reload
+; X86-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 1-byte Folded Reload
+; X86-NEXT: adcl %eax, %edi
+; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: adcl $0, %edx
; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
-; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Folded Reload
-; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
-; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Folded Reload
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp ## 4-byte Reload
+; X86-NEXT: movl %ebp, %eax
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Reload
+; X86-NEXT: mull %edi
+; X86-NEXT: movl %edx, %esi
+; X86-NEXT: movl %eax, %ebx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Reload
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: mull %edi
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
-; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Folded Reload
-; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
-; X86-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill
-; X86-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill
-; X86-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill
+; X86-NEXT: movl %edx, %edi
+; X86-NEXT: addl %ebx, %edi
+; X86-NEXT: adcl $0, %esi
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Reload
+; X86-NEXT: mull %ecx
+; X86-NEXT: movl %edx, %ebx
+; X86-NEXT: addl %edi, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: adcl %esi, %ebx
+; X86-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) ## 1-byte Folded Spill
+; X86-NEXT: movl %ebp, %eax
+; X86-NEXT: movl %ecx, %esi
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: mull %ecx
+; X86-NEXT: movl %edx, %ebp
+; X86-NEXT: movl %eax, %edi
+; X86-NEXT: addl %ebx, %edi
+; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 1-byte Folded Reload
+; X86-NEXT: adcl %eax, %ebp
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Reload
-; X86-NEXT: adcl $0, %eax
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx ## 4-byte Reload
+; X86-NEXT: imull %eax, %ebx
+; X86-NEXT: movl (%esp), %ecx ## 4-byte Reload
+; X86-NEXT: mull %ecx
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
-; X86-NEXT: sarl $31, %eax
+; X86-NEXT: addl %ebx, %edx
+; X86-NEXT: imull %esi, %ecx
+; X86-NEXT: addl %edx, %ecx
+; X86-NEXT: movl %ecx, %ebx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Reload
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Reload
+; X86-NEXT: imull %eax, %ecx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Reload
+; X86-NEXT: mull %esi
+; X86-NEXT: addl %ecx, %edx
+; X86-NEXT: movl %esi, %ecx
+; X86-NEXT: imull {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Folded Reload
+; X86-NEXT: addl %edx, %ecx
+; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Folded Reload
+; X86-NEXT: adcl %ebx, %ecx
+; X86-NEXT: addl %edi, %eax
+; X86-NEXT: adcl %ebp, %ecx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Reload
+; X86-NEXT: addl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Reload
+; X86-NEXT: adcl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill
+; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Folded Reload
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Folded Reload
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp ## 4-byte Reload
; X86-NEXT: movl %ebp, %eax
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Reload
; X86-NEXT: mull %ecx
; X86-NEXT: movl %edx, %esi
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx ## 4-byte Reload
; X86-NEXT: movl %ebx, %eax
; X86-NEXT: mull %ecx
; X86-NEXT: movl %edx, %ecx
; X86-NEXT: movl %eax, %edi
; X86-NEXT: addl %esi, %edi
; X86-NEXT: adcl $0, %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl %ebp, %eax
-; X86-NEXT: mull %esi
-; X86-NEXT: movl %esi, %ebp
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp ## 4-byte Reload
+; X86-NEXT: mull %ebp
+; X86-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
; X86-NEXT: movl %edx, %esi
; X86-NEXT: addl %edi, %eax
-; X86-NEXT: movl %eax, (%esp) ## 4-byte Spill
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
; X86-NEXT: adcl %ecx, %esi
; X86-NEXT: setb %cl
; X86-NEXT: movl %ebx, %eax
@@ -866,203 +3112,277 @@ define zeroext i1 @smuloi256(i256 %v1, i256 %v2, ptr %res) {
; X86-NEXT: movzbl %cl, %eax
; X86-NEXT: adcl %eax, %edx
; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp ## 4-byte Reload
+; X86-NEXT: movl %ebp, %eax
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Reload
+; X86-NEXT: mull %esi
+; X86-NEXT: movl %edx, %ecx
+; X86-NEXT: movl %eax, %edi
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx ## 4-byte Reload
; X86-NEXT: movl %ebx, %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: mull %ecx
-; X86-NEXT: movl %edx, %edi
+; X86-NEXT: mull %esi
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: mull %ecx
-; X86-NEXT: movl %edx, %ecx
-; X86-NEXT: movl %eax, %esi
+; X86-NEXT: movl %edx, %esi
; X86-NEXT: addl %edi, %esi
; X86-NEXT: adcl $0, %ecx
; X86-NEXT: movl %ebx, %eax
-; X86-NEXT: movl %ebp, %edi
-; X86-NEXT: mull %ebp
-; X86-NEXT: movl %edx, %ebp
-; X86-NEXT: addl %esi, %eax
-; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
-; X86-NEXT: adcl %ecx, %ebp
-; X86-NEXT: setb %bl
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: mull %edi
-; X86-NEXT: movl %edx, %ecx
-; X86-NEXT: movl %eax, %esi
-; X86-NEXT: addl %ebp, %esi
-; X86-NEXT: movzbl %bl, %eax
-; X86-NEXT: adcl %eax, %ecx
-; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Folded Reload
-; X86-NEXT: adcl (%esp), %ecx ## 4-byte Folded Reload
-; X86-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill
-; X86-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: mull %ebx
-; X86-NEXT: movl %edx, %edi
-; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx ## 4-byte Reload
; X86-NEXT: mull %ebx
-; X86-NEXT: movl %edx, %ebx
-; X86-NEXT: movl %eax, %ebp
-; X86-NEXT: addl %edi, %ebp
-; X86-NEXT: adcl $0, %ebx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: mull {{[0-9]+}}(%esp)
; X86-NEXT: movl %edx, %edi
-; X86-NEXT: addl %ebp, %eax
+; X86-NEXT: addl %esi, %eax
; X86-NEXT: movl %eax, (%esp) ## 4-byte Spill
-; X86-NEXT: adcl %ebx, %edi
+; X86-NEXT: adcl %ecx, %edi
; X86-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) ## 1-byte Folded Spill
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: mull {{[0-9]+}}(%esp)
-; X86-NEXT: movl %edx, %ebp
+; X86-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: movl %ebp, %eax
+; X86-NEXT: mull %ebx
+; X86-NEXT: movl %edx, %ecx
; X86-NEXT: movl %eax, %ebx
; X86-NEXT: addl %edi, %ebx
; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 1-byte Folded Reload
-; X86-NEXT: adcl %eax, %ebp
-; X86-NEXT: addl %esi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill
-; X86-NEXT: adcl %ecx, (%esp) ## 4-byte Folded Spill
-; X86-NEXT: adcl $0, %ebx
-; X86-NEXT: adcl $0, %ebp
+; X86-NEXT: adcl %eax, %ecx
; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx ## 4-byte Folded Reload
-; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp ## 4-byte Folded Reload
-; X86-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) ## 1-byte Folded Spill
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Folded Reload
+; X86-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill
+; X86-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill
+; X86-NEXT: movl %ebp, %eax
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Reload
; X86-NEXT: mull %esi
-; X86-NEXT: movl %edx, %ecx
+; X86-NEXT: movl %edx, %edi
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp ## 4-byte Reload
+; X86-NEXT: movl %ebp, %eax
; X86-NEXT: mull %esi
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
; X86-NEXT: movl %edx, %esi
-; X86-NEXT: movl %eax, %edi
-; X86-NEXT: addl %ecx, %edi
-; X86-NEXT: adcl $0, %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: mull {{[0-9]+}}(%esp)
-; X86-NEXT: movl %edx, %ecx
-; X86-NEXT: addl %edi, %eax
-; X86-NEXT: movl %eax, %edi
-; X86-NEXT: adcl %esi, %ecx
+; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Folded Reload
+; X86-NEXT: adcl $0, %edi
+; X86-NEXT: movl %ebp, %eax
+; X86-NEXT: mull {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Reload
+; X86-NEXT: movl %edx, %ebp
+; X86-NEXT: addl %esi, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: adcl %edi, %ebp
; X86-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) ## 1-byte Folded Spill
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Reload
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Reload
; X86-NEXT: mull %esi
-; X86-NEXT: addl %ecx, %eax
-; X86-NEXT: movl %eax, %ecx
+; X86-NEXT: movl %eax, %edi
+; X86-NEXT: addl %ebp, %edi
; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 1-byte Folded Reload
; X86-NEXT: adcl %eax, %edx
; X86-NEXT: addl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill
-; X86-NEXT: adcl %ebp, %edi
-; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
-; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 1-byte Folded Reload
-; X86-NEXT: adcl %eax, %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: adcl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill
+; X86-NEXT: adcl $0, %edi
; X86-NEXT: adcl $0, %edx
+; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Folded Reload
+; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Folded Reload
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) ## 1-byte Folded Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx ## 4-byte Reload
+; X86-NEXT: movl %ebx, %eax
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Reload
+; X86-NEXT: mull %ecx
; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
-; X86-NEXT: movl %esi, %ebp
-; X86-NEXT: sarl $31, %ebp
-; X86-NEXT: movl %ebp, %eax
-; X86-NEXT: mull {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, %edi
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
-; X86-NEXT: addl %edx, %edi
-; X86-NEXT: adcl $0, %edx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Reload
+; X86-NEXT: mull %ecx
; X86-NEXT: movl %edx, %ecx
-; X86-NEXT: movl %ebp, %eax
+; X86-NEXT: movl %eax, %ebp
+; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp ## 4-byte Folded Reload
+; X86-NEXT: adcl $0, %ecx
+; X86-NEXT: movl %ebx, %eax
+; X86-NEXT: mull %esi
+; X86-NEXT: movl %edx, %ebx
+; X86-NEXT: addl %ebp, %eax
+; X86-NEXT: movl %eax, %ebp
+; X86-NEXT: adcl %ecx, %ebx
+; X86-NEXT: setb %cl
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Reload
+; X86-NEXT: mull %esi
+; X86-NEXT: addl %ebx, %eax
+; X86-NEXT: movl %eax, %esi
+; X86-NEXT: movzbl %cl, %eax
+; X86-NEXT: adcl %eax, %edx
+; X86-NEXT: addl %edi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill
+; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp ## 4-byte Folded Reload
; X86-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
-; X86-NEXT: mull {{[0-9]+}}(%esp)
+; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 1-byte Folded Reload
+; X86-NEXT: adcl %eax, %esi
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: adcl $0, %edx
; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
-; X86-NEXT: movl %eax, %ebx
-; X86-NEXT: addl %eax, %edi
-; X86-NEXT: adcl %edx, %ecx
-; X86-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) ## 1-byte Folded Spill
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: imull %ebp, %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Reload
+; X86-NEXT: movl %eax, %ecx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Reload
+; X86-NEXT: imull %edi, %ecx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp ## 4-byte Reload
; X86-NEXT: mull %ebp
-; X86-NEXT: addl %eax, %edx
-; X86-NEXT: addl %esi, %edx
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Reload
-; X86-NEXT: addl %esi, %eax
-; X86-NEXT: adcl %edi, %edx
-; X86-NEXT: addl %ebx, %ecx
-; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ebp ## 1-byte Folded Reload
-; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp ## 4-byte Folded Reload
-; X86-NEXT: addl %eax, %ecx
-; X86-NEXT: adcl %edx, %ebp
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: addl %ecx, %edx
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Reload
-; X86-NEXT: addl %esi, %eax
-; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Folded Reload
-; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Folded Reload
-; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp ## 4-byte Folded Reload
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Reload
-; X86-NEXT: addl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Reload
-; X86-NEXT: adcl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Reload
-; X86-NEXT: adcl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill
+; X86-NEXT: imull %ebp, %eax
+; X86-NEXT: addl %edx, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Reload
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Reload
+; X86-NEXT: mull %esi
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx ## 4-byte Reload
+; X86-NEXT: imull %ebx, %esi
+; X86-NEXT: addl %edx, %esi
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Reload
-; X86-NEXT: adcl %edx, (%esp) ## 4-byte Folded Spill
-; X86-NEXT: adcl $0, %eax
-; X86-NEXT: adcl $0, %edi
-; X86-NEXT: adcl $0, %ecx
-; X86-NEXT: adcl $0, %ebp
-; X86-NEXT: movl %ebp, %edx
-; X86-NEXT: sarl $31, %edx
+; X86-NEXT: imull %ecx, %edx
+; X86-NEXT: addl %edx, %esi
; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Folded Reload
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Folded Reload
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: movl %ebp, %eax
+; X86-NEXT: mull %ecx
+; X86-NEXT: movl %edx, %esi
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: movl %edi, %eax
+; X86-NEXT: mull %ecx
+; X86-NEXT: movl %edx, %edi
+; X86-NEXT: movl %eax, %ecx
+; X86-NEXT: addl %esi, %ecx
+; X86-NEXT: adcl $0, %edi
+; X86-NEXT: movl %ebp, %eax
+; X86-NEXT: mull %ebx
+; X86-NEXT: movl %edx, %esi
+; X86-NEXT: addl %ecx, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: adcl %edi, %esi
+; X86-NEXT: setb %cl
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Reload
+; X86-NEXT: mull %ebx
+; X86-NEXT: movl %edx, %edi
+; X86-NEXT: movl %eax, %ebp
+; X86-NEXT: addl %esi, %ebp
+; X86-NEXT: movzbl %cl, %ecx
+; X86-NEXT: adcl %ecx, %edi
+; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp ## 4-byte Folded Reload
; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Folded Reload
-; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
-; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Folded Reload
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
-; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp ## 4-byte Folded Reload
-; X86-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Reload
-; X86-NEXT: movl %ecx, %eax
-; X86-NEXT: adcl %edx, %eax
+; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Folded Reload
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Reload
+; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Folded Reload
+; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp ## 4-byte Folded Reload
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Reload
+; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Folded Reload
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Reload
+; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Folded Reload
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
-; X86-NEXT: movl %ecx, %eax
-; X86-NEXT: adcl %edx, %eax
+; X86-NEXT: movl (%esp), %eax ## 4-byte Reload
+; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Folded Reload
+; X86-NEXT: movl %eax, (%esp) ## 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Reload
+; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Folded Reload
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
-; X86-NEXT: movl %ecx, %eax
-; X86-NEXT: adcl %edx, %eax
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Reload
+; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Folded Reload
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
-; X86-NEXT: adcl %ecx, %edx
+; X86-NEXT: adcl $0, %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: adcl $0, %edx
; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X86-NEXT: movl %ebp, %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: mull %ecx
-; X86-NEXT: movl %edx, %edi
+; X86-NEXT: adcl $0, %ebp
+; X86-NEXT: adcl $0, %edi
+; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 1-byte Folded Reload
+; X86-NEXT: xorb {{[-0-9]+}}(%e{{[sb]}}p), %cl ## 1-byte Folded Reload
+; X86-NEXT: movzbl %cl, %ebx
+; X86-NEXT: movl %ebx, %ecx
+; X86-NEXT: negl %ecx
+; X86-NEXT: xorl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Reload
+; X86-NEXT: xorl %ecx, %edx
+; X86-NEXT: xorl %ecx, %esi
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Reload
+; X86-NEXT: xorl %ecx, %eax
+; X86-NEXT: addl %ebx, %eax
+; X86-NEXT: adcl $0, %esi
+; X86-NEXT: adcl $0, %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Reload
+; X86-NEXT: adcl $0, %edx
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
-; X86-NEXT: movl %ebx, %eax
-; X86-NEXT: mull %ecx
+; X86-NEXT: cmpl %ebx, %eax
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: sbbl $0, %esi
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Reload
+; X86-NEXT: sbbl $0, %esi
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: sbbl $0, %edx
+; X86-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) ## 1-byte Folded Spill
+; X86-NEXT: xorl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Reload
+; X86-NEXT: xorl %ecx, %edx
+; X86-NEXT: movl (%esp), %ebx ## 4-byte Reload
+; X86-NEXT: xorl %ecx, %ebx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Reload
+; X86-NEXT: xorl %ecx, %eax
+; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 1-byte Folded Reload
+; X86-NEXT: addl %esi, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: adcl $0, %ebx
+; X86-NEXT: movl %ebx, (%esp) ## 4-byte Spill
+; X86-NEXT: adcl $0, %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Reload
+; X86-NEXT: adcl $0, %edx
+; X86-NEXT: xorl %ecx, %edi
+; X86-NEXT: xorl %ecx, %ebp
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Reload
+; X86-NEXT: xorl %ecx, %eax
+; X86-NEXT: xorl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Folded Reload
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx ## 4-byte Reload
+; X86-NEXT: cmpl %esi, %ebx
+; X86-NEXT: movl (%esp), %esi ## 4-byte Reload
+; X86-NEXT: sbbl $0, %esi
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Reload
+; X86-NEXT: sbbl $0, %esi
+; X86-NEXT: movl %edx, %esi
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: sbbl $0, %esi
+; X86-NEXT: movl %ebx, %esi
+; X86-NEXT: adcl $0, %ecx
+; X86-NEXT: adcl $0, %eax
+; X86-NEXT: adcl $0, %ebp
+; X86-NEXT: adcl $0, %edi
+; X86-NEXT: orl %ecx, %ebp
+; X86-NEXT: orl %eax, %edi
+; X86-NEXT: orl %ebp, %edi
+; X86-NEXT: LBB1_15: ## %overflow.res
+; X86-NEXT: setne %al
+; X86-NEXT: jmp LBB1_16
+; X86-NEXT: LBB1_13: ## %overflow.no
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: mull %ebx
+; X86-NEXT: movl %edx, %edi
+; X86-NEXT: movl %eax, (%esp) ## 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: mull %ebx
; X86-NEXT: movl %edx, %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
; X86-NEXT: movl %eax, %esi
; X86-NEXT: addl %edi, %esi
; X86-NEXT: adcl $0, %ecx
-; X86-NEXT: movl %ebp, %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: mull %edi
-; X86-NEXT: movl %edx, %ebp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: mull %ebx
+; X86-NEXT: movl %edx, %edi
; X86-NEXT: addl %esi, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
-; X86-NEXT: adcl %ecx, %ebp
+; X86-NEXT: adcl %ecx, %edi
; X86-NEXT: setb %cl
-; X86-NEXT: movl %ebx, %eax
-; X86-NEXT: mull %edi
-; X86-NEXT: addl %ebp, %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: mull %ebx
+; X86-NEXT: addl %edi, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
; X86-NEXT: movzbl %cl, %eax
; X86-NEXT: adcl %eax, %edx
; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: movl %ebx, %eax
+; X86-NEXT: movl %ebp, %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: mull %ecx
; X86-NEXT: movl %edx, %edi
@@ -1073,58 +3393,60 @@ define zeroext i1 @smuloi256(i256 %v1, i256 %v2, ptr %res) {
; X86-NEXT: movl %eax, %esi
; X86-NEXT: addl %edi, %esi
; X86-NEXT: adcl $0, %ecx
-; X86-NEXT: movl %ebx, %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X86-NEXT: mull %ebp
-; X86-NEXT: movl %edx, %edi
+; X86-NEXT: movl %ebp, %eax
+; X86-NEXT: mull %ebx
+; X86-NEXT: movl %ebx, %edi
+; X86-NEXT: movl %edx, %ebp
; X86-NEXT: addl %esi, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
-; X86-NEXT: adcl %ecx, %edi
+; X86-NEXT: adcl %ecx, %ebp
; X86-NEXT: setb %bl
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: mull %ebp
-; X86-NEXT: movl %edx, %ecx
-; X86-NEXT: movl %eax, %ebp
-; X86-NEXT: addl %edi, %ebp
+; X86-NEXT: mull %edi
+; X86-NEXT: movl %eax, %ecx
+; X86-NEXT: addl %ebp, %ecx
; X86-NEXT: movzbl %bl, %eax
-; X86-NEXT: adcl %eax, %ecx
-; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp ## 4-byte Folded Reload
-; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Folded Reload
+; X86-NEXT: adcl %eax, %edx
+; X86-NEXT: addl (%esp), %ecx ## 4-byte Folded Reload
+; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Folded Reload
+; X86-NEXT: movl %edx, (%esp) ## 4-byte Spill
; X86-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill
; X86-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: mull %edi
+; X86-NEXT: movl %edi, %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT: mull %ebx
; X86-NEXT: movl %edx, %esi
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: mull %edi
-; X86-NEXT: movl %edx, %edi
-; X86-NEXT: movl %eax, %ebx
-; X86-NEXT: addl %esi, %ebx
-; X86-NEXT: adcl $0, %edi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: mull %ebx
+; X86-NEXT: movl %edx, %ebx
+; X86-NEXT: movl %eax, %ebp
+; X86-NEXT: addl %esi, %ebp
+; X86-NEXT: adcl $0, %ebx
+; X86-NEXT: movl %edi, %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: mull %edx
; X86-NEXT: movl %edx, %esi
-; X86-NEXT: addl %ebx, %eax
-; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
-; X86-NEXT: adcl %edi, %esi
+; X86-NEXT: movl %eax, %edi
+; X86-NEXT: addl %ebp, %edi
+; X86-NEXT: adcl %ebx, %esi
; X86-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) ## 1-byte Folded Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: mull {{[0-9]+}}(%esp)
-; X86-NEXT: movl %edx, %edi
-; X86-NEXT: movl %eax, %ebx
-; X86-NEXT: addl %esi, %ebx
+; X86-NEXT: movl %edx, %ebx
+; X86-NEXT: movl %eax, %ebp
+; X86-NEXT: addl %esi, %ebp
; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 1-byte Folded Reload
-; X86-NEXT: adcl %eax, %edi
-; X86-NEXT: addl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill
-; X86-NEXT: adcl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill
+; X86-NEXT: adcl %eax, %ebx
+; X86-NEXT: addl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill
+; X86-NEXT: adcl (%esp), %edi ## 4-byte Folded Reload
+; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: adcl $0, %ebp
; X86-NEXT: adcl $0, %ebx
-; X86-NEXT: adcl $0, %edi
-; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx ## 4-byte Folded Reload
-; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Folded Reload
-; X86-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) ## 1-byte Folded Spill
+; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp ## 4-byte Folded Reload
+; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx ## 4-byte Folded Reload
+; X86-NEXT: setb (%esp) ## 1-byte Folded Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: mull %ecx
@@ -1133,15 +3455,15 @@ define zeroext i1 @smuloi256(i256 %v1, i256 %v2, ptr %res) {
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: mull %ecx
; X86-NEXT: movl %edx, %ecx
-; X86-NEXT: movl %eax, %ebp
-; X86-NEXT: addl %esi, %ebp
+; X86-NEXT: movl %eax, %edi
+; X86-NEXT: addl %esi, %edi
; X86-NEXT: adcl $0, %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: mull %edx
; X86-NEXT: movl %edx, %esi
-; X86-NEXT: addl %ebp, %eax
-; X86-NEXT: movl %eax, %ebp
+; X86-NEXT: addl %edi, %eax
+; X86-NEXT: movl %eax, %edi
; X86-NEXT: adcl %ecx, %esi
; X86-NEXT: setb %cl
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
@@ -1150,136 +3472,151 @@ define zeroext i1 @smuloi256(i256 %v1, i256 %v2, ptr %res) {
; X86-NEXT: movl %eax, %esi
; X86-NEXT: movzbl %cl, %eax
; X86-NEXT: adcl %eax, %edx
-; X86-NEXT: addl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill
-; X86-NEXT: adcl %edi, %ebp
-; X86-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
-; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 1-byte Folded Reload
+; X86-NEXT: addl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill
+; X86-NEXT: adcl %ebx, %edi
+; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: movzbl (%esp), %eax ## 1-byte Folded Reload
; X86-NEXT: adcl %eax, %esi
; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
; X86-NEXT: adcl $0, %edx
; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Reload
-; X86-NEXT: mull %edi
-; X86-NEXT: movl %edx, %ebp
-; X86-NEXT: movl %eax, %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: imull %eax, %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: mull %esi
+; X86-NEXT: movl %eax, (%esp) ## 4-byte Spill
+; X86-NEXT: addl %ecx, %edx
+; X86-NEXT: movl %esi, %eax
+; X86-NEXT: imull {{[0-9]+}}(%esp), %eax
+; X86-NEXT: addl %edx, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: mull %edi
-; X86-NEXT: movl %edx, %ebx
-; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: movl %eax, %edi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT: imull %ebx, %edi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: mull %ecx
; X86-NEXT: movl %eax, %esi
-; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
-; X86-NEXT: addl %ebp, %esi
-; X86-NEXT: adcl $0, %ebx
-; X86-NEXT: addl %ecx, %esi
-; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
-; X86-NEXT: adcl %ebp, %ebx
-; X86-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) ## 1-byte Folded Spill
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X86-NEXT: imull %edi, %ebp
-; X86-NEXT: movl %edi, %eax
-; X86-NEXT: mull {{[0-9]+}}(%esp)
-; X86-NEXT: addl %ebp, %edx
-; X86-NEXT: addl %eax, %edx
-; X86-NEXT: addl %ecx, %eax
-; X86-NEXT: adcl %esi, %edx
-; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx ## 4-byte Folded Reload
-; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 1-byte Folded Reload
-; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Folded Reload
-; X86-NEXT: addl %eax, %ebx
-; X86-NEXT: adcl %edx, %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Reload
-; X86-NEXT: movl %esi, %eax
-; X86-NEXT: mull {{[0-9]+}}(%esp)
+; X86-NEXT: addl %edi, %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: imull %ecx, %edi
+; X86-NEXT: addl %edx, %edi
+; X86-NEXT: addl (%esp), %esi ## 4-byte Folded Reload
+; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Folded Reload
+; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: movl %ecx, %ebp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: mull %ecx
+; X86-NEXT: movl %edx, %edi
+; X86-NEXT: movl %eax, (%esp) ## 4-byte Spill
+; X86-NEXT: movl %ebx, %eax
+; X86-NEXT: mull %ecx
; X86-NEXT: movl %edx, %ecx
-; X86-NEXT: movl %eax, %edi
-; X86-NEXT: movl %eax, %ebp
-; X86-NEXT: addl %edx, %ebp
+; X86-NEXT: movl %eax, %ebx
+; X86-NEXT: addl %edi, %ebx
; X86-NEXT: adcl $0, %ecx
-; X86-NEXT: movl %esi, %eax
-; X86-NEXT: mull {{[0-9]+}}(%esp)
-; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: movl %ebp, %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; X86-NEXT: mull %ebp
+; X86-NEXT: movl %edx, %edi
+; X86-NEXT: addl %ebx, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
-; X86-NEXT: addl %eax, %ebp
-; X86-NEXT: adcl %edx, %ecx
-; X86-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) ## 1-byte Folded Spill
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Reload
+; X86-NEXT: adcl %ecx, %edi
+; X86-NEXT: setb %bl
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: mull %ebp
+; X86-NEXT: addl %edi, %eax
+; X86-NEXT: movzbl %bl, %ecx
+; X86-NEXT: adcl %ecx, %edx
+; X86-NEXT: addl %esi, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Folded Reload
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: imull %eax, %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: mull %ecx
+; X86-NEXT: movl %eax, %ebp
+; X86-NEXT: addl %edx, %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT: imull %ecx, %ebx
+; X86-NEXT: addl %esi, %ebx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: movl %esi, %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: mull %ecx
+; X86-NEXT: movl %eax, %edi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: imull %esi, %eax
+; X86-NEXT: addl %edx, %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: imull %esi, %ecx
+; X86-NEXT: addl %eax, %ecx
+; X86-NEXT: addl %ebp, %edi
+; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: adcl %ebx, %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: mull %ecx
+; X86-NEXT: movl %edx, %ebx
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: mull %ecx
+; X86-NEXT: movl %edx, %edi
+; X86-NEXT: movl %eax, %ebp
+; X86-NEXT: addl %ebx, %ebp
+; X86-NEXT: adcl $0, %edi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: mull %esi
-; X86-NEXT: movl %eax, %esi
-; X86-NEXT: addl %eax, %edx
-; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Folded Reload
-; X86-NEXT: addl %edi, %esi
-; X86-NEXT: adcl %ebp, %edx
-; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Folded Reload
-; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 1-byte Folded Reload
-; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Folded Reload
-; X86-NEXT: addl %esi, %ecx
-; X86-NEXT: adcl %edx, %eax
-; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Folded Reload
-; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp ## 4-byte Folded Reload
-; X86-NEXT: adcl %ebx, %ecx
+; X86-NEXT: movl %edx, %ebx
+; X86-NEXT: movl %eax, %ecx
+; X86-NEXT: addl %ebp, %ecx
+; X86-NEXT: adcl %edi, %ebx
+; X86-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) ## 1-byte Folded Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: mull %esi
+; X86-NEXT: addl %ebx, %eax
+; X86-NEXT: movl %ecx, %edi
+; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 1-byte Folded Reload
+; X86-NEXT: adcl %ecx, %edx
+; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Folded Reload
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Reload
+; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Folded Reload
+; X86-NEXT: addl (%esp), %esi ## 4-byte Folded Reload
+; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Folded Reload
; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Folded Reload
-; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Folded Reload
-; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp ## 4-byte Folded Reload
-; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Folded Reload
+; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Folded Reload
+; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Folded Reload
+; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Folded Reload
+; X86-NEXT: movl %edi, (%esp) ## 4-byte Spill
; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Folded Reload
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx ## 4-byte Reload
-; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx ## 4-byte Folded Reload
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Reload
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Folded Reload
; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: xorl %eax, %eax
+; X86-NEXT: LBB1_16: ## %overflow.res
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Reload
-; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Folded Reload
-; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: movl %edx, (%ecx)
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Reload
-; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Folded Reload
-; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
-; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Folded Reload
-; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp ## 4-byte Folded Reload
-; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Folded Reload
-; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Folded Reload
-; X86-NEXT: movl (%esp), %esi ## 4-byte Reload
-; X86-NEXT: movl %esi, %edx
-; X86-NEXT: sarl $31, %edx
-; X86-NEXT: xorl %edx, %edi
-; X86-NEXT: xorl %edx, %ebx
-; X86-NEXT: orl %edi, %ebx
-; X86-NEXT: xorl %edx, %ecx
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Reload
-; X86-NEXT: xorl %edx, %edi
-; X86-NEXT: orl %ecx, %edi
-; X86-NEXT: orl %ebx, %edi
-; X86-NEXT: xorl %edx, %ebp
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Reload
-; X86-NEXT: xorl %edx, %ecx
-; X86-NEXT: orl %ebp, %ecx
-; X86-NEXT: xorl %edx, %eax
-; X86-NEXT: xorl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Folded Reload
-; X86-NEXT: orl %eax, %edx
-; X86-NEXT: orl %ecx, %edx
-; X86-NEXT: orl %edi, %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl %esi, 28(%eax)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Reload
-; X86-NEXT: movl %ecx, (%eax)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Reload
-; X86-NEXT: movl %ecx, 4(%eax)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Reload
-; X86-NEXT: movl %ecx, 8(%eax)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Reload
-; X86-NEXT: movl %ecx, 12(%eax)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Reload
-; X86-NEXT: movl %ecx, 16(%eax)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Reload
-; X86-NEXT: movl %ecx, 20(%eax)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Reload
-; X86-NEXT: movl %ecx, 24(%eax)
-; X86-NEXT: setne %al
+; X86-NEXT: movl %edx, 4(%ecx)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Reload
+; X86-NEXT: movl %edx, 8(%ecx)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Reload
+; X86-NEXT: movl %edx, 12(%ecx)
+; X86-NEXT: movl %esi, 16(%ecx)
+; X86-NEXT: movl (%esp), %edx ## 4-byte Reload
+; X86-NEXT: movl %edx, 20(%ecx)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Reload
+; X86-NEXT: movl %edx, 24(%ecx)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Reload
+; X86-NEXT: movl %edx, 28(%ecx)
+; X86-NEXT: andb $1, %al
+; X86-NEXT: ## kill: def $al killed $al killed $eax
; X86-NEXT: addl $128, %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
diff --git a/llvm/test/CodeGen/X86/umulo-128-legalisation-lowering.ll b/llvm/test/CodeGen/X86/umulo-128-legalisation-lowering.ll
index 4c3170304b980..4ccb90a37ca71 100644
--- a/llvm/test/CodeGen/X86/umulo-128-legalisation-lowering.ll
+++ b/llvm/test/CodeGen/X86/umulo-128-legalisation-lowering.ll
@@ -4,14 +4,19 @@
define { i128, i8 } @muloti_test(i128 %l, i128 %r) unnamed_addr #0 {
; X64-LABEL: muloti_test:
-; X64: # %bb.0: # %start
+; X64: # %bb.0: # %overflow.entry
; X64-NEXT: movq %rdx, %r8
-; X64-NEXT: movq %rsi, %rax
+; X64-NEXT: testq %rsi, %rsi
+; X64-NEXT: je .LBB0_3
+; X64-NEXT: # %bb.1: # %overflow.lhs
; X64-NEXT: testq %rcx, %rcx
-; X64-NEXT: setne %dl
+; X64-NEXT: je .LBB0_7
+; X64-NEXT: # %bb.2: # %overflow
+; X64-NEXT: setne %al
; X64-NEXT: testq %rsi, %rsi
; X64-NEXT: setne %r9b
-; X64-NEXT: andb %dl, %r9b
+; X64-NEXT: andb %al, %r9b
+; X64-NEXT: movq %rsi, %rax
; X64-NEXT: mulq %r8
; X64-NEXT: movq %rax, %rsi
; X64-NEXT: seto %r10b
@@ -26,10 +31,59 @@ define { i128, i8 } @muloti_test(i128 %l, i128 %r) unnamed_addr #0 {
; X64-NEXT: addq %rcx, %rdx
; X64-NEXT: setb %cl
; X64-NEXT: orb %r11b, %cl
+; X64-NEXT: andb $1, %cl
+; X64-NEXT: # kill: def $cl killed $cl killed $ecx
+; X64-NEXT: retq
+; X64-NEXT: .LBB0_3: # %overflow.no.lhs
+; X64-NEXT: movq %rdi, %rax
+; X64-NEXT: mulq %r8
+; X64-NEXT: testq %rcx, %rcx
+; X64-NEXT: je .LBB0_8
+; X64-NEXT: # %bb.4: # %overflow.no.lhs.only
+; X64-NEXT: movq %rax, %r9
+; X64-NEXT: imulq %rsi, %r8
+; X64-NEXT: addq %rdx, %r8
+; X64-NEXT: imulq %rcx, %rsi
+; X64-NEXT: movq %rdi, %rax
+; X64-NEXT: mulq %rcx
+; X64-NEXT: movq %rdx, %rcx
+; X64-NEXT: movq %rax, %rdx
+; X64-NEXT: movq %r9, %rax
+; X64-NEXT: addq %r8, %rdx
+; X64-NEXT: adcq %rsi, %rcx
+; X64-NEXT: jmp .LBB0_5
+; X64-NEXT: .LBB0_7: # %overflow.no.rhs.only
+; X64-NEXT: movq %r8, %rax
+; X64-NEXT: mulq %rdi
+; X64-NEXT: movq %rax, %r9
+; X64-NEXT: imulq %rcx, %rdi
+; X64-NEXT: addq %rdx, %rdi
+; X64-NEXT: imulq %rsi, %rcx
+; X64-NEXT: movq %r8, %rax
+; X64-NEXT: mulq %rsi
+; X64-NEXT: movq %rdx, %rsi
+; X64-NEXT: movq %rax, %rdx
+; X64-NEXT: movq %r9, %rax
+; X64-NEXT: addq %rdi, %rdx
+; X64-NEXT: adcq %rcx, %rsi
+; X64-NEXT: .LBB0_5: # %overflow.no.lhs.only
+; X64-NEXT: setne %cl
+; X64-NEXT: andb $1, %cl
+; X64-NEXT: # kill: def $cl killed $cl killed $ecx
+; X64-NEXT: retq
+; X64-NEXT: .LBB0_8: # %overflow.no
+; X64-NEXT: imulq %rcx, %rdi
+; X64-NEXT: addq %rdx, %rdi
+; X64-NEXT: imulq %r8, %rsi
+; X64-NEXT: addq %rdi, %rsi
+; X64-NEXT: xorl %ecx, %ecx
+; X64-NEXT: movq %rsi, %rdx
+; X64-NEXT: andb $1, %cl
+; X64-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NEXT: retq
;
; X86-LABEL: muloti_test:
-; X86: # %bb.0: # %start
+; X86: # %bb.0: # %overflow.entry
; X86-NEXT: pushl %ebp
; X86-NEXT: .cfi_def_cfa_offset 8
; X86-NEXT: pushl %ebx
@@ -38,116 +92,352 @@ define { i128, i8 } @muloti_test(i128 %l, i128 %r) unnamed_addr #0 {
; X86-NEXT: .cfi_def_cfa_offset 16
; X86-NEXT: pushl %esi
; X86-NEXT: .cfi_def_cfa_offset 20
-; X86-NEXT: subl $24, %esp
-; X86-NEXT: .cfi_def_cfa_offset 44
+; X86-NEXT: subl $36, %esp
+; X86-NEXT: .cfi_def_cfa_offset 56
; X86-NEXT: .cfi_offset %esi, -20
; X86-NEXT: .cfi_offset %edi, -16
; X86-NEXT: .cfi_offset %ebx, -12
; X86-NEXT: .cfi_offset %ebp, -8
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; X86-NEXT: movl %ebp, %edx
+; X86-NEXT: orl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: je .LBB0_4
+; X86-NEXT: # %bb.1: # %overflow.lhs
+; X86-NEXT: orl %ebx, %eax
+; X86-NEXT: je .LBB0_2
+; X86-NEXT: # %bb.6: # %overflow
+; X86-NEXT: movl %ebx, %eax
+; X86-NEXT: mull %esi
+; X86-NEXT: movl %esi, %ebx
+; X86-NEXT: movl %eax, %edi
+; X86-NEXT: seto {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: mull %ebx
-; X86-NEXT: movl %eax, %ecx
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: mull %esi
+; X86-NEXT: leal (%edi,%eax), %ecx
; X86-NEXT: seto {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
; X86-NEXT: movl %esi, %eax
-; X86-NEXT: mull %edi
-; X86-NEXT: leal (%ecx,%eax), %esi
-; X86-NEXT: seto {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X86-NEXT: movl %edi, %eax
; X86-NEXT: mull %ebx
+; X86-NEXT: movl %ebx, %esi
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl %edx, %ecx
-; X86-NEXT: addl %esi, %ecx
+; X86-NEXT: movl %edx, %edi
+; X86-NEXT: addl %ecx, %edi
; X86-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: mull %edi
-; X86-NEXT: movl %eax, %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: mull %ecx
+; X86-NEXT: movl %eax, %ebx
; X86-NEXT: seto {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: mull %ebx
-; X86-NEXT: leal (%esi,%eax), %esi
+; X86-NEXT: mull %ebp
+; X86-NEXT: leal (%ebx,%eax), %ebx
; X86-NEXT: seto {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X86-NEXT: movl %ebx, %eax
-; X86-NEXT: mull %edi
-; X86-NEXT: movl %eax, %ebp
-; X86-NEXT: movl %edx, %ebx
-; X86-NEXT: addl %esi, %ebx
+; X86-NEXT: movl %ebp, %eax
+; X86-NEXT: mull %ecx
+; X86-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %edx, %ebp
+; X86-NEXT: addl %ebx, %ebp
; X86-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
-; X86-NEXT: adcl %ecx, %ebx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: mull %edi
-; X86-NEXT: movl %edx, %ecx
+; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: adcl %edi, %ebp
+; X86-NEXT: movl %esi, %eax
+; X86-NEXT: mull %ecx
+; X86-NEXT: movl %edx, %edi
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: mull %edi
+; X86-NEXT: mull %ecx
; X86-NEXT: movl %edx, %esi
-; X86-NEXT: movl %eax, %edi
-; X86-NEXT: addl %ecx, %edi
+; X86-NEXT: movl %eax, %ebx
+; X86-NEXT: addl %edi, %ebx
; X86-NEXT: adcl $0, %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: mull {{[0-9]+}}(%esp)
-; X86-NEXT: movl %edx, %ecx
-; X86-NEXT: addl %edi, %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: mull %ecx
+; X86-NEXT: movl %edx, %edi
+; X86-NEXT: addl %ebx, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: adcl %esi, %ecx
+; X86-NEXT: adcl %esi, %edi
+; X86-NEXT: setb %bl
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: mull %ecx
+; X86-NEXT: movl %edx, %esi
+; X86-NEXT: movl %eax, %edx
+; X86-NEXT: addl %edi, %edx
+; X86-NEXT: movzbl %bl, %eax
+; X86-NEXT: adcl %eax, %esi
+; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT: adcl %ebp, %esi
; X86-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl %esi, %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: mull %edi
-; X86-NEXT: addl %ecx, %eax
+; X86-NEXT: cmpl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: setne %al
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT: testl %ebx, %ebx
+; X86-NEXT: setne %ah
+; X86-NEXT: andb %al, %ah
; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
-; X86-NEXT: adcl %ecx, %edx
-; X86-NEXT: addl %ebp, %eax
-; X86-NEXT: adcl %ebx, %edx
-; X86-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; X86-NEXT: testl %esi, %esi
-; X86-NEXT: setne %cl
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: testl %esi, %esi
-; X86-NEXT: setne %ch
-; X86-NEXT: andb %cl, %ch
-; X86-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
; X86-NEXT: orb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Folded Reload
-; X86-NEXT: orb %ch, %cl
+; X86-NEXT: orb %ah, %cl
; X86-NEXT: orb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Folded Reload
-; X86-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
-; X86-NEXT: testl %edi, %edi
-; X86-NEXT: setne %cl
+; X86-NEXT: cmpl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: setne %al
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: testl %edi, %edi
+; X86-NEXT: setne %ah
+; X86-NEXT: andb %al, %ah
+; X86-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %al # 1-byte Reload
+; X86-NEXT: orb {{[-0-9]+}}(%e{{[sb]}}p), %al # 1-byte Folded Reload
+; X86-NEXT: orb %ah, %al
+; X86-NEXT: orb {{[-0-9]+}}(%e{{[sb]}}p), %al # 1-byte Folded Reload
+; X86-NEXT: orl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X86-NEXT: setne %ch
-; X86-NEXT: andb %cl, %ch
-; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 1-byte Folded Reload
-; X86-NEXT: orb {{[-0-9]+}}(%e{{[sb]}}p), %bl # 1-byte Folded Reload
-; X86-NEXT: orb %ch, %bl
-; X86-NEXT: orb {{[-0-9]+}}(%e{{[sb]}}p), %bl # 1-byte Folded Reload
-; X86-NEXT: orl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X86-NEXT: setne %bh
; X86-NEXT: orl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT: setne %bl
+; X86-NEXT: andb %ch, %bl
+; X86-NEXT: orb %al, %bl
+; X86-NEXT: orb %cl, %bl
+; X86-NEXT: orb {{[-0-9]+}}(%e{{[sb]}}p), %bl # 1-byte Folded Reload
+; X86-NEXT: jmp .LBB0_7
+; X86-NEXT: .LBB0_4: # %overflow.no.lhs
+; X86-NEXT: movl %eax, %edx
+; X86-NEXT: orl %ebx, %edx
+; X86-NEXT: je .LBB0_5
+; X86-NEXT: # %bb.3: # %overflow.no.lhs.only
+; X86-NEXT: movl %esi, %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: mull %esi
+; X86-NEXT: movl %edx, %ebp
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %edi, %eax
+; X86-NEXT: mull %esi
+; X86-NEXT: movl %edx, %edi
+; X86-NEXT: movl %eax, %ebx
+; X86-NEXT: addl %ebp, %ebx
+; X86-NEXT: adcl $0, %edi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: mull %ecx
+; X86-NEXT: movl %edx, %ebp
+; X86-NEXT: addl %ebx, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: adcl %edi, %ebp
+; X86-NEXT: movl %ecx, %edx
+; X86-NEXT: setb %cl
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: mull %edx
+; X86-NEXT: movl %edx, %edi
+; X86-NEXT: movl %eax, %ebx
+; X86-NEXT: addl %ebp, %ebx
+; X86-NEXT: movzbl %cl, %eax
+; X86-NEXT: adcl %eax, %edi
+; X86-NEXT: movl %esi, %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: mull %ecx
+; X86-NEXT: movl %eax, %ebp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: imull %eax, %esi
+; X86-NEXT: addl %edx, %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: imull %ecx, %eax
+; X86-NEXT: addl %esi, %eax
+; X86-NEXT: addl %ebx, %ebp
+; X86-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: adcl %edi, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: movl %esi, %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: mull %ecx
+; X86-NEXT: movl %edx, %ebx
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: mull %ecx
+; X86-NEXT: movl %edx, %ecx
+; X86-NEXT: movl %eax, %edi
+; X86-NEXT: addl %ebx, %edi
+; X86-NEXT: adcl $0, %ecx
+; X86-NEXT: movl %esi, %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT: mull %ebx
+; X86-NEXT: movl %edx, %ebp
+; X86-NEXT: movl %eax, %esi
+; X86-NEXT: addl %edi, %esi
+; X86-NEXT: adcl %ecx, %ebp
+; X86-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: mull %ebx
+; X86-NEXT: movl %edx, %ecx
+; X86-NEXT: movl %eax, %ebx
+; X86-NEXT: addl %ebp, %ebx
+; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
+; X86-NEXT: adcl %eax, %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl %edi, %eax
+; X86-NEXT: imull {{[0-9]+}}(%esp), %edi
+; X86-NEXT: mull {{[0-9]+}}(%esp)
+; X86-NEXT: addl %edx, %edi
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; X86-NEXT: imull {{[0-9]+}}(%esp), %ebp
+; X86-NEXT: addl %edi, %ebp
+; X86-NEXT: addl %ebx, %eax
+; X86-NEXT: adcl %ecx, %ebp
+; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT: adcl $0, %eax
+; X86-NEXT: adcl $0, %ebp
+; X86-NEXT: orl %eax, %ebp
+; X86-NEXT: setne %bl
+; X86-NEXT: jmp .LBB0_7
+; X86-NEXT: .LBB0_2: # %overflow.no.rhs.only
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: mull %esi
+; X86-NEXT: movl %edx, %ebp
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: mull %esi
+; X86-NEXT: movl %edx, %edi
+; X86-NEXT: movl %eax, %ebx
+; X86-NEXT: addl %ebp, %ebx
+; X86-NEXT: adcl $0, %edi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: mull %ecx
+; X86-NEXT: movl %edx, %ebp
+; X86-NEXT: addl %ebx, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: adcl %edi, %ebp
+; X86-NEXT: setb %cl
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: mull {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, %edi
+; X86-NEXT: movl %eax, %ebx
+; X86-NEXT: addl %ebp, %ebx
+; X86-NEXT: movzbl %cl, %eax
+; X86-NEXT: adcl %eax, %edi
+; X86-NEXT: movl %esi, %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; X86-NEXT: mull %ebp
+; X86-NEXT: imull {{[0-9]+}}(%esp), %esi
+; X86-NEXT: addl %edx, %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: imull %ebp, %ecx
+; X86-NEXT: addl %esi, %ecx
+; X86-NEXT: addl %ebx, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: adcl %edi, %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; X86-NEXT: movl %ebp, %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: mull %esi
+; X86-NEXT: movl %edx, %ecx
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: mull %esi
+; X86-NEXT: movl %edx, %ebx
+; X86-NEXT: movl %eax, %edi
+; X86-NEXT: addl %ecx, %edi
+; X86-NEXT: adcl $0, %ebx
+; X86-NEXT: movl %ebp, %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: mull %ecx
+; X86-NEXT: movl %edx, %ebp
+; X86-NEXT: movl %eax, %esi
+; X86-NEXT: addl %edi, %esi
+; X86-NEXT: adcl %ebx, %ebp
+; X86-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: mull %ecx
+; X86-NEXT: movl %edx, %edi
+; X86-NEXT: movl %eax, %ebx
+; X86-NEXT: addl %ebp, %ebx
+; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
+; X86-NEXT: adcl %eax, %edi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; X86-NEXT: movl %ebp, %eax
+; X86-NEXT: imull {{[0-9]+}}(%esp), %ebp
+; X86-NEXT: mull {{[0-9]+}}(%esp)
+; X86-NEXT: addl %edx, %ebp
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT: imull {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: addl %ebp, %ecx
+; X86-NEXT: addl %ebx, %eax
+; X86-NEXT: adcl %edi, %ecx
+; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT: adcl $0, %eax
+; X86-NEXT: adcl $0, %ecx
+; X86-NEXT: orl %eax, %ecx
+; X86-NEXT: setne %bl
+; X86-NEXT: jmp .LBB0_7
+; X86-NEXT: .LBB0_5: # %overflow.no
+; X86-NEXT: movl %eax, %ecx
+; X86-NEXT: imull %edi, %ecx
+; X86-NEXT: mull %esi
+; X86-NEXT: movl %eax, %edi
+; X86-NEXT: addl %ecx, %edx
+; X86-NEXT: imull %esi, %ebx
+; X86-NEXT: addl %edx, %ebx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: movl %esi, %eax
+; X86-NEXT: mull %ebp
+; X86-NEXT: movl %ebx, %ecx
+; X86-NEXT: movl %eax, %ebx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: imull %esi, %eax
+; X86-NEXT: addl %edx, %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: imull %edx, %ebp
+; X86-NEXT: addl %eax, %ebp
+; X86-NEXT: addl %edi, %ebx
+; X86-NEXT: adcl %ecx, %ebp
+; X86-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X86-NEXT: movl %esi, 4(%ecx)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X86-NEXT: movl %esi, (%ecx)
-; X86-NEXT: movl %eax, 8(%ecx)
-; X86-NEXT: movl %edx, 12(%ecx)
-; X86-NEXT: setne %al
-; X86-NEXT: andb %bh, %al
-; X86-NEXT: orb %bl, %al
-; X86-NEXT: orb {{[-0-9]+}}(%e{{[sb]}}p), %al # 1-byte Folded Reload
-; X86-NEXT: orb {{[-0-9]+}}(%e{{[sb]}}p), %al # 1-byte Folded Reload
-; X86-NEXT: andb $1, %al
-; X86-NEXT: movb %al, 16(%ecx)
; X86-NEXT: movl %ecx, %eax
-; X86-NEXT: addl $24, %esp
+; X86-NEXT: mull %esi
+; X86-NEXT: movl %edx, %ebp
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: mull %esi
+; X86-NEXT: movl %edx, %esi
+; X86-NEXT: movl %eax, %edi
+; X86-NEXT: addl %ebp, %edi
+; X86-NEXT: adcl $0, %esi
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: mull %ecx
+; X86-NEXT: movl %edx, %ebp
+; X86-NEXT: addl %edi, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: adcl %esi, %ebp
+; X86-NEXT: setb %cl
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: mull {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, %esi
+; X86-NEXT: movl %eax, %edx
+; X86-NEXT: addl %ebp, %edx
+; X86-NEXT: movzbl %cl, %eax
+; X86-NEXT: adcl %eax, %esi
+; X86-NEXT: addl %ebx, %edx
+; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT: xorl %ebx, %ebx
+; X86-NEXT: .LBB0_7: # %overflow.res
+; X86-NEXT: andb $1, %bl
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: movl %ecx, (%eax)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: movl %ecx, 4(%eax)
+; X86-NEXT: movl %edx, 8(%eax)
+; X86-NEXT: movl %esi, 12(%eax)
+; X86-NEXT: movb %bl, 16(%eax)
+; X86-NEXT: addl $36, %esp
; X86-NEXT: .cfi_def_cfa_offset 20
; X86-NEXT: popl %esi
; X86-NEXT: .cfi_def_cfa_offset 16
diff --git a/llvm/test/CodeGen/X86/umulo-64-legalisation-lowering.ll b/llvm/test/CodeGen/X86/umulo-64-legalisation-lowering.ll
index 132683cdb0f9e..99dc422a6b53e 100644
--- a/llvm/test/CodeGen/X86/umulo-64-legalisation-lowering.ll
+++ b/llvm/test/CodeGen/X86/umulo-64-legalisation-lowering.ll
@@ -3,7 +3,7 @@
define { i64, i8 } @mulodi_test(i64 %l, i64 %r) unnamed_addr #0 {
; X86-LABEL: mulodi_test:
-; X86: # %bb.0: # %start
+; X86: # %bb.0: # %overflow.entry
; X86-NEXT: pushl %ebp
; X86-NEXT: .cfi_def_cfa_offset 8
; X86-NEXT: pushl %ebx
@@ -12,32 +12,89 @@ define { i64, i8 } @mulodi_test(i64 %l, i64 %r) unnamed_addr #0 {
; X86-NEXT: .cfi_def_cfa_offset 16
; X86-NEXT: pushl %esi
; X86-NEXT: .cfi_def_cfa_offset 20
+; X86-NEXT: pushl %eax
+; X86-NEXT: .cfi_def_cfa_offset 24
; X86-NEXT: .cfi_offset %esi, -20
; X86-NEXT: .cfi_offset %edi, -16
; X86-NEXT: .cfi_offset %ebx, -12
; X86-NEXT: .cfi_offset %ebp, -8
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: testl %esi, %esi
-; X86-NEXT: setne %dl
-; X86-NEXT: testl %eax, %eax
+; X86-NEXT: testl %edi, %edi
+; X86-NEXT: je .LBB0_4
+; X86-NEXT: # %bb.1: # %overflow.lhs
+; X86-NEXT: testl %ebx, %ebx
+; X86-NEXT: je .LBB0_2
+; X86-NEXT: # %bb.6: # %overflow
+; X86-NEXT: setne %al
+; X86-NEXT: testl %edi, %edi
; X86-NEXT: setne %cl
-; X86-NEXT: andb %dl, %cl
-; X86-NEXT: mull {{[0-9]+}}(%esp)
+; X86-NEXT: andb %al, %cl
+; X86-NEXT: movl %edi, %eax
+; X86-NEXT: mull %ebp
; X86-NEXT: movl %eax, %edi
+; X86-NEXT: seto %ch
+; X86-NEXT: movl %ebx, %eax
+; X86-NEXT: mull %esi
; X86-NEXT: seto %bl
+; X86-NEXT: orb %ch, %bl
+; X86-NEXT: orb %cl, %bl
+; X86-NEXT: leal (%edi,%eax), %ecx
; X86-NEXT: movl %esi, %eax
; X86-NEXT: mull %ebp
-; X86-NEXT: seto %ch
-; X86-NEXT: orb %bl, %ch
-; X86-NEXT: orb %cl, %ch
-; X86-NEXT: leal (%edi,%eax), %esi
+; X86-NEXT: addl %ecx, %edx
+; X86-NEXT: setb %cl
+; X86-NEXT: orb %bl, %cl
+; X86-NEXT: jmp .LBB0_7
+; X86-NEXT: .LBB0_4: # %overflow.no.lhs
+; X86-NEXT: movl %esi, %eax
+; X86-NEXT: mull %ebp
+; X86-NEXT: testl %ebx, %ebx
+; X86-NEXT: je .LBB0_5
+; X86-NEXT: # %bb.3: # %overflow.no.lhs.only
+; X86-NEXT: movl %eax, %ecx
+; X86-NEXT: imull %edi, %ebp
+; X86-NEXT: addl %edx, %ebp
+; X86-NEXT: imull %ebx, %edi
+; X86-NEXT: movl %esi, %eax
+; X86-NEXT: mull %ebx
+; X86-NEXT: movl %edx, %esi
+; X86-NEXT: movl %eax, %edx
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: addl %ebp, %edx
+; X86-NEXT: adcl %edi, %esi
+; X86-NEXT: setne %cl
+; X86-NEXT: jmp .LBB0_7
+; X86-NEXT: .LBB0_2: # %overflow.no.rhs.only
+; X86-NEXT: movl %ebp, %eax
+; X86-NEXT: mull %esi
+; X86-NEXT: movl %eax, (%esp) # 4-byte Spill
+; X86-NEXT: imull %ebx, %esi
+; X86-NEXT: addl %edx, %esi
+; X86-NEXT: imull %edi, %ebx
; X86-NEXT: movl %ebp, %eax
-; X86-NEXT: mull {{[0-9]+}}(%esp)
+; X86-NEXT: mull %edi
+; X86-NEXT: movl %edx, %ecx
+; X86-NEXT: movl %eax, %edx
+; X86-NEXT: movl (%esp), %eax # 4-byte Reload
; X86-NEXT: addl %esi, %edx
-; X86-NEXT: setb %cl
-; X86-NEXT: orb %ch, %cl
+; X86-NEXT: adcl %ebx, %ecx
+; X86-NEXT: setne %cl
+; X86-NEXT: jmp .LBB0_7
+; X86-NEXT: .LBB0_5: # %overflow.no
+; X86-NEXT: imull %ebx, %esi
+; X86-NEXT: addl %edx, %esi
+; X86-NEXT: imull %ebp, %edi
+; X86-NEXT: addl %esi, %edi
+; X86-NEXT: xorl %ecx, %ecx
+; X86-NEXT: movl %edi, %edx
+; X86-NEXT: .LBB0_7: # %overflow.res
+; X86-NEXT: andb $1, %cl
+; X86-NEXT: # kill: def $cl killed $cl killed $ecx
+; X86-NEXT: addl $4, %esp
+; X86-NEXT: .cfi_def_cfa_offset 20
; X86-NEXT: popl %esi
; X86-NEXT: .cfi_def_cfa_offset 16
; X86-NEXT: popl %edi
diff --git a/llvm/test/CodeGen/X86/xmulo.ll b/llvm/test/CodeGen/X86/xmulo.ll
index a076d0d762aa3..2601b73f26822 100644
--- a/llvm/test/CodeGen/X86/xmulo.ll
+++ b/llvm/test/CodeGen/X86/xmulo.ll
@@ -13,7 +13,7 @@ define {i64, i1} @t1() nounwind {
; CHECK-NEXT: retq
;
; WIN32-LABEL: t1:
-; WIN32: # %bb.0:
+; WIN32: # %bb.0: # %overflow.entry
; WIN32-NEXT: movl $72, %eax
; WIN32-NEXT: xorl %edx, %edx
; WIN32-NEXT: xorl %ecx, %ecx
@@ -30,7 +30,7 @@ define {i64, i1} @t2() nounwind {
; CHECK-NEXT: retq
;
; WIN32-LABEL: t2:
-; WIN32: # %bb.0:
+; WIN32: # %bb.0: # %overflow.entry
; WIN32-NEXT: xorl %eax, %eax
; WIN32-NEXT: xorl %edx, %edx
; WIN32-NEXT: xorl %ecx, %ecx
@@ -47,7 +47,7 @@ define {i64, i1} @t3() nounwind {
; CHECK-NEXT: retq
;
; WIN32-LABEL: t3:
-; WIN32: # %bb.0:
+; WIN32: # %bb.0: # %overflow.entry
; WIN32-NEXT: movl $-9, %eax
; WIN32-NEXT: movl $-1, %edx
; WIN32-NEXT: movb $1, %cl
@@ -204,59 +204,207 @@ define zeroext i1 @smuloi64(i64 %v1, i64 %v2, ptr %res) {
; WIN64-NEXT: retq
;
; WIN32-LABEL: smuloi64:
-; WIN32: # %bb.0:
+; WIN32: # %bb.0: # %overflow.entry
; WIN32-NEXT: pushl %ebp
; WIN32-NEXT: pushl %ebx
; WIN32-NEXT: pushl %edi
; WIN32-NEXT: pushl %esi
-; WIN32-NEXT: subl $8, %esp
-; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; WIN32-NEXT: subl $16, %esp
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; WIN32-NEXT: movl {{[0-9]+}}(%esp), %edi
-; WIN32-NEXT: movl %edi, %esi
-; WIN32-NEXT: sarl $31, %esi
-; WIN32-NEXT: imull %ebx, %esi
-; WIN32-NEXT: mull %ebx
-; WIN32-NEXT: movl %edx, %ecx
-; WIN32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; WIN32-NEXT: movl %edi, %eax
-; WIN32-NEXT: mull %ebx
-; WIN32-NEXT: movl %edx, %ebx
-; WIN32-NEXT: movl %eax, %ebp
-; WIN32-NEXT: addl %ecx, %ebp
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; WIN32-NEXT: adcl %esi, %ebx
-; WIN32-NEXT: movl %ebx, %edi
-; WIN32-NEXT: sarl $31, %edi
+; WIN32-NEXT: movl {{[0-9]+}}(%esp), %edi
+; WIN32-NEXT: sarl $31, %eax
+; WIN32-NEXT: movl %edi, %edx
+; WIN32-NEXT: sarl $31, %edx
; WIN32-NEXT: movl %ecx, %esi
+; WIN32-NEXT: subl %edx, %esi
+; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; WIN32-NEXT: movl {{[0-9]+}}(%esp), %esi
+; WIN32-NEXT: movl %ebx, %edx
+; WIN32-NEXT: je LBB6_13
+; WIN32-NEXT: # %bb.1: # %overflow.lhs
+; WIN32-NEXT: subl %eax, %edx
+; WIN32-NEXT: je LBB6_2
+; WIN32-NEXT: # %bb.15: # %overflow
+; WIN32-NEXT: movl %ecx, %ebp
+; WIN32-NEXT: sarl $31, %ebp
+; WIN32-NEXT: movl {{[0-9]+}}(%esp), %esi
+; WIN32-NEXT: imull %esi, %ebp
+; WIN32-NEXT: movl %edi, %eax
+; WIN32-NEXT: mull %esi
+; WIN32-NEXT: movl %edx, (%esp) # 4-byte Spill
+; WIN32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; WIN32-NEXT: movl %ecx, %eax
+; WIN32-NEXT: mull %esi
+; WIN32-NEXT: movl %eax, %edi
+; WIN32-NEXT: addl (%esp), %edi # 4-byte Folded Reload
+; WIN32-NEXT: adcl %ebp, %edx
+; WIN32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; WIN32-NEXT: movl %edx, %eax
+; WIN32-NEXT: sarl $31, %eax
+; WIN32-NEXT: movl %eax, (%esp) # 4-byte Spill
+; WIN32-NEXT: movl %ebx, %esi
; WIN32-NEXT: sarl $31, %esi
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN32-NEXT: imull %eax, %esi
-; WIN32-NEXT: mull %ecx
-; WIN32-NEXT: movl %edx, %ecx
+; WIN32-NEXT: mull %ebx
+; WIN32-NEXT: movl %edx, %ebp
+; WIN32-NEXT: addl %edi, %eax
+; WIN32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; WIN32-NEXT: adcl %esi, %ebp
+; WIN32-NEXT: movl {{[0-9]+}}(%esp), %esi
+; WIN32-NEXT: movl %ebp, %edi
+; WIN32-NEXT: sarl $31, %edi
+; WIN32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; WIN32-NEXT: adcl (%esp), %edi # 4-byte Folded Reload
+; WIN32-NEXT: movl %ecx, %eax
+; WIN32-NEXT: imull %ebx
; WIN32-NEXT: addl %ebp, %eax
-; WIN32-NEXT: movl %eax, (%esp) # 4-byte Spill
-; WIN32-NEXT: adcl %esi, %ecx
-; WIN32-NEXT: movl %ecx, %ebp
-; WIN32-NEXT: sarl $31, %ebp
-; WIN32-NEXT: addl %ebx, %ecx
-; WIN32-NEXT: adcl %edi, %ebp
-; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; WIN32-NEXT: imull {{[0-9]+}}(%esp)
-; WIN32-NEXT: addl %ecx, %eax
-; WIN32-NEXT: adcl %ebp, %edx
-; WIN32-NEXT: movl (%esp), %esi # 4-byte Reload
-; WIN32-NEXT: movl %esi, %ecx
+; WIN32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; WIN32-NEXT: adcl %edi, %edx
+; WIN32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; WIN32-NEXT: movl %edi, %ecx
; WIN32-NEXT: sarl $31, %ecx
; WIN32-NEXT: xorl %ecx, %edx
; WIN32-NEXT: xorl %eax, %ecx
; WIN32-NEXT: orl %edx, %ecx
-; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; WIN32-NEXT: movl %esi, 4(%eax)
-; WIN32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; WIN32-NEXT: movl %ecx, (%eax)
+; WIN32-NEXT: movl %edi, %ecx
; WIN32-NEXT: setne %al
-; WIN32-NEXT: addl $8, %esp
+; WIN32-NEXT: jmp LBB6_16
+; WIN32-NEXT: LBB6_13: # %overflow.no.lhs
+; WIN32-NEXT: subl %eax, %edx
+; WIN32-NEXT: je LBB6_14
+; WIN32-NEXT: # %bb.7: # %overflow.no.lhs.only
+; WIN32-NEXT: movl %ecx, %eax
+; WIN32-NEXT: sarl $31, %eax
+; WIN32-NEXT: movl %ecx, %ebp
+; WIN32-NEXT: xorl %eax, %ebp
+; WIN32-NEXT: movl %edi, %esi
+; WIN32-NEXT: xorl %eax, %esi
+; WIN32-NEXT: subl %eax, %esi
+; WIN32-NEXT: sbbl %eax, %ebp
+; WIN32-NEXT: testl %ecx, %ecx
+; WIN32-NEXT: sets {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; WIN32-NEXT: js LBB6_9
+; WIN32-NEXT: # %bb.8: # %overflow.no.lhs.only
+; WIN32-NEXT: movl %ecx, %ebp
+; WIN32-NEXT: movl %edi, %esi
+; WIN32-NEXT: LBB6_9: # %overflow.no.lhs.only
+; WIN32-NEXT: movl %ebx, %eax
+; WIN32-NEXT: sarl $31, %eax
+; WIN32-NEXT: movl %ebx, %ecx
+; WIN32-NEXT: xorl %eax, %ecx
+; WIN32-NEXT: movl {{[0-9]+}}(%esp), %edx
+; WIN32-NEXT: movl %edx, %edi
+; WIN32-NEXT: xorl %eax, %edi
+; WIN32-NEXT: subl %eax, %edi
+; WIN32-NEXT: sbbl %eax, %ecx
+; WIN32-NEXT: testl %ebx, %ebx
+; WIN32-NEXT: sets (%esp) # 1-byte Folded Spill
+; WIN32-NEXT: js LBB6_11
+; WIN32-NEXT: # %bb.10: # %overflow.no.lhs.only
+; WIN32-NEXT: movl %ebx, %ecx
+; WIN32-NEXT: movl %edx, %edi
+; WIN32-NEXT: LBB6_11: # %overflow.no.lhs.only
+; WIN32-NEXT: movl %esi, %eax
+; WIN32-NEXT: mull %edi
+; WIN32-NEXT: movl %eax, %ebx
+; WIN32-NEXT: imull %ebp, %edi
+; WIN32-NEXT: addl %edx, %edi
+; WIN32-NEXT: imull %ecx, %ebp
+; WIN32-NEXT: movl %esi, %eax
+; WIN32-NEXT: mull %ecx
+; WIN32-NEXT: movl %eax, %ecx
+; WIN32-NEXT: addl %edi, %ecx
+; WIN32-NEXT: adcl %ebp, %edx
+; WIN32-NEXT: movl %ebx, %ebp
+; WIN32-NEXT: movzbl (%esp), %eax # 1-byte Folded Reload
+; WIN32-NEXT: xorb {{[-0-9]+}}(%e{{[sb]}}p), %al # 1-byte Folded Reload
+; WIN32-NEXT: jmp LBB6_12
+; WIN32-NEXT: LBB6_2: # %overflow.no.rhs.only
+; WIN32-NEXT: movl %edi, %edx
+; WIN32-NEXT: movl %ebx, %eax
+; WIN32-NEXT: sarl $31, %eax
+; WIN32-NEXT: movl %ebx, %ebp
+; WIN32-NEXT: xorl %eax, %ebp
+; WIN32-NEXT: movl {{[0-9]+}}(%esp), %esi
+; WIN32-NEXT: movl %esi, %edi
+; WIN32-NEXT: xorl %eax, %edi
+; WIN32-NEXT: subl %eax, %edi
+; WIN32-NEXT: sbbl %eax, %ebp
+; WIN32-NEXT: testl %ebx, %ebx
+; WIN32-NEXT: sets {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; WIN32-NEXT: js LBB6_4
+; WIN32-NEXT: # %bb.3: # %overflow.no.rhs.only
+; WIN32-NEXT: movl %ebx, %ebp
+; WIN32-NEXT: movl %esi, %edi
+; WIN32-NEXT: LBB6_4: # %overflow.no.rhs.only
+; WIN32-NEXT: movl %edi, %ebx
+; WIN32-NEXT: movl %ecx, %eax
+; WIN32-NEXT: sarl $31, %eax
+; WIN32-NEXT: movl %ecx, %edi
+; WIN32-NEXT: xorl %eax, %edi
+; WIN32-NEXT: movl %edx, %esi
+; WIN32-NEXT: xorl %eax, %esi
+; WIN32-NEXT: subl %eax, %esi
+; WIN32-NEXT: sbbl %eax, %edi
+; WIN32-NEXT: testl %ecx, %ecx
+; WIN32-NEXT: sets (%esp) # 1-byte Folded Spill
+; WIN32-NEXT: js LBB6_6
+; WIN32-NEXT: # %bb.5: # %overflow.no.rhs.only
+; WIN32-NEXT: movl %ecx, %edi
+; WIN32-NEXT: movl %edx, %esi
+; WIN32-NEXT: LBB6_6: # %overflow.no.rhs.only
+; WIN32-NEXT: movl %ebx, %ecx
+; WIN32-NEXT: movl %ebx, %eax
+; WIN32-NEXT: mull %esi
+; WIN32-NEXT: movl %eax, %ebx
+; WIN32-NEXT: imull %ebp, %esi
+; WIN32-NEXT: addl %edx, %esi
+; WIN32-NEXT: imull %edi, %ebp
+; WIN32-NEXT: movl %ecx, %eax
+; WIN32-NEXT: mull %edi
+; WIN32-NEXT: movl %eax, %ecx
+; WIN32-NEXT: addl %esi, %ecx
+; WIN32-NEXT: adcl %ebp, %edx
+; WIN32-NEXT: movl %ebx, %ebp
+; WIN32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
+; WIN32-NEXT: xorb (%esp), %al # 1-byte Folded Reload
+; WIN32-NEXT: LBB6_12: # %overflow.res
+; WIN32-NEXT: movzbl %al, %esi
+; WIN32-NEXT: movl %esi, %eax
+; WIN32-NEXT: negl %eax
+; WIN32-NEXT: xorl %eax, %ebp
+; WIN32-NEXT: addl %esi, %ebp
+; WIN32-NEXT: xorl %ebx, %ebx
+; WIN32-NEXT: movl %ebp, %edi
+; WIN32-NEXT: subl %esi, %edi
+; WIN32-NEXT: setb %bl
+; WIN32-NEXT: xorl %eax, %ecx
+; WIN32-NEXT: addl %ebx, %ecx
+; WIN32-NEXT: xorl %edx, %eax
+; WIN32-NEXT: movl %ecx, %edx
+; WIN32-NEXT: subl %ebx, %edx
+; WIN32-NEXT: adcl $0, %eax
+; WIN32-NEXT: setne %al
+; WIN32-NEXT: movl {{[0-9]+}}(%esp), %esi
+; WIN32-NEXT: jmp LBB6_16
+; WIN32-NEXT: LBB6_14: # %overflow.no
+; WIN32-NEXT: movl %edi, %eax
+; WIN32-NEXT: movl {{[0-9]+}}(%esp), %edx
+; WIN32-NEXT: mull %edx
+; WIN32-NEXT: movl %eax, %ebp
+; WIN32-NEXT: imull %edi, %ebx
+; WIN32-NEXT: addl %edx, %ebx
+; WIN32-NEXT: imull {{[0-9]+}}(%esp), %ecx
+; WIN32-NEXT: addl %ebx, %ecx
+; WIN32-NEXT: xorl %eax, %eax
+; WIN32-NEXT: LBB6_16: # %overflow.res
+; WIN32-NEXT: movl %ebp, (%esi)
+; WIN32-NEXT: movl %ecx, 4(%esi)
+; WIN32-NEXT: andb $1, %al
+; WIN32-NEXT: # kill: def $al killed $al killed $eax
+; WIN32-NEXT: addl $16, %esp
; WIN32-NEXT: popl %esi
; WIN32-NEXT: popl %edi
; WIN32-NEXT: popl %ebx
@@ -449,37 +597,93 @@ define zeroext i1 @umuloi64(i64 %v1, i64 %v2, ptr %res) {
; WIN64-NEXT: retq
;
; WIN32-LABEL: umuloi64:
-; WIN32: # %bb.0:
+; WIN32: # %bb.0: # %overflow.entry
; WIN32-NEXT: pushl %ebp
; WIN32-NEXT: pushl %ebx
; WIN32-NEXT: pushl %edi
; WIN32-NEXT: pushl %esi
+; WIN32-NEXT: pushl %eax
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %esi
-; WIN32-NEXT: testl %esi, %esi
-; WIN32-NEXT: setne %dl
-; WIN32-NEXT: testl %eax, %eax
+; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; WIN32-NEXT: movl {{[0-9]+}}(%esp), %edi
+; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; WIN32-NEXT: testl %ebx, %ebx
+; WIN32-NEXT: je LBB10_5
+; WIN32-NEXT: # %bb.1: # %overflow.lhs
+; WIN32-NEXT: testl %ebp, %ebp
+; WIN32-NEXT: je LBB10_2
+; WIN32-NEXT: # %bb.7: # %overflow
+; WIN32-NEXT: setne %al
+; WIN32-NEXT: testl %ebx, %ebx
; WIN32-NEXT: setne %cl
-; WIN32-NEXT: andb %dl, %cl
-; WIN32-NEXT: mull {{[0-9]+}}(%esp)
-; WIN32-NEXT: movl %eax, %edi
+; WIN32-NEXT: andb %al, %cl
+; WIN32-NEXT: movl %ebx, %eax
+; WIN32-NEXT: mull %esi
+; WIN32-NEXT: movl %eax, (%esp) # 4-byte Spill
; WIN32-NEXT: seto %bl
-; WIN32-NEXT: movl %esi, %eax
-; WIN32-NEXT: mull %ebp
+; WIN32-NEXT: movl %ebp, %eax
+; WIN32-NEXT: mull %edi
; WIN32-NEXT: seto %ch
; WIN32-NEXT: orb %bl, %ch
; WIN32-NEXT: orb %cl, %ch
-; WIN32-NEXT: leal (%edi,%eax), %esi
-; WIN32-NEXT: movl %ebp, %eax
-; WIN32-NEXT: mull {{[0-9]+}}(%esp)
-; WIN32-NEXT: addl %esi, %edx
-; WIN32-NEXT: setb %cl
-; WIN32-NEXT: orb %ch, %cl
-; WIN32-NEXT: movl {{[0-9]+}}(%esp), %esi
-; WIN32-NEXT: movl %eax, (%esi)
-; WIN32-NEXT: movl %edx, 4(%esi)
-; WIN32-NEXT: movl %ecx, %eax
+; WIN32-NEXT: movl (%esp), %edx # 4-byte Reload
+; WIN32-NEXT: leal (%edx,%eax), %ebx
+; WIN32-NEXT: movl %edi, %eax
+; WIN32-NEXT: mull %esi
+; WIN32-NEXT: movl %eax, %esi
+; WIN32-NEXT: movl %edx, %eax
+; WIN32-NEXT: addl %ebx, %eax
+; WIN32-NEXT: setb %dl
+; WIN32-NEXT: orb %ch, %dl
+; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; WIN32-NEXT: jmp LBB10_8
+; WIN32-NEXT: LBB10_5: # %overflow.no.lhs
+; WIN32-NEXT: movl %edi, %eax
+; WIN32-NEXT: mull %esi
+; WIN32-NEXT: testl %ebp, %ebp
+; WIN32-NEXT: je LBB10_6
+; WIN32-NEXT: # %bb.4: # %overflow.no.lhs.only
+; WIN32-NEXT: movl %eax, (%esp) # 4-byte Spill
+; WIN32-NEXT: imull %ebx, %esi
+; WIN32-NEXT: addl %edx, %esi
+; WIN32-NEXT: imull %ebp, %ebx
+; WIN32-NEXT: movl %edi, %eax
+; WIN32-NEXT: mull %ebp
+; WIN32-NEXT: addl %esi, %eax
+; WIN32-NEXT: movl (%esp), %esi # 4-byte Reload
+; WIN32-NEXT: adcl %ebx, %edx
+; WIN32-NEXT: jmp LBB10_3
+; WIN32-NEXT: LBB10_2: # %overflow.no.rhs.only
+; WIN32-NEXT: movl %esi, %eax
+; WIN32-NEXT: mull %edi
+; WIN32-NEXT: movl %eax, (%esp) # 4-byte Spill
+; WIN32-NEXT: imull %ebp, %edi
+; WIN32-NEXT: addl %edx, %edi
+; WIN32-NEXT: imull %ebx, %ebp
+; WIN32-NEXT: movl %esi, %eax
+; WIN32-NEXT: movl (%esp), %esi # 4-byte Reload
+; WIN32-NEXT: mull %ebx
+; WIN32-NEXT: addl %edi, %eax
+; WIN32-NEXT: adcl %ebp, %edx
+; WIN32-NEXT: LBB10_3: # %overflow.res
+; WIN32-NEXT: testl %edx, %edx
+; WIN32-NEXT: setne %dl
+; WIN32-NEXT: jmp LBB10_8
+; WIN32-NEXT: LBB10_6: # %overflow.no
+; WIN32-NEXT: imull %ebp, %edi
+; WIN32-NEXT: addl %edx, %edi
+; WIN32-NEXT: imull %esi, %ebx
+; WIN32-NEXT: movl %eax, %esi
+; WIN32-NEXT: addl %edi, %ebx
+; WIN32-NEXT: xorl %edx, %edx
+; WIN32-NEXT: movl %ebx, %eax
+; WIN32-NEXT: LBB10_8: # %overflow.res
+; WIN32-NEXT: movl %esi, (%ecx)
+; WIN32-NEXT: movl %eax, 4(%ecx)
+; WIN32-NEXT: andb $1, %dl
+; WIN32-NEXT: movl %edx, %eax
+; WIN32-NEXT: addl $4, %esp
; WIN32-NEXT: popl %esi
; WIN32-NEXT: popl %edi
; WIN32-NEXT: popl %ebx
@@ -547,75 +751,224 @@ define i64 @smuloselecti64(i64 %v1, i64 %v2) {
; WIN64-NEXT: retq
;
; WIN32-LABEL: smuloselecti64:
-; WIN32: # %bb.0:
+; WIN32: # %bb.0: # %overflow.entry
; WIN32-NEXT: pushl %ebp
; WIN32-NEXT: pushl %ebx
; WIN32-NEXT: pushl %edi
; WIN32-NEXT: pushl %esi
-; WIN32-NEXT: pushl %eax
-; WIN32-NEXT: movl {{[0-9]+}}(%esp), %edi
-; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; WIN32-NEXT: subl $8, %esp
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; WIN32-NEXT: movl %ebx, %esi
+; WIN32-NEXT: movl {{[0-9]+}}(%esp), %esi
+; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN32-NEXT: sarl $31, %esi
-; WIN32-NEXT: imull %edi, %esi
-; WIN32-NEXT: mull %edi
-; WIN32-NEXT: movl %edx, %ecx
-; WIN32-NEXT: movl %ebx, %eax
-; WIN32-NEXT: mull %edi
+; WIN32-NEXT: movl %eax, %ecx
+; WIN32-NEXT: sarl $31, %ecx
+; WIN32-NEXT: movl %ebp, %edx
+; WIN32-NEXT: subl %ecx, %edx
+; WIN32-NEXT: je LBB12_13
+; WIN32-NEXT: # %bb.1: # %overflow.lhs
+; WIN32-NEXT: movl %ebx, %ecx
+; WIN32-NEXT: subl %esi, %ecx
+; WIN32-NEXT: je LBB12_2
+; WIN32-NEXT: # %bb.15: # %overflow
+; WIN32-NEXT: movl %ebp, %ecx
+; WIN32-NEXT: sarl $31, %ecx
+; WIN32-NEXT: movl {{[0-9]+}}(%esp), %esi
+; WIN32-NEXT: imull %esi, %ecx
+; WIN32-NEXT: mull %esi
+; WIN32-NEXT: movl %edx, %edi
+; WIN32-NEXT: movl %ebp, %eax
+; WIN32-NEXT: mull %esi
; WIN32-NEXT: movl %edx, %ebx
-; WIN32-NEXT: movl %eax, %ebp
-; WIN32-NEXT: addl %ecx, %ebp
-; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; WIN32-NEXT: adcl %esi, %ebx
+; WIN32-NEXT: movl %eax, %esi
+; WIN32-NEXT: addl %edi, %esi
+; WIN32-NEXT: adcl %ecx, %ebx
; WIN32-NEXT: movl %ebx, %eax
; WIN32-NEXT: sarl $31, %eax
-; WIN32-NEXT: movl %eax, (%esp) # 4-byte Spill
-; WIN32-NEXT: movl %ecx, %esi
-; WIN32-NEXT: sarl $31, %esi
+; WIN32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; WIN32-NEXT: sarl $31, %ecx
+; WIN32-NEXT: imull {{[0-9]+}}(%esp), %ecx
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; WIN32-NEXT: imull %eax, %esi
-; WIN32-NEXT: mull %ecx
-; WIN32-NEXT: movl %edx, %ecx
+; WIN32-NEXT: mull {{[0-9]+}}(%esp)
+; WIN32-NEXT: movl %edx, %ebp
; WIN32-NEXT: movl %eax, %edi
-; WIN32-NEXT: addl %ebp, %edi
-; WIN32-NEXT: adcl %esi, %ecx
-; WIN32-NEXT: movl {{[0-9]+}}(%esp), %esi
-; WIN32-NEXT: movl %ecx, %ebp
-; WIN32-NEXT: sarl $31, %ebp
-; WIN32-NEXT: addl %ebx, %ecx
+; WIN32-NEXT: addl %esi, %edi
+; WIN32-NEXT: adcl %ecx, %ebp
+; WIN32-NEXT: movl %ebp, %esi
+; WIN32-NEXT: sarl $31, %esi
+; WIN32-NEXT: addl %ebx, %ebp
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; WIN32-NEXT: adcl (%esp), %ebp # 4-byte Folded Reload
-; WIN32-NEXT: movl %esi, %eax
+; WIN32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN32-NEXT: imull %ebx
-; WIN32-NEXT: addl %ecx, %eax
-; WIN32-NEXT: adcl %ebp, %edx
+; WIN32-NEXT: addl %ebp, %eax
+; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; WIN32-NEXT: adcl %esi, %edx
; WIN32-NEXT: sarl $31, %edi
; WIN32-NEXT: xorl %edi, %edx
; WIN32-NEXT: xorl %eax, %edi
-; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN32-NEXT: orl %edx, %edi
-; WIN32-NEXT: jne LBB12_2
-; WIN32-NEXT: # %bb.1:
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; WIN32-NEXT: setne %cl
+; WIN32-NEXT: testb $1, %cl
+; WIN32-NEXT: je LBB12_17
+; WIN32-NEXT: jmp LBB12_18
+; WIN32-NEXT: LBB12_13: # %overflow.no.lhs
+; WIN32-NEXT: movl %ebx, %ecx
+; WIN32-NEXT: subl %esi, %ecx
+; WIN32-NEXT: je LBB12_14
+; WIN32-NEXT: # %bb.8: # %overflow.no.lhs.only
+; WIN32-NEXT: movl %ebp, %ecx
+; WIN32-NEXT: sarl $31, %ecx
+; WIN32-NEXT: movl %ebp, %esi
+; WIN32-NEXT: xorl %ecx, %esi
+; WIN32-NEXT: movl %eax, %edi
+; WIN32-NEXT: xorl %ecx, %edi
+; WIN32-NEXT: subl %ecx, %edi
+; WIN32-NEXT: sbbl %ecx, %esi
+; WIN32-NEXT: testl %ebp, %ebp
+; WIN32-NEXT: sets {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; WIN32-NEXT: js LBB12_10
+; WIN32-NEXT: # %bb.9: # %overflow.no.lhs.only
+; WIN32-NEXT: movl %ebp, %esi
+; WIN32-NEXT: movl {{[0-9]+}}(%esp), %edi
+; WIN32-NEXT: LBB12_10: # %overflow.no.lhs.only
+; WIN32-NEXT: movl %ebx, %eax
+; WIN32-NEXT: sarl $31, %eax
+; WIN32-NEXT: movl %ebx, %ecx
+; WIN32-NEXT: xorl %eax, %ecx
+; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; WIN32-NEXT: xorl %eax, %ebp
+; WIN32-NEXT: subl %eax, %ebp
+; WIN32-NEXT: sbbl %eax, %ecx
+; WIN32-NEXT: testl %ebx, %ebx
+; WIN32-NEXT: sets {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; WIN32-NEXT: js LBB12_12
+; WIN32-NEXT: # %bb.11: # %overflow.no.lhs.only
+; WIN32-NEXT: movl %ebx, %ecx
+; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; WIN32-NEXT: LBB12_12: # %overflow.no.lhs.only
+; WIN32-NEXT: movl %edi, %eax
+; WIN32-NEXT: mull %ebp
+; WIN32-NEXT: movl %eax, %ebx
+; WIN32-NEXT: imull %esi, %ebp
+; WIN32-NEXT: addl %edx, %ebp
+; WIN32-NEXT: imull %ecx, %esi
+; WIN32-NEXT: movl %edi, %eax
+; WIN32-NEXT: mull %ecx
+; WIN32-NEXT: movl %edx, %edi
+; WIN32-NEXT: addl %ebp, %eax
+; WIN32-NEXT: adcl %esi, %edi
+; WIN32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
+; WIN32-NEXT: xorb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Folded Reload
+; WIN32-NEXT: movzbl %cl, %esi
+; WIN32-NEXT: movl %esi, %ecx
+; WIN32-NEXT: negl %ecx
+; WIN32-NEXT: xorl %ecx, %ebx
+; WIN32-NEXT: addl %esi, %ebx
+; WIN32-NEXT: xorl %edx, %edx
+; WIN32-NEXT: subl %esi, %ebx
+; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; WIN32-NEXT: setb %dl
+; WIN32-NEXT: xorl %ecx, %eax
+; WIN32-NEXT: addl %edx, %eax
+; WIN32-NEXT: xorl %edi, %ecx
+; WIN32-NEXT: subl %edx, %eax
+; WIN32-NEXT: adcl $0, %ecx
+; WIN32-NEXT: setne %cl
+; WIN32-NEXT: jmp LBB12_7
+; WIN32-NEXT: LBB12_2: # %overflow.no.rhs.only
+; WIN32-NEXT: movl %ebx, %eax
+; WIN32-NEXT: sarl $31, %eax
; WIN32-NEXT: movl %ebx, %esi
-; WIN32-NEXT: LBB12_2:
-; WIN32-NEXT: movl %esi, %edx
-; WIN32-NEXT: addl $4, %esp
-; WIN32-NEXT: popl %esi
-; WIN32-NEXT: popl %edi
-; WIN32-NEXT: popl %ebx
-; WIN32-NEXT: popl %ebp
-; WIN32-NEXT: retl
- %t = call {i64, i1} @llvm.smul.with.overflow.i64(i64 %v1, i64 %v2)
- %obit = extractvalue {i64, i1} %t, 1
- %ret = select i1 %obit, i64 %v1, i64 %v2
- ret i64 %ret
-}
-
-define i32 @umuloselecti32(i32 %v1, i32 %v2) {
-; LINUX-LABEL: umuloselecti32:
-; LINUX: # %bb.0:
+; WIN32-NEXT: xorl %eax, %esi
+; WIN32-NEXT: movl {{[0-9]+}}(%esp), %edi
+; WIN32-NEXT: xorl %eax, %edi
+; WIN32-NEXT: subl %eax, %edi
+; WIN32-NEXT: sbbl %eax, %esi
+; WIN32-NEXT: testl %ebx, %ebx
+; WIN32-NEXT: sets {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; WIN32-NEXT: js LBB12_4
+; WIN32-NEXT: # %bb.3: # %overflow.no.rhs.only
+; WIN32-NEXT: movl %ebx, %esi
+; WIN32-NEXT: movl {{[0-9]+}}(%esp), %edi
+; WIN32-NEXT: LBB12_4: # %overflow.no.rhs.only
+; WIN32-NEXT: movl %ebp, %eax
+; WIN32-NEXT: sarl $31, %eax
+; WIN32-NEXT: movl %ebp, %ecx
+; WIN32-NEXT: xorl %eax, %ecx
+; WIN32-NEXT: movl %ebp, %edx
+; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; WIN32-NEXT: xorl %eax, %ebp
+; WIN32-NEXT: subl %eax, %ebp
+; WIN32-NEXT: sbbl %eax, %ecx
+; WIN32-NEXT: testl %edx, %edx
+; WIN32-NEXT: sets {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; WIN32-NEXT: js LBB12_6
+; WIN32-NEXT: # %bb.5: # %overflow.no.rhs.only
+; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; WIN32-NEXT: LBB12_6: # %overflow.no.rhs.only
+; WIN32-NEXT: movl %edi, %eax
+; WIN32-NEXT: mull %ebp
+; WIN32-NEXT: movl %eax, %ebx
+; WIN32-NEXT: imull %esi, %ebp
+; WIN32-NEXT: addl %edx, %ebp
+; WIN32-NEXT: imull %ecx, %esi
+; WIN32-NEXT: movl %edi, %eax
+; WIN32-NEXT: mull %ecx
+; WIN32-NEXT: movl %edx, %edi
+; WIN32-NEXT: addl %ebp, %eax
+; WIN32-NEXT: adcl %esi, %edi
+; WIN32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
+; WIN32-NEXT: xorb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Folded Reload
+; WIN32-NEXT: movzbl %cl, %esi
+; WIN32-NEXT: movl %esi, %ecx
+; WIN32-NEXT: negl %ecx
+; WIN32-NEXT: xorl %ecx, %ebx
+; WIN32-NEXT: addl %esi, %ebx
+; WIN32-NEXT: xorl %edx, %edx
+; WIN32-NEXT: subl %esi, %ebx
+; WIN32-NEXT: setb %dl
+; WIN32-NEXT: xorl %ecx, %eax
+; WIN32-NEXT: addl %edx, %eax
+; WIN32-NEXT: xorl %edi, %ecx
+; WIN32-NEXT: subl %edx, %eax
+; WIN32-NEXT: adcl $0, %ecx
+; WIN32-NEXT: setne %cl
+; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; WIN32-NEXT: LBB12_7: # %overflow.res
+; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; WIN32-NEXT: testb $1, %cl
+; WIN32-NEXT: jne LBB12_18
+; WIN32-NEXT: LBB12_17: # %overflow.res
+; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; WIN32-NEXT: movl %ebx, %ebp
+; WIN32-NEXT: LBB12_18: # %overflow.res
+; WIN32-NEXT: movl %ebp, %edx
+; WIN32-NEXT: addl $8, %esp
+; WIN32-NEXT: popl %esi
+; WIN32-NEXT: popl %edi
+; WIN32-NEXT: popl %ebx
+; WIN32-NEXT: popl %ebp
+; WIN32-NEXT: retl
+; WIN32-NEXT: LBB12_14: # %overflow.no
+; WIN32-NEXT: xorl %ecx, %ecx
+; WIN32-NEXT: testb $1, %cl
+; WIN32-NEXT: je LBB12_17
+; WIN32-NEXT: jmp LBB12_18
+ %t = call {i64, i1} @llvm.smul.with.overflow.i64(i64 %v1, i64 %v2)
+ %obit = extractvalue {i64, i1} %t, 1
+ %ret = select i1 %obit, i64 %v1, i64 %v2
+ ret i64 %ret
+}
+
+define i32 @umuloselecti32(i32 %v1, i32 %v2) {
+; LINUX-LABEL: umuloselecti32:
+; LINUX: # %bb.0:
; LINUX-NEXT: movl %edi, %eax
; LINUX-NEXT: mull %esi
; LINUX-NEXT: cmovol %edi, %esi
@@ -670,45 +1023,86 @@ define i64 @umuloselecti64(i64 %v1, i64 %v2) {
; WIN64-NEXT: retq
;
; WIN32-LABEL: umuloselecti64:
-; WIN32: # %bb.0:
+; WIN32: # %bb.0: # %overflow.entry
; WIN32-NEXT: pushl %ebp
; WIN32-NEXT: pushl %ebx
; WIN32-NEXT: pushl %edi
; WIN32-NEXT: pushl %esi
; WIN32-NEXT: pushl %eax
-; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; WIN32-NEXT: movl {{[0-9]+}}(%esp), %esi
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %edi
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; WIN32-NEXT: testl %ebp, %ebp
+; WIN32-NEXT: movl {{[0-9]+}}(%esp), %esi
+; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; WIN32-NEXT: testl %esi, %esi
+; WIN32-NEXT: je LBB14_5
+; WIN32-NEXT: # %bb.1: # %overflow.lhs
+; WIN32-NEXT: testl %edi, %edi
+; WIN32-NEXT: je LBB14_2
+; WIN32-NEXT: # %bb.7: # %overflow
; WIN32-NEXT: setne %al
; WIN32-NEXT: testl %esi, %esi
-; WIN32-NEXT: setne %bl
-; WIN32-NEXT: andb %al, %bl
+; WIN32-NEXT: setne %cl
+; WIN32-NEXT: andb %al, %cl
; WIN32-NEXT: movl %esi, %eax
-; WIN32-NEXT: mull %edi
-; WIN32-NEXT: movl %edi, %edx
-; WIN32-NEXT: movl %eax, %edi
-; WIN32-NEXT: seto {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; WIN32-NEXT: movl %ebp, %eax
-; WIN32-NEXT: movl %edx, %ebp
-; WIN32-NEXT: mull %ecx
-; WIN32-NEXT: seto %bh
-; WIN32-NEXT: orb {{[-0-9]+}}(%e{{[sb]}}p), %bh # 1-byte Folded Reload
-; WIN32-NEXT: orb %bl, %bh
-; WIN32-NEXT: addl %eax, %edi
-; WIN32-NEXT: movl %ecx, %eax
; WIN32-NEXT: mull %ebp
-; WIN32-NEXT: addl %edi, %edx
+; WIN32-NEXT: movl %eax, %ebp
+; WIN32-NEXT: seto {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; WIN32-NEXT: movl %edi, %eax
+; WIN32-NEXT: mull %ebx
+; WIN32-NEXT: seto %ch
+; WIN32-NEXT: orb {{[-0-9]+}}(%e{{[sb]}}p), %ch # 1-byte Folded Reload
+; WIN32-NEXT: orb %cl, %ch
+; WIN32-NEXT: addl %eax, %ebp
+; WIN32-NEXT: movl %ebx, %eax
+; WIN32-NEXT: movl {{[0-9]+}}(%esp), %edx
+; WIN32-NEXT: movl %edx, %edi
+; WIN32-NEXT: mull %edx
+; WIN32-NEXT: addl %ebp, %edx
; WIN32-NEXT: setb %al
-; WIN32-NEXT: orb %bh, %al
-; WIN32-NEXT: testb %al, %al
-; WIN32-NEXT: jne LBB14_2
-; WIN32-NEXT: # %bb.1:
+; WIN32-NEXT: orb %ch, %al
+; WIN32-NEXT: testb $1, %al
+; WIN32-NEXT: je LBB14_9
+; WIN32-NEXT: jmp LBB14_10
+; WIN32-NEXT: LBB14_5: # %overflow.no.lhs
+; WIN32-NEXT: testl %edi, %edi
+; WIN32-NEXT: je LBB14_6
+; WIN32-NEXT: # %bb.4: # %overflow.no.lhs.only
+; WIN32-NEXT: movl %ebx, %eax
+; WIN32-NEXT: mull %ebp
+; WIN32-NEXT: movl %edi, %eax
+; WIN32-NEXT: movl %ebp, %edi
; WIN32-NEXT: movl %ebp, %ecx
+; WIN32-NEXT: imull %esi, %ecx
+; WIN32-NEXT: addl %edx, %ecx
+; WIN32-NEXT: movl %esi, %ebp
+; WIN32-NEXT: imull %eax, %ebp
+; WIN32-NEXT: movl %eax, %edx
+; WIN32-NEXT: movl %ebx, %eax
+; WIN32-NEXT: mull %edx
+; WIN32-NEXT: jmp LBB14_3
+; WIN32-NEXT: LBB14_2: # %overflow.no.rhs.only
+; WIN32-NEXT: movl %ebp, %eax
+; WIN32-NEXT: mull %ebx
+; WIN32-NEXT: movl %ebx, %ecx
+; WIN32-NEXT: imull %edi, %ecx
+; WIN32-NEXT: addl %edx, %ecx
+; WIN32-NEXT: movl %ebp, %eax
+; WIN32-NEXT: movl %edi, %ebp
+; WIN32-NEXT: imull %esi, %ebp
+; WIN32-NEXT: movl %eax, %edi
+; WIN32-NEXT: mull %esi
+; WIN32-NEXT: LBB14_3: # %overflow.res
+; WIN32-NEXT: addl %ecx, %eax
+; WIN32-NEXT: adcl %ebp, %edx
+; WIN32-NEXT: testl %edx, %edx
+; WIN32-NEXT: setne %al
+; WIN32-NEXT: testb $1, %al
+; WIN32-NEXT: jne LBB14_10
+; WIN32-NEXT: LBB14_9: # %overflow.res
+; WIN32-NEXT: movl %edi, %ebx
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %esi
-; WIN32-NEXT: LBB14_2:
-; WIN32-NEXT: movl %ecx, %eax
+; WIN32-NEXT: LBB14_10: # %overflow.res
+; WIN32-NEXT: movl %ebx, %eax
; WIN32-NEXT: movl %esi, %edx
; WIN32-NEXT: addl $4, %esp
; WIN32-NEXT: popl %esi
@@ -716,6 +1110,12 @@ define i64 @umuloselecti64(i64 %v1, i64 %v2) {
; WIN32-NEXT: popl %ebx
; WIN32-NEXT: popl %ebp
; WIN32-NEXT: retl
+; WIN32-NEXT: LBB14_6: # %overflow.no
+; WIN32-NEXT: movl %ebp, %edi
+; WIN32-NEXT: xorl %eax, %eax
+; WIN32-NEXT: testb $1, %al
+; WIN32-NEXT: je LBB14_9
+; WIN32-NEXT: jmp LBB14_10
%t = call {i64, i1} @llvm.umul.with.overflow.i64(i64 %v1, i64 %v2)
%obit = extractvalue {i64, i1} %t, 1
%ret = select i1 %obit, i64 %v1, i64 %v2
@@ -952,35 +1352,47 @@ define zeroext i1 @smulobri64(i64 %v1, i64 %v2) {
; WIN64-NEXT: retq
;
; WIN32-LABEL: smulobri64:
-; WIN32: # %bb.0:
+; WIN32: # %bb.0: # %overflow.entry
; WIN32-NEXT: pushl %ebp
; WIN32-NEXT: pushl %ebx
; WIN32-NEXT: pushl %edi
; WIN32-NEXT: pushl %esi
-; WIN32-NEXT: pushl %eax
-; WIN32-NEXT: movl {{[0-9]+}}(%esp), %edi
-; WIN32-NEXT: movl {{[0-9]+}}(%esp), %esi
+; WIN32-NEXT: subl $8, %esp
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; WIN32-NEXT: movl %ebp, %ecx
-; WIN32-NEXT: sarl $31, %ecx
-; WIN32-NEXT: imull %edi, %ecx
-; WIN32-NEXT: movl %esi, %eax
-; WIN32-NEXT: mull %edi
+; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; WIN32-NEXT: movl {{[0-9]+}}(%esp), %edi
+; WIN32-NEXT: movl {{[0-9]+}}(%esp), %edx
+; WIN32-NEXT: movl %ecx, %eax
+; WIN32-NEXT: sarl $31, %eax
+; WIN32-NEXT: sarl $31, %edx
+; WIN32-NEXT: movl %edi, %esi
+; WIN32-NEXT: subl %edx, %esi
+; WIN32-NEXT: je LBB18_12
+; WIN32-NEXT: # %bb.1: # %overflow.lhs
+; WIN32-NEXT: movl %ebp, %edx
+; WIN32-NEXT: subl %eax, %edx
+; WIN32-NEXT: je LBB18_2
+; WIN32-NEXT: # %bb.14: # %overflow1
+; WIN32-NEXT: movl %edi, %esi
+; WIN32-NEXT: sarl $31, %esi
+; WIN32-NEXT: imull %ecx, %esi
+; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; WIN32-NEXT: mull %ecx
+; WIN32-NEXT: movl %edi, %eax
; WIN32-NEXT: movl %edx, %ebx
-; WIN32-NEXT: movl %ebp, %eax
-; WIN32-NEXT: mull %edi
+; WIN32-NEXT: mull %ecx
; WIN32-NEXT: movl %edx, %edi
+; WIN32-NEXT: movl %ebp, %ecx
; WIN32-NEXT: movl %eax, %ebp
; WIN32-NEXT: addl %ebx, %ebp
-; WIN32-NEXT: adcl %ecx, %edi
+; WIN32-NEXT: adcl %esi, %edi
; WIN32-NEXT: movl %edi, %eax
; WIN32-NEXT: sarl $31, %eax
-; WIN32-NEXT: movl %eax, (%esp) # 4-byte Spill
-; WIN32-NEXT: movl {{[0-9]+}}(%esp), %edx
-; WIN32-NEXT: movl %edx, %ecx
+; WIN32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; WIN32-NEXT: movl %ecx, %edx
; WIN32-NEXT: sarl $31, %ecx
-; WIN32-NEXT: imull %esi, %ecx
-; WIN32-NEXT: movl %esi, %eax
+; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; WIN32-NEXT: imull %eax, %ecx
; WIN32-NEXT: mull %edx
; WIN32-NEXT: movl %edx, %ebx
; WIN32-NEXT: movl %eax, %esi
@@ -989,7 +1401,7 @@ define zeroext i1 @smulobri64(i64 %v1, i64 %v2) {
; WIN32-NEXT: movl %ebx, %ebp
; WIN32-NEXT: sarl $31, %ebp
; WIN32-NEXT: addl %edi, %ebx
-; WIN32-NEXT: adcl (%esp), %ebp # 4-byte Folded Reload
+; WIN32-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN32-NEXT: imull {{[0-9]+}}(%esp)
; WIN32-NEXT: addl %ebx, %eax
@@ -998,19 +1410,148 @@ define zeroext i1 @smulobri64(i64 %v1, i64 %v2) {
; WIN32-NEXT: xorl %esi, %edx
; WIN32-NEXT: xorl %eax, %esi
; WIN32-NEXT: orl %edx, %esi
-; WIN32-NEXT: jne LBB18_1
-; WIN32-NEXT: # %bb.3: # %continue
+; WIN32-NEXT: jmp LBB18_15
+; WIN32-NEXT: LBB18_12: # %overflow.no.lhs
+; WIN32-NEXT: movl %ebp, %edx
+; WIN32-NEXT: subl %eax, %edx
+; WIN32-NEXT: je LBB18_13
+; WIN32-NEXT: # %bb.7: # %overflow.no.lhs.only
+; WIN32-NEXT: movl %edi, %eax
+; WIN32-NEXT: sarl $31, %eax
+; WIN32-NEXT: movl %edi, %edx
+; WIN32-NEXT: movl %edi, %ebx
+; WIN32-NEXT: xorl %eax, %ebx
+; WIN32-NEXT: movl {{[0-9]+}}(%esp), %esi
+; WIN32-NEXT: movl %esi, %edi
+; WIN32-NEXT: xorl %eax, %edi
+; WIN32-NEXT: subl %eax, %edi
+; WIN32-NEXT: sbbl %eax, %ebx
+; WIN32-NEXT: testl %edx, %edx
+; WIN32-NEXT: sets {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; WIN32-NEXT: js LBB18_9
+; WIN32-NEXT: # %bb.8: # %overflow.no.lhs.only
+; WIN32-NEXT: movl %edx, %ebx
+; WIN32-NEXT: movl %esi, %edi
+; WIN32-NEXT: LBB18_9: # %overflow.no.lhs.only
+; WIN32-NEXT: movl %ebp, %eax
+; WIN32-NEXT: sarl $31, %eax
+; WIN32-NEXT: movl %ebp, %edx
+; WIN32-NEXT: xorl %eax, %ebp
+; WIN32-NEXT: movl %ecx, %esi
+; WIN32-NEXT: xorl %eax, %esi
+; WIN32-NEXT: subl %eax, %esi
+; WIN32-NEXT: sbbl %eax, %ebp
+; WIN32-NEXT: testl %edx, %edx
+; WIN32-NEXT: sets {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; WIN32-NEXT: js LBB18_11
+; WIN32-NEXT: # %bb.10: # %overflow.no.lhs.only
+; WIN32-NEXT: movl %edx, %ebp
+; WIN32-NEXT: movl %ecx, %esi
+; WIN32-NEXT: LBB18_11: # %overflow.no.lhs.only
+; WIN32-NEXT: movl %edi, %eax
+; WIN32-NEXT: mull %esi
+; WIN32-NEXT: movl %eax, %ecx
+; WIN32-NEXT: imull %ebx, %esi
+; WIN32-NEXT: addl %edx, %esi
+; WIN32-NEXT: imull %ebp, %ebx
+; WIN32-NEXT: movl %edi, %eax
+; WIN32-NEXT: mull %ebp
+; WIN32-NEXT: addl %esi, %eax
+; WIN32-NEXT: adcl %ebx, %edx
+; WIN32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 1-byte Folded Reload
+; WIN32-NEXT: xorb {{[-0-9]+}}(%e{{[sb]}}p), %bl # 1-byte Folded Reload
+; WIN32-NEXT: movzbl %bl, %edi
+; WIN32-NEXT: movl %edi, %esi
+; WIN32-NEXT: negl %esi
+; WIN32-NEXT: xorl %esi, %ecx
+; WIN32-NEXT: addl %edi, %ecx
+; WIN32-NEXT: xorl %ebx, %ebx
+; WIN32-NEXT: subl %edi, %ecx
+; WIN32-NEXT: setb %bl
+; WIN32-NEXT: xorl %esi, %eax
+; WIN32-NEXT: addl %ebx, %eax
+; WIN32-NEXT: xorl %edx, %esi
+; WIN32-NEXT: subl %ebx, %eax
+; WIN32-NEXT: adcl $0, %esi
+; WIN32-NEXT: jmp LBB18_15
+; WIN32-NEXT: LBB18_2: # %overflow.no.rhs.only
+; WIN32-NEXT: movl %edi, %edx
+; WIN32-NEXT: movl %ebp, %eax
+; WIN32-NEXT: sarl $31, %eax
+; WIN32-NEXT: movl %ebp, %ebx
+; WIN32-NEXT: xorl %eax, %ebx
+; WIN32-NEXT: movl %ecx, %edi
+; WIN32-NEXT: xorl %eax, %edi
+; WIN32-NEXT: subl %eax, %edi
+; WIN32-NEXT: sbbl %eax, %ebx
+; WIN32-NEXT: testl %ebp, %ebp
+; WIN32-NEXT: sets {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; WIN32-NEXT: movl {{[0-9]+}}(%esp), %esi
+; WIN32-NEXT: js LBB18_4
+; WIN32-NEXT: # %bb.3: # %overflow.no.rhs.only
+; WIN32-NEXT: movl %ebp, %ebx
+; WIN32-NEXT: movl %ecx, %edi
+; WIN32-NEXT: LBB18_4: # %overflow.no.rhs.only
+; WIN32-NEXT: movl %edx, %eax
+; WIN32-NEXT: sarl $31, %eax
+; WIN32-NEXT: movl %edx, %ecx
+; WIN32-NEXT: xorl %eax, %ecx
+; WIN32-NEXT: movl %esi, %ebp
+; WIN32-NEXT: xorl %eax, %ebp
+; WIN32-NEXT: subl %eax, %ebp
+; WIN32-NEXT: sbbl %eax, %ecx
+; WIN32-NEXT: testl %edx, %edx
+; WIN32-NEXT: sets {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; WIN32-NEXT: js LBB18_6
+; WIN32-NEXT: # %bb.5: # %overflow.no.rhs.only
+; WIN32-NEXT: movl %edx, %ecx
+; WIN32-NEXT: movl %esi, %ebp
+; WIN32-NEXT: LBB18_6: # %overflow.no.rhs.only
+; WIN32-NEXT: movl %edi, %eax
+; WIN32-NEXT: mull %ebp
+; WIN32-NEXT: movl %eax, %esi
+; WIN32-NEXT: imull %ebx, %ebp
+; WIN32-NEXT: addl %edx, %ebp
+; WIN32-NEXT: imull %ecx, %ebx
+; WIN32-NEXT: movl %edi, %eax
+; WIN32-NEXT: mull %ecx
+; WIN32-NEXT: addl %ebp, %eax
+; WIN32-NEXT: adcl %ebx, %edx
+; WIN32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
+; WIN32-NEXT: xorb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Folded Reload
+; WIN32-NEXT: movzbl %cl, %edi
+; WIN32-NEXT: movl %edi, %ecx
+; WIN32-NEXT: negl %ecx
+; WIN32-NEXT: xorl %ecx, %esi
+; WIN32-NEXT: addl %edi, %esi
+; WIN32-NEXT: xorl %ebx, %ebx
+; WIN32-NEXT: subl %edi, %esi
+; WIN32-NEXT: setb %bl
+; WIN32-NEXT: xorl %ecx, %eax
+; WIN32-NEXT: addl %ebx, %eax
+; WIN32-NEXT: xorl %edx, %ecx
+; WIN32-NEXT: subl %ebx, %eax
+; WIN32-NEXT: adcl $0, %ecx
+; WIN32-NEXT: LBB18_15: # %overflow.res
+; WIN32-NEXT: setne %al
+; WIN32-NEXT: testb $1, %al
+; WIN32-NEXT: jne LBB18_17
+; WIN32-NEXT: LBB18_19: # %continue
; WIN32-NEXT: movb $1, %al
-; WIN32-NEXT: LBB18_2: # %overflow
-; WIN32-NEXT: addl $4, %esp
+; WIN32-NEXT: LBB18_18: # %overflow
+; WIN32-NEXT: addl $8, %esp
; WIN32-NEXT: popl %esi
; WIN32-NEXT: popl %edi
; WIN32-NEXT: popl %ebx
; WIN32-NEXT: popl %ebp
; WIN32-NEXT: retl
-; WIN32-NEXT: LBB18_1: # %overflow
+; WIN32-NEXT: LBB18_13: # %overflow.no
; WIN32-NEXT: xorl %eax, %eax
-; WIN32-NEXT: jmp LBB18_2
+; WIN32-NEXT: testb $1, %al
+; WIN32-NEXT: je LBB18_19
+; WIN32-NEXT: LBB18_17: # %overflow
+; WIN32-NEXT: xorl %eax, %eax
+; WIN32-NEXT: jmp LBB18_18
%t = call {i64, i1} @llvm.smul.with.overflow.i64(i64 %v1, i64 %v2)
%val = extractvalue {i64, i1} %t, 0
%obit = extractvalue {i64, i1} %t, 1
@@ -1261,46 +1802,90 @@ define zeroext i1 @umulobri64(i64 %v1, i64 %v2) {
; WIN64-NEXT: retq
;
; WIN32-LABEL: umulobri64:
-; WIN32: # %bb.0:
+; WIN32: # %bb.0: # %overflow.entry
; WIN32-NEXT: pushl %ebp
; WIN32-NEXT: pushl %ebx
; WIN32-NEXT: pushl %edi
; WIN32-NEXT: pushl %esi
-; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; WIN32-NEXT: pushl %eax
+; WIN32-NEXT: movl {{[0-9]+}}(%esp), %edi
+; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ebx
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %esi
-; WIN32-NEXT: testl %esi, %esi
+; WIN32-NEXT: testl %ebx, %ebx
+; WIN32-NEXT: je LBB22_5
+; WIN32-NEXT: # %bb.1: # %overflow.lhs
+; WIN32-NEXT: testl %edi, %edi
+; WIN32-NEXT: je LBB22_2
+; WIN32-NEXT: # %bb.7: # %overflow1
+; WIN32-NEXT: setne %al
+; WIN32-NEXT: testl %ebx, %ebx
; WIN32-NEXT: setne %dl
-; WIN32-NEXT: testl %eax, %eax
-; WIN32-NEXT: setne %cl
-; WIN32-NEXT: andb %dl, %cl
-; WIN32-NEXT: mull {{[0-9]+}}(%esp)
-; WIN32-NEXT: movl %eax, %edi
+; WIN32-NEXT: andb %al, %dl
+; WIN32-NEXT: movb %dl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; WIN32-NEXT: movl %ebx, %eax
+; WIN32-NEXT: mull %ecx
+; WIN32-NEXT: movl %eax, %ebp
; WIN32-NEXT: seto %bl
+; WIN32-NEXT: movl %edi, %eax
+; WIN32-NEXT: mull %esi
+; WIN32-NEXT: seto %bh
+; WIN32-NEXT: orb %bl, %bh
+; WIN32-NEXT: orb {{[-0-9]+}}(%e{{[sb]}}p), %bh # 1-byte Folded Reload
+; WIN32-NEXT: leal (%ebp,%eax), %edi
; WIN32-NEXT: movl %esi, %eax
-; WIN32-NEXT: mull %ebp
-; WIN32-NEXT: seto %ch
-; WIN32-NEXT: orb %bl, %ch
-; WIN32-NEXT: orb %cl, %ch
-; WIN32-NEXT: leal (%edi,%eax), %esi
-; WIN32-NEXT: movl %ebp, %eax
-; WIN32-NEXT: mull {{[0-9]+}}(%esp)
-; WIN32-NEXT: addl %esi, %edx
+; WIN32-NEXT: mull %ecx
+; WIN32-NEXT: addl %edi, %edx
; WIN32-NEXT: setb %al
-; WIN32-NEXT: orb %ch, %al
-; WIN32-NEXT: subb $1, %al
-; WIN32-NEXT: je LBB22_1
-; WIN32-NEXT: # %bb.3: # %continue
+; WIN32-NEXT: orb %bh, %al
+; WIN32-NEXT: testb $1, %al
+; WIN32-NEXT: je LBB22_11
+; WIN32-NEXT: jmp LBB22_9
+; WIN32-NEXT: LBB22_5: # %overflow.no.lhs
+; WIN32-NEXT: testl %edi, %edi
+; WIN32-NEXT: je LBB22_6
+; WIN32-NEXT: # %bb.4: # %overflow.no.lhs.only
+; WIN32-NEXT: movl %esi, %eax
+; WIN32-NEXT: mull %ecx
+; WIN32-NEXT: imull %ebx, %ecx
+; WIN32-NEXT: addl %edx, %ecx
+; WIN32-NEXT: imull %edi, %ebx
+; WIN32-NEXT: movl %esi, %eax
+; WIN32-NEXT: mull %edi
+; WIN32-NEXT: addl %ecx, %eax
+; WIN32-NEXT: adcl %ebx, %edx
+; WIN32-NEXT: jmp LBB22_3
+; WIN32-NEXT: LBB22_2: # %overflow.no.rhs.only
+; WIN32-NEXT: movl %ecx, %eax
+; WIN32-NEXT: mull %esi
+; WIN32-NEXT: imull %edi, %esi
+; WIN32-NEXT: addl %edx, %esi
+; WIN32-NEXT: imull %ebx, %edi
+; WIN32-NEXT: movl %ecx, %eax
+; WIN32-NEXT: mull %ebx
+; WIN32-NEXT: addl %esi, %eax
+; WIN32-NEXT: adcl %edi, %edx
+; WIN32-NEXT: LBB22_3: # %overflow.res
+; WIN32-NEXT: testl %edx, %edx
+; WIN32-NEXT: setne %al
+; WIN32-NEXT: testb $1, %al
+; WIN32-NEXT: jne LBB22_9
+; WIN32-NEXT: LBB22_11: # %continue
; WIN32-NEXT: movb $1, %al
-; WIN32-NEXT: LBB22_2: # %overflow
+; WIN32-NEXT: LBB22_10: # %overflow
+; WIN32-NEXT: addl $4, %esp
; WIN32-NEXT: popl %esi
; WIN32-NEXT: popl %edi
; WIN32-NEXT: popl %ebx
; WIN32-NEXT: popl %ebp
; WIN32-NEXT: retl
-; WIN32-NEXT: LBB22_1: # %overflow
+; WIN32-NEXT: LBB22_6: # %overflow.no
+; WIN32-NEXT: xorl %eax, %eax
+; WIN32-NEXT: testb $1, %al
+; WIN32-NEXT: je LBB22_11
+; WIN32-NEXT: LBB22_9: # %overflow
; WIN32-NEXT: xorl %eax, %eax
-; WIN32-NEXT: jmp LBB22_2
+; WIN32-NEXT: jmp LBB22_10
%t = call {i64, i1} @llvm.umul.with.overflow.i64(i64 %v1, i64 %v2)
%val = extractvalue {i64, i1} %t, 0
%obit = extractvalue {i64, i1} %t, 1
@@ -1334,18 +1919,33 @@ define i1 @bug27873(i64 %c1, i1 %c2) {
; WIN64-NEXT: retq
;
; WIN32-LABEL: bug27873:
-; WIN32: # %bb.0:
+; WIN32: # %bb.0: # %overflow.entry
; WIN32-NEXT: pushl %ebx
-; WIN32-NEXT: movl $160, %eax
-; WIN32-NEXT: mull {{[0-9]+}}(%esp)
-; WIN32-NEXT: movl %eax, %ecx
-; WIN32-NEXT: seto %bl
-; WIN32-NEXT: movl $160, %eax
-; WIN32-NEXT: mull {{[0-9]+}}(%esp)
-; WIN32-NEXT: addl %ecx, %edx
-; WIN32-NEXT: setb %al
-; WIN32-NEXT: orb %bl, %al
-; WIN32-NEXT: orb {{[0-9]+}}(%esp), %al
+; WIN32-NEXT: pushl %edi
+; WIN32-NEXT: pushl %esi
+; WIN32-NEXT: movl {{[0-9]+}}(%esp), %esi
+; WIN32-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
+; WIN32-NEXT: testl %esi, %esi
+; WIN32-NEXT: je LBB23_2
+; WIN32-NEXT: # %bb.1: # %overflow.lhs
+; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; WIN32-NEXT: movl $160, %ebx
+; WIN32-NEXT: mull %ebx
+; WIN32-NEXT: movl %edx, %edi
+; WIN32-NEXT: movl %esi, %eax
+; WIN32-NEXT: mull %ebx
+; WIN32-NEXT: addl %edi, %eax
+; WIN32-NEXT: adcl $0, %edx
+; WIN32-NEXT: testl %edx, %edx
+; WIN32-NEXT: setne %al
+; WIN32-NEXT: jmp LBB23_3
+; WIN32-NEXT: LBB23_2: # %overflow.no.lhs
+; WIN32-NEXT: xorl %eax, %eax
+; WIN32-NEXT: LBB23_3: # %overflow.res
+; WIN32-NEXT: orb %al, %cl
+; WIN32-NEXT: movl %ecx, %eax
+; WIN32-NEXT: popl %esi
+; WIN32-NEXT: popl %edi
; WIN32-NEXT: popl %ebx
; WIN32-NEXT: retl
%mul = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 %c1, i64 160)
@@ -1635,62 +2235,208 @@ define zeroext i1 @smuloi64_load(ptr %ptr1, i64 %v2, ptr %res) {
; WIN64-NEXT: retq
;
; WIN32-LABEL: smuloi64_load:
-; WIN32: # %bb.0:
+; WIN32: # %bb.0: # %overflow.entry
; WIN32-NEXT: pushl %ebp
; WIN32-NEXT: pushl %ebx
; WIN32-NEXT: pushl %edi
; WIN32-NEXT: pushl %esi
-; WIN32-NEXT: subl $12, %esp
-; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; WIN32-NEXT: subl $16, %esp
+; WIN32-NEXT: movl {{[0-9]+}}(%esp), %edx
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; WIN32-NEXT: movl (%eax), %ecx
-; WIN32-NEXT: movl 4(%eax), %ebp
-; WIN32-NEXT: movl %ebp, %esi
-; WIN32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; WIN32-NEXT: sarl $31, %esi
-; WIN32-NEXT: imull %ebx, %esi
+; WIN32-NEXT: movl (%eax), %edi
+; WIN32-NEXT: movl 4(%eax), %ecx
+; WIN32-NEXT: movl %edx, %eax
+; WIN32-NEXT: sarl $31, %eax
+; WIN32-NEXT: movl %edi, %edx
+; WIN32-NEXT: sarl $31, %edx
+; WIN32-NEXT: movl %ecx, %esi
+; WIN32-NEXT: subl %edx, %esi
+; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; WIN32-NEXT: movl {{[0-9]+}}(%esp), %esi
+; WIN32-NEXT: movl %ebx, %edx
+; WIN32-NEXT: je LBB30_13
+; WIN32-NEXT: # %bb.1: # %overflow.lhs
+; WIN32-NEXT: subl %eax, %edx
+; WIN32-NEXT: je LBB30_2
+; WIN32-NEXT: # %bb.15: # %overflow
+; WIN32-NEXT: movl %ecx, %ebp
+; WIN32-NEXT: sarl $31, %ebp
+; WIN32-NEXT: movl {{[0-9]+}}(%esp), %esi
+; WIN32-NEXT: imull %esi, %ebp
+; WIN32-NEXT: movl %edi, %eax
+; WIN32-NEXT: mull %esi
+; WIN32-NEXT: movl %edx, (%esp) # 4-byte Spill
+; WIN32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; WIN32-NEXT: movl %ecx, %eax
-; WIN32-NEXT: mull %ebx
-; WIN32-NEXT: movl %edx, %edi
+; WIN32-NEXT: mull %esi
+; WIN32-NEXT: addl (%esp), %eax # 4-byte Folded Reload
; WIN32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; WIN32-NEXT: movl %ebp, %eax
+; WIN32-NEXT: adcl %ebp, %edx
+; WIN32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; WIN32-NEXT: movl %edx, %eax
+; WIN32-NEXT: sarl $31, %eax
+; WIN32-NEXT: movl %eax, (%esp) # 4-byte Spill
+; WIN32-NEXT: movl %ebx, %esi
+; WIN32-NEXT: sarl $31, %esi
+; WIN32-NEXT: imull %edi, %esi
+; WIN32-NEXT: movl %edi, %eax
; WIN32-NEXT: mull %ebx
-; WIN32-NEXT: movl %edx, %ebx
-; WIN32-NEXT: movl %eax, %ebp
-; WIN32-NEXT: addl %edi, %ebp
-; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; WIN32-NEXT: adcl %esi, %ebx
-; WIN32-NEXT: movl %ebx, %edi
+; WIN32-NEXT: movl %edx, %ebp
+; WIN32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; WIN32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; WIN32-NEXT: adcl %esi, %ebp
+; WIN32-NEXT: movl {{[0-9]+}}(%esp), %esi
+; WIN32-NEXT: movl %ebp, %edi
; WIN32-NEXT: sarl $31, %edi
-; WIN32-NEXT: movl %eax, %esi
-; WIN32-NEXT: sarl $31, %esi
-; WIN32-NEXT: imull %ecx, %esi
+; WIN32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; WIN32-NEXT: adcl (%esp), %edi # 4-byte Folded Reload
; WIN32-NEXT: movl %ecx, %eax
-; WIN32-NEXT: mull {{[0-9]+}}(%esp)
-; WIN32-NEXT: movl %edx, %ecx
+; WIN32-NEXT: imull %ebx
; WIN32-NEXT: addl %ebp, %eax
-; WIN32-NEXT: movl %eax, (%esp) # 4-byte Spill
-; WIN32-NEXT: adcl %esi, %ecx
-; WIN32-NEXT: movl %ecx, %ebp
-; WIN32-NEXT: sarl $31, %ebp
-; WIN32-NEXT: addl %ebx, %ecx
-; WIN32-NEXT: adcl %edi, %ebp
-; WIN32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; WIN32-NEXT: imull {{[0-9]+}}(%esp)
-; WIN32-NEXT: addl %ecx, %eax
-; WIN32-NEXT: adcl %ebp, %edx
-; WIN32-NEXT: movl (%esp), %esi # 4-byte Reload
-; WIN32-NEXT: movl %esi, %ecx
+; WIN32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; WIN32-NEXT: adcl %edi, %edx
+; WIN32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; WIN32-NEXT: movl %edi, %ecx
; WIN32-NEXT: sarl $31, %ecx
; WIN32-NEXT: xorl %ecx, %edx
; WIN32-NEXT: xorl %eax, %ecx
; WIN32-NEXT: orl %edx, %ecx
-; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; WIN32-NEXT: movl %esi, 4(%eax)
-; WIN32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; WIN32-NEXT: movl %ecx, (%eax)
+; WIN32-NEXT: movl %edi, %ecx
; WIN32-NEXT: setne %al
-; WIN32-NEXT: addl $12, %esp
+; WIN32-NEXT: jmp LBB30_16
+; WIN32-NEXT: LBB30_13: # %overflow.no.lhs
+; WIN32-NEXT: subl %eax, %edx
+; WIN32-NEXT: je LBB30_14
+; WIN32-NEXT: # %bb.7: # %overflow.no.lhs.only
+; WIN32-NEXT: movl %ecx, %eax
+; WIN32-NEXT: sarl $31, %eax
+; WIN32-NEXT: movl %ecx, %ebp
+; WIN32-NEXT: xorl %eax, %ebp
+; WIN32-NEXT: movl %edi, %esi
+; WIN32-NEXT: xorl %eax, %esi
+; WIN32-NEXT: subl %eax, %esi
+; WIN32-NEXT: sbbl %eax, %ebp
+; WIN32-NEXT: testl %ecx, %ecx
+; WIN32-NEXT: sets {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; WIN32-NEXT: js LBB30_9
+; WIN32-NEXT: # %bb.8: # %overflow.no.lhs.only
+; WIN32-NEXT: movl %ecx, %ebp
+; WIN32-NEXT: movl %edi, %esi
+; WIN32-NEXT: LBB30_9: # %overflow.no.lhs.only
+; WIN32-NEXT: movl %ebx, %eax
+; WIN32-NEXT: sarl $31, %eax
+; WIN32-NEXT: movl %ebx, %ecx
+; WIN32-NEXT: xorl %eax, %ecx
+; WIN32-NEXT: movl {{[0-9]+}}(%esp), %edx
+; WIN32-NEXT: movl %edx, %edi
+; WIN32-NEXT: xorl %eax, %edi
+; WIN32-NEXT: subl %eax, %edi
+; WIN32-NEXT: sbbl %eax, %ecx
+; WIN32-NEXT: testl %ebx, %ebx
+; WIN32-NEXT: sets (%esp) # 1-byte Folded Spill
+; WIN32-NEXT: js LBB30_11
+; WIN32-NEXT: # %bb.10: # %overflow.no.lhs.only
+; WIN32-NEXT: movl %ebx, %ecx
+; WIN32-NEXT: movl %edx, %edi
+; WIN32-NEXT: LBB30_11: # %overflow.no.lhs.only
+; WIN32-NEXT: movl %esi, %eax
+; WIN32-NEXT: mull %edi
+; WIN32-NEXT: movl %eax, %ebx
+; WIN32-NEXT: imull %ebp, %edi
+; WIN32-NEXT: addl %edx, %edi
+; WIN32-NEXT: imull %ecx, %ebp
+; WIN32-NEXT: movl %esi, %eax
+; WIN32-NEXT: mull %ecx
+; WIN32-NEXT: movl %eax, %ecx
+; WIN32-NEXT: addl %edi, %ecx
+; WIN32-NEXT: adcl %ebp, %edx
+; WIN32-NEXT: movl %ebx, %ebp
+; WIN32-NEXT: movzbl (%esp), %eax # 1-byte Folded Reload
+; WIN32-NEXT: xorb {{[-0-9]+}}(%e{{[sb]}}p), %al # 1-byte Folded Reload
+; WIN32-NEXT: jmp LBB30_12
+; WIN32-NEXT: LBB30_2: # %overflow.no.rhs.only
+; WIN32-NEXT: movl %ebx, %eax
+; WIN32-NEXT: sarl $31, %eax
+; WIN32-NEXT: movl %ebx, %ebp
+; WIN32-NEXT: xorl %eax, %ebp
+; WIN32-NEXT: movl {{[0-9]+}}(%esp), %edx
+; WIN32-NEXT: movl %edx, %esi
+; WIN32-NEXT: xorl %eax, %esi
+; WIN32-NEXT: subl %eax, %esi
+; WIN32-NEXT: sbbl %eax, %ebp
+; WIN32-NEXT: testl %ebx, %ebx
+; WIN32-NEXT: sets {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; WIN32-NEXT: js LBB30_4
+; WIN32-NEXT: # %bb.3: # %overflow.no.rhs.only
+; WIN32-NEXT: movl %ebx, %ebp
+; WIN32-NEXT: movl %edx, %esi
+; WIN32-NEXT: LBB30_4: # %overflow.no.rhs.only
+; WIN32-NEXT: movl %esi, %edx
+; WIN32-NEXT: movl %ecx, %eax
+; WIN32-NEXT: sarl $31, %eax
+; WIN32-NEXT: movl %ecx, %ebx
+; WIN32-NEXT: xorl %eax, %ebx
+; WIN32-NEXT: movl %edi, %esi
+; WIN32-NEXT: xorl %eax, %esi
+; WIN32-NEXT: subl %eax, %esi
+; WIN32-NEXT: sbbl %eax, %ebx
+; WIN32-NEXT: testl %ecx, %ecx
+; WIN32-NEXT: sets (%esp) # 1-byte Folded Spill
+; WIN32-NEXT: js LBB30_6
+; WIN32-NEXT: # %bb.5: # %overflow.no.rhs.only
+; WIN32-NEXT: movl %ecx, %ebx
+; WIN32-NEXT: movl %edi, %esi
+; WIN32-NEXT: LBB30_6: # %overflow.no.rhs.only
+; WIN32-NEXT: movl %edx, %ecx
+; WIN32-NEXT: movl %edx, %eax
+; WIN32-NEXT: mull %esi
+; WIN32-NEXT: movl %eax, %edi
+; WIN32-NEXT: imull %ebp, %esi
+; WIN32-NEXT: addl %edx, %esi
+; WIN32-NEXT: imull %ebx, %ebp
+; WIN32-NEXT: movl %ecx, %eax
+; WIN32-NEXT: mull %ebx
+; WIN32-NEXT: movl %eax, %ecx
+; WIN32-NEXT: addl %esi, %ecx
+; WIN32-NEXT: adcl %ebp, %edx
+; WIN32-NEXT: movl %edi, %ebp
+; WIN32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
+; WIN32-NEXT: xorb (%esp), %al # 1-byte Folded Reload
+; WIN32-NEXT: LBB30_12: # %overflow.res
+; WIN32-NEXT: movzbl %al, %esi
+; WIN32-NEXT: movl %esi, %eax
+; WIN32-NEXT: negl %eax
+; WIN32-NEXT: xorl %eax, %ebp
+; WIN32-NEXT: addl %esi, %ebp
+; WIN32-NEXT: xorl %ebx, %ebx
+; WIN32-NEXT: movl %ebp, %edi
+; WIN32-NEXT: subl %esi, %edi
+; WIN32-NEXT: setb %bl
+; WIN32-NEXT: xorl %eax, %ecx
+; WIN32-NEXT: addl %ebx, %ecx
+; WIN32-NEXT: xorl %edx, %eax
+; WIN32-NEXT: movl %ecx, %edx
+; WIN32-NEXT: subl %ebx, %edx
+; WIN32-NEXT: adcl $0, %eax
+; WIN32-NEXT: setne %al
+; WIN32-NEXT: movl {{[0-9]+}}(%esp), %esi
+; WIN32-NEXT: jmp LBB30_16
+; WIN32-NEXT: LBB30_14: # %overflow.no
+; WIN32-NEXT: movl %edi, %eax
+; WIN32-NEXT: movl {{[0-9]+}}(%esp), %edx
+; WIN32-NEXT: mull %edx
+; WIN32-NEXT: movl %eax, %ebp
+; WIN32-NEXT: imull %edi, %ebx
+; WIN32-NEXT: addl %edx, %ebx
+; WIN32-NEXT: imull {{[0-9]+}}(%esp), %ecx
+; WIN32-NEXT: addl %ebx, %ecx
+; WIN32-NEXT: xorl %eax, %eax
+; WIN32-NEXT: LBB30_16: # %overflow.res
+; WIN32-NEXT: movl %ebp, (%esi)
+; WIN32-NEXT: movl %ecx, 4(%esi)
+; WIN32-NEXT: andb $1, %al
+; WIN32-NEXT: # kill: def $al killed $al killed $eax
+; WIN32-NEXT: addl $16, %esp
; WIN32-NEXT: popl %esi
; WIN32-NEXT: popl %edi
; WIN32-NEXT: popl %ebx
@@ -1728,61 +2474,206 @@ define zeroext i1 @smuloi64_load2(i64 %v1, ptr %ptr2, ptr %res) {
; WIN64-NEXT: retq
;
; WIN32-LABEL: smuloi64_load2:
-; WIN32: # %bb.0:
+; WIN32: # %bb.0: # %overflow.entry
; WIN32-NEXT: pushl %ebp
; WIN32-NEXT: pushl %ebx
; WIN32-NEXT: pushl %edi
; WIN32-NEXT: pushl %esi
; WIN32-NEXT: subl $12, %esp
-; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; WIN32-NEXT: movl {{[0-9]+}}(%esp), %edi
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; WIN32-NEXT: movl (%ecx), %ebx
-; WIN32-NEXT: movl %edi, %esi
+; WIN32-NEXT: movl {{[0-9]+}}(%esp), %esi
+; WIN32-NEXT: movl {{[0-9]+}}(%esp), %edx
+; WIN32-NEXT: movl (%edx), %ebx
+; WIN32-NEXT: movl %ebx, %eax
+; WIN32-NEXT: sarl $31, %eax
+; WIN32-NEXT: sarl $31, %esi
+; WIN32-NEXT: movl %ecx, %edi
+; WIN32-NEXT: subl %esi, %edi
+; WIN32-NEXT: movl {{[0-9]+}}(%esp), %esi
+; WIN32-NEXT: movl 4(%edx), %ebp
+; WIN32-NEXT: movl %ebp, %edx
+; WIN32-NEXT: je LBB31_13
+; WIN32-NEXT: # %bb.1: # %overflow.lhs
+; WIN32-NEXT: subl %eax, %edx
+; WIN32-NEXT: je LBB31_2
+; WIN32-NEXT: # %bb.15: # %overflow
+; WIN32-NEXT: movl %ecx, %esi
; WIN32-NEXT: sarl $31, %esi
; WIN32-NEXT: imull %ebx, %esi
+; WIN32-NEXT: movl {{[0-9]+}}(%esp), %edi
+; WIN32-NEXT: movl %edi, %eax
; WIN32-NEXT: mull %ebx
-; WIN32-NEXT: movl %edx, %ecx
+; WIN32-NEXT: movl %edx, (%esp) # 4-byte Spill
; WIN32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; WIN32-NEXT: movl %edi, %eax
+; WIN32-NEXT: movl %ecx, %eax
; WIN32-NEXT: mull %ebx
; WIN32-NEXT: movl %edx, %ebx
-; WIN32-NEXT: movl %eax, %ebp
-; WIN32-NEXT: addl %ecx, %ebp
-; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; WIN32-NEXT: movl 4(%eax), %ecx
-; WIN32-NEXT: movl %ecx, (%esp) # 4-byte Spill
+; WIN32-NEXT: addl (%esp), %eax # 4-byte Folded Reload
+; WIN32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; WIN32-NEXT: adcl %esi, %ebx
-; WIN32-NEXT: movl %ebx, %edi
-; WIN32-NEXT: sarl $31, %edi
-; WIN32-NEXT: movl %ecx, %esi
+; WIN32-NEXT: movl %ebx, %eax
+; WIN32-NEXT: sarl $31, %eax
+; WIN32-NEXT: movl %eax, (%esp) # 4-byte Spill
+; WIN32-NEXT: movl %ebp, %esi
; WIN32-NEXT: sarl $31, %esi
-; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; WIN32-NEXT: imull %eax, %esi
-; WIN32-NEXT: mull %ecx
-; WIN32-NEXT: movl %edx, %ecx
-; WIN32-NEXT: addl %ebp, %eax
+; WIN32-NEXT: imull %edi, %esi
+; WIN32-NEXT: movl %edi, %eax
+; WIN32-NEXT: mull %ebp
+; WIN32-NEXT: movl %edx, %edi
+; WIN32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; WIN32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; WIN32-NEXT: adcl %esi, %ecx
-; WIN32-NEXT: movl %ecx, %ebp
-; WIN32-NEXT: sarl $31, %ebp
-; WIN32-NEXT: addl %ebx, %ecx
-; WIN32-NEXT: adcl %edi, %ebp
-; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; WIN32-NEXT: imull (%esp) # 4-byte Folded Reload
-; WIN32-NEXT: addl %ecx, %eax
-; WIN32-NEXT: adcl %ebp, %edx
-; WIN32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; WIN32-NEXT: movl %esi, %ecx
+; WIN32-NEXT: adcl %esi, %edi
+; WIN32-NEXT: movl %edi, %esi
+; WIN32-NEXT: sarl $31, %esi
+; WIN32-NEXT: addl %ebx, %edi
+; WIN32-NEXT: adcl (%esp), %esi # 4-byte Folded Reload
+; WIN32-NEXT: movl %ecx, %eax
+; WIN32-NEXT: imull %ebp
+; WIN32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; WIN32-NEXT: addl %edi, %eax
+; WIN32-NEXT: adcl %esi, %edx
+; WIN32-NEXT: movl {{[0-9]+}}(%esp), %esi
+; WIN32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; WIN32-NEXT: movl %edi, %ecx
; WIN32-NEXT: sarl $31, %ecx
; WIN32-NEXT: xorl %ecx, %edx
; WIN32-NEXT: xorl %eax, %ecx
; WIN32-NEXT: orl %edx, %ecx
-; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; WIN32-NEXT: movl %esi, 4(%eax)
-; WIN32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; WIN32-NEXT: movl %ecx, (%eax)
+; WIN32-NEXT: movl %edi, %ecx
; WIN32-NEXT: setne %al
+; WIN32-NEXT: jmp LBB31_16
+; WIN32-NEXT: LBB31_13: # %overflow.no.lhs
+; WIN32-NEXT: subl %eax, %edx
+; WIN32-NEXT: je LBB31_14
+; WIN32-NEXT: # %bb.8: # %overflow.no.lhs.only
+; WIN32-NEXT: movl %ecx, %eax
+; WIN32-NEXT: sarl $31, %eax
+; WIN32-NEXT: movl %ecx, %esi
+; WIN32-NEXT: xorl %eax, %esi
+; WIN32-NEXT: movl {{[0-9]+}}(%esp), %edx
+; WIN32-NEXT: movl %edx, %edi
+; WIN32-NEXT: xorl %eax, %edi
+; WIN32-NEXT: subl %eax, %edi
+; WIN32-NEXT: sbbl %eax, %esi
+; WIN32-NEXT: testl %ecx, %ecx
+; WIN32-NEXT: sets {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; WIN32-NEXT: js LBB31_10
+; WIN32-NEXT: # %bb.9: # %overflow.no.lhs.only
+; WIN32-NEXT: movl %ecx, %esi
+; WIN32-NEXT: movl %edx, %edi
+; WIN32-NEXT: LBB31_10: # %overflow.no.lhs.only
+; WIN32-NEXT: movl %edi, %edx
+; WIN32-NEXT: movl %ebp, %eax
+; WIN32-NEXT: sarl $31, %eax
+; WIN32-NEXT: movl %ebp, %ecx
+; WIN32-NEXT: xorl %eax, %ecx
+; WIN32-NEXT: movl %ebx, %edi
+; WIN32-NEXT: xorl %eax, %edi
+; WIN32-NEXT: subl %eax, %edi
+; WIN32-NEXT: sbbl %eax, %ecx
+; WIN32-NEXT: testl %ebp, %ebp
+; WIN32-NEXT: sets (%esp) # 1-byte Folded Spill
+; WIN32-NEXT: js LBB31_12
+; WIN32-NEXT: # %bb.11: # %overflow.no.lhs.only
+; WIN32-NEXT: movl %ebp, %ecx
+; WIN32-NEXT: movl %ebx, %edi
+; WIN32-NEXT: LBB31_12: # %overflow.no.lhs.only
+; WIN32-NEXT: movl %edx, %ebx
+; WIN32-NEXT: movl %edx, %eax
+; WIN32-NEXT: mull %edi
+; WIN32-NEXT: movl %eax, %ebp
+; WIN32-NEXT: imull %esi, %edi
+; WIN32-NEXT: addl %edx, %edi
+; WIN32-NEXT: imull %ecx, %esi
+; WIN32-NEXT: movl %ebx, %eax
+; WIN32-NEXT: mull %ecx
+; WIN32-NEXT: movl %eax, %ecx
+; WIN32-NEXT: addl %edi, %ecx
+; WIN32-NEXT: adcl %esi, %edx
+; WIN32-NEXT: movzbl (%esp), %eax # 1-byte Folded Reload
+; WIN32-NEXT: xorb {{[-0-9]+}}(%e{{[sb]}}p), %al # 1-byte Folded Reload
+; WIN32-NEXT: jmp LBB31_7
+; WIN32-NEXT: LBB31_2: # %overflow.no.rhs.only
+; WIN32-NEXT: movl %ebp, %eax
+; WIN32-NEXT: sarl $31, %eax
+; WIN32-NEXT: movl %ebp, %edi
+; WIN32-NEXT: xorl %eax, %edi
+; WIN32-NEXT: movl %ebx, %edx
+; WIN32-NEXT: xorl %eax, %edx
+; WIN32-NEXT: subl %eax, %edx
+; WIN32-NEXT: sbbl %eax, %edi
+; WIN32-NEXT: testl %ebp, %ebp
+; WIN32-NEXT: sets {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; WIN32-NEXT: js LBB31_4
+; WIN32-NEXT: # %bb.3: # %overflow.no.rhs.only
+; WIN32-NEXT: movl %ebp, %edi
+; WIN32-NEXT: movl %ebx, %edx
+; WIN32-NEXT: LBB31_4: # %overflow.no.rhs.only
+; WIN32-NEXT: movl %edx, %ebp
+; WIN32-NEXT: movl %ecx, %eax
+; WIN32-NEXT: sarl $31, %eax
+; WIN32-NEXT: movl %ecx, %ebx
+; WIN32-NEXT: xorl %eax, %ebx
+; WIN32-NEXT: movl {{[0-9]+}}(%esp), %edx
+; WIN32-NEXT: movl %edx, %esi
+; WIN32-NEXT: xorl %eax, %esi
+; WIN32-NEXT: subl %eax, %esi
+; WIN32-NEXT: sbbl %eax, %ebx
+; WIN32-NEXT: testl %ecx, %ecx
+; WIN32-NEXT: sets (%esp) # 1-byte Folded Spill
+; WIN32-NEXT: js LBB31_6
+; WIN32-NEXT: # %bb.5: # %overflow.no.rhs.only
+; WIN32-NEXT: movl %ecx, %ebx
+; WIN32-NEXT: movl %edx, %esi
+; WIN32-NEXT: LBB31_6: # %overflow.no.rhs.only
+; WIN32-NEXT: movl %ebp, %ecx
+; WIN32-NEXT: movl %ebp, %eax
+; WIN32-NEXT: mull %esi
+; WIN32-NEXT: movl %eax, %ebp
+; WIN32-NEXT: imull %edi, %esi
+; WIN32-NEXT: addl %edx, %esi
+; WIN32-NEXT: imull %ebx, %edi
+; WIN32-NEXT: movl %ecx, %eax
+; WIN32-NEXT: mull %ebx
+; WIN32-NEXT: movl %eax, %ecx
+; WIN32-NEXT: addl %esi, %ecx
+; WIN32-NEXT: adcl %edi, %edx
+; WIN32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
+; WIN32-NEXT: xorb (%esp), %al # 1-byte Folded Reload
+; WIN32-NEXT: LBB31_7: # %overflow.res
+; WIN32-NEXT: movzbl %al, %esi
+; WIN32-NEXT: movl %esi, %eax
+; WIN32-NEXT: negl %eax
+; WIN32-NEXT: xorl %eax, %ebp
+; WIN32-NEXT: addl %esi, %ebp
+; WIN32-NEXT: xorl %ebx, %ebx
+; WIN32-NEXT: movl %ebp, %edi
+; WIN32-NEXT: subl %esi, %edi
+; WIN32-NEXT: setb %bl
+; WIN32-NEXT: xorl %eax, %ecx
+; WIN32-NEXT: addl %ebx, %ecx
+; WIN32-NEXT: xorl %edx, %eax
+; WIN32-NEXT: movl %ecx, %edx
+; WIN32-NEXT: subl %ebx, %edx
+; WIN32-NEXT: adcl $0, %eax
+; WIN32-NEXT: setne %al
+; WIN32-NEXT: movl {{[0-9]+}}(%esp), %esi
+; WIN32-NEXT: jmp LBB31_16
+; WIN32-NEXT: LBB31_14: # %overflow.no
+; WIN32-NEXT: movl {{[0-9]+}}(%esp), %edi
+; WIN32-NEXT: movl %edi, %eax
+; WIN32-NEXT: mull %ebx
+; WIN32-NEXT: imull %edi, %ebp
+; WIN32-NEXT: addl %edx, %ebp
+; WIN32-NEXT: imull %ebx, %ecx
+; WIN32-NEXT: addl %ebp, %ecx
+; WIN32-NEXT: movl %eax, %ebp
+; WIN32-NEXT: xorl %eax, %eax
+; WIN32-NEXT: LBB31_16: # %overflow.res
+; WIN32-NEXT: movl %ebp, (%esi)
+; WIN32-NEXT: movl %ecx, 4(%esi)
+; WIN32-NEXT: andb $1, %al
+; WIN32-NEXT: # kill: def $al killed $al killed $eax
; WIN32-NEXT: addl $12, %esp
; WIN32-NEXT: popl %esi
; WIN32-NEXT: popl %edi
@@ -2133,38 +3024,94 @@ define zeroext i1 @umuloi64_load(ptr %ptr1, i64 %v2, ptr %res) {
; WIN64-NEXT: retq
;
; WIN32-LABEL: umuloi64_load:
-; WIN32: # %bb.0:
+; WIN32: # %bb.0: # %overflow.entry
; WIN32-NEXT: pushl %ebp
; WIN32-NEXT: pushl %ebx
; WIN32-NEXT: pushl %edi
; WIN32-NEXT: pushl %esi
+; WIN32-NEXT: pushl %eax
+; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ebp
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %esi
+; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; WIN32-NEXT: movl (%eax), %ebp
-; WIN32-NEXT: movl 4(%eax), %eax
-; WIN32-NEXT: testl %esi, %esi
-; WIN32-NEXT: setne %dl
-; WIN32-NEXT: testl %eax, %eax
+; WIN32-NEXT: movl (%eax), %edi
+; WIN32-NEXT: movl 4(%eax), %ebx
+; WIN32-NEXT: testl %ebx, %ebx
+; WIN32-NEXT: je LBB38_5
+; WIN32-NEXT: # %bb.1: # %overflow.lhs
+; WIN32-NEXT: testl %ebp, %ebp
+; WIN32-NEXT: je LBB38_2
+; WIN32-NEXT: # %bb.7: # %overflow
+; WIN32-NEXT: setne %al
+; WIN32-NEXT: testl %ebx, %ebx
; WIN32-NEXT: setne %cl
-; WIN32-NEXT: andb %dl, %cl
-; WIN32-NEXT: mull {{[0-9]+}}(%esp)
-; WIN32-NEXT: movl %eax, %edi
+; WIN32-NEXT: andb %al, %cl
+; WIN32-NEXT: movl %ebx, %eax
+; WIN32-NEXT: mull %esi
+; WIN32-NEXT: movl %eax, (%esp) # 4-byte Spill
; WIN32-NEXT: seto %bl
-; WIN32-NEXT: movl %esi, %eax
-; WIN32-NEXT: mull %ebp
+; WIN32-NEXT: movl %ebp, %eax
+; WIN32-NEXT: mull %edi
; WIN32-NEXT: seto %ch
; WIN32-NEXT: orb %bl, %ch
; WIN32-NEXT: orb %cl, %ch
-; WIN32-NEXT: leal (%edi,%eax), %esi
-; WIN32-NEXT: movl %ebp, %eax
-; WIN32-NEXT: mull {{[0-9]+}}(%esp)
-; WIN32-NEXT: addl %esi, %edx
-; WIN32-NEXT: setb %cl
-; WIN32-NEXT: orb %ch, %cl
-; WIN32-NEXT: movl {{[0-9]+}}(%esp), %esi
-; WIN32-NEXT: movl %eax, (%esi)
-; WIN32-NEXT: movl %edx, 4(%esi)
-; WIN32-NEXT: movl %ecx, %eax
+; WIN32-NEXT: movl (%esp), %edx # 4-byte Reload
+; WIN32-NEXT: leal (%edx,%eax), %ebx
+; WIN32-NEXT: movl %edi, %eax
+; WIN32-NEXT: mull %esi
+; WIN32-NEXT: movl %eax, %esi
+; WIN32-NEXT: movl %edx, %eax
+; WIN32-NEXT: addl %ebx, %eax
+; WIN32-NEXT: setb %dl
+; WIN32-NEXT: orb %ch, %dl
+; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; WIN32-NEXT: jmp LBB38_8
+; WIN32-NEXT: LBB38_5: # %overflow.no.lhs
+; WIN32-NEXT: movl %edi, %eax
+; WIN32-NEXT: mull %esi
+; WIN32-NEXT: testl %ebp, %ebp
+; WIN32-NEXT: je LBB38_6
+; WIN32-NEXT: # %bb.4: # %overflow.no.lhs.only
+; WIN32-NEXT: movl %eax, (%esp) # 4-byte Spill
+; WIN32-NEXT: imull %ebx, %esi
+; WIN32-NEXT: addl %edx, %esi
+; WIN32-NEXT: imull %ebp, %ebx
+; WIN32-NEXT: movl %edi, %eax
+; WIN32-NEXT: mull %ebp
+; WIN32-NEXT: addl %esi, %eax
+; WIN32-NEXT: movl (%esp), %esi # 4-byte Reload
+; WIN32-NEXT: adcl %ebx, %edx
+; WIN32-NEXT: jmp LBB38_3
+; WIN32-NEXT: LBB38_2: # %overflow.no.rhs.only
+; WIN32-NEXT: movl %esi, %eax
+; WIN32-NEXT: mull %edi
+; WIN32-NEXT: movl %eax, (%esp) # 4-byte Spill
+; WIN32-NEXT: imull %ebp, %edi
+; WIN32-NEXT: addl %edx, %edi
+; WIN32-NEXT: imull %ebx, %ebp
+; WIN32-NEXT: movl %esi, %eax
+; WIN32-NEXT: movl (%esp), %esi # 4-byte Reload
+; WIN32-NEXT: mull %ebx
+; WIN32-NEXT: addl %edi, %eax
+; WIN32-NEXT: adcl %ebp, %edx
+; WIN32-NEXT: LBB38_3: # %overflow.res
+; WIN32-NEXT: testl %edx, %edx
+; WIN32-NEXT: setne %dl
+; WIN32-NEXT: jmp LBB38_8
+; WIN32-NEXT: LBB38_6: # %overflow.no
+; WIN32-NEXT: imull %ebp, %edi
+; WIN32-NEXT: addl %edx, %edi
+; WIN32-NEXT: imull %esi, %ebx
+; WIN32-NEXT: movl %eax, %esi
+; WIN32-NEXT: addl %edi, %ebx
+; WIN32-NEXT: xorl %edx, %edx
+; WIN32-NEXT: movl %ebx, %eax
+; WIN32-NEXT: LBB38_8: # %overflow.res
+; WIN32-NEXT: movl %esi, (%ecx)
+; WIN32-NEXT: movl %eax, 4(%ecx)
+; WIN32-NEXT: andb $1, %dl
+; WIN32-NEXT: movl %edx, %eax
+; WIN32-NEXT: addl $4, %esp
; WIN32-NEXT: popl %esi
; WIN32-NEXT: popl %edi
; WIN32-NEXT: popl %ebx
@@ -2210,38 +3157,94 @@ define zeroext i1 @umuloi64_load2(i64 %v1, ptr %ptr2, ptr %res) {
; WIN64-NEXT: retq
;
; WIN32-LABEL: umuloi64_load2:
-; WIN32: # %bb.0:
+; WIN32: # %bb.0: # %overflow.entry
; WIN32-NEXT: pushl %ebp
; WIN32-NEXT: pushl %ebx
; WIN32-NEXT: pushl %edi
; WIN32-NEXT: pushl %esi
-; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; WIN32-NEXT: pushl %eax
+; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; WIN32-NEXT: movl {{[0-9]+}}(%esp), %esi
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; WIN32-NEXT: movl (%ecx), %ebp
-; WIN32-NEXT: movl 4(%ecx), %esi
-; WIN32-NEXT: testl %eax, %eax
-; WIN32-NEXT: setne %dl
-; WIN32-NEXT: testl %esi, %esi
+; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; WIN32-NEXT: movl (%eax), %edi
+; WIN32-NEXT: movl 4(%eax), %ebp
+; WIN32-NEXT: testl %ebx, %ebx
+; WIN32-NEXT: je LBB39_5
+; WIN32-NEXT: # %bb.1: # %overflow.lhs
+; WIN32-NEXT: testl %ebp, %ebp
+; WIN32-NEXT: je LBB39_2
+; WIN32-NEXT: # %bb.7: # %overflow
+; WIN32-NEXT: setne %al
+; WIN32-NEXT: testl %ebx, %ebx
; WIN32-NEXT: setne %cl
-; WIN32-NEXT: andb %dl, %cl
-; WIN32-NEXT: mull %ebp
-; WIN32-NEXT: movl %eax, %edi
+; WIN32-NEXT: andb %al, %cl
+; WIN32-NEXT: movl %ebx, %eax
+; WIN32-NEXT: mull %edi
+; WIN32-NEXT: movl %eax, (%esp) # 4-byte Spill
; WIN32-NEXT: seto %bl
-; WIN32-NEXT: movl %esi, %eax
-; WIN32-NEXT: mull {{[0-9]+}}(%esp)
+; WIN32-NEXT: movl %ebp, %eax
+; WIN32-NEXT: mull %esi
; WIN32-NEXT: seto %ch
; WIN32-NEXT: orb %bl, %ch
; WIN32-NEXT: orb %cl, %ch
-; WIN32-NEXT: leal (%edi,%eax), %esi
-; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; WIN32-NEXT: movl (%esp), %edx # 4-byte Reload
+; WIN32-NEXT: leal (%edx,%eax), %ebx
+; WIN32-NEXT: movl %esi, %eax
+; WIN32-NEXT: mull %edi
+; WIN32-NEXT: movl %eax, %esi
+; WIN32-NEXT: movl %edx, %eax
+; WIN32-NEXT: addl %ebx, %eax
+; WIN32-NEXT: setb %dl
+; WIN32-NEXT: orb %ch, %dl
+; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; WIN32-NEXT: jmp LBB39_8
+; WIN32-NEXT: LBB39_5: # %overflow.no.lhs
+; WIN32-NEXT: movl %esi, %eax
+; WIN32-NEXT: mull %edi
+; WIN32-NEXT: testl %ebp, %ebp
+; WIN32-NEXT: je LBB39_6
+; WIN32-NEXT: # %bb.4: # %overflow.no.lhs.only
+; WIN32-NEXT: movl %eax, (%esp) # 4-byte Spill
+; WIN32-NEXT: imull %ebx, %edi
+; WIN32-NEXT: addl %edx, %edi
+; WIN32-NEXT: imull %ebp, %ebx
+; WIN32-NEXT: movl %esi, %eax
+; WIN32-NEXT: movl (%esp), %esi # 4-byte Reload
; WIN32-NEXT: mull %ebp
-; WIN32-NEXT: addl %esi, %edx
-; WIN32-NEXT: setb %cl
-; WIN32-NEXT: orb %ch, %cl
-; WIN32-NEXT: movl {{[0-9]+}}(%esp), %esi
-; WIN32-NEXT: movl %eax, (%esi)
-; WIN32-NEXT: movl %edx, 4(%esi)
-; WIN32-NEXT: movl %ecx, %eax
+; WIN32-NEXT: addl %edi, %eax
+; WIN32-NEXT: adcl %ebx, %edx
+; WIN32-NEXT: jmp LBB39_3
+; WIN32-NEXT: LBB39_2: # %overflow.no.rhs.only
+; WIN32-NEXT: movl %edi, %eax
+; WIN32-NEXT: mull %esi
+; WIN32-NEXT: movl %eax, (%esp) # 4-byte Spill
+; WIN32-NEXT: imull %ebp, %esi
+; WIN32-NEXT: addl %edx, %esi
+; WIN32-NEXT: imull %ebx, %ebp
+; WIN32-NEXT: movl %edi, %eax
+; WIN32-NEXT: mull %ebx
+; WIN32-NEXT: addl %esi, %eax
+; WIN32-NEXT: movl (%esp), %esi # 4-byte Reload
+; WIN32-NEXT: adcl %ebp, %edx
+; WIN32-NEXT: LBB39_3: # %overflow.res
+; WIN32-NEXT: testl %edx, %edx
+; WIN32-NEXT: setne %dl
+; WIN32-NEXT: jmp LBB39_8
+; WIN32-NEXT: LBB39_6: # %overflow.no
+; WIN32-NEXT: imull %ebp, %esi
+; WIN32-NEXT: addl %edx, %esi
+; WIN32-NEXT: imull %edi, %ebx
+; WIN32-NEXT: addl %esi, %ebx
+; WIN32-NEXT: movl %eax, %esi
+; WIN32-NEXT: xorl %edx, %edx
+; WIN32-NEXT: movl %ebx, %eax
+; WIN32-NEXT: LBB39_8: # %overflow.res
+; WIN32-NEXT: movl %esi, (%ecx)
+; WIN32-NEXT: movl %eax, 4(%ecx)
+; WIN32-NEXT: andb $1, %dl
+; WIN32-NEXT: movl %edx, %eax
+; WIN32-NEXT: addl $4, %esp
; WIN32-NEXT: popl %esi
; WIN32-NEXT: popl %edi
; WIN32-NEXT: popl %ebx