[Mlir-commits] [llvm] [mlir] [profcheck] Fix profile metadata propagation for Large FP Operations (PR #175864)
Jin Huang
llvmlistbot at llvm.org
Mon Jan 26 01:20:03 PST 2026
https://github.com/jinhuang1102 updated https://github.com/llvm/llvm-project/pull/175864
>From 76053abe00d928f2034caa15f150c84a8dc8505e Mon Sep 17 00:00:00 2001
From: lonely eagle <2020382038 at qq.com>
Date: Mon, 26 Jan 2026 16:51:44 +0800
Subject: [PATCH] [mlir][analysis] Cleanup collectUnderlyingAddressValues
(NFC) (#177905)
---
llvm/lib/CodeGen/ExpandIRInsts.cpp | 77 +-
llvm/test/CodeGen/AMDGPU/itofp.i128.bf.ll | 112 +-
llvm/test/CodeGen/AMDGPU/itofp.i128.ll | 694 +++---
llvm/test/CodeGen/RISCV/bitint-fp-conv-200.ll | 1608 +++++++-------
llvm/test/CodeGen/RISCV/fpclamptosat.ll | 1890 +++++++++--------
.../X86/expand-large-fp-convert-fptosi129.ll | 33 +-
.../X86/expand-large-fp-convert-fptoui129.ll | 33 +-
.../X86/expand-large-fp-convert-si129tofp.ll | 70 +-
.../X86/expand-large-fp-convert-ui129tofp.ll | 70 +-
.../X86/expand-large-fp-optnone.ll | 25 +-
.../AliasAnalysis/LocalAliasAnalysis.cpp | 18 +-
11 files changed, 2393 insertions(+), 2237 deletions(-)
diff --git a/llvm/lib/CodeGen/ExpandIRInsts.cpp b/llvm/lib/CodeGen/ExpandIRInsts.cpp
index dac4d0fa466d4..7ccbd6ea0b335 100644
--- a/llvm/lib/CodeGen/ExpandIRInsts.cpp
+++ b/llvm/lib/CodeGen/ExpandIRInsts.cpp
@@ -40,10 +40,15 @@
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
+#include "llvm/IR/ProfDataUtils.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetMachine.h"
@@ -56,6 +61,10 @@
using namespace llvm;
+namespace llvm {
+extern cl::opt<bool> ProfcheckDisableMetadataFixes;
+}
+
static cl::opt<unsigned>
ExpandFpConvertBits("expand-fp-convert-bits", cl::Hidden,
cl::init(llvm::IntegerType::MAX_INT_BITS),
@@ -69,6 +78,7 @@ static cl::opt<unsigned>
"more than <N> bits are expanded."));
namespace {
+
bool isConstantPowerOfTwo(llvm::Value *V, bool SignedOp) {
auto *C = dyn_cast<ConstantInt>(V);
if (!C)
@@ -571,36 +581,60 @@ static void expandFPToI(Instruction *FPToI) {
ARep0, ConstantInt::getSigned(Builder.getIntNTy(FloatWidth), -1));
Value *Sign = Builder.CreateSelect(PosOrNeg, ConstantInt::getSigned(IntTy, 1),
ConstantInt::getSigned(IntTy, -1));
+ applyProfMetadataIfEnabled(Sign, [&](Instruction *Inst) {
+ setExplicitlyUnknownBranchWeightsIfProfiled(*Inst, DEBUG_TYPE, F);
+ });
Value *And =
Builder.CreateLShr(ARep, Builder.getIntN(BitWidth, FPMantissaWidth));
Value *And2 = Builder.CreateAnd(
And, Builder.getIntN(BitWidth, (1 << ExponentWidth) - 1));
Value *Abs = Builder.CreateAnd(ARep, SignificandMask);
Value *Or = Builder.CreateOr(Abs, ImplicitBit);
+ // The comparison checks whether the floating-point value is in the range
+ // (-1, 1). We assume unknown (50/50) as the branch weight.
Value *Cmp =
Builder.CreateICmpULT(And2, Builder.getIntN(BitWidth, ExponentBias));
- Builder.CreateCondBr(Cmp, End, IfEnd);
+ Value *CondBrEntry = Builder.CreateCondBr(Cmp, End, IfEnd);
+ applyProfMetadataIfEnabled(CondBrEntry, [&](Instruction *Inst) {
+ setExplicitlyUnknownBranchWeightsIfProfiled(*Inst, DEBUG_TYPE, F);
+ });
// if.end:
Builder.SetInsertPoint(IfEnd);
Value *Add1 = Builder.CreateAdd(
And2, ConstantInt::getSigned(
IntTy, -static_cast<int64_t>(ExponentBias + BitWidth)));
+ // The comparison is doing the overflow check so we assume the 'true' path is
+ // unlikely.
Value *Cmp3 = Builder.CreateICmpULT(
Add1, ConstantInt::getSigned(IntTy, -static_cast<int64_t>(BitWidth)));
- Builder.CreateCondBr(Cmp3, IfThen5, IfEnd9);
+ Value *CondBrIfEnd = Builder.CreateCondBr(Cmp3, IfThen5, IfEnd9);
+ applyProfMetadataIfEnabled(CondBrIfEnd, [&](Instruction *Inst) {
+ Inst->setMetadata(
+ LLVMContext::MD_prof,
+ MDBuilder(Inst->getContext()).createUnlikelyBranchWeights());
+ });
// if.then5:
Builder.SetInsertPoint(IfThen5);
Value *PosInf = Builder.CreateXor(NegOne, NegInf);
Value *Cond8 = Builder.CreateSelect(PosOrNeg, PosInf, NegInf);
+ applyProfMetadataIfEnabled(Cond8, [&](Instruction *Inst) {
+ setExplicitlyUnknownBranchWeightsIfProfiled(*Inst, DEBUG_TYPE, F);
+ });
Builder.CreateBr(End);
// if.end9:
Builder.SetInsertPoint(IfEnd9);
+ // The shift direction depends on the magnitude of the floating-point number.
+ // Given the lack of domain-specific profiles, we treat the left-shift and
+ // right-shift paths as 50/50.
Value *Cmp10 = Builder.CreateICmpULT(
And2, Builder.getIntN(BitWidth, ExponentBias + FPMantissaWidth));
- Builder.CreateCondBr(Cmp10, IfThen12, IfElse);
+ Value *CondBrIfEnd9 = Builder.CreateCondBr(Cmp10, IfThen12, IfElse);
+ applyProfMetadataIfEnabled(CondBrIfEnd9, [&](Instruction *Inst) {
+ setExplicitlyUnknownBranchWeightsIfProfiled(*Inst, DEBUG_TYPE, F);
+ });
// if.then12:
Builder.SetInsertPoint(IfThen12);
@@ -772,8 +806,15 @@ static void expandIToFP(Instruction *IToFP) {
// entry:
Builder.SetInsertPoint(Entry);
+ // We assume that zero is an unlikely input, so the branch to 'End'
+ // is the unlikely path.
Value *Cmp = Builder.CreateICmpEQ(IntVal, ConstantInt::getSigned(IntTy, 0));
- Builder.CreateCondBr(Cmp, End, IfEnd);
+ Value *CondBrEntry = Builder.CreateCondBr(Cmp, End, IfEnd);
+ applyProfMetadataIfEnabled(CondBrEntry, [&](Instruction *Inst) {
+ Inst->setMetadata(
+ LLVMContext::MD_prof,
+ MDBuilder(Inst->getContext()).createUnlikelyBranchWeights());
+ });
// if.end:
Builder.SetInsertPoint(IfEnd);
@@ -790,13 +831,30 @@ static void expandIToFP(Instruction *IToFP) {
FloatWidth == 128 ? Call : Cast);
Value *Cmp3 = Builder.CreateICmpSGT(
Sub1, Builder.getIntN(BitWidthNew, FPMantissaWidth + 1));
- Builder.CreateCondBr(Cmp3, IfThen4, IfElse);
+ // This branch handles the rare case where rounding the mantissa causes a
+ // carry-out at the most significant bit, necessitating an increment of the
+ // exponent. This is a rare case, so the True path is marked as likely.
+ Value *CondBrIfEnd = Builder.CreateCondBr(Cmp3, IfThen4, IfElse);
+ applyProfMetadataIfEnabled(CondBrIfEnd, [&](Instruction *Inst) {
+ Inst->setMetadata(
+ LLVMContext::MD_prof,
+ MDBuilder(Inst->getContext()).createLikelyBranchWeights());
+ });
// if.then4:
Builder.SetInsertPoint(IfThen4);
llvm::SwitchInst *SI = Builder.CreateSwitch(Sub1, SwDefault);
SI->addCase(Builder.getIntN(BitWidthNew, FPMantissaWidth + 2), SwBB);
SI->addCase(Builder.getIntN(BitWidthNew, FPMantissaWidth + 3), SwEpilog);
+ // Add branch weights to the SwitchInst. The weights are provided for the
+ // default case first (SwDefault), followed by each explicit case in the
+ // order they were added (SwBB, then SwEpilog). Because the following cases
+ // are rare, the default case is given a likely weight.
+ if (!ProfcheckDisableMetadataFixes) {
+ SI->setMetadata(LLVMContext::MD_prof,
+ MDBuilder(SI->getContext())
+ .createBranchWeights({(1U << 20) - 1, 1, 1}));
+ }
// sw.bb:
Builder.SetInsertPoint(SwBB);
@@ -850,7 +908,14 @@ static void expandIToFP(Instruction *IToFP) {
ExtractT64 = Builder.CreateTrunc(Sub2, Builder.getInt64Ty());
else
ExtractT64 = Builder.CreateTrunc(Extract63, Builder.getInt32Ty());
- Builder.CreateCondBr(PosOrNeg, IfEnd26, IfThen20);
+ // Rounding usually keeps the exponent within its current magnitude and
+ // overflow is rare. The False path is unlikely to be taken.
+ Value *CondBrSwEpilog = Builder.CreateCondBr(PosOrNeg, IfEnd26, IfThen20);
+ applyProfMetadataIfEnabled(CondBrSwEpilog, [&](Instruction *Inst) {
+ Inst->setMetadata(
+ LLVMContext::MD_prof,
+ MDBuilder(Inst->getContext()).createLikelyBranchWeights());
+ });
// if.then20
Builder.SetInsertPoint(IfThen20);
diff --git a/llvm/test/CodeGen/AMDGPU/itofp.i128.bf.ll b/llvm/test/CodeGen/AMDGPU/itofp.i128.bf.ll
index eee3352fa7452..4da7ab3f2f974 100644
--- a/llvm/test/CodeGen/AMDGPU/itofp.i128.bf.ll
+++ b/llvm/test/CodeGen/AMDGPU/itofp.i128.bf.ll
@@ -15,7 +15,7 @@ define bfloat @sitofp_i128_to_bf16(i128 %x) {
; GCN-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[4:5]
; GCN-NEXT: v_mov_b32_e32 v4, 0
; GCN-NEXT: s_and_saveexec_b64 s[6:7], vcc
-; GCN-NEXT: s_cbranch_execz .LBB0_14
+; GCN-NEXT: s_cbranch_execz .LBB0_12
; GCN-NEXT: ; %bb.1: ; %itofp-if-end
; GCN-NEXT: v_sub_co_u32_e32 v4, vcc, 0, v0
; GCN-NEXT: v_subb_co_u32_e32 v5, vcc, 0, v1, vcc
@@ -42,29 +42,22 @@ define bfloat @sitofp_i128_to_bf16(i128 %x) {
; GCN-NEXT: v_cmp_gt_i32_e32 vcc, 25, v2
; GCN-NEXT: s_and_saveexec_b64 s[4:5], vcc
; GCN-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
-; GCN-NEXT: ; %bb.2: ; %itofp-if-else
-; GCN-NEXT: v_add_u32_e32 v2, 0xffffff98, v7
-; GCN-NEXT: v_lshlrev_b64 v[0:1], v2, v[0:1]
-; GCN-NEXT: v_cmp_gt_u32_e32 vcc, 64, v2
-; GCN-NEXT: v_cndmask_b32_e32 v8, 0, v0, vcc
-; GCN-NEXT: ; implicit-def: $vgpr2
-; GCN-NEXT: ; implicit-def: $vgpr0_vgpr1
-; GCN-NEXT: ; implicit-def: $vgpr4_vgpr5
-; GCN-NEXT: ; %bb.3: ; %Flow3
+; GCN-NEXT: s_cbranch_execnz .LBB0_13
+; GCN-NEXT: .LBB0_2: ; %Flow3
; GCN-NEXT: s_or_saveexec_b64 s[8:9], s[4:5]
; GCN-NEXT: v_sub_u32_e32 v6, 0x7f, v7
; GCN-NEXT: s_xor_b64 exec, exec, s[8:9]
-; GCN-NEXT: s_cbranch_execz .LBB0_13
-; GCN-NEXT: ; %bb.4: ; %NodeBlock
+; GCN-NEXT: s_cbranch_execz .LBB0_11
+; GCN-NEXT: ; %bb.3: ; %NodeBlock
; GCN-NEXT: v_cmp_lt_i32_e32 vcc, 25, v2
; GCN-NEXT: s_and_saveexec_b64 s[4:5], vcc
; GCN-NEXT: s_xor_b64 s[10:11], exec, s[4:5]
-; GCN-NEXT: s_cbranch_execz .LBB0_8
-; GCN-NEXT: ; %bb.5: ; %LeafBlock
+; GCN-NEXT: s_cbranch_execz .LBB0_7
+; GCN-NEXT: ; %bb.4: ; %LeafBlock
; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 26, v2
; GCN-NEXT: s_and_saveexec_b64 s[12:13], vcc
-; GCN-NEXT: s_cbranch_execz .LBB0_7
-; GCN-NEXT: ; %bb.6: ; %itofp-sw-default
+; GCN-NEXT: s_cbranch_execz .LBB0_6
+; GCN-NEXT: ; %bb.5: ; %itofp-sw-default
; GCN-NEXT: v_sub_u32_e32 v12, 0x66, v7
; GCN-NEXT: v_sub_u32_e32 v10, 64, v12
; GCN-NEXT: v_lshrrev_b64 v[8:9], v12, v[0:1]
@@ -102,13 +95,13 @@ define bfloat @sitofp_i128_to_bf16(i128 %x) {
; GCN-NEXT: v_or_b32_e32 v8, v15, v0
; GCN-NEXT: v_mov_b32_e32 v0, v8
; GCN-NEXT: v_mov_b32_e32 v1, v9
-; GCN-NEXT: .LBB0_7: ; %Flow1
+; GCN-NEXT: .LBB0_6: ; %Flow1
; GCN-NEXT: s_or_b64 exec, exec, s[12:13]
-; GCN-NEXT: .LBB0_8: ; %Flow2
+; GCN-NEXT: .LBB0_7: ; %Flow2
; GCN-NEXT: s_andn2_saveexec_b64 s[4:5], s[10:11]
-; GCN-NEXT: ; %bb.9: ; %itofp-sw-bb
+; GCN-NEXT: ; %bb.8: ; %itofp-sw-bb
; GCN-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1]
-; GCN-NEXT: ; %bb.10: ; %itofp-sw-epilog
+; GCN-NEXT: ; %bb.9: ; %itofp-sw-epilog
; GCN-NEXT: s_or_b64 exec, exec, s[4:5]
; GCN-NEXT: v_lshrrev_b32_e32 v4, 2, v0
; GCN-NEXT: v_and_or_b32 v0, v4, 1, v0
@@ -118,12 +111,10 @@ define bfloat @sitofp_i128_to_bf16(i128 %x) {
; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4
; GCN-NEXT: v_alignbit_b32 v8, v1, v0, 2
; GCN-NEXT: s_and_saveexec_b64 s[4:5], vcc
-; GCN-NEXT: ; %bb.11: ; %itofp-if-then20
-; GCN-NEXT: v_alignbit_b32 v8, v1, v0, 3
-; GCN-NEXT: v_mov_b32_e32 v6, v2
-; GCN-NEXT: ; %bb.12: ; %Flow
+; GCN-NEXT: s_cbranch_execnz .LBB0_14
+; GCN-NEXT: .LBB0_10: ; %Flow
; GCN-NEXT: s_or_b64 exec, exec, s[4:5]
-; GCN-NEXT: .LBB0_13: ; %Flow4
+; GCN-NEXT: .LBB0_11: ; %Flow4
; GCN-NEXT: s_or_b64 exec, exec, s[8:9]
; GCN-NEXT: v_and_b32_e32 v0, 0x80000000, v3
; GCN-NEXT: v_lshl_add_u32 v1, v6, 23, 1.0
@@ -136,10 +127,23 @@ define bfloat @sitofp_i128_to_bf16(i128 %x) {
; GCN-NEXT: v_cmp_u_f32_e32 vcc, v0, v0
; GCN-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc
; GCN-NEXT: v_lshrrev_b32_e32 v4, 16, v0
-; GCN-NEXT: .LBB0_14: ; %Flow5
+; GCN-NEXT: .LBB0_12: ; %Flow5
; GCN-NEXT: s_or_b64 exec, exec, s[6:7]
; GCN-NEXT: v_mov_b32_e32 v0, v4
; GCN-NEXT: s_setpc_b64 s[30:31]
+; GCN-NEXT: .LBB0_13: ; %itofp-if-else
+; GCN-NEXT: v_add_u32_e32 v2, 0xffffff98, v7
+; GCN-NEXT: v_lshlrev_b64 v[0:1], v2, v[0:1]
+; GCN-NEXT: v_cmp_gt_u32_e32 vcc, 64, v2
+; GCN-NEXT: v_cndmask_b32_e32 v8, 0, v0, vcc
+; GCN-NEXT: ; implicit-def: $vgpr2
+; GCN-NEXT: ; implicit-def: $vgpr0_vgpr1
+; GCN-NEXT: ; implicit-def: $vgpr4_vgpr5
+; GCN-NEXT: s_branch .LBB0_2
+; GCN-NEXT: .LBB0_14: ; %itofp-if-then20
+; GCN-NEXT: v_alignbit_b32 v8, v1, v0, 3
+; GCN-NEXT: v_mov_b32_e32 v6, v2
+; GCN-NEXT: s_branch .LBB0_10
%cvt = sitofp i128 %x to bfloat
ret bfloat %cvt
}
@@ -153,7 +157,7 @@ define bfloat @uitofp_i128_to_bf16(i128 %x) {
; GCN-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[4:5]
; GCN-NEXT: v_mov_b32_e32 v4, 0
; GCN-NEXT: s_and_saveexec_b64 s[6:7], vcc
-; GCN-NEXT: s_cbranch_execz .LBB1_14
+; GCN-NEXT: s_cbranch_execz .LBB1_12
; GCN-NEXT: ; %bb.1: ; %itofp-if-end
; GCN-NEXT: v_ffbh_u32_e32 v4, v2
; GCN-NEXT: v_add_u32_e32 v4, 32, v4
@@ -171,29 +175,22 @@ define bfloat @uitofp_i128_to_bf16(i128 %x) {
; GCN-NEXT: ; implicit-def: $vgpr7
; GCN-NEXT: s_and_saveexec_b64 s[4:5], vcc
; GCN-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
-; GCN-NEXT: ; %bb.2: ; %itofp-if-else
-; GCN-NEXT: v_add_u32_e32 v2, 0xffffff98, v6
-; GCN-NEXT: v_lshlrev_b64 v[0:1], v2, v[0:1]
-; GCN-NEXT: v_cmp_gt_u32_e32 vcc, 64, v2
-; GCN-NEXT: v_cndmask_b32_e32 v7, 0, v0, vcc
-; GCN-NEXT: ; implicit-def: $vgpr4
-; GCN-NEXT: ; implicit-def: $vgpr0_vgpr1
-; GCN-NEXT: ; implicit-def: $vgpr2_vgpr3
-; GCN-NEXT: ; %bb.3: ; %Flow3
+; GCN-NEXT: s_cbranch_execnz .LBB1_13
+; GCN-NEXT: .LBB1_2: ; %Flow3
; GCN-NEXT: s_or_saveexec_b64 s[8:9], s[4:5]
; GCN-NEXT: v_sub_u32_e32 v5, 0x7f, v6
; GCN-NEXT: s_xor_b64 exec, exec, s[8:9]
-; GCN-NEXT: s_cbranch_execz .LBB1_13
-; GCN-NEXT: ; %bb.4: ; %NodeBlock
+; GCN-NEXT: s_cbranch_execz .LBB1_11
+; GCN-NEXT: ; %bb.3: ; %NodeBlock
; GCN-NEXT: v_cmp_lt_i32_e32 vcc, 25, v4
; GCN-NEXT: s_and_saveexec_b64 s[4:5], vcc
; GCN-NEXT: s_xor_b64 s[10:11], exec, s[4:5]
-; GCN-NEXT: s_cbranch_execz .LBB1_8
-; GCN-NEXT: ; %bb.5: ; %LeafBlock
+; GCN-NEXT: s_cbranch_execz .LBB1_7
+; GCN-NEXT: ; %bb.4: ; %LeafBlock
; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 26, v4
; GCN-NEXT: s_and_saveexec_b64 s[12:13], vcc
-; GCN-NEXT: s_cbranch_execz .LBB1_7
-; GCN-NEXT: ; %bb.6: ; %itofp-sw-default
+; GCN-NEXT: s_cbranch_execz .LBB1_6
+; GCN-NEXT: ; %bb.5: ; %itofp-sw-default
; GCN-NEXT: v_sub_u32_e32 v11, 0x66, v6
; GCN-NEXT: v_sub_u32_e32 v9, 64, v11
; GCN-NEXT: v_lshrrev_b64 v[7:8], v11, v[0:1]
@@ -231,13 +228,13 @@ define bfloat @uitofp_i128_to_bf16(i128 %x) {
; GCN-NEXT: v_or_b32_e32 v7, v14, v0
; GCN-NEXT: v_mov_b32_e32 v0, v7
; GCN-NEXT: v_mov_b32_e32 v1, v8
-; GCN-NEXT: .LBB1_7: ; %Flow1
+; GCN-NEXT: .LBB1_6: ; %Flow1
; GCN-NEXT: s_or_b64 exec, exec, s[12:13]
-; GCN-NEXT: .LBB1_8: ; %Flow2
+; GCN-NEXT: .LBB1_7: ; %Flow2
; GCN-NEXT: s_andn2_saveexec_b64 s[4:5], s[10:11]
-; GCN-NEXT: ; %bb.9: ; %itofp-sw-bb
+; GCN-NEXT: ; %bb.8: ; %itofp-sw-bb
; GCN-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1]
-; GCN-NEXT: ; %bb.10: ; %itofp-sw-epilog
+; GCN-NEXT: ; %bb.9: ; %itofp-sw-epilog
; GCN-NEXT: s_or_b64 exec, exec, s[4:5]
; GCN-NEXT: v_lshrrev_b32_e32 v2, 2, v0
; GCN-NEXT: v_and_or_b32 v0, v2, 1, v0
@@ -247,12 +244,10 @@ define bfloat @uitofp_i128_to_bf16(i128 %x) {
; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2
; GCN-NEXT: v_alignbit_b32 v7, v1, v0, 2
; GCN-NEXT: s_and_saveexec_b64 s[4:5], vcc
-; GCN-NEXT: ; %bb.11: ; %itofp-if-then20
-; GCN-NEXT: v_alignbit_b32 v7, v1, v0, 3
-; GCN-NEXT: v_mov_b32_e32 v5, v4
-; GCN-NEXT: ; %bb.12: ; %Flow
+; GCN-NEXT: s_cbranch_execnz .LBB1_14
+; GCN-NEXT: .LBB1_10: ; %Flow
; GCN-NEXT: s_or_b64 exec, exec, s[4:5]
-; GCN-NEXT: .LBB1_13: ; %Flow4
+; GCN-NEXT: .LBB1_11: ; %Flow4
; GCN-NEXT: s_or_b64 exec, exec, s[8:9]
; GCN-NEXT: v_and_b32_e32 v0, 0x7fffff, v7
; GCN-NEXT: v_lshl_or_b32 v0, v5, 23, v0
@@ -264,10 +259,23 @@ define bfloat @uitofp_i128_to_bf16(i128 %x) {
; GCN-NEXT: v_cmp_u_f32_e32 vcc, v0, v0
; GCN-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc
; GCN-NEXT: v_lshrrev_b32_e32 v4, 16, v0
-; GCN-NEXT: .LBB1_14: ; %Flow5
+; GCN-NEXT: .LBB1_12: ; %Flow5
; GCN-NEXT: s_or_b64 exec, exec, s[6:7]
; GCN-NEXT: v_mov_b32_e32 v0, v4
; GCN-NEXT: s_setpc_b64 s[30:31]
+; GCN-NEXT: .LBB1_13: ; %itofp-if-else
+; GCN-NEXT: v_add_u32_e32 v2, 0xffffff98, v6
+; GCN-NEXT: v_lshlrev_b64 v[0:1], v2, v[0:1]
+; GCN-NEXT: v_cmp_gt_u32_e32 vcc, 64, v2
+; GCN-NEXT: v_cndmask_b32_e32 v7, 0, v0, vcc
+; GCN-NEXT: ; implicit-def: $vgpr4
+; GCN-NEXT: ; implicit-def: $vgpr0_vgpr1
+; GCN-NEXT: ; implicit-def: $vgpr2_vgpr3
+; GCN-NEXT: s_branch .LBB1_2
+; GCN-NEXT: .LBB1_14: ; %itofp-if-then20
+; GCN-NEXT: v_alignbit_b32 v7, v1, v0, 3
+; GCN-NEXT: v_mov_b32_e32 v5, v4
+; GCN-NEXT: s_branch .LBB1_10
%cvt = uitofp i128 %x to bfloat
ret bfloat %cvt
}
diff --git a/llvm/test/CodeGen/AMDGPU/itofp.i128.ll b/llvm/test/CodeGen/AMDGPU/itofp.i128.ll
index 2f9182e6e7c6a..0798a906c38b3 100644
--- a/llvm/test/CodeGen/AMDGPU/itofp.i128.ll
+++ b/llvm/test/CodeGen/AMDGPU/itofp.i128.ll
@@ -11,7 +11,7 @@ define float @sitofp_i128_to_f32(i128 %x) {
; SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[4:5]
; SDAG-NEXT: v_mov_b32_e32 v4, 0
; SDAG-NEXT: s_and_saveexec_b64 s[6:7], vcc
-; SDAG-NEXT: s_cbranch_execz .LBB0_14
+; SDAG-NEXT: s_cbranch_execz .LBB0_12
; SDAG-NEXT: ; %bb.1: ; %itofp-if-end
; SDAG-NEXT: v_sub_co_u32_e32 v4, vcc, 0, v0
; SDAG-NEXT: v_subb_co_u32_e32 v5, vcc, 0, v1, vcc
@@ -38,29 +38,22 @@ define float @sitofp_i128_to_f32(i128 %x) {
; SDAG-NEXT: v_cmp_gt_i32_e32 vcc, 25, v2
; SDAG-NEXT: s_and_saveexec_b64 s[4:5], vcc
; SDAG-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
-; SDAG-NEXT: ; %bb.2: ; %itofp-if-else
-; SDAG-NEXT: v_add_u32_e32 v2, 0xffffff98, v7
-; SDAG-NEXT: v_lshlrev_b64 v[0:1], v2, v[0:1]
-; SDAG-NEXT: v_cmp_gt_u32_e32 vcc, 64, v2
-; SDAG-NEXT: v_cndmask_b32_e32 v8, 0, v0, vcc
-; SDAG-NEXT: ; implicit-def: $vgpr2
-; SDAG-NEXT: ; implicit-def: $vgpr0_vgpr1
-; SDAG-NEXT: ; implicit-def: $vgpr4_vgpr5
-; SDAG-NEXT: ; %bb.3: ; %Flow3
+; SDAG-NEXT: s_cbranch_execnz .LBB0_13
+; SDAG-NEXT: .LBB0_2: ; %Flow3
; SDAG-NEXT: s_or_saveexec_b64 s[8:9], s[4:5]
; SDAG-NEXT: v_sub_u32_e32 v6, 0x7f, v7
; SDAG-NEXT: s_xor_b64 exec, exec, s[8:9]
-; SDAG-NEXT: s_cbranch_execz .LBB0_13
-; SDAG-NEXT: ; %bb.4: ; %NodeBlock
+; SDAG-NEXT: s_cbranch_execz .LBB0_11
+; SDAG-NEXT: ; %bb.3: ; %NodeBlock
; SDAG-NEXT: v_cmp_lt_i32_e32 vcc, 25, v2
; SDAG-NEXT: s_and_saveexec_b64 s[4:5], vcc
; SDAG-NEXT: s_xor_b64 s[10:11], exec, s[4:5]
-; SDAG-NEXT: s_cbranch_execz .LBB0_8
-; SDAG-NEXT: ; %bb.5: ; %LeafBlock
+; SDAG-NEXT: s_cbranch_execz .LBB0_7
+; SDAG-NEXT: ; %bb.4: ; %LeafBlock
; SDAG-NEXT: v_cmp_ne_u32_e32 vcc, 26, v2
; SDAG-NEXT: s_and_saveexec_b64 s[12:13], vcc
-; SDAG-NEXT: s_cbranch_execz .LBB0_7
-; SDAG-NEXT: ; %bb.6: ; %itofp-sw-default
+; SDAG-NEXT: s_cbranch_execz .LBB0_6
+; SDAG-NEXT: ; %bb.5: ; %itofp-sw-default
; SDAG-NEXT: v_sub_u32_e32 v12, 0x66, v7
; SDAG-NEXT: v_sub_u32_e32 v10, 64, v12
; SDAG-NEXT: v_lshrrev_b64 v[8:9], v12, v[0:1]
@@ -98,13 +91,13 @@ define float @sitofp_i128_to_f32(i128 %x) {
; SDAG-NEXT: v_or_b32_e32 v8, v15, v0
; SDAG-NEXT: v_mov_b32_e32 v0, v8
; SDAG-NEXT: v_mov_b32_e32 v1, v9
-; SDAG-NEXT: .LBB0_7: ; %Flow1
+; SDAG-NEXT: .LBB0_6: ; %Flow1
; SDAG-NEXT: s_or_b64 exec, exec, s[12:13]
-; SDAG-NEXT: .LBB0_8: ; %Flow2
+; SDAG-NEXT: .LBB0_7: ; %Flow2
; SDAG-NEXT: s_andn2_saveexec_b64 s[4:5], s[10:11]
-; SDAG-NEXT: ; %bb.9: ; %itofp-sw-bb
+; SDAG-NEXT: ; %bb.8: ; %itofp-sw-bb
; SDAG-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1]
-; SDAG-NEXT: ; %bb.10: ; %itofp-sw-epilog
+; SDAG-NEXT: ; %bb.9: ; %itofp-sw-epilog
; SDAG-NEXT: s_or_b64 exec, exec, s[4:5]
; SDAG-NEXT: v_lshrrev_b32_e32 v4, 2, v0
; SDAG-NEXT: v_and_or_b32 v0, v4, 1, v0
@@ -114,21 +107,32 @@ define float @sitofp_i128_to_f32(i128 %x) {
; SDAG-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4
; SDAG-NEXT: v_alignbit_b32 v8, v1, v0, 2
; SDAG-NEXT: s_and_saveexec_b64 s[4:5], vcc
-; SDAG-NEXT: ; %bb.11: ; %itofp-if-then20
-; SDAG-NEXT: v_alignbit_b32 v8, v1, v0, 3
-; SDAG-NEXT: v_mov_b32_e32 v6, v2
-; SDAG-NEXT: ; %bb.12: ; %Flow
+; SDAG-NEXT: s_cbranch_execnz .LBB0_14
+; SDAG-NEXT: .LBB0_10: ; %Flow
; SDAG-NEXT: s_or_b64 exec, exec, s[4:5]
-; SDAG-NEXT: .LBB0_13: ; %Flow4
+; SDAG-NEXT: .LBB0_11: ; %Flow4
; SDAG-NEXT: s_or_b64 exec, exec, s[8:9]
; SDAG-NEXT: v_and_b32_e32 v0, 0x80000000, v3
; SDAG-NEXT: v_lshl_add_u32 v1, v6, 23, 1.0
; SDAG-NEXT: v_and_b32_e32 v2, 0x7fffff, v8
; SDAG-NEXT: v_or3_b32 v4, v2, v0, v1
-; SDAG-NEXT: .LBB0_14: ; %Flow5
+; SDAG-NEXT: .LBB0_12: ; %Flow5
; SDAG-NEXT: s_or_b64 exec, exec, s[6:7]
; SDAG-NEXT: v_mov_b32_e32 v0, v4
; SDAG-NEXT: s_setpc_b64 s[30:31]
+; SDAG-NEXT: .LBB0_13: ; %itofp-if-else
+; SDAG-NEXT: v_add_u32_e32 v2, 0xffffff98, v7
+; SDAG-NEXT: v_lshlrev_b64 v[0:1], v2, v[0:1]
+; SDAG-NEXT: v_cmp_gt_u32_e32 vcc, 64, v2
+; SDAG-NEXT: v_cndmask_b32_e32 v8, 0, v0, vcc
+; SDAG-NEXT: ; implicit-def: $vgpr2
+; SDAG-NEXT: ; implicit-def: $vgpr0_vgpr1
+; SDAG-NEXT: ; implicit-def: $vgpr4_vgpr5
+; SDAG-NEXT: s_branch .LBB0_2
+; SDAG-NEXT: .LBB0_14: ; %itofp-if-then20
+; SDAG-NEXT: v_alignbit_b32 v8, v1, v0, 3
+; SDAG-NEXT: v_mov_b32_e32 v6, v2
+; SDAG-NEXT: s_branch .LBB0_10
;
; GISEL-LABEL: sitofp_i128_to_f32:
; GISEL: ; %bb.0: ; %itofp-entry
@@ -139,7 +143,7 @@ define float @sitofp_i128_to_f32(i128 %x) {
; GISEL-NEXT: s_mov_b32 s4, 0
; GISEL-NEXT: v_mov_b32_e32 v4, s4
; GISEL-NEXT: s_and_saveexec_b64 s[6:7], vcc
-; GISEL-NEXT: s_cbranch_execz .LBB0_14
+; GISEL-NEXT: s_cbranch_execz .LBB0_12
; GISEL-NEXT: ; %bb.1: ; %itofp-if-end
; GISEL-NEXT: v_ashrrev_i32_e32 v6, 31, v3
; GISEL-NEXT: v_xor_b32_e32 v0, v6, v0
@@ -166,29 +170,22 @@ define float @sitofp_i128_to_f32(i128 %x) {
; GISEL-NEXT: ; implicit-def: $vgpr4
; GISEL-NEXT: s_and_saveexec_b64 s[4:5], vcc
; GISEL-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
-; GISEL-NEXT: ; %bb.2: ; %itofp-if-else
-; GISEL-NEXT: v_add_u32_e32 v2, 0xffffff98, v5
-; GISEL-NEXT: v_lshlrev_b64 v[0:1], v2, v[0:1]
-; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v2
-; GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v0, vcc
-; GISEL-NEXT: ; implicit-def: $vgpr7
-; GISEL-NEXT: ; implicit-def: $vgpr0
-; GISEL-NEXT: ; implicit-def: $vgpr2
-; GISEL-NEXT: ; %bb.3: ; %Flow3
+; GISEL-NEXT: s_cbranch_execnz .LBB0_13
+; GISEL-NEXT: .LBB0_2: ; %Flow3
; GISEL-NEXT: s_or_saveexec_b64 s[8:9], s[4:5]
; GISEL-NEXT: v_sub_u32_e32 v8, 0x7f, v5
; GISEL-NEXT: s_xor_b64 exec, exec, s[8:9]
-; GISEL-NEXT: s_cbranch_execz .LBB0_13
-; GISEL-NEXT: ; %bb.4: ; %NodeBlock
+; GISEL-NEXT: s_cbranch_execz .LBB0_11
+; GISEL-NEXT: ; %bb.3: ; %NodeBlock
; GISEL-NEXT: v_cmp_le_i32_e32 vcc, 26, v7
; GISEL-NEXT: s_and_saveexec_b64 s[4:5], vcc
; GISEL-NEXT: s_xor_b64 s[10:11], exec, s[4:5]
-; GISEL-NEXT: s_cbranch_execz .LBB0_8
-; GISEL-NEXT: ; %bb.5: ; %LeafBlock
+; GISEL-NEXT: s_cbranch_execz .LBB0_7
+; GISEL-NEXT: ; %bb.4: ; %LeafBlock
; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 26, v7
; GISEL-NEXT: s_and_saveexec_b64 s[12:13], vcc
-; GISEL-NEXT: s_cbranch_execz .LBB0_7
-; GISEL-NEXT: ; %bb.6: ; %itofp-sw-default
+; GISEL-NEXT: s_cbranch_execz .LBB0_6
+; GISEL-NEXT: ; %bb.5: ; %itofp-sw-default
; GISEL-NEXT: v_sub_u32_e32 v4, 0x66, v5
; GISEL-NEXT: v_sub_u32_e32 v11, 64, v4
; GISEL-NEXT: v_lshrrev_b64 v[9:10], v4, v[0:1]
@@ -230,13 +227,13 @@ define float @sitofp_i128_to_f32(i128 %x) {
; GISEL-NEXT: v_mov_b32_e32 v1, v4
; GISEL-NEXT: v_mov_b32_e32 v2, v5
; GISEL-NEXT: v_mov_b32_e32 v3, v6
-; GISEL-NEXT: .LBB0_7: ; %Flow1
+; GISEL-NEXT: .LBB0_6: ; %Flow1
; GISEL-NEXT: s_or_b64 exec, exec, s[12:13]
-; GISEL-NEXT: .LBB0_8: ; %Flow2
+; GISEL-NEXT: .LBB0_7: ; %Flow2
; GISEL-NEXT: s_andn2_saveexec_b64 s[4:5], s[10:11]
-; GISEL-NEXT: ; %bb.9: ; %itofp-sw-bb
+; GISEL-NEXT: ; %bb.8: ; %itofp-sw-bb
; GISEL-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1]
-; GISEL-NEXT: ; %bb.10: ; %itofp-sw-epilog
+; GISEL-NEXT: ; %bb.9: ; %itofp-sw-epilog
; GISEL-NEXT: s_or_b64 exec, exec, s[4:5]
; GISEL-NEXT: v_bfe_u32 v2, v0, 2, 1
; GISEL-NEXT: v_or_b32_e32 v0, v0, v2
@@ -247,21 +244,32 @@ define float @sitofp_i128_to_f32(i128 %x) {
; GISEL-NEXT: v_lshrrev_b64 v[4:5], 2, v[0:1]
; GISEL-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[2:3]
; GISEL-NEXT: s_and_saveexec_b64 s[4:5], vcc
-; GISEL-NEXT: ; %bb.11: ; %itofp-if-then20
-; GISEL-NEXT: v_lshrrev_b64 v[4:5], 3, v[0:1]
-; GISEL-NEXT: v_mov_b32_e32 v8, v7
-; GISEL-NEXT: ; %bb.12: ; %Flow
+; GISEL-NEXT: s_cbranch_execnz .LBB0_14
+; GISEL-NEXT: .LBB0_10: ; %Flow
; GISEL-NEXT: s_or_b64 exec, exec, s[4:5]
-; GISEL-NEXT: .LBB0_13: ; %Flow4
+; GISEL-NEXT: .LBB0_11: ; %Flow4
; GISEL-NEXT: s_or_b64 exec, exec, s[8:9]
; GISEL-NEXT: v_and_b32_e32 v0, 0x80000000, v6
; GISEL-NEXT: v_lshl_add_u32 v1, v8, 23, 1.0
; GISEL-NEXT: v_and_b32_e32 v2, 0x7fffff, v4
; GISEL-NEXT: v_or3_b32 v4, v2, v0, v1
-; GISEL-NEXT: .LBB0_14: ; %Flow5
+; GISEL-NEXT: .LBB0_12: ; %Flow5
; GISEL-NEXT: s_or_b64 exec, exec, s[6:7]
; GISEL-NEXT: v_mov_b32_e32 v0, v4
; GISEL-NEXT: s_setpc_b64 s[30:31]
+; GISEL-NEXT: .LBB0_13: ; %itofp-if-else
+; GISEL-NEXT: v_add_u32_e32 v2, 0xffffff98, v5
+; GISEL-NEXT: v_lshlrev_b64 v[0:1], v2, v[0:1]
+; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v2
+; GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v0, vcc
+; GISEL-NEXT: ; implicit-def: $vgpr7
+; GISEL-NEXT: ; implicit-def: $vgpr0
+; GISEL-NEXT: ; implicit-def: $vgpr2
+; GISEL-NEXT: s_branch .LBB0_2
+; GISEL-NEXT: .LBB0_14: ; %itofp-if-then20
+; GISEL-NEXT: v_lshrrev_b64 v[4:5], 3, v[0:1]
+; GISEL-NEXT: v_mov_b32_e32 v8, v7
+; GISEL-NEXT: s_branch .LBB0_10
%cvt = sitofp i128 %x to float
ret float %cvt
}
@@ -275,7 +283,7 @@ define float @uitofp_i128_to_f32(i128 %x) {
; SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[4:5]
; SDAG-NEXT: v_mov_b32_e32 v4, 0
; SDAG-NEXT: s_and_saveexec_b64 s[6:7], vcc
-; SDAG-NEXT: s_cbranch_execz .LBB1_14
+; SDAG-NEXT: s_cbranch_execz .LBB1_12
; SDAG-NEXT: ; %bb.1: ; %itofp-if-end
; SDAG-NEXT: v_ffbh_u32_e32 v4, v2
; SDAG-NEXT: v_add_u32_e32 v4, 32, v4
@@ -293,29 +301,22 @@ define float @uitofp_i128_to_f32(i128 %x) {
; SDAG-NEXT: ; implicit-def: $vgpr7
; SDAG-NEXT: s_and_saveexec_b64 s[4:5], vcc
; SDAG-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
-; SDAG-NEXT: ; %bb.2: ; %itofp-if-else
-; SDAG-NEXT: v_add_u32_e32 v2, 0xffffff98, v6
-; SDAG-NEXT: v_lshlrev_b64 v[0:1], v2, v[0:1]
-; SDAG-NEXT: v_cmp_gt_u32_e32 vcc, 64, v2
-; SDAG-NEXT: v_cndmask_b32_e32 v7, 0, v0, vcc
-; SDAG-NEXT: ; implicit-def: $vgpr4
-; SDAG-NEXT: ; implicit-def: $vgpr0_vgpr1
-; SDAG-NEXT: ; implicit-def: $vgpr2_vgpr3
-; SDAG-NEXT: ; %bb.3: ; %Flow3
+; SDAG-NEXT: s_cbranch_execnz .LBB1_13
+; SDAG-NEXT: .LBB1_2: ; %Flow3
; SDAG-NEXT: s_or_saveexec_b64 s[8:9], s[4:5]
; SDAG-NEXT: v_sub_u32_e32 v5, 0x7f, v6
; SDAG-NEXT: s_xor_b64 exec, exec, s[8:9]
-; SDAG-NEXT: s_cbranch_execz .LBB1_13
-; SDAG-NEXT: ; %bb.4: ; %NodeBlock
+; SDAG-NEXT: s_cbranch_execz .LBB1_11
+; SDAG-NEXT: ; %bb.3: ; %NodeBlock
; SDAG-NEXT: v_cmp_lt_i32_e32 vcc, 25, v4
; SDAG-NEXT: s_and_saveexec_b64 s[4:5], vcc
; SDAG-NEXT: s_xor_b64 s[10:11], exec, s[4:5]
-; SDAG-NEXT: s_cbranch_execz .LBB1_8
-; SDAG-NEXT: ; %bb.5: ; %LeafBlock
+; SDAG-NEXT: s_cbranch_execz .LBB1_7
+; SDAG-NEXT: ; %bb.4: ; %LeafBlock
; SDAG-NEXT: v_cmp_ne_u32_e32 vcc, 26, v4
; SDAG-NEXT: s_and_saveexec_b64 s[12:13], vcc
-; SDAG-NEXT: s_cbranch_execz .LBB1_7
-; SDAG-NEXT: ; %bb.6: ; %itofp-sw-default
+; SDAG-NEXT: s_cbranch_execz .LBB1_6
+; SDAG-NEXT: ; %bb.5: ; %itofp-sw-default
; SDAG-NEXT: v_sub_u32_e32 v11, 0x66, v6
; SDAG-NEXT: v_sub_u32_e32 v9, 64, v11
; SDAG-NEXT: v_lshrrev_b64 v[7:8], v11, v[0:1]
@@ -353,13 +354,13 @@ define float @uitofp_i128_to_f32(i128 %x) {
; SDAG-NEXT: v_or_b32_e32 v7, v14, v0
; SDAG-NEXT: v_mov_b32_e32 v0, v7
; SDAG-NEXT: v_mov_b32_e32 v1, v8
-; SDAG-NEXT: .LBB1_7: ; %Flow1
+; SDAG-NEXT: .LBB1_6: ; %Flow1
; SDAG-NEXT: s_or_b64 exec, exec, s[12:13]
-; SDAG-NEXT: .LBB1_8: ; %Flow2
+; SDAG-NEXT: .LBB1_7: ; %Flow2
; SDAG-NEXT: s_andn2_saveexec_b64 s[4:5], s[10:11]
-; SDAG-NEXT: ; %bb.9: ; %itofp-sw-bb
+; SDAG-NEXT: ; %bb.8: ; %itofp-sw-bb
; SDAG-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1]
-; SDAG-NEXT: ; %bb.10: ; %itofp-sw-epilog
+; SDAG-NEXT: ; %bb.9: ; %itofp-sw-epilog
; SDAG-NEXT: s_or_b64 exec, exec, s[4:5]
; SDAG-NEXT: v_lshrrev_b32_e32 v2, 2, v0
; SDAG-NEXT: v_and_or_b32 v0, v2, 1, v0
@@ -369,20 +370,31 @@ define float @uitofp_i128_to_f32(i128 %x) {
; SDAG-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2
; SDAG-NEXT: v_alignbit_b32 v7, v1, v0, 2
; SDAG-NEXT: s_and_saveexec_b64 s[4:5], vcc
-; SDAG-NEXT: ; %bb.11: ; %itofp-if-then20
-; SDAG-NEXT: v_alignbit_b32 v7, v1, v0, 3
-; SDAG-NEXT: v_mov_b32_e32 v5, v4
-; SDAG-NEXT: ; %bb.12: ; %Flow
+; SDAG-NEXT: s_cbranch_execnz .LBB1_14
+; SDAG-NEXT: .LBB1_10: ; %Flow
; SDAG-NEXT: s_or_b64 exec, exec, s[4:5]
-; SDAG-NEXT: .LBB1_13: ; %Flow4
+; SDAG-NEXT: .LBB1_11: ; %Flow4
; SDAG-NEXT: s_or_b64 exec, exec, s[8:9]
; SDAG-NEXT: v_and_b32_e32 v0, 0x7fffff, v7
; SDAG-NEXT: v_lshl_or_b32 v0, v5, 23, v0
; SDAG-NEXT: v_add_u32_e32 v4, 1.0, v0
-; SDAG-NEXT: .LBB1_14: ; %Flow5
+; SDAG-NEXT: .LBB1_12: ; %Flow5
; SDAG-NEXT: s_or_b64 exec, exec, s[6:7]
; SDAG-NEXT: v_mov_b32_e32 v0, v4
; SDAG-NEXT: s_setpc_b64 s[30:31]
+; SDAG-NEXT: .LBB1_13: ; %itofp-if-else
+; SDAG-NEXT: v_add_u32_e32 v2, 0xffffff98, v6
+; SDAG-NEXT: v_lshlrev_b64 v[0:1], v2, v[0:1]
+; SDAG-NEXT: v_cmp_gt_u32_e32 vcc, 64, v2
+; SDAG-NEXT: v_cndmask_b32_e32 v7, 0, v0, vcc
+; SDAG-NEXT: ; implicit-def: $vgpr4
+; SDAG-NEXT: ; implicit-def: $vgpr0_vgpr1
+; SDAG-NEXT: ; implicit-def: $vgpr2_vgpr3
+; SDAG-NEXT: s_branch .LBB1_2
+; SDAG-NEXT: .LBB1_14: ; %itofp-if-then20
+; SDAG-NEXT: v_alignbit_b32 v7, v1, v0, 3
+; SDAG-NEXT: v_mov_b32_e32 v5, v4
+; SDAG-NEXT: s_branch .LBB1_10
;
; GISEL-LABEL: uitofp_i128_to_f32:
; GISEL: ; %bb.0: ; %itofp-entry
@@ -393,7 +405,7 @@ define float @uitofp_i128_to_f32(i128 %x) {
; GISEL-NEXT: s_mov_b32 s4, 0
; GISEL-NEXT: v_mov_b32_e32 v4, s4
; GISEL-NEXT: s_and_saveexec_b64 s[6:7], vcc
-; GISEL-NEXT: s_cbranch_execz .LBB1_14
+; GISEL-NEXT: s_cbranch_execz .LBB1_12
; GISEL-NEXT: ; %bb.1: ; %itofp-if-end
; GISEL-NEXT: v_ffbh_u32_e32 v5, v0
; GISEL-NEXT: v_ffbh_u32_e32 v4, v1
@@ -411,29 +423,22 @@ define float @uitofp_i128_to_f32(i128 %x) {
; GISEL-NEXT: ; implicit-def: $vgpr4
; GISEL-NEXT: s_and_saveexec_b64 s[4:5], vcc
; GISEL-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
-; GISEL-NEXT: ; %bb.2: ; %itofp-if-else
-; GISEL-NEXT: v_add_u32_e32 v2, 0xffffff98, v5
-; GISEL-NEXT: v_lshlrev_b64 v[0:1], v2, v[0:1]
-; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v2
-; GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v0, vcc
-; GISEL-NEXT: ; implicit-def: $vgpr6
-; GISEL-NEXT: ; implicit-def: $vgpr0
-; GISEL-NEXT: ; implicit-def: $vgpr2
-; GISEL-NEXT: ; %bb.3: ; %Flow3
+; GISEL-NEXT: s_cbranch_execnz .LBB1_13
+; GISEL-NEXT: .LBB1_2: ; %Flow3
; GISEL-NEXT: s_or_saveexec_b64 s[8:9], s[4:5]
; GISEL-NEXT: v_sub_u32_e32 v7, 0x7f, v5
; GISEL-NEXT: s_xor_b64 exec, exec, s[8:9]
-; GISEL-NEXT: s_cbranch_execz .LBB1_13
-; GISEL-NEXT: ; %bb.4: ; %NodeBlock
+; GISEL-NEXT: s_cbranch_execz .LBB1_11
+; GISEL-NEXT: ; %bb.3: ; %NodeBlock
; GISEL-NEXT: v_cmp_le_i32_e32 vcc, 26, v6
; GISEL-NEXT: s_and_saveexec_b64 s[4:5], vcc
; GISEL-NEXT: s_xor_b64 s[10:11], exec, s[4:5]
-; GISEL-NEXT: s_cbranch_execz .LBB1_8
-; GISEL-NEXT: ; %bb.5: ; %LeafBlock
+; GISEL-NEXT: s_cbranch_execz .LBB1_7
+; GISEL-NEXT: ; %bb.4: ; %LeafBlock
; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 26, v6
; GISEL-NEXT: s_and_saveexec_b64 s[12:13], vcc
-; GISEL-NEXT: s_cbranch_execz .LBB1_7
-; GISEL-NEXT: ; %bb.6: ; %itofp-sw-default
+; GISEL-NEXT: s_cbranch_execz .LBB1_6
+; GISEL-NEXT: ; %bb.5: ; %itofp-sw-default
; GISEL-NEXT: v_sub_u32_e32 v4, 0x66, v5
; GISEL-NEXT: v_sub_u32_e32 v10, 64, v4
; GISEL-NEXT: v_lshrrev_b64 v[8:9], v4, v[0:1]
@@ -475,13 +480,13 @@ define float @uitofp_i128_to_f32(i128 %x) {
; GISEL-NEXT: v_mov_b32_e32 v1, v4
; GISEL-NEXT: v_mov_b32_e32 v2, v5
; GISEL-NEXT: v_mov_b32_e32 v3, v6
-; GISEL-NEXT: .LBB1_7: ; %Flow1
+; GISEL-NEXT: .LBB1_6: ; %Flow1
; GISEL-NEXT: s_or_b64 exec, exec, s[12:13]
-; GISEL-NEXT: .LBB1_8: ; %Flow2
+; GISEL-NEXT: .LBB1_7: ; %Flow2
; GISEL-NEXT: s_andn2_saveexec_b64 s[4:5], s[10:11]
-; GISEL-NEXT: ; %bb.9: ; %itofp-sw-bb
+; GISEL-NEXT: ; %bb.8: ; %itofp-sw-bb
; GISEL-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1]
-; GISEL-NEXT: ; %bb.10: ; %itofp-sw-epilog
+; GISEL-NEXT: ; %bb.9: ; %itofp-sw-epilog
; GISEL-NEXT: s_or_b64 exec, exec, s[4:5]
; GISEL-NEXT: v_bfe_u32 v2, v0, 2, 1
; GISEL-NEXT: v_or_b32_e32 v0, v0, v2
@@ -492,20 +497,31 @@ define float @uitofp_i128_to_f32(i128 %x) {
; GISEL-NEXT: v_lshrrev_b64 v[4:5], 2, v[0:1]
; GISEL-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[2:3]
; GISEL-NEXT: s_and_saveexec_b64 s[4:5], vcc
-; GISEL-NEXT: ; %bb.11: ; %itofp-if-then20
-; GISEL-NEXT: v_lshrrev_b64 v[4:5], 3, v[0:1]
-; GISEL-NEXT: v_mov_b32_e32 v7, v6
-; GISEL-NEXT: ; %bb.12: ; %Flow
+; GISEL-NEXT: s_cbranch_execnz .LBB1_14
+; GISEL-NEXT: .LBB1_10: ; %Flow
; GISEL-NEXT: s_or_b64 exec, exec, s[4:5]
-; GISEL-NEXT: .LBB1_13: ; %Flow4
+; GISEL-NEXT: .LBB1_11: ; %Flow4
; GISEL-NEXT: s_or_b64 exec, exec, s[8:9]
; GISEL-NEXT: v_lshl_add_u32 v0, v7, 23, 1.0
; GISEL-NEXT: v_mov_b32_e32 v1, 0x7fffff
; GISEL-NEXT: v_and_or_b32 v4, v4, v1, v0
-; GISEL-NEXT: .LBB1_14: ; %Flow5
+; GISEL-NEXT: .LBB1_12: ; %Flow5
; GISEL-NEXT: s_or_b64 exec, exec, s[6:7]
; GISEL-NEXT: v_mov_b32_e32 v0, v4
; GISEL-NEXT: s_setpc_b64 s[30:31]
+; GISEL-NEXT: .LBB1_13: ; %itofp-if-else
+; GISEL-NEXT: v_add_u32_e32 v2, 0xffffff98, v5
+; GISEL-NEXT: v_lshlrev_b64 v[0:1], v2, v[0:1]
+; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v2
+; GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v0, vcc
+; GISEL-NEXT: ; implicit-def: $vgpr6
+; GISEL-NEXT: ; implicit-def: $vgpr0
+; GISEL-NEXT: ; implicit-def: $vgpr2
+; GISEL-NEXT: s_branch .LBB1_2
+; GISEL-NEXT: .LBB1_14: ; %itofp-if-then20
+; GISEL-NEXT: v_lshrrev_b64 v[4:5], 3, v[0:1]
+; GISEL-NEXT: v_mov_b32_e32 v7, v6
+; GISEL-NEXT: s_branch .LBB1_10
%cvt = uitofp i128 %x to float
ret float %cvt
}
@@ -522,7 +538,7 @@ define double @sitofp_i128_to_f64(i128 %x) {
; SDAG-NEXT: v_mov_b32_e32 v0, 0
; SDAG-NEXT: v_mov_b32_e32 v1, 0
; SDAG-NEXT: s_and_saveexec_b64 s[6:7], vcc
-; SDAG-NEXT: s_cbranch_execz .LBB2_14
+; SDAG-NEXT: s_cbranch_execz .LBB2_12
; SDAG-NEXT: ; %bb.1: ; %itofp-if-end
; SDAG-NEXT: v_sub_co_u32_e32 v0, vcc, 0, v4
; SDAG-NEXT: v_subb_co_u32_e32 v1, vcc, 0, v5, vcc
@@ -550,30 +566,22 @@ define double @sitofp_i128_to_f64(i128 %x) {
; SDAG-NEXT: ; implicit-def: $vgpr0_vgpr1
; SDAG-NEXT: s_and_saveexec_b64 s[4:5], vcc
; SDAG-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
-; SDAG-NEXT: ; %bb.2: ; %itofp-if-else
-; SDAG-NEXT: v_add_u32_e32 v2, 0xffffffb5, v9
-; SDAG-NEXT: v_lshlrev_b64 v[0:1], v2, v[4:5]
-; SDAG-NEXT: v_cmp_gt_u32_e32 vcc, 64, v2
-; SDAG-NEXT: v_cndmask_b32_e32 v10, 0, v1, vcc
-; SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
-; SDAG-NEXT: ; implicit-def: $vgpr2
-; SDAG-NEXT: ; implicit-def: $vgpr6_vgpr7
-; SDAG-NEXT: ; implicit-def: $vgpr4_vgpr5
-; SDAG-NEXT: ; %bb.3: ; %Flow3
+; SDAG-NEXT: s_cbranch_execnz .LBB2_13
+; SDAG-NEXT: .LBB2_2: ; %Flow3
; SDAG-NEXT: s_or_saveexec_b64 s[8:9], s[4:5]
; SDAG-NEXT: v_sub_u32_e32 v8, 0x7f, v9
; SDAG-NEXT: s_xor_b64 exec, exec, s[8:9]
-; SDAG-NEXT: s_cbranch_execz .LBB2_13
-; SDAG-NEXT: ; %bb.4: ; %NodeBlock
+; SDAG-NEXT: s_cbranch_execz .LBB2_11
+; SDAG-NEXT: ; %bb.3: ; %NodeBlock
; SDAG-NEXT: v_cmp_lt_i32_e32 vcc, 54, v2
; SDAG-NEXT: s_and_saveexec_b64 s[4:5], vcc
; SDAG-NEXT: s_xor_b64 s[10:11], exec, s[4:5]
-; SDAG-NEXT: s_cbranch_execz .LBB2_8
-; SDAG-NEXT: ; %bb.5: ; %LeafBlock
+; SDAG-NEXT: s_cbranch_execz .LBB2_7
+; SDAG-NEXT: ; %bb.4: ; %LeafBlock
; SDAG-NEXT: v_cmp_ne_u32_e32 vcc, 55, v2
; SDAG-NEXT: s_and_saveexec_b64 s[12:13], vcc
-; SDAG-NEXT: s_cbranch_execz .LBB2_7
-; SDAG-NEXT: ; %bb.6: ; %itofp-sw-default
+; SDAG-NEXT: s_cbranch_execz .LBB2_6
+; SDAG-NEXT: ; %bb.5: ; %itofp-sw-default
; SDAG-NEXT: v_sub_u32_e32 v12, 0x49, v9
; SDAG-NEXT: v_sub_u32_e32 v10, 64, v12
; SDAG-NEXT: v_lshrrev_b64 v[0:1], v12, v[4:5]
@@ -616,16 +624,16 @@ define double @sitofp_i128_to_f64(i128 %x) {
; SDAG-NEXT: v_mov_b32_e32 v5, v1
; SDAG-NEXT: v_mov_b32_e32 v4, v0
; SDAG-NEXT: v_mov_b32_e32 v7, v11
-; SDAG-NEXT: .LBB2_7: ; %Flow1
+; SDAG-NEXT: .LBB2_6: ; %Flow1
; SDAG-NEXT: s_or_b64 exec, exec, s[12:13]
-; SDAG-NEXT: .LBB2_8: ; %Flow2
+; SDAG-NEXT: .LBB2_7: ; %Flow2
; SDAG-NEXT: s_andn2_saveexec_b64 s[4:5], s[10:11]
-; SDAG-NEXT: ; %bb.9: ; %itofp-sw-bb
+; SDAG-NEXT: ; %bb.8: ; %itofp-sw-bb
; SDAG-NEXT: v_lshlrev_b64 v[6:7], 1, v[6:7]
; SDAG-NEXT: v_lshrrev_b32_e32 v0, 31, v5
; SDAG-NEXT: v_lshlrev_b64 v[4:5], 1, v[4:5]
; SDAG-NEXT: v_or_b32_e32 v6, v6, v0
-; SDAG-NEXT: ; %bb.10: ; %itofp-sw-epilog
+; SDAG-NEXT: ; %bb.9: ; %itofp-sw-epilog
; SDAG-NEXT: s_or_b64 exec, exec, s[4:5]
; SDAG-NEXT: v_lshrrev_b32_e32 v0, 2, v4
; SDAG-NEXT: v_and_or_b32 v0, v0, 1, v4
@@ -638,23 +646,35 @@ define double @sitofp_i128_to_f64(i128 %x) {
; SDAG-NEXT: v_and_b32_e32 v1, 0x800000, v5
; SDAG-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1
; SDAG-NEXT: s_and_saveexec_b64 s[4:5], vcc
-; SDAG-NEXT: ; %bb.11: ; %itofp-if-then20
-; SDAG-NEXT: v_lshrrev_b64 v[0:1], 3, v[4:5]
-; SDAG-NEXT: v_lshlrev_b32_e32 v4, 29, v6
-; SDAG-NEXT: v_or_b32_e32 v10, v1, v4
-; SDAG-NEXT: v_mov_b32_e32 v8, v2
-; SDAG-NEXT: ; %bb.12: ; %Flow
+; SDAG-NEXT: s_cbranch_execnz .LBB2_14
+; SDAG-NEXT: .LBB2_10: ; %Flow
; SDAG-NEXT: s_or_b64 exec, exec, s[4:5]
-; SDAG-NEXT: .LBB2_13: ; %Flow4
+; SDAG-NEXT: .LBB2_11: ; %Flow4
; SDAG-NEXT: s_or_b64 exec, exec, s[8:9]
; SDAG-NEXT: v_mov_b32_e32 v2, 0x3ff00000
; SDAG-NEXT: v_and_b32_e32 v1, 0x80000000, v3
; SDAG-NEXT: v_lshl_add_u32 v2, v8, 20, v2
; SDAG-NEXT: v_and_b32_e32 v3, 0xfffff, v10
; SDAG-NEXT: v_or3_b32 v1, v3, v1, v2
-; SDAG-NEXT: .LBB2_14: ; %Flow5
+; SDAG-NEXT: .LBB2_12: ; %Flow5
; SDAG-NEXT: s_or_b64 exec, exec, s[6:7]
; SDAG-NEXT: s_setpc_b64 s[30:31]
+; SDAG-NEXT: .LBB2_13: ; %itofp-if-else
+; SDAG-NEXT: v_add_u32_e32 v2, 0xffffffb5, v9
+; SDAG-NEXT: v_lshlrev_b64 v[0:1], v2, v[4:5]
+; SDAG-NEXT: v_cmp_gt_u32_e32 vcc, 64, v2
+; SDAG-NEXT: v_cndmask_b32_e32 v10, 0, v1, vcc
+; SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
+; SDAG-NEXT: ; implicit-def: $vgpr2
+; SDAG-NEXT: ; implicit-def: $vgpr6_vgpr7
+; SDAG-NEXT: ; implicit-def: $vgpr4_vgpr5
+; SDAG-NEXT: s_branch .LBB2_2
+; SDAG-NEXT: .LBB2_14: ; %itofp-if-then20
+; SDAG-NEXT: v_lshrrev_b64 v[0:1], 3, v[4:5]
+; SDAG-NEXT: v_lshlrev_b32_e32 v4, 29, v6
+; SDAG-NEXT: v_or_b32_e32 v10, v1, v4
+; SDAG-NEXT: v_mov_b32_e32 v8, v2
+; SDAG-NEXT: s_branch .LBB2_10
;
; GISEL-LABEL: sitofp_i128_to_f64:
; GISEL: ; %bb.0: ; %itofp-entry
@@ -668,7 +688,7 @@ define double @sitofp_i128_to_f64(i128 %x) {
; GISEL-NEXT: v_mov_b32_e32 v0, s4
; GISEL-NEXT: v_mov_b32_e32 v1, s5
; GISEL-NEXT: s_and_saveexec_b64 s[6:7], vcc
-; GISEL-NEXT: s_cbranch_execz .LBB2_14
+; GISEL-NEXT: s_cbranch_execz .LBB2_12
; GISEL-NEXT: ; %bb.1: ; %itofp-if-end
; GISEL-NEXT: v_ashrrev_i32_e32 v6, 31, v3
; GISEL-NEXT: v_xor_b32_e32 v0, v6, v4
@@ -696,29 +716,22 @@ define double @sitofp_i128_to_f64(i128 %x) {
; GISEL-NEXT: ; implicit-def: $vgpr0_vgpr1
; GISEL-NEXT: s_and_saveexec_b64 s[4:5], vcc
; GISEL-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
-; GISEL-NEXT: ; %bb.2: ; %itofp-if-else
-; GISEL-NEXT: v_add_u32_e32 v4, 0xffffffb5, v9
-; GISEL-NEXT: v_lshlrev_b64 v[0:1], v4, v[2:3]
-; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v4
-; GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
-; GISEL-NEXT: v_cndmask_b32_e32 v10, 0, v1, vcc
-; GISEL-NEXT: ; implicit-def: $vgpr7
-; GISEL-NEXT: ; implicit-def: $vgpr2
-; GISEL-NEXT: ; %bb.3: ; %Flow3
+; GISEL-NEXT: s_cbranch_execnz .LBB2_13
+; GISEL-NEXT: .LBB2_2: ; %Flow3
; GISEL-NEXT: s_or_saveexec_b64 s[8:9], s[4:5]
; GISEL-NEXT: v_sub_u32_e32 v8, 0x7f, v9
; GISEL-NEXT: s_xor_b64 exec, exec, s[8:9]
-; GISEL-NEXT: s_cbranch_execz .LBB2_13
-; GISEL-NEXT: ; %bb.4: ; %NodeBlock
+; GISEL-NEXT: s_cbranch_execz .LBB2_11
+; GISEL-NEXT: ; %bb.3: ; %NodeBlock
; GISEL-NEXT: v_cmp_le_i32_e32 vcc, 55, v7
; GISEL-NEXT: s_and_saveexec_b64 s[4:5], vcc
; GISEL-NEXT: s_xor_b64 s[10:11], exec, s[4:5]
-; GISEL-NEXT: s_cbranch_execz .LBB2_8
-; GISEL-NEXT: ; %bb.5: ; %LeafBlock
+; GISEL-NEXT: s_cbranch_execz .LBB2_7
+; GISEL-NEXT: ; %bb.4: ; %LeafBlock
; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 55, v7
; GISEL-NEXT: s_and_saveexec_b64 s[12:13], vcc
-; GISEL-NEXT: s_cbranch_execz .LBB2_7
-; GISEL-NEXT: ; %bb.6: ; %itofp-sw-default
+; GISEL-NEXT: s_cbranch_execz .LBB2_6
+; GISEL-NEXT: ; %bb.5: ; %itofp-sw-default
; GISEL-NEXT: v_sub_u32_e32 v14, 0x49, v9
; GISEL-NEXT: v_sub_u32_e32 v10, 64, v14
; GISEL-NEXT: v_lshrrev_b64 v[0:1], v14, v[2:3]
@@ -762,12 +775,12 @@ define double @sitofp_i128_to_f64(i128 %x) {
; GISEL-NEXT: v_mov_b32_e32 v3, v10
; GISEL-NEXT: v_mov_b32_e32 v4, v11
; GISEL-NEXT: v_mov_b32_e32 v5, v12
-; GISEL-NEXT: .LBB2_7: ; %Flow1
+; GISEL-NEXT: .LBB2_6: ; %Flow1
; GISEL-NEXT: s_or_b64 exec, exec, s[12:13]
-; GISEL-NEXT: .LBB2_8: ; %Flow2
+; GISEL-NEXT: .LBB2_7: ; %Flow2
; GISEL-NEXT: s_andn2_saveexec_b64 s[4:5], s[10:11]
-; GISEL-NEXT: s_cbranch_execz .LBB2_10
-; GISEL-NEXT: ; %bb.9: ; %itofp-sw-bb
+; GISEL-NEXT: s_cbranch_execz .LBB2_9
+; GISEL-NEXT: ; %bb.8: ; %itofp-sw-bb
; GISEL-NEXT: v_lshlrev_b64 v[4:5], 1, v[4:5]
; GISEL-NEXT: v_lshlrev_b64 v[0:1], 1, v[2:3]
; GISEL-NEXT: v_lshrrev_b32_e32 v2, 31, v3
@@ -776,7 +789,7 @@ define double @sitofp_i128_to_f64(i128 %x) {
; GISEL-NEXT: v_mov_b32_e32 v4, v2
; GISEL-NEXT: v_mov_b32_e32 v3, v1
; GISEL-NEXT: v_mov_b32_e32 v2, v0
-; GISEL-NEXT: .LBB2_10: ; %itofp-sw-epilog
+; GISEL-NEXT: .LBB2_9: ; %itofp-sw-epilog
; GISEL-NEXT: s_or_b64 exec, exec, s[4:5]
; GISEL-NEXT: v_bfe_u32 v0, v2, 2, 1
; GISEL-NEXT: v_or_b32_e32 v0, v2, v0
@@ -789,22 +802,33 @@ define double @sitofp_i128_to_f64(i128 %x) {
; GISEL-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[9:10]
; GISEL-NEXT: v_lshl_or_b32 v10, v4, 30, v1
; GISEL-NEXT: s_and_saveexec_b64 s[4:5], vcc
-; GISEL-NEXT: ; %bb.11: ; %itofp-if-then20
-; GISEL-NEXT: v_lshrrev_b64 v[0:1], 3, v[2:3]
-; GISEL-NEXT: v_mov_b32_e32 v8, v7
-; GISEL-NEXT: v_lshl_or_b32 v10, v4, 29, v1
-; GISEL-NEXT: ; %bb.12: ; %Flow
+; GISEL-NEXT: s_cbranch_execnz .LBB2_14
+; GISEL-NEXT: .LBB2_10: ; %Flow
; GISEL-NEXT: s_or_b64 exec, exec, s[4:5]
-; GISEL-NEXT: .LBB2_13: ; %Flow4
+; GISEL-NEXT: .LBB2_11: ; %Flow4
; GISEL-NEXT: s_or_b64 exec, exec, s[8:9]
; GISEL-NEXT: v_mov_b32_e32 v2, 0x3ff00000
; GISEL-NEXT: v_and_b32_e32 v1, 0x80000000, v6
; GISEL-NEXT: v_lshl_add_u32 v2, v8, 20, v2
; GISEL-NEXT: v_and_b32_e32 v3, 0xfffff, v10
; GISEL-NEXT: v_or3_b32 v1, v3, v1, v2
-; GISEL-NEXT: .LBB2_14: ; %Flow5
+; GISEL-NEXT: .LBB2_12: ; %Flow5
; GISEL-NEXT: s_or_b64 exec, exec, s[6:7]
; GISEL-NEXT: s_setpc_b64 s[30:31]
+; GISEL-NEXT: .LBB2_13: ; %itofp-if-else
+; GISEL-NEXT: v_add_u32_e32 v4, 0xffffffb5, v9
+; GISEL-NEXT: v_lshlrev_b64 v[0:1], v4, v[2:3]
+; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v4
+; GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
+; GISEL-NEXT: v_cndmask_b32_e32 v10, 0, v1, vcc
+; GISEL-NEXT: ; implicit-def: $vgpr7
+; GISEL-NEXT: ; implicit-def: $vgpr2
+; GISEL-NEXT: s_branch .LBB2_2
+; GISEL-NEXT: .LBB2_14: ; %itofp-if-then20
+; GISEL-NEXT: v_lshrrev_b64 v[0:1], 3, v[2:3]
+; GISEL-NEXT: v_mov_b32_e32 v8, v7
+; GISEL-NEXT: v_lshl_or_b32 v10, v4, 29, v1
+; GISEL-NEXT: s_branch .LBB2_10
%cvt = sitofp i128 %x to double
ret double %cvt
}
@@ -819,7 +843,7 @@ define double @uitofp_i128_to_f64(i128 %x) {
; SDAG-NEXT: v_mov_b32_e32 v4, 0
; SDAG-NEXT: v_mov_b32_e32 v5, 0
; SDAG-NEXT: s_and_saveexec_b64 s[6:7], vcc
-; SDAG-NEXT: s_cbranch_execz .LBB3_14
+; SDAG-NEXT: s_cbranch_execz .LBB3_12
; SDAG-NEXT: ; %bb.1: ; %itofp-if-end
; SDAG-NEXT: v_ffbh_u32_e32 v4, v2
; SDAG-NEXT: v_add_u32_e32 v4, 32, v4
@@ -838,30 +862,22 @@ define double @uitofp_i128_to_f64(i128 %x) {
; SDAG-NEXT: ; implicit-def: $vgpr4_vgpr5
; SDAG-NEXT: s_and_saveexec_b64 s[4:5], vcc
; SDAG-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
-; SDAG-NEXT: ; %bb.2: ; %itofp-if-else
-; SDAG-NEXT: v_add_u32_e32 v2, 0xffffffb5, v8
-; SDAG-NEXT: v_lshlrev_b64 v[0:1], v2, v[0:1]
-; SDAG-NEXT: v_cmp_gt_u32_e32 vcc, 64, v2
-; SDAG-NEXT: v_cndmask_b32_e32 v9, 0, v1, vcc
-; SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v0, vcc
-; SDAG-NEXT: ; implicit-def: $vgpr6
-; SDAG-NEXT: ; implicit-def: $vgpr2_vgpr3
-; SDAG-NEXT: ; implicit-def: $vgpr0_vgpr1
-; SDAG-NEXT: ; %bb.3: ; %Flow3
+; SDAG-NEXT: s_cbranch_execnz .LBB3_13
+; SDAG-NEXT: .LBB3_2: ; %Flow3
; SDAG-NEXT: s_or_saveexec_b64 s[8:9], s[4:5]
; SDAG-NEXT: v_sub_u32_e32 v7, 0x7f, v8
; SDAG-NEXT: s_xor_b64 exec, exec, s[8:9]
-; SDAG-NEXT: s_cbranch_execz .LBB3_13
-; SDAG-NEXT: ; %bb.4: ; %NodeBlock
+; SDAG-NEXT: s_cbranch_execz .LBB3_11
+; SDAG-NEXT: ; %bb.3: ; %NodeBlock
; SDAG-NEXT: v_cmp_lt_i32_e32 vcc, 54, v6
; SDAG-NEXT: s_and_saveexec_b64 s[4:5], vcc
; SDAG-NEXT: s_xor_b64 s[10:11], exec, s[4:5]
-; SDAG-NEXT: s_cbranch_execz .LBB3_8
-; SDAG-NEXT: ; %bb.5: ; %LeafBlock
+; SDAG-NEXT: s_cbranch_execz .LBB3_7
+; SDAG-NEXT: ; %bb.4: ; %LeafBlock
; SDAG-NEXT: v_cmp_ne_u32_e32 vcc, 55, v6
; SDAG-NEXT: s_and_saveexec_b64 s[12:13], vcc
-; SDAG-NEXT: s_cbranch_execz .LBB3_7
-; SDAG-NEXT: ; %bb.6: ; %itofp-sw-default
+; SDAG-NEXT: s_cbranch_execz .LBB3_6
+; SDAG-NEXT: ; %bb.5: ; %itofp-sw-default
; SDAG-NEXT: v_sub_u32_e32 v11, 0x49, v8
; SDAG-NEXT: v_sub_u32_e32 v9, 64, v11
; SDAG-NEXT: v_lshrrev_b64 v[4:5], v11, v[0:1]
@@ -904,16 +920,16 @@ define double @uitofp_i128_to_f64(i128 %x) {
; SDAG-NEXT: v_mov_b32_e32 v0, v4
; SDAG-NEXT: v_mov_b32_e32 v1, v5
; SDAG-NEXT: v_mov_b32_e32 v3, v10
-; SDAG-NEXT: .LBB3_7: ; %Flow1
+; SDAG-NEXT: .LBB3_6: ; %Flow1
; SDAG-NEXT: s_or_b64 exec, exec, s[12:13]
-; SDAG-NEXT: .LBB3_8: ; %Flow2
+; SDAG-NEXT: .LBB3_7: ; %Flow2
; SDAG-NEXT: s_andn2_saveexec_b64 s[4:5], s[10:11]
-; SDAG-NEXT: ; %bb.9: ; %itofp-sw-bb
+; SDAG-NEXT: ; %bb.8: ; %itofp-sw-bb
; SDAG-NEXT: v_lshlrev_b64 v[2:3], 1, v[2:3]
; SDAG-NEXT: v_lshrrev_b32_e32 v3, 31, v1
; SDAG-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1]
; SDAG-NEXT: v_or_b32_e32 v2, v2, v3
-; SDAG-NEXT: ; %bb.10: ; %itofp-sw-epilog
+; SDAG-NEXT: ; %bb.9: ; %itofp-sw-epilog
; SDAG-NEXT: s_or_b64 exec, exec, s[4:5]
; SDAG-NEXT: v_lshrrev_b32_e32 v3, 2, v0
; SDAG-NEXT: v_and_or_b32 v0, v3, 1, v0
@@ -925,22 +941,34 @@ define double @uitofp_i128_to_f64(i128 %x) {
; SDAG-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3
; SDAG-NEXT: v_alignbit_b32 v9, v2, v1, 2
; SDAG-NEXT: s_and_saveexec_b64 s[4:5], vcc
-; SDAG-NEXT: ; %bb.11: ; %itofp-if-then20
-; SDAG-NEXT: v_lshrrev_b64 v[4:5], 3, v[0:1]
-; SDAG-NEXT: v_alignbit_b32 v9, v2, v1, 3
-; SDAG-NEXT: v_mov_b32_e32 v7, v6
-; SDAG-NEXT: ; %bb.12: ; %Flow
+; SDAG-NEXT: s_cbranch_execnz .LBB3_14
+; SDAG-NEXT: .LBB3_10: ; %Flow
; SDAG-NEXT: s_or_b64 exec, exec, s[4:5]
-; SDAG-NEXT: .LBB3_13: ; %Flow4
+; SDAG-NEXT: .LBB3_11: ; %Flow4
; SDAG-NEXT: s_or_b64 exec, exec, s[8:9]
; SDAG-NEXT: v_and_b32_e32 v0, 0xfffff, v9
; SDAG-NEXT: v_lshl_or_b32 v0, v7, 20, v0
; SDAG-NEXT: v_add_u32_e32 v5, 0x3ff00000, v0
-; SDAG-NEXT: .LBB3_14: ; %Flow5
+; SDAG-NEXT: .LBB3_12: ; %Flow5
; SDAG-NEXT: s_or_b64 exec, exec, s[6:7]
; SDAG-NEXT: v_mov_b32_e32 v0, v4
; SDAG-NEXT: v_mov_b32_e32 v1, v5
; SDAG-NEXT: s_setpc_b64 s[30:31]
+; SDAG-NEXT: .LBB3_13: ; %itofp-if-else
+; SDAG-NEXT: v_add_u32_e32 v2, 0xffffffb5, v8
+; SDAG-NEXT: v_lshlrev_b64 v[0:1], v2, v[0:1]
+; SDAG-NEXT: v_cmp_gt_u32_e32 vcc, 64, v2
+; SDAG-NEXT: v_cndmask_b32_e32 v9, 0, v1, vcc
+; SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v0, vcc
+; SDAG-NEXT: ; implicit-def: $vgpr6
+; SDAG-NEXT: ; implicit-def: $vgpr2_vgpr3
+; SDAG-NEXT: ; implicit-def: $vgpr0_vgpr1
+; SDAG-NEXT: s_branch .LBB3_2
+; SDAG-NEXT: .LBB3_14: ; %itofp-if-then20
+; SDAG-NEXT: v_lshrrev_b64 v[4:5], 3, v[0:1]
+; SDAG-NEXT: v_alignbit_b32 v9, v2, v1, 3
+; SDAG-NEXT: v_mov_b32_e32 v7, v6
+; SDAG-NEXT: s_branch .LBB3_10
;
; GISEL-LABEL: uitofp_i128_to_f64:
; GISEL: ; %bb.0: ; %itofp-entry
@@ -952,7 +980,7 @@ define double @uitofp_i128_to_f64(i128 %x) {
; GISEL-NEXT: v_mov_b32_e32 v4, s4
; GISEL-NEXT: v_mov_b32_e32 v5, s5
; GISEL-NEXT: s_and_saveexec_b64 s[6:7], vcc
-; GISEL-NEXT: s_cbranch_execz .LBB3_14
+; GISEL-NEXT: s_cbranch_execz .LBB3_12
; GISEL-NEXT: ; %bb.1: ; %itofp-if-end
; GISEL-NEXT: v_ffbh_u32_e32 v5, v0
; GISEL-NEXT: v_ffbh_u32_e32 v4, v1
@@ -971,29 +999,22 @@ define double @uitofp_i128_to_f64(i128 %x) {
; GISEL-NEXT: ; implicit-def: $vgpr4_vgpr5
; GISEL-NEXT: s_and_saveexec_b64 s[4:5], vcc
; GISEL-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
-; GISEL-NEXT: ; %bb.2: ; %itofp-if-else
-; GISEL-NEXT: v_add_u32_e32 v2, 0xffffffb5, v8
-; GISEL-NEXT: v_lshlrev_b64 v[0:1], v2, v[0:1]
-; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v2
-; GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v0, vcc
-; GISEL-NEXT: v_cndmask_b32_e32 v9, 0, v1, vcc
-; GISEL-NEXT: ; implicit-def: $vgpr6
-; GISEL-NEXT: ; implicit-def: $vgpr0
-; GISEL-NEXT: ; %bb.3: ; %Flow3
+; GISEL-NEXT: s_cbranch_execnz .LBB3_13
+; GISEL-NEXT: .LBB3_2: ; %Flow3
; GISEL-NEXT: s_or_saveexec_b64 s[8:9], s[4:5]
; GISEL-NEXT: v_sub_u32_e32 v7, 0x7f, v8
; GISEL-NEXT: s_xor_b64 exec, exec, s[8:9]
-; GISEL-NEXT: s_cbranch_execz .LBB3_13
-; GISEL-NEXT: ; %bb.4: ; %NodeBlock
+; GISEL-NEXT: s_cbranch_execz .LBB3_11
+; GISEL-NEXT: ; %bb.3: ; %NodeBlock
; GISEL-NEXT: v_cmp_le_i32_e32 vcc, 55, v6
; GISEL-NEXT: s_and_saveexec_b64 s[4:5], vcc
; GISEL-NEXT: s_xor_b64 s[10:11], exec, s[4:5]
-; GISEL-NEXT: s_cbranch_execz .LBB3_8
-; GISEL-NEXT: ; %bb.5: ; %LeafBlock
+; GISEL-NEXT: s_cbranch_execz .LBB3_7
+; GISEL-NEXT: ; %bb.4: ; %LeafBlock
; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 55, v6
; GISEL-NEXT: s_and_saveexec_b64 s[12:13], vcc
-; GISEL-NEXT: s_cbranch_execz .LBB3_7
-; GISEL-NEXT: ; %bb.6: ; %itofp-sw-default
+; GISEL-NEXT: s_cbranch_execz .LBB3_6
+; GISEL-NEXT: ; %bb.5: ; %itofp-sw-default
; GISEL-NEXT: v_sub_u32_e32 v13, 0x49, v8
; GISEL-NEXT: v_sub_u32_e32 v9, 64, v13
; GISEL-NEXT: v_lshrrev_b64 v[4:5], v13, v[0:1]
@@ -1038,12 +1059,12 @@ define double @uitofp_i128_to_f64(i128 %x) {
; GISEL-NEXT: v_mov_b32_e32 v1, v9
; GISEL-NEXT: v_mov_b32_e32 v2, v10
; GISEL-NEXT: v_mov_b32_e32 v3, v11
-; GISEL-NEXT: .LBB3_7: ; %Flow1
+; GISEL-NEXT: .LBB3_6: ; %Flow1
; GISEL-NEXT: s_or_b64 exec, exec, s[12:13]
-; GISEL-NEXT: .LBB3_8: ; %Flow2
+; GISEL-NEXT: .LBB3_7: ; %Flow2
; GISEL-NEXT: s_andn2_saveexec_b64 s[4:5], s[10:11]
-; GISEL-NEXT: s_cbranch_execz .LBB3_10
-; GISEL-NEXT: ; %bb.9: ; %itofp-sw-bb
+; GISEL-NEXT: s_cbranch_execz .LBB3_9
+; GISEL-NEXT: ; %bb.8: ; %itofp-sw-bb
; GISEL-NEXT: v_lshlrev_b64 v[8:9], 1, v[0:1]
; GISEL-NEXT: v_lshlrev_b64 v[10:11], 1, v[2:3]
; GISEL-NEXT: v_lshrrev_b32_e32 v0, 31, v1
@@ -1052,7 +1073,7 @@ define double @uitofp_i128_to_f64(i128 %x) {
; GISEL-NEXT: v_mov_b32_e32 v1, v9
; GISEL-NEXT: v_mov_b32_e32 v2, v10
; GISEL-NEXT: v_mov_b32_e32 v3, v11
-; GISEL-NEXT: .LBB3_10: ; %itofp-sw-epilog
+; GISEL-NEXT: .LBB3_9: ; %itofp-sw-epilog
; GISEL-NEXT: s_or_b64 exec, exec, s[4:5]
; GISEL-NEXT: v_bfe_u32 v4, v0, 2, 1
; GISEL-NEXT: v_or_b32_e32 v0, v0, v4
@@ -1068,25 +1089,36 @@ define double @uitofp_i128_to_f64(i128 %x) {
; GISEL-NEXT: v_lshrrev_b32_e32 v5, 2, v1
; GISEL-NEXT: v_or_b32_e32 v9, v8, v5
; GISEL-NEXT: s_and_saveexec_b64 s[4:5], vcc
-; GISEL-NEXT: ; %bb.11: ; %itofp-if-then20
-; GISEL-NEXT: v_lshlrev_b64 v[2:3], 29, v[2:3]
-; GISEL-NEXT: v_lshrrev_b64 v[4:5], 3, v[0:1]
-; GISEL-NEXT: v_lshrrev_b32_e32 v0, 3, v1
-; GISEL-NEXT: v_or_b32_e32 v9, v2, v0
-; GISEL-NEXT: v_mov_b32_e32 v7, v6
-; GISEL-NEXT: ; %bb.12: ; %Flow
+; GISEL-NEXT: s_cbranch_execnz .LBB3_14
+; GISEL-NEXT: .LBB3_10: ; %Flow
; GISEL-NEXT: s_or_b64 exec, exec, s[4:5]
-; GISEL-NEXT: .LBB3_13: ; %Flow4
+; GISEL-NEXT: .LBB3_11: ; %Flow4
; GISEL-NEXT: s_or_b64 exec, exec, s[8:9]
; GISEL-NEXT: v_mov_b32_e32 v0, 0x3ff00000
; GISEL-NEXT: v_lshl_add_u32 v0, v7, 20, v0
; GISEL-NEXT: v_mov_b32_e32 v1, 0xfffff
; GISEL-NEXT: v_and_or_b32 v5, v9, v1, v0
-; GISEL-NEXT: .LBB3_14: ; %Flow5
+; GISEL-NEXT: .LBB3_12: ; %Flow5
; GISEL-NEXT: s_or_b64 exec, exec, s[6:7]
; GISEL-NEXT: v_mov_b32_e32 v0, v4
; GISEL-NEXT: v_mov_b32_e32 v1, v5
; GISEL-NEXT: s_setpc_b64 s[30:31]
+; GISEL-NEXT: .LBB3_13: ; %itofp-if-else
+; GISEL-NEXT: v_add_u32_e32 v2, 0xffffffb5, v8
+; GISEL-NEXT: v_lshlrev_b64 v[0:1], v2, v[0:1]
+; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v2
+; GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v0, vcc
+; GISEL-NEXT: v_cndmask_b32_e32 v9, 0, v1, vcc
+; GISEL-NEXT: ; implicit-def: $vgpr6
+; GISEL-NEXT: ; implicit-def: $vgpr0
+; GISEL-NEXT: s_branch .LBB3_2
+; GISEL-NEXT: .LBB3_14: ; %itofp-if-then20
+; GISEL-NEXT: v_lshlrev_b64 v[2:3], 29, v[2:3]
+; GISEL-NEXT: v_lshrrev_b64 v[4:5], 3, v[0:1]
+; GISEL-NEXT: v_lshrrev_b32_e32 v0, 3, v1
+; GISEL-NEXT: v_or_b32_e32 v9, v2, v0
+; GISEL-NEXT: v_mov_b32_e32 v7, v6
+; GISEL-NEXT: s_branch .LBB3_10
%cvt = uitofp i128 %x to double
ret double %cvt
}
@@ -1100,7 +1132,7 @@ define half @sitofp_i128_to_f16(i128 %x) {
; SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[4:5]
; SDAG-NEXT: v_mov_b32_e32 v4, 0
; SDAG-NEXT: s_and_saveexec_b64 s[6:7], vcc
-; SDAG-NEXT: s_cbranch_execz .LBB4_14
+; SDAG-NEXT: s_cbranch_execz .LBB4_12
; SDAG-NEXT: ; %bb.1: ; %itofp-if-end
; SDAG-NEXT: v_sub_co_u32_e32 v4, vcc, 0, v0
; SDAG-NEXT: v_subb_co_u32_e32 v5, vcc, 0, v1, vcc
@@ -1127,29 +1159,22 @@ define half @sitofp_i128_to_f16(i128 %x) {
; SDAG-NEXT: v_cmp_gt_i32_e32 vcc, 25, v2
; SDAG-NEXT: s_and_saveexec_b64 s[4:5], vcc
; SDAG-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
-; SDAG-NEXT: ; %bb.2: ; %itofp-if-else
-; SDAG-NEXT: v_add_u32_e32 v2, 0xffffff98, v7
-; SDAG-NEXT: v_lshlrev_b64 v[0:1], v2, v[0:1]
-; SDAG-NEXT: v_cmp_gt_u32_e32 vcc, 64, v2
-; SDAG-NEXT: v_cndmask_b32_e32 v8, 0, v0, vcc
-; SDAG-NEXT: ; implicit-def: $vgpr2
-; SDAG-NEXT: ; implicit-def: $vgpr0_vgpr1
-; SDAG-NEXT: ; implicit-def: $vgpr4_vgpr5
-; SDAG-NEXT: ; %bb.3: ; %Flow3
+; SDAG-NEXT: s_cbranch_execnz .LBB4_13
+; SDAG-NEXT: .LBB4_2: ; %Flow3
; SDAG-NEXT: s_or_saveexec_b64 s[8:9], s[4:5]
; SDAG-NEXT: v_sub_u32_e32 v6, 0x7f, v7
; SDAG-NEXT: s_xor_b64 exec, exec, s[8:9]
-; SDAG-NEXT: s_cbranch_execz .LBB4_13
-; SDAG-NEXT: ; %bb.4: ; %NodeBlock
+; SDAG-NEXT: s_cbranch_execz .LBB4_11
+; SDAG-NEXT: ; %bb.3: ; %NodeBlock
; SDAG-NEXT: v_cmp_lt_i32_e32 vcc, 25, v2
; SDAG-NEXT: s_and_saveexec_b64 s[4:5], vcc
; SDAG-NEXT: s_xor_b64 s[10:11], exec, s[4:5]
-; SDAG-NEXT: s_cbranch_execz .LBB4_8
-; SDAG-NEXT: ; %bb.5: ; %LeafBlock
+; SDAG-NEXT: s_cbranch_execz .LBB4_7
+; SDAG-NEXT: ; %bb.4: ; %LeafBlock
; SDAG-NEXT: v_cmp_ne_u32_e32 vcc, 26, v2
; SDAG-NEXT: s_and_saveexec_b64 s[12:13], vcc
-; SDAG-NEXT: s_cbranch_execz .LBB4_7
-; SDAG-NEXT: ; %bb.6: ; %itofp-sw-default
+; SDAG-NEXT: s_cbranch_execz .LBB4_6
+; SDAG-NEXT: ; %bb.5: ; %itofp-sw-default
; SDAG-NEXT: v_sub_u32_e32 v12, 0x66, v7
; SDAG-NEXT: v_sub_u32_e32 v10, 64, v12
; SDAG-NEXT: v_lshrrev_b64 v[8:9], v12, v[0:1]
@@ -1187,13 +1212,13 @@ define half @sitofp_i128_to_f16(i128 %x) {
; SDAG-NEXT: v_or_b32_e32 v8, v15, v0
; SDAG-NEXT: v_mov_b32_e32 v0, v8
; SDAG-NEXT: v_mov_b32_e32 v1, v9
-; SDAG-NEXT: .LBB4_7: ; %Flow1
+; SDAG-NEXT: .LBB4_6: ; %Flow1
; SDAG-NEXT: s_or_b64 exec, exec, s[12:13]
-; SDAG-NEXT: .LBB4_8: ; %Flow2
+; SDAG-NEXT: .LBB4_7: ; %Flow2
; SDAG-NEXT: s_andn2_saveexec_b64 s[4:5], s[10:11]
-; SDAG-NEXT: ; %bb.9: ; %itofp-sw-bb
+; SDAG-NEXT: ; %bb.8: ; %itofp-sw-bb
; SDAG-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1]
-; SDAG-NEXT: ; %bb.10: ; %itofp-sw-epilog
+; SDAG-NEXT: ; %bb.9: ; %itofp-sw-epilog
; SDAG-NEXT: s_or_b64 exec, exec, s[4:5]
; SDAG-NEXT: v_lshrrev_b32_e32 v4, 2, v0
; SDAG-NEXT: v_and_or_b32 v0, v4, 1, v0
@@ -1203,22 +1228,33 @@ define half @sitofp_i128_to_f16(i128 %x) {
; SDAG-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4
; SDAG-NEXT: v_alignbit_b32 v8, v1, v0, 2
; SDAG-NEXT: s_and_saveexec_b64 s[4:5], vcc
-; SDAG-NEXT: ; %bb.11: ; %itofp-if-then20
-; SDAG-NEXT: v_alignbit_b32 v8, v1, v0, 3
-; SDAG-NEXT: v_mov_b32_e32 v6, v2
-; SDAG-NEXT: ; %bb.12: ; %Flow
+; SDAG-NEXT: s_cbranch_execnz .LBB4_14
+; SDAG-NEXT: .LBB4_10: ; %Flow
; SDAG-NEXT: s_or_b64 exec, exec, s[4:5]
-; SDAG-NEXT: .LBB4_13: ; %Flow4
+; SDAG-NEXT: .LBB4_11: ; %Flow4
; SDAG-NEXT: s_or_b64 exec, exec, s[8:9]
; SDAG-NEXT: v_and_b32_e32 v0, 0x80000000, v3
; SDAG-NEXT: v_lshl_add_u32 v1, v6, 23, 1.0
; SDAG-NEXT: v_and_b32_e32 v2, 0x7fffff, v8
; SDAG-NEXT: v_or3_b32 v0, v2, v0, v1
; SDAG-NEXT: v_cvt_f16_f32_e32 v4, v0
-; SDAG-NEXT: .LBB4_14: ; %Flow5
+; SDAG-NEXT: .LBB4_12: ; %Flow5
; SDAG-NEXT: s_or_b64 exec, exec, s[6:7]
; SDAG-NEXT: v_mov_b32_e32 v0, v4
; SDAG-NEXT: s_setpc_b64 s[30:31]
+; SDAG-NEXT: .LBB4_13: ; %itofp-if-else
+; SDAG-NEXT: v_add_u32_e32 v2, 0xffffff98, v7
+; SDAG-NEXT: v_lshlrev_b64 v[0:1], v2, v[0:1]
+; SDAG-NEXT: v_cmp_gt_u32_e32 vcc, 64, v2
+; SDAG-NEXT: v_cndmask_b32_e32 v8, 0, v0, vcc
+; SDAG-NEXT: ; implicit-def: $vgpr2
+; SDAG-NEXT: ; implicit-def: $vgpr0_vgpr1
+; SDAG-NEXT: ; implicit-def: $vgpr4_vgpr5
+; SDAG-NEXT: s_branch .LBB4_2
+; SDAG-NEXT: .LBB4_14: ; %itofp-if-then20
+; SDAG-NEXT: v_alignbit_b32 v8, v1, v0, 3
+; SDAG-NEXT: v_mov_b32_e32 v6, v2
+; SDAG-NEXT: s_branch .LBB4_10
;
; GISEL-LABEL: sitofp_i128_to_f16:
; GISEL: ; %bb.0: ; %itofp-entry
@@ -1229,7 +1265,7 @@ define half @sitofp_i128_to_f16(i128 %x) {
; GISEL-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[4:5]
; GISEL-NEXT: v_mov_b32_e32 v4, s4
; GISEL-NEXT: s_and_saveexec_b64 s[6:7], vcc
-; GISEL-NEXT: s_cbranch_execz .LBB4_14
+; GISEL-NEXT: s_cbranch_execz .LBB4_12
; GISEL-NEXT: ; %bb.1: ; %itofp-if-end
; GISEL-NEXT: v_ashrrev_i32_e32 v6, 31, v3
; GISEL-NEXT: v_xor_b32_e32 v0, v6, v0
@@ -1256,29 +1292,22 @@ define half @sitofp_i128_to_f16(i128 %x) {
; GISEL-NEXT: ; implicit-def: $vgpr4
; GISEL-NEXT: s_and_saveexec_b64 s[4:5], vcc
; GISEL-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
-; GISEL-NEXT: ; %bb.2: ; %itofp-if-else
-; GISEL-NEXT: v_add_u32_e32 v2, 0xffffff98, v5
-; GISEL-NEXT: v_lshlrev_b64 v[0:1], v2, v[0:1]
-; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v2
-; GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v0, vcc
-; GISEL-NEXT: ; implicit-def: $vgpr7
-; GISEL-NEXT: ; implicit-def: $vgpr0
-; GISEL-NEXT: ; implicit-def: $vgpr2
-; GISEL-NEXT: ; %bb.3: ; %Flow3
+; GISEL-NEXT: s_cbranch_execnz .LBB4_13
+; GISEL-NEXT: .LBB4_2: ; %Flow3
; GISEL-NEXT: s_or_saveexec_b64 s[8:9], s[4:5]
; GISEL-NEXT: v_sub_u32_e32 v8, 0x7f, v5
; GISEL-NEXT: s_xor_b64 exec, exec, s[8:9]
-; GISEL-NEXT: s_cbranch_execz .LBB4_13
-; GISEL-NEXT: ; %bb.4: ; %NodeBlock
+; GISEL-NEXT: s_cbranch_execz .LBB4_11
+; GISEL-NEXT: ; %bb.3: ; %NodeBlock
; GISEL-NEXT: v_cmp_le_i32_e32 vcc, 26, v7
; GISEL-NEXT: s_and_saveexec_b64 s[4:5], vcc
; GISEL-NEXT: s_xor_b64 s[10:11], exec, s[4:5]
-; GISEL-NEXT: s_cbranch_execz .LBB4_8
-; GISEL-NEXT: ; %bb.5: ; %LeafBlock
+; GISEL-NEXT: s_cbranch_execz .LBB4_7
+; GISEL-NEXT: ; %bb.4: ; %LeafBlock
; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 26, v7
; GISEL-NEXT: s_and_saveexec_b64 s[12:13], vcc
-; GISEL-NEXT: s_cbranch_execz .LBB4_7
-; GISEL-NEXT: ; %bb.6: ; %itofp-sw-default
+; GISEL-NEXT: s_cbranch_execz .LBB4_6
+; GISEL-NEXT: ; %bb.5: ; %itofp-sw-default
; GISEL-NEXT: v_sub_u32_e32 v4, 0x66, v5
; GISEL-NEXT: v_sub_u32_e32 v11, 64, v4
; GISEL-NEXT: v_lshrrev_b64 v[9:10], v4, v[0:1]
@@ -1320,13 +1349,13 @@ define half @sitofp_i128_to_f16(i128 %x) {
; GISEL-NEXT: v_mov_b32_e32 v1, v4
; GISEL-NEXT: v_mov_b32_e32 v2, v5
; GISEL-NEXT: v_mov_b32_e32 v3, v6
-; GISEL-NEXT: .LBB4_7: ; %Flow1
+; GISEL-NEXT: .LBB4_6: ; %Flow1
; GISEL-NEXT: s_or_b64 exec, exec, s[12:13]
-; GISEL-NEXT: .LBB4_8: ; %Flow2
+; GISEL-NEXT: .LBB4_7: ; %Flow2
; GISEL-NEXT: s_andn2_saveexec_b64 s[4:5], s[10:11]
-; GISEL-NEXT: ; %bb.9: ; %itofp-sw-bb
+; GISEL-NEXT: ; %bb.8: ; %itofp-sw-bb
; GISEL-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1]
-; GISEL-NEXT: ; %bb.10: ; %itofp-sw-epilog
+; GISEL-NEXT: ; %bb.9: ; %itofp-sw-epilog
; GISEL-NEXT: s_or_b64 exec, exec, s[4:5]
; GISEL-NEXT: v_bfe_u32 v2, v0, 2, 1
; GISEL-NEXT: v_or_b32_e32 v0, v0, v2
@@ -1337,22 +1366,33 @@ define half @sitofp_i128_to_f16(i128 %x) {
; GISEL-NEXT: v_lshrrev_b64 v[4:5], 2, v[0:1]
; GISEL-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[2:3]
; GISEL-NEXT: s_and_saveexec_b64 s[4:5], vcc
-; GISEL-NEXT: ; %bb.11: ; %itofp-if-then20
-; GISEL-NEXT: v_lshrrev_b64 v[4:5], 3, v[0:1]
-; GISEL-NEXT: v_mov_b32_e32 v8, v7
-; GISEL-NEXT: ; %bb.12: ; %Flow
+; GISEL-NEXT: s_cbranch_execnz .LBB4_14
+; GISEL-NEXT: .LBB4_10: ; %Flow
; GISEL-NEXT: s_or_b64 exec, exec, s[4:5]
-; GISEL-NEXT: .LBB4_13: ; %Flow4
+; GISEL-NEXT: .LBB4_11: ; %Flow4
; GISEL-NEXT: s_or_b64 exec, exec, s[8:9]
; GISEL-NEXT: v_and_b32_e32 v0, 0x80000000, v6
; GISEL-NEXT: v_lshl_add_u32 v1, v8, 23, 1.0
; GISEL-NEXT: v_and_b32_e32 v2, 0x7fffff, v4
; GISEL-NEXT: v_or3_b32 v0, v2, v0, v1
; GISEL-NEXT: v_cvt_f16_f32_e32 v4, v0
-; GISEL-NEXT: .LBB4_14: ; %Flow5
+; GISEL-NEXT: .LBB4_12: ; %Flow5
; GISEL-NEXT: s_or_b64 exec, exec, s[6:7]
; GISEL-NEXT: v_mov_b32_e32 v0, v4
; GISEL-NEXT: s_setpc_b64 s[30:31]
+; GISEL-NEXT: .LBB4_13: ; %itofp-if-else
+; GISEL-NEXT: v_add_u32_e32 v2, 0xffffff98, v5
+; GISEL-NEXT: v_lshlrev_b64 v[0:1], v2, v[0:1]
+; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v2
+; GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v0, vcc
+; GISEL-NEXT: ; implicit-def: $vgpr7
+; GISEL-NEXT: ; implicit-def: $vgpr0
+; GISEL-NEXT: ; implicit-def: $vgpr2
+; GISEL-NEXT: s_branch .LBB4_2
+; GISEL-NEXT: .LBB4_14: ; %itofp-if-then20
+; GISEL-NEXT: v_lshrrev_b64 v[4:5], 3, v[0:1]
+; GISEL-NEXT: v_mov_b32_e32 v8, v7
+; GISEL-NEXT: s_branch .LBB4_10
%cvt = sitofp i128 %x to half
ret half %cvt
}
@@ -1366,7 +1406,7 @@ define half @uitofp_i128_to_f16(i128 %x) {
; SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[4:5]
; SDAG-NEXT: v_mov_b32_e32 v4, 0
; SDAG-NEXT: s_and_saveexec_b64 s[6:7], vcc
-; SDAG-NEXT: s_cbranch_execz .LBB5_14
+; SDAG-NEXT: s_cbranch_execz .LBB5_12
; SDAG-NEXT: ; %bb.1: ; %itofp-if-end
; SDAG-NEXT: v_ffbh_u32_e32 v4, v2
; SDAG-NEXT: v_add_u32_e32 v4, 32, v4
@@ -1384,29 +1424,22 @@ define half @uitofp_i128_to_f16(i128 %x) {
; SDAG-NEXT: ; implicit-def: $vgpr7
; SDAG-NEXT: s_and_saveexec_b64 s[4:5], vcc
; SDAG-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
-; SDAG-NEXT: ; %bb.2: ; %itofp-if-else
-; SDAG-NEXT: v_add_u32_e32 v2, 0xffffff98, v6
-; SDAG-NEXT: v_lshlrev_b64 v[0:1], v2, v[0:1]
-; SDAG-NEXT: v_cmp_gt_u32_e32 vcc, 64, v2
-; SDAG-NEXT: v_cndmask_b32_e32 v7, 0, v0, vcc
-; SDAG-NEXT: ; implicit-def: $vgpr4
-; SDAG-NEXT: ; implicit-def: $vgpr0_vgpr1
-; SDAG-NEXT: ; implicit-def: $vgpr2_vgpr3
-; SDAG-NEXT: ; %bb.3: ; %Flow3
+; SDAG-NEXT: s_cbranch_execnz .LBB5_13
+; SDAG-NEXT: .LBB5_2: ; %Flow3
; SDAG-NEXT: s_or_saveexec_b64 s[8:9], s[4:5]
; SDAG-NEXT: v_sub_u32_e32 v5, 0x7f, v6
; SDAG-NEXT: s_xor_b64 exec, exec, s[8:9]
-; SDAG-NEXT: s_cbranch_execz .LBB5_13
-; SDAG-NEXT: ; %bb.4: ; %NodeBlock
+; SDAG-NEXT: s_cbranch_execz .LBB5_11
+; SDAG-NEXT: ; %bb.3: ; %NodeBlock
; SDAG-NEXT: v_cmp_lt_i32_e32 vcc, 25, v4
; SDAG-NEXT: s_and_saveexec_b64 s[4:5], vcc
; SDAG-NEXT: s_xor_b64 s[10:11], exec, s[4:5]
-; SDAG-NEXT: s_cbranch_execz .LBB5_8
-; SDAG-NEXT: ; %bb.5: ; %LeafBlock
+; SDAG-NEXT: s_cbranch_execz .LBB5_7
+; SDAG-NEXT: ; %bb.4: ; %LeafBlock
; SDAG-NEXT: v_cmp_ne_u32_e32 vcc, 26, v4
; SDAG-NEXT: s_and_saveexec_b64 s[12:13], vcc
-; SDAG-NEXT: s_cbranch_execz .LBB5_7
-; SDAG-NEXT: ; %bb.6: ; %itofp-sw-default
+; SDAG-NEXT: s_cbranch_execz .LBB5_6
+; SDAG-NEXT: ; %bb.5: ; %itofp-sw-default
; SDAG-NEXT: v_sub_u32_e32 v11, 0x66, v6
; SDAG-NEXT: v_sub_u32_e32 v9, 64, v11
; SDAG-NEXT: v_lshrrev_b64 v[7:8], v11, v[0:1]
@@ -1444,13 +1477,13 @@ define half @uitofp_i128_to_f16(i128 %x) {
; SDAG-NEXT: v_or_b32_e32 v7, v14, v0
; SDAG-NEXT: v_mov_b32_e32 v0, v7
; SDAG-NEXT: v_mov_b32_e32 v1, v8
-; SDAG-NEXT: .LBB5_7: ; %Flow1
+; SDAG-NEXT: .LBB5_6: ; %Flow1
; SDAG-NEXT: s_or_b64 exec, exec, s[12:13]
-; SDAG-NEXT: .LBB5_8: ; %Flow2
+; SDAG-NEXT: .LBB5_7: ; %Flow2
; SDAG-NEXT: s_andn2_saveexec_b64 s[4:5], s[10:11]
-; SDAG-NEXT: ; %bb.9: ; %itofp-sw-bb
+; SDAG-NEXT: ; %bb.8: ; %itofp-sw-bb
; SDAG-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1]
-; SDAG-NEXT: ; %bb.10: ; %itofp-sw-epilog
+; SDAG-NEXT: ; %bb.9: ; %itofp-sw-epilog
; SDAG-NEXT: s_or_b64 exec, exec, s[4:5]
; SDAG-NEXT: v_lshrrev_b32_e32 v2, 2, v0
; SDAG-NEXT: v_and_or_b32 v0, v2, 1, v0
@@ -1460,21 +1493,32 @@ define half @uitofp_i128_to_f16(i128 %x) {
; SDAG-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2
; SDAG-NEXT: v_alignbit_b32 v7, v1, v0, 2
; SDAG-NEXT: s_and_saveexec_b64 s[4:5], vcc
-; SDAG-NEXT: ; %bb.11: ; %itofp-if-then20
-; SDAG-NEXT: v_alignbit_b32 v7, v1, v0, 3
-; SDAG-NEXT: v_mov_b32_e32 v5, v4
-; SDAG-NEXT: ; %bb.12: ; %Flow
+; SDAG-NEXT: s_cbranch_execnz .LBB5_14
+; SDAG-NEXT: .LBB5_10: ; %Flow
; SDAG-NEXT: s_or_b64 exec, exec, s[4:5]
-; SDAG-NEXT: .LBB5_13: ; %Flow4
+; SDAG-NEXT: .LBB5_11: ; %Flow4
; SDAG-NEXT: s_or_b64 exec, exec, s[8:9]
; SDAG-NEXT: v_and_b32_e32 v0, 0x7fffff, v7
; SDAG-NEXT: v_lshl_or_b32 v0, v5, 23, v0
; SDAG-NEXT: v_add_u32_e32 v0, 1.0, v0
; SDAG-NEXT: v_cvt_f16_f32_e32 v4, v0
-; SDAG-NEXT: .LBB5_14: ; %Flow5
+; SDAG-NEXT: .LBB5_12: ; %Flow5
; SDAG-NEXT: s_or_b64 exec, exec, s[6:7]
; SDAG-NEXT: v_mov_b32_e32 v0, v4
; SDAG-NEXT: s_setpc_b64 s[30:31]
+; SDAG-NEXT: .LBB5_13: ; %itofp-if-else
+; SDAG-NEXT: v_add_u32_e32 v2, 0xffffff98, v6
+; SDAG-NEXT: v_lshlrev_b64 v[0:1], v2, v[0:1]
+; SDAG-NEXT: v_cmp_gt_u32_e32 vcc, 64, v2
+; SDAG-NEXT: v_cndmask_b32_e32 v7, 0, v0, vcc
+; SDAG-NEXT: ; implicit-def: $vgpr4
+; SDAG-NEXT: ; implicit-def: $vgpr0_vgpr1
+; SDAG-NEXT: ; implicit-def: $vgpr2_vgpr3
+; SDAG-NEXT: s_branch .LBB5_2
+; SDAG-NEXT: .LBB5_14: ; %itofp-if-then20
+; SDAG-NEXT: v_alignbit_b32 v7, v1, v0, 3
+; SDAG-NEXT: v_mov_b32_e32 v5, v4
+; SDAG-NEXT: s_branch .LBB5_10
;
; GISEL-LABEL: uitofp_i128_to_f16:
; GISEL: ; %bb.0: ; %itofp-entry
@@ -1485,7 +1529,7 @@ define half @uitofp_i128_to_f16(i128 %x) {
; GISEL-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[4:5]
; GISEL-NEXT: v_mov_b32_e32 v4, s4
; GISEL-NEXT: s_and_saveexec_b64 s[6:7], vcc
-; GISEL-NEXT: s_cbranch_execz .LBB5_14
+; GISEL-NEXT: s_cbranch_execz .LBB5_12
; GISEL-NEXT: ; %bb.1: ; %itofp-if-end
; GISEL-NEXT: v_ffbh_u32_e32 v5, v0
; GISEL-NEXT: v_ffbh_u32_e32 v4, v1
@@ -1503,29 +1547,22 @@ define half @uitofp_i128_to_f16(i128 %x) {
; GISEL-NEXT: ; implicit-def: $vgpr4
; GISEL-NEXT: s_and_saveexec_b64 s[4:5], vcc
; GISEL-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
-; GISEL-NEXT: ; %bb.2: ; %itofp-if-else
-; GISEL-NEXT: v_add_u32_e32 v2, 0xffffff98, v5
-; GISEL-NEXT: v_lshlrev_b64 v[0:1], v2, v[0:1]
-; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v2
-; GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v0, vcc
-; GISEL-NEXT: ; implicit-def: $vgpr6
-; GISEL-NEXT: ; implicit-def: $vgpr0
-; GISEL-NEXT: ; implicit-def: $vgpr2
-; GISEL-NEXT: ; %bb.3: ; %Flow3
+; GISEL-NEXT: s_cbranch_execnz .LBB5_13
+; GISEL-NEXT: .LBB5_2: ; %Flow3
; GISEL-NEXT: s_or_saveexec_b64 s[8:9], s[4:5]
; GISEL-NEXT: v_sub_u32_e32 v7, 0x7f, v5
; GISEL-NEXT: s_xor_b64 exec, exec, s[8:9]
-; GISEL-NEXT: s_cbranch_execz .LBB5_13
-; GISEL-NEXT: ; %bb.4: ; %NodeBlock
+; GISEL-NEXT: s_cbranch_execz .LBB5_11
+; GISEL-NEXT: ; %bb.3: ; %NodeBlock
; GISEL-NEXT: v_cmp_le_i32_e32 vcc, 26, v6
; GISEL-NEXT: s_and_saveexec_b64 s[4:5], vcc
; GISEL-NEXT: s_xor_b64 s[10:11], exec, s[4:5]
-; GISEL-NEXT: s_cbranch_execz .LBB5_8
-; GISEL-NEXT: ; %bb.5: ; %LeafBlock
+; GISEL-NEXT: s_cbranch_execz .LBB5_7
+; GISEL-NEXT: ; %bb.4: ; %LeafBlock
; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 26, v6
; GISEL-NEXT: s_and_saveexec_b64 s[12:13], vcc
-; GISEL-NEXT: s_cbranch_execz .LBB5_7
-; GISEL-NEXT: ; %bb.6: ; %itofp-sw-default
+; GISEL-NEXT: s_cbranch_execz .LBB5_6
+; GISEL-NEXT: ; %bb.5: ; %itofp-sw-default
; GISEL-NEXT: v_sub_u32_e32 v4, 0x66, v5
; GISEL-NEXT: v_sub_u32_e32 v10, 64, v4
; GISEL-NEXT: v_lshrrev_b64 v[8:9], v4, v[0:1]
@@ -1567,13 +1604,13 @@ define half @uitofp_i128_to_f16(i128 %x) {
; GISEL-NEXT: v_mov_b32_e32 v1, v4
; GISEL-NEXT: v_mov_b32_e32 v2, v5
; GISEL-NEXT: v_mov_b32_e32 v3, v6
-; GISEL-NEXT: .LBB5_7: ; %Flow1
+; GISEL-NEXT: .LBB5_6: ; %Flow1
; GISEL-NEXT: s_or_b64 exec, exec, s[12:13]
-; GISEL-NEXT: .LBB5_8: ; %Flow2
+; GISEL-NEXT: .LBB5_7: ; %Flow2
; GISEL-NEXT: s_andn2_saveexec_b64 s[4:5], s[10:11]
-; GISEL-NEXT: ; %bb.9: ; %itofp-sw-bb
+; GISEL-NEXT: ; %bb.8: ; %itofp-sw-bb
; GISEL-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1]
-; GISEL-NEXT: ; %bb.10: ; %itofp-sw-epilog
+; GISEL-NEXT: ; %bb.9: ; %itofp-sw-epilog
; GISEL-NEXT: s_or_b64 exec, exec, s[4:5]
; GISEL-NEXT: v_bfe_u32 v2, v0, 2, 1
; GISEL-NEXT: v_or_b32_e32 v0, v0, v2
@@ -1584,21 +1621,32 @@ define half @uitofp_i128_to_f16(i128 %x) {
; GISEL-NEXT: v_lshrrev_b64 v[4:5], 2, v[0:1]
; GISEL-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[2:3]
; GISEL-NEXT: s_and_saveexec_b64 s[4:5], vcc
-; GISEL-NEXT: ; %bb.11: ; %itofp-if-then20
-; GISEL-NEXT: v_lshrrev_b64 v[4:5], 3, v[0:1]
-; GISEL-NEXT: v_mov_b32_e32 v7, v6
-; GISEL-NEXT: ; %bb.12: ; %Flow
+; GISEL-NEXT: s_cbranch_execnz .LBB5_14
+; GISEL-NEXT: .LBB5_10: ; %Flow
; GISEL-NEXT: s_or_b64 exec, exec, s[4:5]
-; GISEL-NEXT: .LBB5_13: ; %Flow4
+; GISEL-NEXT: .LBB5_11: ; %Flow4
; GISEL-NEXT: s_or_b64 exec, exec, s[8:9]
; GISEL-NEXT: v_lshl_add_u32 v0, v7, 23, 1.0
; GISEL-NEXT: v_mov_b32_e32 v1, 0x7fffff
; GISEL-NEXT: v_and_or_b32 v0, v4, v1, v0
; GISEL-NEXT: v_cvt_f16_f32_e32 v4, v0
-; GISEL-NEXT: .LBB5_14: ; %Flow5
+; GISEL-NEXT: .LBB5_12: ; %Flow5
; GISEL-NEXT: s_or_b64 exec, exec, s[6:7]
; GISEL-NEXT: v_mov_b32_e32 v0, v4
; GISEL-NEXT: s_setpc_b64 s[30:31]
+; GISEL-NEXT: .LBB5_13: ; %itofp-if-else
+; GISEL-NEXT: v_add_u32_e32 v2, 0xffffff98, v5
+; GISEL-NEXT: v_lshlrev_b64 v[0:1], v2, v[0:1]
+; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v2
+; GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v0, vcc
+; GISEL-NEXT: ; implicit-def: $vgpr6
+; GISEL-NEXT: ; implicit-def: $vgpr0
+; GISEL-NEXT: ; implicit-def: $vgpr2
+; GISEL-NEXT: s_branch .LBB5_2
+; GISEL-NEXT: .LBB5_14: ; %itofp-if-then20
+; GISEL-NEXT: v_lshrrev_b64 v[4:5], 3, v[0:1]
+; GISEL-NEXT: v_mov_b32_e32 v7, v6
+; GISEL-NEXT: s_branch .LBB5_10
%cvt = uitofp i128 %x to half
ret half %cvt
}
diff --git a/llvm/test/CodeGen/RISCV/bitint-fp-conv-200.ll b/llvm/test/CodeGen/RISCV/bitint-fp-conv-200.ll
index 00a1ed506d1ed..be0ea161fb762 100644
--- a/llvm/test/CodeGen/RISCV/bitint-fp-conv-200.ll
+++ b/llvm/test/CodeGen/RISCV/bitint-fp-conv-200.ll
@@ -120,7 +120,7 @@ define void @test_bitint_200_to_float(ptr %in, ptr %out) nounwind {
; RV64-NEXT: or a0, a2, a4
; RV64-NEXT: or a6, a3, a5
; RV64-NEXT: or a0, a6, a0
-; RV64-NEXT: beqz a0, .LBB1_20
+; RV64-NEXT: beqz a0, .LBB1_17
; RV64-NEXT: # %bb.1: # %itofp-if-end
; RV64-NEXT: slli a0, a4, 56
; RV64-NEXT: srai a0, a0, 63
@@ -311,34 +311,14 @@ define void @test_bitint_200_to_float(ptr %in, ptr %out) nounwind {
; RV64-NEXT: sub a7, a6, t0
; RV64-NEXT: li t2, 25
; RV64-NEXT: sub a6, t1, t0
-; RV64-NEXT: blt a7, t2, .LBB1_14
+; RV64-NEXT: blt a7, t2, .LBB1_18
; RV64-NEXT: # %bb.11: # %itofp-if-then4
; RV64-NEXT: li t1, 26
-; RV64-NEXT: beq a7, t1, .LBB1_16
+; RV64-NEXT: beq a7, t1, .LBB1_14
; RV64-NEXT: # %bb.12: # %itofp-if-then4
; RV64-NEXT: li t1, 25
-; RV64-NEXT: bne a7, t1, .LBB1_15
-; RV64-NEXT: # %bb.13: # %itofp-sw-bb
-; RV64-NEXT: srli a4, a2, 63
-; RV64-NEXT: slli a3, a3, 1
-; RV64-NEXT: or a3, a3, a4
-; RV64-NEXT: slli a2, a2, 1
-; RV64-NEXT: j .LBB1_16
-; RV64-NEXT: .LBB1_14: # %itofp-if-else
-; RV64-NEXT: addi a3, t0, -176
-; RV64-NEXT: sd a2, 160(sp)
-; RV64-NEXT: sd zero, 128(sp)
-; RV64-NEXT: sd zero, 136(sp)
-; RV64-NEXT: sd zero, 144(sp)
-; RV64-NEXT: sd zero, 152(sp)
-; RV64-NEXT: srli a2, a3, 3
-; RV64-NEXT: andi a2, a2, 24
-; RV64-NEXT: addi a4, sp, 160
-; RV64-NEXT: sub a4, a4, a2
-; RV64-NEXT: ld a2, 0(a4)
-; RV64-NEXT: sll a2, a2, a3
-; RV64-NEXT: j .LBB1_19
-; RV64-NEXT: .LBB1_15: # %itofp-sw-default
+; RV64-NEXT: beq a7, t1, .LBB1_20
+; RV64-NEXT: # %bb.13: # %itofp-sw-default
; RV64-NEXT: li t2, 174
; RV64-NEXT: sd zero, 96(sp)
; RV64-NEXT: sd zero, 104(sp)
@@ -405,7 +385,7 @@ define void @test_bitint_200_to_float(ptr %in, ptr %out) nounwind {
; RV64-NEXT: snez a2, a2
; RV64-NEXT: or a2, t2, a2
; RV64-NEXT: mv a3, t0
-; RV64-NEXT: .LBB1_16: # %itofp-sw-epilog
+; RV64-NEXT: .LBB1_14: # %itofp-sw-epilog
; RV64-NEXT: slli a4, a2, 61
; RV64-NEXT: srli a4, a4, 63
; RV64-NEXT: or a2, a2, a4
@@ -413,18 +393,12 @@ define void @test_bitint_200_to_float(ptr %in, ptr %out) nounwind {
; RV64-NEXT: seqz a4, a2
; RV64-NEXT: slli a5, a2, 37
; RV64-NEXT: add a3, a3, a4
-; RV64-NEXT: bltz a5, .LBB1_18
-; RV64-NEXT: # %bb.17:
+; RV64-NEXT: bltz a5, .LBB1_19
+; RV64-NEXT: # %bb.15:
; RV64-NEXT: srli a2, a2, 2
; RV64-NEXT: slli a3, a3, 62
; RV64-NEXT: or a2, a2, a3
-; RV64-NEXT: j .LBB1_19
-; RV64-NEXT: .LBB1_18: # %itofp-if-then20
-; RV64-NEXT: srli a2, a2, 3
-; RV64-NEXT: slli a3, a3, 61
-; RV64-NEXT: or a2, a2, a3
-; RV64-NEXT: mv a6, a7
-; RV64-NEXT: .LBB1_19: # %itofp-if-end26
+; RV64-NEXT: .LBB1_16: # %itofp-if-end26
; RV64-NEXT: lui a3, 524288
; RV64-NEXT: slli a6, a6, 23
; RV64-NEXT: and a0, a0, a3
@@ -436,9 +410,35 @@ define void @test_bitint_200_to_float(ptr %in, ptr %out) nounwind {
; RV64-NEXT: or a0, a2, a0
; RV64-NEXT: ld s0, 200(sp) # 8-byte Folded Reload
; RV64-NEXT: addi sp, sp, 208
-; RV64-NEXT: .LBB1_20: # %itofp-return
+; RV64-NEXT: .LBB1_17: # %itofp-return
; RV64-NEXT: sw a0, 0(a1)
; RV64-NEXT: ret
+; RV64-NEXT: .LBB1_18: # %itofp-if-else
+; RV64-NEXT: addi a3, t0, -176
+; RV64-NEXT: sd a2, 160(sp)
+; RV64-NEXT: sd zero, 128(sp)
+; RV64-NEXT: sd zero, 136(sp)
+; RV64-NEXT: sd zero, 144(sp)
+; RV64-NEXT: sd zero, 152(sp)
+; RV64-NEXT: srli a2, a3, 3
+; RV64-NEXT: andi a2, a2, 24
+; RV64-NEXT: addi a4, sp, 160
+; RV64-NEXT: sub a4, a4, a2
+; RV64-NEXT: ld a2, 0(a4)
+; RV64-NEXT: sll a2, a2, a3
+; RV64-NEXT: j .LBB1_16
+; RV64-NEXT: .LBB1_19: # %itofp-if-then20
+; RV64-NEXT: srli a2, a2, 3
+; RV64-NEXT: slli a3, a3, 61
+; RV64-NEXT: or a2, a2, a3
+; RV64-NEXT: mv a6, a7
+; RV64-NEXT: j .LBB1_16
+; RV64-NEXT: .LBB1_20: # %itofp-sw-bb
+; RV64-NEXT: srli a4, a2, 63
+; RV64-NEXT: slli a3, a3, 1
+; RV64-NEXT: or a3, a3, a4
+; RV64-NEXT: slli a2, a2, 1
+; RV64-NEXT: j .LBB1_14
;
; RV32-LABEL: test_bitint_200_to_float:
; RV32: # %bb.0: # %itofp-entry
@@ -455,7 +455,7 @@ define void @test_bitint_200_to_float(ptr %in, ptr %out) nounwind {
; RV32-NEXT: or a4, t5, a3
; RV32-NEXT: or a4, a4, t4
; RV32-NEXT: or a0, a0, a4
-; RV32-NEXT: beqz a0, .LBB1_35
+; RV32-NEXT: beqz a0, .LBB1_32
; RV32-NEXT: # %bb.1: # %itofp-if-end
; RV32-NEXT: addi sp, sp, -224
; RV32-NEXT: sw s0, 220(sp) # 4-byte Folded Spill
@@ -776,38 +776,14 @@ define void @test_bitint_200_to_float(ptr %in, ptr %out) nounwind {
; RV32-NEXT: sub t3, t2, a6
; RV32-NEXT: li t5, 25
; RV32-NEXT: sub t2, t4, a6
-; RV32-NEXT: blt t3, t5, .LBB1_29
+; RV32-NEXT: blt t3, t5, .LBB1_33
; RV32-NEXT: # %bb.26: # %itofp-if-then4
; RV32-NEXT: li t4, 26
-; RV32-NEXT: beq t3, t4, .LBB1_31
+; RV32-NEXT: beq t3, t4, .LBB1_29
; RV32-NEXT: # %bb.27: # %itofp-if-then4
; RV32-NEXT: li t4, 25
-; RV32-NEXT: bne t3, t4, .LBB1_30
-; RV32-NEXT: # %bb.28: # %itofp-sw-bb
-; RV32-NEXT: srli a2, t1, 31
-; RV32-NEXT: slli t0, t0, 1
-; RV32-NEXT: or t0, t0, a2
-; RV32-NEXT: slli t1, t1, 1
-; RV32-NEXT: j .LBB1_31
-; RV32-NEXT: .LBB1_29: # %itofp-if-else
-; RV32-NEXT: addi a2, a6, -176
-; RV32-NEXT: sw t1, 160(sp)
-; RV32-NEXT: sw zero, 144(sp)
-; RV32-NEXT: sw zero, 148(sp)
-; RV32-NEXT: sw zero, 152(sp)
-; RV32-NEXT: sw zero, 156(sp)
-; RV32-NEXT: sw zero, 128(sp)
-; RV32-NEXT: sw zero, 132(sp)
-; RV32-NEXT: sw zero, 136(sp)
-; RV32-NEXT: sw zero, 140(sp)
-; RV32-NEXT: srli a3, a2, 3
-; RV32-NEXT: andi a3, a3, 28
-; RV32-NEXT: addi a4, sp, 160
-; RV32-NEXT: sub a4, a4, a3
-; RV32-NEXT: lw a3, 0(a4)
-; RV32-NEXT: sll a2, a3, a2
-; RV32-NEXT: j .LBB1_34
-; RV32-NEXT: .LBB1_30: # %itofp-sw-default
+; RV32-NEXT: beq t3, t4, .LBB1_35
+; RV32-NEXT: # %bb.28: # %itofp-sw-default
; RV32-NEXT: zext.b a7, a7
; RV32-NEXT: li t5, 174
; RV32-NEXT: sw zero, 112(sp)
@@ -900,7 +876,7 @@ define void @test_bitint_200_to_float(ptr %in, ptr %out) nounwind {
; RV32-NEXT: snez a2, a2
; RV32-NEXT: or t1, s0, a2
; RV32-NEXT: mv t0, t5
-; RV32-NEXT: .LBB1_31: # %itofp-sw-epilog
+; RV32-NEXT: .LBB1_29: # %itofp-sw-epilog
; RV32-NEXT: slli a2, t1, 29
; RV32-NEXT: srli a2, a2, 31
; RV32-NEXT: or a2, t1, a2
@@ -908,18 +884,12 @@ define void @test_bitint_200_to_float(ptr %in, ptr %out) nounwind {
; RV32-NEXT: seqz a3, a2
; RV32-NEXT: slli a4, a2, 5
; RV32-NEXT: add t0, t0, a3
-; RV32-NEXT: bltz a4, .LBB1_33
-; RV32-NEXT: # %bb.32:
+; RV32-NEXT: bltz a4, .LBB1_34
+; RV32-NEXT: # %bb.30:
; RV32-NEXT: srli a2, a2, 2
; RV32-NEXT: slli t0, t0, 30
; RV32-NEXT: or a2, a2, t0
-; RV32-NEXT: j .LBB1_34
-; RV32-NEXT: .LBB1_33: # %itofp-if-then20
-; RV32-NEXT: srli a2, a2, 3
-; RV32-NEXT: slli t0, t0, 29
-; RV32-NEXT: or a2, a2, t0
-; RV32-NEXT: mv t2, t3
-; RV32-NEXT: .LBB1_34: # %itofp-if-end26
+; RV32-NEXT: .LBB1_31: # %itofp-if-end26
; RV32-NEXT: lui a3, 524288
; RV32-NEXT: slli t2, t2, 23
; RV32-NEXT: and a0, a0, a3
@@ -937,9 +907,39 @@ define void @test_bitint_200_to_float(ptr %in, ptr %out) nounwind {
; RV32-NEXT: lw s5, 200(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s6, 196(sp) # 4-byte Folded Reload
; RV32-NEXT: addi sp, sp, 224
-; RV32-NEXT: .LBB1_35: # %itofp-return
+; RV32-NEXT: .LBB1_32: # %itofp-return
; RV32-NEXT: sw a0, 0(a1)
; RV32-NEXT: ret
+; RV32-NEXT: .LBB1_33: # %itofp-if-else
+; RV32-NEXT: addi a2, a6, -176
+; RV32-NEXT: sw t1, 160(sp)
+; RV32-NEXT: sw zero, 144(sp)
+; RV32-NEXT: sw zero, 148(sp)
+; RV32-NEXT: sw zero, 152(sp)
+; RV32-NEXT: sw zero, 156(sp)
+; RV32-NEXT: sw zero, 128(sp)
+; RV32-NEXT: sw zero, 132(sp)
+; RV32-NEXT: sw zero, 136(sp)
+; RV32-NEXT: sw zero, 140(sp)
+; RV32-NEXT: srli a3, a2, 3
+; RV32-NEXT: andi a3, a3, 28
+; RV32-NEXT: addi a4, sp, 160
+; RV32-NEXT: sub a4, a4, a3
+; RV32-NEXT: lw a3, 0(a4)
+; RV32-NEXT: sll a2, a3, a2
+; RV32-NEXT: j .LBB1_31
+; RV32-NEXT: .LBB1_34: # %itofp-if-then20
+; RV32-NEXT: srli a2, a2, 3
+; RV32-NEXT: slli t0, t0, 29
+; RV32-NEXT: or a2, a2, t0
+; RV32-NEXT: mv t2, t3
+; RV32-NEXT: j .LBB1_31
+; RV32-NEXT: .LBB1_35: # %itofp-sw-bb
+; RV32-NEXT: srli a2, t1, 31
+; RV32-NEXT: slli t0, t0, 1
+; RV32-NEXT: or t0, t0, a2
+; RV32-NEXT: slli t1, t1, 1
+; RV32-NEXT: j .LBB1_29
%1 = load i200, ptr %in, align 8
%2 = sitofp i200 %1 to float
store float %2, ptr %out, align 4
@@ -963,40 +963,45 @@ define void @test_float_to_bitint_200(ptr %in, ptr %out) nounwind {
; RV64-NEXT: sd s9, 168(sp) # 8-byte Folded Spill
; RV64-NEXT: sd s10, 160(sp) # 8-byte Folded Spill
; RV64-NEXT: sd s11, 152(sp) # 8-byte Folded Spill
-; RV64-NEXT: lw s0, 0(a0)
-; RV64-NEXT: slli a0, s0, 33
+; RV64-NEXT: lw s1, 0(a0)
+; RV64-NEXT: slli a0, s1, 33
; RV64-NEXT: srli a0, a0, 56
; RV64-NEXT: li a2, 127
+; RV64-NEXT: mv s0, a1
; RV64-NEXT: bgeu a0, a2, .LBB2_2
; RV64-NEXT: # %bb.1:
-; RV64-NEXT: li s2, 0
+; RV64-NEXT: li s3, 0
; RV64-NEXT: li s10, 0
; RV64-NEXT: li a0, 0
-; RV64-NEXT: li a2, 0
-; RV64-NEXT: j .LBB2_10
+; RV64-NEXT: li a1, 0
+; RV64-NEXT: j .LBB2_9
; RV64-NEXT: .LBB2_2: # %fp-to-i-if-end
-; RV64-NEXT: addi a2, a0, -327
-; RV64-NEXT: sltu a3, a2, a0
-; RV64-NEXT: addi a4, a3, -1
-; RV64-NEXT: zext.b a4, a4
-; RV64-NEXT: neg a5, a3
-; RV64-NEXT: xori a6, a4, 255
-; RV64-NEXT: or a5, a5, a6
-; RV64-NEXT: beqz a5, .LBB2_6
+; RV64-NEXT: addi a1, a0, -327
+; RV64-NEXT: sltu a2, a1, a0
+; RV64-NEXT: addi a3, a2, -1
+; RV64-NEXT: zext.b a3, a3
+; RV64-NEXT: neg a4, a2
+; RV64-NEXT: xori a5, a3, 255
+; RV64-NEXT: or a4, a4, a5
+; RV64-NEXT: beqz a4, .LBB2_4
; RV64-NEXT: # %bb.3: # %fp-to-i-if-end
-; RV64-NEXT: sltiu a2, a4, 256
-; RV64-NEXT: bnez a2, .LBB2_7
-; RV64-NEXT: .LBB2_4: # %fp-to-i-if-end9
-; RV64-NEXT: sd a1, 8(sp) # 8-byte Folded Spill
-; RV64-NEXT: slli a1, s0, 41
-; RV64-NEXT: srai s0, s0, 63
+; RV64-NEXT: sltiu a1, a3, 256
+; RV64-NEXT: j .LBB2_5
+; RV64-NEXT: .LBB2_4:
+; RV64-NEXT: sltiu a1, a1, -200
+; RV64-NEXT: or a1, a2, a1
+; RV64-NEXT: .LBB2_5: # %fp-to-i-if-end
+; RV64-NEXT: bnez a1, .LBB2_10
+; RV64-NEXT: # %bb.6: # %fp-to-i-if-end9
+; RV64-NEXT: slli a1, s1, 41
+; RV64-NEXT: srai s1, s1, 63
; RV64-NEXT: lui a2, 2048
; RV64-NEXT: li a3, 149
; RV64-NEXT: srli a1, a1, 41
; RV64-NEXT: or a1, a1, a2
-; RV64-NEXT: ori s8, s0, 1
+; RV64-NEXT: ori s9, s1, 1
; RV64-NEXT: bltu a3, a0, .LBB2_8
-; RV64-NEXT: # %bb.5: # %fp-to-i-if-then12
+; RV64-NEXT: # %bb.7: # %fp-to-i-if-then12
; RV64-NEXT: sd zero, 48(sp)
; RV64-NEXT: sd zero, 56(sp)
; RV64-NEXT: sd zero, 64(sp)
@@ -1027,79 +1032,69 @@ define void @test_float_to_bitint_200(ptr %in, ptr %out) nounwind {
; RV64-NEXT: srl a2, a2, a1
; RV64-NEXT: srl a0, a0, a1
; RV64-NEXT: or a3, a3, a6
-; RV64-NEXT: sd a3, 0(sp) # 8-byte Folded Spill
-; RV64-NEXT: or s5, a2, a7
-; RV64-NEXT: or s6, a0, a5
-; RV64-NEXT: srl s4, a4, a1
-; RV64-NEXT: mv a0, s6
+; RV64-NEXT: sd a3, 8(sp) # 8-byte Folded Spill
+; RV64-NEXT: or s6, a2, a7
+; RV64-NEXT: or s7, a0, a5
+; RV64-NEXT: srl a0, a4, a1
+; RV64-NEXT: sd a0, 0(sp) # 8-byte Folded Spill
+; RV64-NEXT: mv a0, s7
; RV64-NEXT: li a1, 0
-; RV64-NEXT: mv a2, s8
+; RV64-NEXT: mv a2, s9
; RV64-NEXT: li a3, 0
; RV64-NEXT: call __multi3
-; RV64-NEXT: mv s2, a0
-; RV64-NEXT: mv s7, a1
-; RV64-NEXT: mv a0, s5
+; RV64-NEXT: mv s3, a0
+; RV64-NEXT: mv s8, a1
+; RV64-NEXT: mv a0, s6
; RV64-NEXT: li a1, 0
-; RV64-NEXT: mv a2, s8
+; RV64-NEXT: mv a2, s9
; RV64-NEXT: li a3, 0
; RV64-NEXT: call __multi3
-; RV64-NEXT: add s7, a0, s7
-; RV64-NEXT: sltu a0, s7, a0
-; RV64-NEXT: add s9, a1, a0
-; RV64-NEXT: mv a0, s6
+; RV64-NEXT: add s8, a0, s8
+; RV64-NEXT: sltu a0, s8, a0
+; RV64-NEXT: add s2, a1, a0
+; RV64-NEXT: mv a0, s7
; RV64-NEXT: li a1, 0
-; RV64-NEXT: mv a2, s0
+; RV64-NEXT: mv a2, s1
; RV64-NEXT: li a3, 0
; RV64-NEXT: call __multi3
-; RV64-NEXT: add s10, a0, s7
+; RV64-NEXT: add s10, a0, s8
; RV64-NEXT: sltu a0, s10, a0
; RV64-NEXT: add a0, a1, a0
-; RV64-NEXT: add s11, s9, a0
-; RV64-NEXT: mv a0, s5
+; RV64-NEXT: add s11, s2, a0
+; RV64-NEXT: mv a0, s6
; RV64-NEXT: li a1, 0
-; RV64-NEXT: mv a2, s0
+; RV64-NEXT: mv a2, s1
; RV64-NEXT: li a3, 0
; RV64-NEXT: call __multi3
-; RV64-NEXT: mv s1, a0
-; RV64-NEXT: mv s3, s8
-; RV64-NEXT: mv s8, a1
-; RV64-NEXT: add s7, a0, s11
-; RV64-NEXT: mv a0, s0
-; RV64-NEXT: mv a1, s0
-; RV64-NEXT: mv a2, s6
-; RV64-NEXT: mv a3, s5
+; RV64-NEXT: mv s4, a0
+; RV64-NEXT: mv s5, s9
+; RV64-NEXT: mv s9, a1
+; RV64-NEXT: add s8, a0, s11
+; RV64-NEXT: mv a0, s1
+; RV64-NEXT: mv a1, s1
+; RV64-NEXT: mv a2, s7
+; RV64-NEXT: mv a3, s6
; RV64-NEXT: call __multi3
-; RV64-NEXT: mv s5, a0
-; RV64-NEXT: mv s6, a1
-; RV64-NEXT: mv a0, s3
-; RV64-NEXT: mv a1, s0
-; RV64-NEXT: ld a2, 0(sp) # 8-byte Folded Reload
-; RV64-NEXT: mv a3, s4
+; RV64-NEXT: mv s6, a0
+; RV64-NEXT: mv s7, a1
+; RV64-NEXT: mv a0, s5
+; RV64-NEXT: mv a1, s1
+; RV64-NEXT: ld a2, 8(sp) # 8-byte Folded Reload
+; RV64-NEXT: ld a3, 0(sp) # 8-byte Folded Reload
; RV64-NEXT: call __multi3
-; RV64-NEXT: add a2, s5, a0
-; RV64-NEXT: sltu a3, s7, s1
-; RV64-NEXT: sltu a4, s11, s9
-; RV64-NEXT: add a1, s6, a1
-; RV64-NEXT: add a0, s7, a2
-; RV64-NEXT: add a4, s8, a4
-; RV64-NEXT: sltu a2, a2, s5
-; RV64-NEXT: sltu a5, a0, s7
+; RV64-NEXT: add a2, s6, a0
+; RV64-NEXT: sltu a3, s8, s4
+; RV64-NEXT: sltu a4, s11, s2
+; RV64-NEXT: add a1, s7, a1
+; RV64-NEXT: add a0, s8, a2
+; RV64-NEXT: add a4, s9, a4
+; RV64-NEXT: sltu a2, a2, s6
+; RV64-NEXT: sltu a5, a0, s8
; RV64-NEXT: add a3, a4, a3
; RV64-NEXT: add a1, a1, a2
; RV64-NEXT: add a1, a3, a1
-; RV64-NEXT: add a2, a1, a5
+; RV64-NEXT: add a1, a1, a5
; RV64-NEXT: j .LBB2_9
-; RV64-NEXT: .LBB2_6:
-; RV64-NEXT: sltiu a2, a2, -200
-; RV64-NEXT: or a2, a3, a2
-; RV64-NEXT: beqz a2, .LBB2_4
-; RV64-NEXT: .LBB2_7: # %fp-to-i-if-then5
-; RV64-NEXT: srai s0, s0, 31
-; RV64-NEXT: not s2, s0
-; RV64-NEXT: xori a2, s0, 127
-; RV64-NEXT: mv s10, s2
-; RV64-NEXT: mv a0, s2
-; RV64-NEXT: j .LBB2_10
; RV64-NEXT: .LBB2_8: # %fp-to-i-if-else
; RV64-NEXT: sd a1, 112(sp)
; RV64-NEXT: sd zero, 120(sp)
@@ -1129,70 +1124,68 @@ define void @test_float_to_bitint_200(ptr %in, ptr %out) nounwind {
; RV64-NEXT: sll a2, a2, a0
; RV64-NEXT: sll a5, a5, a0
; RV64-NEXT: sll a1, a1, a0
-; RV64-NEXT: or s3, a2, a6
+; RV64-NEXT: or s4, a2, a6
; RV64-NEXT: or a2, a5, a7
; RV64-NEXT: or a3, a1, a3
-; RV64-NEXT: sll s4, a4, a0
-; RV64-NEXT: mv a0, s8
-; RV64-NEXT: mv a1, s0
+; RV64-NEXT: sll s5, a4, a0
+; RV64-NEXT: mv a0, s9
+; RV64-NEXT: mv a1, s1
; RV64-NEXT: call __multi3
-; RV64-NEXT: mv s2, a0
-; RV64-NEXT: mv s5, a1
-; RV64-NEXT: mv a0, s0
-; RV64-NEXT: mv a1, s0
-; RV64-NEXT: mv a2, s4
-; RV64-NEXT: mv a3, s3
+; RV64-NEXT: mv s3, a0
+; RV64-NEXT: mv s6, a1
+; RV64-NEXT: mv a0, s1
+; RV64-NEXT: mv a1, s1
+; RV64-NEXT: mv a2, s5
+; RV64-NEXT: mv a3, s4
; RV64-NEXT: call __multi3
-; RV64-NEXT: add a1, a1, s5
-; RV64-NEXT: add s1, a0, s2
-; RV64-NEXT: sltu a0, s1, a0
-; RV64-NEXT: add s7, a1, a0
-; RV64-NEXT: mv a0, s3
+; RV64-NEXT: add a1, a1, s6
+; RV64-NEXT: add s2, a0, s3
+; RV64-NEXT: sltu a0, s2, a0
+; RV64-NEXT: add s8, a1, a0
+; RV64-NEXT: mv a0, s4
; RV64-NEXT: li a1, 0
-; RV64-NEXT: mv a2, s8
+; RV64-NEXT: mv a2, s9
; RV64-NEXT: li a3, 0
; RV64-NEXT: call __multi3
-; RV64-NEXT: mv s5, a0
-; RV64-NEXT: mv s6, a1
-; RV64-NEXT: mv a0, s4
+; RV64-NEXT: mv s6, a0
+; RV64-NEXT: mv s7, a1
+; RV64-NEXT: mv a0, s5
; RV64-NEXT: li a1, 0
-; RV64-NEXT: mv a2, s8
+; RV64-NEXT: mv a2, s9
; RV64-NEXT: li a3, 0
; RV64-NEXT: call __multi3
-; RV64-NEXT: mv s2, a0
-; RV64-NEXT: add s10, s5, a1
-; RV64-NEXT: sltu a0, s10, s5
-; RV64-NEXT: add s6, s6, a0
-; RV64-NEXT: mv a0, s4
+; RV64-NEXT: mv s3, a0
+; RV64-NEXT: add s10, s6, a1
+; RV64-NEXT: sltu a0, s10, s6
+; RV64-NEXT: add s7, s7, a0
+; RV64-NEXT: mv a0, s5
; RV64-NEXT: li a1, 0
-; RV64-NEXT: mv a2, s0
+; RV64-NEXT: mv a2, s1
; RV64-NEXT: li a3, 0
; RV64-NEXT: call __multi3
; RV64-NEXT: add s10, a0, s10
; RV64-NEXT: sltu a0, s10, a0
; RV64-NEXT: add a0, a1, a0
-; RV64-NEXT: add s4, s6, a0
-; RV64-NEXT: sltu s5, s4, s6
-; RV64-NEXT: mv a0, s3
+; RV64-NEXT: add s5, s7, a0
+; RV64-NEXT: sltu s6, s5, s7
+; RV64-NEXT: mv a0, s4
; RV64-NEXT: li a1, 0
-; RV64-NEXT: mv a2, s0
+; RV64-NEXT: mv a2, s1
; RV64-NEXT: li a3, 0
; RV64-NEXT: call __multi3
-; RV64-NEXT: add a1, a1, s5
-; RV64-NEXT: add s4, a0, s4
-; RV64-NEXT: sltu a2, s4, a0
-; RV64-NEXT: add a0, s4, s1
+; RV64-NEXT: add a1, a1, s6
+; RV64-NEXT: add s5, a0, s5
+; RV64-NEXT: sltu a2, s5, a0
+; RV64-NEXT: add a0, s5, s2
+; RV64-NEXT: add a1, a1, a2
+; RV64-NEXT: add a1, a1, s8
+; RV64-NEXT: sltu a2, a0, s5
; RV64-NEXT: add a1, a1, a2
-; RV64-NEXT: add a1, a1, s7
-; RV64-NEXT: sltu a2, a0, s4
-; RV64-NEXT: add a2, a1, a2
; RV64-NEXT: .LBB2_9: # %fp-to-i-cleanup
-; RV64-NEXT: ld a1, 8(sp) # 8-byte Folded Reload
-; RV64-NEXT: .LBB2_10: # %fp-to-i-cleanup
-; RV64-NEXT: sd s2, 0(a1)
-; RV64-NEXT: sd s10, 8(a1)
-; RV64-NEXT: sd a0, 16(a1)
-; RV64-NEXT: sb a2, 24(a1)
+; RV64-NEXT: sd s3, 0(s0)
+; RV64-NEXT: sd s10, 8(s0)
+; RV64-NEXT: sd a0, 16(s0)
+; RV64-NEXT: sb a1, 24(s0)
; RV64-NEXT: ld ra, 248(sp) # 8-byte Folded Reload
; RV64-NEXT: ld s0, 240(sp) # 8-byte Folded Reload
; RV64-NEXT: ld s1, 232(sp) # 8-byte Folded Reload
@@ -1208,23 +1201,30 @@ define void @test_float_to_bitint_200(ptr %in, ptr %out) nounwind {
; RV64-NEXT: ld s11, 152(sp) # 8-byte Folded Reload
; RV64-NEXT: addi sp, sp, 256
; RV64-NEXT: ret
+; RV64-NEXT: .LBB2_10: # %fp-to-i-if-then5
+; RV64-NEXT: srai s1, s1, 31
+; RV64-NEXT: not s3, s1
+; RV64-NEXT: xori a1, s1, 127
+; RV64-NEXT: mv s10, s3
+; RV64-NEXT: mv a0, s3
+; RV64-NEXT: j .LBB2_9
;
; RV32-LABEL: test_float_to_bitint_200:
; RV32: # %bb.0: # %fp-to-i-entry
-; RV32-NEXT: addi sp, sp, -336
-; RV32-NEXT: sw ra, 332(sp) # 4-byte Folded Spill
-; RV32-NEXT: sw s0, 328(sp) # 4-byte Folded Spill
-; RV32-NEXT: sw s1, 324(sp) # 4-byte Folded Spill
-; RV32-NEXT: sw s2, 320(sp) # 4-byte Folded Spill
-; RV32-NEXT: sw s3, 316(sp) # 4-byte Folded Spill
-; RV32-NEXT: sw s4, 312(sp) # 4-byte Folded Spill
-; RV32-NEXT: sw s5, 308(sp) # 4-byte Folded Spill
-; RV32-NEXT: sw s6, 304(sp) # 4-byte Folded Spill
-; RV32-NEXT: sw s7, 300(sp) # 4-byte Folded Spill
-; RV32-NEXT: sw s8, 296(sp) # 4-byte Folded Spill
-; RV32-NEXT: sw s9, 292(sp) # 4-byte Folded Spill
-; RV32-NEXT: sw s10, 288(sp) # 4-byte Folded Spill
-; RV32-NEXT: sw s11, 284(sp) # 4-byte Folded Spill
+; RV32-NEXT: addi sp, sp, -320
+; RV32-NEXT: sw ra, 316(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw s0, 312(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw s1, 308(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw s2, 304(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw s3, 300(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw s4, 296(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw s5, 292(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw s6, 288(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw s7, 284(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw s8, 280(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw s9, 276(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw s10, 272(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw s11, 268(sp) # 4-byte Folded Spill
; RV32-NEXT: lw a2, 0(a0)
; RV32-NEXT: slli a0, a2, 1
; RV32-NEXT: srli a0, a0, 24
@@ -1233,49 +1233,34 @@ define void @test_float_to_bitint_200(ptr %in, ptr %out) nounwind {
; RV32-NEXT: # %bb.1:
; RV32-NEXT: li a3, 0
; RV32-NEXT: li a2, 0
-; RV32-NEXT: li a4, 0
-; RV32-NEXT: li s3, 0
-; RV32-NEXT: li s8, 0
; RV32-NEXT: li s5, 0
+; RV32-NEXT: li a4, 0
+; RV32-NEXT: li a5, 0
+; RV32-NEXT: li s0, 0
; RV32-NEXT: li a0, 0
; RV32-NEXT: j .LBB2_31
; RV32-NEXT: .LBB2_2: # %fp-to-i-if-end
-; RV32-NEXT: addi a3, a0, -327
-; RV32-NEXT: sltu a4, a3, a0
-; RV32-NEXT: sltiu a3, a3, -200
-; RV32-NEXT: addi a5, a4, -1
-; RV32-NEXT: or a3, a4, a3
+; RV32-NEXT: addi a4, a0, -327
+; RV32-NEXT: sltu a3, a4, a0
+; RV32-NEXT: sltiu a5, a4, -200
+; RV32-NEXT: addi a4, a3, -1
+; RV32-NEXT: or a3, a3, a5
; RV32-NEXT: xori a3, a3, 1
-; RV32-NEXT: and a3, a5, a3
-; RV32-NEXT: bnez a3, .LBB2_4
-; RV32-NEXT: # %bb.3: # %fp-to-i-if-then5
-; RV32-NEXT: srli a2, a2, 31
-; RV32-NEXT: addi a3, a2, -1
-; RV32-NEXT: addi a0, a2, -129
-; RV32-NEXT: zext.b a0, a0
-; RV32-NEXT: mv a2, a3
-; RV32-NEXT: mv a4, a3
-; RV32-NEXT: mv s3, a3
-; RV32-NEXT: mv s8, a3
-; RV32-NEXT: mv s5, a3
-; RV32-NEXT: j .LBB2_31
-; RV32-NEXT: .LBB2_4: # %fp-to-i-if-end9
-; RV32-NEXT: sw a1, 136(sp) # 4-byte Folded Spill
-; RV32-NEXT: srai s0, a2, 31
+; RV32-NEXT: and a3, a4, a3
+; RV32-NEXT: beqz a3, .LBB2_32
+; RV32-NEXT: # %bb.3: # %fp-to-i-if-end9
+; RV32-NEXT: sw a1, 120(sp) # 4-byte Folded Spill
+; RV32-NEXT: srai s1, a2, 31
; RV32-NEXT: slli a2, a2, 9
; RV32-NEXT: lui a1, 2048
; RV32-NEXT: li a3, 149
-; RV32-NEXT: zext.b a4, s0
-; RV32-NEXT: sw a4, 132(sp) # 4-byte Folded Spill
+; RV32-NEXT: zext.b a4, s1
+; RV32-NEXT: sw a4, 116(sp) # 4-byte Folded Spill
; RV32-NEXT: srli a2, a2, 9
; RV32-NEXT: or a1, a2, a1
-; RV32-NEXT: ori s11, s0, 1
-; RV32-NEXT: bltu a3, a0, .LBB2_18
-; RV32-NEXT: # %bb.5: # %fp-to-i-if-then12
-; RV32-NEXT: sw zero, 200(sp)
-; RV32-NEXT: sw zero, 204(sp)
-; RV32-NEXT: sw zero, 208(sp)
-; RV32-NEXT: sw zero, 212(sp)
+; RV32-NEXT: ori s11, s1, 1
+; RV32-NEXT: bltu a3, a0, .LBB2_17
+; RV32-NEXT: # %bb.4: # %fp-to-i-if-then12
; RV32-NEXT: sw zero, 184(sp)
; RV32-NEXT: sw zero, 188(sp)
; RV32-NEXT: sw zero, 192(sp)
@@ -1284,12 +1269,16 @@ define void @test_float_to_bitint_200(ptr %in, ptr %out) nounwind {
; RV32-NEXT: sw zero, 172(sp)
; RV32-NEXT: sw zero, 176(sp)
; RV32-NEXT: sw zero, 180(sp)
-; RV32-NEXT: sw a1, 152(sp)
+; RV32-NEXT: sw zero, 152(sp)
; RV32-NEXT: sw zero, 156(sp)
; RV32-NEXT: sw zero, 160(sp)
; RV32-NEXT: sw zero, 164(sp)
+; RV32-NEXT: sw a1, 136(sp)
+; RV32-NEXT: sw zero, 140(sp)
+; RV32-NEXT: sw zero, 144(sp)
+; RV32-NEXT: sw zero, 148(sp)
; RV32-NEXT: li a1, 150
-; RV32-NEXT: addi a2, sp, 152
+; RV32-NEXT: addi a2, sp, 136
; RV32-NEXT: sub t0, a1, a0
; RV32-NEXT: srli a0, t0, 3
; RV32-NEXT: andi a1, t0, 31
@@ -1300,424 +1289,415 @@ define void @test_float_to_bitint_200(ptr %in, ptr %out) nounwind {
; RV32-NEXT: lw a2, 20(a0)
; RV32-NEXT: lw a6, 24(a0)
; RV32-NEXT: lw a3, 28(a0)
-; RV32-NEXT: sw a3, 88(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw a3, 68(sp) # 4-byte Folded Spill
; RV32-NEXT: lw a3, 0(a0)
; RV32-NEXT: lw a4, 4(a0)
; RV32-NEXT: lw a5, 8(a0)
; RV32-NEXT: lw a0, 12(a0)
; RV32-NEXT: srl a7, a2, t0
-; RV32-NEXT: sw a7, 120(sp) # 4-byte Folded Spill
-; RV32-NEXT: sw a6, 84(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw a7, 104(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw a6, 64(sp) # 4-byte Folded Spill
; RV32-NEXT: slli a6, a6, 1
; RV32-NEXT: srl a7, a1, t0
-; RV32-NEXT: sw a7, 116(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw a7, 100(sp) # 4-byte Folded Spill
; RV32-NEXT: slli a2, a2, 1
; RV32-NEXT: srl a7, a0, t0
; RV32-NEXT: slli a1, a1, 1
; RV32-NEXT: sll a1, a1, t1
-; RV32-NEXT: or s3, a7, a1
+; RV32-NEXT: or s7, a7, a1
; RV32-NEXT: srl a1, a5, t0
; RV32-NEXT: slli a0, a0, 1
; RV32-NEXT: sll a0, a0, t1
-; RV32-NEXT: or s4, a1, a0
+; RV32-NEXT: or s8, a1, a0
; RV32-NEXT: srl a0, a4, t0
; RV32-NEXT: slli a5, a5, 1
-; RV32-NEXT: sw t0, 96(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw t0, 76(sp) # 4-byte Folded Spill
; RV32-NEXT: srl a1, a3, t0
; RV32-NEXT: slli a4, a4, 1
-; RV32-NEXT: sll s8, a6, t1
-; RV32-NEXT: sll s9, a2, t1
+; RV32-NEXT: sll s9, a6, t1
+; RV32-NEXT: sll s10, a2, t1
; RV32-NEXT: sll a2, a5, t1
-; RV32-NEXT: sw t1, 92(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw t1, 72(sp) # 4-byte Folded Spill
; RV32-NEXT: sll a3, a4, t1
-; RV32-NEXT: or s5, a0, a2
-; RV32-NEXT: or s6, a1, a3
-; RV32-NEXT: mv a0, s6
+; RV32-NEXT: or s3, a0, a2
+; RV32-NEXT: or s4, a1, a3
+; RV32-NEXT: mv a0, s4
; RV32-NEXT: li a1, 0
; RV32-NEXT: mv a2, s11
; RV32-NEXT: li a3, 0
; RV32-NEXT: call __muldi3
-; RV32-NEXT: sw a0, 128(sp) # 4-byte Folded Spill
-; RV32-NEXT: mv s1, a1
-; RV32-NEXT: mv a0, s5
+; RV32-NEXT: sw a0, 112(sp) # 4-byte Folded Spill
+; RV32-NEXT: mv s2, a1
+; RV32-NEXT: mv a0, s3
; RV32-NEXT: li a1, 0
; RV32-NEXT: mv a2, s11
; RV32-NEXT: li a3, 0
; RV32-NEXT: call __muldi3
-; RV32-NEXT: add s1, a0, s1
-; RV32-NEXT: sltu a0, s1, a0
-; RV32-NEXT: add s2, a1, a0
-; RV32-NEXT: sw s6, 140(sp) # 4-byte Folded Spill
-; RV32-NEXT: mv a0, s6
+; RV32-NEXT: add s2, a0, s2
+; RV32-NEXT: sltu a0, s2, a0
+; RV32-NEXT: add s0, a1, a0
+; RV32-NEXT: sw s4, 124(sp) # 4-byte Folded Spill
+; RV32-NEXT: mv a0, s4
; RV32-NEXT: li a1, 0
-; RV32-NEXT: mv a2, s0
+; RV32-NEXT: mv a2, s1
; RV32-NEXT: li a3, 0
; RV32-NEXT: call __muldi3
-; RV32-NEXT: add s1, a0, s1
-; RV32-NEXT: sw s1, 124(sp) # 4-byte Folded Spill
-; RV32-NEXT: sltu a0, s1, a0
+; RV32-NEXT: add s2, a0, s2
+; RV32-NEXT: sw s2, 108(sp) # 4-byte Folded Spill
+; RV32-NEXT: sltu a0, s2, a0
; RV32-NEXT: add a0, a1, a0
-; RV32-NEXT: add s1, s2, a0
-; RV32-NEXT: sw s5, 148(sp) # 4-byte Folded Spill
-; RV32-NEXT: mv a0, s5
+; RV32-NEXT: add s2, s0, a0
+; RV32-NEXT: sw s3, 132(sp) # 4-byte Folded Spill
+; RV32-NEXT: mv a0, s3
; RV32-NEXT: li a1, 0
-; RV32-NEXT: mv a2, s0
+; RV32-NEXT: mv a2, s1
; RV32-NEXT: li a3, 0
; RV32-NEXT: call __muldi3
-; RV32-NEXT: add s7, a0, s1
-; RV32-NEXT: sltu a2, s1, s2
-; RV32-NEXT: sltu a0, s7, a0
+; RV32-NEXT: add s5, a0, s2
+; RV32-NEXT: sltu a2, s2, s0
+; RV32-NEXT: sltu a0, s5, a0
; RV32-NEXT: add a1, a1, a2
-; RV32-NEXT: add s10, a1, a0
-; RV32-NEXT: mv a0, s4
+; RV32-NEXT: add s0, a1, a0
+; RV32-NEXT: mv a0, s8
; RV32-NEXT: li a1, 0
; RV32-NEXT: mv a2, s11
; RV32-NEXT: li a3, 0
; RV32-NEXT: call __muldi3
-; RV32-NEXT: mv s5, a0
+; RV32-NEXT: mv s4, a0
; RV32-NEXT: mv s6, a1
-; RV32-NEXT: sw s3, 144(sp) # 4-byte Folded Spill
-; RV32-NEXT: mv a0, s3
+; RV32-NEXT: sw s7, 128(sp) # 4-byte Folded Spill
+; RV32-NEXT: mv a0, s7
; RV32-NEXT: li a1, 0
; RV32-NEXT: mv a2, s11
; RV32-NEXT: li a3, 0
; RV32-NEXT: call __muldi3
-; RV32-NEXT: mv s2, a0
-; RV32-NEXT: mv s1, a1
+; RV32-NEXT: mv s3, a0
+; RV32-NEXT: mv s2, a1
; RV32-NEXT: add s6, a0, s6
-; RV32-NEXT: sw s4, 112(sp) # 4-byte Folded Spill
-; RV32-NEXT: mv a0, s4
+; RV32-NEXT: sw s8, 88(sp) # 4-byte Folded Spill
+; RV32-NEXT: mv a0, s8
; RV32-NEXT: li a1, 0
-; RV32-NEXT: mv a2, s0
+; RV32-NEXT: mv a2, s1
; RV32-NEXT: li a3, 0
; RV32-NEXT: call __muldi3
; RV32-NEXT: add a2, a0, s6
-; RV32-NEXT: add s7, s5, s7
-; RV32-NEXT: add s10, a2, s10
-; RV32-NEXT: sltu s5, s7, s5
-; RV32-NEXT: add s10, s10, s5
-; RV32-NEXT: beq s10, a2, .LBB2_7
-; RV32-NEXT: # %bb.6: # %fp-to-i-if-then12
-; RV32-NEXT: sltu s5, s10, a2
-; RV32-NEXT: .LBB2_7: # %fp-to-i-if-then12
-; RV32-NEXT: lw a3, 120(sp) # 4-byte Folded Reload
-; RV32-NEXT: or a3, a3, s8
-; RV32-NEXT: sw a3, 108(sp) # 4-byte Folded Spill
-; RV32-NEXT: lw a3, 116(sp) # 4-byte Folded Reload
+; RV32-NEXT: add s5, s4, s5
+; RV32-NEXT: add s11, a2, s0
+; RV32-NEXT: sltu s0, s5, s4
+; RV32-NEXT: add s11, s11, s0
+; RV32-NEXT: beq s11, a2, .LBB2_6
+; RV32-NEXT: # %bb.5: # %fp-to-i-if-then12
+; RV32-NEXT: sltu s0, s11, a2
+; RV32-NEXT: .LBB2_6: # %fp-to-i-if-then12
+; RV32-NEXT: lw a3, 104(sp) # 4-byte Folded Reload
; RV32-NEXT: or a3, a3, s9
-; RV32-NEXT: sw a3, 104(sp) # 4-byte Folded Spill
-; RV32-NEXT: sltu a3, s6, s2
+; RV32-NEXT: sw a3, 84(sp) # 4-byte Folded Spill
+; RV32-NEXT: lw a3, 100(sp) # 4-byte Folded Reload
+; RV32-NEXT: or a3, a3, s10
+; RV32-NEXT: sw a3, 92(sp) # 4-byte Folded Spill
+; RV32-NEXT: sltu a3, s6, s3
; RV32-NEXT: sltu a0, a2, a0
-; RV32-NEXT: add a3, s1, a3
+; RV32-NEXT: add a3, s2, a3
; RV32-NEXT: add a0, a1, a0
-; RV32-NEXT: sw a3, 100(sp) # 4-byte Folded Spill
-; RV32-NEXT: add s1, a3, a0
-; RV32-NEXT: lw a0, 144(sp) # 4-byte Folded Reload
+; RV32-NEXT: sw a3, 96(sp) # 4-byte Folded Spill
+; RV32-NEXT: add s2, a3, a0
+; RV32-NEXT: lw a0, 128(sp) # 4-byte Folded Reload
; RV32-NEXT: li a1, 0
-; RV32-NEXT: mv a2, s0
+; RV32-NEXT: mv a2, s1
; RV32-NEXT: li a3, 0
; RV32-NEXT: call __muldi3
-; RV32-NEXT: sw a1, 76(sp) # 4-byte Folded Spill
-; RV32-NEXT: sw s1, 80(sp) # 4-byte Folded Spill
-; RV32-NEXT: sw a0, 72(sp) # 4-byte Folded Spill
-; RV32-NEXT: add a0, a0, s1
-; RV32-NEXT: sw a0, 68(sp) # 4-byte Folded Spill
-; RV32-NEXT: add s4, a0, s5
-; RV32-NEXT: lw s1, 140(sp) # 4-byte Folded Reload
-; RV32-NEXT: mv a0, s1
+; RV32-NEXT: sw a1, 60(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw s2, 80(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw a0, 56(sp) # 4-byte Folded Spill
+; RV32-NEXT: add a0, a0, s2
+; RV32-NEXT: sw a0, 52(sp) # 4-byte Folded Spill
+; RV32-NEXT: add s3, a0, s0
+; RV32-NEXT: lw s0, 124(sp) # 4-byte Folded Reload
+; RV32-NEXT: mv a0, s0
; RV32-NEXT: li a1, 0
-; RV32-NEXT: mv a2, s0
+; RV32-NEXT: mv a2, s1
; RV32-NEXT: li a3, 0
; RV32-NEXT: call __muldi3
-; RV32-NEXT: mv s8, a0
+; RV32-NEXT: mv s7, a0
; RV32-NEXT: mv s9, a1
-; RV32-NEXT: lw a0, 148(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw a0, 132(sp) # 4-byte Folded Reload
; RV32-NEXT: li a1, 0
-; RV32-NEXT: mv a2, s0
+; RV32-NEXT: mv a2, s1
; RV32-NEXT: li a3, 0
; RV32-NEXT: call __muldi3
; RV32-NEXT: mv s6, a0
-; RV32-NEXT: mv s5, a1
-; RV32-NEXT: add s11, a0, s9
-; RV32-NEXT: mv a0, s1
+; RV32-NEXT: mv s4, a1
+; RV32-NEXT: add s9, a0, s9
+; RV32-NEXT: mv a0, s0
; RV32-NEXT: li a1, 0
-; RV32-NEXT: mv a2, s0
+; RV32-NEXT: mv a2, s1
; RV32-NEXT: li a3, 0
; RV32-NEXT: call __muldi3
-; RV32-NEXT: add a2, a0, s11
-; RV32-NEXT: add s7, s8, s7
-; RV32-NEXT: sw s7, 120(sp) # 4-byte Folded Spill
-; RV32-NEXT: sltu s1, s7, s8
-; RV32-NEXT: add s3, a2, s1
-; RV32-NEXT: add s3, s3, s10
-; RV32-NEXT: beq s3, a2, .LBB2_9
-; RV32-NEXT: # %bb.8: # %fp-to-i-if-then12
-; RV32-NEXT: sltu s1, s3, a2
-; RV32-NEXT: .LBB2_9: # %fp-to-i-if-then12
-; RV32-NEXT: sw s3, 116(sp) # 4-byte Folded Spill
-; RV32-NEXT: sltu a3, s11, s6
+; RV32-NEXT: add a2, a0, s9
+; RV32-NEXT: add s5, s7, s5
+; RV32-NEXT: sw s5, 104(sp) # 4-byte Folded Spill
+; RV32-NEXT: sltu s0, s5, s7
+; RV32-NEXT: add a3, a2, s0
+; RV32-NEXT: add a3, a3, s11
+; RV32-NEXT: beq a3, a2, .LBB2_8
+; RV32-NEXT: # %bb.7: # %fp-to-i-if-then12
+; RV32-NEXT: sltu s0, a3, a2
+; RV32-NEXT: .LBB2_8: # %fp-to-i-if-then12
+; RV32-NEXT: sw a3, 100(sp) # 4-byte Folded Spill
+; RV32-NEXT: sltu a3, s9, s6
; RV32-NEXT: sltu a0, a2, a0
-; RV32-NEXT: add s10, s5, a3
+; RV32-NEXT: add s6, s4, a3
; RV32-NEXT: add a0, a1, a0
-; RV32-NEXT: add s6, s10, a0
-; RV32-NEXT: lw a0, 148(sp) # 4-byte Folded Reload
+; RV32-NEXT: add s10, s6, a0
+; RV32-NEXT: lw a0, 132(sp) # 4-byte Folded Reload
; RV32-NEXT: li a1, 0
-; RV32-NEXT: mv a2, s0
+; RV32-NEXT: mv a2, s1
; RV32-NEXT: li a3, 0
; RV32-NEXT: call __muldi3
-; RV32-NEXT: mv s8, a0
-; RV32-NEXT: sw a1, 52(sp) # 4-byte Folded Spill
-; RV32-NEXT: add s5, a0, s6
-; RV32-NEXT: add s9, s5, s1
-; RV32-NEXT: add s2, s4, s9
-; RV32-NEXT: lw s11, 112(sp) # 4-byte Folded Reload
-; RV32-NEXT: mv a0, s11
+; RV32-NEXT: mv s7, a0
+; RV32-NEXT: sw a1, 40(sp) # 4-byte Folded Spill
+; RV32-NEXT: add s4, a0, s10
+; RV32-NEXT: add s9, s4, s0
+; RV32-NEXT: add s0, s3, s9
+; RV32-NEXT: lw s8, 88(sp) # 4-byte Folded Reload
+; RV32-NEXT: mv a0, s8
; RV32-NEXT: li a1, 0
-; RV32-NEXT: mv a2, s0
+; RV32-NEXT: mv a2, s1
; RV32-NEXT: li a3, 0
; RV32-NEXT: call __muldi3
-; RV32-NEXT: sw a0, 60(sp) # 4-byte Folded Spill
-; RV32-NEXT: sw a1, 40(sp) # 4-byte Folded Spill
-; RV32-NEXT: add s7, a0, s2
-; RV32-NEXT: ori a0, s0, 1
+; RV32-NEXT: mv s11, a0
+; RV32-NEXT: sw a1, 44(sp) # 4-byte Folded Spill
+; RV32-NEXT: add s5, a0, s0
+; RV32-NEXT: ori a0, s1, 1
; RV32-NEXT: li a1, 0
-; RV32-NEXT: lw s3, 104(sp) # 4-byte Folded Reload
-; RV32-NEXT: mv a2, s3
+; RV32-NEXT: lw a2, 92(sp) # 4-byte Folded Reload
; RV32-NEXT: li a3, 0
; RV32-NEXT: call __muldi3
-; RV32-NEXT: mv s1, a0
-; RV32-NEXT: sw a1, 64(sp) # 4-byte Folded Spill
-; RV32-NEXT: mv a0, s0
+; RV32-NEXT: mv s2, a0
+; RV32-NEXT: sw a1, 48(sp) # 4-byte Folded Spill
+; RV32-NEXT: mv a0, s1
; RV32-NEXT: li a1, 0
-; RV32-NEXT: lw a2, 140(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw a2, 124(sp) # 4-byte Folded Reload
; RV32-NEXT: li a3, 0
; RV32-NEXT: call __muldi3
-; RV32-NEXT: sw a0, 36(sp) # 4-byte Folded Spill
-; RV32-NEXT: sw a1, 56(sp) # 4-byte Folded Spill
-; RV32-NEXT: add s1, a0, s1
-; RV32-NEXT: sltu a6, s2, s4
-; RV32-NEXT: lw a2, 68(sp) # 4-byte Folded Reload
-; RV32-NEXT: sltu a0, s4, a2
-; RV32-NEXT: lw a1, 72(sp) # 4-byte Folded Reload
+; RV32-NEXT: sw a0, 20(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw a1, 36(sp) # 4-byte Folded Spill
+; RV32-NEXT: add a6, a0, s2
+; RV32-NEXT: sw a6, 0(sp) # 4-byte Folded Spill
+; RV32-NEXT: sltu a7, s0, s3
+; RV32-NEXT: lw a2, 52(sp) # 4-byte Folded Reload
+; RV32-NEXT: sltu a0, s3, a2
+; RV32-NEXT: lw a1, 56(sp) # 4-byte Folded Reload
; RV32-NEXT: sltu a1, a2, a1
-; RV32-NEXT: lw a2, 100(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw a2, 96(sp) # 4-byte Folded Reload
; RV32-NEXT: lw a3, 80(sp) # 4-byte Folded Reload
; RV32-NEXT: sltu a2, a3, a2
-; RV32-NEXT: sltu a3, s9, s5
-; RV32-NEXT: sltu a4, s5, s8
-; RV32-NEXT: sltu a5, s6, s10
-; RV32-NEXT: add s8, s7, s1
-; RV32-NEXT: mv s5, s1
-; RV32-NEXT: lw a7, 76(sp) # 4-byte Folded Reload
-; RV32-NEXT: add a2, a7, a2
-; RV32-NEXT: lw a7, 52(sp) # 4-byte Folded Reload
-; RV32-NEXT: add a5, a7, a5
-; RV32-NEXT: sltu s9, s8, s7
+; RV32-NEXT: sltu a3, s9, s4
+; RV32-NEXT: sltu a4, s4, s7
+; RV32-NEXT: sltu a5, s10, s6
+; RV32-NEXT: add a6, s5, a6
+; RV32-NEXT: lw t0, 60(sp) # 4-byte Folded Reload
+; RV32-NEXT: add a2, t0, a2
+; RV32-NEXT: lw t0, 40(sp) # 4-byte Folded Reload
+; RV32-NEXT: add a5, t0, a5
+; RV32-NEXT: sw a6, 96(sp) # 4-byte Folded Spill
+; RV32-NEXT: sltu s0, a6, s5
; RV32-NEXT: add a1, a2, a1
; RV32-NEXT: add a4, a5, a4
-; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: add s3, a1, a0
; RV32-NEXT: add a3, a4, a3
-; RV32-NEXT: sw a0, 52(sp) # 4-byte Folded Spill
-; RV32-NEXT: add a3, a0, a3
-; RV32-NEXT: sw a6, 100(sp) # 4-byte Folded Spill
-; RV32-NEXT: add s4, a3, a6
-; RV32-NEXT: lw a0, 144(sp) # 4-byte Folded Reload
+; RV32-NEXT: add a3, s3, a3
+; RV32-NEXT: sw a7, 60(sp) # 4-byte Folded Spill
+; RV32-NEXT: add s9, a3, a7
+; RV32-NEXT: lw a0, 128(sp) # 4-byte Folded Reload
; RV32-NEXT: li a1, 0
-; RV32-NEXT: mv a2, s0
+; RV32-NEXT: mv a2, s1
; RV32-NEXT: li a3, 0
; RV32-NEXT: call __muldi3
-; RV32-NEXT: sw a1, 48(sp) # 4-byte Folded Spill
-; RV32-NEXT: sw a0, 44(sp) # 4-byte Folded Spill
-; RV32-NEXT: lw s10, 40(sp) # 4-byte Folded Reload
-; RV32-NEXT: add s10, a0, s10
-; RV32-NEXT: mv a0, s11
+; RV32-NEXT: sw a1, 32(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw a0, 28(sp) # 4-byte Folded Spill
+; RV32-NEXT: lw s2, 44(sp) # 4-byte Folded Reload
+; RV32-NEXT: add s2, a0, s2
+; RV32-NEXT: mv a0, s8
; RV32-NEXT: li a1, 0
-; RV32-NEXT: mv a2, s0
+; RV32-NEXT: mv a2, s1
; RV32-NEXT: li a3, 0
; RV32-NEXT: call __muldi3
-; RV32-NEXT: sw a1, 20(sp) # 4-byte Folded Spill
-; RV32-NEXT: sw s10, 32(sp) # 4-byte Folded Spill
-; RV32-NEXT: sw a0, 16(sp) # 4-byte Folded Spill
-; RV32-NEXT: add s10, a0, s10
-; RV32-NEXT: lw a0, 60(sp) # 4-byte Folded Reload
-; RV32-NEXT: sltu s11, s7, a0
-; RV32-NEXT: add s1, s10, s11
-; RV32-NEXT: add s1, s1, s4
-; RV32-NEXT: mv a0, s0
+; RV32-NEXT: mv s4, a0
+; RV32-NEXT: sw a1, 4(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32-NEXT: add s2, a0, s2
+; RV32-NEXT: sltu s11, s5, s11
+; RV32-NEXT: add s5, s2, s11
+; RV32-NEXT: add s5, s5, s9
+; RV32-NEXT: mv a0, s1
; RV32-NEXT: li a1, 0
-; RV32-NEXT: mv a2, s3
+; RV32-NEXT: lw a2, 92(sp) # 4-byte Folded Reload
; RV32-NEXT: li a3, 0
; RV32-NEXT: call __muldi3
-; RV32-NEXT: sw a1, 80(sp) # 4-byte Folded Spill
-; RV32-NEXT: sw a0, 72(sp) # 4-byte Folded Spill
-; RV32-NEXT: lw s2, 64(sp) # 4-byte Folded Reload
-; RV32-NEXT: add s2, a0, s2
-; RV32-NEXT: ori a0, s0, 1
+; RV32-NEXT: sw a1, 56(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw a0, 52(sp) # 4-byte Folded Spill
+; RV32-NEXT: lw s6, 48(sp) # 4-byte Folded Reload
+; RV32-NEXT: add s6, a0, s6
+; RV32-NEXT: ori a0, s1, 1
; RV32-NEXT: li a1, 0
-; RV32-NEXT: lw a2, 108(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw a2, 84(sp) # 4-byte Folded Reload
; RV32-NEXT: li a3, 0
; RV32-NEXT: call __muldi3
-; RV32-NEXT: sw a1, 64(sp) # 4-byte Folded Spill
-; RV32-NEXT: sw s2, 68(sp) # 4-byte Folded Spill
-; RV32-NEXT: sw a0, 60(sp) # 4-byte Folded Spill
-; RV32-NEXT: add s2, a0, s2
-; RV32-NEXT: mv a0, s0
+; RV32-NEXT: sw a1, 44(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw s6, 48(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw a0, 40(sp) # 4-byte Folded Spill
+; RV32-NEXT: add s10, a0, s6
+; RV32-NEXT: mv a0, s1
; RV32-NEXT: li a1, 0
-; RV32-NEXT: lw a2, 140(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw a2, 124(sp) # 4-byte Folded Reload
; RV32-NEXT: li a3, 0
; RV32-NEXT: call __muldi3
-; RV32-NEXT: sw a1, 40(sp) # 4-byte Folded Spill
-; RV32-NEXT: sw a0, 28(sp) # 4-byte Folded Spill
-; RV32-NEXT: lw s6, 56(sp) # 4-byte Folded Reload
+; RV32-NEXT: sw a1, 24(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw a0, 12(sp) # 4-byte Folded Spill
+; RV32-NEXT: lw s6, 36(sp) # 4-byte Folded Reload
; RV32-NEXT: add s6, a0, s6
-; RV32-NEXT: mv a0, s0
+; RV32-NEXT: mv a0, s1
; RV32-NEXT: li a1, 0
-; RV32-NEXT: lw a2, 148(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw a2, 132(sp) # 4-byte Folded Reload
; RV32-NEXT: li a3, 0
; RV32-NEXT: call __muldi3
; RV32-NEXT: mv s7, a0
-; RV32-NEXT: sw s6, 24(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw s6, 8(sp) # 4-byte Folded Spill
; RV32-NEXT: add s6, a0, s6
-; RV32-NEXT: sw s2, 56(sp) # 4-byte Folded Spill
-; RV32-NEXT: add s2, s6, s2
-; RV32-NEXT: lw a0, 36(sp) # 4-byte Folded Reload
-; RV32-NEXT: sltu a0, s5, a0
-; RV32-NEXT: sw a0, 76(sp) # 4-byte Folded Spill
-; RV32-NEXT: add s2, s2, a0
-; RV32-NEXT: add s5, s2, s9
-; RV32-NEXT: add s5, s1, s5
-; RV32-NEXT: sw a1, 36(sp) # 4-byte Folded Spill
-; RV32-NEXT: beq s5, s1, .LBB2_11
-; RV32-NEXT: # %bb.10: # %fp-to-i-if-then12
-; RV32-NEXT: sltu s9, s5, s1
-; RV32-NEXT: .LBB2_11: # %fp-to-i-if-then12
-; RV32-NEXT: lw a4, 88(sp) # 4-byte Folded Reload
+; RV32-NEXT: sw s10, 36(sp) # 4-byte Folded Spill
+; RV32-NEXT: add s10, s6, s10
+; RV32-NEXT: lw a0, 20(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw a2, 0(sp) # 4-byte Folded Reload
+; RV32-NEXT: sltu s8, a2, a0
+; RV32-NEXT: add s10, s10, s8
+; RV32-NEXT: sw s0, 80(sp) # 4-byte Folded Spill
+; RV32-NEXT: add s0, s10, s0
+; RV32-NEXT: add s0, s5, s0
+; RV32-NEXT: sw a1, 20(sp) # 4-byte Folded Spill
+; RV32-NEXT: beq s0, s5, .LBB2_10
+; RV32-NEXT: # %bb.9: # %fp-to-i-if-then12
+; RV32-NEXT: sltu a0, s0, s5
+; RV32-NEXT: sw a0, 80(sp) # 4-byte Folded Spill
+; RV32-NEXT: .LBB2_10: # %fp-to-i-if-then12
+; RV32-NEXT: lw a4, 68(sp) # 4-byte Folded Reload
; RV32-NEXT: slli a1, a4, 1
-; RV32-NEXT: lw a2, 96(sp) # 4-byte Folded Reload
-; RV32-NEXT: beq s1, s10, .LBB2_13
-; RV32-NEXT: # %bb.12: # %fp-to-i-if-then12
-; RV32-NEXT: sltu s11, s1, s10
-; RV32-NEXT: .LBB2_13: # %fp-to-i-if-then12
-; RV32-NEXT: lw a0, 84(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw a2, 76(sp) # 4-byte Folded Reload
+; RV32-NEXT: beq s5, s2, .LBB2_12
+; RV32-NEXT: # %bb.11: # %fp-to-i-if-then12
+; RV32-NEXT: sltu s11, s5, s2
+; RV32-NEXT: .LBB2_12: # %fp-to-i-if-then12
+; RV32-NEXT: lw a0, 64(sp) # 4-byte Folded Reload
; RV32-NEXT: srl a0, a0, a2
-; RV32-NEXT: lw a3, 92(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw a3, 72(sp) # 4-byte Folded Reload
; RV32-NEXT: sll a1, a1, a3
-; RV32-NEXT: lw s3, 116(sp) # 4-byte Folded Reload
-; RV32-NEXT: lw a3, 52(sp) # 4-byte Folded Reload
-; RV32-NEXT: beq s4, a3, .LBB2_15
-; RV32-NEXT: # %bb.14: # %fp-to-i-if-then12
-; RV32-NEXT: sltu a3, s4, a3
-; RV32-NEXT: sw a3, 100(sp) # 4-byte Folded Spill
-; RV32-NEXT: .LBB2_15: # %fp-to-i-if-then12
-; RV32-NEXT: sw s9, 92(sp) # 4-byte Folded Spill
+; RV32-NEXT: lw s5, 60(sp) # 4-byte Folded Reload
+; RV32-NEXT: beq s9, s3, .LBB2_14
+; RV32-NEXT: # %bb.13: # %fp-to-i-if-then12
+; RV32-NEXT: sltu s5, s9, s3
+; RV32-NEXT: .LBB2_14: # %fp-to-i-if-then12
; RV32-NEXT: or a0, a0, a1
-; RV32-NEXT: sw a0, 84(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw a0, 72(sp) # 4-byte Folded Spill
; RV32-NEXT: srl a0, a4, a2
-; RV32-NEXT: sw a0, 96(sp) # 4-byte Folded Spill
-; RV32-NEXT: lw a0, 44(sp) # 4-byte Folded Reload
-; RV32-NEXT: lw a1, 32(sp) # 4-byte Folded Reload
-; RV32-NEXT: sltu a0, a1, a0
+; RV32-NEXT: sw a0, 76(sp) # 4-byte Folded Spill
+; RV32-NEXT: lw a0, 28(sp) # 4-byte Folded Reload
; RV32-NEXT: lw a1, 16(sp) # 4-byte Folded Reload
-; RV32-NEXT: sltu a1, s10, a1
-; RV32-NEXT: lw s1, 48(sp) # 4-byte Folded Reload
-; RV32-NEXT: add s1, s1, a0
-; RV32-NEXT: lw s4, 20(sp) # 4-byte Folded Reload
+; RV32-NEXT: sltu a0, a1, a0
+; RV32-NEXT: sltu a1, s2, s4
+; RV32-NEXT: lw s2, 32(sp) # 4-byte Folded Reload
+; RV32-NEXT: add s2, s2, a0
+; RV32-NEXT: lw s4, 4(sp) # 4-byte Folded Reload
; RV32-NEXT: add s4, s4, a1
-; RV32-NEXT: lw a0, 144(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw a0, 128(sp) # 4-byte Folded Reload
; RV32-NEXT: li a1, 0
-; RV32-NEXT: mv a2, s0
+; RV32-NEXT: mv a2, s1
; RV32-NEXT: li a3, 0
; RV32-NEXT: call __muldi3
-; RV32-NEXT: add a0, a0, s1
+; RV32-NEXT: add a0, a0, s2
; RV32-NEXT: add a0, a0, s4
-; RV32-NEXT: lw a1, 100(sp) # 4-byte Folded Reload
-; RV32-NEXT: add a0, a0, a1
+; RV32-NEXT: add a0, a0, s5
; RV32-NEXT: add s11, a0, s11
-; RV32-NEXT: lw s10, 76(sp) # 4-byte Folded Reload
-; RV32-NEXT: beq s2, s6, .LBB2_17
-; RV32-NEXT: # %bb.16: # %fp-to-i-if-then12
-; RV32-NEXT: sltu s10, s2, s6
-; RV32-NEXT: .LBB2_17: # %fp-to-i-if-then12
-; RV32-NEXT: lw a0, 140(sp) # 4-byte Folded Reload
-; RV32-NEXT: lw s9, 148(sp) # 4-byte Folded Reload
-; RV32-NEXT: mv a1, s9
-; RV32-NEXT: lw a2, 132(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw s5, 104(sp) # 4-byte Folded Reload
+; RV32-NEXT: beq s10, s6, .LBB2_16
+; RV32-NEXT: # %bb.15: # %fp-to-i-if-then12
+; RV32-NEXT: sltu s8, s10, s6
+; RV32-NEXT: .LBB2_16: # %fp-to-i-if-then12
+; RV32-NEXT: lw a0, 124(sp) # 4-byte Folded Reload
+; RV32-NEXT: mv s9, s8
+; RV32-NEXT: lw s8, 132(sp) # 4-byte Folded Reload
+; RV32-NEXT: mv a1, s8
+; RV32-NEXT: lw a2, 116(sp) # 4-byte Folded Reload
; RV32-NEXT: call __muldi3
-; RV32-NEXT: mv s1, a0
-; RV32-NEXT: lw a0, 112(sp) # 4-byte Folded Reload
-; RV32-NEXT: lw a1, 144(sp) # 4-byte Folded Reload
-; RV32-NEXT: mv a2, s0
-; RV32-NEXT: mv a3, s0
+; RV32-NEXT: mv s2, a0
+; RV32-NEXT: lw a0, 88(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw a1, 128(sp) # 4-byte Folded Reload
+; RV32-NEXT: mv a2, s1
+; RV32-NEXT: mv a3, s1
; RV32-NEXT: call __muldi3
-; RV32-NEXT: add s1, a0, s1
-; RV32-NEXT: lw a0, 28(sp) # 4-byte Folded Reload
-; RV32-NEXT: lw a1, 24(sp) # 4-byte Folded Reload
+; RV32-NEXT: add s2, a0, s2
+; RV32-NEXT: lw a0, 12(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw a1, 8(sp) # 4-byte Folded Reload
; RV32-NEXT: sltu a0, a1, a0
; RV32-NEXT: sltu a1, s6, s7
-; RV32-NEXT: lw s2, 40(sp) # 4-byte Folded Reload
-; RV32-NEXT: add s2, s2, a0
-; RV32-NEXT: lw s4, 36(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw s3, 24(sp) # 4-byte Folded Reload
+; RV32-NEXT: add s3, s3, a0
+; RV32-NEXT: lw s4, 20(sp) # 4-byte Folded Reload
; RV32-NEXT: add s4, s4, a1
-; RV32-NEXT: mv a0, s0
+; RV32-NEXT: mv a0, s1
; RV32-NEXT: li a1, 0
-; RV32-NEXT: mv a2, s9
+; RV32-NEXT: mv a2, s8
; RV32-NEXT: li a3, 0
; RV32-NEXT: call __muldi3
-; RV32-NEXT: add a0, a0, s2
-; RV32-NEXT: lw a1, 72(sp) # 4-byte Folded Reload
-; RV32-NEXT: lw a2, 68(sp) # 4-byte Folded Reload
+; RV32-NEXT: add a0, a0, s3
+; RV32-NEXT: lw a1, 52(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw a2, 48(sp) # 4-byte Folded Reload
; RV32-NEXT: sltu a1, a2, a1
-; RV32-NEXT: lw a2, 60(sp) # 4-byte Folded Reload
-; RV32-NEXT: lw a3, 56(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw a2, 40(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw a3, 36(sp) # 4-byte Folded Reload
; RV32-NEXT: sltu a2, a3, a2
; RV32-NEXT: add a0, a0, s4
-; RV32-NEXT: lw s2, 80(sp) # 4-byte Folded Reload
-; RV32-NEXT: add s2, s2, a1
-; RV32-NEXT: add s4, a0, s1
-; RV32-NEXT: lw s1, 64(sp) # 4-byte Folded Reload
-; RV32-NEXT: add s1, s1, a2
-; RV32-NEXT: mv a0, s0
+; RV32-NEXT: lw s3, 56(sp) # 4-byte Folded Reload
+; RV32-NEXT: add s3, s3, a1
+; RV32-NEXT: add s4, a0, s2
+; RV32-NEXT: lw s2, 44(sp) # 4-byte Folded Reload
+; RV32-NEXT: add s2, s2, a2
+; RV32-NEXT: mv a0, s1
; RV32-NEXT: li a1, 0
-; RV32-NEXT: lw s6, 108(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw s6, 84(sp) # 4-byte Folded Reload
; RV32-NEXT: mv a2, s6
; RV32-NEXT: li a3, 0
; RV32-NEXT: call __muldi3
-; RV32-NEXT: add a0, a0, s2
-; RV32-NEXT: add s2, a0, s1
-; RV32-NEXT: lw a0, 104(sp) # 4-byte Folded Reload
+; RV32-NEXT: add a0, a0, s3
+; RV32-NEXT: add s3, a0, s2
+; RV32-NEXT: lw a0, 92(sp) # 4-byte Folded Reload
; RV32-NEXT: mv a1, s6
-; RV32-NEXT: mv a2, s0
-; RV32-NEXT: mv a3, s0
+; RV32-NEXT: mv a2, s1
+; RV32-NEXT: mv a3, s1
; RV32-NEXT: call __muldi3
-; RV32-NEXT: mv s1, a0
-; RV32-NEXT: lw a0, 84(sp) # 4-byte Folded Reload
-; RV32-NEXT: lw a1, 96(sp) # 4-byte Folded Reload
-; RV32-NEXT: ori a2, s0, 1
-; RV32-NEXT: mv a3, s0
+; RV32-NEXT: mv s2, a0
+; RV32-NEXT: lw a0, 72(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw a1, 76(sp) # 4-byte Folded Reload
+; RV32-NEXT: ori a2, s1, 1
+; RV32-NEXT: mv a3, s1
; RV32-NEXT: call __muldi3
-; RV32-NEXT: add a0, a0, s1
-; RV32-NEXT: add a0, s2, a0
+; RV32-NEXT: add a0, a0, s2
+; RV32-NEXT: add a0, s3, a0
; RV32-NEXT: add a0, s4, a0
-; RV32-NEXT: add a0, a0, s10
+; RV32-NEXT: add a0, a0, s9
; RV32-NEXT: add a0, s11, a0
-; RV32-NEXT: lw a1, 92(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw a1, 80(sp) # 4-byte Folded Reload
; RV32-NEXT: add a0, a0, a1
; RV32-NEXT: zext.b a0, a0
-; RV32-NEXT: lw a1, 136(sp) # 4-byte Folded Reload
-; RV32-NEXT: lw a3, 128(sp) # 4-byte Folded Reload
-; RV32-NEXT: lw a2, 124(sp) # 4-byte Folded Reload
-; RV32-NEXT: lw a4, 120(sp) # 4-byte Folded Reload
-; RV32-NEXT: j .LBB2_31
-; RV32-NEXT: .LBB2_18: # %fp-to-i-if-else
-; RV32-NEXT: sw zero, 264(sp)
-; RV32-NEXT: sw zero, 268(sp)
-; RV32-NEXT: sw zero, 272(sp)
-; RV32-NEXT: sw zero, 276(sp)
-; RV32-NEXT: sw a1, 248(sp)
+; RV32-NEXT: lw a1, 120(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw a3, 112(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw a2, 108(sp) # 4-byte Folded Reload
+; RV32-NEXT: j .LBB2_30
+; RV32-NEXT: .LBB2_17: # %fp-to-i-if-else
+; RV32-NEXT: sw zero, 248(sp)
; RV32-NEXT: sw zero, 252(sp)
; RV32-NEXT: sw zero, 256(sp)
; RV32-NEXT: sw zero, 260(sp)
-; RV32-NEXT: sw zero, 232(sp)
+; RV32-NEXT: sw a1, 232(sp)
; RV32-NEXT: sw zero, 236(sp)
; RV32-NEXT: sw zero, 240(sp)
; RV32-NEXT: sw zero, 244(sp)
@@ -1725,8 +1705,12 @@ define void @test_float_to_bitint_200(ptr %in, ptr %out) nounwind {
; RV32-NEXT: sw zero, 220(sp)
; RV32-NEXT: sw zero, 224(sp)
; RV32-NEXT: sw zero, 228(sp)
+; RV32-NEXT: sw zero, 200(sp)
+; RV32-NEXT: sw zero, 204(sp)
+; RV32-NEXT: sw zero, 208(sp)
+; RV32-NEXT: sw zero, 212(sp)
; RV32-NEXT: addi a7, a0, -150
-; RV32-NEXT: addi a0, sp, 248
+; RV32-NEXT: addi a0, sp, 232
; RV32-NEXT: srli a1, a7, 3
; RV32-NEXT: andi a2, a7, 31
; RV32-NEXT: andi a1, a1, 28
@@ -1735,438 +1719,446 @@ define void @test_float_to_bitint_200(ptr %in, ptr %out) nounwind {
; RV32-NEXT: lw a1, 16(a0)
; RV32-NEXT: lw a5, 20(a0)
; RV32-NEXT: lw a2, 24(a0)
-; RV32-NEXT: sw a2, 76(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw a2, 64(sp) # 4-byte Folded Spill
; RV32-NEXT: lw a2, 28(a0)
-; RV32-NEXT: sw a2, 80(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw a2, 68(sp) # 4-byte Folded Spill
; RV32-NEXT: lw a2, 0(a0)
; RV32-NEXT: lw a3, 4(a0)
; RV32-NEXT: lw a4, 8(a0)
; RV32-NEXT: lw a0, 12(a0)
-; RV32-NEXT: sw a5, 72(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw a5, 60(sp) # 4-byte Folded Spill
; RV32-NEXT: sll a5, a5, a7
-; RV32-NEXT: sw a5, 140(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw a5, 104(sp) # 4-byte Folded Spill
; RV32-NEXT: srli a5, a1, 1
; RV32-NEXT: sll a1, a1, a7
-; RV32-NEXT: sw a1, 120(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw a1, 100(sp) # 4-byte Folded Spill
; RV32-NEXT: srli a1, a0, 1
; RV32-NEXT: sll a0, a0, a7
; RV32-NEXT: srli a6, a4, 1
; RV32-NEXT: sll a4, a4, a7
; RV32-NEXT: srl a6, a6, t0
-; RV32-NEXT: or s3, a0, a6
+; RV32-NEXT: or s9, a0, a6
; RV32-NEXT: srli a0, a3, 1
; RV32-NEXT: sll a3, a3, a7
; RV32-NEXT: srl a0, a0, t0
-; RV32-NEXT: or s4, a4, a0
+; RV32-NEXT: or s8, a4, a0
; RV32-NEXT: srli a0, a2, 1
-; RV32-NEXT: srl s8, a5, t0
-; RV32-NEXT: srl s9, a1, t0
-; RV32-NEXT: sw t0, 84(sp) # 4-byte Folded Spill
+; RV32-NEXT: srl s7, a5, t0
+; RV32-NEXT: srl s10, a1, t0
+; RV32-NEXT: sw t0, 72(sp) # 4-byte Folded Spill
; RV32-NEXT: srl a0, a0, t0
-; RV32-NEXT: or s6, a3, a0
-; RV32-NEXT: sw a7, 88(sp) # 4-byte Folded Spill
-; RV32-NEXT: sll s7, a2, a7
-; RV32-NEXT: mv a0, s6
+; RV32-NEXT: or s4, a3, a0
+; RV32-NEXT: sw a7, 76(sp) # 4-byte Folded Spill
+; RV32-NEXT: sll s5, a2, a7
+; RV32-NEXT: mv a0, s4
; RV32-NEXT: li a1, 0
; RV32-NEXT: mv a2, s11
; RV32-NEXT: li a3, 0
; RV32-NEXT: call __muldi3
-; RV32-NEXT: mv s1, a0
-; RV32-NEXT: mv s2, a1
-; RV32-NEXT: mv a0, s7
+; RV32-NEXT: mv s2, a0
+; RV32-NEXT: mv s3, a1
+; RV32-NEXT: mv a0, s5
; RV32-NEXT: li a1, 0
; RV32-NEXT: mv a2, s11
; RV32-NEXT: li a3, 0
; RV32-NEXT: call __muldi3
-; RV32-NEXT: sw a0, 128(sp) # 4-byte Folded Spill
-; RV32-NEXT: add s5, s1, a1
-; RV32-NEXT: sltu a0, s5, s1
-; RV32-NEXT: add s2, s2, a0
-; RV32-NEXT: sw s7, 144(sp) # 4-byte Folded Spill
-; RV32-NEXT: mv a0, s7
+; RV32-NEXT: sw a0, 112(sp) # 4-byte Folded Spill
+; RV32-NEXT: add s0, s2, a1
+; RV32-NEXT: sltu a0, s0, s2
+; RV32-NEXT: add s3, s3, a0
+; RV32-NEXT: sw s5, 124(sp) # 4-byte Folded Spill
+; RV32-NEXT: mv a0, s5
; RV32-NEXT: li a1, 0
-; RV32-NEXT: mv a2, s0
+; RV32-NEXT: mv a2, s1
; RV32-NEXT: li a3, 0
; RV32-NEXT: call __muldi3
-; RV32-NEXT: add s5, a0, s5
-; RV32-NEXT: sw s5, 124(sp) # 4-byte Folded Spill
-; RV32-NEXT: sltu a0, s5, a0
+; RV32-NEXT: add s0, a0, s0
+; RV32-NEXT: sw s0, 108(sp) # 4-byte Folded Spill
+; RV32-NEXT: sltu a0, s0, a0
; RV32-NEXT: add a0, a1, a0
-; RV32-NEXT: add s1, s2, a0
-; RV32-NEXT: sltu s2, s1, s2
-; RV32-NEXT: sw s6, 148(sp) # 4-byte Folded Spill
-; RV32-NEXT: mv a0, s6
+; RV32-NEXT: add s0, s3, a0
+; RV32-NEXT: sltu s2, s0, s3
+; RV32-NEXT: sw s4, 132(sp) # 4-byte Folded Spill
+; RV32-NEXT: mv a0, s4
; RV32-NEXT: li a1, 0
-; RV32-NEXT: mv a2, s0
+; RV32-NEXT: mv a2, s1
; RV32-NEXT: li a3, 0
; RV32-NEXT: call __muldi3
; RV32-NEXT: add a1, a1, s2
-; RV32-NEXT: add s10, a0, s1
-; RV32-NEXT: sltu a0, s10, a0
-; RV32-NEXT: add s6, a1, a0
-; RV32-NEXT: mv a0, s4
+; RV32-NEXT: add s0, a0, s0
+; RV32-NEXT: sltu a0, s0, a0
+; RV32-NEXT: add s5, a1, a0
+; RV32-NEXT: mv a0, s8
; RV32-NEXT: li a1, 0
; RV32-NEXT: mv a2, s11
; RV32-NEXT: li a3, 0
; RV32-NEXT: call __muldi3
-; RV32-NEXT: mv s5, a0
-; RV32-NEXT: mv s7, a1
-; RV32-NEXT: sw s3, 112(sp) # 4-byte Folded Spill
-; RV32-NEXT: mv a0, s3
+; RV32-NEXT: mv s4, a0
+; RV32-NEXT: mv s6, a1
+; RV32-NEXT: sw s9, 128(sp) # 4-byte Folded Spill
+; RV32-NEXT: mv a0, s9
; RV32-NEXT: li a1, 0
; RV32-NEXT: mv a2, s11
; RV32-NEXT: li a3, 0
; RV32-NEXT: call __muldi3
-; RV32-NEXT: mv s2, a0
-; RV32-NEXT: mv s1, a1
-; RV32-NEXT: add s7, a0, s7
-; RV32-NEXT: sw s4, 108(sp) # 4-byte Folded Spill
-; RV32-NEXT: mv a0, s4
+; RV32-NEXT: mv s3, a0
+; RV32-NEXT: mv s2, a1
+; RV32-NEXT: add s6, a0, s6
+; RV32-NEXT: sw s8, 88(sp) # 4-byte Folded Spill
+; RV32-NEXT: mv a0, s8
; RV32-NEXT: li a1, 0
-; RV32-NEXT: mv a2, s0
+; RV32-NEXT: mv a2, s1
; RV32-NEXT: li a3, 0
; RV32-NEXT: call __muldi3
-; RV32-NEXT: add a2, a0, s7
-; RV32-NEXT: add s10, s5, s10
-; RV32-NEXT: add s6, a2, s6
-; RV32-NEXT: sltu s5, s10, s5
-; RV32-NEXT: add s11, s6, s5
-; RV32-NEXT: beq s11, a2, .LBB2_20
-; RV32-NEXT: # %bb.19: # %fp-to-i-if-else
-; RV32-NEXT: sltu s5, s11, a2
-; RV32-NEXT: .LBB2_20: # %fp-to-i-if-else
-; RV32-NEXT: lw a3, 140(sp) # 4-byte Folded Reload
-; RV32-NEXT: or a3, a3, s8
-; RV32-NEXT: sw a3, 104(sp) # 4-byte Folded Spill
-; RV32-NEXT: lw a3, 120(sp) # 4-byte Folded Reload
-; RV32-NEXT: or a3, a3, s9
-; RV32-NEXT: sw a3, 140(sp) # 4-byte Folded Spill
-; RV32-NEXT: sltu a3, s7, s2
+; RV32-NEXT: add a2, a0, s6
+; RV32-NEXT: add s11, s4, s0
+; RV32-NEXT: add s5, a2, s5
+; RV32-NEXT: sltu s0, s11, s4
+; RV32-NEXT: add s9, s5, s0
+; RV32-NEXT: beq s9, a2, .LBB2_19
+; RV32-NEXT: # %bb.18: # %fp-to-i-if-else
+; RV32-NEXT: sltu s0, s9, a2
+; RV32-NEXT: .LBB2_19: # %fp-to-i-if-else
+; RV32-NEXT: lw a3, 104(sp) # 4-byte Folded Reload
+; RV32-NEXT: or a3, a3, s7
+; RV32-NEXT: sw a3, 84(sp) # 4-byte Folded Spill
+; RV32-NEXT: lw a3, 100(sp) # 4-byte Folded Reload
+; RV32-NEXT: or a3, a3, s10
+; RV32-NEXT: sw a3, 92(sp) # 4-byte Folded Spill
+; RV32-NEXT: sltu a3, s6, s3
; RV32-NEXT: sltu a0, a2, a0
-; RV32-NEXT: add a3, s1, a3
+; RV32-NEXT: add a3, s2, a3
; RV32-NEXT: add a0, a1, a0
-; RV32-NEXT: sw a3, 100(sp) # 4-byte Folded Spill
-; RV32-NEXT: add s1, a3, a0
-; RV32-NEXT: lw a0, 112(sp) # 4-byte Folded Reload
+; RV32-NEXT: sw a3, 96(sp) # 4-byte Folded Spill
+; RV32-NEXT: add s2, a3, a0
+; RV32-NEXT: lw a0, 128(sp) # 4-byte Folded Reload
; RV32-NEXT: li a1, 0
-; RV32-NEXT: mv a2, s0
+; RV32-NEXT: mv a2, s1
; RV32-NEXT: li a3, 0
; RV32-NEXT: call __muldi3
-; RV32-NEXT: sw a1, 96(sp) # 4-byte Folded Spill
-; RV32-NEXT: sw s1, 92(sp) # 4-byte Folded Spill
-; RV32-NEXT: sw a0, 64(sp) # 4-byte Folded Spill
-; RV32-NEXT: add s1, a0, s1
-; RV32-NEXT: sw s1, 68(sp) # 4-byte Folded Spill
-; RV32-NEXT: add s2, s1, s5
-; RV32-NEXT: lw a0, 148(sp) # 4-byte Folded Reload
+; RV32-NEXT: sw a1, 80(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw s2, 56(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw a0, 48(sp) # 4-byte Folded Spill
+; RV32-NEXT: add s2, a0, s2
+; RV32-NEXT: sw s2, 52(sp) # 4-byte Folded Spill
+; RV32-NEXT: add s3, s2, s0
+; RV32-NEXT: lw a0, 132(sp) # 4-byte Folded Reload
; RV32-NEXT: li a1, 0
-; RV32-NEXT: mv a2, s0
+; RV32-NEXT: mv a2, s1
; RV32-NEXT: li a3, 0
; RV32-NEXT: call __muldi3
-; RV32-NEXT: mv s7, a0
-; RV32-NEXT: mv s5, a1
-; RV32-NEXT: lw s1, 144(sp) # 4-byte Folded Reload
-; RV32-NEXT: mv a0, s1
+; RV32-NEXT: mv s6, a0
+; RV32-NEXT: mv s4, a1
+; RV32-NEXT: lw s0, 124(sp) # 4-byte Folded Reload
+; RV32-NEXT: mv a0, s0
; RV32-NEXT: li a1, 0
-; RV32-NEXT: mv a2, s0
+; RV32-NEXT: mv a2, s1
; RV32-NEXT: li a3, 0
; RV32-NEXT: call __muldi3
; RV32-NEXT: mv s8, a0
-; RV32-NEXT: add s4, s7, a1
-; RV32-NEXT: mv a0, s1
+; RV32-NEXT: add s2, s6, a1
+; RV32-NEXT: mv a0, s0
; RV32-NEXT: li a1, 0
-; RV32-NEXT: mv a2, s0
+; RV32-NEXT: mv a2, s1
; RV32-NEXT: li a3, 0
; RV32-NEXT: call __muldi3
-; RV32-NEXT: add a2, a0, s4
-; RV32-NEXT: add s10, s8, s10
-; RV32-NEXT: sw s10, 120(sp) # 4-byte Folded Spill
-; RV32-NEXT: sltu s1, s10, s8
-; RV32-NEXT: add s3, a2, s1
-; RV32-NEXT: add s3, s3, s11
-; RV32-NEXT: beq s3, a2, .LBB2_22
-; RV32-NEXT: # %bb.21: # %fp-to-i-if-else
-; RV32-NEXT: sltu s1, s3, a2
-; RV32-NEXT: .LBB2_22: # %fp-to-i-if-else
-; RV32-NEXT: sw s3, 116(sp) # 4-byte Folded Spill
-; RV32-NEXT: sltu a3, s4, s7
+; RV32-NEXT: add a2, a0, s2
+; RV32-NEXT: add s11, s8, s11
+; RV32-NEXT: sw s11, 104(sp) # 4-byte Folded Spill
+; RV32-NEXT: sltu s0, s11, s8
+; RV32-NEXT: add a3, a2, s0
+; RV32-NEXT: add a3, a3, s9
+; RV32-NEXT: beq a3, a2, .LBB2_21
+; RV32-NEXT: # %bb.20: # %fp-to-i-if-else
+; RV32-NEXT: sltu s0, a3, a2
+; RV32-NEXT: .LBB2_21: # %fp-to-i-if-else
+; RV32-NEXT: sw a3, 100(sp) # 4-byte Folded Spill
+; RV32-NEXT: sltu a3, s2, s6
; RV32-NEXT: sltu a0, a2, a0
-; RV32-NEXT: add s5, s5, a3
+; RV32-NEXT: add s8, s4, a3
; RV32-NEXT: add a0, a1, a0
-; RV32-NEXT: add s9, s5, a0
-; RV32-NEXT: lw a0, 148(sp) # 4-byte Folded Reload
+; RV32-NEXT: add s10, s8, a0
+; RV32-NEXT: lw a0, 132(sp) # 4-byte Folded Reload
; RV32-NEXT: li a1, 0
-; RV32-NEXT: mv a2, s0
+; RV32-NEXT: mv a2, s1
; RV32-NEXT: li a3, 0
; RV32-NEXT: call __muldi3
-; RV32-NEXT: mv s10, a0
-; RV32-NEXT: sw a1, 56(sp) # 4-byte Folded Spill
-; RV32-NEXT: add s8, a0, s9
-; RV32-NEXT: add s3, s8, s1
-; RV32-NEXT: add s1, s2, s3
-; RV32-NEXT: lw s6, 108(sp) # 4-byte Folded Reload
-; RV32-NEXT: mv a0, s6
+; RV32-NEXT: mv s5, a0
+; RV32-NEXT: sw a1, 40(sp) # 4-byte Folded Spill
+; RV32-NEXT: add s9, a0, s10
+; RV32-NEXT: add s11, s9, s0
+; RV32-NEXT: add s0, s3, s11
+; RV32-NEXT: lw s7, 88(sp) # 4-byte Folded Reload
+; RV32-NEXT: mv a0, s7
; RV32-NEXT: li a1, 0
-; RV32-NEXT: mv a2, s0
+; RV32-NEXT: mv a2, s1
; RV32-NEXT: li a3, 0
; RV32-NEXT: call __muldi3
-; RV32-NEXT: mv s11, a0
-; RV32-NEXT: sw a1, 60(sp) # 4-byte Folded Spill
-; RV32-NEXT: add s4, a0, s1
-; RV32-NEXT: ori a0, s0, 1
+; RV32-NEXT: mv s6, a0
+; RV32-NEXT: sw a1, 44(sp) # 4-byte Folded Spill
+; RV32-NEXT: add s4, a0, s0
+; RV32-NEXT: ori a0, s1, 1
; RV32-NEXT: li a1, 0
-; RV32-NEXT: lw a2, 140(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw a2, 92(sp) # 4-byte Folded Reload
; RV32-NEXT: li a3, 0
; RV32-NEXT: call __muldi3
-; RV32-NEXT: mv s7, a0
-; RV32-NEXT: sw a1, 24(sp) # 4-byte Folded Spill
-; RV32-NEXT: mv a0, s0
+; RV32-NEXT: mv s2, a0
+; RV32-NEXT: sw a1, 8(sp) # 4-byte Folded Spill
+; RV32-NEXT: mv a0, s1
; RV32-NEXT: li a1, 0
-; RV32-NEXT: lw a2, 144(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw a2, 124(sp) # 4-byte Folded Reload
; RV32-NEXT: li a3, 0
; RV32-NEXT: call __muldi3
-; RV32-NEXT: sw a0, 28(sp) # 4-byte Folded Spill
-; RV32-NEXT: sw a1, 52(sp) # 4-byte Folded Spill
-; RV32-NEXT: add s7, a0, s7
-; RV32-NEXT: sltu a6, s1, s2
-; RV32-NEXT: lw a3, 68(sp) # 4-byte Folded Reload
-; RV32-NEXT: lw a0, 64(sp) # 4-byte Folded Reload
+; RV32-NEXT: sw a0, 32(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw a1, 36(sp) # 4-byte Folded Spill
+; RV32-NEXT: add a5, a0, s2
+; RV32-NEXT: sw a5, 0(sp) # 4-byte Folded Spill
+; RV32-NEXT: sltu a6, s0, s3
+; RV32-NEXT: lw a3, 52(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw a0, 48(sp) # 4-byte Folded Reload
; RV32-NEXT: sltu a0, a3, a0
-; RV32-NEXT: lw a1, 100(sp) # 4-byte Folded Reload
-; RV32-NEXT: lw a2, 92(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw a1, 96(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw a2, 56(sp) # 4-byte Folded Reload
; RV32-NEXT: sltu a1, a2, a1
-; RV32-NEXT: sltu a2, s2, a3
-; RV32-NEXT: sltu a3, s3, s8
+; RV32-NEXT: sltu s3, s3, a3
+; RV32-NEXT: sltu a2, s11, s9
+; RV32-NEXT: sltu a3, s10, s8
; RV32-NEXT: sltu a4, s9, s5
-; RV32-NEXT: sltu a5, s8, s10
-; RV32-NEXT: add s8, s4, s7
-; RV32-NEXT: mv s2, s7
-; RV32-NEXT: lw a7, 96(sp) # 4-byte Folded Reload
+; RV32-NEXT: add a5, s4, a5
+; RV32-NEXT: lw a7, 80(sp) # 4-byte Folded Reload
; RV32-NEXT: add a1, a7, a1
-; RV32-NEXT: lw a7, 56(sp) # 4-byte Folded Reload
-; RV32-NEXT: add a4, a7, a4
-; RV32-NEXT: sltu s5, s8, s4
+; RV32-NEXT: lw a7, 40(sp) # 4-byte Folded Reload
+; RV32-NEXT: add a3, a7, a3
+; RV32-NEXT: sw a5, 96(sp) # 4-byte Folded Spill
+; RV32-NEXT: sltu s0, a5, s4
; RV32-NEXT: add a0, a1, a0
-; RV32-NEXT: add a4, a4, a5
-; RV32-NEXT: add s10, a0, a2
-; RV32-NEXT: add a3, a4, a3
-; RV32-NEXT: add a3, s10, a3
-; RV32-NEXT: sw a6, 96(sp) # 4-byte Folded Spill
-; RV32-NEXT: add s3, a3, a6
-; RV32-NEXT: lw s9, 112(sp) # 4-byte Folded Reload
-; RV32-NEXT: mv a0, s9
+; RV32-NEXT: add a3, a3, a4
+; RV32-NEXT: add s3, a0, s3
+; RV32-NEXT: add a2, a3, a2
+; RV32-NEXT: add a2, s3, a2
+; RV32-NEXT: sw a6, 40(sp) # 4-byte Folded Spill
+; RV32-NEXT: add s11, a2, a6
+; RV32-NEXT: lw a0, 128(sp) # 4-byte Folded Reload
; RV32-NEXT: li a1, 0
-; RV32-NEXT: mv a2, s0
+; RV32-NEXT: mv a2, s1
; RV32-NEXT: li a3, 0
; RV32-NEXT: call __muldi3
-; RV32-NEXT: sw a1, 48(sp) # 4-byte Folded Spill
-; RV32-NEXT: sw a0, 40(sp) # 4-byte Folded Spill
-; RV32-NEXT: lw s1, 60(sp) # 4-byte Folded Reload
-; RV32-NEXT: add s1, a0, s1
-; RV32-NEXT: mv a0, s6
+; RV32-NEXT: sw a1, 28(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw a0, 24(sp) # 4-byte Folded Spill
+; RV32-NEXT: lw s9, 44(sp) # 4-byte Folded Reload
+; RV32-NEXT: add s9, a0, s9
+; RV32-NEXT: mv a0, s7
; RV32-NEXT: li a1, 0
-; RV32-NEXT: mv a2, s0
+; RV32-NEXT: mv a2, s1
; RV32-NEXT: li a3, 0
; RV32-NEXT: call __muldi3
-; RV32-NEXT: sw a1, 20(sp) # 4-byte Folded Spill
-; RV32-NEXT: sw s1, 32(sp) # 4-byte Folded Spill
-; RV32-NEXT: sw a0, 16(sp) # 4-byte Folded Spill
-; RV32-NEXT: add s6, a0, s1
-; RV32-NEXT: sltu s7, s4, s11
-; RV32-NEXT: add s11, s6, s7
-; RV32-NEXT: add s11, s11, s3
-; RV32-NEXT: mv a0, s0
+; RV32-NEXT: mv s2, a0
+; RV32-NEXT: sw a1, 4(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw s9, 16(sp) # 4-byte Folded Spill
+; RV32-NEXT: add s9, a0, s9
+; RV32-NEXT: sltu s8, s4, s6
+; RV32-NEXT: add s10, s9, s8
+; RV32-NEXT: add s10, s10, s11
+; RV32-NEXT: mv a0, s1
; RV32-NEXT: li a1, 0
-; RV32-NEXT: lw a2, 148(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw a2, 132(sp) # 4-byte Folded Reload
; RV32-NEXT: li a3, 0
; RV32-NEXT: call __muldi3
-; RV32-NEXT: mv s1, a0
-; RV32-NEXT: sw a1, 68(sp) # 4-byte Folded Spill
-; RV32-NEXT: mv a0, s0
+; RV32-NEXT: mv s4, a0
+; RV32-NEXT: sw a1, 56(sp) # 4-byte Folded Spill
+; RV32-NEXT: mv a0, s1
; RV32-NEXT: li a1, 0
-; RV32-NEXT: lw a2, 144(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw a2, 124(sp) # 4-byte Folded Reload
; RV32-NEXT: li a3, 0
; RV32-NEXT: call __muldi3
-; RV32-NEXT: sw a1, 60(sp) # 4-byte Folded Spill
-; RV32-NEXT: sw a0, 56(sp) # 4-byte Folded Spill
-; RV32-NEXT: lw a1, 52(sp) # 4-byte Folded Reload
+; RV32-NEXT: sw a1, 48(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw a0, 44(sp) # 4-byte Folded Spill
+; RV32-NEXT: lw a1, 36(sp) # 4-byte Folded Reload
; RV32-NEXT: add a0, a0, a1
-; RV32-NEXT: sw s1, 64(sp) # 4-byte Folded Spill
-; RV32-NEXT: sw a0, 52(sp) # 4-byte Folded Spill
-; RV32-NEXT: add s1, s1, a0
-; RV32-NEXT: mv a0, s0
+; RV32-NEXT: sw s4, 52(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw a0, 36(sp) # 4-byte Folded Spill
+; RV32-NEXT: add s4, s4, a0
+; RV32-NEXT: mv a0, s1
; RV32-NEXT: li a1, 0
-; RV32-NEXT: lw a2, 140(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw a2, 92(sp) # 4-byte Folded Reload
; RV32-NEXT: li a3, 0
; RV32-NEXT: call __muldi3
-; RV32-NEXT: sw a1, 44(sp) # 4-byte Folded Spill
-; RV32-NEXT: sw a0, 36(sp) # 4-byte Folded Spill
-; RV32-NEXT: lw s4, 24(sp) # 4-byte Folded Reload
-; RV32-NEXT: add s4, a0, s4
-; RV32-NEXT: ori a0, s0, 1
+; RV32-NEXT: sw a1, 20(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw a0, 12(sp) # 4-byte Folded Spill
+; RV32-NEXT: lw s6, 8(sp) # 4-byte Folded Reload
+; RV32-NEXT: add s6, a0, s6
+; RV32-NEXT: ori a0, s1, 1
; RV32-NEXT: li a1, 0
-; RV32-NEXT: lw a2, 104(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw a2, 84(sp) # 4-byte Folded Reload
; RV32-NEXT: li a3, 0
; RV32-NEXT: call __muldi3
-; RV32-NEXT: sw s4, 24(sp) # 4-byte Folded Spill
-; RV32-NEXT: sw a0, 12(sp) # 4-byte Folded Spill
-; RV32-NEXT: add a0, a0, s4
-; RV32-NEXT: sw a0, 8(sp) # 4-byte Folded Spill
-; RV32-NEXT: add s4, s1, a0
-; RV32-NEXT: lw a0, 28(sp) # 4-byte Folded Reload
-; RV32-NEXT: sltu a0, s2, a0
-; RV32-NEXT: sw a0, 92(sp) # 4-byte Folded Spill
-; RV32-NEXT: add s4, s4, a0
-; RV32-NEXT: sw s5, 100(sp) # 4-byte Folded Spill
-; RV32-NEXT: add s5, s4, s5
-; RV32-NEXT: add s5, s11, s5
-; RV32-NEXT: sw a1, 28(sp) # 4-byte Folded Spill
-; RV32-NEXT: beq s5, s11, .LBB2_24
-; RV32-NEXT: # %bb.23: # %fp-to-i-if-else
-; RV32-NEXT: sltu a0, s5, s11
-; RV32-NEXT: sw a0, 100(sp) # 4-byte Folded Spill
-; RV32-NEXT: .LBB2_24: # %fp-to-i-if-else
-; RV32-NEXT: lw a2, 72(sp) # 4-byte Folded Reload
+; RV32-NEXT: mv s7, a0
+; RV32-NEXT: sw s6, 8(sp) # 4-byte Folded Spill
+; RV32-NEXT: add s6, a0, s6
+; RV32-NEXT: add s5, s4, s6
+; RV32-NEXT: lw a0, 32(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw a2, 0(sp) # 4-byte Folded Reload
+; RV32-NEXT: sltu a0, a2, a0
+; RV32-NEXT: sw a0, 32(sp) # 4-byte Folded Spill
+; RV32-NEXT: add s5, s5, a0
+; RV32-NEXT: sw s0, 80(sp) # 4-byte Folded Spill
+; RV32-NEXT: add s0, s5, s0
+; RV32-NEXT: add s0, s10, s0
+; RV32-NEXT: sw a1, 0(sp) # 4-byte Folded Spill
+; RV32-NEXT: beq s0, s10, .LBB2_23
+; RV32-NEXT: # %bb.22: # %fp-to-i-if-else
+; RV32-NEXT: sltu a0, s0, s10
+; RV32-NEXT: sw a0, 80(sp) # 4-byte Folded Spill
+; RV32-NEXT: .LBB2_23: # %fp-to-i-if-else
+; RV32-NEXT: lw a2, 60(sp) # 4-byte Folded Reload
; RV32-NEXT: srli a2, a2, 1
-; RV32-NEXT: lw a0, 76(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw a0, 64(sp) # 4-byte Folded Reload
; RV32-NEXT: srli a1, a0, 1
-; RV32-NEXT: beq s11, s6, .LBB2_26
-; RV32-NEXT: # %bb.25: # %fp-to-i-if-else
-; RV32-NEXT: sltu s7, s11, s6
-; RV32-NEXT: .LBB2_26: # %fp-to-i-if-else
-; RV32-NEXT: lw a3, 88(sp) # 4-byte Folded Reload
+; RV32-NEXT: beq s10, s9, .LBB2_25
+; RV32-NEXT: # %bb.24: # %fp-to-i-if-else
+; RV32-NEXT: sltu s8, s10, s9
+; RV32-NEXT: .LBB2_25: # %fp-to-i-if-else
+; RV32-NEXT: lw a3, 76(sp) # 4-byte Folded Reload
; RV32-NEXT: sll a0, a0, a3
-; RV32-NEXT: lw a4, 84(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw a4, 72(sp) # 4-byte Folded Reload
; RV32-NEXT: srl a2, a2, a4
-; RV32-NEXT: lw a5, 80(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw a5, 68(sp) # 4-byte Folded Reload
; RV32-NEXT: sll a3, a5, a3
; RV32-NEXT: srl a1, a1, a4
-; RV32-NEXT: beq s3, s10, .LBB2_28
-; RV32-NEXT: # %bb.27: # %fp-to-i-if-else
-; RV32-NEXT: sltu a4, s3, s10
-; RV32-NEXT: sw a4, 96(sp) # 4-byte Folded Spill
-; RV32-NEXT: .LBB2_28: # %fp-to-i-if-else
-; RV32-NEXT: or s11, a0, a2
-; RV32-NEXT: or s10, a3, a1
-; RV32-NEXT: lw a0, 40(sp) # 4-byte Folded Reload
-; RV32-NEXT: lw a1, 32(sp) # 4-byte Folded Reload
-; RV32-NEXT: sltu a0, a1, a0
+; RV32-NEXT: lw s10, 40(sp) # 4-byte Folded Reload
+; RV32-NEXT: beq s11, s3, .LBB2_27
+; RV32-NEXT: # %bb.26: # %fp-to-i-if-else
+; RV32-NEXT: sltu s10, s11, s3
+; RV32-NEXT: .LBB2_27: # %fp-to-i-if-else
+; RV32-NEXT: or s3, a0, a2
+; RV32-NEXT: or s11, a3, a1
+; RV32-NEXT: lw a0, 24(sp) # 4-byte Folded Reload
; RV32-NEXT: lw a1, 16(sp) # 4-byte Folded Reload
-; RV32-NEXT: sltu a1, s6, a1
-; RV32-NEXT: lw s2, 48(sp) # 4-byte Folded Reload
+; RV32-NEXT: sltu a0, a1, a0
+; RV32-NEXT: sltu a1, s9, s2
+; RV32-NEXT: lw s2, 28(sp) # 4-byte Folded Reload
; RV32-NEXT: add s2, s2, a0
-; RV32-NEXT: lw s3, 20(sp) # 4-byte Folded Reload
-; RV32-NEXT: add s3, s3, a1
-; RV32-NEXT: mv a0, s9
+; RV32-NEXT: lw s9, 4(sp) # 4-byte Folded Reload
+; RV32-NEXT: add s9, s9, a1
+; RV32-NEXT: lw a0, 128(sp) # 4-byte Folded Reload
; RV32-NEXT: li a1, 0
-; RV32-NEXT: mv a2, s0
+; RV32-NEXT: mv a2, s1
; RV32-NEXT: li a3, 0
; RV32-NEXT: call __muldi3
; RV32-NEXT: add a0, a0, s2
-; RV32-NEXT: add a0, a0, s3
-; RV32-NEXT: lw a1, 96(sp) # 4-byte Folded Reload
-; RV32-NEXT: add a0, a0, a1
-; RV32-NEXT: add s7, a0, s7
-; RV32-NEXT: beq s4, s1, .LBB2_30
-; RV32-NEXT: # %bb.29: # %fp-to-i-if-else
-; RV32-NEXT: sltu a0, s4, s1
-; RV32-NEXT: sw a0, 92(sp) # 4-byte Folded Spill
-; RV32-NEXT: .LBB2_30: # %fp-to-i-if-else
-; RV32-NEXT: lw a0, 140(sp) # 4-byte Folded Reload
-; RV32-NEXT: lw s6, 104(sp) # 4-byte Folded Reload
-; RV32-NEXT: mv a1, s6
-; RV32-NEXT: mv a2, s0
-; RV32-NEXT: mv a3, s0
+; RV32-NEXT: add a0, a0, s9
+; RV32-NEXT: add a0, a0, s10
+; RV32-NEXT: add s8, a0, s8
+; RV32-NEXT: lw s10, 32(sp) # 4-byte Folded Reload
+; RV32-NEXT: beq s5, s4, .LBB2_29
+; RV32-NEXT: # %bb.28: # %fp-to-i-if-else
+; RV32-NEXT: sltu s10, s5, s4
+; RV32-NEXT: .LBB2_29: # %fp-to-i-if-else
+; RV32-NEXT: lw a0, 92(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw s9, 84(sp) # 4-byte Folded Reload
+; RV32-NEXT: mv a1, s9
+; RV32-NEXT: mv a2, s1
+; RV32-NEXT: mv a3, s1
; RV32-NEXT: call __muldi3
; RV32-NEXT: mv s2, a0
-; RV32-NEXT: mv a0, s11
-; RV32-NEXT: mv a1, s10
-; RV32-NEXT: ori a2, s0, 1
-; RV32-NEXT: mv a3, s0
+; RV32-NEXT: mv a0, s3
+; RV32-NEXT: mv a1, s11
+; RV32-NEXT: ori a2, s1, 1
+; RV32-NEXT: mv a3, s1
; RV32-NEXT: call __muldi3
; RV32-NEXT: add s2, a0, s2
-; RV32-NEXT: lw a0, 36(sp) # 4-byte Folded Reload
-; RV32-NEXT: lw a1, 24(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw a0, 12(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw a1, 8(sp) # 4-byte Folded Reload
; RV32-NEXT: sltu a0, a1, a0
-; RV32-NEXT: lw a1, 12(sp) # 4-byte Folded Reload
-; RV32-NEXT: lw a2, 8(sp) # 4-byte Folded Reload
-; RV32-NEXT: sltu a1, a2, a1
-; RV32-NEXT: lw s3, 44(sp) # 4-byte Folded Reload
+; RV32-NEXT: sltu a1, s6, s7
+; RV32-NEXT: lw s3, 20(sp) # 4-byte Folded Reload
; RV32-NEXT: add s3, s3, a0
-; RV32-NEXT: lw s4, 28(sp) # 4-byte Folded Reload
-; RV32-NEXT: add s4, s4, a1
-; RV32-NEXT: mv a0, s0
+; RV32-NEXT: lw s5, 0(sp) # 4-byte Folded Reload
+; RV32-NEXT: add s5, s5, a1
+; RV32-NEXT: mv a0, s1
; RV32-NEXT: li a1, 0
-; RV32-NEXT: mv a2, s6
+; RV32-NEXT: mv a2, s9
; RV32-NEXT: li a3, 0
; RV32-NEXT: call __muldi3
; RV32-NEXT: add a0, a0, s3
-; RV32-NEXT: add a0, a0, s4
+; RV32-NEXT: add a0, a0, s5
; RV32-NEXT: add s3, a0, s2
-; RV32-NEXT: lw a0, 108(sp) # 4-byte Folded Reload
-; RV32-NEXT: mv a1, s9
-; RV32-NEXT: mv a2, s0
-; RV32-NEXT: mv a3, s0
+; RV32-NEXT: lw a0, 88(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw a1, 128(sp) # 4-byte Folded Reload
+; RV32-NEXT: mv a2, s1
+; RV32-NEXT: mv a3, s1
; RV32-NEXT: call __muldi3
; RV32-NEXT: mv s2, a0
-; RV32-NEXT: lw a0, 144(sp) # 4-byte Folded Reload
-; RV32-NEXT: lw s6, 148(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw a0, 124(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw s6, 132(sp) # 4-byte Folded Reload
; RV32-NEXT: mv a1, s6
-; RV32-NEXT: lw a2, 132(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw a2, 116(sp) # 4-byte Folded Reload
; RV32-NEXT: call __muldi3
; RV32-NEXT: add s2, s2, a0
-; RV32-NEXT: lw a0, 64(sp) # 4-byte Folded Reload
-; RV32-NEXT: sltu a0, s1, a0
-; RV32-NEXT: lw a1, 56(sp) # 4-byte Folded Reload
-; RV32-NEXT: lw a2, 52(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw a0, 52(sp) # 4-byte Folded Reload
+; RV32-NEXT: sltu a0, s4, a0
+; RV32-NEXT: lw a1, 44(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw a2, 36(sp) # 4-byte Folded Reload
; RV32-NEXT: sltu a1, a2, a1
-; RV32-NEXT: lw s1, 68(sp) # 4-byte Folded Reload
-; RV32-NEXT: add s1, s1, a0
-; RV32-NEXT: lw s4, 60(sp) # 4-byte Folded Reload
-; RV32-NEXT: add s4, s4, a1
-; RV32-NEXT: mv a0, s0
+; RV32-NEXT: lw s4, 56(sp) # 4-byte Folded Reload
+; RV32-NEXT: add s4, s4, a0
+; RV32-NEXT: lw s5, 48(sp) # 4-byte Folded Reload
+; RV32-NEXT: add s5, s5, a1
+; RV32-NEXT: mv a0, s1
; RV32-NEXT: li a1, 0
; RV32-NEXT: mv a2, s6
; RV32-NEXT: li a3, 0
; RV32-NEXT: call __muldi3
-; RV32-NEXT: add a0, a0, s4
+; RV32-NEXT: add a0, a0, s5
; RV32-NEXT: add a0, a0, s2
-; RV32-NEXT: add a0, a0, s1
-; RV32-NEXT: lw a1, 92(sp) # 4-byte Folded Reload
-; RV32-NEXT: add a0, a0, a1
+; RV32-NEXT: add a0, a0, s4
+; RV32-NEXT: add a0, a0, s10
; RV32-NEXT: add a0, a0, s3
-; RV32-NEXT: add a0, s7, a0
-; RV32-NEXT: lw a1, 100(sp) # 4-byte Folded Reload
+; RV32-NEXT: add a0, s8, a0
+; RV32-NEXT: lw a1, 80(sp) # 4-byte Folded Reload
; RV32-NEXT: add a0, a0, a1
; RV32-NEXT: zext.b a0, a0
-; RV32-NEXT: lw a1, 136(sp) # 4-byte Folded Reload
-; RV32-NEXT: lw a3, 128(sp) # 4-byte Folded Reload
-; RV32-NEXT: lw a2, 124(sp) # 4-byte Folded Reload
-; RV32-NEXT: lw a4, 120(sp) # 4-byte Folded Reload
-; RV32-NEXT: lw s3, 116(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw a1, 120(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw a3, 112(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw a2, 108(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw s5, 104(sp) # 4-byte Folded Reload
+; RV32-NEXT: .LBB2_30: # %fp-to-i-cleanup
+; RV32-NEXT: lw a4, 100(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw a5, 96(sp) # 4-byte Folded Reload
; RV32-NEXT: .LBB2_31: # %fp-to-i-cleanup
; RV32-NEXT: sw a3, 0(a1)
; RV32-NEXT: sw a2, 4(a1)
-; RV32-NEXT: sw a4, 8(a1)
-; RV32-NEXT: sw s3, 12(a1)
-; RV32-NEXT: sw s8, 16(a1)
-; RV32-NEXT: sw s5, 20(a1)
+; RV32-NEXT: sw s5, 8(a1)
+; RV32-NEXT: sw a4, 12(a1)
+; RV32-NEXT: sw a5, 16(a1)
+; RV32-NEXT: sw s0, 20(a1)
; RV32-NEXT: sb a0, 24(a1)
-; RV32-NEXT: lw ra, 332(sp) # 4-byte Folded Reload
-; RV32-NEXT: lw s0, 328(sp) # 4-byte Folded Reload
-; RV32-NEXT: lw s1, 324(sp) # 4-byte Folded Reload
-; RV32-NEXT: lw s2, 320(sp) # 4-byte Folded Reload
-; RV32-NEXT: lw s3, 316(sp) # 4-byte Folded Reload
-; RV32-NEXT: lw s4, 312(sp) # 4-byte Folded Reload
-; RV32-NEXT: lw s5, 308(sp) # 4-byte Folded Reload
-; RV32-NEXT: lw s6, 304(sp) # 4-byte Folded Reload
-; RV32-NEXT: lw s7, 300(sp) # 4-byte Folded Reload
-; RV32-NEXT: lw s8, 296(sp) # 4-byte Folded Reload
-; RV32-NEXT: lw s9, 292(sp) # 4-byte Folded Reload
-; RV32-NEXT: lw s10, 288(sp) # 4-byte Folded Reload
-; RV32-NEXT: lw s11, 284(sp) # 4-byte Folded Reload
-; RV32-NEXT: addi sp, sp, 336
+; RV32-NEXT: lw ra, 316(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw s0, 312(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw s1, 308(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw s2, 304(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw s3, 300(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw s4, 296(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw s5, 292(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw s6, 288(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw s7, 284(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw s8, 280(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw s9, 276(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw s10, 272(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw s11, 268(sp) # 4-byte Folded Reload
+; RV32-NEXT: addi sp, sp, 320
; RV32-NEXT: ret
+; RV32-NEXT: .LBB2_32: # %fp-to-i-if-then5
+; RV32-NEXT: srli a2, a2, 31
+; RV32-NEXT: addi a3, a2, -1
+; RV32-NEXT: addi a0, a2, -129
+; RV32-NEXT: zext.b a0, a0
+; RV32-NEXT: mv a2, a3
+; RV32-NEXT: mv s5, a3
+; RV32-NEXT: mv a4, a3
+; RV32-NEXT: mv a5, a3
+; RV32-NEXT: mv s0, a3
+; RV32-NEXT: j .LBB2_31
%1 = load float, ptr %in, align 4
%2 = fptosi float %1 to i200
store i200 %2, ptr %out, align 8
diff --git a/llvm/test/CodeGen/RISCV/fpclamptosat.ll b/llvm/test/CodeGen/RISCV/fpclamptosat.ll
index 5e398191e0083..6d2bc6c5ebb43 100644
--- a/llvm/test/CodeGen/RISCV/fpclamptosat.ll
+++ b/llvm/test/CodeGen/RISCV/fpclamptosat.ll
@@ -1111,32 +1111,25 @@ define i64 @stest_f64i64(double %x) {
; RV32IF-NEXT: .cfi_offset s9, -44
; RV32IF-NEXT: .cfi_offset s10, -48
; RV32IF-NEXT: .cfi_offset s11, -52
+; RV32IF-NEXT: .cfi_remember_state
; RV32IF-NEXT: slli a2, a1, 1
; RV32IF-NEXT: srli a2, a2, 21
; RV32IF-NEXT: li a3, 1023
; RV32IF-NEXT: bgeu a2, a3, .LBB18_2
; RV32IF-NEXT: # %bb.1:
; RV32IF-NEXT: li s2, 0
-; RV32IF-NEXT: li a1, 0
+; RV32IF-NEXT: li s3, 0
; RV32IF-NEXT: li a0, 0
-; RV32IF-NEXT: li a2, 0
-; RV32IF-NEXT: j .LBB18_7
+; RV32IF-NEXT: li a1, 0
+; RV32IF-NEXT: j .LBB18_6
; RV32IF-NEXT: .LBB18_2: # %fp-to-i-if-end
; RV32IF-NEXT: addi a3, a2, -1151
; RV32IF-NEXT: sltu a4, a3, a2
; RV32IF-NEXT: sltiu a3, a3, -128
; RV32IF-NEXT: or a4, a4, a3
; RV32IF-NEXT: srli a3, a1, 31
-; RV32IF-NEXT: beqz a4, .LBB18_4
-; RV32IF-NEXT: # %bb.3: # %fp-to-i-if-then5
-; RV32IF-NEXT: xori a0, a3, 1
-; RV32IF-NEXT: lui a2, 524288
-; RV32IF-NEXT: neg s2, a0
-; RV32IF-NEXT: sub a2, a2, a0
-; RV32IF-NEXT: mv a1, s2
-; RV32IF-NEXT: mv a0, s2
-; RV32IF-NEXT: j .LBB18_7
-; RV32IF-NEXT: .LBB18_4: # %fp-to-i-if-end9
+; RV32IF-NEXT: bnez a4, .LBB18_20
+; RV32IF-NEXT: # %bb.3: # %fp-to-i-if-end9
; RV32IF-NEXT: neg s0, a3
; RV32IF-NEXT: slli a1, a1, 12
; RV32IF-NEXT: lui a3, 256
@@ -1144,8 +1137,8 @@ define i64 @stest_f64i64(double %x) {
; RV32IF-NEXT: srli a1, a1, 12
; RV32IF-NEXT: or a1, a1, a3
; RV32IF-NEXT: ori s8, s0, 1
-; RV32IF-NEXT: bltu a4, a2, .LBB18_6
-; RV32IF-NEXT: # %bb.5: # %fp-to-i-if-then12
+; RV32IF-NEXT: bltu a4, a2, .LBB18_5
+; RV32IF-NEXT: # %bb.4: # %fp-to-i-if-then12
; RV32IF-NEXT: sw zero, 24(sp)
; RV32IF-NEXT: sw zero, 28(sp)
; RV32IF-NEXT: sw zero, 32(sp)
@@ -1177,70 +1170,68 @@ define i64 @stest_f64i64(double %x) {
; RV32IF-NEXT: srl a1, a1, a0
; RV32IF-NEXT: or a3, a3, a6
; RV32IF-NEXT: sw a3, 4(sp) # 4-byte Folded Spill
-; RV32IF-NEXT: or s5, a2, a7
-; RV32IF-NEXT: or s6, a1, a5
-; RV32IF-NEXT: srl s4, a4, a0
-; RV32IF-NEXT: mv a0, s6
+; RV32IF-NEXT: or s6, a2, a7
+; RV32IF-NEXT: or s7, a1, a5
+; RV32IF-NEXT: srl s5, a4, a0
+; RV32IF-NEXT: mv a0, s7
; RV32IF-NEXT: li a1, 0
; RV32IF-NEXT: mv a2, s8
; RV32IF-NEXT: li a3, 0
; RV32IF-NEXT: call __muldi3
; RV32IF-NEXT: mv s2, a0
-; RV32IF-NEXT: mv s7, a1
-; RV32IF-NEXT: mv a0, s5
+; RV32IF-NEXT: mv s3, a1
+; RV32IF-NEXT: mv a0, s6
; RV32IF-NEXT: li a1, 0
; RV32IF-NEXT: mv a2, s8
; RV32IF-NEXT: li a3, 0
; RV32IF-NEXT: call __muldi3
-; RV32IF-NEXT: add s7, a0, s7
-; RV32IF-NEXT: sltu a0, s7, a0
-; RV32IF-NEXT: add s9, a1, a0
-; RV32IF-NEXT: mv a0, s6
+; RV32IF-NEXT: add s3, a0, s3
+; RV32IF-NEXT: sltu a0, s3, a0
+; RV32IF-NEXT: add s10, a1, a0
+; RV32IF-NEXT: mv a0, s7
; RV32IF-NEXT: li a1, 0
; RV32IF-NEXT: mv a2, s0
; RV32IF-NEXT: li a3, 0
; RV32IF-NEXT: call __muldi3
-; RV32IF-NEXT: add s10, a0, s7
-; RV32IF-NEXT: sltu a0, s10, a0
+; RV32IF-NEXT: add s3, a0, s3
+; RV32IF-NEXT: sltu a0, s3, a0
; RV32IF-NEXT: add a0, a1, a0
-; RV32IF-NEXT: add s11, s9, a0
-; RV32IF-NEXT: mv a0, s5
+; RV32IF-NEXT: add s11, s10, a0
+; RV32IF-NEXT: mv a0, s6
; RV32IF-NEXT: li a1, 0
; RV32IF-NEXT: mv a2, s0
; RV32IF-NEXT: li a3, 0
; RV32IF-NEXT: call __muldi3
; RV32IF-NEXT: mv s1, a0
-; RV32IF-NEXT: mv s3, s8
-; RV32IF-NEXT: mv s8, a1
-; RV32IF-NEXT: add s7, a0, s11
+; RV32IF-NEXT: mv s9, a1
+; RV32IF-NEXT: mv s4, s8
+; RV32IF-NEXT: add s8, a0, s11
; RV32IF-NEXT: mv a0, s0
; RV32IF-NEXT: mv a1, s0
-; RV32IF-NEXT: mv a2, s6
-; RV32IF-NEXT: mv a3, s5
+; RV32IF-NEXT: mv a2, s7
+; RV32IF-NEXT: mv a3, s6
; RV32IF-NEXT: call __muldi3
-; RV32IF-NEXT: mv s5, a0
-; RV32IF-NEXT: mv s6, a1
-; RV32IF-NEXT: mv a0, s3
+; RV32IF-NEXT: mv s6, a0
+; RV32IF-NEXT: mv s7, a1
+; RV32IF-NEXT: mv a0, s4
; RV32IF-NEXT: mv a1, s0
; RV32IF-NEXT: lw a2, 4(sp) # 4-byte Folded Reload
-; RV32IF-NEXT: mv a3, s4
+; RV32IF-NEXT: mv a3, s5
; RV32IF-NEXT: call __muldi3
-; RV32IF-NEXT: mv a2, a1
-; RV32IF-NEXT: mv a1, s10
-; RV32IF-NEXT: add a3, s5, a0
-; RV32IF-NEXT: sltu a4, s7, s1
-; RV32IF-NEXT: sltu a5, s11, s9
-; RV32IF-NEXT: add a2, s6, a2
-; RV32IF-NEXT: add a0, s7, a3
-; RV32IF-NEXT: add a5, s8, a5
-; RV32IF-NEXT: sltu a3, a3, s5
-; RV32IF-NEXT: sltu a6, a0, s7
-; RV32IF-NEXT: add a4, a5, a4
-; RV32IF-NEXT: add a2, a2, a3
-; RV32IF-NEXT: add a2, a4, a2
-; RV32IF-NEXT: add a2, a2, a6
-; RV32IF-NEXT: j .LBB18_7
-; RV32IF-NEXT: .LBB18_6: # %fp-to-i-if-else
+; RV32IF-NEXT: add a2, s6, a0
+; RV32IF-NEXT: sltu a3, s8, s1
+; RV32IF-NEXT: sltu a4, s11, s10
+; RV32IF-NEXT: add a1, s7, a1
+; RV32IF-NEXT: add a0, s8, a2
+; RV32IF-NEXT: add a4, s9, a4
+; RV32IF-NEXT: sltu a2, a2, s6
+; RV32IF-NEXT: sltu a5, a0, s8
+; RV32IF-NEXT: add a3, a4, a3
+; RV32IF-NEXT: add a1, a1, a2
+; RV32IF-NEXT: add a1, a3, a1
+; RV32IF-NEXT: add a1, a1, a5
+; RV32IF-NEXT: j .LBB18_6
+; RV32IF-NEXT: .LBB18_5: # %fp-to-i-if-else
; RV32IF-NEXT: sw a0, 56(sp)
; RV32IF-NEXT: sw a1, 60(sp)
; RV32IF-NEXT: sw zero, 64(sp)
@@ -1269,10 +1260,10 @@ define i64 @stest_f64i64(double %x) {
; RV32IF-NEXT: sll a2, a2, a0
; RV32IF-NEXT: sll a5, a5, a0
; RV32IF-NEXT: sll a1, a1, a0
-; RV32IF-NEXT: or s3, a2, a6
+; RV32IF-NEXT: or s4, a2, a6
; RV32IF-NEXT: or a2, a5, a7
; RV32IF-NEXT: or a3, a1, a3
-; RV32IF-NEXT: sll s4, a4, a0
+; RV32IF-NEXT: sll s3, a4, a0
; RV32IF-NEXT: mv a0, s8
; RV32IF-NEXT: mv a1, s0
; RV32IF-NEXT: call __muldi3
@@ -1280,21 +1271,21 @@ define i64 @stest_f64i64(double %x) {
; RV32IF-NEXT: mv s5, a1
; RV32IF-NEXT: mv a0, s0
; RV32IF-NEXT: mv a1, s0
-; RV32IF-NEXT: mv a2, s4
-; RV32IF-NEXT: mv a3, s3
+; RV32IF-NEXT: mv a2, s3
+; RV32IF-NEXT: mv a3, s4
; RV32IF-NEXT: call __muldi3
; RV32IF-NEXT: add a1, a1, s5
; RV32IF-NEXT: add s1, a0, s2
; RV32IF-NEXT: sltu a0, s1, a0
; RV32IF-NEXT: add s7, a1, a0
-; RV32IF-NEXT: mv a0, s3
+; RV32IF-NEXT: mv a0, s4
; RV32IF-NEXT: li a1, 0
; RV32IF-NEXT: mv a2, s8
; RV32IF-NEXT: li a3, 0
; RV32IF-NEXT: call __muldi3
; RV32IF-NEXT: mv s5, a0
; RV32IF-NEXT: mv s6, a1
-; RV32IF-NEXT: mv a0, s4
+; RV32IF-NEXT: mv a0, s3
; RV32IF-NEXT: li a1, 0
; RV32IF-NEXT: mv a2, s8
; RV32IF-NEXT: li a3, 0
@@ -1303,76 +1294,75 @@ define i64 @stest_f64i64(double %x) {
; RV32IF-NEXT: add s8, s5, a1
; RV32IF-NEXT: sltu a0, s8, s5
; RV32IF-NEXT: add s6, s6, a0
-; RV32IF-NEXT: mv a0, s4
+; RV32IF-NEXT: mv a0, s3
; RV32IF-NEXT: li a1, 0
; RV32IF-NEXT: mv a2, s0
; RV32IF-NEXT: li a3, 0
; RV32IF-NEXT: call __muldi3
-; RV32IF-NEXT: add s8, a0, s8
-; RV32IF-NEXT: sltu a0, s8, a0
+; RV32IF-NEXT: add s3, a0, s8
+; RV32IF-NEXT: sltu a0, s3, a0
; RV32IF-NEXT: add a0, a1, a0
-; RV32IF-NEXT: add s4, s6, a0
-; RV32IF-NEXT: sltu s5, s4, s6
-; RV32IF-NEXT: mv a0, s3
+; RV32IF-NEXT: add s5, s6, a0
+; RV32IF-NEXT: sltu s6, s5, s6
+; RV32IF-NEXT: mv a0, s4
; RV32IF-NEXT: li a1, 0
; RV32IF-NEXT: mv a2, s0
; RV32IF-NEXT: li a3, 0
; RV32IF-NEXT: call __muldi3
-; RV32IF-NEXT: mv a2, a1
-; RV32IF-NEXT: mv a1, s8
-; RV32IF-NEXT: add a2, a2, s5
-; RV32IF-NEXT: add s4, a0, s4
-; RV32IF-NEXT: sltu a3, s4, a0
-; RV32IF-NEXT: add a0, s4, s1
-; RV32IF-NEXT: add a2, a2, a3
-; RV32IF-NEXT: add a2, a2, s7
-; RV32IF-NEXT: sltu a3, a0, s4
-; RV32IF-NEXT: add a2, a2, a3
-; RV32IF-NEXT: .LBB18_7: # %fp-to-i-cleanup
-; RV32IF-NEXT: lui a3, 524288
-; RV32IF-NEXT: addi a4, a3, -1
-; RV32IF-NEXT: beq a1, a4, .LBB18_9
-; RV32IF-NEXT: # %bb.8: # %fp-to-i-cleanup
-; RV32IF-NEXT: sltu a5, a1, a4
-; RV32IF-NEXT: or a6, a0, a2
-; RV32IF-NEXT: bnez a6, .LBB18_10
-; RV32IF-NEXT: j .LBB18_11
-; RV32IF-NEXT: .LBB18_9:
-; RV32IF-NEXT: sltiu a5, s2, -1
-; RV32IF-NEXT: or a6, a0, a2
-; RV32IF-NEXT: beqz a6, .LBB18_11
+; RV32IF-NEXT: add a1, a1, s6
+; RV32IF-NEXT: add s5, a0, s5
+; RV32IF-NEXT: sltu a2, s5, a0
+; RV32IF-NEXT: add a0, s5, s1
+; RV32IF-NEXT: add a1, a1, a2
+; RV32IF-NEXT: add a1, a1, s7
+; RV32IF-NEXT: sltu a2, a0, s5
+; RV32IF-NEXT: add a1, a1, a2
+; RV32IF-NEXT: .LBB18_6: # %fp-to-i-cleanup
+; RV32IF-NEXT: lui a2, 524288
+; RV32IF-NEXT: addi a3, a2, -1
+; RV32IF-NEXT: beq s3, a3, .LBB18_8
+; RV32IF-NEXT: # %bb.7: # %fp-to-i-cleanup
+; RV32IF-NEXT: sltu a4, s3, a3
+; RV32IF-NEXT: or a5, a0, a1
+; RV32IF-NEXT: bnez a5, .LBB18_9
+; RV32IF-NEXT: j .LBB18_10
+; RV32IF-NEXT: .LBB18_8:
+; RV32IF-NEXT: sltiu a4, s2, -1
+; RV32IF-NEXT: or a5, a0, a1
+; RV32IF-NEXT: beqz a5, .LBB18_10
+; RV32IF-NEXT: .LBB18_9: # %fp-to-i-cleanup
+; RV32IF-NEXT: srli a4, a1, 31
; RV32IF-NEXT: .LBB18_10: # %fp-to-i-cleanup
-; RV32IF-NEXT: srli a5, a2, 31
-; RV32IF-NEXT: .LBB18_11: # %fp-to-i-cleanup
-; RV32IF-NEXT: neg a6, a5
-; RV32IF-NEXT: addi a7, a5, -1
-; RV32IF-NEXT: bnez a5, .LBB18_13
-; RV32IF-NEXT: # %bb.12: # %fp-to-i-cleanup
-; RV32IF-NEXT: mv a1, a4
-; RV32IF-NEXT: .LBB18_13: # %fp-to-i-cleanup
-; RV32IF-NEXT: or a4, a7, s2
-; RV32IF-NEXT: and a2, a6, a2
-; RV32IF-NEXT: and a5, a6, a0
-; RV32IF-NEXT: beq a1, a3, .LBB18_15
-; RV32IF-NEXT: # %bb.14: # %fp-to-i-cleanup
-; RV32IF-NEXT: sltu a0, a3, a1
-; RV32IF-NEXT: j .LBB18_16
-; RV32IF-NEXT: .LBB18_15:
-; RV32IF-NEXT: snez a0, a4
-; RV32IF-NEXT: .LBB18_16: # %fp-to-i-cleanup
-; RV32IF-NEXT: and a5, a5, a2
-; RV32IF-NEXT: li a3, -1
-; RV32IF-NEXT: beq a5, a3, .LBB18_18
-; RV32IF-NEXT: # %bb.17: # %fp-to-i-cleanup
-; RV32IF-NEXT: srli a2, a2, 31
-; RV32IF-NEXT: xori a0, a2, 1
-; RV32IF-NEXT: .LBB18_18: # %fp-to-i-cleanup
-; RV32IF-NEXT: bnez a0, .LBB18_20
-; RV32IF-NEXT: # %bb.19: # %fp-to-i-cleanup
-; RV32IF-NEXT: lui a1, 524288
-; RV32IF-NEXT: .LBB18_20: # %fp-to-i-cleanup
+; RV32IF-NEXT: neg a5, a4
+; RV32IF-NEXT: addi a6, a4, -1
+; RV32IF-NEXT: bnez a4, .LBB18_12
+; RV32IF-NEXT: # %bb.11: # %fp-to-i-cleanup
+; RV32IF-NEXT: mv s3, a3
+; RV32IF-NEXT: .LBB18_12: # %fp-to-i-cleanup
+; RV32IF-NEXT: or a3, a6, s2
+; RV32IF-NEXT: and a1, a5, a1
+; RV32IF-NEXT: and a4, a5, a0
+; RV32IF-NEXT: beq s3, a2, .LBB18_14
+; RV32IF-NEXT: # %bb.13: # %fp-to-i-cleanup
+; RV32IF-NEXT: sltu a0, a2, s3
+; RV32IF-NEXT: j .LBB18_15
+; RV32IF-NEXT: .LBB18_14:
+; RV32IF-NEXT: snez a0, a3
+; RV32IF-NEXT: .LBB18_15: # %fp-to-i-cleanup
+; RV32IF-NEXT: and a4, a4, a1
+; RV32IF-NEXT: li a2, -1
+; RV32IF-NEXT: beq a4, a2, .LBB18_17
+; RV32IF-NEXT: # %bb.16: # %fp-to-i-cleanup
+; RV32IF-NEXT: srli a1, a1, 31
+; RV32IF-NEXT: xori a0, a1, 1
+; RV32IF-NEXT: .LBB18_17: # %fp-to-i-cleanup
+; RV32IF-NEXT: bnez a0, .LBB18_19
+; RV32IF-NEXT: # %bb.18: # %fp-to-i-cleanup
+; RV32IF-NEXT: lui s3, 524288
+; RV32IF-NEXT: .LBB18_19: # %fp-to-i-cleanup
; RV32IF-NEXT: neg a0, a0
-; RV32IF-NEXT: and a0, a0, a4
+; RV32IF-NEXT: and a0, a0, a3
+; RV32IF-NEXT: mv a1, s3
; RV32IF-NEXT: lw ra, 124(sp) # 4-byte Folded Reload
; RV32IF-NEXT: lw s0, 120(sp) # 4-byte Folded Reload
; RV32IF-NEXT: lw s1, 116(sp) # 4-byte Folded Reload
@@ -1402,6 +1392,15 @@ define i64 @stest_f64i64(double %x) {
; RV32IF-NEXT: addi sp, sp, 128
; RV32IF-NEXT: .cfi_def_cfa_offset 0
; RV32IF-NEXT: ret
+; RV32IF-NEXT: .LBB18_20: # %fp-to-i-if-then5
+; RV32IF-NEXT: .cfi_restore_state
+; RV32IF-NEXT: xori a0, a3, 1
+; RV32IF-NEXT: lui a1, 524288
+; RV32IF-NEXT: neg s2, a0
+; RV32IF-NEXT: sub a1, a1, a0
+; RV32IF-NEXT: mv s3, s2
+; RV32IF-NEXT: mv a0, s2
+; RV32IF-NEXT: j .LBB18_6
;
; RV64IF-LABEL: stest_f64i64:
; RV64IF: # %bb.0: # %entry
@@ -1474,6 +1473,7 @@ define i64 @stest_f64i64(double %x) {
; RV32IFD-NEXT: .cfi_offset s9, -44
; RV32IFD-NEXT: .cfi_offset s10, -48
; RV32IFD-NEXT: .cfi_offset s11, -52
+; RV32IFD-NEXT: .cfi_remember_state
; RV32IFD-NEXT: fsd fa0, 16(sp)
; RV32IFD-NEXT: lw a2, 20(sp)
; RV32IFD-NEXT: slli a0, a2, 1
@@ -1482,26 +1482,18 @@ define i64 @stest_f64i64(double %x) {
; RV32IFD-NEXT: bgeu a0, a1, .LBB18_2
; RV32IFD-NEXT: # %bb.1:
; RV32IFD-NEXT: li s2, 0
-; RV32IFD-NEXT: li a1, 0
+; RV32IFD-NEXT: li s3, 0
; RV32IFD-NEXT: li a0, 0
-; RV32IFD-NEXT: li a2, 0
-; RV32IFD-NEXT: j .LBB18_7
+; RV32IFD-NEXT: li a1, 0
+; RV32IFD-NEXT: j .LBB18_6
; RV32IFD-NEXT: .LBB18_2: # %fp-to-i-if-end
; RV32IFD-NEXT: addi a1, a0, -1151
; RV32IFD-NEXT: sltu a3, a1, a0
; RV32IFD-NEXT: sltiu a1, a1, -128
; RV32IFD-NEXT: or a1, a3, a1
; RV32IFD-NEXT: srli a3, a2, 31
-; RV32IFD-NEXT: beqz a1, .LBB18_4
-; RV32IFD-NEXT: # %bb.3: # %fp-to-i-if-then5
-; RV32IFD-NEXT: xori a0, a3, 1
-; RV32IFD-NEXT: lui a2, 524288
-; RV32IFD-NEXT: neg s2, a0
-; RV32IFD-NEXT: sub a2, a2, a0
-; RV32IFD-NEXT: mv a1, s2
-; RV32IFD-NEXT: mv a0, s2
-; RV32IFD-NEXT: j .LBB18_7
-; RV32IFD-NEXT: .LBB18_4: # %fp-to-i-if-end9
+; RV32IFD-NEXT: bnez a1, .LBB18_20
+; RV32IFD-NEXT: # %bb.3: # %fp-to-i-if-end9
; RV32IFD-NEXT: lw a1, 16(sp)
; RV32IFD-NEXT: neg s0, a3
; RV32IFD-NEXT: slli a2, a2, 12
@@ -1510,8 +1502,8 @@ define i64 @stest_f64i64(double %x) {
; RV32IFD-NEXT: or a2, a2, a3
; RV32IFD-NEXT: li a3, 1074
; RV32IFD-NEXT: ori s8, s0, 1
-; RV32IFD-NEXT: bltu a3, a0, .LBB18_6
-; RV32IFD-NEXT: # %bb.5: # %fp-to-i-if-then12
+; RV32IFD-NEXT: bltu a3, a0, .LBB18_5
+; RV32IFD-NEXT: # %bb.4: # %fp-to-i-if-then12
; RV32IFD-NEXT: sw zero, 40(sp)
; RV32IFD-NEXT: sw zero, 44(sp)
; RV32IFD-NEXT: sw zero, 48(sp)
@@ -1543,70 +1535,68 @@ define i64 @stest_f64i64(double %x) {
; RV32IFD-NEXT: srl a0, a0, a1
; RV32IFD-NEXT: or a3, a3, a6
; RV32IFD-NEXT: sw a3, 12(sp) # 4-byte Folded Spill
-; RV32IFD-NEXT: or s5, a2, a7
-; RV32IFD-NEXT: or s6, a0, a5
-; RV32IFD-NEXT: srl s4, a4, a1
-; RV32IFD-NEXT: mv a0, s6
+; RV32IFD-NEXT: or s6, a2, a7
+; RV32IFD-NEXT: or s7, a0, a5
+; RV32IFD-NEXT: srl s5, a4, a1
+; RV32IFD-NEXT: mv a0, s7
; RV32IFD-NEXT: li a1, 0
; RV32IFD-NEXT: mv a2, s8
; RV32IFD-NEXT: li a3, 0
; RV32IFD-NEXT: call __muldi3
; RV32IFD-NEXT: mv s2, a0
-; RV32IFD-NEXT: mv s7, a1
-; RV32IFD-NEXT: mv a0, s5
+; RV32IFD-NEXT: mv s3, a1
+; RV32IFD-NEXT: mv a0, s6
; RV32IFD-NEXT: li a1, 0
; RV32IFD-NEXT: mv a2, s8
; RV32IFD-NEXT: li a3, 0
; RV32IFD-NEXT: call __muldi3
-; RV32IFD-NEXT: add s7, a0, s7
-; RV32IFD-NEXT: sltu a0, s7, a0
-; RV32IFD-NEXT: add s9, a1, a0
-; RV32IFD-NEXT: mv a0, s6
+; RV32IFD-NEXT: add s3, a0, s3
+; RV32IFD-NEXT: sltu a0, s3, a0
+; RV32IFD-NEXT: add s10, a1, a0
+; RV32IFD-NEXT: mv a0, s7
; RV32IFD-NEXT: li a1, 0
; RV32IFD-NEXT: mv a2, s0
; RV32IFD-NEXT: li a3, 0
; RV32IFD-NEXT: call __muldi3
-; RV32IFD-NEXT: add s10, a0, s7
-; RV32IFD-NEXT: sltu a0, s10, a0
+; RV32IFD-NEXT: add s3, a0, s3
+; RV32IFD-NEXT: sltu a0, s3, a0
; RV32IFD-NEXT: add a0, a1, a0
-; RV32IFD-NEXT: add s11, s9, a0
-; RV32IFD-NEXT: mv a0, s5
+; RV32IFD-NEXT: add s11, s10, a0
+; RV32IFD-NEXT: mv a0, s6
; RV32IFD-NEXT: li a1, 0
; RV32IFD-NEXT: mv a2, s0
; RV32IFD-NEXT: li a3, 0
; RV32IFD-NEXT: call __muldi3
; RV32IFD-NEXT: mv s1, a0
-; RV32IFD-NEXT: mv s3, s8
-; RV32IFD-NEXT: mv s8, a1
-; RV32IFD-NEXT: add s7, a0, s11
+; RV32IFD-NEXT: mv s9, a1
+; RV32IFD-NEXT: mv s4, s8
+; RV32IFD-NEXT: add s8, a0, s11
; RV32IFD-NEXT: mv a0, s0
; RV32IFD-NEXT: mv a1, s0
-; RV32IFD-NEXT: mv a2, s6
-; RV32IFD-NEXT: mv a3, s5
+; RV32IFD-NEXT: mv a2, s7
+; RV32IFD-NEXT: mv a3, s6
; RV32IFD-NEXT: call __muldi3
-; RV32IFD-NEXT: mv s5, a0
-; RV32IFD-NEXT: mv s6, a1
-; RV32IFD-NEXT: mv a0, s3
+; RV32IFD-NEXT: mv s6, a0
+; RV32IFD-NEXT: mv s7, a1
+; RV32IFD-NEXT: mv a0, s4
; RV32IFD-NEXT: mv a1, s0
; RV32IFD-NEXT: lw a2, 12(sp) # 4-byte Folded Reload
-; RV32IFD-NEXT: mv a3, s4
+; RV32IFD-NEXT: mv a3, s5
; RV32IFD-NEXT: call __muldi3
-; RV32IFD-NEXT: mv a2, a1
-; RV32IFD-NEXT: mv a1, s10
-; RV32IFD-NEXT: add a3, s5, a0
-; RV32IFD-NEXT: sltu a4, s7, s1
-; RV32IFD-NEXT: sltu a5, s11, s9
-; RV32IFD-NEXT: add a2, s6, a2
-; RV32IFD-NEXT: add a0, s7, a3
-; RV32IFD-NEXT: add a5, s8, a5
-; RV32IFD-NEXT: sltu a3, a3, s5
-; RV32IFD-NEXT: sltu a6, a0, s7
-; RV32IFD-NEXT: add a4, a5, a4
-; RV32IFD-NEXT: add a2, a2, a3
-; RV32IFD-NEXT: add a2, a4, a2
-; RV32IFD-NEXT: add a2, a2, a6
-; RV32IFD-NEXT: j .LBB18_7
-; RV32IFD-NEXT: .LBB18_6: # %fp-to-i-if-else
+; RV32IFD-NEXT: add a2, s6, a0
+; RV32IFD-NEXT: sltu a3, s8, s1
+; RV32IFD-NEXT: sltu a4, s11, s10
+; RV32IFD-NEXT: add a1, s7, a1
+; RV32IFD-NEXT: add a0, s8, a2
+; RV32IFD-NEXT: add a4, s9, a4
+; RV32IFD-NEXT: sltu a2, a2, s6
+; RV32IFD-NEXT: sltu a5, a0, s8
+; RV32IFD-NEXT: add a3, a4, a3
+; RV32IFD-NEXT: add a1, a1, a2
+; RV32IFD-NEXT: add a1, a3, a1
+; RV32IFD-NEXT: add a1, a1, a5
+; RV32IFD-NEXT: j .LBB18_6
+; RV32IFD-NEXT: .LBB18_5: # %fp-to-i-if-else
; RV32IFD-NEXT: sw a1, 72(sp)
; RV32IFD-NEXT: sw a2, 76(sp)
; RV32IFD-NEXT: sw zero, 80(sp)
@@ -1635,10 +1625,10 @@ define i64 @stest_f64i64(double %x) {
; RV32IFD-NEXT: sll a2, a2, a0
; RV32IFD-NEXT: sll a5, a5, a0
; RV32IFD-NEXT: sll a1, a1, a0
-; RV32IFD-NEXT: or s3, a2, a6
+; RV32IFD-NEXT: or s4, a2, a6
; RV32IFD-NEXT: or a2, a5, a7
; RV32IFD-NEXT: or a3, a1, a3
-; RV32IFD-NEXT: sll s4, a4, a0
+; RV32IFD-NEXT: sll s3, a4, a0
; RV32IFD-NEXT: mv a0, s8
; RV32IFD-NEXT: mv a1, s0
; RV32IFD-NEXT: call __muldi3
@@ -1646,21 +1636,21 @@ define i64 @stest_f64i64(double %x) {
; RV32IFD-NEXT: mv s5, a1
; RV32IFD-NEXT: mv a0, s0
; RV32IFD-NEXT: mv a1, s0
-; RV32IFD-NEXT: mv a2, s4
-; RV32IFD-NEXT: mv a3, s3
+; RV32IFD-NEXT: mv a2, s3
+; RV32IFD-NEXT: mv a3, s4
; RV32IFD-NEXT: call __muldi3
; RV32IFD-NEXT: add a1, a1, s5
; RV32IFD-NEXT: add s1, a0, s2
; RV32IFD-NEXT: sltu a0, s1, a0
; RV32IFD-NEXT: add s7, a1, a0
-; RV32IFD-NEXT: mv a0, s3
+; RV32IFD-NEXT: mv a0, s4
; RV32IFD-NEXT: li a1, 0
; RV32IFD-NEXT: mv a2, s8
; RV32IFD-NEXT: li a3, 0
; RV32IFD-NEXT: call __muldi3
; RV32IFD-NEXT: mv s5, a0
; RV32IFD-NEXT: mv s6, a1
-; RV32IFD-NEXT: mv a0, s4
+; RV32IFD-NEXT: mv a0, s3
; RV32IFD-NEXT: li a1, 0
; RV32IFD-NEXT: mv a2, s8
; RV32IFD-NEXT: li a3, 0
@@ -1669,76 +1659,75 @@ define i64 @stest_f64i64(double %x) {
; RV32IFD-NEXT: add s8, s5, a1
; RV32IFD-NEXT: sltu a0, s8, s5
; RV32IFD-NEXT: add s6, s6, a0
-; RV32IFD-NEXT: mv a0, s4
+; RV32IFD-NEXT: mv a0, s3
; RV32IFD-NEXT: li a1, 0
; RV32IFD-NEXT: mv a2, s0
; RV32IFD-NEXT: li a3, 0
; RV32IFD-NEXT: call __muldi3
-; RV32IFD-NEXT: add s8, a0, s8
-; RV32IFD-NEXT: sltu a0, s8, a0
+; RV32IFD-NEXT: add s3, a0, s8
+; RV32IFD-NEXT: sltu a0, s3, a0
; RV32IFD-NEXT: add a0, a1, a0
-; RV32IFD-NEXT: add s4, s6, a0
-; RV32IFD-NEXT: sltu s5, s4, s6
-; RV32IFD-NEXT: mv a0, s3
+; RV32IFD-NEXT: add s5, s6, a0
+; RV32IFD-NEXT: sltu s6, s5, s6
+; RV32IFD-NEXT: mv a0, s4
; RV32IFD-NEXT: li a1, 0
; RV32IFD-NEXT: mv a2, s0
; RV32IFD-NEXT: li a3, 0
; RV32IFD-NEXT: call __muldi3
-; RV32IFD-NEXT: mv a2, a1
-; RV32IFD-NEXT: mv a1, s8
-; RV32IFD-NEXT: add a2, a2, s5
-; RV32IFD-NEXT: add s4, a0, s4
-; RV32IFD-NEXT: sltu a3, s4, a0
-; RV32IFD-NEXT: add a0, s4, s1
-; RV32IFD-NEXT: add a2, a2, a3
-; RV32IFD-NEXT: add a2, a2, s7
-; RV32IFD-NEXT: sltu a3, a0, s4
-; RV32IFD-NEXT: add a2, a2, a3
-; RV32IFD-NEXT: .LBB18_7: # %fp-to-i-cleanup
-; RV32IFD-NEXT: lui a3, 524288
-; RV32IFD-NEXT: addi a4, a3, -1
-; RV32IFD-NEXT: beq a1, a4, .LBB18_9
-; RV32IFD-NEXT: # %bb.8: # %fp-to-i-cleanup
-; RV32IFD-NEXT: sltu a5, a1, a4
-; RV32IFD-NEXT: or a6, a0, a2
-; RV32IFD-NEXT: bnez a6, .LBB18_10
-; RV32IFD-NEXT: j .LBB18_11
-; RV32IFD-NEXT: .LBB18_9:
-; RV32IFD-NEXT: sltiu a5, s2, -1
-; RV32IFD-NEXT: or a6, a0, a2
-; RV32IFD-NEXT: beqz a6, .LBB18_11
+; RV32IFD-NEXT: add a1, a1, s6
+; RV32IFD-NEXT: add s5, a0, s5
+; RV32IFD-NEXT: sltu a2, s5, a0
+; RV32IFD-NEXT: add a0, s5, s1
+; RV32IFD-NEXT: add a1, a1, a2
+; RV32IFD-NEXT: add a1, a1, s7
+; RV32IFD-NEXT: sltu a2, a0, s5
+; RV32IFD-NEXT: add a1, a1, a2
+; RV32IFD-NEXT: .LBB18_6: # %fp-to-i-cleanup
+; RV32IFD-NEXT: lui a2, 524288
+; RV32IFD-NEXT: addi a3, a2, -1
+; RV32IFD-NEXT: beq s3, a3, .LBB18_8
+; RV32IFD-NEXT: # %bb.7: # %fp-to-i-cleanup
+; RV32IFD-NEXT: sltu a4, s3, a3
+; RV32IFD-NEXT: or a5, a0, a1
+; RV32IFD-NEXT: bnez a5, .LBB18_9
+; RV32IFD-NEXT: j .LBB18_10
+; RV32IFD-NEXT: .LBB18_8:
+; RV32IFD-NEXT: sltiu a4, s2, -1
+; RV32IFD-NEXT: or a5, a0, a1
+; RV32IFD-NEXT: beqz a5, .LBB18_10
+; RV32IFD-NEXT: .LBB18_9: # %fp-to-i-cleanup
+; RV32IFD-NEXT: srli a4, a1, 31
; RV32IFD-NEXT: .LBB18_10: # %fp-to-i-cleanup
-; RV32IFD-NEXT: srli a5, a2, 31
-; RV32IFD-NEXT: .LBB18_11: # %fp-to-i-cleanup
-; RV32IFD-NEXT: neg a6, a5
-; RV32IFD-NEXT: addi a7, a5, -1
-; RV32IFD-NEXT: bnez a5, .LBB18_13
-; RV32IFD-NEXT: # %bb.12: # %fp-to-i-cleanup
-; RV32IFD-NEXT: mv a1, a4
-; RV32IFD-NEXT: .LBB18_13: # %fp-to-i-cleanup
-; RV32IFD-NEXT: or a4, a7, s2
-; RV32IFD-NEXT: and a2, a6, a2
-; RV32IFD-NEXT: and a5, a6, a0
-; RV32IFD-NEXT: beq a1, a3, .LBB18_15
-; RV32IFD-NEXT: # %bb.14: # %fp-to-i-cleanup
-; RV32IFD-NEXT: sltu a0, a3, a1
-; RV32IFD-NEXT: j .LBB18_16
-; RV32IFD-NEXT: .LBB18_15:
-; RV32IFD-NEXT: snez a0, a4
-; RV32IFD-NEXT: .LBB18_16: # %fp-to-i-cleanup
-; RV32IFD-NEXT: and a5, a5, a2
-; RV32IFD-NEXT: li a3, -1
-; RV32IFD-NEXT: beq a5, a3, .LBB18_18
-; RV32IFD-NEXT: # %bb.17: # %fp-to-i-cleanup
-; RV32IFD-NEXT: srli a2, a2, 31
-; RV32IFD-NEXT: xori a0, a2, 1
-; RV32IFD-NEXT: .LBB18_18: # %fp-to-i-cleanup
-; RV32IFD-NEXT: bnez a0, .LBB18_20
-; RV32IFD-NEXT: # %bb.19: # %fp-to-i-cleanup
-; RV32IFD-NEXT: lui a1, 524288
-; RV32IFD-NEXT: .LBB18_20: # %fp-to-i-cleanup
+; RV32IFD-NEXT: neg a5, a4
+; RV32IFD-NEXT: addi a6, a4, -1
+; RV32IFD-NEXT: bnez a4, .LBB18_12
+; RV32IFD-NEXT: # %bb.11: # %fp-to-i-cleanup
+; RV32IFD-NEXT: mv s3, a3
+; RV32IFD-NEXT: .LBB18_12: # %fp-to-i-cleanup
+; RV32IFD-NEXT: or a3, a6, s2
+; RV32IFD-NEXT: and a1, a5, a1
+; RV32IFD-NEXT: and a4, a5, a0
+; RV32IFD-NEXT: beq s3, a2, .LBB18_14
+; RV32IFD-NEXT: # %bb.13: # %fp-to-i-cleanup
+; RV32IFD-NEXT: sltu a0, a2, s3
+; RV32IFD-NEXT: j .LBB18_15
+; RV32IFD-NEXT: .LBB18_14:
+; RV32IFD-NEXT: snez a0, a3
+; RV32IFD-NEXT: .LBB18_15: # %fp-to-i-cleanup
+; RV32IFD-NEXT: and a4, a4, a1
+; RV32IFD-NEXT: li a2, -1
+; RV32IFD-NEXT: beq a4, a2, .LBB18_17
+; RV32IFD-NEXT: # %bb.16: # %fp-to-i-cleanup
+; RV32IFD-NEXT: srli a1, a1, 31
+; RV32IFD-NEXT: xori a0, a1, 1
+; RV32IFD-NEXT: .LBB18_17: # %fp-to-i-cleanup
+; RV32IFD-NEXT: bnez a0, .LBB18_19
+; RV32IFD-NEXT: # %bb.18: # %fp-to-i-cleanup
+; RV32IFD-NEXT: lui s3, 524288
+; RV32IFD-NEXT: .LBB18_19: # %fp-to-i-cleanup
; RV32IFD-NEXT: neg a0, a0
-; RV32IFD-NEXT: and a0, a0, a4
+; RV32IFD-NEXT: and a0, a0, a3
+; RV32IFD-NEXT: mv a1, s3
; RV32IFD-NEXT: lw ra, 140(sp) # 4-byte Folded Reload
; RV32IFD-NEXT: lw s0, 136(sp) # 4-byte Folded Reload
; RV32IFD-NEXT: lw s1, 132(sp) # 4-byte Folded Reload
@@ -1768,6 +1757,15 @@ define i64 @stest_f64i64(double %x) {
; RV32IFD-NEXT: addi sp, sp, 144
; RV32IFD-NEXT: .cfi_def_cfa_offset 0
; RV32IFD-NEXT: ret
+; RV32IFD-NEXT: .LBB18_20: # %fp-to-i-if-then5
+; RV32IFD-NEXT: .cfi_restore_state
+; RV32IFD-NEXT: xori a0, a3, 1
+; RV32IFD-NEXT: lui a1, 524288
+; RV32IFD-NEXT: neg s2, a0
+; RV32IFD-NEXT: sub a1, a1, a0
+; RV32IFD-NEXT: mv s3, s2
+; RV32IFD-NEXT: mv a0, s2
+; RV32IFD-NEXT: j .LBB18_6
;
; RV64IFD-LABEL: stest_f64i64:
; RV64IFD: # %bb.0: # %entry
@@ -1818,6 +1816,7 @@ define i64 @utest_f64i64(double %x) {
; RV32IF-NEXT: .cfi_offset s9, -44
; RV32IF-NEXT: .cfi_offset s10, -48
; RV32IF-NEXT: .cfi_offset s11, -52
+; RV32IF-NEXT: .cfi_remember_state
; RV32IF-NEXT: slli a2, a1, 1
; RV32IF-NEXT: srli a2, a2, 21
; RV32IF-NEXT: li a3, 1023
@@ -1827,23 +1826,15 @@ define i64 @utest_f64i64(double %x) {
; RV32IF-NEXT: li s9, 0
; RV32IF-NEXT: li a0, 0
; RV32IF-NEXT: li a1, 0
-; RV32IF-NEXT: j .LBB19_7
+; RV32IF-NEXT: j .LBB19_6
; RV32IF-NEXT: .LBB19_2: # %fp-to-i-if-end
; RV32IF-NEXT: addi a3, a2, -1151
; RV32IF-NEXT: sltu a4, a3, a2
; RV32IF-NEXT: sltiu a3, a3, -128
; RV32IF-NEXT: or a4, a4, a3
; RV32IF-NEXT: srli a3, a1, 31
-; RV32IF-NEXT: beqz a4, .LBB19_4
-; RV32IF-NEXT: # %bb.3: # %fp-to-i-if-then5
-; RV32IF-NEXT: xori a0, a3, 1
-; RV32IF-NEXT: lui a1, 524288
-; RV32IF-NEXT: neg s2, a0
-; RV32IF-NEXT: sub a1, a1, a0
-; RV32IF-NEXT: mv s9, s2
-; RV32IF-NEXT: mv a0, s2
-; RV32IF-NEXT: j .LBB19_7
-; RV32IF-NEXT: .LBB19_4: # %fp-to-i-if-end9
+; RV32IF-NEXT: bnez a4, .LBB19_7
+; RV32IF-NEXT: # %bb.3: # %fp-to-i-if-end9
; RV32IF-NEXT: neg s0, a3
; RV32IF-NEXT: slli a1, a1, 12
; RV32IF-NEXT: lui a3, 256
@@ -1851,8 +1842,8 @@ define i64 @utest_f64i64(double %x) {
; RV32IF-NEXT: srli a1, a1, 12
; RV32IF-NEXT: or a1, a1, a3
; RV32IF-NEXT: ori s8, s0, 1
-; RV32IF-NEXT: bltu a4, a2, .LBB19_6
-; RV32IF-NEXT: # %bb.5: # %fp-to-i-if-then12
+; RV32IF-NEXT: bltu a4, a2, .LBB19_5
+; RV32IF-NEXT: # %bb.4: # %fp-to-i-if-then12
; RV32IF-NEXT: sw zero, 24(sp)
; RV32IF-NEXT: sw zero, 28(sp)
; RV32IF-NEXT: sw zero, 32(sp)
@@ -1944,8 +1935,8 @@ define i64 @utest_f64i64(double %x) {
; RV32IF-NEXT: add a1, a1, a2
; RV32IF-NEXT: add a1, a3, a1
; RV32IF-NEXT: add a1, a1, a5
-; RV32IF-NEXT: j .LBB19_7
-; RV32IF-NEXT: .LBB19_6: # %fp-to-i-if-else
+; RV32IF-NEXT: j .LBB19_6
+; RV32IF-NEXT: .LBB19_5: # %fp-to-i-if-else
; RV32IF-NEXT: sw a0, 56(sp)
; RV32IF-NEXT: sw a1, 60(sp)
; RV32IF-NEXT: sw zero, 64(sp)
@@ -2031,7 +2022,7 @@ define i64 @utest_f64i64(double %x) {
; RV32IF-NEXT: add a1, a1, s7
; RV32IF-NEXT: sltu a2, a0, s4
; RV32IF-NEXT: add a1, a1, a2
-; RV32IF-NEXT: .LBB19_7: # %fp-to-i-cleanup
+; RV32IF-NEXT: .LBB19_6: # %fp-to-i-cleanup
; RV32IF-NEXT: or a2, a1, a0
; RV32IF-NEXT: xori a0, a0, 1
; RV32IF-NEXT: seqz a2, a2
@@ -2071,6 +2062,15 @@ define i64 @utest_f64i64(double %x) {
; RV32IF-NEXT: addi sp, sp, 128
; RV32IF-NEXT: .cfi_def_cfa_offset 0
; RV32IF-NEXT: ret
+; RV32IF-NEXT: .LBB19_7: # %fp-to-i-if-then5
+; RV32IF-NEXT: .cfi_restore_state
+; RV32IF-NEXT: xori a0, a3, 1
+; RV32IF-NEXT: lui a1, 524288
+; RV32IF-NEXT: neg s2, a0
+; RV32IF-NEXT: sub a1, a1, a0
+; RV32IF-NEXT: mv s9, s2
+; RV32IF-NEXT: mv a0, s2
+; RV32IF-NEXT: j .LBB19_6
;
; RV64-LABEL: utest_f64i64:
; RV64: # %bb.0: # %entry
@@ -2118,6 +2118,7 @@ define i64 @utest_f64i64(double %x) {
; RV32IFD-NEXT: .cfi_offset s9, -44
; RV32IFD-NEXT: .cfi_offset s10, -48
; RV32IFD-NEXT: .cfi_offset s11, -52
+; RV32IFD-NEXT: .cfi_remember_state
; RV32IFD-NEXT: fsd fa0, 16(sp)
; RV32IFD-NEXT: lw a2, 20(sp)
; RV32IFD-NEXT: slli a0, a2, 1
@@ -2129,23 +2130,15 @@ define i64 @utest_f64i64(double %x) {
; RV32IFD-NEXT: li s9, 0
; RV32IFD-NEXT: li a0, 0
; RV32IFD-NEXT: li a1, 0
-; RV32IFD-NEXT: j .LBB19_7
+; RV32IFD-NEXT: j .LBB19_6
; RV32IFD-NEXT: .LBB19_2: # %fp-to-i-if-end
; RV32IFD-NEXT: addi a1, a0, -1151
; RV32IFD-NEXT: sltu a3, a1, a0
; RV32IFD-NEXT: sltiu a1, a1, -128
; RV32IFD-NEXT: or a1, a3, a1
; RV32IFD-NEXT: srli a3, a2, 31
-; RV32IFD-NEXT: beqz a1, .LBB19_4
-; RV32IFD-NEXT: # %bb.3: # %fp-to-i-if-then5
-; RV32IFD-NEXT: xori a0, a3, 1
-; RV32IFD-NEXT: lui a1, 524288
-; RV32IFD-NEXT: neg s2, a0
-; RV32IFD-NEXT: sub a1, a1, a0
-; RV32IFD-NEXT: mv s9, s2
-; RV32IFD-NEXT: mv a0, s2
-; RV32IFD-NEXT: j .LBB19_7
-; RV32IFD-NEXT: .LBB19_4: # %fp-to-i-if-end9
+; RV32IFD-NEXT: bnez a1, .LBB19_7
+; RV32IFD-NEXT: # %bb.3: # %fp-to-i-if-end9
; RV32IFD-NEXT: lw a1, 16(sp)
; RV32IFD-NEXT: neg s0, a3
; RV32IFD-NEXT: slli a2, a2, 12
@@ -2154,8 +2147,8 @@ define i64 @utest_f64i64(double %x) {
; RV32IFD-NEXT: or a2, a2, a3
; RV32IFD-NEXT: li a3, 1074
; RV32IFD-NEXT: ori s8, s0, 1
-; RV32IFD-NEXT: bltu a3, a0, .LBB19_6
-; RV32IFD-NEXT: # %bb.5: # %fp-to-i-if-then12
+; RV32IFD-NEXT: bltu a3, a0, .LBB19_5
+; RV32IFD-NEXT: # %bb.4: # %fp-to-i-if-then12
; RV32IFD-NEXT: sw zero, 40(sp)
; RV32IFD-NEXT: sw zero, 44(sp)
; RV32IFD-NEXT: sw zero, 48(sp)
@@ -2247,8 +2240,8 @@ define i64 @utest_f64i64(double %x) {
; RV32IFD-NEXT: add a1, a1, a2
; RV32IFD-NEXT: add a1, a3, a1
; RV32IFD-NEXT: add a1, a1, a5
-; RV32IFD-NEXT: j .LBB19_7
-; RV32IFD-NEXT: .LBB19_6: # %fp-to-i-if-else
+; RV32IFD-NEXT: j .LBB19_6
+; RV32IFD-NEXT: .LBB19_5: # %fp-to-i-if-else
; RV32IFD-NEXT: sw a1, 72(sp)
; RV32IFD-NEXT: sw a2, 76(sp)
; RV32IFD-NEXT: sw zero, 80(sp)
@@ -2334,7 +2327,7 @@ define i64 @utest_f64i64(double %x) {
; RV32IFD-NEXT: add a1, a1, s7
; RV32IFD-NEXT: sltu a2, a0, s4
; RV32IFD-NEXT: add a1, a1, a2
-; RV32IFD-NEXT: .LBB19_7: # %fp-to-i-cleanup
+; RV32IFD-NEXT: .LBB19_6: # %fp-to-i-cleanup
; RV32IFD-NEXT: or a2, a1, a0
; RV32IFD-NEXT: xori a0, a0, 1
; RV32IFD-NEXT: seqz a2, a2
@@ -2374,6 +2367,15 @@ define i64 @utest_f64i64(double %x) {
; RV32IFD-NEXT: addi sp, sp, 144
; RV32IFD-NEXT: .cfi_def_cfa_offset 0
; RV32IFD-NEXT: ret
+; RV32IFD-NEXT: .LBB19_7: # %fp-to-i-if-then5
+; RV32IFD-NEXT: .cfi_restore_state
+; RV32IFD-NEXT: xori a0, a3, 1
+; RV32IFD-NEXT: lui a1, 524288
+; RV32IFD-NEXT: neg s2, a0
+; RV32IFD-NEXT: sub a1, a1, a0
+; RV32IFD-NEXT: mv s9, s2
+; RV32IFD-NEXT: mv a0, s2
+; RV32IFD-NEXT: j .LBB19_6
entry:
%conv = fptoui double %x to i128
%0 = icmp ult i128 %conv, 18446744073709551616
@@ -2422,24 +2424,15 @@ define i64 @ustest_f64i64(double %x) {
; RV32IF-NEXT: li s9, 0
; RV32IF-NEXT: li a0, 0
; RV32IF-NEXT: li a1, 0
-; RV32IF-NEXT: j .LBB20_6
+; RV32IF-NEXT: j .LBB20_8
; RV32IF-NEXT: .LBB20_2: # %fp-to-i-if-end
; RV32IF-NEXT: addi a3, a2, -1151
; RV32IF-NEXT: sltu a4, a3, a2
; RV32IF-NEXT: sltiu a3, a3, -128
; RV32IF-NEXT: or a4, a4, a3
; RV32IF-NEXT: srli a3, a1, 31
-; RV32IF-NEXT: beqz a4, .LBB20_4
-; RV32IF-NEXT: # %bb.3: # %fp-to-i-if-then5
-; RV32IF-NEXT: xori a0, a3, 1
-; RV32IF-NEXT: lui a1, 524288
-; RV32IF-NEXT: neg s2, a0
-; RV32IF-NEXT: sub a1, a1, a0
-; RV32IF-NEXT: mv s9, s2
-; RV32IF-NEXT: mv a0, s2
-; RV32IF-NEXT: beqz a1, .LBB20_6
-; RV32IF-NEXT: j .LBB20_8
-; RV32IF-NEXT: .LBB20_4: # %fp-to-i-if-end9
+; RV32IF-NEXT: bnez a4, .LBB20_7
+; RV32IF-NEXT: # %bb.3: # %fp-to-i-if-end9
; RV32IF-NEXT: neg s0, a3
; RV32IF-NEXT: slli a1, a1, 12
; RV32IF-NEXT: lui a3, 256
@@ -2447,8 +2440,8 @@ define i64 @ustest_f64i64(double %x) {
; RV32IF-NEXT: srli a1, a1, 12
; RV32IF-NEXT: or a1, a1, a3
; RV32IF-NEXT: ori s8, s0, 1
-; RV32IF-NEXT: bltu a4, a2, .LBB20_7
-; RV32IF-NEXT: # %bb.5: # %fp-to-i-if-then12
+; RV32IF-NEXT: bltu a4, a2, .LBB20_5
+; RV32IF-NEXT: # %bb.4: # %fp-to-i-if-then12
; RV32IF-NEXT: sw zero, 24(sp)
; RV32IF-NEXT: sw zero, 28(sp)
; RV32IF-NEXT: sw zero, 32(sp)
@@ -2540,11 +2533,9 @@ define i64 @ustest_f64i64(double %x) {
; RV32IF-NEXT: add a1, a1, a2
; RV32IF-NEXT: add a1, a3, a1
; RV32IF-NEXT: add a1, a1, a5
-; RV32IF-NEXT: bnez a1, .LBB20_8
-; RV32IF-NEXT: .LBB20_6:
-; RV32IF-NEXT: seqz a2, a0
-; RV32IF-NEXT: j .LBB20_9
-; RV32IF-NEXT: .LBB20_7: # %fp-to-i-if-else
+; RV32IF-NEXT: bnez a1, .LBB20_6
+; RV32IF-NEXT: j .LBB20_8
+; RV32IF-NEXT: .LBB20_5: # %fp-to-i-if-else
; RV32IF-NEXT: sw a0, 56(sp)
; RV32IF-NEXT: sw a1, 60(sp)
; RV32IF-NEXT: sw zero, 64(sp)
@@ -2630,9 +2621,20 @@ define i64 @ustest_f64i64(double %x) {
; RV32IF-NEXT: add a1, a1, s7
; RV32IF-NEXT: sltu a2, a0, s4
; RV32IF-NEXT: add a1, a1, a2
-; RV32IF-NEXT: beqz a1, .LBB20_6
-; RV32IF-NEXT: .LBB20_8: # %fp-to-i-cleanup
+; RV32IF-NEXT: beqz a1, .LBB20_8
+; RV32IF-NEXT: .LBB20_6: # %fp-to-i-cleanup
; RV32IF-NEXT: srli a2, a1, 31
+; RV32IF-NEXT: j .LBB20_9
+; RV32IF-NEXT: .LBB20_7: # %fp-to-i-if-then5
+; RV32IF-NEXT: xori a0, a3, 1
+; RV32IF-NEXT: lui a1, 524288
+; RV32IF-NEXT: neg s2, a0
+; RV32IF-NEXT: sub a1, a1, a0
+; RV32IF-NEXT: mv s9, s2
+; RV32IF-NEXT: mv a0, s2
+; RV32IF-NEXT: bnez a1, .LBB20_6
+; RV32IF-NEXT: .LBB20_8:
+; RV32IF-NEXT: seqz a2, a0
; RV32IF-NEXT: .LBB20_9: # %fp-to-i-cleanup
; RV32IF-NEXT: xori a3, a0, 1
; RV32IF-NEXT: or a3, a3, a1
@@ -2763,24 +2765,15 @@ define i64 @ustest_f64i64(double %x) {
; RV32IFD-NEXT: li s9, 0
; RV32IFD-NEXT: li a0, 0
; RV32IFD-NEXT: li a1, 0
-; RV32IFD-NEXT: j .LBB20_6
+; RV32IFD-NEXT: j .LBB20_8
; RV32IFD-NEXT: .LBB20_2: # %fp-to-i-if-end
; RV32IFD-NEXT: addi a1, a0, -1151
; RV32IFD-NEXT: sltu a3, a1, a0
; RV32IFD-NEXT: sltiu a1, a1, -128
; RV32IFD-NEXT: or a1, a3, a1
; RV32IFD-NEXT: srli a3, a2, 31
-; RV32IFD-NEXT: beqz a1, .LBB20_4
-; RV32IFD-NEXT: # %bb.3: # %fp-to-i-if-then5
-; RV32IFD-NEXT: xori a0, a3, 1
-; RV32IFD-NEXT: lui a1, 524288
-; RV32IFD-NEXT: neg s2, a0
-; RV32IFD-NEXT: sub a1, a1, a0
-; RV32IFD-NEXT: mv s9, s2
-; RV32IFD-NEXT: mv a0, s2
-; RV32IFD-NEXT: beqz a1, .LBB20_6
-; RV32IFD-NEXT: j .LBB20_8
-; RV32IFD-NEXT: .LBB20_4: # %fp-to-i-if-end9
+; RV32IFD-NEXT: bnez a1, .LBB20_7
+; RV32IFD-NEXT: # %bb.3: # %fp-to-i-if-end9
; RV32IFD-NEXT: lw a1, 16(sp)
; RV32IFD-NEXT: neg s0, a3
; RV32IFD-NEXT: slli a2, a2, 12
@@ -2789,8 +2782,8 @@ define i64 @ustest_f64i64(double %x) {
; RV32IFD-NEXT: or a2, a2, a3
; RV32IFD-NEXT: li a3, 1074
; RV32IFD-NEXT: ori s8, s0, 1
-; RV32IFD-NEXT: bltu a3, a0, .LBB20_7
-; RV32IFD-NEXT: # %bb.5: # %fp-to-i-if-then12
+; RV32IFD-NEXT: bltu a3, a0, .LBB20_5
+; RV32IFD-NEXT: # %bb.4: # %fp-to-i-if-then12
; RV32IFD-NEXT: sw zero, 40(sp)
; RV32IFD-NEXT: sw zero, 44(sp)
; RV32IFD-NEXT: sw zero, 48(sp)
@@ -2882,11 +2875,9 @@ define i64 @ustest_f64i64(double %x) {
; RV32IFD-NEXT: add a1, a1, a2
; RV32IFD-NEXT: add a1, a3, a1
; RV32IFD-NEXT: add a1, a1, a5
-; RV32IFD-NEXT: bnez a1, .LBB20_8
-; RV32IFD-NEXT: .LBB20_6:
-; RV32IFD-NEXT: seqz a2, a0
-; RV32IFD-NEXT: j .LBB20_9
-; RV32IFD-NEXT: .LBB20_7: # %fp-to-i-if-else
+; RV32IFD-NEXT: bnez a1, .LBB20_6
+; RV32IFD-NEXT: j .LBB20_8
+; RV32IFD-NEXT: .LBB20_5: # %fp-to-i-if-else
; RV32IFD-NEXT: sw a1, 72(sp)
; RV32IFD-NEXT: sw a2, 76(sp)
; RV32IFD-NEXT: sw zero, 80(sp)
@@ -2972,9 +2963,20 @@ define i64 @ustest_f64i64(double %x) {
; RV32IFD-NEXT: add a1, a1, s7
; RV32IFD-NEXT: sltu a2, a0, s4
; RV32IFD-NEXT: add a1, a1, a2
-; RV32IFD-NEXT: beqz a1, .LBB20_6
-; RV32IFD-NEXT: .LBB20_8: # %fp-to-i-cleanup
+; RV32IFD-NEXT: beqz a1, .LBB20_8
+; RV32IFD-NEXT: .LBB20_6: # %fp-to-i-cleanup
; RV32IFD-NEXT: srli a2, a1, 31
+; RV32IFD-NEXT: j .LBB20_9
+; RV32IFD-NEXT: .LBB20_7: # %fp-to-i-if-then5
+; RV32IFD-NEXT: xori a0, a3, 1
+; RV32IFD-NEXT: lui a1, 524288
+; RV32IFD-NEXT: neg s2, a0
+; RV32IFD-NEXT: sub a1, a1, a0
+; RV32IFD-NEXT: mv s9, s2
+; RV32IFD-NEXT: mv a0, s2
+; RV32IFD-NEXT: bnez a1, .LBB20_6
+; RV32IFD-NEXT: .LBB20_8:
+; RV32IFD-NEXT: seqz a2, a0
; RV32IFD-NEXT: .LBB20_9: # %fp-to-i-cleanup
; RV32IFD-NEXT: xori a3, a0, 1
; RV32IFD-NEXT: or a3, a3, a1
@@ -3075,6 +3077,7 @@ define i64 @stest_f32i64(float %x) {
; RV32-NEXT: .cfi_offset s9, -44
; RV32-NEXT: .cfi_offset s10, -48
; RV32-NEXT: .cfi_offset s11, -52
+; RV32-NEXT: .cfi_remember_state
; RV32-NEXT: fmv.x.w a1, fa0
; RV32-NEXT: slli a0, a1, 1
; RV32-NEXT: srli a0, a0, 24
@@ -3082,26 +3085,17 @@ define i64 @stest_f32i64(float %x) {
; RV32-NEXT: bgeu a0, a2, .LBB21_2
; RV32-NEXT: # %bb.1:
; RV32-NEXT: li s2, 0
-; RV32-NEXT: li a1, 0
+; RV32-NEXT: li s3, 0
; RV32-NEXT: li a0, 0
-; RV32-NEXT: li a2, 0
-; RV32-NEXT: j .LBB21_7
+; RV32-NEXT: li a1, 0
+; RV32-NEXT: j .LBB21_6
; RV32-NEXT: .LBB21_2: # %fp-to-i-if-end
; RV32-NEXT: addi a2, a0, -255
; RV32-NEXT: sltu a3, a2, a0
; RV32-NEXT: sltiu a2, a2, -128
; RV32-NEXT: or a2, a3, a2
-; RV32-NEXT: beqz a2, .LBB21_4
-; RV32-NEXT: # %bb.3: # %fp-to-i-if-then5
-; RV32-NEXT: srli a1, a1, 31
-; RV32-NEXT: lui a0, 524288
-; RV32-NEXT: xori a2, a1, 1
-; RV32-NEXT: addi s2, a1, -1
-; RV32-NEXT: sub a2, a0, a2
-; RV32-NEXT: mv a1, s2
-; RV32-NEXT: mv a0, s2
-; RV32-NEXT: j .LBB21_7
-; RV32-NEXT: .LBB21_4: # %fp-to-i-if-end9
+; RV32-NEXT: bnez a2, .LBB21_20
+; RV32-NEXT: # %bb.3: # %fp-to-i-if-end9
; RV32-NEXT: srai s0, a1, 31
; RV32-NEXT: slli a1, a1, 9
; RV32-NEXT: lui a2, 2048
@@ -3109,8 +3103,8 @@ define i64 @stest_f32i64(float %x) {
; RV32-NEXT: srli a1, a1, 9
; RV32-NEXT: or a1, a1, a2
; RV32-NEXT: ori s8, s0, 1
-; RV32-NEXT: bltu a3, a0, .LBB21_6
-; RV32-NEXT: # %bb.5: # %fp-to-i-if-then12
+; RV32-NEXT: bltu a3, a0, .LBB21_5
+; RV32-NEXT: # %bb.4: # %fp-to-i-if-then12
; RV32-NEXT: sw zero, 24(sp)
; RV32-NEXT: sw zero, 28(sp)
; RV32-NEXT: sw zero, 32(sp)
@@ -3142,70 +3136,68 @@ define i64 @stest_f32i64(float %x) {
; RV32-NEXT: srl a0, a0, a1
; RV32-NEXT: or a3, a3, a6
; RV32-NEXT: sw a3, 4(sp) # 4-byte Folded Spill
-; RV32-NEXT: or s5, a2, a7
-; RV32-NEXT: or s6, a0, a5
-; RV32-NEXT: srl s4, a4, a1
-; RV32-NEXT: mv a0, s6
+; RV32-NEXT: or s6, a2, a7
+; RV32-NEXT: or s7, a0, a5
+; RV32-NEXT: srl s5, a4, a1
+; RV32-NEXT: mv a0, s7
; RV32-NEXT: li a1, 0
; RV32-NEXT: mv a2, s8
; RV32-NEXT: li a3, 0
; RV32-NEXT: call __muldi3
; RV32-NEXT: mv s2, a0
-; RV32-NEXT: mv s7, a1
-; RV32-NEXT: mv a0, s5
+; RV32-NEXT: mv s3, a1
+; RV32-NEXT: mv a0, s6
; RV32-NEXT: li a1, 0
; RV32-NEXT: mv a2, s8
; RV32-NEXT: li a3, 0
; RV32-NEXT: call __muldi3
-; RV32-NEXT: add s7, a0, s7
-; RV32-NEXT: sltu a0, s7, a0
-; RV32-NEXT: add s9, a1, a0
-; RV32-NEXT: mv a0, s6
+; RV32-NEXT: add s3, a0, s3
+; RV32-NEXT: sltu a0, s3, a0
+; RV32-NEXT: add s10, a1, a0
+; RV32-NEXT: mv a0, s7
; RV32-NEXT: li a1, 0
; RV32-NEXT: mv a2, s0
; RV32-NEXT: li a3, 0
; RV32-NEXT: call __muldi3
-; RV32-NEXT: add s10, a0, s7
-; RV32-NEXT: sltu a0, s10, a0
+; RV32-NEXT: add s3, a0, s3
+; RV32-NEXT: sltu a0, s3, a0
; RV32-NEXT: add a0, a1, a0
-; RV32-NEXT: add s11, s9, a0
-; RV32-NEXT: mv a0, s5
+; RV32-NEXT: add s11, s10, a0
+; RV32-NEXT: mv a0, s6
; RV32-NEXT: li a1, 0
; RV32-NEXT: mv a2, s0
; RV32-NEXT: li a3, 0
; RV32-NEXT: call __muldi3
; RV32-NEXT: mv s1, a0
-; RV32-NEXT: mv s3, s8
-; RV32-NEXT: mv s8, a1
-; RV32-NEXT: add s7, a0, s11
+; RV32-NEXT: mv s9, a1
+; RV32-NEXT: mv s4, s8
+; RV32-NEXT: add s8, a0, s11
; RV32-NEXT: mv a0, s0
; RV32-NEXT: mv a1, s0
-; RV32-NEXT: mv a2, s6
-; RV32-NEXT: mv a3, s5
+; RV32-NEXT: mv a2, s7
+; RV32-NEXT: mv a3, s6
; RV32-NEXT: call __muldi3
-; RV32-NEXT: mv s5, a0
-; RV32-NEXT: mv s6, a1
-; RV32-NEXT: mv a0, s3
+; RV32-NEXT: mv s6, a0
+; RV32-NEXT: mv s7, a1
+; RV32-NEXT: mv a0, s4
; RV32-NEXT: mv a1, s0
; RV32-NEXT: lw a2, 4(sp) # 4-byte Folded Reload
-; RV32-NEXT: mv a3, s4
+; RV32-NEXT: mv a3, s5
; RV32-NEXT: call __muldi3
-; RV32-NEXT: mv a2, a1
-; RV32-NEXT: mv a1, s10
-; RV32-NEXT: add a3, s5, a0
-; RV32-NEXT: sltu a4, s7, s1
-; RV32-NEXT: sltu a5, s11, s9
-; RV32-NEXT: add a2, s6, a2
-; RV32-NEXT: add a0, s7, a3
-; RV32-NEXT: add a5, s8, a5
-; RV32-NEXT: sltu a3, a3, s5
-; RV32-NEXT: sltu a6, a0, s7
-; RV32-NEXT: add a4, a5, a4
-; RV32-NEXT: add a2, a2, a3
-; RV32-NEXT: add a2, a4, a2
-; RV32-NEXT: add a2, a2, a6
-; RV32-NEXT: j .LBB21_7
-; RV32-NEXT: .LBB21_6: # %fp-to-i-if-else
+; RV32-NEXT: add a2, s6, a0
+; RV32-NEXT: sltu a3, s8, s1
+; RV32-NEXT: sltu a4, s11, s10
+; RV32-NEXT: add a1, s7, a1
+; RV32-NEXT: add a0, s8, a2
+; RV32-NEXT: add a4, s9, a4
+; RV32-NEXT: sltu a2, a2, s6
+; RV32-NEXT: sltu a5, a0, s8
+; RV32-NEXT: add a3, a4, a3
+; RV32-NEXT: add a1, a1, a2
+; RV32-NEXT: add a1, a3, a1
+; RV32-NEXT: add a1, a1, a5
+; RV32-NEXT: j .LBB21_6
+; RV32-NEXT: .LBB21_5: # %fp-to-i-if-else
; RV32-NEXT: sw a1, 56(sp)
; RV32-NEXT: sw zero, 60(sp)
; RV32-NEXT: sw zero, 64(sp)
@@ -3234,10 +3226,10 @@ define i64 @stest_f32i64(float %x) {
; RV32-NEXT: sll a2, a2, a0
; RV32-NEXT: sll a5, a5, a0
; RV32-NEXT: sll a1, a1, a0
-; RV32-NEXT: or s3, a2, a6
+; RV32-NEXT: or s4, a2, a6
; RV32-NEXT: or a2, a5, a7
; RV32-NEXT: or a3, a1, a3
-; RV32-NEXT: sll s4, a4, a0
+; RV32-NEXT: sll s3, a4, a0
; RV32-NEXT: mv a0, s8
; RV32-NEXT: mv a1, s0
; RV32-NEXT: call __muldi3
@@ -3245,21 +3237,21 @@ define i64 @stest_f32i64(float %x) {
; RV32-NEXT: mv s5, a1
; RV32-NEXT: mv a0, s0
; RV32-NEXT: mv a1, s0
-; RV32-NEXT: mv a2, s4
-; RV32-NEXT: mv a3, s3
+; RV32-NEXT: mv a2, s3
+; RV32-NEXT: mv a3, s4
; RV32-NEXT: call __muldi3
; RV32-NEXT: add a1, a1, s5
; RV32-NEXT: add s1, a0, s2
; RV32-NEXT: sltu a0, s1, a0
; RV32-NEXT: add s7, a1, a0
-; RV32-NEXT: mv a0, s3
+; RV32-NEXT: mv a0, s4
; RV32-NEXT: li a1, 0
; RV32-NEXT: mv a2, s8
; RV32-NEXT: li a3, 0
; RV32-NEXT: call __muldi3
; RV32-NEXT: mv s5, a0
; RV32-NEXT: mv s6, a1
-; RV32-NEXT: mv a0, s4
+; RV32-NEXT: mv a0, s3
; RV32-NEXT: li a1, 0
; RV32-NEXT: mv a2, s8
; RV32-NEXT: li a3, 0
@@ -3268,76 +3260,75 @@ define i64 @stest_f32i64(float %x) {
; RV32-NEXT: add s8, s5, a1
; RV32-NEXT: sltu a0, s8, s5
; RV32-NEXT: add s6, s6, a0
-; RV32-NEXT: mv a0, s4
+; RV32-NEXT: mv a0, s3
; RV32-NEXT: li a1, 0
; RV32-NEXT: mv a2, s0
; RV32-NEXT: li a3, 0
; RV32-NEXT: call __muldi3
-; RV32-NEXT: add s8, a0, s8
-; RV32-NEXT: sltu a0, s8, a0
+; RV32-NEXT: add s3, a0, s8
+; RV32-NEXT: sltu a0, s3, a0
; RV32-NEXT: add a0, a1, a0
-; RV32-NEXT: add s4, s6, a0
-; RV32-NEXT: sltu s5, s4, s6
-; RV32-NEXT: mv a0, s3
+; RV32-NEXT: add s5, s6, a0
+; RV32-NEXT: sltu s6, s5, s6
+; RV32-NEXT: mv a0, s4
; RV32-NEXT: li a1, 0
; RV32-NEXT: mv a2, s0
; RV32-NEXT: li a3, 0
; RV32-NEXT: call __muldi3
-; RV32-NEXT: mv a2, a1
-; RV32-NEXT: mv a1, s8
-; RV32-NEXT: add a2, a2, s5
-; RV32-NEXT: add s4, a0, s4
-; RV32-NEXT: sltu a3, s4, a0
-; RV32-NEXT: add a0, s4, s1
-; RV32-NEXT: add a2, a2, a3
-; RV32-NEXT: add a2, a2, s7
-; RV32-NEXT: sltu a3, a0, s4
-; RV32-NEXT: add a2, a2, a3
-; RV32-NEXT: .LBB21_7: # %fp-to-i-cleanup
-; RV32-NEXT: lui a3, 524288
-; RV32-NEXT: addi a4, a3, -1
-; RV32-NEXT: beq a1, a4, .LBB21_9
-; RV32-NEXT: # %bb.8: # %fp-to-i-cleanup
-; RV32-NEXT: sltu a5, a1, a4
-; RV32-NEXT: or a6, a0, a2
-; RV32-NEXT: bnez a6, .LBB21_10
-; RV32-NEXT: j .LBB21_11
-; RV32-NEXT: .LBB21_9:
-; RV32-NEXT: sltiu a5, s2, -1
-; RV32-NEXT: or a6, a0, a2
-; RV32-NEXT: beqz a6, .LBB21_11
+; RV32-NEXT: add a1, a1, s6
+; RV32-NEXT: add s5, a0, s5
+; RV32-NEXT: sltu a2, s5, a0
+; RV32-NEXT: add a0, s5, s1
+; RV32-NEXT: add a1, a1, a2
+; RV32-NEXT: add a1, a1, s7
+; RV32-NEXT: sltu a2, a0, s5
+; RV32-NEXT: add a1, a1, a2
+; RV32-NEXT: .LBB21_6: # %fp-to-i-cleanup
+; RV32-NEXT: lui a2, 524288
+; RV32-NEXT: addi a3, a2, -1
+; RV32-NEXT: beq s3, a3, .LBB21_8
+; RV32-NEXT: # %bb.7: # %fp-to-i-cleanup
+; RV32-NEXT: sltu a4, s3, a3
+; RV32-NEXT: or a5, a0, a1
+; RV32-NEXT: bnez a5, .LBB21_9
+; RV32-NEXT: j .LBB21_10
+; RV32-NEXT: .LBB21_8:
+; RV32-NEXT: sltiu a4, s2, -1
+; RV32-NEXT: or a5, a0, a1
+; RV32-NEXT: beqz a5, .LBB21_10
+; RV32-NEXT: .LBB21_9: # %fp-to-i-cleanup
+; RV32-NEXT: srli a4, a1, 31
; RV32-NEXT: .LBB21_10: # %fp-to-i-cleanup
-; RV32-NEXT: srli a5, a2, 31
-; RV32-NEXT: .LBB21_11: # %fp-to-i-cleanup
-; RV32-NEXT: neg a6, a5
-; RV32-NEXT: addi a7, a5, -1
-; RV32-NEXT: bnez a5, .LBB21_13
-; RV32-NEXT: # %bb.12: # %fp-to-i-cleanup
-; RV32-NEXT: mv a1, a4
-; RV32-NEXT: .LBB21_13: # %fp-to-i-cleanup
-; RV32-NEXT: or a4, a7, s2
-; RV32-NEXT: and a2, a6, a2
-; RV32-NEXT: and a5, a6, a0
-; RV32-NEXT: beq a1, a3, .LBB21_15
-; RV32-NEXT: # %bb.14: # %fp-to-i-cleanup
-; RV32-NEXT: sltu a0, a3, a1
-; RV32-NEXT: j .LBB21_16
-; RV32-NEXT: .LBB21_15:
-; RV32-NEXT: snez a0, a4
-; RV32-NEXT: .LBB21_16: # %fp-to-i-cleanup
-; RV32-NEXT: and a5, a5, a2
-; RV32-NEXT: li a3, -1
-; RV32-NEXT: beq a5, a3, .LBB21_18
-; RV32-NEXT: # %bb.17: # %fp-to-i-cleanup
-; RV32-NEXT: srli a2, a2, 31
-; RV32-NEXT: xori a0, a2, 1
-; RV32-NEXT: .LBB21_18: # %fp-to-i-cleanup
-; RV32-NEXT: bnez a0, .LBB21_20
-; RV32-NEXT: # %bb.19: # %fp-to-i-cleanup
-; RV32-NEXT: lui a1, 524288
-; RV32-NEXT: .LBB21_20: # %fp-to-i-cleanup
+; RV32-NEXT: neg a5, a4
+; RV32-NEXT: addi a6, a4, -1
+; RV32-NEXT: bnez a4, .LBB21_12
+; RV32-NEXT: # %bb.11: # %fp-to-i-cleanup
+; RV32-NEXT: mv s3, a3
+; RV32-NEXT: .LBB21_12: # %fp-to-i-cleanup
+; RV32-NEXT: or a3, a6, s2
+; RV32-NEXT: and a1, a5, a1
+; RV32-NEXT: and a4, a5, a0
+; RV32-NEXT: beq s3, a2, .LBB21_14
+; RV32-NEXT: # %bb.13: # %fp-to-i-cleanup
+; RV32-NEXT: sltu a0, a2, s3
+; RV32-NEXT: j .LBB21_15
+; RV32-NEXT: .LBB21_14:
+; RV32-NEXT: snez a0, a3
+; RV32-NEXT: .LBB21_15: # %fp-to-i-cleanup
+; RV32-NEXT: and a4, a4, a1
+; RV32-NEXT: li a2, -1
+; RV32-NEXT: beq a4, a2, .LBB21_17
+; RV32-NEXT: # %bb.16: # %fp-to-i-cleanup
+; RV32-NEXT: srli a1, a1, 31
+; RV32-NEXT: xori a0, a1, 1
+; RV32-NEXT: .LBB21_17: # %fp-to-i-cleanup
+; RV32-NEXT: bnez a0, .LBB21_19
+; RV32-NEXT: # %bb.18: # %fp-to-i-cleanup
+; RV32-NEXT: lui s3, 524288
+; RV32-NEXT: .LBB21_19: # %fp-to-i-cleanup
; RV32-NEXT: neg a0, a0
-; RV32-NEXT: and a0, a0, a4
+; RV32-NEXT: and a0, a0, a3
+; RV32-NEXT: mv a1, s3
; RV32-NEXT: lw ra, 124(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s0, 120(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s1, 116(sp) # 4-byte Folded Reload
@@ -3367,6 +3358,16 @@ define i64 @stest_f32i64(float %x) {
; RV32-NEXT: addi sp, sp, 128
; RV32-NEXT: .cfi_def_cfa_offset 0
; RV32-NEXT: ret
+; RV32-NEXT: .LBB21_20: # %fp-to-i-if-then5
+; RV32-NEXT: .cfi_restore_state
+; RV32-NEXT: srli a1, a1, 31
+; RV32-NEXT: lui a0, 524288
+; RV32-NEXT: xori a2, a1, 1
+; RV32-NEXT: addi s2, a1, -1
+; RV32-NEXT: sub a1, a0, a2
+; RV32-NEXT: mv s3, s2
+; RV32-NEXT: mv a0, s2
+; RV32-NEXT: j .LBB21_6
;
; RV64-LABEL: stest_f32i64:
; RV64: # %bb.0: # %entry
@@ -3417,6 +3418,7 @@ define i64 @utest_f32i64(float %x) {
; RV32-NEXT: .cfi_offset s9, -44
; RV32-NEXT: .cfi_offset s10, -48
; RV32-NEXT: .cfi_offset s11, -52
+; RV32-NEXT: .cfi_remember_state
; RV32-NEXT: fmv.x.w a1, fa0
; RV32-NEXT: slli a0, a1, 1
; RV32-NEXT: srli a0, a0, 24
@@ -3427,23 +3429,14 @@ define i64 @utest_f32i64(float %x) {
; RV32-NEXT: li s9, 0
; RV32-NEXT: li a0, 0
; RV32-NEXT: li a1, 0
-; RV32-NEXT: j .LBB22_7
+; RV32-NEXT: j .LBB22_6
; RV32-NEXT: .LBB22_2: # %fp-to-i-if-end
; RV32-NEXT: addi a2, a0, -255
; RV32-NEXT: sltu a3, a2, a0
; RV32-NEXT: sltiu a2, a2, -128
; RV32-NEXT: or a2, a3, a2
-; RV32-NEXT: beqz a2, .LBB22_4
-; RV32-NEXT: # %bb.3: # %fp-to-i-if-then5
-; RV32-NEXT: srli a1, a1, 31
-; RV32-NEXT: lui a0, 524288
-; RV32-NEXT: xori a2, a1, 1
-; RV32-NEXT: addi s2, a1, -1
-; RV32-NEXT: sub a1, a0, a2
-; RV32-NEXT: mv s9, s2
-; RV32-NEXT: mv a0, s2
-; RV32-NEXT: j .LBB22_7
-; RV32-NEXT: .LBB22_4: # %fp-to-i-if-end9
+; RV32-NEXT: bnez a2, .LBB22_7
+; RV32-NEXT: # %bb.3: # %fp-to-i-if-end9
; RV32-NEXT: srai s0, a1, 31
; RV32-NEXT: slli a1, a1, 9
; RV32-NEXT: lui a2, 2048
@@ -3451,8 +3444,8 @@ define i64 @utest_f32i64(float %x) {
; RV32-NEXT: srli a1, a1, 9
; RV32-NEXT: or a1, a1, a2
; RV32-NEXT: ori s8, s0, 1
-; RV32-NEXT: bltu a3, a0, .LBB22_6
-; RV32-NEXT: # %bb.5: # %fp-to-i-if-then12
+; RV32-NEXT: bltu a3, a0, .LBB22_5
+; RV32-NEXT: # %bb.4: # %fp-to-i-if-then12
; RV32-NEXT: sw zero, 24(sp)
; RV32-NEXT: sw zero, 28(sp)
; RV32-NEXT: sw zero, 32(sp)
@@ -3544,8 +3537,8 @@ define i64 @utest_f32i64(float %x) {
; RV32-NEXT: add a1, a1, a2
; RV32-NEXT: add a1, a3, a1
; RV32-NEXT: add a1, a1, a5
-; RV32-NEXT: j .LBB22_7
-; RV32-NEXT: .LBB22_6: # %fp-to-i-if-else
+; RV32-NEXT: j .LBB22_6
+; RV32-NEXT: .LBB22_5: # %fp-to-i-if-else
; RV32-NEXT: sw a1, 56(sp)
; RV32-NEXT: sw zero, 60(sp)
; RV32-NEXT: sw zero, 64(sp)
@@ -3631,7 +3624,7 @@ define i64 @utest_f32i64(float %x) {
; RV32-NEXT: add a1, a1, s7
; RV32-NEXT: sltu a2, a0, s4
; RV32-NEXT: add a1, a1, a2
-; RV32-NEXT: .LBB22_7: # %fp-to-i-cleanup
+; RV32-NEXT: .LBB22_6: # %fp-to-i-cleanup
; RV32-NEXT: or a2, a1, a0
; RV32-NEXT: xori a0, a0, 1
; RV32-NEXT: seqz a2, a2
@@ -3671,6 +3664,16 @@ define i64 @utest_f32i64(float %x) {
; RV32-NEXT: addi sp, sp, 128
; RV32-NEXT: .cfi_def_cfa_offset 0
; RV32-NEXT: ret
+; RV32-NEXT: .LBB22_7: # %fp-to-i-if-then5
+; RV32-NEXT: .cfi_restore_state
+; RV32-NEXT: srli a1, a1, 31
+; RV32-NEXT: lui a0, 524288
+; RV32-NEXT: xori a2, a1, 1
+; RV32-NEXT: addi s2, a1, -1
+; RV32-NEXT: sub a1, a0, a2
+; RV32-NEXT: mv s9, s2
+; RV32-NEXT: mv a0, s2
+; RV32-NEXT: j .LBB22_6
;
; RV64-LABEL: utest_f32i64:
; RV64: # %bb.0: # %entry
@@ -3736,24 +3739,14 @@ define i64 @ustest_f32i64(float %x) {
; RV32-NEXT: li s9, 0
; RV32-NEXT: li a0, 0
; RV32-NEXT: li a1, 0
-; RV32-NEXT: j .LBB23_6
+; RV32-NEXT: j .LBB23_8
; RV32-NEXT: .LBB23_2: # %fp-to-i-if-end
; RV32-NEXT: addi a2, a0, -255
; RV32-NEXT: sltu a3, a2, a0
; RV32-NEXT: sltiu a2, a2, -128
; RV32-NEXT: or a2, a3, a2
-; RV32-NEXT: beqz a2, .LBB23_4
-; RV32-NEXT: # %bb.3: # %fp-to-i-if-then5
-; RV32-NEXT: srli a1, a1, 31
-; RV32-NEXT: lui a0, 524288
-; RV32-NEXT: xori a2, a1, 1
-; RV32-NEXT: addi s2, a1, -1
-; RV32-NEXT: sub a1, a0, a2
-; RV32-NEXT: mv s9, s2
-; RV32-NEXT: mv a0, s2
-; RV32-NEXT: beqz a1, .LBB23_6
-; RV32-NEXT: j .LBB23_8
-; RV32-NEXT: .LBB23_4: # %fp-to-i-if-end9
+; RV32-NEXT: bnez a2, .LBB23_7
+; RV32-NEXT: # %bb.3: # %fp-to-i-if-end9
; RV32-NEXT: srai s0, a1, 31
; RV32-NEXT: slli a1, a1, 9
; RV32-NEXT: lui a2, 2048
@@ -3761,8 +3754,8 @@ define i64 @ustest_f32i64(float %x) {
; RV32-NEXT: srli a1, a1, 9
; RV32-NEXT: or a1, a1, a2
; RV32-NEXT: ori s8, s0, 1
-; RV32-NEXT: bltu a3, a0, .LBB23_7
-; RV32-NEXT: # %bb.5: # %fp-to-i-if-then12
+; RV32-NEXT: bltu a3, a0, .LBB23_5
+; RV32-NEXT: # %bb.4: # %fp-to-i-if-then12
; RV32-NEXT: sw zero, 24(sp)
; RV32-NEXT: sw zero, 28(sp)
; RV32-NEXT: sw zero, 32(sp)
@@ -3854,11 +3847,9 @@ define i64 @ustest_f32i64(float %x) {
; RV32-NEXT: add a1, a1, a2
; RV32-NEXT: add a1, a3, a1
; RV32-NEXT: add a1, a1, a5
-; RV32-NEXT: bnez a1, .LBB23_8
-; RV32-NEXT: .LBB23_6:
-; RV32-NEXT: seqz a2, a0
-; RV32-NEXT: j .LBB23_9
-; RV32-NEXT: .LBB23_7: # %fp-to-i-if-else
+; RV32-NEXT: bnez a1, .LBB23_6
+; RV32-NEXT: j .LBB23_8
+; RV32-NEXT: .LBB23_5: # %fp-to-i-if-else
; RV32-NEXT: sw a1, 56(sp)
; RV32-NEXT: sw zero, 60(sp)
; RV32-NEXT: sw zero, 64(sp)
@@ -3944,9 +3935,21 @@ define i64 @ustest_f32i64(float %x) {
; RV32-NEXT: add a1, a1, s7
; RV32-NEXT: sltu a2, a0, s4
; RV32-NEXT: add a1, a1, a2
-; RV32-NEXT: beqz a1, .LBB23_6
-; RV32-NEXT: .LBB23_8: # %fp-to-i-cleanup
+; RV32-NEXT: beqz a1, .LBB23_8
+; RV32-NEXT: .LBB23_6: # %fp-to-i-cleanup
; RV32-NEXT: srli a2, a1, 31
+; RV32-NEXT: j .LBB23_9
+; RV32-NEXT: .LBB23_7: # %fp-to-i-if-then5
+; RV32-NEXT: srli a1, a1, 31
+; RV32-NEXT: lui a0, 524288
+; RV32-NEXT: xori a2, a1, 1
+; RV32-NEXT: addi s2, a1, -1
+; RV32-NEXT: sub a1, a0, a2
+; RV32-NEXT: mv s9, s2
+; RV32-NEXT: mv a0, s2
+; RV32-NEXT: bnez a1, .LBB23_6
+; RV32-NEXT: .LBB23_8:
+; RV32-NEXT: seqz a2, a0
; RV32-NEXT: .LBB23_9: # %fp-to-i-cleanup
; RV32-NEXT: xori a3, a0, 1
; RV32-NEXT: or a3, a3, a1
@@ -5337,32 +5340,25 @@ define i64 @stest_f64i64_mm(double %x) {
; RV32IF-NEXT: .cfi_offset s9, -44
; RV32IF-NEXT: .cfi_offset s10, -48
; RV32IF-NEXT: .cfi_offset s11, -52
+; RV32IF-NEXT: .cfi_remember_state
; RV32IF-NEXT: slli a2, a1, 1
; RV32IF-NEXT: srli a2, a2, 21
; RV32IF-NEXT: li a3, 1023
; RV32IF-NEXT: bgeu a2, a3, .LBB45_2
; RV32IF-NEXT: # %bb.1:
; RV32IF-NEXT: li s2, 0
-; RV32IF-NEXT: li a1, 0
+; RV32IF-NEXT: li s3, 0
; RV32IF-NEXT: li a0, 0
-; RV32IF-NEXT: li a2, 0
-; RV32IF-NEXT: j .LBB45_7
+; RV32IF-NEXT: li a1, 0
+; RV32IF-NEXT: j .LBB45_6
; RV32IF-NEXT: .LBB45_2: # %fp-to-i-if-end
; RV32IF-NEXT: addi a3, a2, -1151
; RV32IF-NEXT: sltu a4, a3, a2
; RV32IF-NEXT: sltiu a3, a3, -128
; RV32IF-NEXT: or a4, a4, a3
; RV32IF-NEXT: srli a3, a1, 31
-; RV32IF-NEXT: beqz a4, .LBB45_4
-; RV32IF-NEXT: # %bb.3: # %fp-to-i-if-then5
-; RV32IF-NEXT: xori a0, a3, 1
-; RV32IF-NEXT: lui a2, 524288
-; RV32IF-NEXT: neg s2, a0
-; RV32IF-NEXT: sub a2, a2, a0
-; RV32IF-NEXT: mv a1, s2
-; RV32IF-NEXT: mv a0, s2
-; RV32IF-NEXT: j .LBB45_7
-; RV32IF-NEXT: .LBB45_4: # %fp-to-i-if-end9
+; RV32IF-NEXT: bnez a4, .LBB45_20
+; RV32IF-NEXT: # %bb.3: # %fp-to-i-if-end9
; RV32IF-NEXT: neg s0, a3
; RV32IF-NEXT: slli a1, a1, 12
; RV32IF-NEXT: lui a3, 256
@@ -5370,8 +5366,8 @@ define i64 @stest_f64i64_mm(double %x) {
; RV32IF-NEXT: srli a1, a1, 12
; RV32IF-NEXT: or a1, a1, a3
; RV32IF-NEXT: ori s8, s0, 1
-; RV32IF-NEXT: bltu a4, a2, .LBB45_6
-; RV32IF-NEXT: # %bb.5: # %fp-to-i-if-then12
+; RV32IF-NEXT: bltu a4, a2, .LBB45_5
+; RV32IF-NEXT: # %bb.4: # %fp-to-i-if-then12
; RV32IF-NEXT: sw zero, 24(sp)
; RV32IF-NEXT: sw zero, 28(sp)
; RV32IF-NEXT: sw zero, 32(sp)
@@ -5403,70 +5399,68 @@ define i64 @stest_f64i64_mm(double %x) {
; RV32IF-NEXT: srl a1, a1, a0
; RV32IF-NEXT: or a3, a3, a6
; RV32IF-NEXT: sw a3, 4(sp) # 4-byte Folded Spill
-; RV32IF-NEXT: or s5, a2, a7
-; RV32IF-NEXT: or s6, a1, a5
-; RV32IF-NEXT: srl s4, a4, a0
-; RV32IF-NEXT: mv a0, s6
+; RV32IF-NEXT: or s6, a2, a7
+; RV32IF-NEXT: or s7, a1, a5
+; RV32IF-NEXT: srl s5, a4, a0
+; RV32IF-NEXT: mv a0, s7
; RV32IF-NEXT: li a1, 0
; RV32IF-NEXT: mv a2, s8
; RV32IF-NEXT: li a3, 0
; RV32IF-NEXT: call __muldi3
; RV32IF-NEXT: mv s2, a0
-; RV32IF-NEXT: mv s7, a1
-; RV32IF-NEXT: mv a0, s5
+; RV32IF-NEXT: mv s3, a1
+; RV32IF-NEXT: mv a0, s6
; RV32IF-NEXT: li a1, 0
; RV32IF-NEXT: mv a2, s8
; RV32IF-NEXT: li a3, 0
; RV32IF-NEXT: call __muldi3
-; RV32IF-NEXT: add s7, a0, s7
-; RV32IF-NEXT: sltu a0, s7, a0
-; RV32IF-NEXT: add s9, a1, a0
-; RV32IF-NEXT: mv a0, s6
+; RV32IF-NEXT: add s3, a0, s3
+; RV32IF-NEXT: sltu a0, s3, a0
+; RV32IF-NEXT: add s10, a1, a0
+; RV32IF-NEXT: mv a0, s7
; RV32IF-NEXT: li a1, 0
; RV32IF-NEXT: mv a2, s0
; RV32IF-NEXT: li a3, 0
; RV32IF-NEXT: call __muldi3
-; RV32IF-NEXT: add s10, a0, s7
-; RV32IF-NEXT: sltu a0, s10, a0
+; RV32IF-NEXT: add s3, a0, s3
+; RV32IF-NEXT: sltu a0, s3, a0
; RV32IF-NEXT: add a0, a1, a0
-; RV32IF-NEXT: add s11, s9, a0
-; RV32IF-NEXT: mv a0, s5
+; RV32IF-NEXT: add s11, s10, a0
+; RV32IF-NEXT: mv a0, s6
; RV32IF-NEXT: li a1, 0
; RV32IF-NEXT: mv a2, s0
; RV32IF-NEXT: li a3, 0
; RV32IF-NEXT: call __muldi3
; RV32IF-NEXT: mv s1, a0
-; RV32IF-NEXT: mv s3, s8
-; RV32IF-NEXT: mv s8, a1
-; RV32IF-NEXT: add s7, a0, s11
+; RV32IF-NEXT: mv s9, a1
+; RV32IF-NEXT: mv s4, s8
+; RV32IF-NEXT: add s8, a0, s11
; RV32IF-NEXT: mv a0, s0
; RV32IF-NEXT: mv a1, s0
-; RV32IF-NEXT: mv a2, s6
-; RV32IF-NEXT: mv a3, s5
+; RV32IF-NEXT: mv a2, s7
+; RV32IF-NEXT: mv a3, s6
; RV32IF-NEXT: call __muldi3
-; RV32IF-NEXT: mv s5, a0
-; RV32IF-NEXT: mv s6, a1
-; RV32IF-NEXT: mv a0, s3
+; RV32IF-NEXT: mv s6, a0
+; RV32IF-NEXT: mv s7, a1
+; RV32IF-NEXT: mv a0, s4
; RV32IF-NEXT: mv a1, s0
; RV32IF-NEXT: lw a2, 4(sp) # 4-byte Folded Reload
-; RV32IF-NEXT: mv a3, s4
+; RV32IF-NEXT: mv a3, s5
; RV32IF-NEXT: call __muldi3
-; RV32IF-NEXT: mv a2, a1
-; RV32IF-NEXT: mv a1, s10
-; RV32IF-NEXT: add a3, s5, a0
-; RV32IF-NEXT: sltu a4, s7, s1
-; RV32IF-NEXT: sltu a5, s11, s9
-; RV32IF-NEXT: add a2, s6, a2
-; RV32IF-NEXT: add a0, s7, a3
-; RV32IF-NEXT: add a5, s8, a5
-; RV32IF-NEXT: sltu a3, a3, s5
-; RV32IF-NEXT: sltu a6, a0, s7
-; RV32IF-NEXT: add a4, a5, a4
-; RV32IF-NEXT: add a2, a2, a3
-; RV32IF-NEXT: add a2, a4, a2
-; RV32IF-NEXT: add a2, a2, a6
-; RV32IF-NEXT: j .LBB45_7
-; RV32IF-NEXT: .LBB45_6: # %fp-to-i-if-else
+; RV32IF-NEXT: add a2, s6, a0
+; RV32IF-NEXT: sltu a3, s8, s1
+; RV32IF-NEXT: sltu a4, s11, s10
+; RV32IF-NEXT: add a1, s7, a1
+; RV32IF-NEXT: add a0, s8, a2
+; RV32IF-NEXT: add a4, s9, a4
+; RV32IF-NEXT: sltu a2, a2, s6
+; RV32IF-NEXT: sltu a5, a0, s8
+; RV32IF-NEXT: add a3, a4, a3
+; RV32IF-NEXT: add a1, a1, a2
+; RV32IF-NEXT: add a1, a3, a1
+; RV32IF-NEXT: add a1, a1, a5
+; RV32IF-NEXT: j .LBB45_6
+; RV32IF-NEXT: .LBB45_5: # %fp-to-i-if-else
; RV32IF-NEXT: sw a0, 56(sp)
; RV32IF-NEXT: sw a1, 60(sp)
; RV32IF-NEXT: sw zero, 64(sp)
@@ -5495,10 +5489,10 @@ define i64 @stest_f64i64_mm(double %x) {
; RV32IF-NEXT: sll a2, a2, a0
; RV32IF-NEXT: sll a5, a5, a0
; RV32IF-NEXT: sll a1, a1, a0
-; RV32IF-NEXT: or s3, a2, a6
+; RV32IF-NEXT: or s4, a2, a6
; RV32IF-NEXT: or a2, a5, a7
; RV32IF-NEXT: or a3, a1, a3
-; RV32IF-NEXT: sll s4, a4, a0
+; RV32IF-NEXT: sll s3, a4, a0
; RV32IF-NEXT: mv a0, s8
; RV32IF-NEXT: mv a1, s0
; RV32IF-NEXT: call __muldi3
@@ -5506,21 +5500,21 @@ define i64 @stest_f64i64_mm(double %x) {
; RV32IF-NEXT: mv s5, a1
; RV32IF-NEXT: mv a0, s0
; RV32IF-NEXT: mv a1, s0
-; RV32IF-NEXT: mv a2, s4
-; RV32IF-NEXT: mv a3, s3
+; RV32IF-NEXT: mv a2, s3
+; RV32IF-NEXT: mv a3, s4
; RV32IF-NEXT: call __muldi3
; RV32IF-NEXT: add a1, a1, s5
; RV32IF-NEXT: add s1, a0, s2
; RV32IF-NEXT: sltu a0, s1, a0
; RV32IF-NEXT: add s7, a1, a0
-; RV32IF-NEXT: mv a0, s3
+; RV32IF-NEXT: mv a0, s4
; RV32IF-NEXT: li a1, 0
; RV32IF-NEXT: mv a2, s8
; RV32IF-NEXT: li a3, 0
; RV32IF-NEXT: call __muldi3
; RV32IF-NEXT: mv s5, a0
; RV32IF-NEXT: mv s6, a1
-; RV32IF-NEXT: mv a0, s4
+; RV32IF-NEXT: mv a0, s3
; RV32IF-NEXT: li a1, 0
; RV32IF-NEXT: mv a2, s8
; RV32IF-NEXT: li a3, 0
@@ -5529,76 +5523,75 @@ define i64 @stest_f64i64_mm(double %x) {
; RV32IF-NEXT: add s8, s5, a1
; RV32IF-NEXT: sltu a0, s8, s5
; RV32IF-NEXT: add s6, s6, a0
-; RV32IF-NEXT: mv a0, s4
+; RV32IF-NEXT: mv a0, s3
; RV32IF-NEXT: li a1, 0
; RV32IF-NEXT: mv a2, s0
; RV32IF-NEXT: li a3, 0
; RV32IF-NEXT: call __muldi3
-; RV32IF-NEXT: add s8, a0, s8
-; RV32IF-NEXT: sltu a0, s8, a0
+; RV32IF-NEXT: add s3, a0, s8
+; RV32IF-NEXT: sltu a0, s3, a0
; RV32IF-NEXT: add a0, a1, a0
-; RV32IF-NEXT: add s4, s6, a0
-; RV32IF-NEXT: sltu s5, s4, s6
-; RV32IF-NEXT: mv a0, s3
+; RV32IF-NEXT: add s5, s6, a0
+; RV32IF-NEXT: sltu s6, s5, s6
+; RV32IF-NEXT: mv a0, s4
; RV32IF-NEXT: li a1, 0
; RV32IF-NEXT: mv a2, s0
; RV32IF-NEXT: li a3, 0
; RV32IF-NEXT: call __muldi3
-; RV32IF-NEXT: mv a2, a1
-; RV32IF-NEXT: mv a1, s8
-; RV32IF-NEXT: add a2, a2, s5
-; RV32IF-NEXT: add s4, a0, s4
-; RV32IF-NEXT: sltu a3, s4, a0
-; RV32IF-NEXT: add a0, s4, s1
-; RV32IF-NEXT: add a2, a2, a3
-; RV32IF-NEXT: add a2, a2, s7
-; RV32IF-NEXT: sltu a3, a0, s4
-; RV32IF-NEXT: add a2, a2, a3
-; RV32IF-NEXT: .LBB45_7: # %fp-to-i-cleanup
-; RV32IF-NEXT: lui a3, 524288
-; RV32IF-NEXT: addi a4, a3, -1
-; RV32IF-NEXT: beq a1, a4, .LBB45_9
-; RV32IF-NEXT: # %bb.8: # %fp-to-i-cleanup
-; RV32IF-NEXT: sltu a5, a1, a4
-; RV32IF-NEXT: or a6, a0, a2
-; RV32IF-NEXT: bnez a6, .LBB45_10
-; RV32IF-NEXT: j .LBB45_11
-; RV32IF-NEXT: .LBB45_9:
-; RV32IF-NEXT: sltiu a5, s2, -1
-; RV32IF-NEXT: or a6, a0, a2
-; RV32IF-NEXT: beqz a6, .LBB45_11
+; RV32IF-NEXT: add a1, a1, s6
+; RV32IF-NEXT: add s5, a0, s5
+; RV32IF-NEXT: sltu a2, s5, a0
+; RV32IF-NEXT: add a0, s5, s1
+; RV32IF-NEXT: add a1, a1, a2
+; RV32IF-NEXT: add a1, a1, s7
+; RV32IF-NEXT: sltu a2, a0, s5
+; RV32IF-NEXT: add a1, a1, a2
+; RV32IF-NEXT: .LBB45_6: # %fp-to-i-cleanup
+; RV32IF-NEXT: lui a2, 524288
+; RV32IF-NEXT: addi a3, a2, -1
+; RV32IF-NEXT: beq s3, a3, .LBB45_8
+; RV32IF-NEXT: # %bb.7: # %fp-to-i-cleanup
+; RV32IF-NEXT: sltu a4, s3, a3
+; RV32IF-NEXT: or a5, a0, a1
+; RV32IF-NEXT: bnez a5, .LBB45_9
+; RV32IF-NEXT: j .LBB45_10
+; RV32IF-NEXT: .LBB45_8:
+; RV32IF-NEXT: sltiu a4, s2, -1
+; RV32IF-NEXT: or a5, a0, a1
+; RV32IF-NEXT: beqz a5, .LBB45_10
+; RV32IF-NEXT: .LBB45_9: # %fp-to-i-cleanup
+; RV32IF-NEXT: srli a4, a1, 31
; RV32IF-NEXT: .LBB45_10: # %fp-to-i-cleanup
-; RV32IF-NEXT: srli a5, a2, 31
-; RV32IF-NEXT: .LBB45_11: # %fp-to-i-cleanup
-; RV32IF-NEXT: neg a6, a5
-; RV32IF-NEXT: addi a7, a5, -1
-; RV32IF-NEXT: bnez a5, .LBB45_13
-; RV32IF-NEXT: # %bb.12: # %fp-to-i-cleanup
-; RV32IF-NEXT: mv a1, a4
-; RV32IF-NEXT: .LBB45_13: # %fp-to-i-cleanup
-; RV32IF-NEXT: or a4, a7, s2
-; RV32IF-NEXT: and a2, a6, a2
-; RV32IF-NEXT: and a5, a6, a0
-; RV32IF-NEXT: beq a1, a3, .LBB45_15
-; RV32IF-NEXT: # %bb.14: # %fp-to-i-cleanup
-; RV32IF-NEXT: sltu a0, a3, a1
-; RV32IF-NEXT: j .LBB45_16
-; RV32IF-NEXT: .LBB45_15:
-; RV32IF-NEXT: snez a0, a4
-; RV32IF-NEXT: .LBB45_16: # %fp-to-i-cleanup
-; RV32IF-NEXT: and a5, a5, a2
-; RV32IF-NEXT: li a3, -1
-; RV32IF-NEXT: beq a5, a3, .LBB45_18
-; RV32IF-NEXT: # %bb.17: # %fp-to-i-cleanup
-; RV32IF-NEXT: srli a2, a2, 31
-; RV32IF-NEXT: xori a0, a2, 1
-; RV32IF-NEXT: .LBB45_18: # %fp-to-i-cleanup
-; RV32IF-NEXT: bnez a0, .LBB45_20
-; RV32IF-NEXT: # %bb.19: # %fp-to-i-cleanup
-; RV32IF-NEXT: lui a1, 524288
-; RV32IF-NEXT: .LBB45_20: # %fp-to-i-cleanup
+; RV32IF-NEXT: neg a5, a4
+; RV32IF-NEXT: addi a6, a4, -1
+; RV32IF-NEXT: bnez a4, .LBB45_12
+; RV32IF-NEXT: # %bb.11: # %fp-to-i-cleanup
+; RV32IF-NEXT: mv s3, a3
+; RV32IF-NEXT: .LBB45_12: # %fp-to-i-cleanup
+; RV32IF-NEXT: or a3, a6, s2
+; RV32IF-NEXT: and a1, a5, a1
+; RV32IF-NEXT: and a4, a5, a0
+; RV32IF-NEXT: beq s3, a2, .LBB45_14
+; RV32IF-NEXT: # %bb.13: # %fp-to-i-cleanup
+; RV32IF-NEXT: sltu a0, a2, s3
+; RV32IF-NEXT: j .LBB45_15
+; RV32IF-NEXT: .LBB45_14:
+; RV32IF-NEXT: snez a0, a3
+; RV32IF-NEXT: .LBB45_15: # %fp-to-i-cleanup
+; RV32IF-NEXT: and a4, a4, a1
+; RV32IF-NEXT: li a2, -1
+; RV32IF-NEXT: beq a4, a2, .LBB45_17
+; RV32IF-NEXT: # %bb.16: # %fp-to-i-cleanup
+; RV32IF-NEXT: srli a1, a1, 31
+; RV32IF-NEXT: xori a0, a1, 1
+; RV32IF-NEXT: .LBB45_17: # %fp-to-i-cleanup
+; RV32IF-NEXT: bnez a0, .LBB45_19
+; RV32IF-NEXT: # %bb.18: # %fp-to-i-cleanup
+; RV32IF-NEXT: lui s3, 524288
+; RV32IF-NEXT: .LBB45_19: # %fp-to-i-cleanup
; RV32IF-NEXT: neg a0, a0
-; RV32IF-NEXT: and a0, a0, a4
+; RV32IF-NEXT: and a0, a0, a3
+; RV32IF-NEXT: mv a1, s3
; RV32IF-NEXT: lw ra, 124(sp) # 4-byte Folded Reload
; RV32IF-NEXT: lw s0, 120(sp) # 4-byte Folded Reload
; RV32IF-NEXT: lw s1, 116(sp) # 4-byte Folded Reload
@@ -5628,6 +5621,15 @@ define i64 @stest_f64i64_mm(double %x) {
; RV32IF-NEXT: addi sp, sp, 128
; RV32IF-NEXT: .cfi_def_cfa_offset 0
; RV32IF-NEXT: ret
+; RV32IF-NEXT: .LBB45_20: # %fp-to-i-if-then5
+; RV32IF-NEXT: .cfi_restore_state
+; RV32IF-NEXT: xori a0, a3, 1
+; RV32IF-NEXT: lui a1, 524288
+; RV32IF-NEXT: neg s2, a0
+; RV32IF-NEXT: sub a1, a1, a0
+; RV32IF-NEXT: mv s3, s2
+; RV32IF-NEXT: mv a0, s2
+; RV32IF-NEXT: j .LBB45_6
;
; RV64IF-LABEL: stest_f64i64_mm:
; RV64IF: # %bb.0: # %entry
@@ -5700,6 +5702,7 @@ define i64 @stest_f64i64_mm(double %x) {
; RV32IFD-NEXT: .cfi_offset s9, -44
; RV32IFD-NEXT: .cfi_offset s10, -48
; RV32IFD-NEXT: .cfi_offset s11, -52
+; RV32IFD-NEXT: .cfi_remember_state
; RV32IFD-NEXT: fsd fa0, 16(sp)
; RV32IFD-NEXT: lw a2, 20(sp)
; RV32IFD-NEXT: slli a0, a2, 1
@@ -5708,26 +5711,18 @@ define i64 @stest_f64i64_mm(double %x) {
; RV32IFD-NEXT: bgeu a0, a1, .LBB45_2
; RV32IFD-NEXT: # %bb.1:
; RV32IFD-NEXT: li s2, 0
-; RV32IFD-NEXT: li a1, 0
+; RV32IFD-NEXT: li s3, 0
; RV32IFD-NEXT: li a0, 0
-; RV32IFD-NEXT: li a2, 0
-; RV32IFD-NEXT: j .LBB45_7
+; RV32IFD-NEXT: li a1, 0
+; RV32IFD-NEXT: j .LBB45_6
; RV32IFD-NEXT: .LBB45_2: # %fp-to-i-if-end
; RV32IFD-NEXT: addi a1, a0, -1151
; RV32IFD-NEXT: sltu a3, a1, a0
; RV32IFD-NEXT: sltiu a1, a1, -128
; RV32IFD-NEXT: or a1, a3, a1
; RV32IFD-NEXT: srli a3, a2, 31
-; RV32IFD-NEXT: beqz a1, .LBB45_4
-; RV32IFD-NEXT: # %bb.3: # %fp-to-i-if-then5
-; RV32IFD-NEXT: xori a0, a3, 1
-; RV32IFD-NEXT: lui a2, 524288
-; RV32IFD-NEXT: neg s2, a0
-; RV32IFD-NEXT: sub a2, a2, a0
-; RV32IFD-NEXT: mv a1, s2
-; RV32IFD-NEXT: mv a0, s2
-; RV32IFD-NEXT: j .LBB45_7
-; RV32IFD-NEXT: .LBB45_4: # %fp-to-i-if-end9
+; RV32IFD-NEXT: bnez a1, .LBB45_20
+; RV32IFD-NEXT: # %bb.3: # %fp-to-i-if-end9
; RV32IFD-NEXT: lw a1, 16(sp)
; RV32IFD-NEXT: neg s0, a3
; RV32IFD-NEXT: slli a2, a2, 12
@@ -5736,8 +5731,8 @@ define i64 @stest_f64i64_mm(double %x) {
; RV32IFD-NEXT: or a2, a2, a3
; RV32IFD-NEXT: li a3, 1074
; RV32IFD-NEXT: ori s8, s0, 1
-; RV32IFD-NEXT: bltu a3, a0, .LBB45_6
-; RV32IFD-NEXT: # %bb.5: # %fp-to-i-if-then12
+; RV32IFD-NEXT: bltu a3, a0, .LBB45_5
+; RV32IFD-NEXT: # %bb.4: # %fp-to-i-if-then12
; RV32IFD-NEXT: sw zero, 40(sp)
; RV32IFD-NEXT: sw zero, 44(sp)
; RV32IFD-NEXT: sw zero, 48(sp)
@@ -5769,70 +5764,68 @@ define i64 @stest_f64i64_mm(double %x) {
; RV32IFD-NEXT: srl a0, a0, a1
; RV32IFD-NEXT: or a3, a3, a6
; RV32IFD-NEXT: sw a3, 12(sp) # 4-byte Folded Spill
-; RV32IFD-NEXT: or s5, a2, a7
-; RV32IFD-NEXT: or s6, a0, a5
-; RV32IFD-NEXT: srl s4, a4, a1
-; RV32IFD-NEXT: mv a0, s6
+; RV32IFD-NEXT: or s6, a2, a7
+; RV32IFD-NEXT: or s7, a0, a5
+; RV32IFD-NEXT: srl s5, a4, a1
+; RV32IFD-NEXT: mv a0, s7
; RV32IFD-NEXT: li a1, 0
; RV32IFD-NEXT: mv a2, s8
; RV32IFD-NEXT: li a3, 0
; RV32IFD-NEXT: call __muldi3
; RV32IFD-NEXT: mv s2, a0
-; RV32IFD-NEXT: mv s7, a1
-; RV32IFD-NEXT: mv a0, s5
+; RV32IFD-NEXT: mv s3, a1
+; RV32IFD-NEXT: mv a0, s6
; RV32IFD-NEXT: li a1, 0
; RV32IFD-NEXT: mv a2, s8
; RV32IFD-NEXT: li a3, 0
; RV32IFD-NEXT: call __muldi3
-; RV32IFD-NEXT: add s7, a0, s7
-; RV32IFD-NEXT: sltu a0, s7, a0
-; RV32IFD-NEXT: add s9, a1, a0
-; RV32IFD-NEXT: mv a0, s6
+; RV32IFD-NEXT: add s3, a0, s3
+; RV32IFD-NEXT: sltu a0, s3, a0
+; RV32IFD-NEXT: add s10, a1, a0
+; RV32IFD-NEXT: mv a0, s7
; RV32IFD-NEXT: li a1, 0
; RV32IFD-NEXT: mv a2, s0
; RV32IFD-NEXT: li a3, 0
; RV32IFD-NEXT: call __muldi3
-; RV32IFD-NEXT: add s10, a0, s7
-; RV32IFD-NEXT: sltu a0, s10, a0
+; RV32IFD-NEXT: add s3, a0, s3
+; RV32IFD-NEXT: sltu a0, s3, a0
; RV32IFD-NEXT: add a0, a1, a0
-; RV32IFD-NEXT: add s11, s9, a0
-; RV32IFD-NEXT: mv a0, s5
+; RV32IFD-NEXT: add s11, s10, a0
+; RV32IFD-NEXT: mv a0, s6
; RV32IFD-NEXT: li a1, 0
; RV32IFD-NEXT: mv a2, s0
; RV32IFD-NEXT: li a3, 0
; RV32IFD-NEXT: call __muldi3
; RV32IFD-NEXT: mv s1, a0
-; RV32IFD-NEXT: mv s3, s8
-; RV32IFD-NEXT: mv s8, a1
-; RV32IFD-NEXT: add s7, a0, s11
+; RV32IFD-NEXT: mv s9, a1
+; RV32IFD-NEXT: mv s4, s8
+; RV32IFD-NEXT: add s8, a0, s11
; RV32IFD-NEXT: mv a0, s0
; RV32IFD-NEXT: mv a1, s0
-; RV32IFD-NEXT: mv a2, s6
-; RV32IFD-NEXT: mv a3, s5
+; RV32IFD-NEXT: mv a2, s7
+; RV32IFD-NEXT: mv a3, s6
; RV32IFD-NEXT: call __muldi3
-; RV32IFD-NEXT: mv s5, a0
-; RV32IFD-NEXT: mv s6, a1
-; RV32IFD-NEXT: mv a0, s3
+; RV32IFD-NEXT: mv s6, a0
+; RV32IFD-NEXT: mv s7, a1
+; RV32IFD-NEXT: mv a0, s4
; RV32IFD-NEXT: mv a1, s0
; RV32IFD-NEXT: lw a2, 12(sp) # 4-byte Folded Reload
-; RV32IFD-NEXT: mv a3, s4
+; RV32IFD-NEXT: mv a3, s5
; RV32IFD-NEXT: call __muldi3
-; RV32IFD-NEXT: mv a2, a1
-; RV32IFD-NEXT: mv a1, s10
-; RV32IFD-NEXT: add a3, s5, a0
-; RV32IFD-NEXT: sltu a4, s7, s1
-; RV32IFD-NEXT: sltu a5, s11, s9
-; RV32IFD-NEXT: add a2, s6, a2
-; RV32IFD-NEXT: add a0, s7, a3
-; RV32IFD-NEXT: add a5, s8, a5
-; RV32IFD-NEXT: sltu a3, a3, s5
-; RV32IFD-NEXT: sltu a6, a0, s7
-; RV32IFD-NEXT: add a4, a5, a4
-; RV32IFD-NEXT: add a2, a2, a3
-; RV32IFD-NEXT: add a2, a4, a2
-; RV32IFD-NEXT: add a2, a2, a6
-; RV32IFD-NEXT: j .LBB45_7
-; RV32IFD-NEXT: .LBB45_6: # %fp-to-i-if-else
+; RV32IFD-NEXT: add a2, s6, a0
+; RV32IFD-NEXT: sltu a3, s8, s1
+; RV32IFD-NEXT: sltu a4, s11, s10
+; RV32IFD-NEXT: add a1, s7, a1
+; RV32IFD-NEXT: add a0, s8, a2
+; RV32IFD-NEXT: add a4, s9, a4
+; RV32IFD-NEXT: sltu a2, a2, s6
+; RV32IFD-NEXT: sltu a5, a0, s8
+; RV32IFD-NEXT: add a3, a4, a3
+; RV32IFD-NEXT: add a1, a1, a2
+; RV32IFD-NEXT: add a1, a3, a1
+; RV32IFD-NEXT: add a1, a1, a5
+; RV32IFD-NEXT: j .LBB45_6
+; RV32IFD-NEXT: .LBB45_5: # %fp-to-i-if-else
; RV32IFD-NEXT: sw a1, 72(sp)
; RV32IFD-NEXT: sw a2, 76(sp)
; RV32IFD-NEXT: sw zero, 80(sp)
@@ -5861,10 +5854,10 @@ define i64 @stest_f64i64_mm(double %x) {
; RV32IFD-NEXT: sll a2, a2, a0
; RV32IFD-NEXT: sll a5, a5, a0
; RV32IFD-NEXT: sll a1, a1, a0
-; RV32IFD-NEXT: or s3, a2, a6
+; RV32IFD-NEXT: or s4, a2, a6
; RV32IFD-NEXT: or a2, a5, a7
; RV32IFD-NEXT: or a3, a1, a3
-; RV32IFD-NEXT: sll s4, a4, a0
+; RV32IFD-NEXT: sll s3, a4, a0
; RV32IFD-NEXT: mv a0, s8
; RV32IFD-NEXT: mv a1, s0
; RV32IFD-NEXT: call __muldi3
@@ -5872,21 +5865,21 @@ define i64 @stest_f64i64_mm(double %x) {
; RV32IFD-NEXT: mv s5, a1
; RV32IFD-NEXT: mv a0, s0
; RV32IFD-NEXT: mv a1, s0
-; RV32IFD-NEXT: mv a2, s4
-; RV32IFD-NEXT: mv a3, s3
+; RV32IFD-NEXT: mv a2, s3
+; RV32IFD-NEXT: mv a3, s4
; RV32IFD-NEXT: call __muldi3
; RV32IFD-NEXT: add a1, a1, s5
; RV32IFD-NEXT: add s1, a0, s2
; RV32IFD-NEXT: sltu a0, s1, a0
; RV32IFD-NEXT: add s7, a1, a0
-; RV32IFD-NEXT: mv a0, s3
+; RV32IFD-NEXT: mv a0, s4
; RV32IFD-NEXT: li a1, 0
; RV32IFD-NEXT: mv a2, s8
; RV32IFD-NEXT: li a3, 0
; RV32IFD-NEXT: call __muldi3
; RV32IFD-NEXT: mv s5, a0
; RV32IFD-NEXT: mv s6, a1
-; RV32IFD-NEXT: mv a0, s4
+; RV32IFD-NEXT: mv a0, s3
; RV32IFD-NEXT: li a1, 0
; RV32IFD-NEXT: mv a2, s8
; RV32IFD-NEXT: li a3, 0
@@ -5895,76 +5888,75 @@ define i64 @stest_f64i64_mm(double %x) {
; RV32IFD-NEXT: add s8, s5, a1
; RV32IFD-NEXT: sltu a0, s8, s5
; RV32IFD-NEXT: add s6, s6, a0
-; RV32IFD-NEXT: mv a0, s4
+; RV32IFD-NEXT: mv a0, s3
; RV32IFD-NEXT: li a1, 0
; RV32IFD-NEXT: mv a2, s0
; RV32IFD-NEXT: li a3, 0
; RV32IFD-NEXT: call __muldi3
-; RV32IFD-NEXT: add s8, a0, s8
-; RV32IFD-NEXT: sltu a0, s8, a0
+; RV32IFD-NEXT: add s3, a0, s8
+; RV32IFD-NEXT: sltu a0, s3, a0
; RV32IFD-NEXT: add a0, a1, a0
-; RV32IFD-NEXT: add s4, s6, a0
-; RV32IFD-NEXT: sltu s5, s4, s6
-; RV32IFD-NEXT: mv a0, s3
+; RV32IFD-NEXT: add s5, s6, a0
+; RV32IFD-NEXT: sltu s6, s5, s6
+; RV32IFD-NEXT: mv a0, s4
; RV32IFD-NEXT: li a1, 0
; RV32IFD-NEXT: mv a2, s0
; RV32IFD-NEXT: li a3, 0
; RV32IFD-NEXT: call __muldi3
-; RV32IFD-NEXT: mv a2, a1
-; RV32IFD-NEXT: mv a1, s8
-; RV32IFD-NEXT: add a2, a2, s5
-; RV32IFD-NEXT: add s4, a0, s4
-; RV32IFD-NEXT: sltu a3, s4, a0
-; RV32IFD-NEXT: add a0, s4, s1
-; RV32IFD-NEXT: add a2, a2, a3
-; RV32IFD-NEXT: add a2, a2, s7
-; RV32IFD-NEXT: sltu a3, a0, s4
-; RV32IFD-NEXT: add a2, a2, a3
-; RV32IFD-NEXT: .LBB45_7: # %fp-to-i-cleanup
-; RV32IFD-NEXT: lui a3, 524288
-; RV32IFD-NEXT: addi a4, a3, -1
-; RV32IFD-NEXT: beq a1, a4, .LBB45_9
-; RV32IFD-NEXT: # %bb.8: # %fp-to-i-cleanup
-; RV32IFD-NEXT: sltu a5, a1, a4
-; RV32IFD-NEXT: or a6, a0, a2
-; RV32IFD-NEXT: bnez a6, .LBB45_10
-; RV32IFD-NEXT: j .LBB45_11
-; RV32IFD-NEXT: .LBB45_9:
-; RV32IFD-NEXT: sltiu a5, s2, -1
-; RV32IFD-NEXT: or a6, a0, a2
-; RV32IFD-NEXT: beqz a6, .LBB45_11
+; RV32IFD-NEXT: add a1, a1, s6
+; RV32IFD-NEXT: add s5, a0, s5
+; RV32IFD-NEXT: sltu a2, s5, a0
+; RV32IFD-NEXT: add a0, s5, s1
+; RV32IFD-NEXT: add a1, a1, a2
+; RV32IFD-NEXT: add a1, a1, s7
+; RV32IFD-NEXT: sltu a2, a0, s5
+; RV32IFD-NEXT: add a1, a1, a2
+; RV32IFD-NEXT: .LBB45_6: # %fp-to-i-cleanup
+; RV32IFD-NEXT: lui a2, 524288
+; RV32IFD-NEXT: addi a3, a2, -1
+; RV32IFD-NEXT: beq s3, a3, .LBB45_8
+; RV32IFD-NEXT: # %bb.7: # %fp-to-i-cleanup
+; RV32IFD-NEXT: sltu a4, s3, a3
+; RV32IFD-NEXT: or a5, a0, a1
+; RV32IFD-NEXT: bnez a5, .LBB45_9
+; RV32IFD-NEXT: j .LBB45_10
+; RV32IFD-NEXT: .LBB45_8:
+; RV32IFD-NEXT: sltiu a4, s2, -1
+; RV32IFD-NEXT: or a5, a0, a1
+; RV32IFD-NEXT: beqz a5, .LBB45_10
+; RV32IFD-NEXT: .LBB45_9: # %fp-to-i-cleanup
+; RV32IFD-NEXT: srli a4, a1, 31
; RV32IFD-NEXT: .LBB45_10: # %fp-to-i-cleanup
-; RV32IFD-NEXT: srli a5, a2, 31
-; RV32IFD-NEXT: .LBB45_11: # %fp-to-i-cleanup
-; RV32IFD-NEXT: neg a6, a5
-; RV32IFD-NEXT: addi a7, a5, -1
-; RV32IFD-NEXT: bnez a5, .LBB45_13
-; RV32IFD-NEXT: # %bb.12: # %fp-to-i-cleanup
-; RV32IFD-NEXT: mv a1, a4
-; RV32IFD-NEXT: .LBB45_13: # %fp-to-i-cleanup
-; RV32IFD-NEXT: or a4, a7, s2
-; RV32IFD-NEXT: and a2, a6, a2
-; RV32IFD-NEXT: and a5, a6, a0
-; RV32IFD-NEXT: beq a1, a3, .LBB45_15
-; RV32IFD-NEXT: # %bb.14: # %fp-to-i-cleanup
-; RV32IFD-NEXT: sltu a0, a3, a1
-; RV32IFD-NEXT: j .LBB45_16
-; RV32IFD-NEXT: .LBB45_15:
-; RV32IFD-NEXT: snez a0, a4
-; RV32IFD-NEXT: .LBB45_16: # %fp-to-i-cleanup
-; RV32IFD-NEXT: and a5, a5, a2
-; RV32IFD-NEXT: li a3, -1
-; RV32IFD-NEXT: beq a5, a3, .LBB45_18
-; RV32IFD-NEXT: # %bb.17: # %fp-to-i-cleanup
-; RV32IFD-NEXT: srli a2, a2, 31
-; RV32IFD-NEXT: xori a0, a2, 1
-; RV32IFD-NEXT: .LBB45_18: # %fp-to-i-cleanup
-; RV32IFD-NEXT: bnez a0, .LBB45_20
-; RV32IFD-NEXT: # %bb.19: # %fp-to-i-cleanup
-; RV32IFD-NEXT: lui a1, 524288
-; RV32IFD-NEXT: .LBB45_20: # %fp-to-i-cleanup
+; RV32IFD-NEXT: neg a5, a4
+; RV32IFD-NEXT: addi a6, a4, -1
+; RV32IFD-NEXT: bnez a4, .LBB45_12
+; RV32IFD-NEXT: # %bb.11: # %fp-to-i-cleanup
+; RV32IFD-NEXT: mv s3, a3
+; RV32IFD-NEXT: .LBB45_12: # %fp-to-i-cleanup
+; RV32IFD-NEXT: or a3, a6, s2
+; RV32IFD-NEXT: and a1, a5, a1
+; RV32IFD-NEXT: and a4, a5, a0
+; RV32IFD-NEXT: beq s3, a2, .LBB45_14
+; RV32IFD-NEXT: # %bb.13: # %fp-to-i-cleanup
+; RV32IFD-NEXT: sltu a0, a2, s3
+; RV32IFD-NEXT: j .LBB45_15
+; RV32IFD-NEXT: .LBB45_14:
+; RV32IFD-NEXT: snez a0, a3
+; RV32IFD-NEXT: .LBB45_15: # %fp-to-i-cleanup
+; RV32IFD-NEXT: and a4, a4, a1
+; RV32IFD-NEXT: li a2, -1
+; RV32IFD-NEXT: beq a4, a2, .LBB45_17
+; RV32IFD-NEXT: # %bb.16: # %fp-to-i-cleanup
+; RV32IFD-NEXT: srli a1, a1, 31
+; RV32IFD-NEXT: xori a0, a1, 1
+; RV32IFD-NEXT: .LBB45_17: # %fp-to-i-cleanup
+; RV32IFD-NEXT: bnez a0, .LBB45_19
+; RV32IFD-NEXT: # %bb.18: # %fp-to-i-cleanup
+; RV32IFD-NEXT: lui s3, 524288
+; RV32IFD-NEXT: .LBB45_19: # %fp-to-i-cleanup
; RV32IFD-NEXT: neg a0, a0
-; RV32IFD-NEXT: and a0, a0, a4
+; RV32IFD-NEXT: and a0, a0, a3
+; RV32IFD-NEXT: mv a1, s3
; RV32IFD-NEXT: lw ra, 140(sp) # 4-byte Folded Reload
; RV32IFD-NEXT: lw s0, 136(sp) # 4-byte Folded Reload
; RV32IFD-NEXT: lw s1, 132(sp) # 4-byte Folded Reload
@@ -5994,6 +5986,15 @@ define i64 @stest_f64i64_mm(double %x) {
; RV32IFD-NEXT: addi sp, sp, 144
; RV32IFD-NEXT: .cfi_def_cfa_offset 0
; RV32IFD-NEXT: ret
+; RV32IFD-NEXT: .LBB45_20: # %fp-to-i-if-then5
+; RV32IFD-NEXT: .cfi_restore_state
+; RV32IFD-NEXT: xori a0, a3, 1
+; RV32IFD-NEXT: lui a1, 524288
+; RV32IFD-NEXT: neg s2, a0
+; RV32IFD-NEXT: sub a1, a1, a0
+; RV32IFD-NEXT: mv s3, s2
+; RV32IFD-NEXT: mv a0, s2
+; RV32IFD-NEXT: j .LBB45_6
;
; RV64IFD-LABEL: stest_f64i64_mm:
; RV64IFD: # %bb.0: # %entry
@@ -6042,6 +6043,7 @@ define i64 @utest_f64i64_mm(double %x) {
; RV32IF-NEXT: .cfi_offset s9, -44
; RV32IF-NEXT: .cfi_offset s10, -48
; RV32IF-NEXT: .cfi_offset s11, -52
+; RV32IF-NEXT: .cfi_remember_state
; RV32IF-NEXT: slli a2, a1, 1
; RV32IF-NEXT: srli a2, a2, 21
; RV32IF-NEXT: li a3, 1023
@@ -6051,23 +6053,15 @@ define i64 @utest_f64i64_mm(double %x) {
; RV32IF-NEXT: li s9, 0
; RV32IF-NEXT: li a0, 0
; RV32IF-NEXT: li a1, 0
-; RV32IF-NEXT: j .LBB46_7
+; RV32IF-NEXT: j .LBB46_6
; RV32IF-NEXT: .LBB46_2: # %fp-to-i-if-end
; RV32IF-NEXT: addi a3, a2, -1151
; RV32IF-NEXT: sltu a4, a3, a2
; RV32IF-NEXT: sltiu a3, a3, -128
; RV32IF-NEXT: or a4, a4, a3
; RV32IF-NEXT: srli a3, a1, 31
-; RV32IF-NEXT: beqz a4, .LBB46_4
-; RV32IF-NEXT: # %bb.3: # %fp-to-i-if-then5
-; RV32IF-NEXT: xori a0, a3, 1
-; RV32IF-NEXT: lui a1, 524288
-; RV32IF-NEXT: neg s2, a0
-; RV32IF-NEXT: sub a1, a1, a0
-; RV32IF-NEXT: mv s9, s2
-; RV32IF-NEXT: mv a0, s2
-; RV32IF-NEXT: j .LBB46_7
-; RV32IF-NEXT: .LBB46_4: # %fp-to-i-if-end9
+; RV32IF-NEXT: bnez a4, .LBB46_7
+; RV32IF-NEXT: # %bb.3: # %fp-to-i-if-end9
; RV32IF-NEXT: neg s0, a3
; RV32IF-NEXT: slli a1, a1, 12
; RV32IF-NEXT: lui a3, 256
@@ -6075,8 +6069,8 @@ define i64 @utest_f64i64_mm(double %x) {
; RV32IF-NEXT: srli a1, a1, 12
; RV32IF-NEXT: or a1, a1, a3
; RV32IF-NEXT: ori s8, s0, 1
-; RV32IF-NEXT: bltu a4, a2, .LBB46_6
-; RV32IF-NEXT: # %bb.5: # %fp-to-i-if-then12
+; RV32IF-NEXT: bltu a4, a2, .LBB46_5
+; RV32IF-NEXT: # %bb.4: # %fp-to-i-if-then12
; RV32IF-NEXT: sw zero, 24(sp)
; RV32IF-NEXT: sw zero, 28(sp)
; RV32IF-NEXT: sw zero, 32(sp)
@@ -6168,8 +6162,8 @@ define i64 @utest_f64i64_mm(double %x) {
; RV32IF-NEXT: add a1, a1, a2
; RV32IF-NEXT: add a1, a3, a1
; RV32IF-NEXT: add a1, a1, a5
-; RV32IF-NEXT: j .LBB46_7
-; RV32IF-NEXT: .LBB46_6: # %fp-to-i-if-else
+; RV32IF-NEXT: j .LBB46_6
+; RV32IF-NEXT: .LBB46_5: # %fp-to-i-if-else
; RV32IF-NEXT: sw a0, 56(sp)
; RV32IF-NEXT: sw a1, 60(sp)
; RV32IF-NEXT: sw zero, 64(sp)
@@ -6255,7 +6249,7 @@ define i64 @utest_f64i64_mm(double %x) {
; RV32IF-NEXT: add a1, a1, s7
; RV32IF-NEXT: sltu a2, a0, s4
; RV32IF-NEXT: add a1, a1, a2
-; RV32IF-NEXT: .LBB46_7: # %fp-to-i-cleanup
+; RV32IF-NEXT: .LBB46_6: # %fp-to-i-cleanup
; RV32IF-NEXT: or a2, a1, a0
; RV32IF-NEXT: xori a0, a0, 1
; RV32IF-NEXT: seqz a2, a2
@@ -6295,6 +6289,15 @@ define i64 @utest_f64i64_mm(double %x) {
; RV32IF-NEXT: addi sp, sp, 128
; RV32IF-NEXT: .cfi_def_cfa_offset 0
; RV32IF-NEXT: ret
+; RV32IF-NEXT: .LBB46_7: # %fp-to-i-if-then5
+; RV32IF-NEXT: .cfi_restore_state
+; RV32IF-NEXT: xori a0, a3, 1
+; RV32IF-NEXT: lui a1, 524288
+; RV32IF-NEXT: neg s2, a0
+; RV32IF-NEXT: sub a1, a1, a0
+; RV32IF-NEXT: mv s9, s2
+; RV32IF-NEXT: mv a0, s2
+; RV32IF-NEXT: j .LBB46_6
;
; RV64-LABEL: utest_f64i64_mm:
; RV64: # %bb.0: # %entry
@@ -6342,6 +6345,7 @@ define i64 @utest_f64i64_mm(double %x) {
; RV32IFD-NEXT: .cfi_offset s9, -44
; RV32IFD-NEXT: .cfi_offset s10, -48
; RV32IFD-NEXT: .cfi_offset s11, -52
+; RV32IFD-NEXT: .cfi_remember_state
; RV32IFD-NEXT: fsd fa0, 16(sp)
; RV32IFD-NEXT: lw a2, 20(sp)
; RV32IFD-NEXT: slli a0, a2, 1
@@ -6353,23 +6357,15 @@ define i64 @utest_f64i64_mm(double %x) {
; RV32IFD-NEXT: li s9, 0
; RV32IFD-NEXT: li a0, 0
; RV32IFD-NEXT: li a1, 0
-; RV32IFD-NEXT: j .LBB46_7
+; RV32IFD-NEXT: j .LBB46_6
; RV32IFD-NEXT: .LBB46_2: # %fp-to-i-if-end
; RV32IFD-NEXT: addi a1, a0, -1151
; RV32IFD-NEXT: sltu a3, a1, a0
; RV32IFD-NEXT: sltiu a1, a1, -128
; RV32IFD-NEXT: or a1, a3, a1
; RV32IFD-NEXT: srli a3, a2, 31
-; RV32IFD-NEXT: beqz a1, .LBB46_4
-; RV32IFD-NEXT: # %bb.3: # %fp-to-i-if-then5
-; RV32IFD-NEXT: xori a0, a3, 1
-; RV32IFD-NEXT: lui a1, 524288
-; RV32IFD-NEXT: neg s2, a0
-; RV32IFD-NEXT: sub a1, a1, a0
-; RV32IFD-NEXT: mv s9, s2
-; RV32IFD-NEXT: mv a0, s2
-; RV32IFD-NEXT: j .LBB46_7
-; RV32IFD-NEXT: .LBB46_4: # %fp-to-i-if-end9
+; RV32IFD-NEXT: bnez a1, .LBB46_7
+; RV32IFD-NEXT: # %bb.3: # %fp-to-i-if-end9
; RV32IFD-NEXT: lw a1, 16(sp)
; RV32IFD-NEXT: neg s0, a3
; RV32IFD-NEXT: slli a2, a2, 12
@@ -6378,8 +6374,8 @@ define i64 @utest_f64i64_mm(double %x) {
; RV32IFD-NEXT: or a2, a2, a3
; RV32IFD-NEXT: li a3, 1074
; RV32IFD-NEXT: ori s8, s0, 1
-; RV32IFD-NEXT: bltu a3, a0, .LBB46_6
-; RV32IFD-NEXT: # %bb.5: # %fp-to-i-if-then12
+; RV32IFD-NEXT: bltu a3, a0, .LBB46_5
+; RV32IFD-NEXT: # %bb.4: # %fp-to-i-if-then12
; RV32IFD-NEXT: sw zero, 40(sp)
; RV32IFD-NEXT: sw zero, 44(sp)
; RV32IFD-NEXT: sw zero, 48(sp)
@@ -6471,8 +6467,8 @@ define i64 @utest_f64i64_mm(double %x) {
; RV32IFD-NEXT: add a1, a1, a2
; RV32IFD-NEXT: add a1, a3, a1
; RV32IFD-NEXT: add a1, a1, a5
-; RV32IFD-NEXT: j .LBB46_7
-; RV32IFD-NEXT: .LBB46_6: # %fp-to-i-if-else
+; RV32IFD-NEXT: j .LBB46_6
+; RV32IFD-NEXT: .LBB46_5: # %fp-to-i-if-else
; RV32IFD-NEXT: sw a1, 72(sp)
; RV32IFD-NEXT: sw a2, 76(sp)
; RV32IFD-NEXT: sw zero, 80(sp)
@@ -6558,7 +6554,7 @@ define i64 @utest_f64i64_mm(double %x) {
; RV32IFD-NEXT: add a1, a1, s7
; RV32IFD-NEXT: sltu a2, a0, s4
; RV32IFD-NEXT: add a1, a1, a2
-; RV32IFD-NEXT: .LBB46_7: # %fp-to-i-cleanup
+; RV32IFD-NEXT: .LBB46_6: # %fp-to-i-cleanup
; RV32IFD-NEXT: or a2, a1, a0
; RV32IFD-NEXT: xori a0, a0, 1
; RV32IFD-NEXT: seqz a2, a2
@@ -6598,6 +6594,15 @@ define i64 @utest_f64i64_mm(double %x) {
; RV32IFD-NEXT: addi sp, sp, 144
; RV32IFD-NEXT: .cfi_def_cfa_offset 0
; RV32IFD-NEXT: ret
+; RV32IFD-NEXT: .LBB46_7: # %fp-to-i-if-then5
+; RV32IFD-NEXT: .cfi_restore_state
+; RV32IFD-NEXT: xori a0, a3, 1
+; RV32IFD-NEXT: lui a1, 524288
+; RV32IFD-NEXT: neg s2, a0
+; RV32IFD-NEXT: sub a1, a1, a0
+; RV32IFD-NEXT: mv s9, s2
+; RV32IFD-NEXT: mv a0, s2
+; RV32IFD-NEXT: j .LBB46_6
entry:
%conv = fptoui double %x to i128
%spec.store.select = call i128 @llvm.umin.i128(i128 %conv, i128 18446744073709551616)
@@ -6645,24 +6650,15 @@ define i64 @ustest_f64i64_mm(double %x) {
; RV32IF-NEXT: li s9, 0
; RV32IF-NEXT: li a1, 0
; RV32IF-NEXT: li a0, 0
-; RV32IF-NEXT: j .LBB47_6
+; RV32IF-NEXT: j .LBB47_8
; RV32IF-NEXT: .LBB47_2: # %fp-to-i-if-end
; RV32IF-NEXT: addi a3, a2, -1151
; RV32IF-NEXT: sltu a4, a3, a2
; RV32IF-NEXT: sltiu a3, a3, -128
; RV32IF-NEXT: or a4, a4, a3
; RV32IF-NEXT: srli a3, a1, 31
-; RV32IF-NEXT: beqz a4, .LBB47_4
-; RV32IF-NEXT: # %bb.3: # %fp-to-i-if-then5
-; RV32IF-NEXT: xori a0, a3, 1
-; RV32IF-NEXT: lui a1, 524288
-; RV32IF-NEXT: neg s2, a0
-; RV32IF-NEXT: sub a0, a1, a0
-; RV32IF-NEXT: mv s9, s2
-; RV32IF-NEXT: mv a1, s2
-; RV32IF-NEXT: beqz a0, .LBB47_6
-; RV32IF-NEXT: j .LBB47_8
-; RV32IF-NEXT: .LBB47_4: # %fp-to-i-if-end9
+; RV32IF-NEXT: bnez a4, .LBB47_7
+; RV32IF-NEXT: # %bb.3: # %fp-to-i-if-end9
; RV32IF-NEXT: neg s0, a3
; RV32IF-NEXT: slli a1, a1, 12
; RV32IF-NEXT: lui a3, 256
@@ -6670,8 +6666,8 @@ define i64 @ustest_f64i64_mm(double %x) {
; RV32IF-NEXT: srli a1, a1, 12
; RV32IF-NEXT: or a1, a1, a3
; RV32IF-NEXT: ori s8, s0, 1
-; RV32IF-NEXT: bltu a4, a2, .LBB47_7
-; RV32IF-NEXT: # %bb.5: # %fp-to-i-if-then12
+; RV32IF-NEXT: bltu a4, a2, .LBB47_5
+; RV32IF-NEXT: # %bb.4: # %fp-to-i-if-then12
; RV32IF-NEXT: sw zero, 24(sp)
; RV32IF-NEXT: sw zero, 28(sp)
; RV32IF-NEXT: sw zero, 32(sp)
@@ -6763,11 +6759,9 @@ define i64 @ustest_f64i64_mm(double %x) {
; RV32IF-NEXT: add a0, s6, a0
; RV32IF-NEXT: add a0, a2, a0
; RV32IF-NEXT: add a0, a0, a4
-; RV32IF-NEXT: bnez a0, .LBB47_8
-; RV32IF-NEXT: .LBB47_6:
-; RV32IF-NEXT: seqz a2, a1
-; RV32IF-NEXT: j .LBB47_9
-; RV32IF-NEXT: .LBB47_7: # %fp-to-i-if-else
+; RV32IF-NEXT: bnez a0, .LBB47_6
+; RV32IF-NEXT: j .LBB47_8
+; RV32IF-NEXT: .LBB47_5: # %fp-to-i-if-else
; RV32IF-NEXT: sw a0, 56(sp)
; RV32IF-NEXT: sw a1, 60(sp)
; RV32IF-NEXT: sw zero, 64(sp)
@@ -6853,9 +6847,20 @@ define i64 @ustest_f64i64_mm(double %x) {
; RV32IF-NEXT: add a0, a0, s7
; RV32IF-NEXT: sltu a2, a1, s4
; RV32IF-NEXT: add a0, a0, a2
-; RV32IF-NEXT: beqz a0, .LBB47_6
-; RV32IF-NEXT: .LBB47_8: # %fp-to-i-cleanup
+; RV32IF-NEXT: beqz a0, .LBB47_8
+; RV32IF-NEXT: .LBB47_6: # %fp-to-i-cleanup
; RV32IF-NEXT: srli a2, a0, 31
+; RV32IF-NEXT: j .LBB47_9
+; RV32IF-NEXT: .LBB47_7: # %fp-to-i-if-then5
+; RV32IF-NEXT: xori a0, a3, 1
+; RV32IF-NEXT: lui a1, 524288
+; RV32IF-NEXT: neg s2, a0
+; RV32IF-NEXT: sub a0, a1, a0
+; RV32IF-NEXT: mv s9, s2
+; RV32IF-NEXT: mv a1, s2
+; RV32IF-NEXT: bnez a0, .LBB47_6
+; RV32IF-NEXT: .LBB47_8:
+; RV32IF-NEXT: seqz a2, a1
; RV32IF-NEXT: .LBB47_9: # %fp-to-i-cleanup
; RV32IF-NEXT: xori a1, a1, 1
; RV32IF-NEXT: or a1, a1, a0
@@ -6965,24 +6970,15 @@ define i64 @ustest_f64i64_mm(double %x) {
; RV32IFD-NEXT: li s9, 0
; RV32IFD-NEXT: li a1, 0
; RV32IFD-NEXT: li a0, 0
-; RV32IFD-NEXT: j .LBB47_6
+; RV32IFD-NEXT: j .LBB47_8
; RV32IFD-NEXT: .LBB47_2: # %fp-to-i-if-end
; RV32IFD-NEXT: addi a1, a0, -1151
; RV32IFD-NEXT: sltu a3, a1, a0
; RV32IFD-NEXT: sltiu a1, a1, -128
; RV32IFD-NEXT: or a1, a3, a1
; RV32IFD-NEXT: srli a3, a2, 31
-; RV32IFD-NEXT: beqz a1, .LBB47_4
-; RV32IFD-NEXT: # %bb.3: # %fp-to-i-if-then5
-; RV32IFD-NEXT: xori a0, a3, 1
-; RV32IFD-NEXT: lui a1, 524288
-; RV32IFD-NEXT: neg s2, a0
-; RV32IFD-NEXT: sub a0, a1, a0
-; RV32IFD-NEXT: mv s9, s2
-; RV32IFD-NEXT: mv a1, s2
-; RV32IFD-NEXT: beqz a0, .LBB47_6
-; RV32IFD-NEXT: j .LBB47_8
-; RV32IFD-NEXT: .LBB47_4: # %fp-to-i-if-end9
+; RV32IFD-NEXT: bnez a1, .LBB47_7
+; RV32IFD-NEXT: # %bb.3: # %fp-to-i-if-end9
; RV32IFD-NEXT: lw a1, 16(sp)
; RV32IFD-NEXT: neg s0, a3
; RV32IFD-NEXT: slli a2, a2, 12
@@ -6991,8 +6987,8 @@ define i64 @ustest_f64i64_mm(double %x) {
; RV32IFD-NEXT: or a2, a2, a3
; RV32IFD-NEXT: li a3, 1074
; RV32IFD-NEXT: ori s8, s0, 1
-; RV32IFD-NEXT: bltu a3, a0, .LBB47_7
-; RV32IFD-NEXT: # %bb.5: # %fp-to-i-if-then12
+; RV32IFD-NEXT: bltu a3, a0, .LBB47_5
+; RV32IFD-NEXT: # %bb.4: # %fp-to-i-if-then12
; RV32IFD-NEXT: sw zero, 40(sp)
; RV32IFD-NEXT: sw zero, 44(sp)
; RV32IFD-NEXT: sw zero, 48(sp)
@@ -7084,11 +7080,9 @@ define i64 @ustest_f64i64_mm(double %x) {
; RV32IFD-NEXT: add a0, s6, a0
; RV32IFD-NEXT: add a0, a2, a0
; RV32IFD-NEXT: add a0, a0, a4
-; RV32IFD-NEXT: bnez a0, .LBB47_8
-; RV32IFD-NEXT: .LBB47_6:
-; RV32IFD-NEXT: seqz a2, a1
-; RV32IFD-NEXT: j .LBB47_9
-; RV32IFD-NEXT: .LBB47_7: # %fp-to-i-if-else
+; RV32IFD-NEXT: bnez a0, .LBB47_6
+; RV32IFD-NEXT: j .LBB47_8
+; RV32IFD-NEXT: .LBB47_5: # %fp-to-i-if-else
; RV32IFD-NEXT: sw a1, 72(sp)
; RV32IFD-NEXT: sw a2, 76(sp)
; RV32IFD-NEXT: sw zero, 80(sp)
@@ -7174,9 +7168,20 @@ define i64 @ustest_f64i64_mm(double %x) {
; RV32IFD-NEXT: add a0, a0, s7
; RV32IFD-NEXT: sltu a2, a1, s4
; RV32IFD-NEXT: add a0, a0, a2
-; RV32IFD-NEXT: beqz a0, .LBB47_6
-; RV32IFD-NEXT: .LBB47_8: # %fp-to-i-cleanup
+; RV32IFD-NEXT: beqz a0, .LBB47_8
+; RV32IFD-NEXT: .LBB47_6: # %fp-to-i-cleanup
; RV32IFD-NEXT: srli a2, a0, 31
+; RV32IFD-NEXT: j .LBB47_9
+; RV32IFD-NEXT: .LBB47_7: # %fp-to-i-if-then5
+; RV32IFD-NEXT: xori a0, a3, 1
+; RV32IFD-NEXT: lui a1, 524288
+; RV32IFD-NEXT: neg s2, a0
+; RV32IFD-NEXT: sub a0, a1, a0
+; RV32IFD-NEXT: mv s9, s2
+; RV32IFD-NEXT: mv a1, s2
+; RV32IFD-NEXT: bnez a0, .LBB47_6
+; RV32IFD-NEXT: .LBB47_8:
+; RV32IFD-NEXT: seqz a2, a1
; RV32IFD-NEXT: .LBB47_9: # %fp-to-i-cleanup
; RV32IFD-NEXT: xori a1, a1, 1
; RV32IFD-NEXT: or a1, a1, a0
@@ -7259,6 +7264,7 @@ define i64 @stest_f32i64_mm(float %x) {
; RV32-NEXT: .cfi_offset s9, -44
; RV32-NEXT: .cfi_offset s10, -48
; RV32-NEXT: .cfi_offset s11, -52
+; RV32-NEXT: .cfi_remember_state
; RV32-NEXT: fmv.x.w a1, fa0
; RV32-NEXT: slli a0, a1, 1
; RV32-NEXT: srli a0, a0, 24
@@ -7266,26 +7272,17 @@ define i64 @stest_f32i64_mm(float %x) {
; RV32-NEXT: bgeu a0, a2, .LBB48_2
; RV32-NEXT: # %bb.1:
; RV32-NEXT: li s2, 0
-; RV32-NEXT: li a1, 0
+; RV32-NEXT: li s3, 0
; RV32-NEXT: li a0, 0
-; RV32-NEXT: li a2, 0
-; RV32-NEXT: j .LBB48_7
+; RV32-NEXT: li a1, 0
+; RV32-NEXT: j .LBB48_6
; RV32-NEXT: .LBB48_2: # %fp-to-i-if-end
; RV32-NEXT: addi a2, a0, -255
; RV32-NEXT: sltu a3, a2, a0
; RV32-NEXT: sltiu a2, a2, -128
; RV32-NEXT: or a2, a3, a2
-; RV32-NEXT: beqz a2, .LBB48_4
-; RV32-NEXT: # %bb.3: # %fp-to-i-if-then5
-; RV32-NEXT: srli a1, a1, 31
-; RV32-NEXT: lui a0, 524288
-; RV32-NEXT: xori a2, a1, 1
-; RV32-NEXT: addi s2, a1, -1
-; RV32-NEXT: sub a2, a0, a2
-; RV32-NEXT: mv a1, s2
-; RV32-NEXT: mv a0, s2
-; RV32-NEXT: j .LBB48_7
-; RV32-NEXT: .LBB48_4: # %fp-to-i-if-end9
+; RV32-NEXT: bnez a2, .LBB48_20
+; RV32-NEXT: # %bb.3: # %fp-to-i-if-end9
; RV32-NEXT: srai s0, a1, 31
; RV32-NEXT: slli a1, a1, 9
; RV32-NEXT: lui a2, 2048
@@ -7293,8 +7290,8 @@ define i64 @stest_f32i64_mm(float %x) {
; RV32-NEXT: srli a1, a1, 9
; RV32-NEXT: or a1, a1, a2
; RV32-NEXT: ori s8, s0, 1
-; RV32-NEXT: bltu a3, a0, .LBB48_6
-; RV32-NEXT: # %bb.5: # %fp-to-i-if-then12
+; RV32-NEXT: bltu a3, a0, .LBB48_5
+; RV32-NEXT: # %bb.4: # %fp-to-i-if-then12
; RV32-NEXT: sw zero, 24(sp)
; RV32-NEXT: sw zero, 28(sp)
; RV32-NEXT: sw zero, 32(sp)
@@ -7326,70 +7323,68 @@ define i64 @stest_f32i64_mm(float %x) {
; RV32-NEXT: srl a0, a0, a1
; RV32-NEXT: or a3, a3, a6
; RV32-NEXT: sw a3, 4(sp) # 4-byte Folded Spill
-; RV32-NEXT: or s5, a2, a7
-; RV32-NEXT: or s6, a0, a5
-; RV32-NEXT: srl s4, a4, a1
-; RV32-NEXT: mv a0, s6
+; RV32-NEXT: or s6, a2, a7
+; RV32-NEXT: or s7, a0, a5
+; RV32-NEXT: srl s5, a4, a1
+; RV32-NEXT: mv a0, s7
; RV32-NEXT: li a1, 0
; RV32-NEXT: mv a2, s8
; RV32-NEXT: li a3, 0
; RV32-NEXT: call __muldi3
; RV32-NEXT: mv s2, a0
-; RV32-NEXT: mv s7, a1
-; RV32-NEXT: mv a0, s5
+; RV32-NEXT: mv s3, a1
+; RV32-NEXT: mv a0, s6
; RV32-NEXT: li a1, 0
; RV32-NEXT: mv a2, s8
; RV32-NEXT: li a3, 0
; RV32-NEXT: call __muldi3
-; RV32-NEXT: add s7, a0, s7
-; RV32-NEXT: sltu a0, s7, a0
-; RV32-NEXT: add s9, a1, a0
-; RV32-NEXT: mv a0, s6
+; RV32-NEXT: add s3, a0, s3
+; RV32-NEXT: sltu a0, s3, a0
+; RV32-NEXT: add s10, a1, a0
+; RV32-NEXT: mv a0, s7
; RV32-NEXT: li a1, 0
; RV32-NEXT: mv a2, s0
; RV32-NEXT: li a3, 0
; RV32-NEXT: call __muldi3
-; RV32-NEXT: add s10, a0, s7
-; RV32-NEXT: sltu a0, s10, a0
+; RV32-NEXT: add s3, a0, s3
+; RV32-NEXT: sltu a0, s3, a0
; RV32-NEXT: add a0, a1, a0
-; RV32-NEXT: add s11, s9, a0
-; RV32-NEXT: mv a0, s5
+; RV32-NEXT: add s11, s10, a0
+; RV32-NEXT: mv a0, s6
; RV32-NEXT: li a1, 0
; RV32-NEXT: mv a2, s0
; RV32-NEXT: li a3, 0
; RV32-NEXT: call __muldi3
; RV32-NEXT: mv s1, a0
-; RV32-NEXT: mv s3, s8
-; RV32-NEXT: mv s8, a1
-; RV32-NEXT: add s7, a0, s11
+; RV32-NEXT: mv s9, a1
+; RV32-NEXT: mv s4, s8
+; RV32-NEXT: add s8, a0, s11
; RV32-NEXT: mv a0, s0
; RV32-NEXT: mv a1, s0
-; RV32-NEXT: mv a2, s6
-; RV32-NEXT: mv a3, s5
+; RV32-NEXT: mv a2, s7
+; RV32-NEXT: mv a3, s6
; RV32-NEXT: call __muldi3
-; RV32-NEXT: mv s5, a0
-; RV32-NEXT: mv s6, a1
-; RV32-NEXT: mv a0, s3
+; RV32-NEXT: mv s6, a0
+; RV32-NEXT: mv s7, a1
+; RV32-NEXT: mv a0, s4
; RV32-NEXT: mv a1, s0
; RV32-NEXT: lw a2, 4(sp) # 4-byte Folded Reload
-; RV32-NEXT: mv a3, s4
+; RV32-NEXT: mv a3, s5
; RV32-NEXT: call __muldi3
-; RV32-NEXT: mv a2, a1
-; RV32-NEXT: mv a1, s10
-; RV32-NEXT: add a3, s5, a0
-; RV32-NEXT: sltu a4, s7, s1
-; RV32-NEXT: sltu a5, s11, s9
-; RV32-NEXT: add a2, s6, a2
-; RV32-NEXT: add a0, s7, a3
-; RV32-NEXT: add a5, s8, a5
-; RV32-NEXT: sltu a3, a3, s5
-; RV32-NEXT: sltu a6, a0, s7
-; RV32-NEXT: add a4, a5, a4
-; RV32-NEXT: add a2, a2, a3
-; RV32-NEXT: add a2, a4, a2
-; RV32-NEXT: add a2, a2, a6
-; RV32-NEXT: j .LBB48_7
-; RV32-NEXT: .LBB48_6: # %fp-to-i-if-else
+; RV32-NEXT: add a2, s6, a0
+; RV32-NEXT: sltu a3, s8, s1
+; RV32-NEXT: sltu a4, s11, s10
+; RV32-NEXT: add a1, s7, a1
+; RV32-NEXT: add a0, s8, a2
+; RV32-NEXT: add a4, s9, a4
+; RV32-NEXT: sltu a2, a2, s6
+; RV32-NEXT: sltu a5, a0, s8
+; RV32-NEXT: add a3, a4, a3
+; RV32-NEXT: add a1, a1, a2
+; RV32-NEXT: add a1, a3, a1
+; RV32-NEXT: add a1, a1, a5
+; RV32-NEXT: j .LBB48_6
+; RV32-NEXT: .LBB48_5: # %fp-to-i-if-else
; RV32-NEXT: sw a1, 56(sp)
; RV32-NEXT: sw zero, 60(sp)
; RV32-NEXT: sw zero, 64(sp)
@@ -7418,10 +7413,10 @@ define i64 @stest_f32i64_mm(float %x) {
; RV32-NEXT: sll a2, a2, a0
; RV32-NEXT: sll a5, a5, a0
; RV32-NEXT: sll a1, a1, a0
-; RV32-NEXT: or s3, a2, a6
+; RV32-NEXT: or s4, a2, a6
; RV32-NEXT: or a2, a5, a7
; RV32-NEXT: or a3, a1, a3
-; RV32-NEXT: sll s4, a4, a0
+; RV32-NEXT: sll s3, a4, a0
; RV32-NEXT: mv a0, s8
; RV32-NEXT: mv a1, s0
; RV32-NEXT: call __muldi3
@@ -7429,21 +7424,21 @@ define i64 @stest_f32i64_mm(float %x) {
; RV32-NEXT: mv s5, a1
; RV32-NEXT: mv a0, s0
; RV32-NEXT: mv a1, s0
-; RV32-NEXT: mv a2, s4
-; RV32-NEXT: mv a3, s3
+; RV32-NEXT: mv a2, s3
+; RV32-NEXT: mv a3, s4
; RV32-NEXT: call __muldi3
; RV32-NEXT: add a1, a1, s5
; RV32-NEXT: add s1, a0, s2
; RV32-NEXT: sltu a0, s1, a0
; RV32-NEXT: add s7, a1, a0
-; RV32-NEXT: mv a0, s3
+; RV32-NEXT: mv a0, s4
; RV32-NEXT: li a1, 0
; RV32-NEXT: mv a2, s8
; RV32-NEXT: li a3, 0
; RV32-NEXT: call __muldi3
; RV32-NEXT: mv s5, a0
; RV32-NEXT: mv s6, a1
-; RV32-NEXT: mv a0, s4
+; RV32-NEXT: mv a0, s3
; RV32-NEXT: li a1, 0
; RV32-NEXT: mv a2, s8
; RV32-NEXT: li a3, 0
@@ -7452,76 +7447,75 @@ define i64 @stest_f32i64_mm(float %x) {
; RV32-NEXT: add s8, s5, a1
; RV32-NEXT: sltu a0, s8, s5
; RV32-NEXT: add s6, s6, a0
-; RV32-NEXT: mv a0, s4
+; RV32-NEXT: mv a0, s3
; RV32-NEXT: li a1, 0
; RV32-NEXT: mv a2, s0
; RV32-NEXT: li a3, 0
; RV32-NEXT: call __muldi3
-; RV32-NEXT: add s8, a0, s8
-; RV32-NEXT: sltu a0, s8, a0
+; RV32-NEXT: add s3, a0, s8
+; RV32-NEXT: sltu a0, s3, a0
; RV32-NEXT: add a0, a1, a0
-; RV32-NEXT: add s4, s6, a0
-; RV32-NEXT: sltu s5, s4, s6
-; RV32-NEXT: mv a0, s3
+; RV32-NEXT: add s5, s6, a0
+; RV32-NEXT: sltu s6, s5, s6
+; RV32-NEXT: mv a0, s4
; RV32-NEXT: li a1, 0
; RV32-NEXT: mv a2, s0
; RV32-NEXT: li a3, 0
; RV32-NEXT: call __muldi3
-; RV32-NEXT: mv a2, a1
-; RV32-NEXT: mv a1, s8
-; RV32-NEXT: add a2, a2, s5
-; RV32-NEXT: add s4, a0, s4
-; RV32-NEXT: sltu a3, s4, a0
-; RV32-NEXT: add a0, s4, s1
-; RV32-NEXT: add a2, a2, a3
-; RV32-NEXT: add a2, a2, s7
-; RV32-NEXT: sltu a3, a0, s4
-; RV32-NEXT: add a2, a2, a3
-; RV32-NEXT: .LBB48_7: # %fp-to-i-cleanup
-; RV32-NEXT: lui a3, 524288
-; RV32-NEXT: addi a4, a3, -1
-; RV32-NEXT: beq a1, a4, .LBB48_9
-; RV32-NEXT: # %bb.8: # %fp-to-i-cleanup
-; RV32-NEXT: sltu a5, a1, a4
-; RV32-NEXT: or a6, a0, a2
-; RV32-NEXT: bnez a6, .LBB48_10
-; RV32-NEXT: j .LBB48_11
-; RV32-NEXT: .LBB48_9:
-; RV32-NEXT: sltiu a5, s2, -1
-; RV32-NEXT: or a6, a0, a2
-; RV32-NEXT: beqz a6, .LBB48_11
+; RV32-NEXT: add a1, a1, s6
+; RV32-NEXT: add s5, a0, s5
+; RV32-NEXT: sltu a2, s5, a0
+; RV32-NEXT: add a0, s5, s1
+; RV32-NEXT: add a1, a1, a2
+; RV32-NEXT: add a1, a1, s7
+; RV32-NEXT: sltu a2, a0, s5
+; RV32-NEXT: add a1, a1, a2
+; RV32-NEXT: .LBB48_6: # %fp-to-i-cleanup
+; RV32-NEXT: lui a2, 524288
+; RV32-NEXT: addi a3, a2, -1
+; RV32-NEXT: beq s3, a3, .LBB48_8
+; RV32-NEXT: # %bb.7: # %fp-to-i-cleanup
+; RV32-NEXT: sltu a4, s3, a3
+; RV32-NEXT: or a5, a0, a1
+; RV32-NEXT: bnez a5, .LBB48_9
+; RV32-NEXT: j .LBB48_10
+; RV32-NEXT: .LBB48_8:
+; RV32-NEXT: sltiu a4, s2, -1
+; RV32-NEXT: or a5, a0, a1
+; RV32-NEXT: beqz a5, .LBB48_10
+; RV32-NEXT: .LBB48_9: # %fp-to-i-cleanup
+; RV32-NEXT: srli a4, a1, 31
; RV32-NEXT: .LBB48_10: # %fp-to-i-cleanup
-; RV32-NEXT: srli a5, a2, 31
-; RV32-NEXT: .LBB48_11: # %fp-to-i-cleanup
-; RV32-NEXT: neg a6, a5
-; RV32-NEXT: addi a7, a5, -1
-; RV32-NEXT: bnez a5, .LBB48_13
-; RV32-NEXT: # %bb.12: # %fp-to-i-cleanup
-; RV32-NEXT: mv a1, a4
-; RV32-NEXT: .LBB48_13: # %fp-to-i-cleanup
-; RV32-NEXT: or a4, a7, s2
-; RV32-NEXT: and a2, a6, a2
-; RV32-NEXT: and a5, a6, a0
-; RV32-NEXT: beq a1, a3, .LBB48_15
-; RV32-NEXT: # %bb.14: # %fp-to-i-cleanup
-; RV32-NEXT: sltu a0, a3, a1
-; RV32-NEXT: j .LBB48_16
-; RV32-NEXT: .LBB48_15:
-; RV32-NEXT: snez a0, a4
-; RV32-NEXT: .LBB48_16: # %fp-to-i-cleanup
-; RV32-NEXT: and a5, a5, a2
-; RV32-NEXT: li a3, -1
-; RV32-NEXT: beq a5, a3, .LBB48_18
-; RV32-NEXT: # %bb.17: # %fp-to-i-cleanup
-; RV32-NEXT: srli a2, a2, 31
-; RV32-NEXT: xori a0, a2, 1
-; RV32-NEXT: .LBB48_18: # %fp-to-i-cleanup
-; RV32-NEXT: bnez a0, .LBB48_20
-; RV32-NEXT: # %bb.19: # %fp-to-i-cleanup
-; RV32-NEXT: lui a1, 524288
-; RV32-NEXT: .LBB48_20: # %fp-to-i-cleanup
+; RV32-NEXT: neg a5, a4
+; RV32-NEXT: addi a6, a4, -1
+; RV32-NEXT: bnez a4, .LBB48_12
+; RV32-NEXT: # %bb.11: # %fp-to-i-cleanup
+; RV32-NEXT: mv s3, a3
+; RV32-NEXT: .LBB48_12: # %fp-to-i-cleanup
+; RV32-NEXT: or a3, a6, s2
+; RV32-NEXT: and a1, a5, a1
+; RV32-NEXT: and a4, a5, a0
+; RV32-NEXT: beq s3, a2, .LBB48_14
+; RV32-NEXT: # %bb.13: # %fp-to-i-cleanup
+; RV32-NEXT: sltu a0, a2, s3
+; RV32-NEXT: j .LBB48_15
+; RV32-NEXT: .LBB48_14:
+; RV32-NEXT: snez a0, a3
+; RV32-NEXT: .LBB48_15: # %fp-to-i-cleanup
+; RV32-NEXT: and a4, a4, a1
+; RV32-NEXT: li a2, -1
+; RV32-NEXT: beq a4, a2, .LBB48_17
+; RV32-NEXT: # %bb.16: # %fp-to-i-cleanup
+; RV32-NEXT: srli a1, a1, 31
+; RV32-NEXT: xori a0, a1, 1
+; RV32-NEXT: .LBB48_17: # %fp-to-i-cleanup
+; RV32-NEXT: bnez a0, .LBB48_19
+; RV32-NEXT: # %bb.18: # %fp-to-i-cleanup
+; RV32-NEXT: lui s3, 524288
+; RV32-NEXT: .LBB48_19: # %fp-to-i-cleanup
; RV32-NEXT: neg a0, a0
-; RV32-NEXT: and a0, a0, a4
+; RV32-NEXT: and a0, a0, a3
+; RV32-NEXT: mv a1, s3
; RV32-NEXT: lw ra, 124(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s0, 120(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s1, 116(sp) # 4-byte Folded Reload
@@ -7551,6 +7545,16 @@ define i64 @stest_f32i64_mm(float %x) {
; RV32-NEXT: addi sp, sp, 128
; RV32-NEXT: .cfi_def_cfa_offset 0
; RV32-NEXT: ret
+; RV32-NEXT: .LBB48_20: # %fp-to-i-if-then5
+; RV32-NEXT: .cfi_restore_state
+; RV32-NEXT: srli a1, a1, 31
+; RV32-NEXT: lui a0, 524288
+; RV32-NEXT: xori a2, a1, 1
+; RV32-NEXT: addi s2, a1, -1
+; RV32-NEXT: sub a1, a0, a2
+; RV32-NEXT: mv s3, s2
+; RV32-NEXT: mv a0, s2
+; RV32-NEXT: j .LBB48_6
;
; RV64-LABEL: stest_f32i64_mm:
; RV64: # %bb.0: # %entry
@@ -7599,6 +7603,7 @@ define i64 @utest_f32i64_mm(float %x) {
; RV32-NEXT: .cfi_offset s9, -44
; RV32-NEXT: .cfi_offset s10, -48
; RV32-NEXT: .cfi_offset s11, -52
+; RV32-NEXT: .cfi_remember_state
; RV32-NEXT: fmv.x.w a1, fa0
; RV32-NEXT: slli a0, a1, 1
; RV32-NEXT: srli a0, a0, 24
@@ -7609,23 +7614,14 @@ define i64 @utest_f32i64_mm(float %x) {
; RV32-NEXT: li s9, 0
; RV32-NEXT: li a0, 0
; RV32-NEXT: li a1, 0
-; RV32-NEXT: j .LBB49_7
+; RV32-NEXT: j .LBB49_6
; RV32-NEXT: .LBB49_2: # %fp-to-i-if-end
; RV32-NEXT: addi a2, a0, -255
; RV32-NEXT: sltu a3, a2, a0
; RV32-NEXT: sltiu a2, a2, -128
; RV32-NEXT: or a2, a3, a2
-; RV32-NEXT: beqz a2, .LBB49_4
-; RV32-NEXT: # %bb.3: # %fp-to-i-if-then5
-; RV32-NEXT: srli a1, a1, 31
-; RV32-NEXT: lui a0, 524288
-; RV32-NEXT: xori a2, a1, 1
-; RV32-NEXT: addi s2, a1, -1
-; RV32-NEXT: sub a1, a0, a2
-; RV32-NEXT: mv s9, s2
-; RV32-NEXT: mv a0, s2
-; RV32-NEXT: j .LBB49_7
-; RV32-NEXT: .LBB49_4: # %fp-to-i-if-end9
+; RV32-NEXT: bnez a2, .LBB49_7
+; RV32-NEXT: # %bb.3: # %fp-to-i-if-end9
; RV32-NEXT: srai s0, a1, 31
; RV32-NEXT: slli a1, a1, 9
; RV32-NEXT: lui a2, 2048
@@ -7633,8 +7629,8 @@ define i64 @utest_f32i64_mm(float %x) {
; RV32-NEXT: srli a1, a1, 9
; RV32-NEXT: or a1, a1, a2
; RV32-NEXT: ori s8, s0, 1
-; RV32-NEXT: bltu a3, a0, .LBB49_6
-; RV32-NEXT: # %bb.5: # %fp-to-i-if-then12
+; RV32-NEXT: bltu a3, a0, .LBB49_5
+; RV32-NEXT: # %bb.4: # %fp-to-i-if-then12
; RV32-NEXT: sw zero, 24(sp)
; RV32-NEXT: sw zero, 28(sp)
; RV32-NEXT: sw zero, 32(sp)
@@ -7726,8 +7722,8 @@ define i64 @utest_f32i64_mm(float %x) {
; RV32-NEXT: add a1, a1, a2
; RV32-NEXT: add a1, a3, a1
; RV32-NEXT: add a1, a1, a5
-; RV32-NEXT: j .LBB49_7
-; RV32-NEXT: .LBB49_6: # %fp-to-i-if-else
+; RV32-NEXT: j .LBB49_6
+; RV32-NEXT: .LBB49_5: # %fp-to-i-if-else
; RV32-NEXT: sw a1, 56(sp)
; RV32-NEXT: sw zero, 60(sp)
; RV32-NEXT: sw zero, 64(sp)
@@ -7813,7 +7809,7 @@ define i64 @utest_f32i64_mm(float %x) {
; RV32-NEXT: add a1, a1, s7
; RV32-NEXT: sltu a2, a0, s4
; RV32-NEXT: add a1, a1, a2
-; RV32-NEXT: .LBB49_7: # %fp-to-i-cleanup
+; RV32-NEXT: .LBB49_6: # %fp-to-i-cleanup
; RV32-NEXT: or a2, a1, a0
; RV32-NEXT: xori a0, a0, 1
; RV32-NEXT: seqz a2, a2
@@ -7853,6 +7849,16 @@ define i64 @utest_f32i64_mm(float %x) {
; RV32-NEXT: addi sp, sp, 128
; RV32-NEXT: .cfi_def_cfa_offset 0
; RV32-NEXT: ret
+; RV32-NEXT: .LBB49_7: # %fp-to-i-if-then5
+; RV32-NEXT: .cfi_restore_state
+; RV32-NEXT: srli a1, a1, 31
+; RV32-NEXT: lui a0, 524288
+; RV32-NEXT: xori a2, a1, 1
+; RV32-NEXT: addi s2, a1, -1
+; RV32-NEXT: sub a1, a0, a2
+; RV32-NEXT: mv s9, s2
+; RV32-NEXT: mv a0, s2
+; RV32-NEXT: j .LBB49_6
;
; RV64-LABEL: utest_f32i64_mm:
; RV64: # %bb.0: # %entry
@@ -7917,24 +7923,14 @@ define i64 @ustest_f32i64_mm(float %x) {
; RV32-NEXT: li s9, 0
; RV32-NEXT: li a1, 0
; RV32-NEXT: li a0, 0
-; RV32-NEXT: j .LBB50_6
+; RV32-NEXT: j .LBB50_8
; RV32-NEXT: .LBB50_2: # %fp-to-i-if-end
; RV32-NEXT: addi a2, a0, -255
; RV32-NEXT: sltu a3, a2, a0
; RV32-NEXT: sltiu a2, a2, -128
; RV32-NEXT: or a2, a3, a2
-; RV32-NEXT: beqz a2, .LBB50_4
-; RV32-NEXT: # %bb.3: # %fp-to-i-if-then5
-; RV32-NEXT: srli a1, a1, 31
-; RV32-NEXT: lui a0, 524288
-; RV32-NEXT: xori a2, a1, 1
-; RV32-NEXT: addi s2, a1, -1
-; RV32-NEXT: sub a0, a0, a2
-; RV32-NEXT: mv s9, s2
-; RV32-NEXT: mv a1, s2
-; RV32-NEXT: beqz a0, .LBB50_6
-; RV32-NEXT: j .LBB50_8
-; RV32-NEXT: .LBB50_4: # %fp-to-i-if-end9
+; RV32-NEXT: bnez a2, .LBB50_7
+; RV32-NEXT: # %bb.3: # %fp-to-i-if-end9
; RV32-NEXT: srai s0, a1, 31
; RV32-NEXT: slli a1, a1, 9
; RV32-NEXT: lui a2, 2048
@@ -7942,8 +7938,8 @@ define i64 @ustest_f32i64_mm(float %x) {
; RV32-NEXT: srli a1, a1, 9
; RV32-NEXT: or a1, a1, a2
; RV32-NEXT: ori s8, s0, 1
-; RV32-NEXT: bltu a3, a0, .LBB50_7
-; RV32-NEXT: # %bb.5: # %fp-to-i-if-then12
+; RV32-NEXT: bltu a3, a0, .LBB50_5
+; RV32-NEXT: # %bb.4: # %fp-to-i-if-then12
; RV32-NEXT: sw zero, 24(sp)
; RV32-NEXT: sw zero, 28(sp)
; RV32-NEXT: sw zero, 32(sp)
@@ -8035,11 +8031,9 @@ define i64 @ustest_f32i64_mm(float %x) {
; RV32-NEXT: add a0, s6, a0
; RV32-NEXT: add a0, a2, a0
; RV32-NEXT: add a0, a0, a4
-; RV32-NEXT: bnez a0, .LBB50_8
-; RV32-NEXT: .LBB50_6:
-; RV32-NEXT: seqz a2, a1
-; RV32-NEXT: j .LBB50_9
-; RV32-NEXT: .LBB50_7: # %fp-to-i-if-else
+; RV32-NEXT: bnez a0, .LBB50_6
+; RV32-NEXT: j .LBB50_8
+; RV32-NEXT: .LBB50_5: # %fp-to-i-if-else
; RV32-NEXT: sw a1, 56(sp)
; RV32-NEXT: sw zero, 60(sp)
; RV32-NEXT: sw zero, 64(sp)
@@ -8125,9 +8119,21 @@ define i64 @ustest_f32i64_mm(float %x) {
; RV32-NEXT: add a0, a0, s7
; RV32-NEXT: sltu a2, a1, s4
; RV32-NEXT: add a0, a0, a2
-; RV32-NEXT: beqz a0, .LBB50_6
-; RV32-NEXT: .LBB50_8: # %fp-to-i-cleanup
+; RV32-NEXT: beqz a0, .LBB50_8
+; RV32-NEXT: .LBB50_6: # %fp-to-i-cleanup
; RV32-NEXT: srli a2, a0, 31
+; RV32-NEXT: j .LBB50_9
+; RV32-NEXT: .LBB50_7: # %fp-to-i-if-then5
+; RV32-NEXT: srli a1, a1, 31
+; RV32-NEXT: lui a0, 524288
+; RV32-NEXT: xori a2, a1, 1
+; RV32-NEXT: addi s2, a1, -1
+; RV32-NEXT: sub a0, a0, a2
+; RV32-NEXT: mv s9, s2
+; RV32-NEXT: mv a1, s2
+; RV32-NEXT: bnez a0, .LBB50_6
+; RV32-NEXT: .LBB50_8:
+; RV32-NEXT: seqz a2, a1
; RV32-NEXT: .LBB50_9: # %fp-to-i-cleanup
; RV32-NEXT: xori a1, a1, 1
; RV32-NEXT: or a1, a1, a0
diff --git a/llvm/test/Transforms/ExpandIRInsts/X86/expand-large-fp-convert-fptosi129.ll b/llvm/test/Transforms/ExpandIRInsts/X86/expand-large-fp-convert-fptosi129.ll
index d3a77de057ef5..ea893a27be3f8 100644
--- a/llvm/test/Transforms/ExpandIRInsts/X86/expand-large-fp-convert-fptosi129.ll
+++ b/llvm/test/Transforms/ExpandIRInsts/X86/expand-large-fp-convert-fptosi129.ll
@@ -1,8 +1,8 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals
; RUN: opt -S -mtriple=x86_64-- --expand-ir-insts < %s | FileCheck %s
; RUN: opt -S -mtriple=x86_64-- -passes='require<libcall-lowering-info>,expand-ir-insts' < %s | FileCheck %s
-define i129 @halftosi129(half %a) {
+define i129 @halftosi129(half %a) !prof !0 {
; CHECK-LABEL: @halftosi129(
; CHECK-NEXT: [[TMP1:%.*]] = fptosi half [[A:%.*]] to i32
; CHECK-NEXT: [[TMP2:%.*]] = sext i32 [[TMP1]] to i129
@@ -12,29 +12,29 @@ define i129 @halftosi129(half %a) {
ret i129 %conv
}
-define i129 @floattosi129(float %a) {
+define i129 @floattosi129(float %a) !prof !0 {
; CHECK-LABEL: @floattosi129(
; CHECK-NEXT: fp-to-i-entry:
; CHECK-NEXT: [[TMP0:%.*]] = bitcast float [[A:%.*]] to i32
; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i129
; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt i32 [[TMP0]], -1
-; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[TMP2]], i129 1, i129 -1
+; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[TMP2]], i129 1, i129 -1, !prof [[PROF1:![0-9]+]]
; CHECK-NEXT: [[TMP4:%.*]] = lshr i129 [[TMP1]], 23
; CHECK-NEXT: [[TMP5:%.*]] = and i129 [[TMP4]], 255
; CHECK-NEXT: [[TMP6:%.*]] = and i129 [[TMP1]], 8388607
; CHECK-NEXT: [[TMP7:%.*]] = or i129 [[TMP6]], 8388608
; CHECK-NEXT: [[TMP8:%.*]] = icmp ult i129 [[TMP5]], 127
-; CHECK-NEXT: br i1 [[TMP8]], label [[FP_TO_I_CLEANUP:%.*]], label [[FP_TO_I_IF_END:%.*]]
+; CHECK-NEXT: br i1 [[TMP8]], label [[FP_TO_I_CLEANUP:%.*]], label [[FP_TO_I_IF_END:%.*]], !prof [[PROF1]]
; CHECK: fp-to-i-if-end:
; CHECK-NEXT: [[TMP9:%.*]] = add i129 [[TMP5]], -256
; CHECK-NEXT: [[TMP10:%.*]] = icmp ult i129 [[TMP9]], -129
-; CHECK-NEXT: br i1 [[TMP10]], label [[FP_TO_I_IF_THEN5:%.*]], label [[FP_TO_I_IF_END9:%.*]]
+; CHECK-NEXT: br i1 [[TMP10]], label [[FP_TO_I_IF_THEN5:%.*]], label [[FP_TO_I_IF_END9:%.*]], !prof [[PROF2:![0-9]+]]
; CHECK: fp-to-i-if-then5:
-; CHECK-NEXT: [[TMP11:%.*]] = select i1 [[TMP2]], i129 340282366920938463463374607431768211455, i129 -340282366920938463463374607431768211456
+; CHECK-NEXT: [[TMP11:%.*]] = select i1 [[TMP2]], i129 340282366920938463463374607431768211455, i129 -340282366920938463463374607431768211456, !prof [[PROF1]]
; CHECK-NEXT: br label [[FP_TO_I_CLEANUP]]
; CHECK: fp-to-i-if-end9:
; CHECK-NEXT: [[TMP12:%.*]] = icmp ult i129 [[TMP5]], 150
-; CHECK-NEXT: br i1 [[TMP12]], label [[FP_TO_I_IF_THEN12:%.*]], label [[FP_TO_I_IF_ELSE:%.*]]
+; CHECK-NEXT: br i1 [[TMP12]], label [[FP_TO_I_IF_THEN12:%.*]], label [[FP_TO_I_IF_ELSE:%.*]], !prof [[PROF1]]
; CHECK: fp-to-i-if-then12:
; CHECK-NEXT: [[TMP13:%.*]] = sub i129 150, [[TMP5]]
; CHECK-NEXT: [[TMP14:%.*]] = lshr i129 [[TMP7]], [[TMP13]]
@@ -69,7 +69,7 @@ define i129 @doubletosi129(double %a) {
; CHECK: fp-to-i-if-end:
; CHECK-NEXT: [[TMP9:%.*]] = add i129 [[TMP5]], -1152
; CHECK-NEXT: [[TMP10:%.*]] = icmp ult i129 [[TMP9]], -129
-; CHECK-NEXT: br i1 [[TMP10]], label [[FP_TO_I_IF_THEN5:%.*]], label [[FP_TO_I_IF_END9:%.*]]
+; CHECK-NEXT: br i1 [[TMP10]], label [[FP_TO_I_IF_THEN5:%.*]], label [[FP_TO_I_IF_END9:%.*]], !prof [[PROF2]]
; CHECK: fp-to-i-if-then5:
; CHECK-NEXT: [[TMP11:%.*]] = select i1 [[TMP2]], i129 340282366920938463463374607431768211455, i129 -340282366920938463463374607431768211456
; CHECK-NEXT: br label [[FP_TO_I_CLEANUP]]
@@ -111,7 +111,7 @@ define i129 @x86_fp80tosi129(x86_fp80 %a) {
; CHECK: fp-to-i-if-end:
; CHECK-NEXT: [[TMP10:%.*]] = add i129 [[TMP6]], -16512
; CHECK-NEXT: [[TMP11:%.*]] = icmp ult i129 [[TMP10]], -129
-; CHECK-NEXT: br i1 [[TMP11]], label [[FP_TO_I_IF_THEN5:%.*]], label [[FP_TO_I_IF_END9:%.*]]
+; CHECK-NEXT: br i1 [[TMP11]], label [[FP_TO_I_IF_THEN5:%.*]], label [[FP_TO_I_IF_END9:%.*]], !prof [[PROF2]]
; CHECK: fp-to-i-if-then5:
; CHECK-NEXT: [[TMP12:%.*]] = select i1 [[TMP3]], i129 340282366920938463463374607431768211455, i129 -340282366920938463463374607431768211456
; CHECK-NEXT: br label [[FP_TO_I_CLEANUP]]
@@ -152,7 +152,7 @@ define i129 @fp128tosi129(fp128 %a) {
; CHECK: fp-to-i-if-end:
; CHECK-NEXT: [[TMP9:%.*]] = add i129 [[TMP5]], -16512
; CHECK-NEXT: [[TMP10:%.*]] = icmp ult i129 [[TMP9]], -129
-; CHECK-NEXT: br i1 [[TMP10]], label [[FP_TO_I_IF_THEN5:%.*]], label [[FP_TO_I_IF_END9:%.*]]
+; CHECK-NEXT: br i1 [[TMP10]], label [[FP_TO_I_IF_THEN5:%.*]], label [[FP_TO_I_IF_END9:%.*]], !prof [[PROF2]]
; CHECK: fp-to-i-if-then5:
; CHECK-NEXT: [[TMP11:%.*]] = select i1 [[TMP2]], i129 340282366920938463463374607431768211455, i129 -340282366920938463463374607431768211456
; CHECK-NEXT: br label [[FP_TO_I_CLEANUP]]
@@ -194,7 +194,7 @@ define <2 x i129> @floattosi129v2(<2 x float> %a) {
; CHECK: fp-to-i-if-end2:
; CHECK-NEXT: [[TMP10:%.*]] = add i129 [[TMP6]], -256
; CHECK-NEXT: [[TMP11:%.*]] = icmp ult i129 [[TMP10]], -129
-; CHECK-NEXT: br i1 [[TMP11]], label [[FP_TO_I_IF_THEN53:%.*]], label [[FP_TO_I_IF_END94:%.*]]
+; CHECK-NEXT: br i1 [[TMP11]], label [[FP_TO_I_IF_THEN53:%.*]], label [[FP_TO_I_IF_END94:%.*]], !prof [[PROF2]]
; CHECK: fp-to-i-if-then53:
; CHECK-NEXT: [[TMP12:%.*]] = select i1 [[TMP3]], i129 340282366920938463463374607431768211455, i129 -340282366920938463463374607431768211456
; CHECK-NEXT: br label [[FP_TO_I_CLEANUP1]]
@@ -228,7 +228,7 @@ define <2 x i129> @floattosi129v2(<2 x float> %a) {
; CHECK: fp-to-i-if-end:
; CHECK-NEXT: [[TMP32:%.*]] = add i129 [[TMP28]], -256
; CHECK-NEXT: [[TMP33:%.*]] = icmp ult i129 [[TMP32]], -129
-; CHECK-NEXT: br i1 [[TMP33]], label [[FP_TO_I_IF_THEN5:%.*]], label [[FP_TO_I_IF_END9:%.*]]
+; CHECK-NEXT: br i1 [[TMP33]], label [[FP_TO_I_IF_THEN5:%.*]], label [[FP_TO_I_IF_END9:%.*]], !prof [[PROF2]]
; CHECK: fp-to-i-if-then5:
; CHECK-NEXT: [[TMP34:%.*]] = select i1 [[TMP25]], i129 340282366920938463463374607431768211455, i129 -340282366920938463463374607431768211456
; CHECK-NEXT: br label [[FP_TO_I_CLEANUP]]
@@ -253,3 +253,10 @@ define <2 x i129> @floattosi129v2(<2 x float> %a) {
%conv = fptosi <2 x float> %a to <2 x i129>
ret <2 x i129> %conv
}
+
+!0 = !{!"function_entry_count", i64 1000}
+;.
+; CHECK: [[META0:![0-9]+]] = !{!"function_entry_count", i64 1000}
+; CHECK: [[PROF1]] = !{!"unknown", !"expand-ir-insts"}
+; CHECK: [[PROF2]] = !{!"branch_weights", i32 1, i32 1048575}
+;.
diff --git a/llvm/test/Transforms/ExpandIRInsts/X86/expand-large-fp-convert-fptoui129.ll b/llvm/test/Transforms/ExpandIRInsts/X86/expand-large-fp-convert-fptoui129.ll
index 07de91d404988..816f2a015e725 100644
--- a/llvm/test/Transforms/ExpandIRInsts/X86/expand-large-fp-convert-fptoui129.ll
+++ b/llvm/test/Transforms/ExpandIRInsts/X86/expand-large-fp-convert-fptoui129.ll
@@ -1,8 +1,8 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals
; RUN: opt -S -mtriple=x86_64-- --expand-ir-insts < %s | FileCheck %s
; RUN: opt -S -mtriple=x86_64-- -passes='require<libcall-lowering-info>,expand-ir-insts' < %s | FileCheck %s
-define i129 @halftoui129(half %a) {
+define i129 @halftoui129(half %a) !prof !0 {
; CHECK-LABEL: @halftoui129(
; CHECK-NEXT: [[TMP1:%.*]] = fptoui half [[A:%.*]] to i32
; CHECK-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i129
@@ -12,29 +12,29 @@ define i129 @halftoui129(half %a) {
ret i129 %conv
}
-define i129 @floattoui129(float %a) {
+define i129 @floattoui129(float %a) !prof !0 {
; CHECK-LABEL: @floattoui129(
; CHECK-NEXT: fp-to-i-entry:
; CHECK-NEXT: [[TMP0:%.*]] = bitcast float [[A:%.*]] to i32
; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i129
; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt i32 [[TMP0]], -1
-; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[TMP2]], i129 1, i129 -1
+; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[TMP2]], i129 1, i129 -1, !prof [[PROF1:![0-9]+]]
; CHECK-NEXT: [[TMP4:%.*]] = lshr i129 [[TMP1]], 23
; CHECK-NEXT: [[TMP5:%.*]] = and i129 [[TMP4]], 255
; CHECK-NEXT: [[TMP6:%.*]] = and i129 [[TMP1]], 8388607
; CHECK-NEXT: [[TMP7:%.*]] = or i129 [[TMP6]], 8388608
; CHECK-NEXT: [[TMP8:%.*]] = icmp ult i129 [[TMP5]], 127
-; CHECK-NEXT: br i1 [[TMP8]], label [[FP_TO_I_CLEANUP:%.*]], label [[FP_TO_I_IF_END:%.*]]
+; CHECK-NEXT: br i1 [[TMP8]], label [[FP_TO_I_CLEANUP:%.*]], label [[FP_TO_I_IF_END:%.*]], !prof [[PROF1]]
; CHECK: fp-to-i-if-end:
; CHECK-NEXT: [[TMP9:%.*]] = add i129 [[TMP5]], -256
; CHECK-NEXT: [[TMP10:%.*]] = icmp ult i129 [[TMP9]], -129
-; CHECK-NEXT: br i1 [[TMP10]], label [[FP_TO_I_IF_THEN5:%.*]], label [[FP_TO_I_IF_END9:%.*]]
+; CHECK-NEXT: br i1 [[TMP10]], label [[FP_TO_I_IF_THEN5:%.*]], label [[FP_TO_I_IF_END9:%.*]], !prof [[PROF2:![0-9]+]]
; CHECK: fp-to-i-if-then5:
-; CHECK-NEXT: [[TMP11:%.*]] = select i1 [[TMP2]], i129 340282366920938463463374607431768211455, i129 -340282366920938463463374607431768211456
+; CHECK-NEXT: [[TMP11:%.*]] = select i1 [[TMP2]], i129 340282366920938463463374607431768211455, i129 -340282366920938463463374607431768211456, !prof [[PROF1]]
; CHECK-NEXT: br label [[FP_TO_I_CLEANUP]]
; CHECK: fp-to-i-if-end9:
; CHECK-NEXT: [[TMP12:%.*]] = icmp ult i129 [[TMP5]], 150
-; CHECK-NEXT: br i1 [[TMP12]], label [[FP_TO_I_IF_THEN12:%.*]], label [[FP_TO_I_IF_ELSE:%.*]]
+; CHECK-NEXT: br i1 [[TMP12]], label [[FP_TO_I_IF_THEN12:%.*]], label [[FP_TO_I_IF_ELSE:%.*]], !prof [[PROF1]]
; CHECK: fp-to-i-if-then12:
; CHECK-NEXT: [[TMP13:%.*]] = sub i129 150, [[TMP5]]
; CHECK-NEXT: [[TMP14:%.*]] = lshr i129 [[TMP7]], [[TMP13]]
@@ -69,7 +69,7 @@ define i129 @doubletoui129(double %a) {
; CHECK: fp-to-i-if-end:
; CHECK-NEXT: [[TMP9:%.*]] = add i129 [[TMP5]], -1152
; CHECK-NEXT: [[TMP10:%.*]] = icmp ult i129 [[TMP9]], -129
-; CHECK-NEXT: br i1 [[TMP10]], label [[FP_TO_I_IF_THEN5:%.*]], label [[FP_TO_I_IF_END9:%.*]]
+; CHECK-NEXT: br i1 [[TMP10]], label [[FP_TO_I_IF_THEN5:%.*]], label [[FP_TO_I_IF_END9:%.*]], !prof [[PROF2]]
; CHECK: fp-to-i-if-then5:
; CHECK-NEXT: [[TMP11:%.*]] = select i1 [[TMP2]], i129 340282366920938463463374607431768211455, i129 -340282366920938463463374607431768211456
; CHECK-NEXT: br label [[FP_TO_I_CLEANUP]]
@@ -111,7 +111,7 @@ define i129 @x86_fp80toui129(x86_fp80 %a) {
; CHECK: fp-to-i-if-end:
; CHECK-NEXT: [[TMP10:%.*]] = add i129 [[TMP6]], -16512
; CHECK-NEXT: [[TMP11:%.*]] = icmp ult i129 [[TMP10]], -129
-; CHECK-NEXT: br i1 [[TMP11]], label [[FP_TO_I_IF_THEN5:%.*]], label [[FP_TO_I_IF_END9:%.*]]
+; CHECK-NEXT: br i1 [[TMP11]], label [[FP_TO_I_IF_THEN5:%.*]], label [[FP_TO_I_IF_END9:%.*]], !prof [[PROF2]]
; CHECK: fp-to-i-if-then5:
; CHECK-NEXT: [[TMP12:%.*]] = select i1 [[TMP3]], i129 340282366920938463463374607431768211455, i129 -340282366920938463463374607431768211456
; CHECK-NEXT: br label [[FP_TO_I_CLEANUP]]
@@ -152,7 +152,7 @@ define i129 @fp128toui129(fp128 %a) {
; CHECK: fp-to-i-if-end:
; CHECK-NEXT: [[TMP9:%.*]] = add i129 [[TMP5]], -16512
; CHECK-NEXT: [[TMP10:%.*]] = icmp ult i129 [[TMP9]], -129
-; CHECK-NEXT: br i1 [[TMP10]], label [[FP_TO_I_IF_THEN5:%.*]], label [[FP_TO_I_IF_END9:%.*]]
+; CHECK-NEXT: br i1 [[TMP10]], label [[FP_TO_I_IF_THEN5:%.*]], label [[FP_TO_I_IF_END9:%.*]], !prof [[PROF2]]
; CHECK: fp-to-i-if-then5:
; CHECK-NEXT: [[TMP11:%.*]] = select i1 [[TMP2]], i129 340282366920938463463374607431768211455, i129 -340282366920938463463374607431768211456
; CHECK-NEXT: br label [[FP_TO_I_CLEANUP]]
@@ -194,7 +194,7 @@ define <2 x i129> @floattoui129v2(<2 x float> %a) {
; CHECK: fp-to-i-if-end2:
; CHECK-NEXT: [[TMP10:%.*]] = add i129 [[TMP6]], -256
; CHECK-NEXT: [[TMP11:%.*]] = icmp ult i129 [[TMP10]], -129
-; CHECK-NEXT: br i1 [[TMP11]], label [[FP_TO_I_IF_THEN53:%.*]], label [[FP_TO_I_IF_END94:%.*]]
+; CHECK-NEXT: br i1 [[TMP11]], label [[FP_TO_I_IF_THEN53:%.*]], label [[FP_TO_I_IF_END94:%.*]], !prof [[PROF2]]
; CHECK: fp-to-i-if-then53:
; CHECK-NEXT: [[TMP12:%.*]] = select i1 [[TMP3]], i129 340282366920938463463374607431768211455, i129 -340282366920938463463374607431768211456
; CHECK-NEXT: br label [[FP_TO_I_CLEANUP1]]
@@ -228,7 +228,7 @@ define <2 x i129> @floattoui129v2(<2 x float> %a) {
; CHECK: fp-to-i-if-end:
; CHECK-NEXT: [[TMP32:%.*]] = add i129 [[TMP28]], -256
; CHECK-NEXT: [[TMP33:%.*]] = icmp ult i129 [[TMP32]], -129
-; CHECK-NEXT: br i1 [[TMP33]], label [[FP_TO_I_IF_THEN5:%.*]], label [[FP_TO_I_IF_END9:%.*]]
+; CHECK-NEXT: br i1 [[TMP33]], label [[FP_TO_I_IF_THEN5:%.*]], label [[FP_TO_I_IF_END9:%.*]], !prof [[PROF2]]
; CHECK: fp-to-i-if-then5:
; CHECK-NEXT: [[TMP34:%.*]] = select i1 [[TMP25]], i129 340282366920938463463374607431768211455, i129 -340282366920938463463374607431768211456
; CHECK-NEXT: br label [[FP_TO_I_CLEANUP]]
@@ -253,3 +253,10 @@ define <2 x i129> @floattoui129v2(<2 x float> %a) {
%conv = fptoui <2 x float> %a to <2 x i129>
ret <2 x i129> %conv
}
+
+!0 = !{!"function_entry_count", i64 1000}
+;.
+; CHECK: [[META0:![0-9]+]] = !{!"function_entry_count", i64 1000}
+; CHECK: [[PROF1]] = !{!"unknown", !"expand-ir-insts"}
+; CHECK: [[PROF2]] = !{!"branch_weights", i32 1, i32 1048575}
+;.
diff --git a/llvm/test/Transforms/ExpandIRInsts/X86/expand-large-fp-convert-si129tofp.ll b/llvm/test/Transforms/ExpandIRInsts/X86/expand-large-fp-convert-si129tofp.ll
index fab6e431872e7..d035d6e11fc05 100644
--- a/llvm/test/Transforms/ExpandIRInsts/X86/expand-large-fp-convert-si129tofp.ll
+++ b/llvm/test/Transforms/ExpandIRInsts/X86/expand-large-fp-convert-si129tofp.ll
@@ -1,12 +1,12 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals
; RUN: opt -S -mtriple=x86_64-- --expand-ir-insts < %s | FileCheck %s
; RUN: opt -S -mtriple=x86_64-- -passes='require<libcall-lowering-info>,expand-ir-insts' < %s | FileCheck %s
-define half @si129tohalf(i129 %a) {
+define half @si129tohalf(i129 %a) !prof !0 {
; CHECK-LABEL: @si129tohalf(
; CHECK-NEXT: itofp-entry:
; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i129 [[A:%.*]], 0
-; CHECK-NEXT: br i1 [[TMP0]], label [[ITOFP_RETURN:%.*]], label [[ITOFP_IF_END:%.*]]
+; CHECK-NEXT: br i1 [[TMP0]], label [[ITOFP_RETURN:%.*]], label [[ITOFP_IF_END:%.*]], !prof [[PROF1:![0-9]+]]
; CHECK: itofp-if-end:
; CHECK-NEXT: [[TMP1:%.*]] = ashr i129 [[A]], 128
; CHECK-NEXT: [[TMP2:%.*]] = xor i129 [[TMP1]], [[A]]
@@ -16,12 +16,12 @@ define half @si129tohalf(i129 %a) {
; CHECK-NEXT: [[TMP6:%.*]] = sub i32 129, [[TMP5]]
; CHECK-NEXT: [[TMP7:%.*]] = sub i32 128, [[TMP5]]
; CHECK-NEXT: [[TMP8:%.*]] = icmp sgt i32 [[TMP6]], 24
-; CHECK-NEXT: br i1 [[TMP8]], label [[ITOFP_IF_THEN4:%.*]], label [[ITOFP_IF_ELSE:%.*]]
+; CHECK-NEXT: br i1 [[TMP8]], label [[ITOFP_IF_THEN4:%.*]], label [[ITOFP_IF_ELSE:%.*]], !prof [[PROF2:![0-9]+]]
; CHECK: itofp-if-then4:
; CHECK-NEXT: switch i32 [[TMP6]], label [[ITOFP_SW_DEFAULT:%.*]] [
; CHECK-NEXT: i32 25, label [[ITOFP_SW_BB:%.*]]
; CHECK-NEXT: i32 26, label [[ITOFP_SW_EPILOG:%.*]]
-; CHECK-NEXT: ]
+; CHECK-NEXT: ], !prof [[PROF3:![0-9]+]]
; CHECK: itofp-sw-bb:
; CHECK-NEXT: [[TMP9:%.*]] = shl i129 [[TMP3]], 1
; CHECK-NEXT: br label [[ITOFP_SW_EPILOG]]
@@ -51,7 +51,7 @@ define half @si129tohalf(i129 %a) {
; CHECK-NEXT: [[TMP29:%.*]] = trunc i129 [[TMP27]] to i32
; CHECK-NEXT: [[TMP30:%.*]] = lshr i129 [[TMP27]], 32
; CHECK-NEXT: [[TMP31:%.*]] = trunc i129 [[TMP30]] to i32
-; CHECK-NEXT: br i1 [[TMP28]], label [[ITOFP_IF_END26:%.*]], label [[ITOFP_IF_THEN20:%.*]]
+; CHECK-NEXT: br i1 [[TMP28]], label [[ITOFP_IF_END26:%.*]], label [[ITOFP_IF_THEN20:%.*]], !prof [[PROF2]]
; CHECK: itofp-if-then20:
; CHECK-NEXT: [[TMP32:%.*]] = ashr i129 [[TMP26]], 3
; CHECK-NEXT: [[TMP33:%.*]] = trunc i129 [[TMP32]] to i32
@@ -91,7 +91,7 @@ define float @si129tofloat(i129 %a) {
; CHECK-LABEL: @si129tofloat(
; CHECK-NEXT: itofp-entry:
; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i129 [[A:%.*]], 0
-; CHECK-NEXT: br i1 [[TMP0]], label [[ITOFP_RETURN:%.*]], label [[ITOFP_IF_END:%.*]]
+; CHECK-NEXT: br i1 [[TMP0]], label [[ITOFP_RETURN:%.*]], label [[ITOFP_IF_END:%.*]], !prof [[PROF1]]
; CHECK: itofp-if-end:
; CHECK-NEXT: [[TMP1:%.*]] = ashr i129 [[A]], 128
; CHECK-NEXT: [[TMP2:%.*]] = xor i129 [[TMP1]], [[A]]
@@ -101,12 +101,12 @@ define float @si129tofloat(i129 %a) {
; CHECK-NEXT: [[TMP6:%.*]] = sub i32 129, [[TMP5]]
; CHECK-NEXT: [[TMP7:%.*]] = sub i32 128, [[TMP5]]
; CHECK-NEXT: [[TMP8:%.*]] = icmp sgt i32 [[TMP6]], 24
-; CHECK-NEXT: br i1 [[TMP8]], label [[ITOFP_IF_THEN4:%.*]], label [[ITOFP_IF_ELSE:%.*]]
+; CHECK-NEXT: br i1 [[TMP8]], label [[ITOFP_IF_THEN4:%.*]], label [[ITOFP_IF_ELSE:%.*]], !prof [[PROF2]]
; CHECK: itofp-if-then4:
; CHECK-NEXT: switch i32 [[TMP6]], label [[ITOFP_SW_DEFAULT:%.*]] [
; CHECK-NEXT: i32 25, label [[ITOFP_SW_BB:%.*]]
; CHECK-NEXT: i32 26, label [[ITOFP_SW_EPILOG:%.*]]
-; CHECK-NEXT: ]
+; CHECK-NEXT: ], !prof [[PROF3]]
; CHECK: itofp-sw-bb:
; CHECK-NEXT: [[TMP9:%.*]] = shl i129 [[TMP3]], 1
; CHECK-NEXT: br label [[ITOFP_SW_EPILOG]]
@@ -136,7 +136,7 @@ define float @si129tofloat(i129 %a) {
; CHECK-NEXT: [[TMP29:%.*]] = trunc i129 [[TMP27]] to i32
; CHECK-NEXT: [[TMP30:%.*]] = lshr i129 [[TMP27]], 32
; CHECK-NEXT: [[TMP31:%.*]] = trunc i129 [[TMP30]] to i32
-; CHECK-NEXT: br i1 [[TMP28]], label [[ITOFP_IF_END26:%.*]], label [[ITOFP_IF_THEN20:%.*]]
+; CHECK-NEXT: br i1 [[TMP28]], label [[ITOFP_IF_END26:%.*]], label [[ITOFP_IF_THEN20:%.*]], !prof [[PROF2]]
; CHECK: itofp-if-then20:
; CHECK-NEXT: [[TMP32:%.*]] = ashr i129 [[TMP26]], 3
; CHECK-NEXT: [[TMP33:%.*]] = trunc i129 [[TMP32]] to i32
@@ -175,7 +175,7 @@ define double @si129todouble(i129 %a) {
; CHECK-LABEL: @si129todouble(
; CHECK-NEXT: itofp-entry:
; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i129 [[A:%.*]], 0
-; CHECK-NEXT: br i1 [[TMP0]], label [[ITOFP_RETURN:%.*]], label [[ITOFP_IF_END:%.*]]
+; CHECK-NEXT: br i1 [[TMP0]], label [[ITOFP_RETURN:%.*]], label [[ITOFP_IF_END:%.*]], !prof [[PROF1]]
; CHECK: itofp-if-end:
; CHECK-NEXT: [[TMP1:%.*]] = ashr i129 [[A]], 128
; CHECK-NEXT: [[TMP2:%.*]] = xor i129 [[TMP1]], [[A]]
@@ -185,12 +185,12 @@ define double @si129todouble(i129 %a) {
; CHECK-NEXT: [[TMP6:%.*]] = sub i32 129, [[TMP5]]
; CHECK-NEXT: [[TMP7:%.*]] = sub i32 128, [[TMP5]]
; CHECK-NEXT: [[TMP8:%.*]] = icmp sgt i32 [[TMP6]], 53
-; CHECK-NEXT: br i1 [[TMP8]], label [[ITOFP_IF_THEN4:%.*]], label [[ITOFP_IF_ELSE:%.*]]
+; CHECK-NEXT: br i1 [[TMP8]], label [[ITOFP_IF_THEN4:%.*]], label [[ITOFP_IF_ELSE:%.*]], !prof [[PROF2]]
; CHECK: itofp-if-then4:
; CHECK-NEXT: switch i32 [[TMP6]], label [[ITOFP_SW_DEFAULT:%.*]] [
; CHECK-NEXT: i32 54, label [[ITOFP_SW_BB:%.*]]
; CHECK-NEXT: i32 55, label [[ITOFP_SW_EPILOG:%.*]]
-; CHECK-NEXT: ]
+; CHECK-NEXT: ], !prof [[PROF3]]
; CHECK: itofp-sw-bb:
; CHECK-NEXT: [[TMP9:%.*]] = shl i129 [[TMP3]], 1
; CHECK-NEXT: br label [[ITOFP_SW_EPILOG]]
@@ -220,7 +220,7 @@ define double @si129todouble(i129 %a) {
; CHECK-NEXT: [[TMP29:%.*]] = trunc i129 [[TMP27]] to i64
; CHECK-NEXT: [[TMP30:%.*]] = lshr i129 [[TMP27]], 32
; CHECK-NEXT: [[TMP31:%.*]] = trunc i129 [[TMP30]] to i32
-; CHECK-NEXT: br i1 [[TMP28]], label [[ITOFP_IF_END26:%.*]], label [[ITOFP_IF_THEN20:%.*]]
+; CHECK-NEXT: br i1 [[TMP28]], label [[ITOFP_IF_END26:%.*]], label [[ITOFP_IF_THEN20:%.*]], !prof [[PROF2]]
; CHECK: itofp-if-then20:
; CHECK-NEXT: [[TMP32:%.*]] = ashr i129 [[TMP26]], 3
; CHECK-NEXT: [[TMP33:%.*]] = trunc i129 [[TMP32]] to i64
@@ -264,7 +264,7 @@ define x86_fp80 @si129tox86_fp80(i129 %a) {
; CHECK-LABEL: @si129tox86_fp80(
; CHECK-NEXT: itofp-entry:
; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i129 [[A:%.*]], 0
-; CHECK-NEXT: br i1 [[TMP0]], label [[ITOFP_RETURN:%.*]], label [[ITOFP_IF_END:%.*]]
+; CHECK-NEXT: br i1 [[TMP0]], label [[ITOFP_RETURN:%.*]], label [[ITOFP_IF_END:%.*]], !prof [[PROF1]]
; CHECK: itofp-if-end:
; CHECK-NEXT: [[TMP1:%.*]] = ashr i129 [[A]], 128
; CHECK-NEXT: [[TMP2:%.*]] = xor i129 [[TMP1]], [[A]]
@@ -274,12 +274,12 @@ define x86_fp80 @si129tox86_fp80(i129 %a) {
; CHECK-NEXT: [[TMP6:%.*]] = sub i129 129, [[TMP4]]
; CHECK-NEXT: [[TMP7:%.*]] = sub i129 128, [[TMP4]]
; CHECK-NEXT: [[TMP8:%.*]] = icmp sgt i129 [[TMP6]], 113
-; CHECK-NEXT: br i1 [[TMP8]], label [[ITOFP_IF_THEN4:%.*]], label [[ITOFP_IF_ELSE:%.*]]
+; CHECK-NEXT: br i1 [[TMP8]], label [[ITOFP_IF_THEN4:%.*]], label [[ITOFP_IF_ELSE:%.*]], !prof [[PROF2]]
; CHECK: itofp-if-then4:
; CHECK-NEXT: switch i129 [[TMP6]], label [[ITOFP_SW_DEFAULT:%.*]] [
; CHECK-NEXT: i129 114, label [[ITOFP_SW_BB:%.*]]
; CHECK-NEXT: i129 115, label [[ITOFP_SW_EPILOG:%.*]]
-; CHECK-NEXT: ]
+; CHECK-NEXT: ], !prof [[PROF3]]
; CHECK: itofp-sw-bb:
; CHECK-NEXT: [[TMP9:%.*]] = shl i129 [[TMP3]], 1
; CHECK-NEXT: br label [[ITOFP_SW_EPILOG]]
@@ -307,7 +307,7 @@ define x86_fp80 @si129tox86_fp80(i129 %a) {
; CHECK-NEXT: [[TMP27:%.*]] = trunc i129 [[TMP25]] to i128
; CHECK-NEXT: [[TMP28:%.*]] = lshr i129 [[TMP25]], 32
; CHECK-NEXT: [[TMP29:%.*]] = trunc i129 [[TMP7]] to i64
-; CHECK-NEXT: br i1 [[TMP26]], label [[ITOFP_IF_END26:%.*]], label [[ITOFP_IF_THEN20:%.*]]
+; CHECK-NEXT: br i1 [[TMP26]], label [[ITOFP_IF_END26:%.*]], label [[ITOFP_IF_THEN20:%.*]], !prof [[PROF2]]
; CHECK: itofp-if-then20:
; CHECK-NEXT: [[TMP30:%.*]] = ashr i129 [[TMP24]], 3
; CHECK-NEXT: [[TMP31:%.*]] = trunc i129 [[TMP30]] to i128
@@ -348,7 +348,7 @@ define fp128 @si129tofp128(i129 %a) {
; CHECK-LABEL: @si129tofp128(
; CHECK-NEXT: itofp-entry:
; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i129 [[A:%.*]], 0
-; CHECK-NEXT: br i1 [[TMP0]], label [[ITOFP_RETURN:%.*]], label [[ITOFP_IF_END:%.*]]
+; CHECK-NEXT: br i1 [[TMP0]], label [[ITOFP_RETURN:%.*]], label [[ITOFP_IF_END:%.*]], !prof [[PROF1]]
; CHECK: itofp-if-end:
; CHECK-NEXT: [[TMP1:%.*]] = ashr i129 [[A]], 128
; CHECK-NEXT: [[TMP2:%.*]] = xor i129 [[TMP1]], [[A]]
@@ -358,12 +358,12 @@ define fp128 @si129tofp128(i129 %a) {
; CHECK-NEXT: [[TMP6:%.*]] = sub i129 129, [[TMP4]]
; CHECK-NEXT: [[TMP7:%.*]] = sub i129 128, [[TMP4]]
; CHECK-NEXT: [[TMP8:%.*]] = icmp sgt i129 [[TMP6]], 113
-; CHECK-NEXT: br i1 [[TMP8]], label [[ITOFP_IF_THEN4:%.*]], label [[ITOFP_IF_ELSE:%.*]]
+; CHECK-NEXT: br i1 [[TMP8]], label [[ITOFP_IF_THEN4:%.*]], label [[ITOFP_IF_ELSE:%.*]], !prof [[PROF2]]
; CHECK: itofp-if-then4:
; CHECK-NEXT: switch i129 [[TMP6]], label [[ITOFP_SW_DEFAULT:%.*]] [
; CHECK-NEXT: i129 114, label [[ITOFP_SW_BB:%.*]]
; CHECK-NEXT: i129 115, label [[ITOFP_SW_EPILOG:%.*]]
-; CHECK-NEXT: ]
+; CHECK-NEXT: ], !prof [[PROF3]]
; CHECK: itofp-sw-bb:
; CHECK-NEXT: [[TMP9:%.*]] = shl i129 [[TMP3]], 1
; CHECK-NEXT: br label [[ITOFP_SW_EPILOG]]
@@ -391,7 +391,7 @@ define fp128 @si129tofp128(i129 %a) {
; CHECK-NEXT: [[TMP27:%.*]] = trunc i129 [[TMP25]] to i128
; CHECK-NEXT: [[TMP28:%.*]] = lshr i129 [[TMP25]], 32
; CHECK-NEXT: [[TMP29:%.*]] = trunc i129 [[TMP7]] to i64
-; CHECK-NEXT: br i1 [[TMP26]], label [[ITOFP_IF_END26:%.*]], label [[ITOFP_IF_THEN20:%.*]]
+; CHECK-NEXT: br i1 [[TMP26]], label [[ITOFP_IF_END26:%.*]], label [[ITOFP_IF_THEN20:%.*]], !prof [[PROF2]]
; CHECK: itofp-if-then20:
; CHECK-NEXT: [[TMP30:%.*]] = ashr i129 [[TMP24]], 3
; CHECK-NEXT: [[TMP31:%.*]] = trunc i129 [[TMP30]] to i128
@@ -432,7 +432,7 @@ define <2 x float> @si129tofloatv2(<2 x i129> %a) {
; CHECK-NEXT: itofp-entryitofp-entry:
; CHECK-NEXT: [[TMP0:%.*]] = extractelement <2 x i129> [[A:%.*]], i64 0
; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i129 [[TMP0]], 0
-; CHECK-NEXT: br i1 [[TMP1]], label [[ITOFP_RETURN1:%.*]], label [[ITOFP_IF_END2:%.*]]
+; CHECK-NEXT: br i1 [[TMP1]], label [[ITOFP_RETURN1:%.*]], label [[ITOFP_IF_END2:%.*]], !prof [[PROF1]]
; CHECK: itofp-if-end2:
; CHECK-NEXT: [[TMP2:%.*]] = ashr i129 [[TMP0]], 128
; CHECK-NEXT: [[TMP3:%.*]] = xor i129 [[TMP2]], [[TMP0]]
@@ -442,12 +442,12 @@ define <2 x float> @si129tofloatv2(<2 x i129> %a) {
; CHECK-NEXT: [[TMP7:%.*]] = sub i32 129, [[TMP6]]
; CHECK-NEXT: [[TMP8:%.*]] = sub i32 128, [[TMP6]]
; CHECK-NEXT: [[TMP9:%.*]] = icmp sgt i32 [[TMP7]], 24
-; CHECK-NEXT: br i1 [[TMP9]], label [[ITOFP_IF_THEN43:%.*]], label [[ITOFP_IF_ELSE8:%.*]]
+; CHECK-NEXT: br i1 [[TMP9]], label [[ITOFP_IF_THEN43:%.*]], label [[ITOFP_IF_ELSE8:%.*]], !prof [[PROF2]]
; CHECK: itofp-if-then43:
; CHECK-NEXT: switch i32 [[TMP7]], label [[ITOFP_SW_DEFAULT5:%.*]] [
; CHECK-NEXT: i32 25, label [[ITOFP_SW_BB4:%.*]]
; CHECK-NEXT: i32 26, label [[ITOFP_SW_EPILOG6:%.*]]
-; CHECK-NEXT: ]
+; CHECK-NEXT: ], !prof [[PROF3]]
; CHECK: itofp-sw-bb4:
; CHECK-NEXT: [[TMP10:%.*]] = shl i129 [[TMP4]], 1
; CHECK-NEXT: br label [[ITOFP_SW_EPILOG6]]
@@ -477,7 +477,7 @@ define <2 x float> @si129tofloatv2(<2 x i129> %a) {
; CHECK-NEXT: [[TMP30:%.*]] = trunc i129 [[TMP28]] to i32
; CHECK-NEXT: [[TMP31:%.*]] = lshr i129 [[TMP28]], 32
; CHECK-NEXT: [[TMP32:%.*]] = trunc i129 [[TMP31]] to i32
-; CHECK-NEXT: br i1 [[TMP29]], label [[ITOFP_IF_END269:%.*]], label [[ITOFP_IF_THEN207:%.*]]
+; CHECK-NEXT: br i1 [[TMP29]], label [[ITOFP_IF_END269:%.*]], label [[ITOFP_IF_THEN207:%.*]], !prof [[PROF2]]
; CHECK: itofp-if-then207:
; CHECK-NEXT: [[TMP33:%.*]] = ashr i129 [[TMP27]], 3
; CHECK-NEXT: [[TMP34:%.*]] = trunc i129 [[TMP33]] to i32
@@ -509,7 +509,7 @@ define <2 x float> @si129tofloatv2(<2 x i129> %a) {
; CHECK-NEXT: [[TMP54:%.*]] = insertelement <2 x float> poison, float [[TMP53]], i64 0
; CHECK-NEXT: [[TMP55:%.*]] = extractelement <2 x i129> [[A]], i64 1
; CHECK-NEXT: [[TMP56:%.*]] = icmp eq i129 [[TMP55]], 0
-; CHECK-NEXT: br i1 [[TMP56]], label [[ITOFP_RETURN:%.*]], label [[ITOFP_IF_END:%.*]]
+; CHECK-NEXT: br i1 [[TMP56]], label [[ITOFP_RETURN:%.*]], label [[ITOFP_IF_END:%.*]], !prof [[PROF1]]
; CHECK: itofp-if-end:
; CHECK-NEXT: [[TMP57:%.*]] = ashr i129 [[TMP55]], 128
; CHECK-NEXT: [[TMP58:%.*]] = xor i129 [[TMP57]], [[TMP55]]
@@ -519,12 +519,12 @@ define <2 x float> @si129tofloatv2(<2 x i129> %a) {
; CHECK-NEXT: [[TMP62:%.*]] = sub i32 129, [[TMP61]]
; CHECK-NEXT: [[TMP63:%.*]] = sub i32 128, [[TMP61]]
; CHECK-NEXT: [[TMP64:%.*]] = icmp sgt i32 [[TMP62]], 24
-; CHECK-NEXT: br i1 [[TMP64]], label [[ITOFP_IF_THEN4:%.*]], label [[ITOFP_IF_ELSE:%.*]]
+; CHECK-NEXT: br i1 [[TMP64]], label [[ITOFP_IF_THEN4:%.*]], label [[ITOFP_IF_ELSE:%.*]], !prof [[PROF2]]
; CHECK: itofp-if-then4:
; CHECK-NEXT: switch i32 [[TMP62]], label [[ITOFP_SW_DEFAULT:%.*]] [
; CHECK-NEXT: i32 25, label [[ITOFP_SW_BB:%.*]]
; CHECK-NEXT: i32 26, label [[ITOFP_SW_EPILOG:%.*]]
-; CHECK-NEXT: ]
+; CHECK-NEXT: ], !prof [[PROF3]]
; CHECK: itofp-sw-bb:
; CHECK-NEXT: [[TMP65:%.*]] = shl i129 [[TMP59]], 1
; CHECK-NEXT: br label [[ITOFP_SW_EPILOG]]
@@ -554,7 +554,7 @@ define <2 x float> @si129tofloatv2(<2 x i129> %a) {
; CHECK-NEXT: [[TMP85:%.*]] = trunc i129 [[TMP83]] to i32
; CHECK-NEXT: [[TMP86:%.*]] = lshr i129 [[TMP83]], 32
; CHECK-NEXT: [[TMP87:%.*]] = trunc i129 [[TMP86]] to i32
-; CHECK-NEXT: br i1 [[TMP84]], label [[ITOFP_IF_END26:%.*]], label [[ITOFP_IF_THEN20:%.*]]
+; CHECK-NEXT: br i1 [[TMP84]], label [[ITOFP_IF_END26:%.*]], label [[ITOFP_IF_THEN20:%.*]], !prof [[PROF2]]
; CHECK: itofp-if-then20:
; CHECK-NEXT: [[TMP88:%.*]] = ashr i129 [[TMP82]], 3
; CHECK-NEXT: [[TMP89:%.*]] = trunc i129 [[TMP88]] to i32
@@ -589,3 +589,13 @@ define <2 x float> @si129tofloatv2(<2 x i129> %a) {
%conv = sitofp <2 x i129> %a to <2 x float>
ret <2 x float> %conv
}
+
+!0 = !{!"function_entry_count", i64 1000}
+;.
+; CHECK: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+;.
+; CHECK: [[META0:![0-9]+]] = !{!"function_entry_count", i64 1000}
+; CHECK: [[PROF1]] = !{!"branch_weights", i32 1, i32 1048575}
+; CHECK: [[PROF2]] = !{!"branch_weights", i32 1048575, i32 1}
+; CHECK: [[PROF3]] = !{!"branch_weights", i32 1048575, i32 1, i32 1}
+;.
diff --git a/llvm/test/Transforms/ExpandIRInsts/X86/expand-large-fp-convert-ui129tofp.ll b/llvm/test/Transforms/ExpandIRInsts/X86/expand-large-fp-convert-ui129tofp.ll
index 3a3a8e40ea8d1..562200989ae8c 100644
--- a/llvm/test/Transforms/ExpandIRInsts/X86/expand-large-fp-convert-ui129tofp.ll
+++ b/llvm/test/Transforms/ExpandIRInsts/X86/expand-large-fp-convert-ui129tofp.ll
@@ -1,12 +1,12 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals
; RUN: opt -S -mtriple=x86_64-- --expand-ir-insts < %s | FileCheck %s
; RUN: opt -S -mtriple=x86_64-- -passes='require<libcall-lowering-info>,expand-ir-insts' < %s | FileCheck %s
-define half @ui129tohalf(i129 %a) {
+define half @ui129tohalf(i129 %a) !prof !0 {
; CHECK-LABEL: @ui129tohalf(
; CHECK-NEXT: itofp-entry:
; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i129 [[A:%.*]], 0
-; CHECK-NEXT: br i1 [[TMP0]], label [[ITOFP_RETURN:%.*]], label [[ITOFP_IF_END:%.*]]
+; CHECK-NEXT: br i1 [[TMP0]], label [[ITOFP_RETURN:%.*]], label [[ITOFP_IF_END:%.*]], !prof [[PROF1:![0-9]+]]
; CHECK: itofp-if-end:
; CHECK-NEXT: [[TMP1:%.*]] = ashr i129 [[A]], 128
; CHECK-NEXT: [[TMP2:%.*]] = xor i129 [[TMP1]], [[A]]
@@ -16,12 +16,12 @@ define half @ui129tohalf(i129 %a) {
; CHECK-NEXT: [[TMP6:%.*]] = sub i32 129, [[TMP5]]
; CHECK-NEXT: [[TMP7:%.*]] = sub i32 128, [[TMP5]]
; CHECK-NEXT: [[TMP8:%.*]] = icmp sgt i32 [[TMP6]], 24
-; CHECK-NEXT: br i1 [[TMP8]], label [[ITOFP_IF_THEN4:%.*]], label [[ITOFP_IF_ELSE:%.*]]
+; CHECK-NEXT: br i1 [[TMP8]], label [[ITOFP_IF_THEN4:%.*]], label [[ITOFP_IF_ELSE:%.*]], !prof [[PROF2:![0-9]+]]
; CHECK: itofp-if-then4:
; CHECK-NEXT: switch i32 [[TMP6]], label [[ITOFP_SW_DEFAULT:%.*]] [
; CHECK-NEXT: i32 25, label [[ITOFP_SW_BB:%.*]]
; CHECK-NEXT: i32 26, label [[ITOFP_SW_EPILOG:%.*]]
-; CHECK-NEXT: ]
+; CHECK-NEXT: ], !prof [[PROF3:![0-9]+]]
; CHECK: itofp-sw-bb:
; CHECK-NEXT: [[TMP9:%.*]] = shl i129 [[A]], 1
; CHECK-NEXT: br label [[ITOFP_SW_EPILOG]]
@@ -51,7 +51,7 @@ define half @ui129tohalf(i129 %a) {
; CHECK-NEXT: [[TMP29:%.*]] = trunc i129 [[TMP27]] to i32
; CHECK-NEXT: [[TMP30:%.*]] = lshr i129 [[TMP27]], 32
; CHECK-NEXT: [[TMP31:%.*]] = trunc i129 [[TMP30]] to i32
-; CHECK-NEXT: br i1 [[TMP28]], label [[ITOFP_IF_END26:%.*]], label [[ITOFP_IF_THEN20:%.*]]
+; CHECK-NEXT: br i1 [[TMP28]], label [[ITOFP_IF_END26:%.*]], label [[ITOFP_IF_THEN20:%.*]], !prof [[PROF2]]
; CHECK: itofp-if-then20:
; CHECK-NEXT: [[TMP32:%.*]] = lshr i129 [[TMP26]], 3
; CHECK-NEXT: [[TMP33:%.*]] = trunc i129 [[TMP32]] to i32
@@ -91,7 +91,7 @@ define float @ui129tofloat(i129 %a) {
; CHECK-LABEL: @ui129tofloat(
; CHECK-NEXT: itofp-entry:
; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i129 [[A:%.*]], 0
-; CHECK-NEXT: br i1 [[TMP0]], label [[ITOFP_RETURN:%.*]], label [[ITOFP_IF_END:%.*]]
+; CHECK-NEXT: br i1 [[TMP0]], label [[ITOFP_RETURN:%.*]], label [[ITOFP_IF_END:%.*]], !prof [[PROF1]]
; CHECK: itofp-if-end:
; CHECK-NEXT: [[TMP1:%.*]] = ashr i129 [[A]], 128
; CHECK-NEXT: [[TMP2:%.*]] = xor i129 [[TMP1]], [[A]]
@@ -101,12 +101,12 @@ define float @ui129tofloat(i129 %a) {
; CHECK-NEXT: [[TMP6:%.*]] = sub i32 129, [[TMP5]]
; CHECK-NEXT: [[TMP7:%.*]] = sub i32 128, [[TMP5]]
; CHECK-NEXT: [[TMP8:%.*]] = icmp sgt i32 [[TMP6]], 24
-; CHECK-NEXT: br i1 [[TMP8]], label [[ITOFP_IF_THEN4:%.*]], label [[ITOFP_IF_ELSE:%.*]]
+; CHECK-NEXT: br i1 [[TMP8]], label [[ITOFP_IF_THEN4:%.*]], label [[ITOFP_IF_ELSE:%.*]], !prof [[PROF2]]
; CHECK: itofp-if-then4:
; CHECK-NEXT: switch i32 [[TMP6]], label [[ITOFP_SW_DEFAULT:%.*]] [
; CHECK-NEXT: i32 25, label [[ITOFP_SW_BB:%.*]]
; CHECK-NEXT: i32 26, label [[ITOFP_SW_EPILOG:%.*]]
-; CHECK-NEXT: ]
+; CHECK-NEXT: ], !prof [[PROF3]]
; CHECK: itofp-sw-bb:
; CHECK-NEXT: [[TMP9:%.*]] = shl i129 [[A]], 1
; CHECK-NEXT: br label [[ITOFP_SW_EPILOG]]
@@ -136,7 +136,7 @@ define float @ui129tofloat(i129 %a) {
; CHECK-NEXT: [[TMP29:%.*]] = trunc i129 [[TMP27]] to i32
; CHECK-NEXT: [[TMP30:%.*]] = lshr i129 [[TMP27]], 32
; CHECK-NEXT: [[TMP31:%.*]] = trunc i129 [[TMP30]] to i32
-; CHECK-NEXT: br i1 [[TMP28]], label [[ITOFP_IF_END26:%.*]], label [[ITOFP_IF_THEN20:%.*]]
+; CHECK-NEXT: br i1 [[TMP28]], label [[ITOFP_IF_END26:%.*]], label [[ITOFP_IF_THEN20:%.*]], !prof [[PROF2]]
; CHECK: itofp-if-then20:
; CHECK-NEXT: [[TMP32:%.*]] = lshr i129 [[TMP26]], 3
; CHECK-NEXT: [[TMP33:%.*]] = trunc i129 [[TMP32]] to i32
@@ -175,7 +175,7 @@ define double @ui129todouble(i129 %a) {
; CHECK-LABEL: @ui129todouble(
; CHECK-NEXT: itofp-entry:
; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i129 [[A:%.*]], 0
-; CHECK-NEXT: br i1 [[TMP0]], label [[ITOFP_RETURN:%.*]], label [[ITOFP_IF_END:%.*]]
+; CHECK-NEXT: br i1 [[TMP0]], label [[ITOFP_RETURN:%.*]], label [[ITOFP_IF_END:%.*]], !prof [[PROF1]]
; CHECK: itofp-if-end:
; CHECK-NEXT: [[TMP1:%.*]] = ashr i129 [[A]], 128
; CHECK-NEXT: [[TMP2:%.*]] = xor i129 [[TMP1]], [[A]]
@@ -185,12 +185,12 @@ define double @ui129todouble(i129 %a) {
; CHECK-NEXT: [[TMP6:%.*]] = sub i32 129, [[TMP5]]
; CHECK-NEXT: [[TMP7:%.*]] = sub i32 128, [[TMP5]]
; CHECK-NEXT: [[TMP8:%.*]] = icmp sgt i32 [[TMP6]], 53
-; CHECK-NEXT: br i1 [[TMP8]], label [[ITOFP_IF_THEN4:%.*]], label [[ITOFP_IF_ELSE:%.*]]
+; CHECK-NEXT: br i1 [[TMP8]], label [[ITOFP_IF_THEN4:%.*]], label [[ITOFP_IF_ELSE:%.*]], !prof [[PROF2]]
; CHECK: itofp-if-then4:
; CHECK-NEXT: switch i32 [[TMP6]], label [[ITOFP_SW_DEFAULT:%.*]] [
; CHECK-NEXT: i32 54, label [[ITOFP_SW_BB:%.*]]
; CHECK-NEXT: i32 55, label [[ITOFP_SW_EPILOG:%.*]]
-; CHECK-NEXT: ]
+; CHECK-NEXT: ], !prof [[PROF3]]
; CHECK: itofp-sw-bb:
; CHECK-NEXT: [[TMP9:%.*]] = shl i129 [[A]], 1
; CHECK-NEXT: br label [[ITOFP_SW_EPILOG]]
@@ -220,7 +220,7 @@ define double @ui129todouble(i129 %a) {
; CHECK-NEXT: [[TMP29:%.*]] = trunc i129 [[TMP27]] to i64
; CHECK-NEXT: [[TMP30:%.*]] = lshr i129 [[TMP27]], 32
; CHECK-NEXT: [[TMP31:%.*]] = trunc i129 [[TMP30]] to i32
-; CHECK-NEXT: br i1 [[TMP28]], label [[ITOFP_IF_END26:%.*]], label [[ITOFP_IF_THEN20:%.*]]
+; CHECK-NEXT: br i1 [[TMP28]], label [[ITOFP_IF_END26:%.*]], label [[ITOFP_IF_THEN20:%.*]], !prof [[PROF2]]
; CHECK: itofp-if-then20:
; CHECK-NEXT: [[TMP32:%.*]] = lshr i129 [[TMP26]], 3
; CHECK-NEXT: [[TMP33:%.*]] = trunc i129 [[TMP32]] to i64
@@ -264,7 +264,7 @@ define x86_fp80 @ui129tox86_fp80(i129 %a) {
; CHECK-LABEL: @ui129tox86_fp80(
; CHECK-NEXT: itofp-entry:
; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i129 [[A:%.*]], 0
-; CHECK-NEXT: br i1 [[TMP0]], label [[ITOFP_RETURN:%.*]], label [[ITOFP_IF_END:%.*]]
+; CHECK-NEXT: br i1 [[TMP0]], label [[ITOFP_RETURN:%.*]], label [[ITOFP_IF_END:%.*]], !prof [[PROF1]]
; CHECK: itofp-if-end:
; CHECK-NEXT: [[TMP1:%.*]] = ashr i129 [[A]], 128
; CHECK-NEXT: [[TMP2:%.*]] = xor i129 [[TMP1]], [[A]]
@@ -274,12 +274,12 @@ define x86_fp80 @ui129tox86_fp80(i129 %a) {
; CHECK-NEXT: [[TMP6:%.*]] = sub i129 129, [[TMP4]]
; CHECK-NEXT: [[TMP7:%.*]] = sub i129 128, [[TMP4]]
; CHECK-NEXT: [[TMP8:%.*]] = icmp sgt i129 [[TMP6]], 113
-; CHECK-NEXT: br i1 [[TMP8]], label [[ITOFP_IF_THEN4:%.*]], label [[ITOFP_IF_ELSE:%.*]]
+; CHECK-NEXT: br i1 [[TMP8]], label [[ITOFP_IF_THEN4:%.*]], label [[ITOFP_IF_ELSE:%.*]], !prof [[PROF2]]
; CHECK: itofp-if-then4:
; CHECK-NEXT: switch i129 [[TMP6]], label [[ITOFP_SW_DEFAULT:%.*]] [
; CHECK-NEXT: i129 114, label [[ITOFP_SW_BB:%.*]]
; CHECK-NEXT: i129 115, label [[ITOFP_SW_EPILOG:%.*]]
-; CHECK-NEXT: ]
+; CHECK-NEXT: ], !prof [[PROF3]]
; CHECK: itofp-sw-bb:
; CHECK-NEXT: [[TMP9:%.*]] = shl i129 [[A]], 1
; CHECK-NEXT: br label [[ITOFP_SW_EPILOG]]
@@ -307,7 +307,7 @@ define x86_fp80 @ui129tox86_fp80(i129 %a) {
; CHECK-NEXT: [[TMP27:%.*]] = trunc i129 [[TMP25]] to i128
; CHECK-NEXT: [[TMP28:%.*]] = lshr i129 [[TMP25]], 32
; CHECK-NEXT: [[TMP29:%.*]] = trunc i129 [[TMP7]] to i64
-; CHECK-NEXT: br i1 [[TMP26]], label [[ITOFP_IF_END26:%.*]], label [[ITOFP_IF_THEN20:%.*]]
+; CHECK-NEXT: br i1 [[TMP26]], label [[ITOFP_IF_END26:%.*]], label [[ITOFP_IF_THEN20:%.*]], !prof [[PROF2]]
; CHECK: itofp-if-then20:
; CHECK-NEXT: [[TMP30:%.*]] = lshr i129 [[TMP24]], 3
; CHECK-NEXT: [[TMP31:%.*]] = trunc i129 [[TMP30]] to i128
@@ -348,7 +348,7 @@ define fp128 @ui129tofp128(i129 %a) {
; CHECK-LABEL: @ui129tofp128(
; CHECK-NEXT: itofp-entry:
; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i129 [[A:%.*]], 0
-; CHECK-NEXT: br i1 [[TMP0]], label [[ITOFP_RETURN:%.*]], label [[ITOFP_IF_END:%.*]]
+; CHECK-NEXT: br i1 [[TMP0]], label [[ITOFP_RETURN:%.*]], label [[ITOFP_IF_END:%.*]], !prof [[PROF1]]
; CHECK: itofp-if-end:
; CHECK-NEXT: [[TMP1:%.*]] = ashr i129 [[A]], 128
; CHECK-NEXT: [[TMP2:%.*]] = xor i129 [[TMP1]], [[A]]
@@ -358,12 +358,12 @@ define fp128 @ui129tofp128(i129 %a) {
; CHECK-NEXT: [[TMP6:%.*]] = sub i129 129, [[TMP4]]
; CHECK-NEXT: [[TMP7:%.*]] = sub i129 128, [[TMP4]]
; CHECK-NEXT: [[TMP8:%.*]] = icmp sgt i129 [[TMP6]], 113
-; CHECK-NEXT: br i1 [[TMP8]], label [[ITOFP_IF_THEN4:%.*]], label [[ITOFP_IF_ELSE:%.*]]
+; CHECK-NEXT: br i1 [[TMP8]], label [[ITOFP_IF_THEN4:%.*]], label [[ITOFP_IF_ELSE:%.*]], !prof [[PROF2]]
; CHECK: itofp-if-then4:
; CHECK-NEXT: switch i129 [[TMP6]], label [[ITOFP_SW_DEFAULT:%.*]] [
; CHECK-NEXT: i129 114, label [[ITOFP_SW_BB:%.*]]
; CHECK-NEXT: i129 115, label [[ITOFP_SW_EPILOG:%.*]]
-; CHECK-NEXT: ]
+; CHECK-NEXT: ], !prof [[PROF3]]
; CHECK: itofp-sw-bb:
; CHECK-NEXT: [[TMP9:%.*]] = shl i129 [[A]], 1
; CHECK-NEXT: br label [[ITOFP_SW_EPILOG]]
@@ -391,7 +391,7 @@ define fp128 @ui129tofp128(i129 %a) {
; CHECK-NEXT: [[TMP27:%.*]] = trunc i129 [[TMP25]] to i128
; CHECK-NEXT: [[TMP28:%.*]] = lshr i129 [[TMP25]], 32
; CHECK-NEXT: [[TMP29:%.*]] = trunc i129 [[TMP7]] to i64
-; CHECK-NEXT: br i1 [[TMP26]], label [[ITOFP_IF_END26:%.*]], label [[ITOFP_IF_THEN20:%.*]]
+; CHECK-NEXT: br i1 [[TMP26]], label [[ITOFP_IF_END26:%.*]], label [[ITOFP_IF_THEN20:%.*]], !prof [[PROF2]]
; CHECK: itofp-if-then20:
; CHECK-NEXT: [[TMP30:%.*]] = lshr i129 [[TMP24]], 3
; CHECK-NEXT: [[TMP31:%.*]] = trunc i129 [[TMP30]] to i128
@@ -432,7 +432,7 @@ define <2 x float> @ui129tofloatv2(<2 x i129> %a) {
; CHECK-NEXT: itofp-entryitofp-entry:
; CHECK-NEXT: [[TMP0:%.*]] = extractelement <2 x i129> [[A:%.*]], i64 0
; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i129 [[TMP0]], 0
-; CHECK-NEXT: br i1 [[TMP1]], label [[ITOFP_RETURN1:%.*]], label [[ITOFP_IF_END2:%.*]]
+; CHECK-NEXT: br i1 [[TMP1]], label [[ITOFP_RETURN1:%.*]], label [[ITOFP_IF_END2:%.*]], !prof [[PROF1]]
; CHECK: itofp-if-end2:
; CHECK-NEXT: [[TMP2:%.*]] = ashr i129 [[TMP0]], 128
; CHECK-NEXT: [[TMP3:%.*]] = xor i129 [[TMP2]], [[TMP0]]
@@ -442,12 +442,12 @@ define <2 x float> @ui129tofloatv2(<2 x i129> %a) {
; CHECK-NEXT: [[TMP7:%.*]] = sub i32 129, [[TMP6]]
; CHECK-NEXT: [[TMP8:%.*]] = sub i32 128, [[TMP6]]
; CHECK-NEXT: [[TMP9:%.*]] = icmp sgt i32 [[TMP7]], 24
-; CHECK-NEXT: br i1 [[TMP9]], label [[ITOFP_IF_THEN43:%.*]], label [[ITOFP_IF_ELSE8:%.*]]
+; CHECK-NEXT: br i1 [[TMP9]], label [[ITOFP_IF_THEN43:%.*]], label [[ITOFP_IF_ELSE8:%.*]], !prof [[PROF2]]
; CHECK: itofp-if-then43:
; CHECK-NEXT: switch i32 [[TMP7]], label [[ITOFP_SW_DEFAULT5:%.*]] [
; CHECK-NEXT: i32 25, label [[ITOFP_SW_BB4:%.*]]
; CHECK-NEXT: i32 26, label [[ITOFP_SW_EPILOG6:%.*]]
-; CHECK-NEXT: ]
+; CHECK-NEXT: ], !prof [[PROF3]]
; CHECK: itofp-sw-bb4:
; CHECK-NEXT: [[TMP10:%.*]] = shl i129 [[TMP0]], 1
; CHECK-NEXT: br label [[ITOFP_SW_EPILOG6]]
@@ -477,7 +477,7 @@ define <2 x float> @ui129tofloatv2(<2 x i129> %a) {
; CHECK-NEXT: [[TMP30:%.*]] = trunc i129 [[TMP28]] to i32
; CHECK-NEXT: [[TMP31:%.*]] = lshr i129 [[TMP28]], 32
; CHECK-NEXT: [[TMP32:%.*]] = trunc i129 [[TMP31]] to i32
-; CHECK-NEXT: br i1 [[TMP29]], label [[ITOFP_IF_END269:%.*]], label [[ITOFP_IF_THEN207:%.*]]
+; CHECK-NEXT: br i1 [[TMP29]], label [[ITOFP_IF_END269:%.*]], label [[ITOFP_IF_THEN207:%.*]], !prof [[PROF2]]
; CHECK: itofp-if-then207:
; CHECK-NEXT: [[TMP33:%.*]] = lshr i129 [[TMP27]], 3
; CHECK-NEXT: [[TMP34:%.*]] = trunc i129 [[TMP33]] to i32
@@ -509,7 +509,7 @@ define <2 x float> @ui129tofloatv2(<2 x i129> %a) {
; CHECK-NEXT: [[TMP54:%.*]] = insertelement <2 x float> poison, float [[TMP53]], i64 0
; CHECK-NEXT: [[TMP55:%.*]] = extractelement <2 x i129> [[A]], i64 1
; CHECK-NEXT: [[TMP56:%.*]] = icmp eq i129 [[TMP55]], 0
-; CHECK-NEXT: br i1 [[TMP56]], label [[ITOFP_RETURN:%.*]], label [[ITOFP_IF_END:%.*]]
+; CHECK-NEXT: br i1 [[TMP56]], label [[ITOFP_RETURN:%.*]], label [[ITOFP_IF_END:%.*]], !prof [[PROF1]]
; CHECK: itofp-if-end:
; CHECK-NEXT: [[TMP57:%.*]] = ashr i129 [[TMP55]], 128
; CHECK-NEXT: [[TMP58:%.*]] = xor i129 [[TMP57]], [[TMP55]]
@@ -519,12 +519,12 @@ define <2 x float> @ui129tofloatv2(<2 x i129> %a) {
; CHECK-NEXT: [[TMP62:%.*]] = sub i32 129, [[TMP61]]
; CHECK-NEXT: [[TMP63:%.*]] = sub i32 128, [[TMP61]]
; CHECK-NEXT: [[TMP64:%.*]] = icmp sgt i32 [[TMP62]], 24
-; CHECK-NEXT: br i1 [[TMP64]], label [[ITOFP_IF_THEN4:%.*]], label [[ITOFP_IF_ELSE:%.*]]
+; CHECK-NEXT: br i1 [[TMP64]], label [[ITOFP_IF_THEN4:%.*]], label [[ITOFP_IF_ELSE:%.*]], !prof [[PROF2]]
; CHECK: itofp-if-then4:
; CHECK-NEXT: switch i32 [[TMP62]], label [[ITOFP_SW_DEFAULT:%.*]] [
; CHECK-NEXT: i32 25, label [[ITOFP_SW_BB:%.*]]
; CHECK-NEXT: i32 26, label [[ITOFP_SW_EPILOG:%.*]]
-; CHECK-NEXT: ]
+; CHECK-NEXT: ], !prof [[PROF3]]
; CHECK: itofp-sw-bb:
; CHECK-NEXT: [[TMP65:%.*]] = shl i129 [[TMP55]], 1
; CHECK-NEXT: br label [[ITOFP_SW_EPILOG]]
@@ -554,7 +554,7 @@ define <2 x float> @ui129tofloatv2(<2 x i129> %a) {
; CHECK-NEXT: [[TMP85:%.*]] = trunc i129 [[TMP83]] to i32
; CHECK-NEXT: [[TMP86:%.*]] = lshr i129 [[TMP83]], 32
; CHECK-NEXT: [[TMP87:%.*]] = trunc i129 [[TMP86]] to i32
-; CHECK-NEXT: br i1 [[TMP84]], label [[ITOFP_IF_END26:%.*]], label [[ITOFP_IF_THEN20:%.*]]
+; CHECK-NEXT: br i1 [[TMP84]], label [[ITOFP_IF_END26:%.*]], label [[ITOFP_IF_THEN20:%.*]], !prof [[PROF2]]
; CHECK: itofp-if-then20:
; CHECK-NEXT: [[TMP88:%.*]] = lshr i129 [[TMP82]], 3
; CHECK-NEXT: [[TMP89:%.*]] = trunc i129 [[TMP88]] to i32
@@ -589,3 +589,13 @@ define <2 x float> @ui129tofloatv2(<2 x i129> %a) {
%conv = uitofp <2 x i129> %a to <2 x float>
ret <2 x float> %conv
}
+
+!0 = !{!"function_entry_count", i64 1000}
+;.
+; CHECK: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+;.
+; CHECK: [[META0:![0-9]+]] = !{!"function_entry_count", i64 1000}
+; CHECK: [[PROF1]] = !{!"branch_weights", i32 1, i32 1048575}
+; CHECK: [[PROF2]] = !{!"branch_weights", i32 1048575, i32 1}
+; CHECK: [[PROF3]] = !{!"branch_weights", i32 1048575, i32 1, i32 1}
+;.
diff --git a/llvm/test/Transforms/ExpandIRInsts/X86/expand-large-fp-optnone.ll b/llvm/test/Transforms/ExpandIRInsts/X86/expand-large-fp-optnone.ll
index 5b622c1ad77eb..919fb0aa13c50 100644
--- a/llvm/test/Transforms/ExpandIRInsts/X86/expand-large-fp-optnone.ll
+++ b/llvm/test/Transforms/ExpandIRInsts/X86/expand-large-fp-optnone.ll
@@ -1,16 +1,16 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 5
; RUN: opt -S -mtriple=x86_64-- --expand-ir-insts < %s | FileCheck %s
; RUN: opt -S -mtriple=x86_64-- -passes='require<libcall-lowering-info>,expand-ir-insts' < %s | FileCheck %s
; expand-ir-insts must also run with optnone
; Function Attrs: noinline optnone
-define double @main(i224 %0) #0 {
+define double @main(i224 %0) #0 !prof !0 {
; CHECK-LABEL: define double @main(
-; CHECK-SAME: i224 [[TMP0:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-SAME: i224 [[TMP0:%.*]]) #[[ATTR0:[0-9]+]] !prof [[PROF0:![0-9]+]] {
; CHECK-NEXT: [[ENTRYITOFP_ENTRY:.*]]:
; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i224 [[TMP0]], 0
-; CHECK-NEXT: br i1 [[TMP1]], label %[[ITOFP_RETURN:.*]], label %[[ITOFP_IF_END:.*]]
+; CHECK-NEXT: br i1 [[TMP1]], label %[[ITOFP_RETURN:.*]], label %[[ITOFP_IF_END:.*]], !prof [[PROF1:![0-9]+]]
; CHECK: [[ITOFP_IF_END]]:
; CHECK-NEXT: [[TMP2:%.*]] = ashr i224 [[TMP0]], 223
; CHECK-NEXT: [[TMP3:%.*]] = xor i224 [[TMP2]], [[TMP0]]
@@ -20,12 +20,12 @@ define double @main(i224 %0) #0 {
; CHECK-NEXT: [[TMP7:%.*]] = sub i32 224, [[TMP6]]
; CHECK-NEXT: [[TMP8:%.*]] = sub i32 223, [[TMP6]]
; CHECK-NEXT: [[TMP9:%.*]] = icmp sgt i32 [[TMP7]], 53
-; CHECK-NEXT: br i1 [[TMP9]], label %[[ITOFP_IF_THEN4:.*]], label %[[ITOFP_IF_ELSE:.*]]
+; CHECK-NEXT: br i1 [[TMP9]], label %[[ITOFP_IF_THEN4:.*]], label %[[ITOFP_IF_ELSE:.*]], !prof [[PROF2:![0-9]+]]
; CHECK: [[ITOFP_IF_THEN4]]:
; CHECK-NEXT: switch i32 [[TMP7]], label %[[ITOFP_SW_DEFAULT:.*]] [
; CHECK-NEXT: i32 54, label %[[ITOFP_SW_BB:.*]]
; CHECK-NEXT: i32 55, label %[[ITOFP_SW_EPILOG:.*]]
-; CHECK-NEXT: ]
+; CHECK-NEXT: ], !prof [[PROF3:![0-9]+]]
; CHECK: [[ITOFP_SW_BB]]:
; CHECK-NEXT: [[TMP10:%.*]] = shl i224 [[TMP4]], 1
; CHECK-NEXT: br label %[[ITOFP_SW_EPILOG]]
@@ -55,7 +55,7 @@ define double @main(i224 %0) #0 {
; CHECK-NEXT: [[TMP30:%.*]] = trunc i224 [[TMP28]] to i64
; CHECK-NEXT: [[TMP31:%.*]] = lshr i224 [[TMP28]], 32
; CHECK-NEXT: [[TMP32:%.*]] = trunc i224 [[TMP31]] to i32
-; CHECK-NEXT: br i1 [[TMP29]], label %[[ITOFP_IF_END26:.*]], label %[[ITOFP_IF_THEN20:.*]]
+; CHECK-NEXT: br i1 [[TMP29]], label %[[ITOFP_IF_END26:.*]], label %[[ITOFP_IF_THEN20:.*]], !prof [[PROF2]]
; CHECK: [[ITOFP_IF_THEN20]]:
; CHECK-NEXT: [[TMP33:%.*]] = ashr i224 [[TMP27]], 3
; CHECK-NEXT: [[TMP34:%.*]] = trunc i224 [[TMP33]] to i64
@@ -97,3 +97,14 @@ entry:
}
attributes #0 = { noinline optnone }
+
+!0 = !{!"function_entry_count", i64 1000}
+;.
+; CHECK: attributes #[[ATTR0]] = { noinline optnone }
+; CHECK: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+;.
+; CHECK: [[PROF0]] = !{!"function_entry_count", i64 1000}
+; CHECK: [[PROF1]] = !{!"branch_weights", i32 1, i32 1048575}
+; CHECK: [[PROF2]] = !{!"branch_weights", i32 1048575, i32 1}
+; CHECK: [[PROF3]] = !{!"branch_weights", i32 1048575, i32 1, i32 1}
+;.
diff --git a/mlir/lib/Analysis/AliasAnalysis/LocalAliasAnalysis.cpp b/mlir/lib/Analysis/AliasAnalysis/LocalAliasAnalysis.cpp
index b698756dd75e9..5a4679ef31422 100644
--- a/mlir/lib/Analysis/AliasAnalysis/LocalAliasAnalysis.cpp
+++ b/mlir/lib/Analysis/AliasAnalysis/LocalAliasAnalysis.cpp
@@ -170,24 +170,16 @@ static void collectUnderlyingAddressValues(BlockArgument arg, unsigned maxDepth,
// the entry block.
SmallVector<RegionSuccessor> successors;
branch.getSuccessorRegions(RegionBranchPoint::parent(), successors);
- RegionSuccessor regionSuccessor(region);
- bool found = false;
for (RegionSuccessor &successor : successors) {
if (successor.getSuccessor() == region) {
LDBG() << " Found matching region successor: " << successor;
- found = true;
- regionSuccessor = successor;
- break;
+ return collectUnderlyingAddressValues2(
+ branch, successor, arg, argNumber, maxDepth, visited, output);
}
}
- if (!found) {
- LDBG()
- << " No matching region successor found, adding argument to output";
- output.push_back(arg);
- return;
- }
- return collectUnderlyingAddressValues2(
- branch, regionSuccessor, arg, argNumber, maxDepth, visited, output);
+ LDBG() << " No matching region successor found, adding argument to output";
+ output.push_back(arg);
+ return;
}
LDBG()
More information about the Mlir-commits
mailing list