[Mlir-commits] [llvm] [mlir] [profcheck] Fix profile metadata propagation for Large FP Operations (PR #175864)

Jin Huang llvmlistbot at llvm.org
Mon Jan 26 01:20:03 PST 2026


https://github.com/jinhuang1102 updated https://github.com/llvm/llvm-project/pull/175864

>From 76053abe00d928f2034caa15f150c84a8dc8505e Mon Sep 17 00:00:00 2001
From: lonely eagle <2020382038 at qq.com>
Date: Mon, 26 Jan 2026 16:51:44 +0800
Subject: [PATCH] [mlir][analysis]  Cleanup collectUnderlyingAddressValues
 (NFC) (#177905)

---
 llvm/lib/CodeGen/ExpandIRInsts.cpp            |   77 +-
 llvm/test/CodeGen/AMDGPU/itofp.i128.bf.ll     |  112 +-
 llvm/test/CodeGen/AMDGPU/itofp.i128.ll        |  694 +++---
 llvm/test/CodeGen/RISCV/bitint-fp-conv-200.ll | 1608 +++++++-------
 llvm/test/CodeGen/RISCV/fpclamptosat.ll       | 1890 +++++++++--------
 .../X86/expand-large-fp-convert-fptosi129.ll  |   33 +-
 .../X86/expand-large-fp-convert-fptoui129.ll  |   33 +-
 .../X86/expand-large-fp-convert-si129tofp.ll  |   70 +-
 .../X86/expand-large-fp-convert-ui129tofp.ll  |   70 +-
 .../X86/expand-large-fp-optnone.ll            |   25 +-
 .../AliasAnalysis/LocalAliasAnalysis.cpp      |   18 +-
 11 files changed, 2393 insertions(+), 2237 deletions(-)

diff --git a/llvm/lib/CodeGen/ExpandIRInsts.cpp b/llvm/lib/CodeGen/ExpandIRInsts.cpp
index dac4d0fa466d4..7ccbd6ea0b335 100644
--- a/llvm/lib/CodeGen/ExpandIRInsts.cpp
+++ b/llvm/lib/CodeGen/ExpandIRInsts.cpp
@@ -40,10 +40,15 @@
 #include "llvm/CodeGen/TargetSubtargetInfo.h"
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/InstIterator.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/MDBuilder.h"
 #include "llvm/IR/Module.h"
 #include "llvm/IR/PassManager.h"
+#include "llvm/IR/ProfDataUtils.h"
 #include "llvm/InitializePasses.h"
 #include "llvm/Pass.h"
+#include "llvm/Support/Casting.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Target/TargetMachine.h"
@@ -56,6 +61,10 @@
 
 using namespace llvm;
 
+namespace llvm {
+extern cl::opt<bool> ProfcheckDisableMetadataFixes;
+}
+
 static cl::opt<unsigned>
     ExpandFpConvertBits("expand-fp-convert-bits", cl::Hidden,
                         cl::init(llvm::IntegerType::MAX_INT_BITS),
@@ -69,6 +78,7 @@ static cl::opt<unsigned>
                               "more than <N> bits are expanded."));
 
 namespace {
+
 bool isConstantPowerOfTwo(llvm::Value *V, bool SignedOp) {
   auto *C = dyn_cast<ConstantInt>(V);
   if (!C)
@@ -571,36 +581,60 @@ static void expandFPToI(Instruction *FPToI) {
       ARep0, ConstantInt::getSigned(Builder.getIntNTy(FloatWidth), -1));
   Value *Sign = Builder.CreateSelect(PosOrNeg, ConstantInt::getSigned(IntTy, 1),
                                      ConstantInt::getSigned(IntTy, -1));
+  applyProfMetadataIfEnabled(Sign, [&](Instruction *Inst) {
+    setExplicitlyUnknownBranchWeightsIfProfiled(*Inst, DEBUG_TYPE, F);
+  });
   Value *And =
       Builder.CreateLShr(ARep, Builder.getIntN(BitWidth, FPMantissaWidth));
   Value *And2 = Builder.CreateAnd(
       And, Builder.getIntN(BitWidth, (1 << ExponentWidth) - 1));
   Value *Abs = Builder.CreateAnd(ARep, SignificandMask);
   Value *Or = Builder.CreateOr(Abs, ImplicitBit);
+  // The comparison checks whether the floating-point value is in the range
+  // (-1, 1). We assume unknown (50/50) as the branch weight.
   Value *Cmp =
       Builder.CreateICmpULT(And2, Builder.getIntN(BitWidth, ExponentBias));
-  Builder.CreateCondBr(Cmp, End, IfEnd);
+  Value *CondBrEntry = Builder.CreateCondBr(Cmp, End, IfEnd);
+  applyProfMetadataIfEnabled(CondBrEntry, [&](Instruction *Inst) {
+    setExplicitlyUnknownBranchWeightsIfProfiled(*Inst, DEBUG_TYPE, F);
+  });
 
   // if.end:
   Builder.SetInsertPoint(IfEnd);
   Value *Add1 = Builder.CreateAdd(
       And2, ConstantInt::getSigned(
                 IntTy, -static_cast<int64_t>(ExponentBias + BitWidth)));
+  // The comparison is doing the overflow check so we assume the 'true' path is
+  // unlikely.
   Value *Cmp3 = Builder.CreateICmpULT(
       Add1, ConstantInt::getSigned(IntTy, -static_cast<int64_t>(BitWidth)));
-  Builder.CreateCondBr(Cmp3, IfThen5, IfEnd9);
+  Value *CondBrIfEnd = Builder.CreateCondBr(Cmp3, IfThen5, IfEnd9);
+  applyProfMetadataIfEnabled(CondBrIfEnd, [&](Instruction *Inst) {
+    Inst->setMetadata(
+        LLVMContext::MD_prof,
+        MDBuilder(Inst->getContext()).createUnlikelyBranchWeights());
+  });
 
   // if.then5:
   Builder.SetInsertPoint(IfThen5);
   Value *PosInf = Builder.CreateXor(NegOne, NegInf);
   Value *Cond8 = Builder.CreateSelect(PosOrNeg, PosInf, NegInf);
+  applyProfMetadataIfEnabled(Cond8, [&](Instruction *Inst) {
+    setExplicitlyUnknownBranchWeightsIfProfiled(*Inst, DEBUG_TYPE, F);
+  });
   Builder.CreateBr(End);
 
   // if.end9:
   Builder.SetInsertPoint(IfEnd9);
+  // The shift direction depends on the magnitude of the floating-point number.
+  // Given the lack of domain-specific profiles, we treat the left-shift and
+  // right-shift paths as 50/50.
   Value *Cmp10 = Builder.CreateICmpULT(
       And2, Builder.getIntN(BitWidth, ExponentBias + FPMantissaWidth));
-  Builder.CreateCondBr(Cmp10, IfThen12, IfElse);
+  Value *CondBrIfEnd9 = Builder.CreateCondBr(Cmp10, IfThen12, IfElse);
+  applyProfMetadataIfEnabled(CondBrIfEnd9, [&](Instruction *Inst) {
+    setExplicitlyUnknownBranchWeightsIfProfiled(*Inst, DEBUG_TYPE, F);
+  });
 
   // if.then12:
   Builder.SetInsertPoint(IfThen12);
@@ -772,8 +806,15 @@ static void expandIToFP(Instruction *IToFP) {
 
   // entry:
   Builder.SetInsertPoint(Entry);
+  // We assume that zero is an unlikely input, so the branch to 'End' is the
+  // unlikely path.
   Value *Cmp = Builder.CreateICmpEQ(IntVal, ConstantInt::getSigned(IntTy, 0));
-  Builder.CreateCondBr(Cmp, End, IfEnd);
+  Value *CondBrEntry = Builder.CreateCondBr(Cmp, End, IfEnd);
+  applyProfMetadataIfEnabled(CondBrEntry, [&](Instruction *Inst) {
+    Inst->setMetadata(
+        LLVMContext::MD_prof,
+        MDBuilder(Inst->getContext()).createUnlikelyBranchWeights());
+  });
 
   // if.end:
   Builder.SetInsertPoint(IfEnd);
@@ -790,13 +831,30 @@ static void expandIToFP(Instruction *IToFP) {
                                   FloatWidth == 128 ? Call : Cast);
   Value *Cmp3 = Builder.CreateICmpSGT(
       Sub1, Builder.getIntN(BitWidthNew, FPMantissaWidth + 1));
-  Builder.CreateCondBr(Cmp3, IfThen4, IfElse);
+  // This branch handles the rare case where rounding the mantissa causes a
+  // carry-out at the most significant bit, necessitating an increment of the
+  // exponent. This is a rare case, so the True path is marked as likely.
+  Value *CondBrIfEnd = Builder.CreateCondBr(Cmp3, IfThen4, IfElse);
+  applyProfMetadataIfEnabled(CondBrIfEnd, [&](Instruction *Inst) {
+    Inst->setMetadata(
+        LLVMContext::MD_prof,
+        MDBuilder(Inst->getContext()).createLikelyBranchWeights());
+  });
 
   // if.then4:
   Builder.SetInsertPoint(IfThen4);
   llvm::SwitchInst *SI = Builder.CreateSwitch(Sub1, SwDefault);
   SI->addCase(Builder.getIntN(BitWidthNew, FPMantissaWidth + 2), SwBB);
   SI->addCase(Builder.getIntN(BitWidthNew, FPMantissaWidth + 3), SwEpilog);
+  // Add branch weights to the SwitchInst. The weights are provided for the
+  // default case first (SwDefault), followed by each explicit case in the
+  // order they were added (SwBB, then SwEpilog). Because the following cases
+  // are rare, the default case is given a likely weight.
+  if (!ProfcheckDisableMetadataFixes) {
+    SI->setMetadata(LLVMContext::MD_prof,
+                    MDBuilder(SI->getContext())
+                        .createBranchWeights({(1U << 20) - 1, 1, 1}));
+  }
 
   // sw.bb:
   Builder.SetInsertPoint(SwBB);
@@ -850,7 +908,14 @@ static void expandIToFP(Instruction *IToFP) {
     ExtractT64 = Builder.CreateTrunc(Sub2, Builder.getInt64Ty());
   else
     ExtractT64 = Builder.CreateTrunc(Extract63, Builder.getInt32Ty());
-  Builder.CreateCondBr(PosOrNeg, IfEnd26, IfThen20);
+  // Rounding usually keeps the exponent within its current magnitude and
+  // overflow is rare. The False path is unlikely to be taken.
+  Value *CondBrSwEpilog = Builder.CreateCondBr(PosOrNeg, IfEnd26, IfThen20);
+  applyProfMetadataIfEnabled(CondBrSwEpilog, [&](Instruction *Inst) {
+    Inst->setMetadata(
+        LLVMContext::MD_prof,
+        MDBuilder(Inst->getContext()).createLikelyBranchWeights());
+  });
 
   // if.then20
   Builder.SetInsertPoint(IfThen20);
diff --git a/llvm/test/CodeGen/AMDGPU/itofp.i128.bf.ll b/llvm/test/CodeGen/AMDGPU/itofp.i128.bf.ll
index eee3352fa7452..4da7ab3f2f974 100644
--- a/llvm/test/CodeGen/AMDGPU/itofp.i128.bf.ll
+++ b/llvm/test/CodeGen/AMDGPU/itofp.i128.bf.ll
@@ -15,7 +15,7 @@ define bfloat @sitofp_i128_to_bf16(i128 %x) {
 ; GCN-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[4:5]
 ; GCN-NEXT:    v_mov_b32_e32 v4, 0
 ; GCN-NEXT:    s_and_saveexec_b64 s[6:7], vcc
-; GCN-NEXT:    s_cbranch_execz .LBB0_14
+; GCN-NEXT:    s_cbranch_execz .LBB0_12
 ; GCN-NEXT:  ; %bb.1: ; %itofp-if-end
 ; GCN-NEXT:    v_sub_co_u32_e32 v4, vcc, 0, v0
 ; GCN-NEXT:    v_subb_co_u32_e32 v5, vcc, 0, v1, vcc
@@ -42,29 +42,22 @@ define bfloat @sitofp_i128_to_bf16(i128 %x) {
 ; GCN-NEXT:    v_cmp_gt_i32_e32 vcc, 25, v2
 ; GCN-NEXT:    s_and_saveexec_b64 s[4:5], vcc
 ; GCN-NEXT:    s_xor_b64 s[4:5], exec, s[4:5]
-; GCN-NEXT:  ; %bb.2: ; %itofp-if-else
-; GCN-NEXT:    v_add_u32_e32 v2, 0xffffff98, v7
-; GCN-NEXT:    v_lshlrev_b64 v[0:1], v2, v[0:1]
-; GCN-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v2
-; GCN-NEXT:    v_cndmask_b32_e32 v8, 0, v0, vcc
-; GCN-NEXT:    ; implicit-def: $vgpr2
-; GCN-NEXT:    ; implicit-def: $vgpr0_vgpr1
-; GCN-NEXT:    ; implicit-def: $vgpr4_vgpr5
-; GCN-NEXT:  ; %bb.3: ; %Flow3
+; GCN-NEXT:    s_cbranch_execnz .LBB0_13
+; GCN-NEXT:  .LBB0_2: ; %Flow3
 ; GCN-NEXT:    s_or_saveexec_b64 s[8:9], s[4:5]
 ; GCN-NEXT:    v_sub_u32_e32 v6, 0x7f, v7
 ; GCN-NEXT:    s_xor_b64 exec, exec, s[8:9]
-; GCN-NEXT:    s_cbranch_execz .LBB0_13
-; GCN-NEXT:  ; %bb.4: ; %NodeBlock
+; GCN-NEXT:    s_cbranch_execz .LBB0_11
+; GCN-NEXT:  ; %bb.3: ; %NodeBlock
 ; GCN-NEXT:    v_cmp_lt_i32_e32 vcc, 25, v2
 ; GCN-NEXT:    s_and_saveexec_b64 s[4:5], vcc
 ; GCN-NEXT:    s_xor_b64 s[10:11], exec, s[4:5]
-; GCN-NEXT:    s_cbranch_execz .LBB0_8
-; GCN-NEXT:  ; %bb.5: ; %LeafBlock
+; GCN-NEXT:    s_cbranch_execz .LBB0_7
+; GCN-NEXT:  ; %bb.4: ; %LeafBlock
 ; GCN-NEXT:    v_cmp_ne_u32_e32 vcc, 26, v2
 ; GCN-NEXT:    s_and_saveexec_b64 s[12:13], vcc
-; GCN-NEXT:    s_cbranch_execz .LBB0_7
-; GCN-NEXT:  ; %bb.6: ; %itofp-sw-default
+; GCN-NEXT:    s_cbranch_execz .LBB0_6
+; GCN-NEXT:  ; %bb.5: ; %itofp-sw-default
 ; GCN-NEXT:    v_sub_u32_e32 v12, 0x66, v7
 ; GCN-NEXT:    v_sub_u32_e32 v10, 64, v12
 ; GCN-NEXT:    v_lshrrev_b64 v[8:9], v12, v[0:1]
@@ -102,13 +95,13 @@ define bfloat @sitofp_i128_to_bf16(i128 %x) {
 ; GCN-NEXT:    v_or_b32_e32 v8, v15, v0
 ; GCN-NEXT:    v_mov_b32_e32 v0, v8
 ; GCN-NEXT:    v_mov_b32_e32 v1, v9
-; GCN-NEXT:  .LBB0_7: ; %Flow1
+; GCN-NEXT:  .LBB0_6: ; %Flow1
 ; GCN-NEXT:    s_or_b64 exec, exec, s[12:13]
-; GCN-NEXT:  .LBB0_8: ; %Flow2
+; GCN-NEXT:  .LBB0_7: ; %Flow2
 ; GCN-NEXT:    s_andn2_saveexec_b64 s[4:5], s[10:11]
-; GCN-NEXT:  ; %bb.9: ; %itofp-sw-bb
+; GCN-NEXT:  ; %bb.8: ; %itofp-sw-bb
 ; GCN-NEXT:    v_lshlrev_b64 v[0:1], 1, v[0:1]
-; GCN-NEXT:  ; %bb.10: ; %itofp-sw-epilog
+; GCN-NEXT:  ; %bb.9: ; %itofp-sw-epilog
 ; GCN-NEXT:    s_or_b64 exec, exec, s[4:5]
 ; GCN-NEXT:    v_lshrrev_b32_e32 v4, 2, v0
 ; GCN-NEXT:    v_and_or_b32 v0, v4, 1, v0
@@ -118,12 +111,10 @@ define bfloat @sitofp_i128_to_bf16(i128 %x) {
 ; GCN-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v4
 ; GCN-NEXT:    v_alignbit_b32 v8, v1, v0, 2
 ; GCN-NEXT:    s_and_saveexec_b64 s[4:5], vcc
-; GCN-NEXT:  ; %bb.11: ; %itofp-if-then20
-; GCN-NEXT:    v_alignbit_b32 v8, v1, v0, 3
-; GCN-NEXT:    v_mov_b32_e32 v6, v2
-; GCN-NEXT:  ; %bb.12: ; %Flow
+; GCN-NEXT:    s_cbranch_execnz .LBB0_14
+; GCN-NEXT:  .LBB0_10: ; %Flow
 ; GCN-NEXT:    s_or_b64 exec, exec, s[4:5]
-; GCN-NEXT:  .LBB0_13: ; %Flow4
+; GCN-NEXT:  .LBB0_11: ; %Flow4
 ; GCN-NEXT:    s_or_b64 exec, exec, s[8:9]
 ; GCN-NEXT:    v_and_b32_e32 v0, 0x80000000, v3
 ; GCN-NEXT:    v_lshl_add_u32 v1, v6, 23, 1.0
@@ -136,10 +127,23 @@ define bfloat @sitofp_i128_to_bf16(i128 %x) {
 ; GCN-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
 ; GCN-NEXT:    v_cndmask_b32_e32 v0, v1, v2, vcc
 ; GCN-NEXT:    v_lshrrev_b32_e32 v4, 16, v0
-; GCN-NEXT:  .LBB0_14: ; %Flow5
+; GCN-NEXT:  .LBB0_12: ; %Flow5
 ; GCN-NEXT:    s_or_b64 exec, exec, s[6:7]
 ; GCN-NEXT:    v_mov_b32_e32 v0, v4
 ; GCN-NEXT:    s_setpc_b64 s[30:31]
+; GCN-NEXT:  .LBB0_13: ; %itofp-if-else
+; GCN-NEXT:    v_add_u32_e32 v2, 0xffffff98, v7
+; GCN-NEXT:    v_lshlrev_b64 v[0:1], v2, v[0:1]
+; GCN-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v2
+; GCN-NEXT:    v_cndmask_b32_e32 v8, 0, v0, vcc
+; GCN-NEXT:    ; implicit-def: $vgpr2
+; GCN-NEXT:    ; implicit-def: $vgpr0_vgpr1
+; GCN-NEXT:    ; implicit-def: $vgpr4_vgpr5
+; GCN-NEXT:    s_branch .LBB0_2
+; GCN-NEXT:  .LBB0_14: ; %itofp-if-then20
+; GCN-NEXT:    v_alignbit_b32 v8, v1, v0, 3
+; GCN-NEXT:    v_mov_b32_e32 v6, v2
+; GCN-NEXT:    s_branch .LBB0_10
   %cvt = sitofp i128 %x to bfloat
   ret bfloat %cvt
 }
@@ -153,7 +157,7 @@ define bfloat @uitofp_i128_to_bf16(i128 %x) {
 ; GCN-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[4:5]
 ; GCN-NEXT:    v_mov_b32_e32 v4, 0
 ; GCN-NEXT:    s_and_saveexec_b64 s[6:7], vcc
-; GCN-NEXT:    s_cbranch_execz .LBB1_14
+; GCN-NEXT:    s_cbranch_execz .LBB1_12
 ; GCN-NEXT:  ; %bb.1: ; %itofp-if-end
 ; GCN-NEXT:    v_ffbh_u32_e32 v4, v2
 ; GCN-NEXT:    v_add_u32_e32 v4, 32, v4
@@ -171,29 +175,22 @@ define bfloat @uitofp_i128_to_bf16(i128 %x) {
 ; GCN-NEXT:    ; implicit-def: $vgpr7
 ; GCN-NEXT:    s_and_saveexec_b64 s[4:5], vcc
 ; GCN-NEXT:    s_xor_b64 s[4:5], exec, s[4:5]
-; GCN-NEXT:  ; %bb.2: ; %itofp-if-else
-; GCN-NEXT:    v_add_u32_e32 v2, 0xffffff98, v6
-; GCN-NEXT:    v_lshlrev_b64 v[0:1], v2, v[0:1]
-; GCN-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v2
-; GCN-NEXT:    v_cndmask_b32_e32 v7, 0, v0, vcc
-; GCN-NEXT:    ; implicit-def: $vgpr4
-; GCN-NEXT:    ; implicit-def: $vgpr0_vgpr1
-; GCN-NEXT:    ; implicit-def: $vgpr2_vgpr3
-; GCN-NEXT:  ; %bb.3: ; %Flow3
+; GCN-NEXT:    s_cbranch_execnz .LBB1_13
+; GCN-NEXT:  .LBB1_2: ; %Flow3
 ; GCN-NEXT:    s_or_saveexec_b64 s[8:9], s[4:5]
 ; GCN-NEXT:    v_sub_u32_e32 v5, 0x7f, v6
 ; GCN-NEXT:    s_xor_b64 exec, exec, s[8:9]
-; GCN-NEXT:    s_cbranch_execz .LBB1_13
-; GCN-NEXT:  ; %bb.4: ; %NodeBlock
+; GCN-NEXT:    s_cbranch_execz .LBB1_11
+; GCN-NEXT:  ; %bb.3: ; %NodeBlock
 ; GCN-NEXT:    v_cmp_lt_i32_e32 vcc, 25, v4
 ; GCN-NEXT:    s_and_saveexec_b64 s[4:5], vcc
 ; GCN-NEXT:    s_xor_b64 s[10:11], exec, s[4:5]
-; GCN-NEXT:    s_cbranch_execz .LBB1_8
-; GCN-NEXT:  ; %bb.5: ; %LeafBlock
+; GCN-NEXT:    s_cbranch_execz .LBB1_7
+; GCN-NEXT:  ; %bb.4: ; %LeafBlock
 ; GCN-NEXT:    v_cmp_ne_u32_e32 vcc, 26, v4
 ; GCN-NEXT:    s_and_saveexec_b64 s[12:13], vcc
-; GCN-NEXT:    s_cbranch_execz .LBB1_7
-; GCN-NEXT:  ; %bb.6: ; %itofp-sw-default
+; GCN-NEXT:    s_cbranch_execz .LBB1_6
+; GCN-NEXT:  ; %bb.5: ; %itofp-sw-default
 ; GCN-NEXT:    v_sub_u32_e32 v11, 0x66, v6
 ; GCN-NEXT:    v_sub_u32_e32 v9, 64, v11
 ; GCN-NEXT:    v_lshrrev_b64 v[7:8], v11, v[0:1]
@@ -231,13 +228,13 @@ define bfloat @uitofp_i128_to_bf16(i128 %x) {
 ; GCN-NEXT:    v_or_b32_e32 v7, v14, v0
 ; GCN-NEXT:    v_mov_b32_e32 v0, v7
 ; GCN-NEXT:    v_mov_b32_e32 v1, v8
-; GCN-NEXT:  .LBB1_7: ; %Flow1
+; GCN-NEXT:  .LBB1_6: ; %Flow1
 ; GCN-NEXT:    s_or_b64 exec, exec, s[12:13]
-; GCN-NEXT:  .LBB1_8: ; %Flow2
+; GCN-NEXT:  .LBB1_7: ; %Flow2
 ; GCN-NEXT:    s_andn2_saveexec_b64 s[4:5], s[10:11]
-; GCN-NEXT:  ; %bb.9: ; %itofp-sw-bb
+; GCN-NEXT:  ; %bb.8: ; %itofp-sw-bb
 ; GCN-NEXT:    v_lshlrev_b64 v[0:1], 1, v[0:1]
-; GCN-NEXT:  ; %bb.10: ; %itofp-sw-epilog
+; GCN-NEXT:  ; %bb.9: ; %itofp-sw-epilog
 ; GCN-NEXT:    s_or_b64 exec, exec, s[4:5]
 ; GCN-NEXT:    v_lshrrev_b32_e32 v2, 2, v0
 ; GCN-NEXT:    v_and_or_b32 v0, v2, 1, v0
@@ -247,12 +244,10 @@ define bfloat @uitofp_i128_to_bf16(i128 %x) {
 ; GCN-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v2
 ; GCN-NEXT:    v_alignbit_b32 v7, v1, v0, 2
 ; GCN-NEXT:    s_and_saveexec_b64 s[4:5], vcc
-; GCN-NEXT:  ; %bb.11: ; %itofp-if-then20
-; GCN-NEXT:    v_alignbit_b32 v7, v1, v0, 3
-; GCN-NEXT:    v_mov_b32_e32 v5, v4
-; GCN-NEXT:  ; %bb.12: ; %Flow
+; GCN-NEXT:    s_cbranch_execnz .LBB1_14
+; GCN-NEXT:  .LBB1_10: ; %Flow
 ; GCN-NEXT:    s_or_b64 exec, exec, s[4:5]
-; GCN-NEXT:  .LBB1_13: ; %Flow4
+; GCN-NEXT:  .LBB1_11: ; %Flow4
 ; GCN-NEXT:    s_or_b64 exec, exec, s[8:9]
 ; GCN-NEXT:    v_and_b32_e32 v0, 0x7fffff, v7
 ; GCN-NEXT:    v_lshl_or_b32 v0, v5, 23, v0
@@ -264,10 +259,23 @@ define bfloat @uitofp_i128_to_bf16(i128 %x) {
 ; GCN-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
 ; GCN-NEXT:    v_cndmask_b32_e32 v0, v1, v2, vcc
 ; GCN-NEXT:    v_lshrrev_b32_e32 v4, 16, v0
-; GCN-NEXT:  .LBB1_14: ; %Flow5
+; GCN-NEXT:  .LBB1_12: ; %Flow5
 ; GCN-NEXT:    s_or_b64 exec, exec, s[6:7]
 ; GCN-NEXT:    v_mov_b32_e32 v0, v4
 ; GCN-NEXT:    s_setpc_b64 s[30:31]
+; GCN-NEXT:  .LBB1_13: ; %itofp-if-else
+; GCN-NEXT:    v_add_u32_e32 v2, 0xffffff98, v6
+; GCN-NEXT:    v_lshlrev_b64 v[0:1], v2, v[0:1]
+; GCN-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v2
+; GCN-NEXT:    v_cndmask_b32_e32 v7, 0, v0, vcc
+; GCN-NEXT:    ; implicit-def: $vgpr4
+; GCN-NEXT:    ; implicit-def: $vgpr0_vgpr1
+; GCN-NEXT:    ; implicit-def: $vgpr2_vgpr3
+; GCN-NEXT:    s_branch .LBB1_2
+; GCN-NEXT:  .LBB1_14: ; %itofp-if-then20
+; GCN-NEXT:    v_alignbit_b32 v7, v1, v0, 3
+; GCN-NEXT:    v_mov_b32_e32 v5, v4
+; GCN-NEXT:    s_branch .LBB1_10
   %cvt = uitofp i128 %x to bfloat
   ret bfloat %cvt
 }
diff --git a/llvm/test/CodeGen/AMDGPU/itofp.i128.ll b/llvm/test/CodeGen/AMDGPU/itofp.i128.ll
index 2f9182e6e7c6a..0798a906c38b3 100644
--- a/llvm/test/CodeGen/AMDGPU/itofp.i128.ll
+++ b/llvm/test/CodeGen/AMDGPU/itofp.i128.ll
@@ -11,7 +11,7 @@ define float @sitofp_i128_to_f32(i128 %x) {
 ; SDAG-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[4:5]
 ; SDAG-NEXT:    v_mov_b32_e32 v4, 0
 ; SDAG-NEXT:    s_and_saveexec_b64 s[6:7], vcc
-; SDAG-NEXT:    s_cbranch_execz .LBB0_14
+; SDAG-NEXT:    s_cbranch_execz .LBB0_12
 ; SDAG-NEXT:  ; %bb.1: ; %itofp-if-end
 ; SDAG-NEXT:    v_sub_co_u32_e32 v4, vcc, 0, v0
 ; SDAG-NEXT:    v_subb_co_u32_e32 v5, vcc, 0, v1, vcc
@@ -38,29 +38,22 @@ define float @sitofp_i128_to_f32(i128 %x) {
 ; SDAG-NEXT:    v_cmp_gt_i32_e32 vcc, 25, v2
 ; SDAG-NEXT:    s_and_saveexec_b64 s[4:5], vcc
 ; SDAG-NEXT:    s_xor_b64 s[4:5], exec, s[4:5]
-; SDAG-NEXT:  ; %bb.2: ; %itofp-if-else
-; SDAG-NEXT:    v_add_u32_e32 v2, 0xffffff98, v7
-; SDAG-NEXT:    v_lshlrev_b64 v[0:1], v2, v[0:1]
-; SDAG-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v2
-; SDAG-NEXT:    v_cndmask_b32_e32 v8, 0, v0, vcc
-; SDAG-NEXT:    ; implicit-def: $vgpr2
-; SDAG-NEXT:    ; implicit-def: $vgpr0_vgpr1
-; SDAG-NEXT:    ; implicit-def: $vgpr4_vgpr5
-; SDAG-NEXT:  ; %bb.3: ; %Flow3
+; SDAG-NEXT:    s_cbranch_execnz .LBB0_13
+; SDAG-NEXT:  .LBB0_2: ; %Flow3
 ; SDAG-NEXT:    s_or_saveexec_b64 s[8:9], s[4:5]
 ; SDAG-NEXT:    v_sub_u32_e32 v6, 0x7f, v7
 ; SDAG-NEXT:    s_xor_b64 exec, exec, s[8:9]
-; SDAG-NEXT:    s_cbranch_execz .LBB0_13
-; SDAG-NEXT:  ; %bb.4: ; %NodeBlock
+; SDAG-NEXT:    s_cbranch_execz .LBB0_11
+; SDAG-NEXT:  ; %bb.3: ; %NodeBlock
 ; SDAG-NEXT:    v_cmp_lt_i32_e32 vcc, 25, v2
 ; SDAG-NEXT:    s_and_saveexec_b64 s[4:5], vcc
 ; SDAG-NEXT:    s_xor_b64 s[10:11], exec, s[4:5]
-; SDAG-NEXT:    s_cbranch_execz .LBB0_8
-; SDAG-NEXT:  ; %bb.5: ; %LeafBlock
+; SDAG-NEXT:    s_cbranch_execz .LBB0_7
+; SDAG-NEXT:  ; %bb.4: ; %LeafBlock
 ; SDAG-NEXT:    v_cmp_ne_u32_e32 vcc, 26, v2
 ; SDAG-NEXT:    s_and_saveexec_b64 s[12:13], vcc
-; SDAG-NEXT:    s_cbranch_execz .LBB0_7
-; SDAG-NEXT:  ; %bb.6: ; %itofp-sw-default
+; SDAG-NEXT:    s_cbranch_execz .LBB0_6
+; SDAG-NEXT:  ; %bb.5: ; %itofp-sw-default
 ; SDAG-NEXT:    v_sub_u32_e32 v12, 0x66, v7
 ; SDAG-NEXT:    v_sub_u32_e32 v10, 64, v12
 ; SDAG-NEXT:    v_lshrrev_b64 v[8:9], v12, v[0:1]
@@ -98,13 +91,13 @@ define float @sitofp_i128_to_f32(i128 %x) {
 ; SDAG-NEXT:    v_or_b32_e32 v8, v15, v0
 ; SDAG-NEXT:    v_mov_b32_e32 v0, v8
 ; SDAG-NEXT:    v_mov_b32_e32 v1, v9
-; SDAG-NEXT:  .LBB0_7: ; %Flow1
+; SDAG-NEXT:  .LBB0_6: ; %Flow1
 ; SDAG-NEXT:    s_or_b64 exec, exec, s[12:13]
-; SDAG-NEXT:  .LBB0_8: ; %Flow2
+; SDAG-NEXT:  .LBB0_7: ; %Flow2
 ; SDAG-NEXT:    s_andn2_saveexec_b64 s[4:5], s[10:11]
-; SDAG-NEXT:  ; %bb.9: ; %itofp-sw-bb
+; SDAG-NEXT:  ; %bb.8: ; %itofp-sw-bb
 ; SDAG-NEXT:    v_lshlrev_b64 v[0:1], 1, v[0:1]
-; SDAG-NEXT:  ; %bb.10: ; %itofp-sw-epilog
+; SDAG-NEXT:  ; %bb.9: ; %itofp-sw-epilog
 ; SDAG-NEXT:    s_or_b64 exec, exec, s[4:5]
 ; SDAG-NEXT:    v_lshrrev_b32_e32 v4, 2, v0
 ; SDAG-NEXT:    v_and_or_b32 v0, v4, 1, v0
@@ -114,21 +107,32 @@ define float @sitofp_i128_to_f32(i128 %x) {
 ; SDAG-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v4
 ; SDAG-NEXT:    v_alignbit_b32 v8, v1, v0, 2
 ; SDAG-NEXT:    s_and_saveexec_b64 s[4:5], vcc
-; SDAG-NEXT:  ; %bb.11: ; %itofp-if-then20
-; SDAG-NEXT:    v_alignbit_b32 v8, v1, v0, 3
-; SDAG-NEXT:    v_mov_b32_e32 v6, v2
-; SDAG-NEXT:  ; %bb.12: ; %Flow
+; SDAG-NEXT:    s_cbranch_execnz .LBB0_14
+; SDAG-NEXT:  .LBB0_10: ; %Flow
 ; SDAG-NEXT:    s_or_b64 exec, exec, s[4:5]
-; SDAG-NEXT:  .LBB0_13: ; %Flow4
+; SDAG-NEXT:  .LBB0_11: ; %Flow4
 ; SDAG-NEXT:    s_or_b64 exec, exec, s[8:9]
 ; SDAG-NEXT:    v_and_b32_e32 v0, 0x80000000, v3
 ; SDAG-NEXT:    v_lshl_add_u32 v1, v6, 23, 1.0
 ; SDAG-NEXT:    v_and_b32_e32 v2, 0x7fffff, v8
 ; SDAG-NEXT:    v_or3_b32 v4, v2, v0, v1
-; SDAG-NEXT:  .LBB0_14: ; %Flow5
+; SDAG-NEXT:  .LBB0_12: ; %Flow5
 ; SDAG-NEXT:    s_or_b64 exec, exec, s[6:7]
 ; SDAG-NEXT:    v_mov_b32_e32 v0, v4
 ; SDAG-NEXT:    s_setpc_b64 s[30:31]
+; SDAG-NEXT:  .LBB0_13: ; %itofp-if-else
+; SDAG-NEXT:    v_add_u32_e32 v2, 0xffffff98, v7
+; SDAG-NEXT:    v_lshlrev_b64 v[0:1], v2, v[0:1]
+; SDAG-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v2
+; SDAG-NEXT:    v_cndmask_b32_e32 v8, 0, v0, vcc
+; SDAG-NEXT:    ; implicit-def: $vgpr2
+; SDAG-NEXT:    ; implicit-def: $vgpr0_vgpr1
+; SDAG-NEXT:    ; implicit-def: $vgpr4_vgpr5
+; SDAG-NEXT:    s_branch .LBB0_2
+; SDAG-NEXT:  .LBB0_14: ; %itofp-if-then20
+; SDAG-NEXT:    v_alignbit_b32 v8, v1, v0, 3
+; SDAG-NEXT:    v_mov_b32_e32 v6, v2
+; SDAG-NEXT:    s_branch .LBB0_10
 ;
 ; GISEL-LABEL: sitofp_i128_to_f32:
 ; GISEL:       ; %bb.0: ; %itofp-entry
@@ -139,7 +143,7 @@ define float @sitofp_i128_to_f32(i128 %x) {
 ; GISEL-NEXT:    s_mov_b32 s4, 0
 ; GISEL-NEXT:    v_mov_b32_e32 v4, s4
 ; GISEL-NEXT:    s_and_saveexec_b64 s[6:7], vcc
-; GISEL-NEXT:    s_cbranch_execz .LBB0_14
+; GISEL-NEXT:    s_cbranch_execz .LBB0_12
 ; GISEL-NEXT:  ; %bb.1: ; %itofp-if-end
 ; GISEL-NEXT:    v_ashrrev_i32_e32 v6, 31, v3
 ; GISEL-NEXT:    v_xor_b32_e32 v0, v6, v0
@@ -166,29 +170,22 @@ define float @sitofp_i128_to_f32(i128 %x) {
 ; GISEL-NEXT:    ; implicit-def: $vgpr4
 ; GISEL-NEXT:    s_and_saveexec_b64 s[4:5], vcc
 ; GISEL-NEXT:    s_xor_b64 s[4:5], exec, s[4:5]
-; GISEL-NEXT:  ; %bb.2: ; %itofp-if-else
-; GISEL-NEXT:    v_add_u32_e32 v2, 0xffffff98, v5
-; GISEL-NEXT:    v_lshlrev_b64 v[0:1], v2, v[0:1]
-; GISEL-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v2
-; GISEL-NEXT:    v_cndmask_b32_e32 v4, 0, v0, vcc
-; GISEL-NEXT:    ; implicit-def: $vgpr7
-; GISEL-NEXT:    ; implicit-def: $vgpr0
-; GISEL-NEXT:    ; implicit-def: $vgpr2
-; GISEL-NEXT:  ; %bb.3: ; %Flow3
+; GISEL-NEXT:    s_cbranch_execnz .LBB0_13
+; GISEL-NEXT:  .LBB0_2: ; %Flow3
 ; GISEL-NEXT:    s_or_saveexec_b64 s[8:9], s[4:5]
 ; GISEL-NEXT:    v_sub_u32_e32 v8, 0x7f, v5
 ; GISEL-NEXT:    s_xor_b64 exec, exec, s[8:9]
-; GISEL-NEXT:    s_cbranch_execz .LBB0_13
-; GISEL-NEXT:  ; %bb.4: ; %NodeBlock
+; GISEL-NEXT:    s_cbranch_execz .LBB0_11
+; GISEL-NEXT:  ; %bb.3: ; %NodeBlock
 ; GISEL-NEXT:    v_cmp_le_i32_e32 vcc, 26, v7
 ; GISEL-NEXT:    s_and_saveexec_b64 s[4:5], vcc
 ; GISEL-NEXT:    s_xor_b64 s[10:11], exec, s[4:5]
-; GISEL-NEXT:    s_cbranch_execz .LBB0_8
-; GISEL-NEXT:  ; %bb.5: ; %LeafBlock
+; GISEL-NEXT:    s_cbranch_execz .LBB0_7
+; GISEL-NEXT:  ; %bb.4: ; %LeafBlock
 ; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 26, v7
 ; GISEL-NEXT:    s_and_saveexec_b64 s[12:13], vcc
-; GISEL-NEXT:    s_cbranch_execz .LBB0_7
-; GISEL-NEXT:  ; %bb.6: ; %itofp-sw-default
+; GISEL-NEXT:    s_cbranch_execz .LBB0_6
+; GISEL-NEXT:  ; %bb.5: ; %itofp-sw-default
 ; GISEL-NEXT:    v_sub_u32_e32 v4, 0x66, v5
 ; GISEL-NEXT:    v_sub_u32_e32 v11, 64, v4
 ; GISEL-NEXT:    v_lshrrev_b64 v[9:10], v4, v[0:1]
@@ -230,13 +227,13 @@ define float @sitofp_i128_to_f32(i128 %x) {
 ; GISEL-NEXT:    v_mov_b32_e32 v1, v4
 ; GISEL-NEXT:    v_mov_b32_e32 v2, v5
 ; GISEL-NEXT:    v_mov_b32_e32 v3, v6
-; GISEL-NEXT:  .LBB0_7: ; %Flow1
+; GISEL-NEXT:  .LBB0_6: ; %Flow1
 ; GISEL-NEXT:    s_or_b64 exec, exec, s[12:13]
-; GISEL-NEXT:  .LBB0_8: ; %Flow2
+; GISEL-NEXT:  .LBB0_7: ; %Flow2
 ; GISEL-NEXT:    s_andn2_saveexec_b64 s[4:5], s[10:11]
-; GISEL-NEXT:  ; %bb.9: ; %itofp-sw-bb
+; GISEL-NEXT:  ; %bb.8: ; %itofp-sw-bb
 ; GISEL-NEXT:    v_lshlrev_b64 v[0:1], 1, v[0:1]
-; GISEL-NEXT:  ; %bb.10: ; %itofp-sw-epilog
+; GISEL-NEXT:  ; %bb.9: ; %itofp-sw-epilog
 ; GISEL-NEXT:    s_or_b64 exec, exec, s[4:5]
 ; GISEL-NEXT:    v_bfe_u32 v2, v0, 2, 1
 ; GISEL-NEXT:    v_or_b32_e32 v0, v0, v2
@@ -247,21 +244,32 @@ define float @sitofp_i128_to_f32(i128 %x) {
 ; GISEL-NEXT:    v_lshrrev_b64 v[4:5], 2, v[0:1]
 ; GISEL-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[2:3]
 ; GISEL-NEXT:    s_and_saveexec_b64 s[4:5], vcc
-; GISEL-NEXT:  ; %bb.11: ; %itofp-if-then20
-; GISEL-NEXT:    v_lshrrev_b64 v[4:5], 3, v[0:1]
-; GISEL-NEXT:    v_mov_b32_e32 v8, v7
-; GISEL-NEXT:  ; %bb.12: ; %Flow
+; GISEL-NEXT:    s_cbranch_execnz .LBB0_14
+; GISEL-NEXT:  .LBB0_10: ; %Flow
 ; GISEL-NEXT:    s_or_b64 exec, exec, s[4:5]
-; GISEL-NEXT:  .LBB0_13: ; %Flow4
+; GISEL-NEXT:  .LBB0_11: ; %Flow4
 ; GISEL-NEXT:    s_or_b64 exec, exec, s[8:9]
 ; GISEL-NEXT:    v_and_b32_e32 v0, 0x80000000, v6
 ; GISEL-NEXT:    v_lshl_add_u32 v1, v8, 23, 1.0
 ; GISEL-NEXT:    v_and_b32_e32 v2, 0x7fffff, v4
 ; GISEL-NEXT:    v_or3_b32 v4, v2, v0, v1
-; GISEL-NEXT:  .LBB0_14: ; %Flow5
+; GISEL-NEXT:  .LBB0_12: ; %Flow5
 ; GISEL-NEXT:    s_or_b64 exec, exec, s[6:7]
 ; GISEL-NEXT:    v_mov_b32_e32 v0, v4
 ; GISEL-NEXT:    s_setpc_b64 s[30:31]
+; GISEL-NEXT:  .LBB0_13: ; %itofp-if-else
+; GISEL-NEXT:    v_add_u32_e32 v2, 0xffffff98, v5
+; GISEL-NEXT:    v_lshlrev_b64 v[0:1], v2, v[0:1]
+; GISEL-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v2
+; GISEL-NEXT:    v_cndmask_b32_e32 v4, 0, v0, vcc
+; GISEL-NEXT:    ; implicit-def: $vgpr7
+; GISEL-NEXT:    ; implicit-def: $vgpr0
+; GISEL-NEXT:    ; implicit-def: $vgpr2
+; GISEL-NEXT:    s_branch .LBB0_2
+; GISEL-NEXT:  .LBB0_14: ; %itofp-if-then20
+; GISEL-NEXT:    v_lshrrev_b64 v[4:5], 3, v[0:1]
+; GISEL-NEXT:    v_mov_b32_e32 v8, v7
+; GISEL-NEXT:    s_branch .LBB0_10
   %cvt = sitofp i128 %x to float
   ret float %cvt
 }
@@ -275,7 +283,7 @@ define float @uitofp_i128_to_f32(i128 %x) {
 ; SDAG-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[4:5]
 ; SDAG-NEXT:    v_mov_b32_e32 v4, 0
 ; SDAG-NEXT:    s_and_saveexec_b64 s[6:7], vcc
-; SDAG-NEXT:    s_cbranch_execz .LBB1_14
+; SDAG-NEXT:    s_cbranch_execz .LBB1_12
 ; SDAG-NEXT:  ; %bb.1: ; %itofp-if-end
 ; SDAG-NEXT:    v_ffbh_u32_e32 v4, v2
 ; SDAG-NEXT:    v_add_u32_e32 v4, 32, v4
@@ -293,29 +301,22 @@ define float @uitofp_i128_to_f32(i128 %x) {
 ; SDAG-NEXT:    ; implicit-def: $vgpr7
 ; SDAG-NEXT:    s_and_saveexec_b64 s[4:5], vcc
 ; SDAG-NEXT:    s_xor_b64 s[4:5], exec, s[4:5]
-; SDAG-NEXT:  ; %bb.2: ; %itofp-if-else
-; SDAG-NEXT:    v_add_u32_e32 v2, 0xffffff98, v6
-; SDAG-NEXT:    v_lshlrev_b64 v[0:1], v2, v[0:1]
-; SDAG-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v2
-; SDAG-NEXT:    v_cndmask_b32_e32 v7, 0, v0, vcc
-; SDAG-NEXT:    ; implicit-def: $vgpr4
-; SDAG-NEXT:    ; implicit-def: $vgpr0_vgpr1
-; SDAG-NEXT:    ; implicit-def: $vgpr2_vgpr3
-; SDAG-NEXT:  ; %bb.3: ; %Flow3
+; SDAG-NEXT:    s_cbranch_execnz .LBB1_13
+; SDAG-NEXT:  .LBB1_2: ; %Flow3
 ; SDAG-NEXT:    s_or_saveexec_b64 s[8:9], s[4:5]
 ; SDAG-NEXT:    v_sub_u32_e32 v5, 0x7f, v6
 ; SDAG-NEXT:    s_xor_b64 exec, exec, s[8:9]
-; SDAG-NEXT:    s_cbranch_execz .LBB1_13
-; SDAG-NEXT:  ; %bb.4: ; %NodeBlock
+; SDAG-NEXT:    s_cbranch_execz .LBB1_11
+; SDAG-NEXT:  ; %bb.3: ; %NodeBlock
 ; SDAG-NEXT:    v_cmp_lt_i32_e32 vcc, 25, v4
 ; SDAG-NEXT:    s_and_saveexec_b64 s[4:5], vcc
 ; SDAG-NEXT:    s_xor_b64 s[10:11], exec, s[4:5]
-; SDAG-NEXT:    s_cbranch_execz .LBB1_8
-; SDAG-NEXT:  ; %bb.5: ; %LeafBlock
+; SDAG-NEXT:    s_cbranch_execz .LBB1_7
+; SDAG-NEXT:  ; %bb.4: ; %LeafBlock
 ; SDAG-NEXT:    v_cmp_ne_u32_e32 vcc, 26, v4
 ; SDAG-NEXT:    s_and_saveexec_b64 s[12:13], vcc
-; SDAG-NEXT:    s_cbranch_execz .LBB1_7
-; SDAG-NEXT:  ; %bb.6: ; %itofp-sw-default
+; SDAG-NEXT:    s_cbranch_execz .LBB1_6
+; SDAG-NEXT:  ; %bb.5: ; %itofp-sw-default
 ; SDAG-NEXT:    v_sub_u32_e32 v11, 0x66, v6
 ; SDAG-NEXT:    v_sub_u32_e32 v9, 64, v11
 ; SDAG-NEXT:    v_lshrrev_b64 v[7:8], v11, v[0:1]
@@ -353,13 +354,13 @@ define float @uitofp_i128_to_f32(i128 %x) {
 ; SDAG-NEXT:    v_or_b32_e32 v7, v14, v0
 ; SDAG-NEXT:    v_mov_b32_e32 v0, v7
 ; SDAG-NEXT:    v_mov_b32_e32 v1, v8
-; SDAG-NEXT:  .LBB1_7: ; %Flow1
+; SDAG-NEXT:  .LBB1_6: ; %Flow1
 ; SDAG-NEXT:    s_or_b64 exec, exec, s[12:13]
-; SDAG-NEXT:  .LBB1_8: ; %Flow2
+; SDAG-NEXT:  .LBB1_7: ; %Flow2
 ; SDAG-NEXT:    s_andn2_saveexec_b64 s[4:5], s[10:11]
-; SDAG-NEXT:  ; %bb.9: ; %itofp-sw-bb
+; SDAG-NEXT:  ; %bb.8: ; %itofp-sw-bb
 ; SDAG-NEXT:    v_lshlrev_b64 v[0:1], 1, v[0:1]
-; SDAG-NEXT:  ; %bb.10: ; %itofp-sw-epilog
+; SDAG-NEXT:  ; %bb.9: ; %itofp-sw-epilog
 ; SDAG-NEXT:    s_or_b64 exec, exec, s[4:5]
 ; SDAG-NEXT:    v_lshrrev_b32_e32 v2, 2, v0
 ; SDAG-NEXT:    v_and_or_b32 v0, v2, 1, v0
@@ -369,20 +370,31 @@ define float @uitofp_i128_to_f32(i128 %x) {
 ; SDAG-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v2
 ; SDAG-NEXT:    v_alignbit_b32 v7, v1, v0, 2
 ; SDAG-NEXT:    s_and_saveexec_b64 s[4:5], vcc
-; SDAG-NEXT:  ; %bb.11: ; %itofp-if-then20
-; SDAG-NEXT:    v_alignbit_b32 v7, v1, v0, 3
-; SDAG-NEXT:    v_mov_b32_e32 v5, v4
-; SDAG-NEXT:  ; %bb.12: ; %Flow
+; SDAG-NEXT:    s_cbranch_execnz .LBB1_14
+; SDAG-NEXT:  .LBB1_10: ; %Flow
 ; SDAG-NEXT:    s_or_b64 exec, exec, s[4:5]
-; SDAG-NEXT:  .LBB1_13: ; %Flow4
+; SDAG-NEXT:  .LBB1_11: ; %Flow4
 ; SDAG-NEXT:    s_or_b64 exec, exec, s[8:9]
 ; SDAG-NEXT:    v_and_b32_e32 v0, 0x7fffff, v7
 ; SDAG-NEXT:    v_lshl_or_b32 v0, v5, 23, v0
 ; SDAG-NEXT:    v_add_u32_e32 v4, 1.0, v0
-; SDAG-NEXT:  .LBB1_14: ; %Flow5
+; SDAG-NEXT:  .LBB1_12: ; %Flow5
 ; SDAG-NEXT:    s_or_b64 exec, exec, s[6:7]
 ; SDAG-NEXT:    v_mov_b32_e32 v0, v4
 ; SDAG-NEXT:    s_setpc_b64 s[30:31]
+; SDAG-NEXT:  .LBB1_13: ; %itofp-if-else
+; SDAG-NEXT:    v_add_u32_e32 v2, 0xffffff98, v6
+; SDAG-NEXT:    v_lshlrev_b64 v[0:1], v2, v[0:1]
+; SDAG-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v2
+; SDAG-NEXT:    v_cndmask_b32_e32 v7, 0, v0, vcc
+; SDAG-NEXT:    ; implicit-def: $vgpr4
+; SDAG-NEXT:    ; implicit-def: $vgpr0_vgpr1
+; SDAG-NEXT:    ; implicit-def: $vgpr2_vgpr3
+; SDAG-NEXT:    s_branch .LBB1_2
+; SDAG-NEXT:  .LBB1_14: ; %itofp-if-then20
+; SDAG-NEXT:    v_alignbit_b32 v7, v1, v0, 3
+; SDAG-NEXT:    v_mov_b32_e32 v5, v4
+; SDAG-NEXT:    s_branch .LBB1_10
 ;
 ; GISEL-LABEL: uitofp_i128_to_f32:
 ; GISEL:       ; %bb.0: ; %itofp-entry
@@ -393,7 +405,7 @@ define float @uitofp_i128_to_f32(i128 %x) {
 ; GISEL-NEXT:    s_mov_b32 s4, 0
 ; GISEL-NEXT:    v_mov_b32_e32 v4, s4
 ; GISEL-NEXT:    s_and_saveexec_b64 s[6:7], vcc
-; GISEL-NEXT:    s_cbranch_execz .LBB1_14
+; GISEL-NEXT:    s_cbranch_execz .LBB1_12
 ; GISEL-NEXT:  ; %bb.1: ; %itofp-if-end
 ; GISEL-NEXT:    v_ffbh_u32_e32 v5, v0
 ; GISEL-NEXT:    v_ffbh_u32_e32 v4, v1
@@ -411,29 +423,22 @@ define float @uitofp_i128_to_f32(i128 %x) {
 ; GISEL-NEXT:    ; implicit-def: $vgpr4
 ; GISEL-NEXT:    s_and_saveexec_b64 s[4:5], vcc
 ; GISEL-NEXT:    s_xor_b64 s[4:5], exec, s[4:5]
-; GISEL-NEXT:  ; %bb.2: ; %itofp-if-else
-; GISEL-NEXT:    v_add_u32_e32 v2, 0xffffff98, v5
-; GISEL-NEXT:    v_lshlrev_b64 v[0:1], v2, v[0:1]
-; GISEL-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v2
-; GISEL-NEXT:    v_cndmask_b32_e32 v4, 0, v0, vcc
-; GISEL-NEXT:    ; implicit-def: $vgpr6
-; GISEL-NEXT:    ; implicit-def: $vgpr0
-; GISEL-NEXT:    ; implicit-def: $vgpr2
-; GISEL-NEXT:  ; %bb.3: ; %Flow3
+; GISEL-NEXT:    s_cbranch_execnz .LBB1_13
+; GISEL-NEXT:  .LBB1_2: ; %Flow3
 ; GISEL-NEXT:    s_or_saveexec_b64 s[8:9], s[4:5]
 ; GISEL-NEXT:    v_sub_u32_e32 v7, 0x7f, v5
 ; GISEL-NEXT:    s_xor_b64 exec, exec, s[8:9]
-; GISEL-NEXT:    s_cbranch_execz .LBB1_13
-; GISEL-NEXT:  ; %bb.4: ; %NodeBlock
+; GISEL-NEXT:    s_cbranch_execz .LBB1_11
+; GISEL-NEXT:  ; %bb.3: ; %NodeBlock
 ; GISEL-NEXT:    v_cmp_le_i32_e32 vcc, 26, v6
 ; GISEL-NEXT:    s_and_saveexec_b64 s[4:5], vcc
 ; GISEL-NEXT:    s_xor_b64 s[10:11], exec, s[4:5]
-; GISEL-NEXT:    s_cbranch_execz .LBB1_8
-; GISEL-NEXT:  ; %bb.5: ; %LeafBlock
+; GISEL-NEXT:    s_cbranch_execz .LBB1_7
+; GISEL-NEXT:  ; %bb.4: ; %LeafBlock
 ; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 26, v6
 ; GISEL-NEXT:    s_and_saveexec_b64 s[12:13], vcc
-; GISEL-NEXT:    s_cbranch_execz .LBB1_7
-; GISEL-NEXT:  ; %bb.6: ; %itofp-sw-default
+; GISEL-NEXT:    s_cbranch_execz .LBB1_6
+; GISEL-NEXT:  ; %bb.5: ; %itofp-sw-default
 ; GISEL-NEXT:    v_sub_u32_e32 v4, 0x66, v5
 ; GISEL-NEXT:    v_sub_u32_e32 v10, 64, v4
 ; GISEL-NEXT:    v_lshrrev_b64 v[8:9], v4, v[0:1]
@@ -475,13 +480,13 @@ define float @uitofp_i128_to_f32(i128 %x) {
 ; GISEL-NEXT:    v_mov_b32_e32 v1, v4
 ; GISEL-NEXT:    v_mov_b32_e32 v2, v5
 ; GISEL-NEXT:    v_mov_b32_e32 v3, v6
-; GISEL-NEXT:  .LBB1_7: ; %Flow1
+; GISEL-NEXT:  .LBB1_6: ; %Flow1
 ; GISEL-NEXT:    s_or_b64 exec, exec, s[12:13]
-; GISEL-NEXT:  .LBB1_8: ; %Flow2
+; GISEL-NEXT:  .LBB1_7: ; %Flow2
 ; GISEL-NEXT:    s_andn2_saveexec_b64 s[4:5], s[10:11]
-; GISEL-NEXT:  ; %bb.9: ; %itofp-sw-bb
+; GISEL-NEXT:  ; %bb.8: ; %itofp-sw-bb
 ; GISEL-NEXT:    v_lshlrev_b64 v[0:1], 1, v[0:1]
-; GISEL-NEXT:  ; %bb.10: ; %itofp-sw-epilog
+; GISEL-NEXT:  ; %bb.9: ; %itofp-sw-epilog
 ; GISEL-NEXT:    s_or_b64 exec, exec, s[4:5]
 ; GISEL-NEXT:    v_bfe_u32 v2, v0, 2, 1
 ; GISEL-NEXT:    v_or_b32_e32 v0, v0, v2
@@ -492,20 +497,31 @@ define float @uitofp_i128_to_f32(i128 %x) {
 ; GISEL-NEXT:    v_lshrrev_b64 v[4:5], 2, v[0:1]
 ; GISEL-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[2:3]
 ; GISEL-NEXT:    s_and_saveexec_b64 s[4:5], vcc
-; GISEL-NEXT:  ; %bb.11: ; %itofp-if-then20
-; GISEL-NEXT:    v_lshrrev_b64 v[4:5], 3, v[0:1]
-; GISEL-NEXT:    v_mov_b32_e32 v7, v6
-; GISEL-NEXT:  ; %bb.12: ; %Flow
+; GISEL-NEXT:    s_cbranch_execnz .LBB1_14
+; GISEL-NEXT:  .LBB1_10: ; %Flow
 ; GISEL-NEXT:    s_or_b64 exec, exec, s[4:5]
-; GISEL-NEXT:  .LBB1_13: ; %Flow4
+; GISEL-NEXT:  .LBB1_11: ; %Flow4
 ; GISEL-NEXT:    s_or_b64 exec, exec, s[8:9]
 ; GISEL-NEXT:    v_lshl_add_u32 v0, v7, 23, 1.0
 ; GISEL-NEXT:    v_mov_b32_e32 v1, 0x7fffff
 ; GISEL-NEXT:    v_and_or_b32 v4, v4, v1, v0
-; GISEL-NEXT:  .LBB1_14: ; %Flow5
+; GISEL-NEXT:  .LBB1_12: ; %Flow5
 ; GISEL-NEXT:    s_or_b64 exec, exec, s[6:7]
 ; GISEL-NEXT:    v_mov_b32_e32 v0, v4
 ; GISEL-NEXT:    s_setpc_b64 s[30:31]
+; GISEL-NEXT:  .LBB1_13: ; %itofp-if-else
+; GISEL-NEXT:    v_add_u32_e32 v2, 0xffffff98, v5
+; GISEL-NEXT:    v_lshlrev_b64 v[0:1], v2, v[0:1]
+; GISEL-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v2
+; GISEL-NEXT:    v_cndmask_b32_e32 v4, 0, v0, vcc
+; GISEL-NEXT:    ; implicit-def: $vgpr6
+; GISEL-NEXT:    ; implicit-def: $vgpr0
+; GISEL-NEXT:    ; implicit-def: $vgpr2
+; GISEL-NEXT:    s_branch .LBB1_2
+; GISEL-NEXT:  .LBB1_14: ; %itofp-if-then20
+; GISEL-NEXT:    v_lshrrev_b64 v[4:5], 3, v[0:1]
+; GISEL-NEXT:    v_mov_b32_e32 v7, v6
+; GISEL-NEXT:    s_branch .LBB1_10
   %cvt = uitofp i128 %x to float
   ret float %cvt
 }
@@ -522,7 +538,7 @@ define double @sitofp_i128_to_f64(i128 %x) {
 ; SDAG-NEXT:    v_mov_b32_e32 v0, 0
 ; SDAG-NEXT:    v_mov_b32_e32 v1, 0
 ; SDAG-NEXT:    s_and_saveexec_b64 s[6:7], vcc
-; SDAG-NEXT:    s_cbranch_execz .LBB2_14
+; SDAG-NEXT:    s_cbranch_execz .LBB2_12
 ; SDAG-NEXT:  ; %bb.1: ; %itofp-if-end
 ; SDAG-NEXT:    v_sub_co_u32_e32 v0, vcc, 0, v4
 ; SDAG-NEXT:    v_subb_co_u32_e32 v1, vcc, 0, v5, vcc
@@ -550,30 +566,22 @@ define double @sitofp_i128_to_f64(i128 %x) {
 ; SDAG-NEXT:    ; implicit-def: $vgpr0_vgpr1
 ; SDAG-NEXT:    s_and_saveexec_b64 s[4:5], vcc
 ; SDAG-NEXT:    s_xor_b64 s[4:5], exec, s[4:5]
-; SDAG-NEXT:  ; %bb.2: ; %itofp-if-else
-; SDAG-NEXT:    v_add_u32_e32 v2, 0xffffffb5, v9
-; SDAG-NEXT:    v_lshlrev_b64 v[0:1], v2, v[4:5]
-; SDAG-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v2
-; SDAG-NEXT:    v_cndmask_b32_e32 v10, 0, v1, vcc
-; SDAG-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
-; SDAG-NEXT:    ; implicit-def: $vgpr2
-; SDAG-NEXT:    ; implicit-def: $vgpr6_vgpr7
-; SDAG-NEXT:    ; implicit-def: $vgpr4_vgpr5
-; SDAG-NEXT:  ; %bb.3: ; %Flow3
+; SDAG-NEXT:    s_cbranch_execnz .LBB2_13
+; SDAG-NEXT:  .LBB2_2: ; %Flow3
 ; SDAG-NEXT:    s_or_saveexec_b64 s[8:9], s[4:5]
 ; SDAG-NEXT:    v_sub_u32_e32 v8, 0x7f, v9
 ; SDAG-NEXT:    s_xor_b64 exec, exec, s[8:9]
-; SDAG-NEXT:    s_cbranch_execz .LBB2_13
-; SDAG-NEXT:  ; %bb.4: ; %NodeBlock
+; SDAG-NEXT:    s_cbranch_execz .LBB2_11
+; SDAG-NEXT:  ; %bb.3: ; %NodeBlock
 ; SDAG-NEXT:    v_cmp_lt_i32_e32 vcc, 54, v2
 ; SDAG-NEXT:    s_and_saveexec_b64 s[4:5], vcc
 ; SDAG-NEXT:    s_xor_b64 s[10:11], exec, s[4:5]
-; SDAG-NEXT:    s_cbranch_execz .LBB2_8
-; SDAG-NEXT:  ; %bb.5: ; %LeafBlock
+; SDAG-NEXT:    s_cbranch_execz .LBB2_7
+; SDAG-NEXT:  ; %bb.4: ; %LeafBlock
 ; SDAG-NEXT:    v_cmp_ne_u32_e32 vcc, 55, v2
 ; SDAG-NEXT:    s_and_saveexec_b64 s[12:13], vcc
-; SDAG-NEXT:    s_cbranch_execz .LBB2_7
-; SDAG-NEXT:  ; %bb.6: ; %itofp-sw-default
+; SDAG-NEXT:    s_cbranch_execz .LBB2_6
+; SDAG-NEXT:  ; %bb.5: ; %itofp-sw-default
 ; SDAG-NEXT:    v_sub_u32_e32 v12, 0x49, v9
 ; SDAG-NEXT:    v_sub_u32_e32 v10, 64, v12
 ; SDAG-NEXT:    v_lshrrev_b64 v[0:1], v12, v[4:5]
@@ -616,16 +624,16 @@ define double @sitofp_i128_to_f64(i128 %x) {
 ; SDAG-NEXT:    v_mov_b32_e32 v5, v1
 ; SDAG-NEXT:    v_mov_b32_e32 v4, v0
 ; SDAG-NEXT:    v_mov_b32_e32 v7, v11
-; SDAG-NEXT:  .LBB2_7: ; %Flow1
+; SDAG-NEXT:  .LBB2_6: ; %Flow1
 ; SDAG-NEXT:    s_or_b64 exec, exec, s[12:13]
-; SDAG-NEXT:  .LBB2_8: ; %Flow2
+; SDAG-NEXT:  .LBB2_7: ; %Flow2
 ; SDAG-NEXT:    s_andn2_saveexec_b64 s[4:5], s[10:11]
-; SDAG-NEXT:  ; %bb.9: ; %itofp-sw-bb
+; SDAG-NEXT:  ; %bb.8: ; %itofp-sw-bb
 ; SDAG-NEXT:    v_lshlrev_b64 v[6:7], 1, v[6:7]
 ; SDAG-NEXT:    v_lshrrev_b32_e32 v0, 31, v5
 ; SDAG-NEXT:    v_lshlrev_b64 v[4:5], 1, v[4:5]
 ; SDAG-NEXT:    v_or_b32_e32 v6, v6, v0
-; SDAG-NEXT:  ; %bb.10: ; %itofp-sw-epilog
+; SDAG-NEXT:  ; %bb.9: ; %itofp-sw-epilog
 ; SDAG-NEXT:    s_or_b64 exec, exec, s[4:5]
 ; SDAG-NEXT:    v_lshrrev_b32_e32 v0, 2, v4
 ; SDAG-NEXT:    v_and_or_b32 v0, v0, 1, v4
@@ -638,23 +646,35 @@ define double @sitofp_i128_to_f64(i128 %x) {
 ; SDAG-NEXT:    v_and_b32_e32 v1, 0x800000, v5
 ; SDAG-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v1
 ; SDAG-NEXT:    s_and_saveexec_b64 s[4:5], vcc
-; SDAG-NEXT:  ; %bb.11: ; %itofp-if-then20
-; SDAG-NEXT:    v_lshrrev_b64 v[0:1], 3, v[4:5]
-; SDAG-NEXT:    v_lshlrev_b32_e32 v4, 29, v6
-; SDAG-NEXT:    v_or_b32_e32 v10, v1, v4
-; SDAG-NEXT:    v_mov_b32_e32 v8, v2
-; SDAG-NEXT:  ; %bb.12: ; %Flow
+; SDAG-NEXT:    s_cbranch_execnz .LBB2_14
+; SDAG-NEXT:  .LBB2_10: ; %Flow
 ; SDAG-NEXT:    s_or_b64 exec, exec, s[4:5]
-; SDAG-NEXT:  .LBB2_13: ; %Flow4
+; SDAG-NEXT:  .LBB2_11: ; %Flow4
 ; SDAG-NEXT:    s_or_b64 exec, exec, s[8:9]
 ; SDAG-NEXT:    v_mov_b32_e32 v2, 0x3ff00000
 ; SDAG-NEXT:    v_and_b32_e32 v1, 0x80000000, v3
 ; SDAG-NEXT:    v_lshl_add_u32 v2, v8, 20, v2
 ; SDAG-NEXT:    v_and_b32_e32 v3, 0xfffff, v10
 ; SDAG-NEXT:    v_or3_b32 v1, v3, v1, v2
-; SDAG-NEXT:  .LBB2_14: ; %Flow5
+; SDAG-NEXT:  .LBB2_12: ; %Flow5
 ; SDAG-NEXT:    s_or_b64 exec, exec, s[6:7]
 ; SDAG-NEXT:    s_setpc_b64 s[30:31]
+; SDAG-NEXT:  .LBB2_13: ; %itofp-if-else
+; SDAG-NEXT:    v_add_u32_e32 v2, 0xffffffb5, v9
+; SDAG-NEXT:    v_lshlrev_b64 v[0:1], v2, v[4:5]
+; SDAG-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v2
+; SDAG-NEXT:    v_cndmask_b32_e32 v10, 0, v1, vcc
+; SDAG-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
+; SDAG-NEXT:    ; implicit-def: $vgpr2
+; SDAG-NEXT:    ; implicit-def: $vgpr6_vgpr7
+; SDAG-NEXT:    ; implicit-def: $vgpr4_vgpr5
+; SDAG-NEXT:    s_branch .LBB2_2
+; SDAG-NEXT:  .LBB2_14: ; %itofp-if-then20
+; SDAG-NEXT:    v_lshrrev_b64 v[0:1], 3, v[4:5]
+; SDAG-NEXT:    v_lshlrev_b32_e32 v4, 29, v6
+; SDAG-NEXT:    v_or_b32_e32 v10, v1, v4
+; SDAG-NEXT:    v_mov_b32_e32 v8, v2
+; SDAG-NEXT:    s_branch .LBB2_10
 ;
 ; GISEL-LABEL: sitofp_i128_to_f64:
 ; GISEL:       ; %bb.0: ; %itofp-entry
@@ -668,7 +688,7 @@ define double @sitofp_i128_to_f64(i128 %x) {
 ; GISEL-NEXT:    v_mov_b32_e32 v0, s4
 ; GISEL-NEXT:    v_mov_b32_e32 v1, s5
 ; GISEL-NEXT:    s_and_saveexec_b64 s[6:7], vcc
-; GISEL-NEXT:    s_cbranch_execz .LBB2_14
+; GISEL-NEXT:    s_cbranch_execz .LBB2_12
 ; GISEL-NEXT:  ; %bb.1: ; %itofp-if-end
 ; GISEL-NEXT:    v_ashrrev_i32_e32 v6, 31, v3
 ; GISEL-NEXT:    v_xor_b32_e32 v0, v6, v4
@@ -696,29 +716,22 @@ define double @sitofp_i128_to_f64(i128 %x) {
 ; GISEL-NEXT:    ; implicit-def: $vgpr0_vgpr1
 ; GISEL-NEXT:    s_and_saveexec_b64 s[4:5], vcc
 ; GISEL-NEXT:    s_xor_b64 s[4:5], exec, s[4:5]
-; GISEL-NEXT:  ; %bb.2: ; %itofp-if-else
-; GISEL-NEXT:    v_add_u32_e32 v4, 0xffffffb5, v9
-; GISEL-NEXT:    v_lshlrev_b64 v[0:1], v4, v[2:3]
-; GISEL-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v4
-; GISEL-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
-; GISEL-NEXT:    v_cndmask_b32_e32 v10, 0, v1, vcc
-; GISEL-NEXT:    ; implicit-def: $vgpr7
-; GISEL-NEXT:    ; implicit-def: $vgpr2
-; GISEL-NEXT:  ; %bb.3: ; %Flow3
+; GISEL-NEXT:    s_cbranch_execnz .LBB2_13
+; GISEL-NEXT:  .LBB2_2: ; %Flow3
 ; GISEL-NEXT:    s_or_saveexec_b64 s[8:9], s[4:5]
 ; GISEL-NEXT:    v_sub_u32_e32 v8, 0x7f, v9
 ; GISEL-NEXT:    s_xor_b64 exec, exec, s[8:9]
-; GISEL-NEXT:    s_cbranch_execz .LBB2_13
-; GISEL-NEXT:  ; %bb.4: ; %NodeBlock
+; GISEL-NEXT:    s_cbranch_execz .LBB2_11
+; GISEL-NEXT:  ; %bb.3: ; %NodeBlock
 ; GISEL-NEXT:    v_cmp_le_i32_e32 vcc, 55, v7
 ; GISEL-NEXT:    s_and_saveexec_b64 s[4:5], vcc
 ; GISEL-NEXT:    s_xor_b64 s[10:11], exec, s[4:5]
-; GISEL-NEXT:    s_cbranch_execz .LBB2_8
-; GISEL-NEXT:  ; %bb.5: ; %LeafBlock
+; GISEL-NEXT:    s_cbranch_execz .LBB2_7
+; GISEL-NEXT:  ; %bb.4: ; %LeafBlock
 ; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 55, v7
 ; GISEL-NEXT:    s_and_saveexec_b64 s[12:13], vcc
-; GISEL-NEXT:    s_cbranch_execz .LBB2_7
-; GISEL-NEXT:  ; %bb.6: ; %itofp-sw-default
+; GISEL-NEXT:    s_cbranch_execz .LBB2_6
+; GISEL-NEXT:  ; %bb.5: ; %itofp-sw-default
 ; GISEL-NEXT:    v_sub_u32_e32 v14, 0x49, v9
 ; GISEL-NEXT:    v_sub_u32_e32 v10, 64, v14
 ; GISEL-NEXT:    v_lshrrev_b64 v[0:1], v14, v[2:3]
@@ -762,12 +775,12 @@ define double @sitofp_i128_to_f64(i128 %x) {
 ; GISEL-NEXT:    v_mov_b32_e32 v3, v10
 ; GISEL-NEXT:    v_mov_b32_e32 v4, v11
 ; GISEL-NEXT:    v_mov_b32_e32 v5, v12
-; GISEL-NEXT:  .LBB2_7: ; %Flow1
+; GISEL-NEXT:  .LBB2_6: ; %Flow1
 ; GISEL-NEXT:    s_or_b64 exec, exec, s[12:13]
-; GISEL-NEXT:  .LBB2_8: ; %Flow2
+; GISEL-NEXT:  .LBB2_7: ; %Flow2
 ; GISEL-NEXT:    s_andn2_saveexec_b64 s[4:5], s[10:11]
-; GISEL-NEXT:    s_cbranch_execz .LBB2_10
-; GISEL-NEXT:  ; %bb.9: ; %itofp-sw-bb
+; GISEL-NEXT:    s_cbranch_execz .LBB2_9
+; GISEL-NEXT:  ; %bb.8: ; %itofp-sw-bb
 ; GISEL-NEXT:    v_lshlrev_b64 v[4:5], 1, v[4:5]
 ; GISEL-NEXT:    v_lshlrev_b64 v[0:1], 1, v[2:3]
 ; GISEL-NEXT:    v_lshrrev_b32_e32 v2, 31, v3
@@ -776,7 +789,7 @@ define double @sitofp_i128_to_f64(i128 %x) {
 ; GISEL-NEXT:    v_mov_b32_e32 v4, v2
 ; GISEL-NEXT:    v_mov_b32_e32 v3, v1
 ; GISEL-NEXT:    v_mov_b32_e32 v2, v0
-; GISEL-NEXT:  .LBB2_10: ; %itofp-sw-epilog
+; GISEL-NEXT:  .LBB2_9: ; %itofp-sw-epilog
 ; GISEL-NEXT:    s_or_b64 exec, exec, s[4:5]
 ; GISEL-NEXT:    v_bfe_u32 v0, v2, 2, 1
 ; GISEL-NEXT:    v_or_b32_e32 v0, v2, v0
@@ -789,22 +802,33 @@ define double @sitofp_i128_to_f64(i128 %x) {
 ; GISEL-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[9:10]
 ; GISEL-NEXT:    v_lshl_or_b32 v10, v4, 30, v1
 ; GISEL-NEXT:    s_and_saveexec_b64 s[4:5], vcc
-; GISEL-NEXT:  ; %bb.11: ; %itofp-if-then20
-; GISEL-NEXT:    v_lshrrev_b64 v[0:1], 3, v[2:3]
-; GISEL-NEXT:    v_mov_b32_e32 v8, v7
-; GISEL-NEXT:    v_lshl_or_b32 v10, v4, 29, v1
-; GISEL-NEXT:  ; %bb.12: ; %Flow
+; GISEL-NEXT:    s_cbranch_execnz .LBB2_14
+; GISEL-NEXT:  .LBB2_10: ; %Flow
 ; GISEL-NEXT:    s_or_b64 exec, exec, s[4:5]
-; GISEL-NEXT:  .LBB2_13: ; %Flow4
+; GISEL-NEXT:  .LBB2_11: ; %Flow4
 ; GISEL-NEXT:    s_or_b64 exec, exec, s[8:9]
 ; GISEL-NEXT:    v_mov_b32_e32 v2, 0x3ff00000
 ; GISEL-NEXT:    v_and_b32_e32 v1, 0x80000000, v6
 ; GISEL-NEXT:    v_lshl_add_u32 v2, v8, 20, v2
 ; GISEL-NEXT:    v_and_b32_e32 v3, 0xfffff, v10
 ; GISEL-NEXT:    v_or3_b32 v1, v3, v1, v2
-; GISEL-NEXT:  .LBB2_14: ; %Flow5
+; GISEL-NEXT:  .LBB2_12: ; %Flow5
 ; GISEL-NEXT:    s_or_b64 exec, exec, s[6:7]
 ; GISEL-NEXT:    s_setpc_b64 s[30:31]
+; GISEL-NEXT:  .LBB2_13: ; %itofp-if-else
+; GISEL-NEXT:    v_add_u32_e32 v4, 0xffffffb5, v9
+; GISEL-NEXT:    v_lshlrev_b64 v[0:1], v4, v[2:3]
+; GISEL-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v4
+; GISEL-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v10, 0, v1, vcc
+; GISEL-NEXT:    ; implicit-def: $vgpr7
+; GISEL-NEXT:    ; implicit-def: $vgpr2
+; GISEL-NEXT:    s_branch .LBB2_2
+; GISEL-NEXT:  .LBB2_14: ; %itofp-if-then20
+; GISEL-NEXT:    v_lshrrev_b64 v[0:1], 3, v[2:3]
+; GISEL-NEXT:    v_mov_b32_e32 v8, v7
+; GISEL-NEXT:    v_lshl_or_b32 v10, v4, 29, v1
+; GISEL-NEXT:    s_branch .LBB2_10
   %cvt = sitofp i128 %x to double
   ret double %cvt
 }
@@ -819,7 +843,7 @@ define double @uitofp_i128_to_f64(i128 %x) {
 ; SDAG-NEXT:    v_mov_b32_e32 v4, 0
 ; SDAG-NEXT:    v_mov_b32_e32 v5, 0
 ; SDAG-NEXT:    s_and_saveexec_b64 s[6:7], vcc
-; SDAG-NEXT:    s_cbranch_execz .LBB3_14
+; SDAG-NEXT:    s_cbranch_execz .LBB3_12
 ; SDAG-NEXT:  ; %bb.1: ; %itofp-if-end
 ; SDAG-NEXT:    v_ffbh_u32_e32 v4, v2
 ; SDAG-NEXT:    v_add_u32_e32 v4, 32, v4
@@ -838,30 +862,22 @@ define double @uitofp_i128_to_f64(i128 %x) {
 ; SDAG-NEXT:    ; implicit-def: $vgpr4_vgpr5
 ; SDAG-NEXT:    s_and_saveexec_b64 s[4:5], vcc
 ; SDAG-NEXT:    s_xor_b64 s[4:5], exec, s[4:5]
-; SDAG-NEXT:  ; %bb.2: ; %itofp-if-else
-; SDAG-NEXT:    v_add_u32_e32 v2, 0xffffffb5, v8
-; SDAG-NEXT:    v_lshlrev_b64 v[0:1], v2, v[0:1]
-; SDAG-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v2
-; SDAG-NEXT:    v_cndmask_b32_e32 v9, 0, v1, vcc
-; SDAG-NEXT:    v_cndmask_b32_e32 v4, 0, v0, vcc
-; SDAG-NEXT:    ; implicit-def: $vgpr6
-; SDAG-NEXT:    ; implicit-def: $vgpr2_vgpr3
-; SDAG-NEXT:    ; implicit-def: $vgpr0_vgpr1
-; SDAG-NEXT:  ; %bb.3: ; %Flow3
+; SDAG-NEXT:    s_cbranch_execnz .LBB3_13
+; SDAG-NEXT:  .LBB3_2: ; %Flow3
 ; SDAG-NEXT:    s_or_saveexec_b64 s[8:9], s[4:5]
 ; SDAG-NEXT:    v_sub_u32_e32 v7, 0x7f, v8
 ; SDAG-NEXT:    s_xor_b64 exec, exec, s[8:9]
-; SDAG-NEXT:    s_cbranch_execz .LBB3_13
-; SDAG-NEXT:  ; %bb.4: ; %NodeBlock
+; SDAG-NEXT:    s_cbranch_execz .LBB3_11
+; SDAG-NEXT:  ; %bb.3: ; %NodeBlock
 ; SDAG-NEXT:    v_cmp_lt_i32_e32 vcc, 54, v6
 ; SDAG-NEXT:    s_and_saveexec_b64 s[4:5], vcc
 ; SDAG-NEXT:    s_xor_b64 s[10:11], exec, s[4:5]
-; SDAG-NEXT:    s_cbranch_execz .LBB3_8
-; SDAG-NEXT:  ; %bb.5: ; %LeafBlock
+; SDAG-NEXT:    s_cbranch_execz .LBB3_7
+; SDAG-NEXT:  ; %bb.4: ; %LeafBlock
 ; SDAG-NEXT:    v_cmp_ne_u32_e32 vcc, 55, v6
 ; SDAG-NEXT:    s_and_saveexec_b64 s[12:13], vcc
-; SDAG-NEXT:    s_cbranch_execz .LBB3_7
-; SDAG-NEXT:  ; %bb.6: ; %itofp-sw-default
+; SDAG-NEXT:    s_cbranch_execz .LBB3_6
+; SDAG-NEXT:  ; %bb.5: ; %itofp-sw-default
 ; SDAG-NEXT:    v_sub_u32_e32 v11, 0x49, v8
 ; SDAG-NEXT:    v_sub_u32_e32 v9, 64, v11
 ; SDAG-NEXT:    v_lshrrev_b64 v[4:5], v11, v[0:1]
@@ -904,16 +920,16 @@ define double @uitofp_i128_to_f64(i128 %x) {
 ; SDAG-NEXT:    v_mov_b32_e32 v0, v4
 ; SDAG-NEXT:    v_mov_b32_e32 v1, v5
 ; SDAG-NEXT:    v_mov_b32_e32 v3, v10
-; SDAG-NEXT:  .LBB3_7: ; %Flow1
+; SDAG-NEXT:  .LBB3_6: ; %Flow1
 ; SDAG-NEXT:    s_or_b64 exec, exec, s[12:13]
-; SDAG-NEXT:  .LBB3_8: ; %Flow2
+; SDAG-NEXT:  .LBB3_7: ; %Flow2
 ; SDAG-NEXT:    s_andn2_saveexec_b64 s[4:5], s[10:11]
-; SDAG-NEXT:  ; %bb.9: ; %itofp-sw-bb
+; SDAG-NEXT:  ; %bb.8: ; %itofp-sw-bb
 ; SDAG-NEXT:    v_lshlrev_b64 v[2:3], 1, v[2:3]
 ; SDAG-NEXT:    v_lshrrev_b32_e32 v3, 31, v1
 ; SDAG-NEXT:    v_lshlrev_b64 v[0:1], 1, v[0:1]
 ; SDAG-NEXT:    v_or_b32_e32 v2, v2, v3
-; SDAG-NEXT:  ; %bb.10: ; %itofp-sw-epilog
+; SDAG-NEXT:  ; %bb.9: ; %itofp-sw-epilog
 ; SDAG-NEXT:    s_or_b64 exec, exec, s[4:5]
 ; SDAG-NEXT:    v_lshrrev_b32_e32 v3, 2, v0
 ; SDAG-NEXT:    v_and_or_b32 v0, v3, 1, v0
@@ -925,22 +941,34 @@ define double @uitofp_i128_to_f64(i128 %x) {
 ; SDAG-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v3
 ; SDAG-NEXT:    v_alignbit_b32 v9, v2, v1, 2
 ; SDAG-NEXT:    s_and_saveexec_b64 s[4:5], vcc
-; SDAG-NEXT:  ; %bb.11: ; %itofp-if-then20
-; SDAG-NEXT:    v_lshrrev_b64 v[4:5], 3, v[0:1]
-; SDAG-NEXT:    v_alignbit_b32 v9, v2, v1, 3
-; SDAG-NEXT:    v_mov_b32_e32 v7, v6
-; SDAG-NEXT:  ; %bb.12: ; %Flow
+; SDAG-NEXT:    s_cbranch_execnz .LBB3_14
+; SDAG-NEXT:  .LBB3_10: ; %Flow
 ; SDAG-NEXT:    s_or_b64 exec, exec, s[4:5]
-; SDAG-NEXT:  .LBB3_13: ; %Flow4
+; SDAG-NEXT:  .LBB3_11: ; %Flow4
 ; SDAG-NEXT:    s_or_b64 exec, exec, s[8:9]
 ; SDAG-NEXT:    v_and_b32_e32 v0, 0xfffff, v9
 ; SDAG-NEXT:    v_lshl_or_b32 v0, v7, 20, v0
 ; SDAG-NEXT:    v_add_u32_e32 v5, 0x3ff00000, v0
-; SDAG-NEXT:  .LBB3_14: ; %Flow5
+; SDAG-NEXT:  .LBB3_12: ; %Flow5
 ; SDAG-NEXT:    s_or_b64 exec, exec, s[6:7]
 ; SDAG-NEXT:    v_mov_b32_e32 v0, v4
 ; SDAG-NEXT:    v_mov_b32_e32 v1, v5
 ; SDAG-NEXT:    s_setpc_b64 s[30:31]
+; SDAG-NEXT:  .LBB3_13: ; %itofp-if-else
+; SDAG-NEXT:    v_add_u32_e32 v2, 0xffffffb5, v8
+; SDAG-NEXT:    v_lshlrev_b64 v[0:1], v2, v[0:1]
+; SDAG-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v2
+; SDAG-NEXT:    v_cndmask_b32_e32 v9, 0, v1, vcc
+; SDAG-NEXT:    v_cndmask_b32_e32 v4, 0, v0, vcc
+; SDAG-NEXT:    ; implicit-def: $vgpr6
+; SDAG-NEXT:    ; implicit-def: $vgpr2_vgpr3
+; SDAG-NEXT:    ; implicit-def: $vgpr0_vgpr1
+; SDAG-NEXT:    s_branch .LBB3_2
+; SDAG-NEXT:  .LBB3_14: ; %itofp-if-then20
+; SDAG-NEXT:    v_lshrrev_b64 v[4:5], 3, v[0:1]
+; SDAG-NEXT:    v_alignbit_b32 v9, v2, v1, 3
+; SDAG-NEXT:    v_mov_b32_e32 v7, v6
+; SDAG-NEXT:    s_branch .LBB3_10
 ;
 ; GISEL-LABEL: uitofp_i128_to_f64:
 ; GISEL:       ; %bb.0: ; %itofp-entry
@@ -952,7 +980,7 @@ define double @uitofp_i128_to_f64(i128 %x) {
 ; GISEL-NEXT:    v_mov_b32_e32 v4, s4
 ; GISEL-NEXT:    v_mov_b32_e32 v5, s5
 ; GISEL-NEXT:    s_and_saveexec_b64 s[6:7], vcc
-; GISEL-NEXT:    s_cbranch_execz .LBB3_14
+; GISEL-NEXT:    s_cbranch_execz .LBB3_12
 ; GISEL-NEXT:  ; %bb.1: ; %itofp-if-end
 ; GISEL-NEXT:    v_ffbh_u32_e32 v5, v0
 ; GISEL-NEXT:    v_ffbh_u32_e32 v4, v1
@@ -971,29 +999,22 @@ define double @uitofp_i128_to_f64(i128 %x) {
 ; GISEL-NEXT:    ; implicit-def: $vgpr4_vgpr5
 ; GISEL-NEXT:    s_and_saveexec_b64 s[4:5], vcc
 ; GISEL-NEXT:    s_xor_b64 s[4:5], exec, s[4:5]
-; GISEL-NEXT:  ; %bb.2: ; %itofp-if-else
-; GISEL-NEXT:    v_add_u32_e32 v2, 0xffffffb5, v8
-; GISEL-NEXT:    v_lshlrev_b64 v[0:1], v2, v[0:1]
-; GISEL-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v2
-; GISEL-NEXT:    v_cndmask_b32_e32 v4, 0, v0, vcc
-; GISEL-NEXT:    v_cndmask_b32_e32 v9, 0, v1, vcc
-; GISEL-NEXT:    ; implicit-def: $vgpr6
-; GISEL-NEXT:    ; implicit-def: $vgpr0
-; GISEL-NEXT:  ; %bb.3: ; %Flow3
+; GISEL-NEXT:    s_cbranch_execnz .LBB3_13
+; GISEL-NEXT:  .LBB3_2: ; %Flow3
 ; GISEL-NEXT:    s_or_saveexec_b64 s[8:9], s[4:5]
 ; GISEL-NEXT:    v_sub_u32_e32 v7, 0x7f, v8
 ; GISEL-NEXT:    s_xor_b64 exec, exec, s[8:9]
-; GISEL-NEXT:    s_cbranch_execz .LBB3_13
-; GISEL-NEXT:  ; %bb.4: ; %NodeBlock
+; GISEL-NEXT:    s_cbranch_execz .LBB3_11
+; GISEL-NEXT:  ; %bb.3: ; %NodeBlock
 ; GISEL-NEXT:    v_cmp_le_i32_e32 vcc, 55, v6
 ; GISEL-NEXT:    s_and_saveexec_b64 s[4:5], vcc
 ; GISEL-NEXT:    s_xor_b64 s[10:11], exec, s[4:5]
-; GISEL-NEXT:    s_cbranch_execz .LBB3_8
-; GISEL-NEXT:  ; %bb.5: ; %LeafBlock
+; GISEL-NEXT:    s_cbranch_execz .LBB3_7
+; GISEL-NEXT:  ; %bb.4: ; %LeafBlock
 ; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 55, v6
 ; GISEL-NEXT:    s_and_saveexec_b64 s[12:13], vcc
-; GISEL-NEXT:    s_cbranch_execz .LBB3_7
-; GISEL-NEXT:  ; %bb.6: ; %itofp-sw-default
+; GISEL-NEXT:    s_cbranch_execz .LBB3_6
+; GISEL-NEXT:  ; %bb.5: ; %itofp-sw-default
 ; GISEL-NEXT:    v_sub_u32_e32 v13, 0x49, v8
 ; GISEL-NEXT:    v_sub_u32_e32 v9, 64, v13
 ; GISEL-NEXT:    v_lshrrev_b64 v[4:5], v13, v[0:1]
@@ -1038,12 +1059,12 @@ define double @uitofp_i128_to_f64(i128 %x) {
 ; GISEL-NEXT:    v_mov_b32_e32 v1, v9
 ; GISEL-NEXT:    v_mov_b32_e32 v2, v10
 ; GISEL-NEXT:    v_mov_b32_e32 v3, v11
-; GISEL-NEXT:  .LBB3_7: ; %Flow1
+; GISEL-NEXT:  .LBB3_6: ; %Flow1
 ; GISEL-NEXT:    s_or_b64 exec, exec, s[12:13]
-; GISEL-NEXT:  .LBB3_8: ; %Flow2
+; GISEL-NEXT:  .LBB3_7: ; %Flow2
 ; GISEL-NEXT:    s_andn2_saveexec_b64 s[4:5], s[10:11]
-; GISEL-NEXT:    s_cbranch_execz .LBB3_10
-; GISEL-NEXT:  ; %bb.9: ; %itofp-sw-bb
+; GISEL-NEXT:    s_cbranch_execz .LBB3_9
+; GISEL-NEXT:  ; %bb.8: ; %itofp-sw-bb
 ; GISEL-NEXT:    v_lshlrev_b64 v[8:9], 1, v[0:1]
 ; GISEL-NEXT:    v_lshlrev_b64 v[10:11], 1, v[2:3]
 ; GISEL-NEXT:    v_lshrrev_b32_e32 v0, 31, v1
@@ -1052,7 +1073,7 @@ define double @uitofp_i128_to_f64(i128 %x) {
 ; GISEL-NEXT:    v_mov_b32_e32 v1, v9
 ; GISEL-NEXT:    v_mov_b32_e32 v2, v10
 ; GISEL-NEXT:    v_mov_b32_e32 v3, v11
-; GISEL-NEXT:  .LBB3_10: ; %itofp-sw-epilog
+; GISEL-NEXT:  .LBB3_9: ; %itofp-sw-epilog
 ; GISEL-NEXT:    s_or_b64 exec, exec, s[4:5]
 ; GISEL-NEXT:    v_bfe_u32 v4, v0, 2, 1
 ; GISEL-NEXT:    v_or_b32_e32 v0, v0, v4
@@ -1068,25 +1089,36 @@ define double @uitofp_i128_to_f64(i128 %x) {
 ; GISEL-NEXT:    v_lshrrev_b32_e32 v5, 2, v1
 ; GISEL-NEXT:    v_or_b32_e32 v9, v8, v5
 ; GISEL-NEXT:    s_and_saveexec_b64 s[4:5], vcc
-; GISEL-NEXT:  ; %bb.11: ; %itofp-if-then20
-; GISEL-NEXT:    v_lshlrev_b64 v[2:3], 29, v[2:3]
-; GISEL-NEXT:    v_lshrrev_b64 v[4:5], 3, v[0:1]
-; GISEL-NEXT:    v_lshrrev_b32_e32 v0, 3, v1
-; GISEL-NEXT:    v_or_b32_e32 v9, v2, v0
-; GISEL-NEXT:    v_mov_b32_e32 v7, v6
-; GISEL-NEXT:  ; %bb.12: ; %Flow
+; GISEL-NEXT:    s_cbranch_execnz .LBB3_14
+; GISEL-NEXT:  .LBB3_10: ; %Flow
 ; GISEL-NEXT:    s_or_b64 exec, exec, s[4:5]
-; GISEL-NEXT:  .LBB3_13: ; %Flow4
+; GISEL-NEXT:  .LBB3_11: ; %Flow4
 ; GISEL-NEXT:    s_or_b64 exec, exec, s[8:9]
 ; GISEL-NEXT:    v_mov_b32_e32 v0, 0x3ff00000
 ; GISEL-NEXT:    v_lshl_add_u32 v0, v7, 20, v0
 ; GISEL-NEXT:    v_mov_b32_e32 v1, 0xfffff
 ; GISEL-NEXT:    v_and_or_b32 v5, v9, v1, v0
-; GISEL-NEXT:  .LBB3_14: ; %Flow5
+; GISEL-NEXT:  .LBB3_12: ; %Flow5
 ; GISEL-NEXT:    s_or_b64 exec, exec, s[6:7]
 ; GISEL-NEXT:    v_mov_b32_e32 v0, v4
 ; GISEL-NEXT:    v_mov_b32_e32 v1, v5
 ; GISEL-NEXT:    s_setpc_b64 s[30:31]
+; GISEL-NEXT:  .LBB3_13: ; %itofp-if-else
+; GISEL-NEXT:    v_add_u32_e32 v2, 0xffffffb5, v8
+; GISEL-NEXT:    v_lshlrev_b64 v[0:1], v2, v[0:1]
+; GISEL-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v2
+; GISEL-NEXT:    v_cndmask_b32_e32 v4, 0, v0, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v9, 0, v1, vcc
+; GISEL-NEXT:    ; implicit-def: $vgpr6
+; GISEL-NEXT:    ; implicit-def: $vgpr0
+; GISEL-NEXT:    s_branch .LBB3_2
+; GISEL-NEXT:  .LBB3_14: ; %itofp-if-then20
+; GISEL-NEXT:    v_lshlrev_b64 v[2:3], 29, v[2:3]
+; GISEL-NEXT:    v_lshrrev_b64 v[4:5], 3, v[0:1]
+; GISEL-NEXT:    v_lshrrev_b32_e32 v0, 3, v1
+; GISEL-NEXT:    v_or_b32_e32 v9, v2, v0
+; GISEL-NEXT:    v_mov_b32_e32 v7, v6
+; GISEL-NEXT:    s_branch .LBB3_10
   %cvt = uitofp i128 %x to double
   ret double %cvt
 }
@@ -1100,7 +1132,7 @@ define half @sitofp_i128_to_f16(i128 %x) {
 ; SDAG-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[4:5]
 ; SDAG-NEXT:    v_mov_b32_e32 v4, 0
 ; SDAG-NEXT:    s_and_saveexec_b64 s[6:7], vcc
-; SDAG-NEXT:    s_cbranch_execz .LBB4_14
+; SDAG-NEXT:    s_cbranch_execz .LBB4_12
 ; SDAG-NEXT:  ; %bb.1: ; %itofp-if-end
 ; SDAG-NEXT:    v_sub_co_u32_e32 v4, vcc, 0, v0
 ; SDAG-NEXT:    v_subb_co_u32_e32 v5, vcc, 0, v1, vcc
@@ -1127,29 +1159,22 @@ define half @sitofp_i128_to_f16(i128 %x) {
 ; SDAG-NEXT:    v_cmp_gt_i32_e32 vcc, 25, v2
 ; SDAG-NEXT:    s_and_saveexec_b64 s[4:5], vcc
 ; SDAG-NEXT:    s_xor_b64 s[4:5], exec, s[4:5]
-; SDAG-NEXT:  ; %bb.2: ; %itofp-if-else
-; SDAG-NEXT:    v_add_u32_e32 v2, 0xffffff98, v7
-; SDAG-NEXT:    v_lshlrev_b64 v[0:1], v2, v[0:1]
-; SDAG-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v2
-; SDAG-NEXT:    v_cndmask_b32_e32 v8, 0, v0, vcc
-; SDAG-NEXT:    ; implicit-def: $vgpr2
-; SDAG-NEXT:    ; implicit-def: $vgpr0_vgpr1
-; SDAG-NEXT:    ; implicit-def: $vgpr4_vgpr5
-; SDAG-NEXT:  ; %bb.3: ; %Flow3
+; SDAG-NEXT:    s_cbranch_execnz .LBB4_13
+; SDAG-NEXT:  .LBB4_2: ; %Flow3
 ; SDAG-NEXT:    s_or_saveexec_b64 s[8:9], s[4:5]
 ; SDAG-NEXT:    v_sub_u32_e32 v6, 0x7f, v7
 ; SDAG-NEXT:    s_xor_b64 exec, exec, s[8:9]
-; SDAG-NEXT:    s_cbranch_execz .LBB4_13
-; SDAG-NEXT:  ; %bb.4: ; %NodeBlock
+; SDAG-NEXT:    s_cbranch_execz .LBB4_11
+; SDAG-NEXT:  ; %bb.3: ; %NodeBlock
 ; SDAG-NEXT:    v_cmp_lt_i32_e32 vcc, 25, v2
 ; SDAG-NEXT:    s_and_saveexec_b64 s[4:5], vcc
 ; SDAG-NEXT:    s_xor_b64 s[10:11], exec, s[4:5]
-; SDAG-NEXT:    s_cbranch_execz .LBB4_8
-; SDAG-NEXT:  ; %bb.5: ; %LeafBlock
+; SDAG-NEXT:    s_cbranch_execz .LBB4_7
+; SDAG-NEXT:  ; %bb.4: ; %LeafBlock
 ; SDAG-NEXT:    v_cmp_ne_u32_e32 vcc, 26, v2
 ; SDAG-NEXT:    s_and_saveexec_b64 s[12:13], vcc
-; SDAG-NEXT:    s_cbranch_execz .LBB4_7
-; SDAG-NEXT:  ; %bb.6: ; %itofp-sw-default
+; SDAG-NEXT:    s_cbranch_execz .LBB4_6
+; SDAG-NEXT:  ; %bb.5: ; %itofp-sw-default
 ; SDAG-NEXT:    v_sub_u32_e32 v12, 0x66, v7
 ; SDAG-NEXT:    v_sub_u32_e32 v10, 64, v12
 ; SDAG-NEXT:    v_lshrrev_b64 v[8:9], v12, v[0:1]
@@ -1187,13 +1212,13 @@ define half @sitofp_i128_to_f16(i128 %x) {
 ; SDAG-NEXT:    v_or_b32_e32 v8, v15, v0
 ; SDAG-NEXT:    v_mov_b32_e32 v0, v8
 ; SDAG-NEXT:    v_mov_b32_e32 v1, v9
-; SDAG-NEXT:  .LBB4_7: ; %Flow1
+; SDAG-NEXT:  .LBB4_6: ; %Flow1
 ; SDAG-NEXT:    s_or_b64 exec, exec, s[12:13]
-; SDAG-NEXT:  .LBB4_8: ; %Flow2
+; SDAG-NEXT:  .LBB4_7: ; %Flow2
 ; SDAG-NEXT:    s_andn2_saveexec_b64 s[4:5], s[10:11]
-; SDAG-NEXT:  ; %bb.9: ; %itofp-sw-bb
+; SDAG-NEXT:  ; %bb.8: ; %itofp-sw-bb
 ; SDAG-NEXT:    v_lshlrev_b64 v[0:1], 1, v[0:1]
-; SDAG-NEXT:  ; %bb.10: ; %itofp-sw-epilog
+; SDAG-NEXT:  ; %bb.9: ; %itofp-sw-epilog
 ; SDAG-NEXT:    s_or_b64 exec, exec, s[4:5]
 ; SDAG-NEXT:    v_lshrrev_b32_e32 v4, 2, v0
 ; SDAG-NEXT:    v_and_or_b32 v0, v4, 1, v0
@@ -1203,22 +1228,33 @@ define half @sitofp_i128_to_f16(i128 %x) {
 ; SDAG-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v4
 ; SDAG-NEXT:    v_alignbit_b32 v8, v1, v0, 2
 ; SDAG-NEXT:    s_and_saveexec_b64 s[4:5], vcc
-; SDAG-NEXT:  ; %bb.11: ; %itofp-if-then20
-; SDAG-NEXT:    v_alignbit_b32 v8, v1, v0, 3
-; SDAG-NEXT:    v_mov_b32_e32 v6, v2
-; SDAG-NEXT:  ; %bb.12: ; %Flow
+; SDAG-NEXT:    s_cbranch_execnz .LBB4_14
+; SDAG-NEXT:  .LBB4_10: ; %Flow
 ; SDAG-NEXT:    s_or_b64 exec, exec, s[4:5]
-; SDAG-NEXT:  .LBB4_13: ; %Flow4
+; SDAG-NEXT:  .LBB4_11: ; %Flow4
 ; SDAG-NEXT:    s_or_b64 exec, exec, s[8:9]
 ; SDAG-NEXT:    v_and_b32_e32 v0, 0x80000000, v3
 ; SDAG-NEXT:    v_lshl_add_u32 v1, v6, 23, 1.0
 ; SDAG-NEXT:    v_and_b32_e32 v2, 0x7fffff, v8
 ; SDAG-NEXT:    v_or3_b32 v0, v2, v0, v1
 ; SDAG-NEXT:    v_cvt_f16_f32_e32 v4, v0
-; SDAG-NEXT:  .LBB4_14: ; %Flow5
+; SDAG-NEXT:  .LBB4_12: ; %Flow5
 ; SDAG-NEXT:    s_or_b64 exec, exec, s[6:7]
 ; SDAG-NEXT:    v_mov_b32_e32 v0, v4
 ; SDAG-NEXT:    s_setpc_b64 s[30:31]
+; SDAG-NEXT:  .LBB4_13: ; %itofp-if-else
+; SDAG-NEXT:    v_add_u32_e32 v2, 0xffffff98, v7
+; SDAG-NEXT:    v_lshlrev_b64 v[0:1], v2, v[0:1]
+; SDAG-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v2
+; SDAG-NEXT:    v_cndmask_b32_e32 v8, 0, v0, vcc
+; SDAG-NEXT:    ; implicit-def: $vgpr2
+; SDAG-NEXT:    ; implicit-def: $vgpr0_vgpr1
+; SDAG-NEXT:    ; implicit-def: $vgpr4_vgpr5
+; SDAG-NEXT:    s_branch .LBB4_2
+; SDAG-NEXT:  .LBB4_14: ; %itofp-if-then20
+; SDAG-NEXT:    v_alignbit_b32 v8, v1, v0, 3
+; SDAG-NEXT:    v_mov_b32_e32 v6, v2
+; SDAG-NEXT:    s_branch .LBB4_10
 ;
 ; GISEL-LABEL: sitofp_i128_to_f16:
 ; GISEL:       ; %bb.0: ; %itofp-entry
@@ -1229,7 +1265,7 @@ define half @sitofp_i128_to_f16(i128 %x) {
 ; GISEL-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[4:5]
 ; GISEL-NEXT:    v_mov_b32_e32 v4, s4
 ; GISEL-NEXT:    s_and_saveexec_b64 s[6:7], vcc
-; GISEL-NEXT:    s_cbranch_execz .LBB4_14
+; GISEL-NEXT:    s_cbranch_execz .LBB4_12
 ; GISEL-NEXT:  ; %bb.1: ; %itofp-if-end
 ; GISEL-NEXT:    v_ashrrev_i32_e32 v6, 31, v3
 ; GISEL-NEXT:    v_xor_b32_e32 v0, v6, v0
@@ -1256,29 +1292,22 @@ define half @sitofp_i128_to_f16(i128 %x) {
 ; GISEL-NEXT:    ; implicit-def: $vgpr4
 ; GISEL-NEXT:    s_and_saveexec_b64 s[4:5], vcc
 ; GISEL-NEXT:    s_xor_b64 s[4:5], exec, s[4:5]
-; GISEL-NEXT:  ; %bb.2: ; %itofp-if-else
-; GISEL-NEXT:    v_add_u32_e32 v2, 0xffffff98, v5
-; GISEL-NEXT:    v_lshlrev_b64 v[0:1], v2, v[0:1]
-; GISEL-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v2
-; GISEL-NEXT:    v_cndmask_b32_e32 v4, 0, v0, vcc
-; GISEL-NEXT:    ; implicit-def: $vgpr7
-; GISEL-NEXT:    ; implicit-def: $vgpr0
-; GISEL-NEXT:    ; implicit-def: $vgpr2
-; GISEL-NEXT:  ; %bb.3: ; %Flow3
+; GISEL-NEXT:    s_cbranch_execnz .LBB4_13
+; GISEL-NEXT:  .LBB4_2: ; %Flow3
 ; GISEL-NEXT:    s_or_saveexec_b64 s[8:9], s[4:5]
 ; GISEL-NEXT:    v_sub_u32_e32 v8, 0x7f, v5
 ; GISEL-NEXT:    s_xor_b64 exec, exec, s[8:9]
-; GISEL-NEXT:    s_cbranch_execz .LBB4_13
-; GISEL-NEXT:  ; %bb.4: ; %NodeBlock
+; GISEL-NEXT:    s_cbranch_execz .LBB4_11
+; GISEL-NEXT:  ; %bb.3: ; %NodeBlock
 ; GISEL-NEXT:    v_cmp_le_i32_e32 vcc, 26, v7
 ; GISEL-NEXT:    s_and_saveexec_b64 s[4:5], vcc
 ; GISEL-NEXT:    s_xor_b64 s[10:11], exec, s[4:5]
-; GISEL-NEXT:    s_cbranch_execz .LBB4_8
-; GISEL-NEXT:  ; %bb.5: ; %LeafBlock
+; GISEL-NEXT:    s_cbranch_execz .LBB4_7
+; GISEL-NEXT:  ; %bb.4: ; %LeafBlock
 ; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 26, v7
 ; GISEL-NEXT:    s_and_saveexec_b64 s[12:13], vcc
-; GISEL-NEXT:    s_cbranch_execz .LBB4_7
-; GISEL-NEXT:  ; %bb.6: ; %itofp-sw-default
+; GISEL-NEXT:    s_cbranch_execz .LBB4_6
+; GISEL-NEXT:  ; %bb.5: ; %itofp-sw-default
 ; GISEL-NEXT:    v_sub_u32_e32 v4, 0x66, v5
 ; GISEL-NEXT:    v_sub_u32_e32 v11, 64, v4
 ; GISEL-NEXT:    v_lshrrev_b64 v[9:10], v4, v[0:1]
@@ -1320,13 +1349,13 @@ define half @sitofp_i128_to_f16(i128 %x) {
 ; GISEL-NEXT:    v_mov_b32_e32 v1, v4
 ; GISEL-NEXT:    v_mov_b32_e32 v2, v5
 ; GISEL-NEXT:    v_mov_b32_e32 v3, v6
-; GISEL-NEXT:  .LBB4_7: ; %Flow1
+; GISEL-NEXT:  .LBB4_6: ; %Flow1
 ; GISEL-NEXT:    s_or_b64 exec, exec, s[12:13]
-; GISEL-NEXT:  .LBB4_8: ; %Flow2
+; GISEL-NEXT:  .LBB4_7: ; %Flow2
 ; GISEL-NEXT:    s_andn2_saveexec_b64 s[4:5], s[10:11]
-; GISEL-NEXT:  ; %bb.9: ; %itofp-sw-bb
+; GISEL-NEXT:  ; %bb.8: ; %itofp-sw-bb
 ; GISEL-NEXT:    v_lshlrev_b64 v[0:1], 1, v[0:1]
-; GISEL-NEXT:  ; %bb.10: ; %itofp-sw-epilog
+; GISEL-NEXT:  ; %bb.9: ; %itofp-sw-epilog
 ; GISEL-NEXT:    s_or_b64 exec, exec, s[4:5]
 ; GISEL-NEXT:    v_bfe_u32 v2, v0, 2, 1
 ; GISEL-NEXT:    v_or_b32_e32 v0, v0, v2
@@ -1337,22 +1366,33 @@ define half @sitofp_i128_to_f16(i128 %x) {
 ; GISEL-NEXT:    v_lshrrev_b64 v[4:5], 2, v[0:1]
 ; GISEL-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[2:3]
 ; GISEL-NEXT:    s_and_saveexec_b64 s[4:5], vcc
-; GISEL-NEXT:  ; %bb.11: ; %itofp-if-then20
-; GISEL-NEXT:    v_lshrrev_b64 v[4:5], 3, v[0:1]
-; GISEL-NEXT:    v_mov_b32_e32 v8, v7
-; GISEL-NEXT:  ; %bb.12: ; %Flow
+; GISEL-NEXT:    s_cbranch_execnz .LBB4_14
+; GISEL-NEXT:  .LBB4_10: ; %Flow
 ; GISEL-NEXT:    s_or_b64 exec, exec, s[4:5]
-; GISEL-NEXT:  .LBB4_13: ; %Flow4
+; GISEL-NEXT:  .LBB4_11: ; %Flow4
 ; GISEL-NEXT:    s_or_b64 exec, exec, s[8:9]
 ; GISEL-NEXT:    v_and_b32_e32 v0, 0x80000000, v6
 ; GISEL-NEXT:    v_lshl_add_u32 v1, v8, 23, 1.0
 ; GISEL-NEXT:    v_and_b32_e32 v2, 0x7fffff, v4
 ; GISEL-NEXT:    v_or3_b32 v0, v2, v0, v1
 ; GISEL-NEXT:    v_cvt_f16_f32_e32 v4, v0
-; GISEL-NEXT:  .LBB4_14: ; %Flow5
+; GISEL-NEXT:  .LBB4_12: ; %Flow5
 ; GISEL-NEXT:    s_or_b64 exec, exec, s[6:7]
 ; GISEL-NEXT:    v_mov_b32_e32 v0, v4
 ; GISEL-NEXT:    s_setpc_b64 s[30:31]
+; GISEL-NEXT:  .LBB4_13: ; %itofp-if-else
+; GISEL-NEXT:    v_add_u32_e32 v2, 0xffffff98, v5
+; GISEL-NEXT:    v_lshlrev_b64 v[0:1], v2, v[0:1]
+; GISEL-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v2
+; GISEL-NEXT:    v_cndmask_b32_e32 v4, 0, v0, vcc
+; GISEL-NEXT:    ; implicit-def: $vgpr7
+; GISEL-NEXT:    ; implicit-def: $vgpr0
+; GISEL-NEXT:    ; implicit-def: $vgpr2
+; GISEL-NEXT:    s_branch .LBB4_2
+; GISEL-NEXT:  .LBB4_14: ; %itofp-if-then20
+; GISEL-NEXT:    v_lshrrev_b64 v[4:5], 3, v[0:1]
+; GISEL-NEXT:    v_mov_b32_e32 v8, v7
+; GISEL-NEXT:    s_branch .LBB4_10
   %cvt = sitofp i128 %x to half
   ret half %cvt
 }
@@ -1366,7 +1406,7 @@ define half @uitofp_i128_to_f16(i128 %x) {
 ; SDAG-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[4:5]
 ; SDAG-NEXT:    v_mov_b32_e32 v4, 0
 ; SDAG-NEXT:    s_and_saveexec_b64 s[6:7], vcc
-; SDAG-NEXT:    s_cbranch_execz .LBB5_14
+; SDAG-NEXT:    s_cbranch_execz .LBB5_12
 ; SDAG-NEXT:  ; %bb.1: ; %itofp-if-end
 ; SDAG-NEXT:    v_ffbh_u32_e32 v4, v2
 ; SDAG-NEXT:    v_add_u32_e32 v4, 32, v4
@@ -1384,29 +1424,22 @@ define half @uitofp_i128_to_f16(i128 %x) {
 ; SDAG-NEXT:    ; implicit-def: $vgpr7
 ; SDAG-NEXT:    s_and_saveexec_b64 s[4:5], vcc
 ; SDAG-NEXT:    s_xor_b64 s[4:5], exec, s[4:5]
-; SDAG-NEXT:  ; %bb.2: ; %itofp-if-else
-; SDAG-NEXT:    v_add_u32_e32 v2, 0xffffff98, v6
-; SDAG-NEXT:    v_lshlrev_b64 v[0:1], v2, v[0:1]
-; SDAG-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v2
-; SDAG-NEXT:    v_cndmask_b32_e32 v7, 0, v0, vcc
-; SDAG-NEXT:    ; implicit-def: $vgpr4
-; SDAG-NEXT:    ; implicit-def: $vgpr0_vgpr1
-; SDAG-NEXT:    ; implicit-def: $vgpr2_vgpr3
-; SDAG-NEXT:  ; %bb.3: ; %Flow3
+; SDAG-NEXT:    s_cbranch_execnz .LBB5_13
+; SDAG-NEXT:  .LBB5_2: ; %Flow3
 ; SDAG-NEXT:    s_or_saveexec_b64 s[8:9], s[4:5]
 ; SDAG-NEXT:    v_sub_u32_e32 v5, 0x7f, v6
 ; SDAG-NEXT:    s_xor_b64 exec, exec, s[8:9]
-; SDAG-NEXT:    s_cbranch_execz .LBB5_13
-; SDAG-NEXT:  ; %bb.4: ; %NodeBlock
+; SDAG-NEXT:    s_cbranch_execz .LBB5_11
+; SDAG-NEXT:  ; %bb.3: ; %NodeBlock
 ; SDAG-NEXT:    v_cmp_lt_i32_e32 vcc, 25, v4
 ; SDAG-NEXT:    s_and_saveexec_b64 s[4:5], vcc
 ; SDAG-NEXT:    s_xor_b64 s[10:11], exec, s[4:5]
-; SDAG-NEXT:    s_cbranch_execz .LBB5_8
-; SDAG-NEXT:  ; %bb.5: ; %LeafBlock
+; SDAG-NEXT:    s_cbranch_execz .LBB5_7
+; SDAG-NEXT:  ; %bb.4: ; %LeafBlock
 ; SDAG-NEXT:    v_cmp_ne_u32_e32 vcc, 26, v4
 ; SDAG-NEXT:    s_and_saveexec_b64 s[12:13], vcc
-; SDAG-NEXT:    s_cbranch_execz .LBB5_7
-; SDAG-NEXT:  ; %bb.6: ; %itofp-sw-default
+; SDAG-NEXT:    s_cbranch_execz .LBB5_6
+; SDAG-NEXT:  ; %bb.5: ; %itofp-sw-default
 ; SDAG-NEXT:    v_sub_u32_e32 v11, 0x66, v6
 ; SDAG-NEXT:    v_sub_u32_e32 v9, 64, v11
 ; SDAG-NEXT:    v_lshrrev_b64 v[7:8], v11, v[0:1]
@@ -1444,13 +1477,13 @@ define half @uitofp_i128_to_f16(i128 %x) {
 ; SDAG-NEXT:    v_or_b32_e32 v7, v14, v0
 ; SDAG-NEXT:    v_mov_b32_e32 v0, v7
 ; SDAG-NEXT:    v_mov_b32_e32 v1, v8
-; SDAG-NEXT:  .LBB5_7: ; %Flow1
+; SDAG-NEXT:  .LBB5_6: ; %Flow1
 ; SDAG-NEXT:    s_or_b64 exec, exec, s[12:13]
-; SDAG-NEXT:  .LBB5_8: ; %Flow2
+; SDAG-NEXT:  .LBB5_7: ; %Flow2
 ; SDAG-NEXT:    s_andn2_saveexec_b64 s[4:5], s[10:11]
-; SDAG-NEXT:  ; %bb.9: ; %itofp-sw-bb
+; SDAG-NEXT:  ; %bb.8: ; %itofp-sw-bb
 ; SDAG-NEXT:    v_lshlrev_b64 v[0:1], 1, v[0:1]
-; SDAG-NEXT:  ; %bb.10: ; %itofp-sw-epilog
+; SDAG-NEXT:  ; %bb.9: ; %itofp-sw-epilog
 ; SDAG-NEXT:    s_or_b64 exec, exec, s[4:5]
 ; SDAG-NEXT:    v_lshrrev_b32_e32 v2, 2, v0
 ; SDAG-NEXT:    v_and_or_b32 v0, v2, 1, v0
@@ -1460,21 +1493,32 @@ define half @uitofp_i128_to_f16(i128 %x) {
 ; SDAG-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v2
 ; SDAG-NEXT:    v_alignbit_b32 v7, v1, v0, 2
 ; SDAG-NEXT:    s_and_saveexec_b64 s[4:5], vcc
-; SDAG-NEXT:  ; %bb.11: ; %itofp-if-then20
-; SDAG-NEXT:    v_alignbit_b32 v7, v1, v0, 3
-; SDAG-NEXT:    v_mov_b32_e32 v5, v4
-; SDAG-NEXT:  ; %bb.12: ; %Flow
+; SDAG-NEXT:    s_cbranch_execnz .LBB5_14
+; SDAG-NEXT:  .LBB5_10: ; %Flow
 ; SDAG-NEXT:    s_or_b64 exec, exec, s[4:5]
-; SDAG-NEXT:  .LBB5_13: ; %Flow4
+; SDAG-NEXT:  .LBB5_11: ; %Flow4
 ; SDAG-NEXT:    s_or_b64 exec, exec, s[8:9]
 ; SDAG-NEXT:    v_and_b32_e32 v0, 0x7fffff, v7
 ; SDAG-NEXT:    v_lshl_or_b32 v0, v5, 23, v0
 ; SDAG-NEXT:    v_add_u32_e32 v0, 1.0, v0
 ; SDAG-NEXT:    v_cvt_f16_f32_e32 v4, v0
-; SDAG-NEXT:  .LBB5_14: ; %Flow5
+; SDAG-NEXT:  .LBB5_12: ; %Flow5
 ; SDAG-NEXT:    s_or_b64 exec, exec, s[6:7]
 ; SDAG-NEXT:    v_mov_b32_e32 v0, v4
 ; SDAG-NEXT:    s_setpc_b64 s[30:31]
+; SDAG-NEXT:  .LBB5_13: ; %itofp-if-else
+; SDAG-NEXT:    v_add_u32_e32 v2, 0xffffff98, v6
+; SDAG-NEXT:    v_lshlrev_b64 v[0:1], v2, v[0:1]
+; SDAG-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v2
+; SDAG-NEXT:    v_cndmask_b32_e32 v7, 0, v0, vcc
+; SDAG-NEXT:    ; implicit-def: $vgpr4
+; SDAG-NEXT:    ; implicit-def: $vgpr0_vgpr1
+; SDAG-NEXT:    ; implicit-def: $vgpr2_vgpr3
+; SDAG-NEXT:    s_branch .LBB5_2
+; SDAG-NEXT:  .LBB5_14: ; %itofp-if-then20
+; SDAG-NEXT:    v_alignbit_b32 v7, v1, v0, 3
+; SDAG-NEXT:    v_mov_b32_e32 v5, v4
+; SDAG-NEXT:    s_branch .LBB5_10
 ;
 ; GISEL-LABEL: uitofp_i128_to_f16:
 ; GISEL:       ; %bb.0: ; %itofp-entry
@@ -1485,7 +1529,7 @@ define half @uitofp_i128_to_f16(i128 %x) {
 ; GISEL-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[4:5]
 ; GISEL-NEXT:    v_mov_b32_e32 v4, s4
 ; GISEL-NEXT:    s_and_saveexec_b64 s[6:7], vcc
-; GISEL-NEXT:    s_cbranch_execz .LBB5_14
+; GISEL-NEXT:    s_cbranch_execz .LBB5_12
 ; GISEL-NEXT:  ; %bb.1: ; %itofp-if-end
 ; GISEL-NEXT:    v_ffbh_u32_e32 v5, v0
 ; GISEL-NEXT:    v_ffbh_u32_e32 v4, v1
@@ -1503,29 +1547,22 @@ define half @uitofp_i128_to_f16(i128 %x) {
 ; GISEL-NEXT:    ; implicit-def: $vgpr4
 ; GISEL-NEXT:    s_and_saveexec_b64 s[4:5], vcc
 ; GISEL-NEXT:    s_xor_b64 s[4:5], exec, s[4:5]
-; GISEL-NEXT:  ; %bb.2: ; %itofp-if-else
-; GISEL-NEXT:    v_add_u32_e32 v2, 0xffffff98, v5
-; GISEL-NEXT:    v_lshlrev_b64 v[0:1], v2, v[0:1]
-; GISEL-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v2
-; GISEL-NEXT:    v_cndmask_b32_e32 v4, 0, v0, vcc
-; GISEL-NEXT:    ; implicit-def: $vgpr6
-; GISEL-NEXT:    ; implicit-def: $vgpr0
-; GISEL-NEXT:    ; implicit-def: $vgpr2
-; GISEL-NEXT:  ; %bb.3: ; %Flow3
+; GISEL-NEXT:    s_cbranch_execnz .LBB5_13
+; GISEL-NEXT:  .LBB5_2: ; %Flow3
 ; GISEL-NEXT:    s_or_saveexec_b64 s[8:9], s[4:5]
 ; GISEL-NEXT:    v_sub_u32_e32 v7, 0x7f, v5
 ; GISEL-NEXT:    s_xor_b64 exec, exec, s[8:9]
-; GISEL-NEXT:    s_cbranch_execz .LBB5_13
-; GISEL-NEXT:  ; %bb.4: ; %NodeBlock
+; GISEL-NEXT:    s_cbranch_execz .LBB5_11
+; GISEL-NEXT:  ; %bb.3: ; %NodeBlock
 ; GISEL-NEXT:    v_cmp_le_i32_e32 vcc, 26, v6
 ; GISEL-NEXT:    s_and_saveexec_b64 s[4:5], vcc
 ; GISEL-NEXT:    s_xor_b64 s[10:11], exec, s[4:5]
-; GISEL-NEXT:    s_cbranch_execz .LBB5_8
-; GISEL-NEXT:  ; %bb.5: ; %LeafBlock
+; GISEL-NEXT:    s_cbranch_execz .LBB5_7
+; GISEL-NEXT:  ; %bb.4: ; %LeafBlock
 ; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 26, v6
 ; GISEL-NEXT:    s_and_saveexec_b64 s[12:13], vcc
-; GISEL-NEXT:    s_cbranch_execz .LBB5_7
-; GISEL-NEXT:  ; %bb.6: ; %itofp-sw-default
+; GISEL-NEXT:    s_cbranch_execz .LBB5_6
+; GISEL-NEXT:  ; %bb.5: ; %itofp-sw-default
 ; GISEL-NEXT:    v_sub_u32_e32 v4, 0x66, v5
 ; GISEL-NEXT:    v_sub_u32_e32 v10, 64, v4
 ; GISEL-NEXT:    v_lshrrev_b64 v[8:9], v4, v[0:1]
@@ -1567,13 +1604,13 @@ define half @uitofp_i128_to_f16(i128 %x) {
 ; GISEL-NEXT:    v_mov_b32_e32 v1, v4
 ; GISEL-NEXT:    v_mov_b32_e32 v2, v5
 ; GISEL-NEXT:    v_mov_b32_e32 v3, v6
-; GISEL-NEXT:  .LBB5_7: ; %Flow1
+; GISEL-NEXT:  .LBB5_6: ; %Flow1
 ; GISEL-NEXT:    s_or_b64 exec, exec, s[12:13]
-; GISEL-NEXT:  .LBB5_8: ; %Flow2
+; GISEL-NEXT:  .LBB5_7: ; %Flow2
 ; GISEL-NEXT:    s_andn2_saveexec_b64 s[4:5], s[10:11]
-; GISEL-NEXT:  ; %bb.9: ; %itofp-sw-bb
+; GISEL-NEXT:  ; %bb.8: ; %itofp-sw-bb
 ; GISEL-NEXT:    v_lshlrev_b64 v[0:1], 1, v[0:1]
-; GISEL-NEXT:  ; %bb.10: ; %itofp-sw-epilog
+; GISEL-NEXT:  ; %bb.9: ; %itofp-sw-epilog
 ; GISEL-NEXT:    s_or_b64 exec, exec, s[4:5]
 ; GISEL-NEXT:    v_bfe_u32 v2, v0, 2, 1
 ; GISEL-NEXT:    v_or_b32_e32 v0, v0, v2
@@ -1584,21 +1621,32 @@ define half @uitofp_i128_to_f16(i128 %x) {
 ; GISEL-NEXT:    v_lshrrev_b64 v[4:5], 2, v[0:1]
 ; GISEL-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[2:3]
 ; GISEL-NEXT:    s_and_saveexec_b64 s[4:5], vcc
-; GISEL-NEXT:  ; %bb.11: ; %itofp-if-then20
-; GISEL-NEXT:    v_lshrrev_b64 v[4:5], 3, v[0:1]
-; GISEL-NEXT:    v_mov_b32_e32 v7, v6
-; GISEL-NEXT:  ; %bb.12: ; %Flow
+; GISEL-NEXT:    s_cbranch_execnz .LBB5_14
+; GISEL-NEXT:  .LBB5_10: ; %Flow
 ; GISEL-NEXT:    s_or_b64 exec, exec, s[4:5]
-; GISEL-NEXT:  .LBB5_13: ; %Flow4
+; GISEL-NEXT:  .LBB5_11: ; %Flow4
 ; GISEL-NEXT:    s_or_b64 exec, exec, s[8:9]
 ; GISEL-NEXT:    v_lshl_add_u32 v0, v7, 23, 1.0
 ; GISEL-NEXT:    v_mov_b32_e32 v1, 0x7fffff
 ; GISEL-NEXT:    v_and_or_b32 v0, v4, v1, v0
 ; GISEL-NEXT:    v_cvt_f16_f32_e32 v4, v0
-; GISEL-NEXT:  .LBB5_14: ; %Flow5
+; GISEL-NEXT:  .LBB5_12: ; %Flow5
 ; GISEL-NEXT:    s_or_b64 exec, exec, s[6:7]
 ; GISEL-NEXT:    v_mov_b32_e32 v0, v4
 ; GISEL-NEXT:    s_setpc_b64 s[30:31]
+; GISEL-NEXT:  .LBB5_13: ; %itofp-if-else
+; GISEL-NEXT:    v_add_u32_e32 v2, 0xffffff98, v5
+; GISEL-NEXT:    v_lshlrev_b64 v[0:1], v2, v[0:1]
+; GISEL-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v2
+; GISEL-NEXT:    v_cndmask_b32_e32 v4, 0, v0, vcc
+; GISEL-NEXT:    ; implicit-def: $vgpr6
+; GISEL-NEXT:    ; implicit-def: $vgpr0
+; GISEL-NEXT:    ; implicit-def: $vgpr2
+; GISEL-NEXT:    s_branch .LBB5_2
+; GISEL-NEXT:  .LBB5_14: ; %itofp-if-then20
+; GISEL-NEXT:    v_lshrrev_b64 v[4:5], 3, v[0:1]
+; GISEL-NEXT:    v_mov_b32_e32 v7, v6
+; GISEL-NEXT:    s_branch .LBB5_10
   %cvt = uitofp i128 %x to half
   ret half %cvt
 }
diff --git a/llvm/test/CodeGen/RISCV/bitint-fp-conv-200.ll b/llvm/test/CodeGen/RISCV/bitint-fp-conv-200.ll
index 00a1ed506d1ed..be0ea161fb762 100644
--- a/llvm/test/CodeGen/RISCV/bitint-fp-conv-200.ll
+++ b/llvm/test/CodeGen/RISCV/bitint-fp-conv-200.ll
@@ -120,7 +120,7 @@ define void @test_bitint_200_to_float(ptr %in, ptr %out) nounwind {
 ; RV64-NEXT:    or a0, a2, a4
 ; RV64-NEXT:    or a6, a3, a5
 ; RV64-NEXT:    or a0, a6, a0
-; RV64-NEXT:    beqz a0, .LBB1_20
+; RV64-NEXT:    beqz a0, .LBB1_17
 ; RV64-NEXT:  # %bb.1: # %itofp-if-end
 ; RV64-NEXT:    slli a0, a4, 56
 ; RV64-NEXT:    srai a0, a0, 63
@@ -311,34 +311,14 @@ define void @test_bitint_200_to_float(ptr %in, ptr %out) nounwind {
 ; RV64-NEXT:    sub a7, a6, t0
 ; RV64-NEXT:    li t2, 25
 ; RV64-NEXT:    sub a6, t1, t0
-; RV64-NEXT:    blt a7, t2, .LBB1_14
+; RV64-NEXT:    blt a7, t2, .LBB1_18
 ; RV64-NEXT:  # %bb.11: # %itofp-if-then4
 ; RV64-NEXT:    li t1, 26
-; RV64-NEXT:    beq a7, t1, .LBB1_16
+; RV64-NEXT:    beq a7, t1, .LBB1_14
 ; RV64-NEXT:  # %bb.12: # %itofp-if-then4
 ; RV64-NEXT:    li t1, 25
-; RV64-NEXT:    bne a7, t1, .LBB1_15
-; RV64-NEXT:  # %bb.13: # %itofp-sw-bb
-; RV64-NEXT:    srli a4, a2, 63
-; RV64-NEXT:    slli a3, a3, 1
-; RV64-NEXT:    or a3, a3, a4
-; RV64-NEXT:    slli a2, a2, 1
-; RV64-NEXT:    j .LBB1_16
-; RV64-NEXT:  .LBB1_14: # %itofp-if-else
-; RV64-NEXT:    addi a3, t0, -176
-; RV64-NEXT:    sd a2, 160(sp)
-; RV64-NEXT:    sd zero, 128(sp)
-; RV64-NEXT:    sd zero, 136(sp)
-; RV64-NEXT:    sd zero, 144(sp)
-; RV64-NEXT:    sd zero, 152(sp)
-; RV64-NEXT:    srli a2, a3, 3
-; RV64-NEXT:    andi a2, a2, 24
-; RV64-NEXT:    addi a4, sp, 160
-; RV64-NEXT:    sub a4, a4, a2
-; RV64-NEXT:    ld a2, 0(a4)
-; RV64-NEXT:    sll a2, a2, a3
-; RV64-NEXT:    j .LBB1_19
-; RV64-NEXT:  .LBB1_15: # %itofp-sw-default
+; RV64-NEXT:    beq a7, t1, .LBB1_20
+; RV64-NEXT:  # %bb.13: # %itofp-sw-default
 ; RV64-NEXT:    li t2, 174
 ; RV64-NEXT:    sd zero, 96(sp)
 ; RV64-NEXT:    sd zero, 104(sp)
@@ -405,7 +385,7 @@ define void @test_bitint_200_to_float(ptr %in, ptr %out) nounwind {
 ; RV64-NEXT:    snez a2, a2
 ; RV64-NEXT:    or a2, t2, a2
 ; RV64-NEXT:    mv a3, t0
-; RV64-NEXT:  .LBB1_16: # %itofp-sw-epilog
+; RV64-NEXT:  .LBB1_14: # %itofp-sw-epilog
 ; RV64-NEXT:    slli a4, a2, 61
 ; RV64-NEXT:    srli a4, a4, 63
 ; RV64-NEXT:    or a2, a2, a4
@@ -413,18 +393,12 @@ define void @test_bitint_200_to_float(ptr %in, ptr %out) nounwind {
 ; RV64-NEXT:    seqz a4, a2
 ; RV64-NEXT:    slli a5, a2, 37
 ; RV64-NEXT:    add a3, a3, a4
-; RV64-NEXT:    bltz a5, .LBB1_18
-; RV64-NEXT:  # %bb.17:
+; RV64-NEXT:    bltz a5, .LBB1_19
+; RV64-NEXT:  # %bb.15:
 ; RV64-NEXT:    srli a2, a2, 2
 ; RV64-NEXT:    slli a3, a3, 62
 ; RV64-NEXT:    or a2, a2, a3
-; RV64-NEXT:    j .LBB1_19
-; RV64-NEXT:  .LBB1_18: # %itofp-if-then20
-; RV64-NEXT:    srli a2, a2, 3
-; RV64-NEXT:    slli a3, a3, 61
-; RV64-NEXT:    or a2, a2, a3
-; RV64-NEXT:    mv a6, a7
-; RV64-NEXT:  .LBB1_19: # %itofp-if-end26
+; RV64-NEXT:  .LBB1_16: # %itofp-if-end26
 ; RV64-NEXT:    lui a3, 524288
 ; RV64-NEXT:    slli a6, a6, 23
 ; RV64-NEXT:    and a0, a0, a3
@@ -436,9 +410,35 @@ define void @test_bitint_200_to_float(ptr %in, ptr %out) nounwind {
 ; RV64-NEXT:    or a0, a2, a0
 ; RV64-NEXT:    ld s0, 200(sp) # 8-byte Folded Reload
 ; RV64-NEXT:    addi sp, sp, 208
-; RV64-NEXT:  .LBB1_20: # %itofp-return
+; RV64-NEXT:  .LBB1_17: # %itofp-return
 ; RV64-NEXT:    sw a0, 0(a1)
 ; RV64-NEXT:    ret
+; RV64-NEXT:  .LBB1_18: # %itofp-if-else
+; RV64-NEXT:    addi a3, t0, -176
+; RV64-NEXT:    sd a2, 160(sp)
+; RV64-NEXT:    sd zero, 128(sp)
+; RV64-NEXT:    sd zero, 136(sp)
+; RV64-NEXT:    sd zero, 144(sp)
+; RV64-NEXT:    sd zero, 152(sp)
+; RV64-NEXT:    srli a2, a3, 3
+; RV64-NEXT:    andi a2, a2, 24
+; RV64-NEXT:    addi a4, sp, 160
+; RV64-NEXT:    sub a4, a4, a2
+; RV64-NEXT:    ld a2, 0(a4)
+; RV64-NEXT:    sll a2, a2, a3
+; RV64-NEXT:    j .LBB1_16
+; RV64-NEXT:  .LBB1_19: # %itofp-if-then20
+; RV64-NEXT:    srli a2, a2, 3
+; RV64-NEXT:    slli a3, a3, 61
+; RV64-NEXT:    or a2, a2, a3
+; RV64-NEXT:    mv a6, a7
+; RV64-NEXT:    j .LBB1_16
+; RV64-NEXT:  .LBB1_20: # %itofp-sw-bb
+; RV64-NEXT:    srli a4, a2, 63
+; RV64-NEXT:    slli a3, a3, 1
+; RV64-NEXT:    or a3, a3, a4
+; RV64-NEXT:    slli a2, a2, 1
+; RV64-NEXT:    j .LBB1_14
 ;
 ; RV32-LABEL: test_bitint_200_to_float:
 ; RV32:       # %bb.0: # %itofp-entry
@@ -455,7 +455,7 @@ define void @test_bitint_200_to_float(ptr %in, ptr %out) nounwind {
 ; RV32-NEXT:    or a4, t5, a3
 ; RV32-NEXT:    or a4, a4, t4
 ; RV32-NEXT:    or a0, a0, a4
-; RV32-NEXT:    beqz a0, .LBB1_35
+; RV32-NEXT:    beqz a0, .LBB1_32
 ; RV32-NEXT:  # %bb.1: # %itofp-if-end
 ; RV32-NEXT:    addi sp, sp, -224
 ; RV32-NEXT:    sw s0, 220(sp) # 4-byte Folded Spill
@@ -776,38 +776,14 @@ define void @test_bitint_200_to_float(ptr %in, ptr %out) nounwind {
 ; RV32-NEXT:    sub t3, t2, a6
 ; RV32-NEXT:    li t5, 25
 ; RV32-NEXT:    sub t2, t4, a6
-; RV32-NEXT:    blt t3, t5, .LBB1_29
+; RV32-NEXT:    blt t3, t5, .LBB1_33
 ; RV32-NEXT:  # %bb.26: # %itofp-if-then4
 ; RV32-NEXT:    li t4, 26
-; RV32-NEXT:    beq t3, t4, .LBB1_31
+; RV32-NEXT:    beq t3, t4, .LBB1_29
 ; RV32-NEXT:  # %bb.27: # %itofp-if-then4
 ; RV32-NEXT:    li t4, 25
-; RV32-NEXT:    bne t3, t4, .LBB1_30
-; RV32-NEXT:  # %bb.28: # %itofp-sw-bb
-; RV32-NEXT:    srli a2, t1, 31
-; RV32-NEXT:    slli t0, t0, 1
-; RV32-NEXT:    or t0, t0, a2
-; RV32-NEXT:    slli t1, t1, 1
-; RV32-NEXT:    j .LBB1_31
-; RV32-NEXT:  .LBB1_29: # %itofp-if-else
-; RV32-NEXT:    addi a2, a6, -176
-; RV32-NEXT:    sw t1, 160(sp)
-; RV32-NEXT:    sw zero, 144(sp)
-; RV32-NEXT:    sw zero, 148(sp)
-; RV32-NEXT:    sw zero, 152(sp)
-; RV32-NEXT:    sw zero, 156(sp)
-; RV32-NEXT:    sw zero, 128(sp)
-; RV32-NEXT:    sw zero, 132(sp)
-; RV32-NEXT:    sw zero, 136(sp)
-; RV32-NEXT:    sw zero, 140(sp)
-; RV32-NEXT:    srli a3, a2, 3
-; RV32-NEXT:    andi a3, a3, 28
-; RV32-NEXT:    addi a4, sp, 160
-; RV32-NEXT:    sub a4, a4, a3
-; RV32-NEXT:    lw a3, 0(a4)
-; RV32-NEXT:    sll a2, a3, a2
-; RV32-NEXT:    j .LBB1_34
-; RV32-NEXT:  .LBB1_30: # %itofp-sw-default
+; RV32-NEXT:    beq t3, t4, .LBB1_35
+; RV32-NEXT:  # %bb.28: # %itofp-sw-default
 ; RV32-NEXT:    zext.b a7, a7
 ; RV32-NEXT:    li t5, 174
 ; RV32-NEXT:    sw zero, 112(sp)
@@ -900,7 +876,7 @@ define void @test_bitint_200_to_float(ptr %in, ptr %out) nounwind {
 ; RV32-NEXT:    snez a2, a2
 ; RV32-NEXT:    or t1, s0, a2
 ; RV32-NEXT:    mv t0, t5
-; RV32-NEXT:  .LBB1_31: # %itofp-sw-epilog
+; RV32-NEXT:  .LBB1_29: # %itofp-sw-epilog
 ; RV32-NEXT:    slli a2, t1, 29
 ; RV32-NEXT:    srli a2, a2, 31
 ; RV32-NEXT:    or a2, t1, a2
@@ -908,18 +884,12 @@ define void @test_bitint_200_to_float(ptr %in, ptr %out) nounwind {
 ; RV32-NEXT:    seqz a3, a2
 ; RV32-NEXT:    slli a4, a2, 5
 ; RV32-NEXT:    add t0, t0, a3
-; RV32-NEXT:    bltz a4, .LBB1_33
-; RV32-NEXT:  # %bb.32:
+; RV32-NEXT:    bltz a4, .LBB1_34
+; RV32-NEXT:  # %bb.30:
 ; RV32-NEXT:    srli a2, a2, 2
 ; RV32-NEXT:    slli t0, t0, 30
 ; RV32-NEXT:    or a2, a2, t0
-; RV32-NEXT:    j .LBB1_34
-; RV32-NEXT:  .LBB1_33: # %itofp-if-then20
-; RV32-NEXT:    srli a2, a2, 3
-; RV32-NEXT:    slli t0, t0, 29
-; RV32-NEXT:    or a2, a2, t0
-; RV32-NEXT:    mv t2, t3
-; RV32-NEXT:  .LBB1_34: # %itofp-if-end26
+; RV32-NEXT:  .LBB1_31: # %itofp-if-end26
 ; RV32-NEXT:    lui a3, 524288
 ; RV32-NEXT:    slli t2, t2, 23
 ; RV32-NEXT:    and a0, a0, a3
@@ -937,9 +907,39 @@ define void @test_bitint_200_to_float(ptr %in, ptr %out) nounwind {
 ; RV32-NEXT:    lw s5, 200(sp) # 4-byte Folded Reload
 ; RV32-NEXT:    lw s6, 196(sp) # 4-byte Folded Reload
 ; RV32-NEXT:    addi sp, sp, 224
-; RV32-NEXT:  .LBB1_35: # %itofp-return
+; RV32-NEXT:  .LBB1_32: # %itofp-return
 ; RV32-NEXT:    sw a0, 0(a1)
 ; RV32-NEXT:    ret
+; RV32-NEXT:  .LBB1_33: # %itofp-if-else
+; RV32-NEXT:    addi a2, a6, -176
+; RV32-NEXT:    sw t1, 160(sp)
+; RV32-NEXT:    sw zero, 144(sp)
+; RV32-NEXT:    sw zero, 148(sp)
+; RV32-NEXT:    sw zero, 152(sp)
+; RV32-NEXT:    sw zero, 156(sp)
+; RV32-NEXT:    sw zero, 128(sp)
+; RV32-NEXT:    sw zero, 132(sp)
+; RV32-NEXT:    sw zero, 136(sp)
+; RV32-NEXT:    sw zero, 140(sp)
+; RV32-NEXT:    srli a3, a2, 3
+; RV32-NEXT:    andi a3, a3, 28
+; RV32-NEXT:    addi a4, sp, 160
+; RV32-NEXT:    sub a4, a4, a3
+; RV32-NEXT:    lw a3, 0(a4)
+; RV32-NEXT:    sll a2, a3, a2
+; RV32-NEXT:    j .LBB1_31
+; RV32-NEXT:  .LBB1_34: # %itofp-if-then20
+; RV32-NEXT:    srli a2, a2, 3
+; RV32-NEXT:    slli t0, t0, 29
+; RV32-NEXT:    or a2, a2, t0
+; RV32-NEXT:    mv t2, t3
+; RV32-NEXT:    j .LBB1_31
+; RV32-NEXT:  .LBB1_35: # %itofp-sw-bb
+; RV32-NEXT:    srli a2, t1, 31
+; RV32-NEXT:    slli t0, t0, 1
+; RV32-NEXT:    or t0, t0, a2
+; RV32-NEXT:    slli t1, t1, 1
+; RV32-NEXT:    j .LBB1_29
   %1 = load i200, ptr %in, align 8
   %2 = sitofp i200 %1 to float
   store float %2, ptr %out, align 4
@@ -963,40 +963,45 @@ define void @test_float_to_bitint_200(ptr %in, ptr %out) nounwind {
 ; RV64-NEXT:    sd s9, 168(sp) # 8-byte Folded Spill
 ; RV64-NEXT:    sd s10, 160(sp) # 8-byte Folded Spill
 ; RV64-NEXT:    sd s11, 152(sp) # 8-byte Folded Spill
-; RV64-NEXT:    lw s0, 0(a0)
-; RV64-NEXT:    slli a0, s0, 33
+; RV64-NEXT:    lw s1, 0(a0)
+; RV64-NEXT:    slli a0, s1, 33
 ; RV64-NEXT:    srli a0, a0, 56
 ; RV64-NEXT:    li a2, 127
+; RV64-NEXT:    mv s0, a1
 ; RV64-NEXT:    bgeu a0, a2, .LBB2_2
 ; RV64-NEXT:  # %bb.1:
-; RV64-NEXT:    li s2, 0
+; RV64-NEXT:    li s3, 0
 ; RV64-NEXT:    li s10, 0
 ; RV64-NEXT:    li a0, 0
-; RV64-NEXT:    li a2, 0
-; RV64-NEXT:    j .LBB2_10
+; RV64-NEXT:    li a1, 0
+; RV64-NEXT:    j .LBB2_9
 ; RV64-NEXT:  .LBB2_2: # %fp-to-i-if-end
-; RV64-NEXT:    addi a2, a0, -327
-; RV64-NEXT:    sltu a3, a2, a0
-; RV64-NEXT:    addi a4, a3, -1
-; RV64-NEXT:    zext.b a4, a4
-; RV64-NEXT:    neg a5, a3
-; RV64-NEXT:    xori a6, a4, 255
-; RV64-NEXT:    or a5, a5, a6
-; RV64-NEXT:    beqz a5, .LBB2_6
+; RV64-NEXT:    addi a1, a0, -327
+; RV64-NEXT:    sltu a2, a1, a0
+; RV64-NEXT:    addi a3, a2, -1
+; RV64-NEXT:    zext.b a3, a3
+; RV64-NEXT:    neg a4, a2
+; RV64-NEXT:    xori a5, a3, 255
+; RV64-NEXT:    or a4, a4, a5
+; RV64-NEXT:    beqz a4, .LBB2_4
 ; RV64-NEXT:  # %bb.3: # %fp-to-i-if-end
-; RV64-NEXT:    sltiu a2, a4, 256
-; RV64-NEXT:    bnez a2, .LBB2_7
-; RV64-NEXT:  .LBB2_4: # %fp-to-i-if-end9
-; RV64-NEXT:    sd a1, 8(sp) # 8-byte Folded Spill
-; RV64-NEXT:    slli a1, s0, 41
-; RV64-NEXT:    srai s0, s0, 63
+; RV64-NEXT:    sltiu a1, a3, 256
+; RV64-NEXT:    j .LBB2_5
+; RV64-NEXT:  .LBB2_4:
+; RV64-NEXT:    sltiu a1, a1, -200
+; RV64-NEXT:    or a1, a2, a1
+; RV64-NEXT:  .LBB2_5: # %fp-to-i-if-end
+; RV64-NEXT:    bnez a1, .LBB2_10
+; RV64-NEXT:  # %bb.6: # %fp-to-i-if-end9
+; RV64-NEXT:    slli a1, s1, 41
+; RV64-NEXT:    srai s1, s1, 63
 ; RV64-NEXT:    lui a2, 2048
 ; RV64-NEXT:    li a3, 149
 ; RV64-NEXT:    srli a1, a1, 41
 ; RV64-NEXT:    or a1, a1, a2
-; RV64-NEXT:    ori s8, s0, 1
+; RV64-NEXT:    ori s9, s1, 1
 ; RV64-NEXT:    bltu a3, a0, .LBB2_8
-; RV64-NEXT:  # %bb.5: # %fp-to-i-if-then12
+; RV64-NEXT:  # %bb.7: # %fp-to-i-if-then12
 ; RV64-NEXT:    sd zero, 48(sp)
 ; RV64-NEXT:    sd zero, 56(sp)
 ; RV64-NEXT:    sd zero, 64(sp)
@@ -1027,79 +1032,69 @@ define void @test_float_to_bitint_200(ptr %in, ptr %out) nounwind {
 ; RV64-NEXT:    srl a2, a2, a1
 ; RV64-NEXT:    srl a0, a0, a1
 ; RV64-NEXT:    or a3, a3, a6
-; RV64-NEXT:    sd a3, 0(sp) # 8-byte Folded Spill
-; RV64-NEXT:    or s5, a2, a7
-; RV64-NEXT:    or s6, a0, a5
-; RV64-NEXT:    srl s4, a4, a1
-; RV64-NEXT:    mv a0, s6
+; RV64-NEXT:    sd a3, 8(sp) # 8-byte Folded Spill
+; RV64-NEXT:    or s6, a2, a7
+; RV64-NEXT:    or s7, a0, a5
+; RV64-NEXT:    srl a0, a4, a1
+; RV64-NEXT:    sd a0, 0(sp) # 8-byte Folded Spill
+; RV64-NEXT:    mv a0, s7
 ; RV64-NEXT:    li a1, 0
-; RV64-NEXT:    mv a2, s8
+; RV64-NEXT:    mv a2, s9
 ; RV64-NEXT:    li a3, 0
 ; RV64-NEXT:    call __multi3
-; RV64-NEXT:    mv s2, a0
-; RV64-NEXT:    mv s7, a1
-; RV64-NEXT:    mv a0, s5
+; RV64-NEXT:    mv s3, a0
+; RV64-NEXT:    mv s8, a1
+; RV64-NEXT:    mv a0, s6
 ; RV64-NEXT:    li a1, 0
-; RV64-NEXT:    mv a2, s8
+; RV64-NEXT:    mv a2, s9
 ; RV64-NEXT:    li a3, 0
 ; RV64-NEXT:    call __multi3
-; RV64-NEXT:    add s7, a0, s7
-; RV64-NEXT:    sltu a0, s7, a0
-; RV64-NEXT:    add s9, a1, a0
-; RV64-NEXT:    mv a0, s6
+; RV64-NEXT:    add s8, a0, s8
+; RV64-NEXT:    sltu a0, s8, a0
+; RV64-NEXT:    add s2, a1, a0
+; RV64-NEXT:    mv a0, s7
 ; RV64-NEXT:    li a1, 0
-; RV64-NEXT:    mv a2, s0
+; RV64-NEXT:    mv a2, s1
 ; RV64-NEXT:    li a3, 0
 ; RV64-NEXT:    call __multi3
-; RV64-NEXT:    add s10, a0, s7
+; RV64-NEXT:    add s10, a0, s8
 ; RV64-NEXT:    sltu a0, s10, a0
 ; RV64-NEXT:    add a0, a1, a0
-; RV64-NEXT:    add s11, s9, a0
-; RV64-NEXT:    mv a0, s5
+; RV64-NEXT:    add s11, s2, a0
+; RV64-NEXT:    mv a0, s6
 ; RV64-NEXT:    li a1, 0
-; RV64-NEXT:    mv a2, s0
+; RV64-NEXT:    mv a2, s1
 ; RV64-NEXT:    li a3, 0
 ; RV64-NEXT:    call __multi3
-; RV64-NEXT:    mv s1, a0
-; RV64-NEXT:    mv s3, s8
-; RV64-NEXT:    mv s8, a1
-; RV64-NEXT:    add s7, a0, s11
-; RV64-NEXT:    mv a0, s0
-; RV64-NEXT:    mv a1, s0
-; RV64-NEXT:    mv a2, s6
-; RV64-NEXT:    mv a3, s5
+; RV64-NEXT:    mv s4, a0
+; RV64-NEXT:    mv s5, s9
+; RV64-NEXT:    mv s9, a1
+; RV64-NEXT:    add s8, a0, s11
+; RV64-NEXT:    mv a0, s1
+; RV64-NEXT:    mv a1, s1
+; RV64-NEXT:    mv a2, s7
+; RV64-NEXT:    mv a3, s6
 ; RV64-NEXT:    call __multi3
-; RV64-NEXT:    mv s5, a0
-; RV64-NEXT:    mv s6, a1
-; RV64-NEXT:    mv a0, s3
-; RV64-NEXT:    mv a1, s0
-; RV64-NEXT:    ld a2, 0(sp) # 8-byte Folded Reload
-; RV64-NEXT:    mv a3, s4
+; RV64-NEXT:    mv s6, a0
+; RV64-NEXT:    mv s7, a1
+; RV64-NEXT:    mv a0, s5
+; RV64-NEXT:    mv a1, s1
+; RV64-NEXT:    ld a2, 8(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld a3, 0(sp) # 8-byte Folded Reload
 ; RV64-NEXT:    call __multi3
-; RV64-NEXT:    add a2, s5, a0
-; RV64-NEXT:    sltu a3, s7, s1
-; RV64-NEXT:    sltu a4, s11, s9
-; RV64-NEXT:    add a1, s6, a1
-; RV64-NEXT:    add a0, s7, a2
-; RV64-NEXT:    add a4, s8, a4
-; RV64-NEXT:    sltu a2, a2, s5
-; RV64-NEXT:    sltu a5, a0, s7
+; RV64-NEXT:    add a2, s6, a0
+; RV64-NEXT:    sltu a3, s8, s4
+; RV64-NEXT:    sltu a4, s11, s2
+; RV64-NEXT:    add a1, s7, a1
+; RV64-NEXT:    add a0, s8, a2
+; RV64-NEXT:    add a4, s9, a4
+; RV64-NEXT:    sltu a2, a2, s6
+; RV64-NEXT:    sltu a5, a0, s8
 ; RV64-NEXT:    add a3, a4, a3
 ; RV64-NEXT:    add a1, a1, a2
 ; RV64-NEXT:    add a1, a3, a1
-; RV64-NEXT:    add a2, a1, a5
+; RV64-NEXT:    add a1, a1, a5
 ; RV64-NEXT:    j .LBB2_9
-; RV64-NEXT:  .LBB2_6:
-; RV64-NEXT:    sltiu a2, a2, -200
-; RV64-NEXT:    or a2, a3, a2
-; RV64-NEXT:    beqz a2, .LBB2_4
-; RV64-NEXT:  .LBB2_7: # %fp-to-i-if-then5
-; RV64-NEXT:    srai s0, s0, 31
-; RV64-NEXT:    not s2, s0
-; RV64-NEXT:    xori a2, s0, 127
-; RV64-NEXT:    mv s10, s2
-; RV64-NEXT:    mv a0, s2
-; RV64-NEXT:    j .LBB2_10
 ; RV64-NEXT:  .LBB2_8: # %fp-to-i-if-else
 ; RV64-NEXT:    sd a1, 112(sp)
 ; RV64-NEXT:    sd zero, 120(sp)
@@ -1129,70 +1124,68 @@ define void @test_float_to_bitint_200(ptr %in, ptr %out) nounwind {
 ; RV64-NEXT:    sll a2, a2, a0
 ; RV64-NEXT:    sll a5, a5, a0
 ; RV64-NEXT:    sll a1, a1, a0
-; RV64-NEXT:    or s3, a2, a6
+; RV64-NEXT:    or s4, a2, a6
 ; RV64-NEXT:    or a2, a5, a7
 ; RV64-NEXT:    or a3, a1, a3
-; RV64-NEXT:    sll s4, a4, a0
-; RV64-NEXT:    mv a0, s8
-; RV64-NEXT:    mv a1, s0
+; RV64-NEXT:    sll s5, a4, a0
+; RV64-NEXT:    mv a0, s9
+; RV64-NEXT:    mv a1, s1
 ; RV64-NEXT:    call __multi3
-; RV64-NEXT:    mv s2, a0
-; RV64-NEXT:    mv s5, a1
-; RV64-NEXT:    mv a0, s0
-; RV64-NEXT:    mv a1, s0
-; RV64-NEXT:    mv a2, s4
-; RV64-NEXT:    mv a3, s3
+; RV64-NEXT:    mv s3, a0
+; RV64-NEXT:    mv s6, a1
+; RV64-NEXT:    mv a0, s1
+; RV64-NEXT:    mv a1, s1
+; RV64-NEXT:    mv a2, s5
+; RV64-NEXT:    mv a3, s4
 ; RV64-NEXT:    call __multi3
-; RV64-NEXT:    add a1, a1, s5
-; RV64-NEXT:    add s1, a0, s2
-; RV64-NEXT:    sltu a0, s1, a0
-; RV64-NEXT:    add s7, a1, a0
-; RV64-NEXT:    mv a0, s3
+; RV64-NEXT:    add a1, a1, s6
+; RV64-NEXT:    add s2, a0, s3
+; RV64-NEXT:    sltu a0, s2, a0
+; RV64-NEXT:    add s8, a1, a0
+; RV64-NEXT:    mv a0, s4
 ; RV64-NEXT:    li a1, 0
-; RV64-NEXT:    mv a2, s8
+; RV64-NEXT:    mv a2, s9
 ; RV64-NEXT:    li a3, 0
 ; RV64-NEXT:    call __multi3
-; RV64-NEXT:    mv s5, a0
-; RV64-NEXT:    mv s6, a1
-; RV64-NEXT:    mv a0, s4
+; RV64-NEXT:    mv s6, a0
+; RV64-NEXT:    mv s7, a1
+; RV64-NEXT:    mv a0, s5
 ; RV64-NEXT:    li a1, 0
-; RV64-NEXT:    mv a2, s8
+; RV64-NEXT:    mv a2, s9
 ; RV64-NEXT:    li a3, 0
 ; RV64-NEXT:    call __multi3
-; RV64-NEXT:    mv s2, a0
-; RV64-NEXT:    add s10, s5, a1
-; RV64-NEXT:    sltu a0, s10, s5
-; RV64-NEXT:    add s6, s6, a0
-; RV64-NEXT:    mv a0, s4
+; RV64-NEXT:    mv s3, a0
+; RV64-NEXT:    add s10, s6, a1
+; RV64-NEXT:    sltu a0, s10, s6
+; RV64-NEXT:    add s7, s7, a0
+; RV64-NEXT:    mv a0, s5
 ; RV64-NEXT:    li a1, 0
-; RV64-NEXT:    mv a2, s0
+; RV64-NEXT:    mv a2, s1
 ; RV64-NEXT:    li a3, 0
 ; RV64-NEXT:    call __multi3
 ; RV64-NEXT:    add s10, a0, s10
 ; RV64-NEXT:    sltu a0, s10, a0
 ; RV64-NEXT:    add a0, a1, a0
-; RV64-NEXT:    add s4, s6, a0
-; RV64-NEXT:    sltu s5, s4, s6
-; RV64-NEXT:    mv a0, s3
+; RV64-NEXT:    add s5, s7, a0
+; RV64-NEXT:    sltu s6, s5, s7
+; RV64-NEXT:    mv a0, s4
 ; RV64-NEXT:    li a1, 0
-; RV64-NEXT:    mv a2, s0
+; RV64-NEXT:    mv a2, s1
 ; RV64-NEXT:    li a3, 0
 ; RV64-NEXT:    call __multi3
-; RV64-NEXT:    add a1, a1, s5
-; RV64-NEXT:    add s4, a0, s4
-; RV64-NEXT:    sltu a2, s4, a0
-; RV64-NEXT:    add a0, s4, s1
+; RV64-NEXT:    add a1, a1, s6
+; RV64-NEXT:    add s5, a0, s5
+; RV64-NEXT:    sltu a2, s5, a0
+; RV64-NEXT:    add a0, s5, s2
+; RV64-NEXT:    add a1, a1, a2
+; RV64-NEXT:    add a1, a1, s8
+; RV64-NEXT:    sltu a2, a0, s5
 ; RV64-NEXT:    add a1, a1, a2
-; RV64-NEXT:    add a1, a1, s7
-; RV64-NEXT:    sltu a2, a0, s4
-; RV64-NEXT:    add a2, a1, a2
 ; RV64-NEXT:  .LBB2_9: # %fp-to-i-cleanup
-; RV64-NEXT:    ld a1, 8(sp) # 8-byte Folded Reload
-; RV64-NEXT:  .LBB2_10: # %fp-to-i-cleanup
-; RV64-NEXT:    sd s2, 0(a1)
-; RV64-NEXT:    sd s10, 8(a1)
-; RV64-NEXT:    sd a0, 16(a1)
-; RV64-NEXT:    sb a2, 24(a1)
+; RV64-NEXT:    sd s3, 0(s0)
+; RV64-NEXT:    sd s10, 8(s0)
+; RV64-NEXT:    sd a0, 16(s0)
+; RV64-NEXT:    sb a1, 24(s0)
 ; RV64-NEXT:    ld ra, 248(sp) # 8-byte Folded Reload
 ; RV64-NEXT:    ld s0, 240(sp) # 8-byte Folded Reload
 ; RV64-NEXT:    ld s1, 232(sp) # 8-byte Folded Reload
@@ -1208,23 +1201,30 @@ define void @test_float_to_bitint_200(ptr %in, ptr %out) nounwind {
 ; RV64-NEXT:    ld s11, 152(sp) # 8-byte Folded Reload
 ; RV64-NEXT:    addi sp, sp, 256
 ; RV64-NEXT:    ret
+; RV64-NEXT:  .LBB2_10: # %fp-to-i-if-then5
+; RV64-NEXT:    srai s1, s1, 31
+; RV64-NEXT:    not s3, s1
+; RV64-NEXT:    xori a1, s1, 127
+; RV64-NEXT:    mv s10, s3
+; RV64-NEXT:    mv a0, s3
+; RV64-NEXT:    j .LBB2_9
 ;
 ; RV32-LABEL: test_float_to_bitint_200:
 ; RV32:       # %bb.0: # %fp-to-i-entry
-; RV32-NEXT:    addi sp, sp, -336
-; RV32-NEXT:    sw ra, 332(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s0, 328(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s1, 324(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s2, 320(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s3, 316(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s4, 312(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s5, 308(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s6, 304(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s7, 300(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s8, 296(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s9, 292(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s10, 288(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s11, 284(sp) # 4-byte Folded Spill
+; RV32-NEXT:    addi sp, sp, -320
+; RV32-NEXT:    sw ra, 316(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s0, 312(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s1, 308(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s2, 304(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s3, 300(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s4, 296(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s5, 292(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s6, 288(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s7, 284(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s8, 280(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s9, 276(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s10, 272(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s11, 268(sp) # 4-byte Folded Spill
 ; RV32-NEXT:    lw a2, 0(a0)
 ; RV32-NEXT:    slli a0, a2, 1
 ; RV32-NEXT:    srli a0, a0, 24
@@ -1233,49 +1233,34 @@ define void @test_float_to_bitint_200(ptr %in, ptr %out) nounwind {
 ; RV32-NEXT:  # %bb.1:
 ; RV32-NEXT:    li a3, 0
 ; RV32-NEXT:    li a2, 0
-; RV32-NEXT:    li a4, 0
-; RV32-NEXT:    li s3, 0
-; RV32-NEXT:    li s8, 0
 ; RV32-NEXT:    li s5, 0
+; RV32-NEXT:    li a4, 0
+; RV32-NEXT:    li a5, 0
+; RV32-NEXT:    li s0, 0
 ; RV32-NEXT:    li a0, 0
 ; RV32-NEXT:    j .LBB2_31
 ; RV32-NEXT:  .LBB2_2: # %fp-to-i-if-end
-; RV32-NEXT:    addi a3, a0, -327
-; RV32-NEXT:    sltu a4, a3, a0
-; RV32-NEXT:    sltiu a3, a3, -200
-; RV32-NEXT:    addi a5, a4, -1
-; RV32-NEXT:    or a3, a4, a3
+; RV32-NEXT:    addi a4, a0, -327
+; RV32-NEXT:    sltu a3, a4, a0
+; RV32-NEXT:    sltiu a5, a4, -200
+; RV32-NEXT:    addi a4, a3, -1
+; RV32-NEXT:    or a3, a3, a5
 ; RV32-NEXT:    xori a3, a3, 1
-; RV32-NEXT:    and a3, a5, a3
-; RV32-NEXT:    bnez a3, .LBB2_4
-; RV32-NEXT:  # %bb.3: # %fp-to-i-if-then5
-; RV32-NEXT:    srli a2, a2, 31
-; RV32-NEXT:    addi a3, a2, -1
-; RV32-NEXT:    addi a0, a2, -129
-; RV32-NEXT:    zext.b a0, a0
-; RV32-NEXT:    mv a2, a3
-; RV32-NEXT:    mv a4, a3
-; RV32-NEXT:    mv s3, a3
-; RV32-NEXT:    mv s8, a3
-; RV32-NEXT:    mv s5, a3
-; RV32-NEXT:    j .LBB2_31
-; RV32-NEXT:  .LBB2_4: # %fp-to-i-if-end9
-; RV32-NEXT:    sw a1, 136(sp) # 4-byte Folded Spill
-; RV32-NEXT:    srai s0, a2, 31
+; RV32-NEXT:    and a3, a4, a3
+; RV32-NEXT:    beqz a3, .LBB2_32
+; RV32-NEXT:  # %bb.3: # %fp-to-i-if-end9
+; RV32-NEXT:    sw a1, 120(sp) # 4-byte Folded Spill
+; RV32-NEXT:    srai s1, a2, 31
 ; RV32-NEXT:    slli a2, a2, 9
 ; RV32-NEXT:    lui a1, 2048
 ; RV32-NEXT:    li a3, 149
-; RV32-NEXT:    zext.b a4, s0
-; RV32-NEXT:    sw a4, 132(sp) # 4-byte Folded Spill
+; RV32-NEXT:    zext.b a4, s1
+; RV32-NEXT:    sw a4, 116(sp) # 4-byte Folded Spill
 ; RV32-NEXT:    srli a2, a2, 9
 ; RV32-NEXT:    or a1, a2, a1
-; RV32-NEXT:    ori s11, s0, 1
-; RV32-NEXT:    bltu a3, a0, .LBB2_18
-; RV32-NEXT:  # %bb.5: # %fp-to-i-if-then12
-; RV32-NEXT:    sw zero, 200(sp)
-; RV32-NEXT:    sw zero, 204(sp)
-; RV32-NEXT:    sw zero, 208(sp)
-; RV32-NEXT:    sw zero, 212(sp)
+; RV32-NEXT:    ori s11, s1, 1
+; RV32-NEXT:    bltu a3, a0, .LBB2_17
+; RV32-NEXT:  # %bb.4: # %fp-to-i-if-then12
 ; RV32-NEXT:    sw zero, 184(sp)
 ; RV32-NEXT:    sw zero, 188(sp)
 ; RV32-NEXT:    sw zero, 192(sp)
@@ -1284,12 +1269,16 @@ define void @test_float_to_bitint_200(ptr %in, ptr %out) nounwind {
 ; RV32-NEXT:    sw zero, 172(sp)
 ; RV32-NEXT:    sw zero, 176(sp)
 ; RV32-NEXT:    sw zero, 180(sp)
-; RV32-NEXT:    sw a1, 152(sp)
+; RV32-NEXT:    sw zero, 152(sp)
 ; RV32-NEXT:    sw zero, 156(sp)
 ; RV32-NEXT:    sw zero, 160(sp)
 ; RV32-NEXT:    sw zero, 164(sp)
+; RV32-NEXT:    sw a1, 136(sp)
+; RV32-NEXT:    sw zero, 140(sp)
+; RV32-NEXT:    sw zero, 144(sp)
+; RV32-NEXT:    sw zero, 148(sp)
 ; RV32-NEXT:    li a1, 150
-; RV32-NEXT:    addi a2, sp, 152
+; RV32-NEXT:    addi a2, sp, 136
 ; RV32-NEXT:    sub t0, a1, a0
 ; RV32-NEXT:    srli a0, t0, 3
 ; RV32-NEXT:    andi a1, t0, 31
@@ -1300,424 +1289,415 @@ define void @test_float_to_bitint_200(ptr %in, ptr %out) nounwind {
 ; RV32-NEXT:    lw a2, 20(a0)
 ; RV32-NEXT:    lw a6, 24(a0)
 ; RV32-NEXT:    lw a3, 28(a0)
-; RV32-NEXT:    sw a3, 88(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw a3, 68(sp) # 4-byte Folded Spill
 ; RV32-NEXT:    lw a3, 0(a0)
 ; RV32-NEXT:    lw a4, 4(a0)
 ; RV32-NEXT:    lw a5, 8(a0)
 ; RV32-NEXT:    lw a0, 12(a0)
 ; RV32-NEXT:    srl a7, a2, t0
-; RV32-NEXT:    sw a7, 120(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw a6, 84(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw a7, 104(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw a6, 64(sp) # 4-byte Folded Spill
 ; RV32-NEXT:    slli a6, a6, 1
 ; RV32-NEXT:    srl a7, a1, t0
-; RV32-NEXT:    sw a7, 116(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw a7, 100(sp) # 4-byte Folded Spill
 ; RV32-NEXT:    slli a2, a2, 1
 ; RV32-NEXT:    srl a7, a0, t0
 ; RV32-NEXT:    slli a1, a1, 1
 ; RV32-NEXT:    sll a1, a1, t1
-; RV32-NEXT:    or s3, a7, a1
+; RV32-NEXT:    or s7, a7, a1
 ; RV32-NEXT:    srl a1, a5, t0
 ; RV32-NEXT:    slli a0, a0, 1
 ; RV32-NEXT:    sll a0, a0, t1
-; RV32-NEXT:    or s4, a1, a0
+; RV32-NEXT:    or s8, a1, a0
 ; RV32-NEXT:    srl a0, a4, t0
 ; RV32-NEXT:    slli a5, a5, 1
-; RV32-NEXT:    sw t0, 96(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw t0, 76(sp) # 4-byte Folded Spill
 ; RV32-NEXT:    srl a1, a3, t0
 ; RV32-NEXT:    slli a4, a4, 1
-; RV32-NEXT:    sll s8, a6, t1
-; RV32-NEXT:    sll s9, a2, t1
+; RV32-NEXT:    sll s9, a6, t1
+; RV32-NEXT:    sll s10, a2, t1
 ; RV32-NEXT:    sll a2, a5, t1
-; RV32-NEXT:    sw t1, 92(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw t1, 72(sp) # 4-byte Folded Spill
 ; RV32-NEXT:    sll a3, a4, t1
-; RV32-NEXT:    or s5, a0, a2
-; RV32-NEXT:    or s6, a1, a3
-; RV32-NEXT:    mv a0, s6
+; RV32-NEXT:    or s3, a0, a2
+; RV32-NEXT:    or s4, a1, a3
+; RV32-NEXT:    mv a0, s4
 ; RV32-NEXT:    li a1, 0
 ; RV32-NEXT:    mv a2, s11
 ; RV32-NEXT:    li a3, 0
 ; RV32-NEXT:    call __muldi3
-; RV32-NEXT:    sw a0, 128(sp) # 4-byte Folded Spill
-; RV32-NEXT:    mv s1, a1
-; RV32-NEXT:    mv a0, s5
+; RV32-NEXT:    sw a0, 112(sp) # 4-byte Folded Spill
+; RV32-NEXT:    mv s2, a1
+; RV32-NEXT:    mv a0, s3
 ; RV32-NEXT:    li a1, 0
 ; RV32-NEXT:    mv a2, s11
 ; RV32-NEXT:    li a3, 0
 ; RV32-NEXT:    call __muldi3
-; RV32-NEXT:    add s1, a0, s1
-; RV32-NEXT:    sltu a0, s1, a0
-; RV32-NEXT:    add s2, a1, a0
-; RV32-NEXT:    sw s6, 140(sp) # 4-byte Folded Spill
-; RV32-NEXT:    mv a0, s6
+; RV32-NEXT:    add s2, a0, s2
+; RV32-NEXT:    sltu a0, s2, a0
+; RV32-NEXT:    add s0, a1, a0
+; RV32-NEXT:    sw s4, 124(sp) # 4-byte Folded Spill
+; RV32-NEXT:    mv a0, s4
 ; RV32-NEXT:    li a1, 0
-; RV32-NEXT:    mv a2, s0
+; RV32-NEXT:    mv a2, s1
 ; RV32-NEXT:    li a3, 0
 ; RV32-NEXT:    call __muldi3
-; RV32-NEXT:    add s1, a0, s1
-; RV32-NEXT:    sw s1, 124(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sltu a0, s1, a0
+; RV32-NEXT:    add s2, a0, s2
+; RV32-NEXT:    sw s2, 108(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sltu a0, s2, a0
 ; RV32-NEXT:    add a0, a1, a0
-; RV32-NEXT:    add s1, s2, a0
-; RV32-NEXT:    sw s5, 148(sp) # 4-byte Folded Spill
-; RV32-NEXT:    mv a0, s5
+; RV32-NEXT:    add s2, s0, a0
+; RV32-NEXT:    sw s3, 132(sp) # 4-byte Folded Spill
+; RV32-NEXT:    mv a0, s3
 ; RV32-NEXT:    li a1, 0
-; RV32-NEXT:    mv a2, s0
+; RV32-NEXT:    mv a2, s1
 ; RV32-NEXT:    li a3, 0
 ; RV32-NEXT:    call __muldi3
-; RV32-NEXT:    add s7, a0, s1
-; RV32-NEXT:    sltu a2, s1, s2
-; RV32-NEXT:    sltu a0, s7, a0
+; RV32-NEXT:    add s5, a0, s2
+; RV32-NEXT:    sltu a2, s2, s0
+; RV32-NEXT:    sltu a0, s5, a0
 ; RV32-NEXT:    add a1, a1, a2
-; RV32-NEXT:    add s10, a1, a0
-; RV32-NEXT:    mv a0, s4
+; RV32-NEXT:    add s0, a1, a0
+; RV32-NEXT:    mv a0, s8
 ; RV32-NEXT:    li a1, 0
 ; RV32-NEXT:    mv a2, s11
 ; RV32-NEXT:    li a3, 0
 ; RV32-NEXT:    call __muldi3
-; RV32-NEXT:    mv s5, a0
+; RV32-NEXT:    mv s4, a0
 ; RV32-NEXT:    mv s6, a1
-; RV32-NEXT:    sw s3, 144(sp) # 4-byte Folded Spill
-; RV32-NEXT:    mv a0, s3
+; RV32-NEXT:    sw s7, 128(sp) # 4-byte Folded Spill
+; RV32-NEXT:    mv a0, s7
 ; RV32-NEXT:    li a1, 0
 ; RV32-NEXT:    mv a2, s11
 ; RV32-NEXT:    li a3, 0
 ; RV32-NEXT:    call __muldi3
-; RV32-NEXT:    mv s2, a0
-; RV32-NEXT:    mv s1, a1
+; RV32-NEXT:    mv s3, a0
+; RV32-NEXT:    mv s2, a1
 ; RV32-NEXT:    add s6, a0, s6
-; RV32-NEXT:    sw s4, 112(sp) # 4-byte Folded Spill
-; RV32-NEXT:    mv a0, s4
+; RV32-NEXT:    sw s8, 88(sp) # 4-byte Folded Spill
+; RV32-NEXT:    mv a0, s8
 ; RV32-NEXT:    li a1, 0
-; RV32-NEXT:    mv a2, s0
+; RV32-NEXT:    mv a2, s1
 ; RV32-NEXT:    li a3, 0
 ; RV32-NEXT:    call __muldi3
 ; RV32-NEXT:    add a2, a0, s6
-; RV32-NEXT:    add s7, s5, s7
-; RV32-NEXT:    add s10, a2, s10
-; RV32-NEXT:    sltu s5, s7, s5
-; RV32-NEXT:    add s10, s10, s5
-; RV32-NEXT:    beq s10, a2, .LBB2_7
-; RV32-NEXT:  # %bb.6: # %fp-to-i-if-then12
-; RV32-NEXT:    sltu s5, s10, a2
-; RV32-NEXT:  .LBB2_7: # %fp-to-i-if-then12
-; RV32-NEXT:    lw a3, 120(sp) # 4-byte Folded Reload
-; RV32-NEXT:    or a3, a3, s8
-; RV32-NEXT:    sw a3, 108(sp) # 4-byte Folded Spill
-; RV32-NEXT:    lw a3, 116(sp) # 4-byte Folded Reload
+; RV32-NEXT:    add s5, s4, s5
+; RV32-NEXT:    add s11, a2, s0
+; RV32-NEXT:    sltu s0, s5, s4
+; RV32-NEXT:    add s11, s11, s0
+; RV32-NEXT:    beq s11, a2, .LBB2_6
+; RV32-NEXT:  # %bb.5: # %fp-to-i-if-then12
+; RV32-NEXT:    sltu s0, s11, a2
+; RV32-NEXT:  .LBB2_6: # %fp-to-i-if-then12
+; RV32-NEXT:    lw a3, 104(sp) # 4-byte Folded Reload
 ; RV32-NEXT:    or a3, a3, s9
-; RV32-NEXT:    sw a3, 104(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sltu a3, s6, s2
+; RV32-NEXT:    sw a3, 84(sp) # 4-byte Folded Spill
+; RV32-NEXT:    lw a3, 100(sp) # 4-byte Folded Reload
+; RV32-NEXT:    or a3, a3, s10
+; RV32-NEXT:    sw a3, 92(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sltu a3, s6, s3
 ; RV32-NEXT:    sltu a0, a2, a0
-; RV32-NEXT:    add a3, s1, a3
+; RV32-NEXT:    add a3, s2, a3
 ; RV32-NEXT:    add a0, a1, a0
-; RV32-NEXT:    sw a3, 100(sp) # 4-byte Folded Spill
-; RV32-NEXT:    add s1, a3, a0
-; RV32-NEXT:    lw a0, 144(sp) # 4-byte Folded Reload
+; RV32-NEXT:    sw a3, 96(sp) # 4-byte Folded Spill
+; RV32-NEXT:    add s2, a3, a0
+; RV32-NEXT:    lw a0, 128(sp) # 4-byte Folded Reload
 ; RV32-NEXT:    li a1, 0
-; RV32-NEXT:    mv a2, s0
+; RV32-NEXT:    mv a2, s1
 ; RV32-NEXT:    li a3, 0
 ; RV32-NEXT:    call __muldi3
-; RV32-NEXT:    sw a1, 76(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s1, 80(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw a0, 72(sp) # 4-byte Folded Spill
-; RV32-NEXT:    add a0, a0, s1
-; RV32-NEXT:    sw a0, 68(sp) # 4-byte Folded Spill
-; RV32-NEXT:    add s4, a0, s5
-; RV32-NEXT:    lw s1, 140(sp) # 4-byte Folded Reload
-; RV32-NEXT:    mv a0, s1
+; RV32-NEXT:    sw a1, 60(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s2, 80(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw a0, 56(sp) # 4-byte Folded Spill
+; RV32-NEXT:    add a0, a0, s2
+; RV32-NEXT:    sw a0, 52(sp) # 4-byte Folded Spill
+; RV32-NEXT:    add s3, a0, s0
+; RV32-NEXT:    lw s0, 124(sp) # 4-byte Folded Reload
+; RV32-NEXT:    mv a0, s0
 ; RV32-NEXT:    li a1, 0
-; RV32-NEXT:    mv a2, s0
+; RV32-NEXT:    mv a2, s1
 ; RV32-NEXT:    li a3, 0
 ; RV32-NEXT:    call __muldi3
-; RV32-NEXT:    mv s8, a0
+; RV32-NEXT:    mv s7, a0
 ; RV32-NEXT:    mv s9, a1
-; RV32-NEXT:    lw a0, 148(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw a0, 132(sp) # 4-byte Folded Reload
 ; RV32-NEXT:    li a1, 0
-; RV32-NEXT:    mv a2, s0
+; RV32-NEXT:    mv a2, s1
 ; RV32-NEXT:    li a3, 0
 ; RV32-NEXT:    call __muldi3
 ; RV32-NEXT:    mv s6, a0
-; RV32-NEXT:    mv s5, a1
-; RV32-NEXT:    add s11, a0, s9
-; RV32-NEXT:    mv a0, s1
+; RV32-NEXT:    mv s4, a1
+; RV32-NEXT:    add s9, a0, s9
+; RV32-NEXT:    mv a0, s0
 ; RV32-NEXT:    li a1, 0
-; RV32-NEXT:    mv a2, s0
+; RV32-NEXT:    mv a2, s1
 ; RV32-NEXT:    li a3, 0
 ; RV32-NEXT:    call __muldi3
-; RV32-NEXT:    add a2, a0, s11
-; RV32-NEXT:    add s7, s8, s7
-; RV32-NEXT:    sw s7, 120(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sltu s1, s7, s8
-; RV32-NEXT:    add s3, a2, s1
-; RV32-NEXT:    add s3, s3, s10
-; RV32-NEXT:    beq s3, a2, .LBB2_9
-; RV32-NEXT:  # %bb.8: # %fp-to-i-if-then12
-; RV32-NEXT:    sltu s1, s3, a2
-; RV32-NEXT:  .LBB2_9: # %fp-to-i-if-then12
-; RV32-NEXT:    sw s3, 116(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sltu a3, s11, s6
+; RV32-NEXT:    add a2, a0, s9
+; RV32-NEXT:    add s5, s7, s5
+; RV32-NEXT:    sw s5, 104(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sltu s0, s5, s7
+; RV32-NEXT:    add a3, a2, s0
+; RV32-NEXT:    add a3, a3, s11
+; RV32-NEXT:    beq a3, a2, .LBB2_8
+; RV32-NEXT:  # %bb.7: # %fp-to-i-if-then12
+; RV32-NEXT:    sltu s0, a3, a2
+; RV32-NEXT:  .LBB2_8: # %fp-to-i-if-then12
+; RV32-NEXT:    sw a3, 100(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sltu a3, s9, s6
 ; RV32-NEXT:    sltu a0, a2, a0
-; RV32-NEXT:    add s10, s5, a3
+; RV32-NEXT:    add s6, s4, a3
 ; RV32-NEXT:    add a0, a1, a0
-; RV32-NEXT:    add s6, s10, a0
-; RV32-NEXT:    lw a0, 148(sp) # 4-byte Folded Reload
+; RV32-NEXT:    add s10, s6, a0
+; RV32-NEXT:    lw a0, 132(sp) # 4-byte Folded Reload
 ; RV32-NEXT:    li a1, 0
-; RV32-NEXT:    mv a2, s0
+; RV32-NEXT:    mv a2, s1
 ; RV32-NEXT:    li a3, 0
 ; RV32-NEXT:    call __muldi3
-; RV32-NEXT:    mv s8, a0
-; RV32-NEXT:    sw a1, 52(sp) # 4-byte Folded Spill
-; RV32-NEXT:    add s5, a0, s6
-; RV32-NEXT:    add s9, s5, s1
-; RV32-NEXT:    add s2, s4, s9
-; RV32-NEXT:    lw s11, 112(sp) # 4-byte Folded Reload
-; RV32-NEXT:    mv a0, s11
+; RV32-NEXT:    mv s7, a0
+; RV32-NEXT:    sw a1, 40(sp) # 4-byte Folded Spill
+; RV32-NEXT:    add s4, a0, s10
+; RV32-NEXT:    add s9, s4, s0
+; RV32-NEXT:    add s0, s3, s9
+; RV32-NEXT:    lw s8, 88(sp) # 4-byte Folded Reload
+; RV32-NEXT:    mv a0, s8
 ; RV32-NEXT:    li a1, 0
-; RV32-NEXT:    mv a2, s0
+; RV32-NEXT:    mv a2, s1
 ; RV32-NEXT:    li a3, 0
 ; RV32-NEXT:    call __muldi3
-; RV32-NEXT:    sw a0, 60(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw a1, 40(sp) # 4-byte Folded Spill
-; RV32-NEXT:    add s7, a0, s2
-; RV32-NEXT:    ori a0, s0, 1
+; RV32-NEXT:    mv s11, a0
+; RV32-NEXT:    sw a1, 44(sp) # 4-byte Folded Spill
+; RV32-NEXT:    add s5, a0, s0
+; RV32-NEXT:    ori a0, s1, 1
 ; RV32-NEXT:    li a1, 0
-; RV32-NEXT:    lw s3, 104(sp) # 4-byte Folded Reload
-; RV32-NEXT:    mv a2, s3
+; RV32-NEXT:    lw a2, 92(sp) # 4-byte Folded Reload
 ; RV32-NEXT:    li a3, 0
 ; RV32-NEXT:    call __muldi3
-; RV32-NEXT:    mv s1, a0
-; RV32-NEXT:    sw a1, 64(sp) # 4-byte Folded Spill
-; RV32-NEXT:    mv a0, s0
+; RV32-NEXT:    mv s2, a0
+; RV32-NEXT:    sw a1, 48(sp) # 4-byte Folded Spill
+; RV32-NEXT:    mv a0, s1
 ; RV32-NEXT:    li a1, 0
-; RV32-NEXT:    lw a2, 140(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw a2, 124(sp) # 4-byte Folded Reload
 ; RV32-NEXT:    li a3, 0
 ; RV32-NEXT:    call __muldi3
-; RV32-NEXT:    sw a0, 36(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw a1, 56(sp) # 4-byte Folded Spill
-; RV32-NEXT:    add s1, a0, s1
-; RV32-NEXT:    sltu a6, s2, s4
-; RV32-NEXT:    lw a2, 68(sp) # 4-byte Folded Reload
-; RV32-NEXT:    sltu a0, s4, a2
-; RV32-NEXT:    lw a1, 72(sp) # 4-byte Folded Reload
+; RV32-NEXT:    sw a0, 20(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw a1, 36(sp) # 4-byte Folded Spill
+; RV32-NEXT:    add a6, a0, s2
+; RV32-NEXT:    sw a6, 0(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sltu a7, s0, s3
+; RV32-NEXT:    lw a2, 52(sp) # 4-byte Folded Reload
+; RV32-NEXT:    sltu a0, s3, a2
+; RV32-NEXT:    lw a1, 56(sp) # 4-byte Folded Reload
 ; RV32-NEXT:    sltu a1, a2, a1
-; RV32-NEXT:    lw a2, 100(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw a2, 96(sp) # 4-byte Folded Reload
 ; RV32-NEXT:    lw a3, 80(sp) # 4-byte Folded Reload
 ; RV32-NEXT:    sltu a2, a3, a2
-; RV32-NEXT:    sltu a3, s9, s5
-; RV32-NEXT:    sltu a4, s5, s8
-; RV32-NEXT:    sltu a5, s6, s10
-; RV32-NEXT:    add s8, s7, s1
-; RV32-NEXT:    mv s5, s1
-; RV32-NEXT:    lw a7, 76(sp) # 4-byte Folded Reload
-; RV32-NEXT:    add a2, a7, a2
-; RV32-NEXT:    lw a7, 52(sp) # 4-byte Folded Reload
-; RV32-NEXT:    add a5, a7, a5
-; RV32-NEXT:    sltu s9, s8, s7
+; RV32-NEXT:    sltu a3, s9, s4
+; RV32-NEXT:    sltu a4, s4, s7
+; RV32-NEXT:    sltu a5, s10, s6
+; RV32-NEXT:    add a6, s5, a6
+; RV32-NEXT:    lw t0, 60(sp) # 4-byte Folded Reload
+; RV32-NEXT:    add a2, t0, a2
+; RV32-NEXT:    lw t0, 40(sp) # 4-byte Folded Reload
+; RV32-NEXT:    add a5, t0, a5
+; RV32-NEXT:    sw a6, 96(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sltu s0, a6, s5
 ; RV32-NEXT:    add a1, a2, a1
 ; RV32-NEXT:    add a4, a5, a4
-; RV32-NEXT:    add a0, a1, a0
+; RV32-NEXT:    add s3, a1, a0
 ; RV32-NEXT:    add a3, a4, a3
-; RV32-NEXT:    sw a0, 52(sp) # 4-byte Folded Spill
-; RV32-NEXT:    add a3, a0, a3
-; RV32-NEXT:    sw a6, 100(sp) # 4-byte Folded Spill
-; RV32-NEXT:    add s4, a3, a6
-; RV32-NEXT:    lw a0, 144(sp) # 4-byte Folded Reload
+; RV32-NEXT:    add a3, s3, a3
+; RV32-NEXT:    sw a7, 60(sp) # 4-byte Folded Spill
+; RV32-NEXT:    add s9, a3, a7
+; RV32-NEXT:    lw a0, 128(sp) # 4-byte Folded Reload
 ; RV32-NEXT:    li a1, 0
-; RV32-NEXT:    mv a2, s0
+; RV32-NEXT:    mv a2, s1
 ; RV32-NEXT:    li a3, 0
 ; RV32-NEXT:    call __muldi3
-; RV32-NEXT:    sw a1, 48(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw a0, 44(sp) # 4-byte Folded Spill
-; RV32-NEXT:    lw s10, 40(sp) # 4-byte Folded Reload
-; RV32-NEXT:    add s10, a0, s10
-; RV32-NEXT:    mv a0, s11
+; RV32-NEXT:    sw a1, 32(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw a0, 28(sp) # 4-byte Folded Spill
+; RV32-NEXT:    lw s2, 44(sp) # 4-byte Folded Reload
+; RV32-NEXT:    add s2, a0, s2
+; RV32-NEXT:    mv a0, s8
 ; RV32-NEXT:    li a1, 0
-; RV32-NEXT:    mv a2, s0
+; RV32-NEXT:    mv a2, s1
 ; RV32-NEXT:    li a3, 0
 ; RV32-NEXT:    call __muldi3
-; RV32-NEXT:    sw a1, 20(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s10, 32(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw a0, 16(sp) # 4-byte Folded Spill
-; RV32-NEXT:    add s10, a0, s10
-; RV32-NEXT:    lw a0, 60(sp) # 4-byte Folded Reload
-; RV32-NEXT:    sltu s11, s7, a0
-; RV32-NEXT:    add s1, s10, s11
-; RV32-NEXT:    add s1, s1, s4
-; RV32-NEXT:    mv a0, s0
+; RV32-NEXT:    mv s4, a0
+; RV32-NEXT:    sw a1, 4(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
+; RV32-NEXT:    add s2, a0, s2
+; RV32-NEXT:    sltu s11, s5, s11
+; RV32-NEXT:    add s5, s2, s11
+; RV32-NEXT:    add s5, s5, s9
+; RV32-NEXT:    mv a0, s1
 ; RV32-NEXT:    li a1, 0
-; RV32-NEXT:    mv a2, s3
+; RV32-NEXT:    lw a2, 92(sp) # 4-byte Folded Reload
 ; RV32-NEXT:    li a3, 0
 ; RV32-NEXT:    call __muldi3
-; RV32-NEXT:    sw a1, 80(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw a0, 72(sp) # 4-byte Folded Spill
-; RV32-NEXT:    lw s2, 64(sp) # 4-byte Folded Reload
-; RV32-NEXT:    add s2, a0, s2
-; RV32-NEXT:    ori a0, s0, 1
+; RV32-NEXT:    sw a1, 56(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw a0, 52(sp) # 4-byte Folded Spill
+; RV32-NEXT:    lw s6, 48(sp) # 4-byte Folded Reload
+; RV32-NEXT:    add s6, a0, s6
+; RV32-NEXT:    ori a0, s1, 1
 ; RV32-NEXT:    li a1, 0
-; RV32-NEXT:    lw a2, 108(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw a2, 84(sp) # 4-byte Folded Reload
 ; RV32-NEXT:    li a3, 0
 ; RV32-NEXT:    call __muldi3
-; RV32-NEXT:    sw a1, 64(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s2, 68(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw a0, 60(sp) # 4-byte Folded Spill
-; RV32-NEXT:    add s2, a0, s2
-; RV32-NEXT:    mv a0, s0
+; RV32-NEXT:    sw a1, 44(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s6, 48(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw a0, 40(sp) # 4-byte Folded Spill
+; RV32-NEXT:    add s10, a0, s6
+; RV32-NEXT:    mv a0, s1
 ; RV32-NEXT:    li a1, 0
-; RV32-NEXT:    lw a2, 140(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw a2, 124(sp) # 4-byte Folded Reload
 ; RV32-NEXT:    li a3, 0
 ; RV32-NEXT:    call __muldi3
-; RV32-NEXT:    sw a1, 40(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw a0, 28(sp) # 4-byte Folded Spill
-; RV32-NEXT:    lw s6, 56(sp) # 4-byte Folded Reload
+; RV32-NEXT:    sw a1, 24(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw a0, 12(sp) # 4-byte Folded Spill
+; RV32-NEXT:    lw s6, 36(sp) # 4-byte Folded Reload
 ; RV32-NEXT:    add s6, a0, s6
-; RV32-NEXT:    mv a0, s0
+; RV32-NEXT:    mv a0, s1
 ; RV32-NEXT:    li a1, 0
-; RV32-NEXT:    lw a2, 148(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw a2, 132(sp) # 4-byte Folded Reload
 ; RV32-NEXT:    li a3, 0
 ; RV32-NEXT:    call __muldi3
 ; RV32-NEXT:    mv s7, a0
-; RV32-NEXT:    sw s6, 24(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s6, 8(sp) # 4-byte Folded Spill
 ; RV32-NEXT:    add s6, a0, s6
-; RV32-NEXT:    sw s2, 56(sp) # 4-byte Folded Spill
-; RV32-NEXT:    add s2, s6, s2
-; RV32-NEXT:    lw a0, 36(sp) # 4-byte Folded Reload
-; RV32-NEXT:    sltu a0, s5, a0
-; RV32-NEXT:    sw a0, 76(sp) # 4-byte Folded Spill
-; RV32-NEXT:    add s2, s2, a0
-; RV32-NEXT:    add s5, s2, s9
-; RV32-NEXT:    add s5, s1, s5
-; RV32-NEXT:    sw a1, 36(sp) # 4-byte Folded Spill
-; RV32-NEXT:    beq s5, s1, .LBB2_11
-; RV32-NEXT:  # %bb.10: # %fp-to-i-if-then12
-; RV32-NEXT:    sltu s9, s5, s1
-; RV32-NEXT:  .LBB2_11: # %fp-to-i-if-then12
-; RV32-NEXT:    lw a4, 88(sp) # 4-byte Folded Reload
+; RV32-NEXT:    sw s10, 36(sp) # 4-byte Folded Spill
+; RV32-NEXT:    add s10, s6, s10
+; RV32-NEXT:    lw a0, 20(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw a2, 0(sp) # 4-byte Folded Reload
+; RV32-NEXT:    sltu s8, a2, a0
+; RV32-NEXT:    add s10, s10, s8
+; RV32-NEXT:    sw s0, 80(sp) # 4-byte Folded Spill
+; RV32-NEXT:    add s0, s10, s0
+; RV32-NEXT:    add s0, s5, s0
+; RV32-NEXT:    sw a1, 20(sp) # 4-byte Folded Spill
+; RV32-NEXT:    beq s0, s5, .LBB2_10
+; RV32-NEXT:  # %bb.9: # %fp-to-i-if-then12
+; RV32-NEXT:    sltu a0, s0, s5
+; RV32-NEXT:    sw a0, 80(sp) # 4-byte Folded Spill
+; RV32-NEXT:  .LBB2_10: # %fp-to-i-if-then12
+; RV32-NEXT:    lw a4, 68(sp) # 4-byte Folded Reload
 ; RV32-NEXT:    slli a1, a4, 1
-; RV32-NEXT:    lw a2, 96(sp) # 4-byte Folded Reload
-; RV32-NEXT:    beq s1, s10, .LBB2_13
-; RV32-NEXT:  # %bb.12: # %fp-to-i-if-then12
-; RV32-NEXT:    sltu s11, s1, s10
-; RV32-NEXT:  .LBB2_13: # %fp-to-i-if-then12
-; RV32-NEXT:    lw a0, 84(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw a2, 76(sp) # 4-byte Folded Reload
+; RV32-NEXT:    beq s5, s2, .LBB2_12
+; RV32-NEXT:  # %bb.11: # %fp-to-i-if-then12
+; RV32-NEXT:    sltu s11, s5, s2
+; RV32-NEXT:  .LBB2_12: # %fp-to-i-if-then12
+; RV32-NEXT:    lw a0, 64(sp) # 4-byte Folded Reload
 ; RV32-NEXT:    srl a0, a0, a2
-; RV32-NEXT:    lw a3, 92(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw a3, 72(sp) # 4-byte Folded Reload
 ; RV32-NEXT:    sll a1, a1, a3
-; RV32-NEXT:    lw s3, 116(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw a3, 52(sp) # 4-byte Folded Reload
-; RV32-NEXT:    beq s4, a3, .LBB2_15
-; RV32-NEXT:  # %bb.14: # %fp-to-i-if-then12
-; RV32-NEXT:    sltu a3, s4, a3
-; RV32-NEXT:    sw a3, 100(sp) # 4-byte Folded Spill
-; RV32-NEXT:  .LBB2_15: # %fp-to-i-if-then12
-; RV32-NEXT:    sw s9, 92(sp) # 4-byte Folded Spill
+; RV32-NEXT:    lw s5, 60(sp) # 4-byte Folded Reload
+; RV32-NEXT:    beq s9, s3, .LBB2_14
+; RV32-NEXT:  # %bb.13: # %fp-to-i-if-then12
+; RV32-NEXT:    sltu s5, s9, s3
+; RV32-NEXT:  .LBB2_14: # %fp-to-i-if-then12
 ; RV32-NEXT:    or a0, a0, a1
-; RV32-NEXT:    sw a0, 84(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw a0, 72(sp) # 4-byte Folded Spill
 ; RV32-NEXT:    srl a0, a4, a2
-; RV32-NEXT:    sw a0, 96(sp) # 4-byte Folded Spill
-; RV32-NEXT:    lw a0, 44(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw a1, 32(sp) # 4-byte Folded Reload
-; RV32-NEXT:    sltu a0, a1, a0
+; RV32-NEXT:    sw a0, 76(sp) # 4-byte Folded Spill
+; RV32-NEXT:    lw a0, 28(sp) # 4-byte Folded Reload
 ; RV32-NEXT:    lw a1, 16(sp) # 4-byte Folded Reload
-; RV32-NEXT:    sltu a1, s10, a1
-; RV32-NEXT:    lw s1, 48(sp) # 4-byte Folded Reload
-; RV32-NEXT:    add s1, s1, a0
-; RV32-NEXT:    lw s4, 20(sp) # 4-byte Folded Reload
+; RV32-NEXT:    sltu a0, a1, a0
+; RV32-NEXT:    sltu a1, s2, s4
+; RV32-NEXT:    lw s2, 32(sp) # 4-byte Folded Reload
+; RV32-NEXT:    add s2, s2, a0
+; RV32-NEXT:    lw s4, 4(sp) # 4-byte Folded Reload
 ; RV32-NEXT:    add s4, s4, a1
-; RV32-NEXT:    lw a0, 144(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw a0, 128(sp) # 4-byte Folded Reload
 ; RV32-NEXT:    li a1, 0
-; RV32-NEXT:    mv a2, s0
+; RV32-NEXT:    mv a2, s1
 ; RV32-NEXT:    li a3, 0
 ; RV32-NEXT:    call __muldi3
-; RV32-NEXT:    add a0, a0, s1
+; RV32-NEXT:    add a0, a0, s2
 ; RV32-NEXT:    add a0, a0, s4
-; RV32-NEXT:    lw a1, 100(sp) # 4-byte Folded Reload
-; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, a0, s5
 ; RV32-NEXT:    add s11, a0, s11
-; RV32-NEXT:    lw s10, 76(sp) # 4-byte Folded Reload
-; RV32-NEXT:    beq s2, s6, .LBB2_17
-; RV32-NEXT:  # %bb.16: # %fp-to-i-if-then12
-; RV32-NEXT:    sltu s10, s2, s6
-; RV32-NEXT:  .LBB2_17: # %fp-to-i-if-then12
-; RV32-NEXT:    lw a0, 140(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s9, 148(sp) # 4-byte Folded Reload
-; RV32-NEXT:    mv a1, s9
-; RV32-NEXT:    lw a2, 132(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s5, 104(sp) # 4-byte Folded Reload
+; RV32-NEXT:    beq s10, s6, .LBB2_16
+; RV32-NEXT:  # %bb.15: # %fp-to-i-if-then12
+; RV32-NEXT:    sltu s8, s10, s6
+; RV32-NEXT:  .LBB2_16: # %fp-to-i-if-then12
+; RV32-NEXT:    lw a0, 124(sp) # 4-byte Folded Reload
+; RV32-NEXT:    mv s9, s8
+; RV32-NEXT:    lw s8, 132(sp) # 4-byte Folded Reload
+; RV32-NEXT:    mv a1, s8
+; RV32-NEXT:    lw a2, 116(sp) # 4-byte Folded Reload
 ; RV32-NEXT:    call __muldi3
-; RV32-NEXT:    mv s1, a0
-; RV32-NEXT:    lw a0, 112(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw a1, 144(sp) # 4-byte Folded Reload
-; RV32-NEXT:    mv a2, s0
-; RV32-NEXT:    mv a3, s0
+; RV32-NEXT:    mv s2, a0
+; RV32-NEXT:    lw a0, 88(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw a1, 128(sp) # 4-byte Folded Reload
+; RV32-NEXT:    mv a2, s1
+; RV32-NEXT:    mv a3, s1
 ; RV32-NEXT:    call __muldi3
-; RV32-NEXT:    add s1, a0, s1
-; RV32-NEXT:    lw a0, 28(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw a1, 24(sp) # 4-byte Folded Reload
+; RV32-NEXT:    add s2, a0, s2
+; RV32-NEXT:    lw a0, 12(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw a1, 8(sp) # 4-byte Folded Reload
 ; RV32-NEXT:    sltu a0, a1, a0
 ; RV32-NEXT:    sltu a1, s6, s7
-; RV32-NEXT:    lw s2, 40(sp) # 4-byte Folded Reload
-; RV32-NEXT:    add s2, s2, a0
-; RV32-NEXT:    lw s4, 36(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s3, 24(sp) # 4-byte Folded Reload
+; RV32-NEXT:    add s3, s3, a0
+; RV32-NEXT:    lw s4, 20(sp) # 4-byte Folded Reload
 ; RV32-NEXT:    add s4, s4, a1
-; RV32-NEXT:    mv a0, s0
+; RV32-NEXT:    mv a0, s1
 ; RV32-NEXT:    li a1, 0
-; RV32-NEXT:    mv a2, s9
+; RV32-NEXT:    mv a2, s8
 ; RV32-NEXT:    li a3, 0
 ; RV32-NEXT:    call __muldi3
-; RV32-NEXT:    add a0, a0, s2
-; RV32-NEXT:    lw a1, 72(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw a2, 68(sp) # 4-byte Folded Reload
+; RV32-NEXT:    add a0, a0, s3
+; RV32-NEXT:    lw a1, 52(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw a2, 48(sp) # 4-byte Folded Reload
 ; RV32-NEXT:    sltu a1, a2, a1
-; RV32-NEXT:    lw a2, 60(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw a3, 56(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw a2, 40(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw a3, 36(sp) # 4-byte Folded Reload
 ; RV32-NEXT:    sltu a2, a3, a2
 ; RV32-NEXT:    add a0, a0, s4
-; RV32-NEXT:    lw s2, 80(sp) # 4-byte Folded Reload
-; RV32-NEXT:    add s2, s2, a1
-; RV32-NEXT:    add s4, a0, s1
-; RV32-NEXT:    lw s1, 64(sp) # 4-byte Folded Reload
-; RV32-NEXT:    add s1, s1, a2
-; RV32-NEXT:    mv a0, s0
+; RV32-NEXT:    lw s3, 56(sp) # 4-byte Folded Reload
+; RV32-NEXT:    add s3, s3, a1
+; RV32-NEXT:    add s4, a0, s2
+; RV32-NEXT:    lw s2, 44(sp) # 4-byte Folded Reload
+; RV32-NEXT:    add s2, s2, a2
+; RV32-NEXT:    mv a0, s1
 ; RV32-NEXT:    li a1, 0
-; RV32-NEXT:    lw s6, 108(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s6, 84(sp) # 4-byte Folded Reload
 ; RV32-NEXT:    mv a2, s6
 ; RV32-NEXT:    li a3, 0
 ; RV32-NEXT:    call __muldi3
-; RV32-NEXT:    add a0, a0, s2
-; RV32-NEXT:    add s2, a0, s1
-; RV32-NEXT:    lw a0, 104(sp) # 4-byte Folded Reload
+; RV32-NEXT:    add a0, a0, s3
+; RV32-NEXT:    add s3, a0, s2
+; RV32-NEXT:    lw a0, 92(sp) # 4-byte Folded Reload
 ; RV32-NEXT:    mv a1, s6
-; RV32-NEXT:    mv a2, s0
-; RV32-NEXT:    mv a3, s0
+; RV32-NEXT:    mv a2, s1
+; RV32-NEXT:    mv a3, s1
 ; RV32-NEXT:    call __muldi3
-; RV32-NEXT:    mv s1, a0
-; RV32-NEXT:    lw a0, 84(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw a1, 96(sp) # 4-byte Folded Reload
-; RV32-NEXT:    ori a2, s0, 1
-; RV32-NEXT:    mv a3, s0
+; RV32-NEXT:    mv s2, a0
+; RV32-NEXT:    lw a0, 72(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw a1, 76(sp) # 4-byte Folded Reload
+; RV32-NEXT:    ori a2, s1, 1
+; RV32-NEXT:    mv a3, s1
 ; RV32-NEXT:    call __muldi3
-; RV32-NEXT:    add a0, a0, s1
-; RV32-NEXT:    add a0, s2, a0
+; RV32-NEXT:    add a0, a0, s2
+; RV32-NEXT:    add a0, s3, a0
 ; RV32-NEXT:    add a0, s4, a0
-; RV32-NEXT:    add a0, a0, s10
+; RV32-NEXT:    add a0, a0, s9
 ; RV32-NEXT:    add a0, s11, a0
-; RV32-NEXT:    lw a1, 92(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw a1, 80(sp) # 4-byte Folded Reload
 ; RV32-NEXT:    add a0, a0, a1
 ; RV32-NEXT:    zext.b a0, a0
-; RV32-NEXT:    lw a1, 136(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw a3, 128(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw a2, 124(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw a4, 120(sp) # 4-byte Folded Reload
-; RV32-NEXT:    j .LBB2_31
-; RV32-NEXT:  .LBB2_18: # %fp-to-i-if-else
-; RV32-NEXT:    sw zero, 264(sp)
-; RV32-NEXT:    sw zero, 268(sp)
-; RV32-NEXT:    sw zero, 272(sp)
-; RV32-NEXT:    sw zero, 276(sp)
-; RV32-NEXT:    sw a1, 248(sp)
+; RV32-NEXT:    lw a1, 120(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw a3, 112(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw a2, 108(sp) # 4-byte Folded Reload
+; RV32-NEXT:    j .LBB2_30
+; RV32-NEXT:  .LBB2_17: # %fp-to-i-if-else
+; RV32-NEXT:    sw zero, 248(sp)
 ; RV32-NEXT:    sw zero, 252(sp)
 ; RV32-NEXT:    sw zero, 256(sp)
 ; RV32-NEXT:    sw zero, 260(sp)
-; RV32-NEXT:    sw zero, 232(sp)
+; RV32-NEXT:    sw a1, 232(sp)
 ; RV32-NEXT:    sw zero, 236(sp)
 ; RV32-NEXT:    sw zero, 240(sp)
 ; RV32-NEXT:    sw zero, 244(sp)
@@ -1725,8 +1705,12 @@ define void @test_float_to_bitint_200(ptr %in, ptr %out) nounwind {
 ; RV32-NEXT:    sw zero, 220(sp)
 ; RV32-NEXT:    sw zero, 224(sp)
 ; RV32-NEXT:    sw zero, 228(sp)
+; RV32-NEXT:    sw zero, 200(sp)
+; RV32-NEXT:    sw zero, 204(sp)
+; RV32-NEXT:    sw zero, 208(sp)
+; RV32-NEXT:    sw zero, 212(sp)
 ; RV32-NEXT:    addi a7, a0, -150
-; RV32-NEXT:    addi a0, sp, 248
+; RV32-NEXT:    addi a0, sp, 232
 ; RV32-NEXT:    srli a1, a7, 3
 ; RV32-NEXT:    andi a2, a7, 31
 ; RV32-NEXT:    andi a1, a1, 28
@@ -1735,438 +1719,446 @@ define void @test_float_to_bitint_200(ptr %in, ptr %out) nounwind {
 ; RV32-NEXT:    lw a1, 16(a0)
 ; RV32-NEXT:    lw a5, 20(a0)
 ; RV32-NEXT:    lw a2, 24(a0)
-; RV32-NEXT:    sw a2, 76(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw a2, 64(sp) # 4-byte Folded Spill
 ; RV32-NEXT:    lw a2, 28(a0)
-; RV32-NEXT:    sw a2, 80(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw a2, 68(sp) # 4-byte Folded Spill
 ; RV32-NEXT:    lw a2, 0(a0)
 ; RV32-NEXT:    lw a3, 4(a0)
 ; RV32-NEXT:    lw a4, 8(a0)
 ; RV32-NEXT:    lw a0, 12(a0)
-; RV32-NEXT:    sw a5, 72(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw a5, 60(sp) # 4-byte Folded Spill
 ; RV32-NEXT:    sll a5, a5, a7
-; RV32-NEXT:    sw a5, 140(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw a5, 104(sp) # 4-byte Folded Spill
 ; RV32-NEXT:    srli a5, a1, 1
 ; RV32-NEXT:    sll a1, a1, a7
-; RV32-NEXT:    sw a1, 120(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw a1, 100(sp) # 4-byte Folded Spill
 ; RV32-NEXT:    srli a1, a0, 1
 ; RV32-NEXT:    sll a0, a0, a7
 ; RV32-NEXT:    srli a6, a4, 1
 ; RV32-NEXT:    sll a4, a4, a7
 ; RV32-NEXT:    srl a6, a6, t0
-; RV32-NEXT:    or s3, a0, a6
+; RV32-NEXT:    or s9, a0, a6
 ; RV32-NEXT:    srli a0, a3, 1
 ; RV32-NEXT:    sll a3, a3, a7
 ; RV32-NEXT:    srl a0, a0, t0
-; RV32-NEXT:    or s4, a4, a0
+; RV32-NEXT:    or s8, a4, a0
 ; RV32-NEXT:    srli a0, a2, 1
-; RV32-NEXT:    srl s8, a5, t0
-; RV32-NEXT:    srl s9, a1, t0
-; RV32-NEXT:    sw t0, 84(sp) # 4-byte Folded Spill
+; RV32-NEXT:    srl s7, a5, t0
+; RV32-NEXT:    srl s10, a1, t0
+; RV32-NEXT:    sw t0, 72(sp) # 4-byte Folded Spill
 ; RV32-NEXT:    srl a0, a0, t0
-; RV32-NEXT:    or s6, a3, a0
-; RV32-NEXT:    sw a7, 88(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sll s7, a2, a7
-; RV32-NEXT:    mv a0, s6
+; RV32-NEXT:    or s4, a3, a0
+; RV32-NEXT:    sw a7, 76(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sll s5, a2, a7
+; RV32-NEXT:    mv a0, s4
 ; RV32-NEXT:    li a1, 0
 ; RV32-NEXT:    mv a2, s11
 ; RV32-NEXT:    li a3, 0
 ; RV32-NEXT:    call __muldi3
-; RV32-NEXT:    mv s1, a0
-; RV32-NEXT:    mv s2, a1
-; RV32-NEXT:    mv a0, s7
+; RV32-NEXT:    mv s2, a0
+; RV32-NEXT:    mv s3, a1
+; RV32-NEXT:    mv a0, s5
 ; RV32-NEXT:    li a1, 0
 ; RV32-NEXT:    mv a2, s11
 ; RV32-NEXT:    li a3, 0
 ; RV32-NEXT:    call __muldi3
-; RV32-NEXT:    sw a0, 128(sp) # 4-byte Folded Spill
-; RV32-NEXT:    add s5, s1, a1
-; RV32-NEXT:    sltu a0, s5, s1
-; RV32-NEXT:    add s2, s2, a0
-; RV32-NEXT:    sw s7, 144(sp) # 4-byte Folded Spill
-; RV32-NEXT:    mv a0, s7
+; RV32-NEXT:    sw a0, 112(sp) # 4-byte Folded Spill
+; RV32-NEXT:    add s0, s2, a1
+; RV32-NEXT:    sltu a0, s0, s2
+; RV32-NEXT:    add s3, s3, a0
+; RV32-NEXT:    sw s5, 124(sp) # 4-byte Folded Spill
+; RV32-NEXT:    mv a0, s5
 ; RV32-NEXT:    li a1, 0
-; RV32-NEXT:    mv a2, s0
+; RV32-NEXT:    mv a2, s1
 ; RV32-NEXT:    li a3, 0
 ; RV32-NEXT:    call __muldi3
-; RV32-NEXT:    add s5, a0, s5
-; RV32-NEXT:    sw s5, 124(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sltu a0, s5, a0
+; RV32-NEXT:    add s0, a0, s0
+; RV32-NEXT:    sw s0, 108(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sltu a0, s0, a0
 ; RV32-NEXT:    add a0, a1, a0
-; RV32-NEXT:    add s1, s2, a0
-; RV32-NEXT:    sltu s2, s1, s2
-; RV32-NEXT:    sw s6, 148(sp) # 4-byte Folded Spill
-; RV32-NEXT:    mv a0, s6
+; RV32-NEXT:    add s0, s3, a0
+; RV32-NEXT:    sltu s2, s0, s3
+; RV32-NEXT:    sw s4, 132(sp) # 4-byte Folded Spill
+; RV32-NEXT:    mv a0, s4
 ; RV32-NEXT:    li a1, 0
-; RV32-NEXT:    mv a2, s0
+; RV32-NEXT:    mv a2, s1
 ; RV32-NEXT:    li a3, 0
 ; RV32-NEXT:    call __muldi3
 ; RV32-NEXT:    add a1, a1, s2
-; RV32-NEXT:    add s10, a0, s1
-; RV32-NEXT:    sltu a0, s10, a0
-; RV32-NEXT:    add s6, a1, a0
-; RV32-NEXT:    mv a0, s4
+; RV32-NEXT:    add s0, a0, s0
+; RV32-NEXT:    sltu a0, s0, a0
+; RV32-NEXT:    add s5, a1, a0
+; RV32-NEXT:    mv a0, s8
 ; RV32-NEXT:    li a1, 0
 ; RV32-NEXT:    mv a2, s11
 ; RV32-NEXT:    li a3, 0
 ; RV32-NEXT:    call __muldi3
-; RV32-NEXT:    mv s5, a0
-; RV32-NEXT:    mv s7, a1
-; RV32-NEXT:    sw s3, 112(sp) # 4-byte Folded Spill
-; RV32-NEXT:    mv a0, s3
+; RV32-NEXT:    mv s4, a0
+; RV32-NEXT:    mv s6, a1
+; RV32-NEXT:    sw s9, 128(sp) # 4-byte Folded Spill
+; RV32-NEXT:    mv a0, s9
 ; RV32-NEXT:    li a1, 0
 ; RV32-NEXT:    mv a2, s11
 ; RV32-NEXT:    li a3, 0
 ; RV32-NEXT:    call __muldi3
-; RV32-NEXT:    mv s2, a0
-; RV32-NEXT:    mv s1, a1
-; RV32-NEXT:    add s7, a0, s7
-; RV32-NEXT:    sw s4, 108(sp) # 4-byte Folded Spill
-; RV32-NEXT:    mv a0, s4
+; RV32-NEXT:    mv s3, a0
+; RV32-NEXT:    mv s2, a1
+; RV32-NEXT:    add s6, a0, s6
+; RV32-NEXT:    sw s8, 88(sp) # 4-byte Folded Spill
+; RV32-NEXT:    mv a0, s8
 ; RV32-NEXT:    li a1, 0
-; RV32-NEXT:    mv a2, s0
+; RV32-NEXT:    mv a2, s1
 ; RV32-NEXT:    li a3, 0
 ; RV32-NEXT:    call __muldi3
-; RV32-NEXT:    add a2, a0, s7
-; RV32-NEXT:    add s10, s5, s10
-; RV32-NEXT:    add s6, a2, s6
-; RV32-NEXT:    sltu s5, s10, s5
-; RV32-NEXT:    add s11, s6, s5
-; RV32-NEXT:    beq s11, a2, .LBB2_20
-; RV32-NEXT:  # %bb.19: # %fp-to-i-if-else
-; RV32-NEXT:    sltu s5, s11, a2
-; RV32-NEXT:  .LBB2_20: # %fp-to-i-if-else
-; RV32-NEXT:    lw a3, 140(sp) # 4-byte Folded Reload
-; RV32-NEXT:    or a3, a3, s8
-; RV32-NEXT:    sw a3, 104(sp) # 4-byte Folded Spill
-; RV32-NEXT:    lw a3, 120(sp) # 4-byte Folded Reload
-; RV32-NEXT:    or a3, a3, s9
-; RV32-NEXT:    sw a3, 140(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sltu a3, s7, s2
+; RV32-NEXT:    add a2, a0, s6
+; RV32-NEXT:    add s11, s4, s0
+; RV32-NEXT:    add s5, a2, s5
+; RV32-NEXT:    sltu s0, s11, s4
+; RV32-NEXT:    add s9, s5, s0
+; RV32-NEXT:    beq s9, a2, .LBB2_19
+; RV32-NEXT:  # %bb.18: # %fp-to-i-if-else
+; RV32-NEXT:    sltu s0, s9, a2
+; RV32-NEXT:  .LBB2_19: # %fp-to-i-if-else
+; RV32-NEXT:    lw a3, 104(sp) # 4-byte Folded Reload
+; RV32-NEXT:    or a3, a3, s7
+; RV32-NEXT:    sw a3, 84(sp) # 4-byte Folded Spill
+; RV32-NEXT:    lw a3, 100(sp) # 4-byte Folded Reload
+; RV32-NEXT:    or a3, a3, s10
+; RV32-NEXT:    sw a3, 92(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sltu a3, s6, s3
 ; RV32-NEXT:    sltu a0, a2, a0
-; RV32-NEXT:    add a3, s1, a3
+; RV32-NEXT:    add a3, s2, a3
 ; RV32-NEXT:    add a0, a1, a0
-; RV32-NEXT:    sw a3, 100(sp) # 4-byte Folded Spill
-; RV32-NEXT:    add s1, a3, a0
-; RV32-NEXT:    lw a0, 112(sp) # 4-byte Folded Reload
+; RV32-NEXT:    sw a3, 96(sp) # 4-byte Folded Spill
+; RV32-NEXT:    add s2, a3, a0
+; RV32-NEXT:    lw a0, 128(sp) # 4-byte Folded Reload
 ; RV32-NEXT:    li a1, 0
-; RV32-NEXT:    mv a2, s0
+; RV32-NEXT:    mv a2, s1
 ; RV32-NEXT:    li a3, 0
 ; RV32-NEXT:    call __muldi3
-; RV32-NEXT:    sw a1, 96(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s1, 92(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw a0, 64(sp) # 4-byte Folded Spill
-; RV32-NEXT:    add s1, a0, s1
-; RV32-NEXT:    sw s1, 68(sp) # 4-byte Folded Spill
-; RV32-NEXT:    add s2, s1, s5
-; RV32-NEXT:    lw a0, 148(sp) # 4-byte Folded Reload
+; RV32-NEXT:    sw a1, 80(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s2, 56(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw a0, 48(sp) # 4-byte Folded Spill
+; RV32-NEXT:    add s2, a0, s2
+; RV32-NEXT:    sw s2, 52(sp) # 4-byte Folded Spill
+; RV32-NEXT:    add s3, s2, s0
+; RV32-NEXT:    lw a0, 132(sp) # 4-byte Folded Reload
 ; RV32-NEXT:    li a1, 0
-; RV32-NEXT:    mv a2, s0
+; RV32-NEXT:    mv a2, s1
 ; RV32-NEXT:    li a3, 0
 ; RV32-NEXT:    call __muldi3
-; RV32-NEXT:    mv s7, a0
-; RV32-NEXT:    mv s5, a1
-; RV32-NEXT:    lw s1, 144(sp) # 4-byte Folded Reload
-; RV32-NEXT:    mv a0, s1
+; RV32-NEXT:    mv s6, a0
+; RV32-NEXT:    mv s4, a1
+; RV32-NEXT:    lw s0, 124(sp) # 4-byte Folded Reload
+; RV32-NEXT:    mv a0, s0
 ; RV32-NEXT:    li a1, 0
-; RV32-NEXT:    mv a2, s0
+; RV32-NEXT:    mv a2, s1
 ; RV32-NEXT:    li a3, 0
 ; RV32-NEXT:    call __muldi3
 ; RV32-NEXT:    mv s8, a0
-; RV32-NEXT:    add s4, s7, a1
-; RV32-NEXT:    mv a0, s1
+; RV32-NEXT:    add s2, s6, a1
+; RV32-NEXT:    mv a0, s0
 ; RV32-NEXT:    li a1, 0
-; RV32-NEXT:    mv a2, s0
+; RV32-NEXT:    mv a2, s1
 ; RV32-NEXT:    li a3, 0
 ; RV32-NEXT:    call __muldi3
-; RV32-NEXT:    add a2, a0, s4
-; RV32-NEXT:    add s10, s8, s10
-; RV32-NEXT:    sw s10, 120(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sltu s1, s10, s8
-; RV32-NEXT:    add s3, a2, s1
-; RV32-NEXT:    add s3, s3, s11
-; RV32-NEXT:    beq s3, a2, .LBB2_22
-; RV32-NEXT:  # %bb.21: # %fp-to-i-if-else
-; RV32-NEXT:    sltu s1, s3, a2
-; RV32-NEXT:  .LBB2_22: # %fp-to-i-if-else
-; RV32-NEXT:    sw s3, 116(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sltu a3, s4, s7
+; RV32-NEXT:    add a2, a0, s2
+; RV32-NEXT:    add s11, s8, s11
+; RV32-NEXT:    sw s11, 104(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sltu s0, s11, s8
+; RV32-NEXT:    add a3, a2, s0
+; RV32-NEXT:    add a3, a3, s9
+; RV32-NEXT:    beq a3, a2, .LBB2_21
+; RV32-NEXT:  # %bb.20: # %fp-to-i-if-else
+; RV32-NEXT:    sltu s0, a3, a2
+; RV32-NEXT:  .LBB2_21: # %fp-to-i-if-else
+; RV32-NEXT:    sw a3, 100(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sltu a3, s2, s6
 ; RV32-NEXT:    sltu a0, a2, a0
-; RV32-NEXT:    add s5, s5, a3
+; RV32-NEXT:    add s8, s4, a3
 ; RV32-NEXT:    add a0, a1, a0
-; RV32-NEXT:    add s9, s5, a0
-; RV32-NEXT:    lw a0, 148(sp) # 4-byte Folded Reload
+; RV32-NEXT:    add s10, s8, a0
+; RV32-NEXT:    lw a0, 132(sp) # 4-byte Folded Reload
 ; RV32-NEXT:    li a1, 0
-; RV32-NEXT:    mv a2, s0
+; RV32-NEXT:    mv a2, s1
 ; RV32-NEXT:    li a3, 0
 ; RV32-NEXT:    call __muldi3
-; RV32-NEXT:    mv s10, a0
-; RV32-NEXT:    sw a1, 56(sp) # 4-byte Folded Spill
-; RV32-NEXT:    add s8, a0, s9
-; RV32-NEXT:    add s3, s8, s1
-; RV32-NEXT:    add s1, s2, s3
-; RV32-NEXT:    lw s6, 108(sp) # 4-byte Folded Reload
-; RV32-NEXT:    mv a0, s6
+; RV32-NEXT:    mv s5, a0
+; RV32-NEXT:    sw a1, 40(sp) # 4-byte Folded Spill
+; RV32-NEXT:    add s9, a0, s10
+; RV32-NEXT:    add s11, s9, s0
+; RV32-NEXT:    add s0, s3, s11
+; RV32-NEXT:    lw s7, 88(sp) # 4-byte Folded Reload
+; RV32-NEXT:    mv a0, s7
 ; RV32-NEXT:    li a1, 0
-; RV32-NEXT:    mv a2, s0
+; RV32-NEXT:    mv a2, s1
 ; RV32-NEXT:    li a3, 0
 ; RV32-NEXT:    call __muldi3
-; RV32-NEXT:    mv s11, a0
-; RV32-NEXT:    sw a1, 60(sp) # 4-byte Folded Spill
-; RV32-NEXT:    add s4, a0, s1
-; RV32-NEXT:    ori a0, s0, 1
+; RV32-NEXT:    mv s6, a0
+; RV32-NEXT:    sw a1, 44(sp) # 4-byte Folded Spill
+; RV32-NEXT:    add s4, a0, s0
+; RV32-NEXT:    ori a0, s1, 1
 ; RV32-NEXT:    li a1, 0
-; RV32-NEXT:    lw a2, 140(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw a2, 92(sp) # 4-byte Folded Reload
 ; RV32-NEXT:    li a3, 0
 ; RV32-NEXT:    call __muldi3
-; RV32-NEXT:    mv s7, a0
-; RV32-NEXT:    sw a1, 24(sp) # 4-byte Folded Spill
-; RV32-NEXT:    mv a0, s0
+; RV32-NEXT:    mv s2, a0
+; RV32-NEXT:    sw a1, 8(sp) # 4-byte Folded Spill
+; RV32-NEXT:    mv a0, s1
 ; RV32-NEXT:    li a1, 0
-; RV32-NEXT:    lw a2, 144(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw a2, 124(sp) # 4-byte Folded Reload
 ; RV32-NEXT:    li a3, 0
 ; RV32-NEXT:    call __muldi3
-; RV32-NEXT:    sw a0, 28(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw a1, 52(sp) # 4-byte Folded Spill
-; RV32-NEXT:    add s7, a0, s7
-; RV32-NEXT:    sltu a6, s1, s2
-; RV32-NEXT:    lw a3, 68(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw a0, 64(sp) # 4-byte Folded Reload
+; RV32-NEXT:    sw a0, 32(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw a1, 36(sp) # 4-byte Folded Spill
+; RV32-NEXT:    add a5, a0, s2
+; RV32-NEXT:    sw a5, 0(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sltu a6, s0, s3
+; RV32-NEXT:    lw a3, 52(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw a0, 48(sp) # 4-byte Folded Reload
 ; RV32-NEXT:    sltu a0, a3, a0
-; RV32-NEXT:    lw a1, 100(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw a2, 92(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw a1, 96(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw a2, 56(sp) # 4-byte Folded Reload
 ; RV32-NEXT:    sltu a1, a2, a1
-; RV32-NEXT:    sltu a2, s2, a3
-; RV32-NEXT:    sltu a3, s3, s8
+; RV32-NEXT:    sltu s3, s3, a3
+; RV32-NEXT:    sltu a2, s11, s9
+; RV32-NEXT:    sltu a3, s10, s8
 ; RV32-NEXT:    sltu a4, s9, s5
-; RV32-NEXT:    sltu a5, s8, s10
-; RV32-NEXT:    add s8, s4, s7
-; RV32-NEXT:    mv s2, s7
-; RV32-NEXT:    lw a7, 96(sp) # 4-byte Folded Reload
+; RV32-NEXT:    add a5, s4, a5
+; RV32-NEXT:    lw a7, 80(sp) # 4-byte Folded Reload
 ; RV32-NEXT:    add a1, a7, a1
-; RV32-NEXT:    lw a7, 56(sp) # 4-byte Folded Reload
-; RV32-NEXT:    add a4, a7, a4
-; RV32-NEXT:    sltu s5, s8, s4
+; RV32-NEXT:    lw a7, 40(sp) # 4-byte Folded Reload
+; RV32-NEXT:    add a3, a7, a3
+; RV32-NEXT:    sw a5, 96(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sltu s0, a5, s4
 ; RV32-NEXT:    add a0, a1, a0
-; RV32-NEXT:    add a4, a4, a5
-; RV32-NEXT:    add s10, a0, a2
-; RV32-NEXT:    add a3, a4, a3
-; RV32-NEXT:    add a3, s10, a3
-; RV32-NEXT:    sw a6, 96(sp) # 4-byte Folded Spill
-; RV32-NEXT:    add s3, a3, a6
-; RV32-NEXT:    lw s9, 112(sp) # 4-byte Folded Reload
-; RV32-NEXT:    mv a0, s9
+; RV32-NEXT:    add a3, a3, a4
+; RV32-NEXT:    add s3, a0, s3
+; RV32-NEXT:    add a2, a3, a2
+; RV32-NEXT:    add a2, s3, a2
+; RV32-NEXT:    sw a6, 40(sp) # 4-byte Folded Spill
+; RV32-NEXT:    add s11, a2, a6
+; RV32-NEXT:    lw a0, 128(sp) # 4-byte Folded Reload
 ; RV32-NEXT:    li a1, 0
-; RV32-NEXT:    mv a2, s0
+; RV32-NEXT:    mv a2, s1
 ; RV32-NEXT:    li a3, 0
 ; RV32-NEXT:    call __muldi3
-; RV32-NEXT:    sw a1, 48(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw a0, 40(sp) # 4-byte Folded Spill
-; RV32-NEXT:    lw s1, 60(sp) # 4-byte Folded Reload
-; RV32-NEXT:    add s1, a0, s1
-; RV32-NEXT:    mv a0, s6
+; RV32-NEXT:    sw a1, 28(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw a0, 24(sp) # 4-byte Folded Spill
+; RV32-NEXT:    lw s9, 44(sp) # 4-byte Folded Reload
+; RV32-NEXT:    add s9, a0, s9
+; RV32-NEXT:    mv a0, s7
 ; RV32-NEXT:    li a1, 0
-; RV32-NEXT:    mv a2, s0
+; RV32-NEXT:    mv a2, s1
 ; RV32-NEXT:    li a3, 0
 ; RV32-NEXT:    call __muldi3
-; RV32-NEXT:    sw a1, 20(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s1, 32(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw a0, 16(sp) # 4-byte Folded Spill
-; RV32-NEXT:    add s6, a0, s1
-; RV32-NEXT:    sltu s7, s4, s11
-; RV32-NEXT:    add s11, s6, s7
-; RV32-NEXT:    add s11, s11, s3
-; RV32-NEXT:    mv a0, s0
+; RV32-NEXT:    mv s2, a0
+; RV32-NEXT:    sw a1, 4(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s9, 16(sp) # 4-byte Folded Spill
+; RV32-NEXT:    add s9, a0, s9
+; RV32-NEXT:    sltu s8, s4, s6
+; RV32-NEXT:    add s10, s9, s8
+; RV32-NEXT:    add s10, s10, s11
+; RV32-NEXT:    mv a0, s1
 ; RV32-NEXT:    li a1, 0
-; RV32-NEXT:    lw a2, 148(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw a2, 132(sp) # 4-byte Folded Reload
 ; RV32-NEXT:    li a3, 0
 ; RV32-NEXT:    call __muldi3
-; RV32-NEXT:    mv s1, a0
-; RV32-NEXT:    sw a1, 68(sp) # 4-byte Folded Spill
-; RV32-NEXT:    mv a0, s0
+; RV32-NEXT:    mv s4, a0
+; RV32-NEXT:    sw a1, 56(sp) # 4-byte Folded Spill
+; RV32-NEXT:    mv a0, s1
 ; RV32-NEXT:    li a1, 0
-; RV32-NEXT:    lw a2, 144(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw a2, 124(sp) # 4-byte Folded Reload
 ; RV32-NEXT:    li a3, 0
 ; RV32-NEXT:    call __muldi3
-; RV32-NEXT:    sw a1, 60(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw a0, 56(sp) # 4-byte Folded Spill
-; RV32-NEXT:    lw a1, 52(sp) # 4-byte Folded Reload
+; RV32-NEXT:    sw a1, 48(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw a0, 44(sp) # 4-byte Folded Spill
+; RV32-NEXT:    lw a1, 36(sp) # 4-byte Folded Reload
 ; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    sw s1, 64(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw a0, 52(sp) # 4-byte Folded Spill
-; RV32-NEXT:    add s1, s1, a0
-; RV32-NEXT:    mv a0, s0
+; RV32-NEXT:    sw s4, 52(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw a0, 36(sp) # 4-byte Folded Spill
+; RV32-NEXT:    add s4, s4, a0
+; RV32-NEXT:    mv a0, s1
 ; RV32-NEXT:    li a1, 0
-; RV32-NEXT:    lw a2, 140(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw a2, 92(sp) # 4-byte Folded Reload
 ; RV32-NEXT:    li a3, 0
 ; RV32-NEXT:    call __muldi3
-; RV32-NEXT:    sw a1, 44(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw a0, 36(sp) # 4-byte Folded Spill
-; RV32-NEXT:    lw s4, 24(sp) # 4-byte Folded Reload
-; RV32-NEXT:    add s4, a0, s4
-; RV32-NEXT:    ori a0, s0, 1
+; RV32-NEXT:    sw a1, 20(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw a0, 12(sp) # 4-byte Folded Spill
+; RV32-NEXT:    lw s6, 8(sp) # 4-byte Folded Reload
+; RV32-NEXT:    add s6, a0, s6
+; RV32-NEXT:    ori a0, s1, 1
 ; RV32-NEXT:    li a1, 0
-; RV32-NEXT:    lw a2, 104(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw a2, 84(sp) # 4-byte Folded Reload
 ; RV32-NEXT:    li a3, 0
 ; RV32-NEXT:    call __muldi3
-; RV32-NEXT:    sw s4, 24(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw a0, 12(sp) # 4-byte Folded Spill
-; RV32-NEXT:    add a0, a0, s4
-; RV32-NEXT:    sw a0, 8(sp) # 4-byte Folded Spill
-; RV32-NEXT:    add s4, s1, a0
-; RV32-NEXT:    lw a0, 28(sp) # 4-byte Folded Reload
-; RV32-NEXT:    sltu a0, s2, a0
-; RV32-NEXT:    sw a0, 92(sp) # 4-byte Folded Spill
-; RV32-NEXT:    add s4, s4, a0
-; RV32-NEXT:    sw s5, 100(sp) # 4-byte Folded Spill
-; RV32-NEXT:    add s5, s4, s5
-; RV32-NEXT:    add s5, s11, s5
-; RV32-NEXT:    sw a1, 28(sp) # 4-byte Folded Spill
-; RV32-NEXT:    beq s5, s11, .LBB2_24
-; RV32-NEXT:  # %bb.23: # %fp-to-i-if-else
-; RV32-NEXT:    sltu a0, s5, s11
-; RV32-NEXT:    sw a0, 100(sp) # 4-byte Folded Spill
-; RV32-NEXT:  .LBB2_24: # %fp-to-i-if-else
-; RV32-NEXT:    lw a2, 72(sp) # 4-byte Folded Reload
+; RV32-NEXT:    mv s7, a0
+; RV32-NEXT:    sw s6, 8(sp) # 4-byte Folded Spill
+; RV32-NEXT:    add s6, a0, s6
+; RV32-NEXT:    add s5, s4, s6
+; RV32-NEXT:    lw a0, 32(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw a2, 0(sp) # 4-byte Folded Reload
+; RV32-NEXT:    sltu a0, a2, a0
+; RV32-NEXT:    sw a0, 32(sp) # 4-byte Folded Spill
+; RV32-NEXT:    add s5, s5, a0
+; RV32-NEXT:    sw s0, 80(sp) # 4-byte Folded Spill
+; RV32-NEXT:    add s0, s5, s0
+; RV32-NEXT:    add s0, s10, s0
+; RV32-NEXT:    sw a1, 0(sp) # 4-byte Folded Spill
+; RV32-NEXT:    beq s0, s10, .LBB2_23
+; RV32-NEXT:  # %bb.22: # %fp-to-i-if-else
+; RV32-NEXT:    sltu a0, s0, s10
+; RV32-NEXT:    sw a0, 80(sp) # 4-byte Folded Spill
+; RV32-NEXT:  .LBB2_23: # %fp-to-i-if-else
+; RV32-NEXT:    lw a2, 60(sp) # 4-byte Folded Reload
 ; RV32-NEXT:    srli a2, a2, 1
-; RV32-NEXT:    lw a0, 76(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw a0, 64(sp) # 4-byte Folded Reload
 ; RV32-NEXT:    srli a1, a0, 1
-; RV32-NEXT:    beq s11, s6, .LBB2_26
-; RV32-NEXT:  # %bb.25: # %fp-to-i-if-else
-; RV32-NEXT:    sltu s7, s11, s6
-; RV32-NEXT:  .LBB2_26: # %fp-to-i-if-else
-; RV32-NEXT:    lw a3, 88(sp) # 4-byte Folded Reload
+; RV32-NEXT:    beq s10, s9, .LBB2_25
+; RV32-NEXT:  # %bb.24: # %fp-to-i-if-else
+; RV32-NEXT:    sltu s8, s10, s9
+; RV32-NEXT:  .LBB2_25: # %fp-to-i-if-else
+; RV32-NEXT:    lw a3, 76(sp) # 4-byte Folded Reload
 ; RV32-NEXT:    sll a0, a0, a3
-; RV32-NEXT:    lw a4, 84(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw a4, 72(sp) # 4-byte Folded Reload
 ; RV32-NEXT:    srl a2, a2, a4
-; RV32-NEXT:    lw a5, 80(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw a5, 68(sp) # 4-byte Folded Reload
 ; RV32-NEXT:    sll a3, a5, a3
 ; RV32-NEXT:    srl a1, a1, a4
-; RV32-NEXT:    beq s3, s10, .LBB2_28
-; RV32-NEXT:  # %bb.27: # %fp-to-i-if-else
-; RV32-NEXT:    sltu a4, s3, s10
-; RV32-NEXT:    sw a4, 96(sp) # 4-byte Folded Spill
-; RV32-NEXT:  .LBB2_28: # %fp-to-i-if-else
-; RV32-NEXT:    or s11, a0, a2
-; RV32-NEXT:    or s10, a3, a1
-; RV32-NEXT:    lw a0, 40(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw a1, 32(sp) # 4-byte Folded Reload
-; RV32-NEXT:    sltu a0, a1, a0
+; RV32-NEXT:    lw s10, 40(sp) # 4-byte Folded Reload
+; RV32-NEXT:    beq s11, s3, .LBB2_27
+; RV32-NEXT:  # %bb.26: # %fp-to-i-if-else
+; RV32-NEXT:    sltu s10, s11, s3
+; RV32-NEXT:  .LBB2_27: # %fp-to-i-if-else
+; RV32-NEXT:    or s3, a0, a2
+; RV32-NEXT:    or s11, a3, a1
+; RV32-NEXT:    lw a0, 24(sp) # 4-byte Folded Reload
 ; RV32-NEXT:    lw a1, 16(sp) # 4-byte Folded Reload
-; RV32-NEXT:    sltu a1, s6, a1
-; RV32-NEXT:    lw s2, 48(sp) # 4-byte Folded Reload
+; RV32-NEXT:    sltu a0, a1, a0
+; RV32-NEXT:    sltu a1, s9, s2
+; RV32-NEXT:    lw s2, 28(sp) # 4-byte Folded Reload
 ; RV32-NEXT:    add s2, s2, a0
-; RV32-NEXT:    lw s3, 20(sp) # 4-byte Folded Reload
-; RV32-NEXT:    add s3, s3, a1
-; RV32-NEXT:    mv a0, s9
+; RV32-NEXT:    lw s9, 4(sp) # 4-byte Folded Reload
+; RV32-NEXT:    add s9, s9, a1
+; RV32-NEXT:    lw a0, 128(sp) # 4-byte Folded Reload
 ; RV32-NEXT:    li a1, 0
-; RV32-NEXT:    mv a2, s0
+; RV32-NEXT:    mv a2, s1
 ; RV32-NEXT:    li a3, 0
 ; RV32-NEXT:    call __muldi3
 ; RV32-NEXT:    add a0, a0, s2
-; RV32-NEXT:    add a0, a0, s3
-; RV32-NEXT:    lw a1, 96(sp) # 4-byte Folded Reload
-; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add s7, a0, s7
-; RV32-NEXT:    beq s4, s1, .LBB2_30
-; RV32-NEXT:  # %bb.29: # %fp-to-i-if-else
-; RV32-NEXT:    sltu a0, s4, s1
-; RV32-NEXT:    sw a0, 92(sp) # 4-byte Folded Spill
-; RV32-NEXT:  .LBB2_30: # %fp-to-i-if-else
-; RV32-NEXT:    lw a0, 140(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s6, 104(sp) # 4-byte Folded Reload
-; RV32-NEXT:    mv a1, s6
-; RV32-NEXT:    mv a2, s0
-; RV32-NEXT:    mv a3, s0
+; RV32-NEXT:    add a0, a0, s9
+; RV32-NEXT:    add a0, a0, s10
+; RV32-NEXT:    add s8, a0, s8
+; RV32-NEXT:    lw s10, 32(sp) # 4-byte Folded Reload
+; RV32-NEXT:    beq s5, s4, .LBB2_29
+; RV32-NEXT:  # %bb.28: # %fp-to-i-if-else
+; RV32-NEXT:    sltu s10, s5, s4
+; RV32-NEXT:  .LBB2_29: # %fp-to-i-if-else
+; RV32-NEXT:    lw a0, 92(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s9, 84(sp) # 4-byte Folded Reload
+; RV32-NEXT:    mv a1, s9
+; RV32-NEXT:    mv a2, s1
+; RV32-NEXT:    mv a3, s1
 ; RV32-NEXT:    call __muldi3
 ; RV32-NEXT:    mv s2, a0
-; RV32-NEXT:    mv a0, s11
-; RV32-NEXT:    mv a1, s10
-; RV32-NEXT:    ori a2, s0, 1
-; RV32-NEXT:    mv a3, s0
+; RV32-NEXT:    mv a0, s3
+; RV32-NEXT:    mv a1, s11
+; RV32-NEXT:    ori a2, s1, 1
+; RV32-NEXT:    mv a3, s1
 ; RV32-NEXT:    call __muldi3
 ; RV32-NEXT:    add s2, a0, s2
-; RV32-NEXT:    lw a0, 36(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw a1, 24(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw a0, 12(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw a1, 8(sp) # 4-byte Folded Reload
 ; RV32-NEXT:    sltu a0, a1, a0
-; RV32-NEXT:    lw a1, 12(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw a2, 8(sp) # 4-byte Folded Reload
-; RV32-NEXT:    sltu a1, a2, a1
-; RV32-NEXT:    lw s3, 44(sp) # 4-byte Folded Reload
+; RV32-NEXT:    sltu a1, s6, s7
+; RV32-NEXT:    lw s3, 20(sp) # 4-byte Folded Reload
 ; RV32-NEXT:    add s3, s3, a0
-; RV32-NEXT:    lw s4, 28(sp) # 4-byte Folded Reload
-; RV32-NEXT:    add s4, s4, a1
-; RV32-NEXT:    mv a0, s0
+; RV32-NEXT:    lw s5, 0(sp) # 4-byte Folded Reload
+; RV32-NEXT:    add s5, s5, a1
+; RV32-NEXT:    mv a0, s1
 ; RV32-NEXT:    li a1, 0
-; RV32-NEXT:    mv a2, s6
+; RV32-NEXT:    mv a2, s9
 ; RV32-NEXT:    li a3, 0
 ; RV32-NEXT:    call __muldi3
 ; RV32-NEXT:    add a0, a0, s3
-; RV32-NEXT:    add a0, a0, s4
+; RV32-NEXT:    add a0, a0, s5
 ; RV32-NEXT:    add s3, a0, s2
-; RV32-NEXT:    lw a0, 108(sp) # 4-byte Folded Reload
-; RV32-NEXT:    mv a1, s9
-; RV32-NEXT:    mv a2, s0
-; RV32-NEXT:    mv a3, s0
+; RV32-NEXT:    lw a0, 88(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw a1, 128(sp) # 4-byte Folded Reload
+; RV32-NEXT:    mv a2, s1
+; RV32-NEXT:    mv a3, s1
 ; RV32-NEXT:    call __muldi3
 ; RV32-NEXT:    mv s2, a0
-; RV32-NEXT:    lw a0, 144(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s6, 148(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw a0, 124(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s6, 132(sp) # 4-byte Folded Reload
 ; RV32-NEXT:    mv a1, s6
-; RV32-NEXT:    lw a2, 132(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw a2, 116(sp) # 4-byte Folded Reload
 ; RV32-NEXT:    call __muldi3
 ; RV32-NEXT:    add s2, s2, a0
-; RV32-NEXT:    lw a0, 64(sp) # 4-byte Folded Reload
-; RV32-NEXT:    sltu a0, s1, a0
-; RV32-NEXT:    lw a1, 56(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw a2, 52(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw a0, 52(sp) # 4-byte Folded Reload
+; RV32-NEXT:    sltu a0, s4, a0
+; RV32-NEXT:    lw a1, 44(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw a2, 36(sp) # 4-byte Folded Reload
 ; RV32-NEXT:    sltu a1, a2, a1
-; RV32-NEXT:    lw s1, 68(sp) # 4-byte Folded Reload
-; RV32-NEXT:    add s1, s1, a0
-; RV32-NEXT:    lw s4, 60(sp) # 4-byte Folded Reload
-; RV32-NEXT:    add s4, s4, a1
-; RV32-NEXT:    mv a0, s0
+; RV32-NEXT:    lw s4, 56(sp) # 4-byte Folded Reload
+; RV32-NEXT:    add s4, s4, a0
+; RV32-NEXT:    lw s5, 48(sp) # 4-byte Folded Reload
+; RV32-NEXT:    add s5, s5, a1
+; RV32-NEXT:    mv a0, s1
 ; RV32-NEXT:    li a1, 0
 ; RV32-NEXT:    mv a2, s6
 ; RV32-NEXT:    li a3, 0
 ; RV32-NEXT:    call __muldi3
-; RV32-NEXT:    add a0, a0, s4
+; RV32-NEXT:    add a0, a0, s5
 ; RV32-NEXT:    add a0, a0, s2
-; RV32-NEXT:    add a0, a0, s1
-; RV32-NEXT:    lw a1, 92(sp) # 4-byte Folded Reload
-; RV32-NEXT:    add a0, a0, a1
+; RV32-NEXT:    add a0, a0, s4
+; RV32-NEXT:    add a0, a0, s10
 ; RV32-NEXT:    add a0, a0, s3
-; RV32-NEXT:    add a0, s7, a0
-; RV32-NEXT:    lw a1, 100(sp) # 4-byte Folded Reload
+; RV32-NEXT:    add a0, s8, a0
+; RV32-NEXT:    lw a1, 80(sp) # 4-byte Folded Reload
 ; RV32-NEXT:    add a0, a0, a1
 ; RV32-NEXT:    zext.b a0, a0
-; RV32-NEXT:    lw a1, 136(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw a3, 128(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw a2, 124(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw a4, 120(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s3, 116(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw a1, 120(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw a3, 112(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw a2, 108(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s5, 104(sp) # 4-byte Folded Reload
+; RV32-NEXT:  .LBB2_30: # %fp-to-i-cleanup
+; RV32-NEXT:    lw a4, 100(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw a5, 96(sp) # 4-byte Folded Reload
 ; RV32-NEXT:  .LBB2_31: # %fp-to-i-cleanup
 ; RV32-NEXT:    sw a3, 0(a1)
 ; RV32-NEXT:    sw a2, 4(a1)
-; RV32-NEXT:    sw a4, 8(a1)
-; RV32-NEXT:    sw s3, 12(a1)
-; RV32-NEXT:    sw s8, 16(a1)
-; RV32-NEXT:    sw s5, 20(a1)
+; RV32-NEXT:    sw s5, 8(a1)
+; RV32-NEXT:    sw a4, 12(a1)
+; RV32-NEXT:    sw a5, 16(a1)
+; RV32-NEXT:    sw s0, 20(a1)
 ; RV32-NEXT:    sb a0, 24(a1)
-; RV32-NEXT:    lw ra, 332(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s0, 328(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s1, 324(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s2, 320(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s3, 316(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s4, 312(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s5, 308(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s6, 304(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s7, 300(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s8, 296(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s9, 292(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s10, 288(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s11, 284(sp) # 4-byte Folded Reload
-; RV32-NEXT:    addi sp, sp, 336
+; RV32-NEXT:    lw ra, 316(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s0, 312(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s1, 308(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s2, 304(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s3, 300(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s4, 296(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s5, 292(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s6, 288(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s7, 284(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s8, 280(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s9, 276(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s10, 272(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s11, 268(sp) # 4-byte Folded Reload
+; RV32-NEXT:    addi sp, sp, 320
 ; RV32-NEXT:    ret
+; RV32-NEXT:  .LBB2_32: # %fp-to-i-if-then5
+; RV32-NEXT:    srli a2, a2, 31
+; RV32-NEXT:    addi a3, a2, -1
+; RV32-NEXT:    addi a0, a2, -129
+; RV32-NEXT:    zext.b a0, a0
+; RV32-NEXT:    mv a2, a3
+; RV32-NEXT:    mv s5, a3
+; RV32-NEXT:    mv a4, a3
+; RV32-NEXT:    mv a5, a3
+; RV32-NEXT:    mv s0, a3
+; RV32-NEXT:    j .LBB2_31
   %1 = load float, ptr %in, align 4
   %2 = fptosi float %1 to i200
   store i200 %2, ptr %out, align 8
diff --git a/llvm/test/CodeGen/RISCV/fpclamptosat.ll b/llvm/test/CodeGen/RISCV/fpclamptosat.ll
index 5e398191e0083..6d2bc6c5ebb43 100644
--- a/llvm/test/CodeGen/RISCV/fpclamptosat.ll
+++ b/llvm/test/CodeGen/RISCV/fpclamptosat.ll
@@ -1111,32 +1111,25 @@ define i64 @stest_f64i64(double %x) {
 ; RV32IF-NEXT:    .cfi_offset s9, -44
 ; RV32IF-NEXT:    .cfi_offset s10, -48
 ; RV32IF-NEXT:    .cfi_offset s11, -52
+; RV32IF-NEXT:    .cfi_remember_state
 ; RV32IF-NEXT:    slli a2, a1, 1
 ; RV32IF-NEXT:    srli a2, a2, 21
 ; RV32IF-NEXT:    li a3, 1023
 ; RV32IF-NEXT:    bgeu a2, a3, .LBB18_2
 ; RV32IF-NEXT:  # %bb.1:
 ; RV32IF-NEXT:    li s2, 0
-; RV32IF-NEXT:    li a1, 0
+; RV32IF-NEXT:    li s3, 0
 ; RV32IF-NEXT:    li a0, 0
-; RV32IF-NEXT:    li a2, 0
-; RV32IF-NEXT:    j .LBB18_7
+; RV32IF-NEXT:    li a1, 0
+; RV32IF-NEXT:    j .LBB18_6
 ; RV32IF-NEXT:  .LBB18_2: # %fp-to-i-if-end
 ; RV32IF-NEXT:    addi a3, a2, -1151
 ; RV32IF-NEXT:    sltu a4, a3, a2
 ; RV32IF-NEXT:    sltiu a3, a3, -128
 ; RV32IF-NEXT:    or a4, a4, a3
 ; RV32IF-NEXT:    srli a3, a1, 31
-; RV32IF-NEXT:    beqz a4, .LBB18_4
-; RV32IF-NEXT:  # %bb.3: # %fp-to-i-if-then5
-; RV32IF-NEXT:    xori a0, a3, 1
-; RV32IF-NEXT:    lui a2, 524288
-; RV32IF-NEXT:    neg s2, a0
-; RV32IF-NEXT:    sub a2, a2, a0
-; RV32IF-NEXT:    mv a1, s2
-; RV32IF-NEXT:    mv a0, s2
-; RV32IF-NEXT:    j .LBB18_7
-; RV32IF-NEXT:  .LBB18_4: # %fp-to-i-if-end9
+; RV32IF-NEXT:    bnez a4, .LBB18_20
+; RV32IF-NEXT:  # %bb.3: # %fp-to-i-if-end9
 ; RV32IF-NEXT:    neg s0, a3
 ; RV32IF-NEXT:    slli a1, a1, 12
 ; RV32IF-NEXT:    lui a3, 256
@@ -1144,8 +1137,8 @@ define i64 @stest_f64i64(double %x) {
 ; RV32IF-NEXT:    srli a1, a1, 12
 ; RV32IF-NEXT:    or a1, a1, a3
 ; RV32IF-NEXT:    ori s8, s0, 1
-; RV32IF-NEXT:    bltu a4, a2, .LBB18_6
-; RV32IF-NEXT:  # %bb.5: # %fp-to-i-if-then12
+; RV32IF-NEXT:    bltu a4, a2, .LBB18_5
+; RV32IF-NEXT:  # %bb.4: # %fp-to-i-if-then12
 ; RV32IF-NEXT:    sw zero, 24(sp)
 ; RV32IF-NEXT:    sw zero, 28(sp)
 ; RV32IF-NEXT:    sw zero, 32(sp)
@@ -1177,70 +1170,68 @@ define i64 @stest_f64i64(double %x) {
 ; RV32IF-NEXT:    srl a1, a1, a0
 ; RV32IF-NEXT:    or a3, a3, a6
 ; RV32IF-NEXT:    sw a3, 4(sp) # 4-byte Folded Spill
-; RV32IF-NEXT:    or s5, a2, a7
-; RV32IF-NEXT:    or s6, a1, a5
-; RV32IF-NEXT:    srl s4, a4, a0
-; RV32IF-NEXT:    mv a0, s6
+; RV32IF-NEXT:    or s6, a2, a7
+; RV32IF-NEXT:    or s7, a1, a5
+; RV32IF-NEXT:    srl s5, a4, a0
+; RV32IF-NEXT:    mv a0, s7
 ; RV32IF-NEXT:    li a1, 0
 ; RV32IF-NEXT:    mv a2, s8
 ; RV32IF-NEXT:    li a3, 0
 ; RV32IF-NEXT:    call __muldi3
 ; RV32IF-NEXT:    mv s2, a0
-; RV32IF-NEXT:    mv s7, a1
-; RV32IF-NEXT:    mv a0, s5
+; RV32IF-NEXT:    mv s3, a1
+; RV32IF-NEXT:    mv a0, s6
 ; RV32IF-NEXT:    li a1, 0
 ; RV32IF-NEXT:    mv a2, s8
 ; RV32IF-NEXT:    li a3, 0
 ; RV32IF-NEXT:    call __muldi3
-; RV32IF-NEXT:    add s7, a0, s7
-; RV32IF-NEXT:    sltu a0, s7, a0
-; RV32IF-NEXT:    add s9, a1, a0
-; RV32IF-NEXT:    mv a0, s6
+; RV32IF-NEXT:    add s3, a0, s3
+; RV32IF-NEXT:    sltu a0, s3, a0
+; RV32IF-NEXT:    add s10, a1, a0
+; RV32IF-NEXT:    mv a0, s7
 ; RV32IF-NEXT:    li a1, 0
 ; RV32IF-NEXT:    mv a2, s0
 ; RV32IF-NEXT:    li a3, 0
 ; RV32IF-NEXT:    call __muldi3
-; RV32IF-NEXT:    add s10, a0, s7
-; RV32IF-NEXT:    sltu a0, s10, a0
+; RV32IF-NEXT:    add s3, a0, s3
+; RV32IF-NEXT:    sltu a0, s3, a0
 ; RV32IF-NEXT:    add a0, a1, a0
-; RV32IF-NEXT:    add s11, s9, a0
-; RV32IF-NEXT:    mv a0, s5
+; RV32IF-NEXT:    add s11, s10, a0
+; RV32IF-NEXT:    mv a0, s6
 ; RV32IF-NEXT:    li a1, 0
 ; RV32IF-NEXT:    mv a2, s0
 ; RV32IF-NEXT:    li a3, 0
 ; RV32IF-NEXT:    call __muldi3
 ; RV32IF-NEXT:    mv s1, a0
-; RV32IF-NEXT:    mv s3, s8
-; RV32IF-NEXT:    mv s8, a1
-; RV32IF-NEXT:    add s7, a0, s11
+; RV32IF-NEXT:    mv s9, a1
+; RV32IF-NEXT:    mv s4, s8
+; RV32IF-NEXT:    add s8, a0, s11
 ; RV32IF-NEXT:    mv a0, s0
 ; RV32IF-NEXT:    mv a1, s0
-; RV32IF-NEXT:    mv a2, s6
-; RV32IF-NEXT:    mv a3, s5
+; RV32IF-NEXT:    mv a2, s7
+; RV32IF-NEXT:    mv a3, s6
 ; RV32IF-NEXT:    call __muldi3
-; RV32IF-NEXT:    mv s5, a0
-; RV32IF-NEXT:    mv s6, a1
-; RV32IF-NEXT:    mv a0, s3
+; RV32IF-NEXT:    mv s6, a0
+; RV32IF-NEXT:    mv s7, a1
+; RV32IF-NEXT:    mv a0, s4
 ; RV32IF-NEXT:    mv a1, s0
 ; RV32IF-NEXT:    lw a2, 4(sp) # 4-byte Folded Reload
-; RV32IF-NEXT:    mv a3, s4
+; RV32IF-NEXT:    mv a3, s5
 ; RV32IF-NEXT:    call __muldi3
-; RV32IF-NEXT:    mv a2, a1
-; RV32IF-NEXT:    mv a1, s10
-; RV32IF-NEXT:    add a3, s5, a0
-; RV32IF-NEXT:    sltu a4, s7, s1
-; RV32IF-NEXT:    sltu a5, s11, s9
-; RV32IF-NEXT:    add a2, s6, a2
-; RV32IF-NEXT:    add a0, s7, a3
-; RV32IF-NEXT:    add a5, s8, a5
-; RV32IF-NEXT:    sltu a3, a3, s5
-; RV32IF-NEXT:    sltu a6, a0, s7
-; RV32IF-NEXT:    add a4, a5, a4
-; RV32IF-NEXT:    add a2, a2, a3
-; RV32IF-NEXT:    add a2, a4, a2
-; RV32IF-NEXT:    add a2, a2, a6
-; RV32IF-NEXT:    j .LBB18_7
-; RV32IF-NEXT:  .LBB18_6: # %fp-to-i-if-else
+; RV32IF-NEXT:    add a2, s6, a0
+; RV32IF-NEXT:    sltu a3, s8, s1
+; RV32IF-NEXT:    sltu a4, s11, s10
+; RV32IF-NEXT:    add a1, s7, a1
+; RV32IF-NEXT:    add a0, s8, a2
+; RV32IF-NEXT:    add a4, s9, a4
+; RV32IF-NEXT:    sltu a2, a2, s6
+; RV32IF-NEXT:    sltu a5, a0, s8
+; RV32IF-NEXT:    add a3, a4, a3
+; RV32IF-NEXT:    add a1, a1, a2
+; RV32IF-NEXT:    add a1, a3, a1
+; RV32IF-NEXT:    add a1, a1, a5
+; RV32IF-NEXT:    j .LBB18_6
+; RV32IF-NEXT:  .LBB18_5: # %fp-to-i-if-else
 ; RV32IF-NEXT:    sw a0, 56(sp)
 ; RV32IF-NEXT:    sw a1, 60(sp)
 ; RV32IF-NEXT:    sw zero, 64(sp)
@@ -1269,10 +1260,10 @@ define i64 @stest_f64i64(double %x) {
 ; RV32IF-NEXT:    sll a2, a2, a0
 ; RV32IF-NEXT:    sll a5, a5, a0
 ; RV32IF-NEXT:    sll a1, a1, a0
-; RV32IF-NEXT:    or s3, a2, a6
+; RV32IF-NEXT:    or s4, a2, a6
 ; RV32IF-NEXT:    or a2, a5, a7
 ; RV32IF-NEXT:    or a3, a1, a3
-; RV32IF-NEXT:    sll s4, a4, a0
+; RV32IF-NEXT:    sll s3, a4, a0
 ; RV32IF-NEXT:    mv a0, s8
 ; RV32IF-NEXT:    mv a1, s0
 ; RV32IF-NEXT:    call __muldi3
@@ -1280,21 +1271,21 @@ define i64 @stest_f64i64(double %x) {
 ; RV32IF-NEXT:    mv s5, a1
 ; RV32IF-NEXT:    mv a0, s0
 ; RV32IF-NEXT:    mv a1, s0
-; RV32IF-NEXT:    mv a2, s4
-; RV32IF-NEXT:    mv a3, s3
+; RV32IF-NEXT:    mv a2, s3
+; RV32IF-NEXT:    mv a3, s4
 ; RV32IF-NEXT:    call __muldi3
 ; RV32IF-NEXT:    add a1, a1, s5
 ; RV32IF-NEXT:    add s1, a0, s2
 ; RV32IF-NEXT:    sltu a0, s1, a0
 ; RV32IF-NEXT:    add s7, a1, a0
-; RV32IF-NEXT:    mv a0, s3
+; RV32IF-NEXT:    mv a0, s4
 ; RV32IF-NEXT:    li a1, 0
 ; RV32IF-NEXT:    mv a2, s8
 ; RV32IF-NEXT:    li a3, 0
 ; RV32IF-NEXT:    call __muldi3
 ; RV32IF-NEXT:    mv s5, a0
 ; RV32IF-NEXT:    mv s6, a1
-; RV32IF-NEXT:    mv a0, s4
+; RV32IF-NEXT:    mv a0, s3
 ; RV32IF-NEXT:    li a1, 0
 ; RV32IF-NEXT:    mv a2, s8
 ; RV32IF-NEXT:    li a3, 0
@@ -1303,76 +1294,75 @@ define i64 @stest_f64i64(double %x) {
 ; RV32IF-NEXT:    add s8, s5, a1
 ; RV32IF-NEXT:    sltu a0, s8, s5
 ; RV32IF-NEXT:    add s6, s6, a0
-; RV32IF-NEXT:    mv a0, s4
+; RV32IF-NEXT:    mv a0, s3
 ; RV32IF-NEXT:    li a1, 0
 ; RV32IF-NEXT:    mv a2, s0
 ; RV32IF-NEXT:    li a3, 0
 ; RV32IF-NEXT:    call __muldi3
-; RV32IF-NEXT:    add s8, a0, s8
-; RV32IF-NEXT:    sltu a0, s8, a0
+; RV32IF-NEXT:    add s3, a0, s8
+; RV32IF-NEXT:    sltu a0, s3, a0
 ; RV32IF-NEXT:    add a0, a1, a0
-; RV32IF-NEXT:    add s4, s6, a0
-; RV32IF-NEXT:    sltu s5, s4, s6
-; RV32IF-NEXT:    mv a0, s3
+; RV32IF-NEXT:    add s5, s6, a0
+; RV32IF-NEXT:    sltu s6, s5, s6
+; RV32IF-NEXT:    mv a0, s4
 ; RV32IF-NEXT:    li a1, 0
 ; RV32IF-NEXT:    mv a2, s0
 ; RV32IF-NEXT:    li a3, 0
 ; RV32IF-NEXT:    call __muldi3
-; RV32IF-NEXT:    mv a2, a1
-; RV32IF-NEXT:    mv a1, s8
-; RV32IF-NEXT:    add a2, a2, s5
-; RV32IF-NEXT:    add s4, a0, s4
-; RV32IF-NEXT:    sltu a3, s4, a0
-; RV32IF-NEXT:    add a0, s4, s1
-; RV32IF-NEXT:    add a2, a2, a3
-; RV32IF-NEXT:    add a2, a2, s7
-; RV32IF-NEXT:    sltu a3, a0, s4
-; RV32IF-NEXT:    add a2, a2, a3
-; RV32IF-NEXT:  .LBB18_7: # %fp-to-i-cleanup
-; RV32IF-NEXT:    lui a3, 524288
-; RV32IF-NEXT:    addi a4, a3, -1
-; RV32IF-NEXT:    beq a1, a4, .LBB18_9
-; RV32IF-NEXT:  # %bb.8: # %fp-to-i-cleanup
-; RV32IF-NEXT:    sltu a5, a1, a4
-; RV32IF-NEXT:    or a6, a0, a2
-; RV32IF-NEXT:    bnez a6, .LBB18_10
-; RV32IF-NEXT:    j .LBB18_11
-; RV32IF-NEXT:  .LBB18_9:
-; RV32IF-NEXT:    sltiu a5, s2, -1
-; RV32IF-NEXT:    or a6, a0, a2
-; RV32IF-NEXT:    beqz a6, .LBB18_11
+; RV32IF-NEXT:    add a1, a1, s6
+; RV32IF-NEXT:    add s5, a0, s5
+; RV32IF-NEXT:    sltu a2, s5, a0
+; RV32IF-NEXT:    add a0, s5, s1
+; RV32IF-NEXT:    add a1, a1, a2
+; RV32IF-NEXT:    add a1, a1, s7
+; RV32IF-NEXT:    sltu a2, a0, s5
+; RV32IF-NEXT:    add a1, a1, a2
+; RV32IF-NEXT:  .LBB18_6: # %fp-to-i-cleanup
+; RV32IF-NEXT:    lui a2, 524288
+; RV32IF-NEXT:    addi a3, a2, -1
+; RV32IF-NEXT:    beq s3, a3, .LBB18_8
+; RV32IF-NEXT:  # %bb.7: # %fp-to-i-cleanup
+; RV32IF-NEXT:    sltu a4, s3, a3
+; RV32IF-NEXT:    or a5, a0, a1
+; RV32IF-NEXT:    bnez a5, .LBB18_9
+; RV32IF-NEXT:    j .LBB18_10
+; RV32IF-NEXT:  .LBB18_8:
+; RV32IF-NEXT:    sltiu a4, s2, -1
+; RV32IF-NEXT:    or a5, a0, a1
+; RV32IF-NEXT:    beqz a5, .LBB18_10
+; RV32IF-NEXT:  .LBB18_9: # %fp-to-i-cleanup
+; RV32IF-NEXT:    srli a4, a1, 31
 ; RV32IF-NEXT:  .LBB18_10: # %fp-to-i-cleanup
-; RV32IF-NEXT:    srli a5, a2, 31
-; RV32IF-NEXT:  .LBB18_11: # %fp-to-i-cleanup
-; RV32IF-NEXT:    neg a6, a5
-; RV32IF-NEXT:    addi a7, a5, -1
-; RV32IF-NEXT:    bnez a5, .LBB18_13
-; RV32IF-NEXT:  # %bb.12: # %fp-to-i-cleanup
-; RV32IF-NEXT:    mv a1, a4
-; RV32IF-NEXT:  .LBB18_13: # %fp-to-i-cleanup
-; RV32IF-NEXT:    or a4, a7, s2
-; RV32IF-NEXT:    and a2, a6, a2
-; RV32IF-NEXT:    and a5, a6, a0
-; RV32IF-NEXT:    beq a1, a3, .LBB18_15
-; RV32IF-NEXT:  # %bb.14: # %fp-to-i-cleanup
-; RV32IF-NEXT:    sltu a0, a3, a1
-; RV32IF-NEXT:    j .LBB18_16
-; RV32IF-NEXT:  .LBB18_15:
-; RV32IF-NEXT:    snez a0, a4
-; RV32IF-NEXT:  .LBB18_16: # %fp-to-i-cleanup
-; RV32IF-NEXT:    and a5, a5, a2
-; RV32IF-NEXT:    li a3, -1
-; RV32IF-NEXT:    beq a5, a3, .LBB18_18
-; RV32IF-NEXT:  # %bb.17: # %fp-to-i-cleanup
-; RV32IF-NEXT:    srli a2, a2, 31
-; RV32IF-NEXT:    xori a0, a2, 1
-; RV32IF-NEXT:  .LBB18_18: # %fp-to-i-cleanup
-; RV32IF-NEXT:    bnez a0, .LBB18_20
-; RV32IF-NEXT:  # %bb.19: # %fp-to-i-cleanup
-; RV32IF-NEXT:    lui a1, 524288
-; RV32IF-NEXT:  .LBB18_20: # %fp-to-i-cleanup
+; RV32IF-NEXT:    neg a5, a4
+; RV32IF-NEXT:    addi a6, a4, -1
+; RV32IF-NEXT:    bnez a4, .LBB18_12
+; RV32IF-NEXT:  # %bb.11: # %fp-to-i-cleanup
+; RV32IF-NEXT:    mv s3, a3
+; RV32IF-NEXT:  .LBB18_12: # %fp-to-i-cleanup
+; RV32IF-NEXT:    or a3, a6, s2
+; RV32IF-NEXT:    and a1, a5, a1
+; RV32IF-NEXT:    and a4, a5, a0
+; RV32IF-NEXT:    beq s3, a2, .LBB18_14
+; RV32IF-NEXT:  # %bb.13: # %fp-to-i-cleanup
+; RV32IF-NEXT:    sltu a0, a2, s3
+; RV32IF-NEXT:    j .LBB18_15
+; RV32IF-NEXT:  .LBB18_14:
+; RV32IF-NEXT:    snez a0, a3
+; RV32IF-NEXT:  .LBB18_15: # %fp-to-i-cleanup
+; RV32IF-NEXT:    and a4, a4, a1
+; RV32IF-NEXT:    li a2, -1
+; RV32IF-NEXT:    beq a4, a2, .LBB18_17
+; RV32IF-NEXT:  # %bb.16: # %fp-to-i-cleanup
+; RV32IF-NEXT:    srli a1, a1, 31
+; RV32IF-NEXT:    xori a0, a1, 1
+; RV32IF-NEXT:  .LBB18_17: # %fp-to-i-cleanup
+; RV32IF-NEXT:    bnez a0, .LBB18_19
+; RV32IF-NEXT:  # %bb.18: # %fp-to-i-cleanup
+; RV32IF-NEXT:    lui s3, 524288
+; RV32IF-NEXT:  .LBB18_19: # %fp-to-i-cleanup
 ; RV32IF-NEXT:    neg a0, a0
-; RV32IF-NEXT:    and a0, a0, a4
+; RV32IF-NEXT:    and a0, a0, a3
+; RV32IF-NEXT:    mv a1, s3
 ; RV32IF-NEXT:    lw ra, 124(sp) # 4-byte Folded Reload
 ; RV32IF-NEXT:    lw s0, 120(sp) # 4-byte Folded Reload
 ; RV32IF-NEXT:    lw s1, 116(sp) # 4-byte Folded Reload
@@ -1402,6 +1392,15 @@ define i64 @stest_f64i64(double %x) {
 ; RV32IF-NEXT:    addi sp, sp, 128
 ; RV32IF-NEXT:    .cfi_def_cfa_offset 0
 ; RV32IF-NEXT:    ret
+; RV32IF-NEXT:  .LBB18_20: # %fp-to-i-if-then5
+; RV32IF-NEXT:    .cfi_restore_state
+; RV32IF-NEXT:    xori a0, a3, 1
+; RV32IF-NEXT:    lui a1, 524288
+; RV32IF-NEXT:    neg s2, a0
+; RV32IF-NEXT:    sub a1, a1, a0
+; RV32IF-NEXT:    mv s3, s2
+; RV32IF-NEXT:    mv a0, s2
+; RV32IF-NEXT:    j .LBB18_6
 ;
 ; RV64IF-LABEL: stest_f64i64:
 ; RV64IF:       # %bb.0: # %entry
@@ -1474,6 +1473,7 @@ define i64 @stest_f64i64(double %x) {
 ; RV32IFD-NEXT:    .cfi_offset s9, -44
 ; RV32IFD-NEXT:    .cfi_offset s10, -48
 ; RV32IFD-NEXT:    .cfi_offset s11, -52
+; RV32IFD-NEXT:    .cfi_remember_state
 ; RV32IFD-NEXT:    fsd fa0, 16(sp)
 ; RV32IFD-NEXT:    lw a2, 20(sp)
 ; RV32IFD-NEXT:    slli a0, a2, 1
@@ -1482,26 +1482,18 @@ define i64 @stest_f64i64(double %x) {
 ; RV32IFD-NEXT:    bgeu a0, a1, .LBB18_2
 ; RV32IFD-NEXT:  # %bb.1:
 ; RV32IFD-NEXT:    li s2, 0
-; RV32IFD-NEXT:    li a1, 0
+; RV32IFD-NEXT:    li s3, 0
 ; RV32IFD-NEXT:    li a0, 0
-; RV32IFD-NEXT:    li a2, 0
-; RV32IFD-NEXT:    j .LBB18_7
+; RV32IFD-NEXT:    li a1, 0
+; RV32IFD-NEXT:    j .LBB18_6
 ; RV32IFD-NEXT:  .LBB18_2: # %fp-to-i-if-end
 ; RV32IFD-NEXT:    addi a1, a0, -1151
 ; RV32IFD-NEXT:    sltu a3, a1, a0
 ; RV32IFD-NEXT:    sltiu a1, a1, -128
 ; RV32IFD-NEXT:    or a1, a3, a1
 ; RV32IFD-NEXT:    srli a3, a2, 31
-; RV32IFD-NEXT:    beqz a1, .LBB18_4
-; RV32IFD-NEXT:  # %bb.3: # %fp-to-i-if-then5
-; RV32IFD-NEXT:    xori a0, a3, 1
-; RV32IFD-NEXT:    lui a2, 524288
-; RV32IFD-NEXT:    neg s2, a0
-; RV32IFD-NEXT:    sub a2, a2, a0
-; RV32IFD-NEXT:    mv a1, s2
-; RV32IFD-NEXT:    mv a0, s2
-; RV32IFD-NEXT:    j .LBB18_7
-; RV32IFD-NEXT:  .LBB18_4: # %fp-to-i-if-end9
+; RV32IFD-NEXT:    bnez a1, .LBB18_20
+; RV32IFD-NEXT:  # %bb.3: # %fp-to-i-if-end9
 ; RV32IFD-NEXT:    lw a1, 16(sp)
 ; RV32IFD-NEXT:    neg s0, a3
 ; RV32IFD-NEXT:    slli a2, a2, 12
@@ -1510,8 +1502,8 @@ define i64 @stest_f64i64(double %x) {
 ; RV32IFD-NEXT:    or a2, a2, a3
 ; RV32IFD-NEXT:    li a3, 1074
 ; RV32IFD-NEXT:    ori s8, s0, 1
-; RV32IFD-NEXT:    bltu a3, a0, .LBB18_6
-; RV32IFD-NEXT:  # %bb.5: # %fp-to-i-if-then12
+; RV32IFD-NEXT:    bltu a3, a0, .LBB18_5
+; RV32IFD-NEXT:  # %bb.4: # %fp-to-i-if-then12
 ; RV32IFD-NEXT:    sw zero, 40(sp)
 ; RV32IFD-NEXT:    sw zero, 44(sp)
 ; RV32IFD-NEXT:    sw zero, 48(sp)
@@ -1543,70 +1535,68 @@ define i64 @stest_f64i64(double %x) {
 ; RV32IFD-NEXT:    srl a0, a0, a1
 ; RV32IFD-NEXT:    or a3, a3, a6
 ; RV32IFD-NEXT:    sw a3, 12(sp) # 4-byte Folded Spill
-; RV32IFD-NEXT:    or s5, a2, a7
-; RV32IFD-NEXT:    or s6, a0, a5
-; RV32IFD-NEXT:    srl s4, a4, a1
-; RV32IFD-NEXT:    mv a0, s6
+; RV32IFD-NEXT:    or s6, a2, a7
+; RV32IFD-NEXT:    or s7, a0, a5
+; RV32IFD-NEXT:    srl s5, a4, a1
+; RV32IFD-NEXT:    mv a0, s7
 ; RV32IFD-NEXT:    li a1, 0
 ; RV32IFD-NEXT:    mv a2, s8
 ; RV32IFD-NEXT:    li a3, 0
 ; RV32IFD-NEXT:    call __muldi3
 ; RV32IFD-NEXT:    mv s2, a0
-; RV32IFD-NEXT:    mv s7, a1
-; RV32IFD-NEXT:    mv a0, s5
+; RV32IFD-NEXT:    mv s3, a1
+; RV32IFD-NEXT:    mv a0, s6
 ; RV32IFD-NEXT:    li a1, 0
 ; RV32IFD-NEXT:    mv a2, s8
 ; RV32IFD-NEXT:    li a3, 0
 ; RV32IFD-NEXT:    call __muldi3
-; RV32IFD-NEXT:    add s7, a0, s7
-; RV32IFD-NEXT:    sltu a0, s7, a0
-; RV32IFD-NEXT:    add s9, a1, a0
-; RV32IFD-NEXT:    mv a0, s6
+; RV32IFD-NEXT:    add s3, a0, s3
+; RV32IFD-NEXT:    sltu a0, s3, a0
+; RV32IFD-NEXT:    add s10, a1, a0
+; RV32IFD-NEXT:    mv a0, s7
 ; RV32IFD-NEXT:    li a1, 0
 ; RV32IFD-NEXT:    mv a2, s0
 ; RV32IFD-NEXT:    li a3, 0
 ; RV32IFD-NEXT:    call __muldi3
-; RV32IFD-NEXT:    add s10, a0, s7
-; RV32IFD-NEXT:    sltu a0, s10, a0
+; RV32IFD-NEXT:    add s3, a0, s3
+; RV32IFD-NEXT:    sltu a0, s3, a0
 ; RV32IFD-NEXT:    add a0, a1, a0
-; RV32IFD-NEXT:    add s11, s9, a0
-; RV32IFD-NEXT:    mv a0, s5
+; RV32IFD-NEXT:    add s11, s10, a0
+; RV32IFD-NEXT:    mv a0, s6
 ; RV32IFD-NEXT:    li a1, 0
 ; RV32IFD-NEXT:    mv a2, s0
 ; RV32IFD-NEXT:    li a3, 0
 ; RV32IFD-NEXT:    call __muldi3
 ; RV32IFD-NEXT:    mv s1, a0
-; RV32IFD-NEXT:    mv s3, s8
-; RV32IFD-NEXT:    mv s8, a1
-; RV32IFD-NEXT:    add s7, a0, s11
+; RV32IFD-NEXT:    mv s9, a1
+; RV32IFD-NEXT:    mv s4, s8
+; RV32IFD-NEXT:    add s8, a0, s11
 ; RV32IFD-NEXT:    mv a0, s0
 ; RV32IFD-NEXT:    mv a1, s0
-; RV32IFD-NEXT:    mv a2, s6
-; RV32IFD-NEXT:    mv a3, s5
+; RV32IFD-NEXT:    mv a2, s7
+; RV32IFD-NEXT:    mv a3, s6
 ; RV32IFD-NEXT:    call __muldi3
-; RV32IFD-NEXT:    mv s5, a0
-; RV32IFD-NEXT:    mv s6, a1
-; RV32IFD-NEXT:    mv a0, s3
+; RV32IFD-NEXT:    mv s6, a0
+; RV32IFD-NEXT:    mv s7, a1
+; RV32IFD-NEXT:    mv a0, s4
 ; RV32IFD-NEXT:    mv a1, s0
 ; RV32IFD-NEXT:    lw a2, 12(sp) # 4-byte Folded Reload
-; RV32IFD-NEXT:    mv a3, s4
+; RV32IFD-NEXT:    mv a3, s5
 ; RV32IFD-NEXT:    call __muldi3
-; RV32IFD-NEXT:    mv a2, a1
-; RV32IFD-NEXT:    mv a1, s10
-; RV32IFD-NEXT:    add a3, s5, a0
-; RV32IFD-NEXT:    sltu a4, s7, s1
-; RV32IFD-NEXT:    sltu a5, s11, s9
-; RV32IFD-NEXT:    add a2, s6, a2
-; RV32IFD-NEXT:    add a0, s7, a3
-; RV32IFD-NEXT:    add a5, s8, a5
-; RV32IFD-NEXT:    sltu a3, a3, s5
-; RV32IFD-NEXT:    sltu a6, a0, s7
-; RV32IFD-NEXT:    add a4, a5, a4
-; RV32IFD-NEXT:    add a2, a2, a3
-; RV32IFD-NEXT:    add a2, a4, a2
-; RV32IFD-NEXT:    add a2, a2, a6
-; RV32IFD-NEXT:    j .LBB18_7
-; RV32IFD-NEXT:  .LBB18_6: # %fp-to-i-if-else
+; RV32IFD-NEXT:    add a2, s6, a0
+; RV32IFD-NEXT:    sltu a3, s8, s1
+; RV32IFD-NEXT:    sltu a4, s11, s10
+; RV32IFD-NEXT:    add a1, s7, a1
+; RV32IFD-NEXT:    add a0, s8, a2
+; RV32IFD-NEXT:    add a4, s9, a4
+; RV32IFD-NEXT:    sltu a2, a2, s6
+; RV32IFD-NEXT:    sltu a5, a0, s8
+; RV32IFD-NEXT:    add a3, a4, a3
+; RV32IFD-NEXT:    add a1, a1, a2
+; RV32IFD-NEXT:    add a1, a3, a1
+; RV32IFD-NEXT:    add a1, a1, a5
+; RV32IFD-NEXT:    j .LBB18_6
+; RV32IFD-NEXT:  .LBB18_5: # %fp-to-i-if-else
 ; RV32IFD-NEXT:    sw a1, 72(sp)
 ; RV32IFD-NEXT:    sw a2, 76(sp)
 ; RV32IFD-NEXT:    sw zero, 80(sp)
@@ -1635,10 +1625,10 @@ define i64 @stest_f64i64(double %x) {
 ; RV32IFD-NEXT:    sll a2, a2, a0
 ; RV32IFD-NEXT:    sll a5, a5, a0
 ; RV32IFD-NEXT:    sll a1, a1, a0
-; RV32IFD-NEXT:    or s3, a2, a6
+; RV32IFD-NEXT:    or s4, a2, a6
 ; RV32IFD-NEXT:    or a2, a5, a7
 ; RV32IFD-NEXT:    or a3, a1, a3
-; RV32IFD-NEXT:    sll s4, a4, a0
+; RV32IFD-NEXT:    sll s3, a4, a0
 ; RV32IFD-NEXT:    mv a0, s8
 ; RV32IFD-NEXT:    mv a1, s0
 ; RV32IFD-NEXT:    call __muldi3
@@ -1646,21 +1636,21 @@ define i64 @stest_f64i64(double %x) {
 ; RV32IFD-NEXT:    mv s5, a1
 ; RV32IFD-NEXT:    mv a0, s0
 ; RV32IFD-NEXT:    mv a1, s0
-; RV32IFD-NEXT:    mv a2, s4
-; RV32IFD-NEXT:    mv a3, s3
+; RV32IFD-NEXT:    mv a2, s3
+; RV32IFD-NEXT:    mv a3, s4
 ; RV32IFD-NEXT:    call __muldi3
 ; RV32IFD-NEXT:    add a1, a1, s5
 ; RV32IFD-NEXT:    add s1, a0, s2
 ; RV32IFD-NEXT:    sltu a0, s1, a0
 ; RV32IFD-NEXT:    add s7, a1, a0
-; RV32IFD-NEXT:    mv a0, s3
+; RV32IFD-NEXT:    mv a0, s4
 ; RV32IFD-NEXT:    li a1, 0
 ; RV32IFD-NEXT:    mv a2, s8
 ; RV32IFD-NEXT:    li a3, 0
 ; RV32IFD-NEXT:    call __muldi3
 ; RV32IFD-NEXT:    mv s5, a0
 ; RV32IFD-NEXT:    mv s6, a1
-; RV32IFD-NEXT:    mv a0, s4
+; RV32IFD-NEXT:    mv a0, s3
 ; RV32IFD-NEXT:    li a1, 0
 ; RV32IFD-NEXT:    mv a2, s8
 ; RV32IFD-NEXT:    li a3, 0
@@ -1669,76 +1659,75 @@ define i64 @stest_f64i64(double %x) {
 ; RV32IFD-NEXT:    add s8, s5, a1
 ; RV32IFD-NEXT:    sltu a0, s8, s5
 ; RV32IFD-NEXT:    add s6, s6, a0
-; RV32IFD-NEXT:    mv a0, s4
+; RV32IFD-NEXT:    mv a0, s3
 ; RV32IFD-NEXT:    li a1, 0
 ; RV32IFD-NEXT:    mv a2, s0
 ; RV32IFD-NEXT:    li a3, 0
 ; RV32IFD-NEXT:    call __muldi3
-; RV32IFD-NEXT:    add s8, a0, s8
-; RV32IFD-NEXT:    sltu a0, s8, a0
+; RV32IFD-NEXT:    add s3, a0, s8
+; RV32IFD-NEXT:    sltu a0, s3, a0
 ; RV32IFD-NEXT:    add a0, a1, a0
-; RV32IFD-NEXT:    add s4, s6, a0
-; RV32IFD-NEXT:    sltu s5, s4, s6
-; RV32IFD-NEXT:    mv a0, s3
+; RV32IFD-NEXT:    add s5, s6, a0
+; RV32IFD-NEXT:    sltu s6, s5, s6
+; RV32IFD-NEXT:    mv a0, s4
 ; RV32IFD-NEXT:    li a1, 0
 ; RV32IFD-NEXT:    mv a2, s0
 ; RV32IFD-NEXT:    li a3, 0
 ; RV32IFD-NEXT:    call __muldi3
-; RV32IFD-NEXT:    mv a2, a1
-; RV32IFD-NEXT:    mv a1, s8
-; RV32IFD-NEXT:    add a2, a2, s5
-; RV32IFD-NEXT:    add s4, a0, s4
-; RV32IFD-NEXT:    sltu a3, s4, a0
-; RV32IFD-NEXT:    add a0, s4, s1
-; RV32IFD-NEXT:    add a2, a2, a3
-; RV32IFD-NEXT:    add a2, a2, s7
-; RV32IFD-NEXT:    sltu a3, a0, s4
-; RV32IFD-NEXT:    add a2, a2, a3
-; RV32IFD-NEXT:  .LBB18_7: # %fp-to-i-cleanup
-; RV32IFD-NEXT:    lui a3, 524288
-; RV32IFD-NEXT:    addi a4, a3, -1
-; RV32IFD-NEXT:    beq a1, a4, .LBB18_9
-; RV32IFD-NEXT:  # %bb.8: # %fp-to-i-cleanup
-; RV32IFD-NEXT:    sltu a5, a1, a4
-; RV32IFD-NEXT:    or a6, a0, a2
-; RV32IFD-NEXT:    bnez a6, .LBB18_10
-; RV32IFD-NEXT:    j .LBB18_11
-; RV32IFD-NEXT:  .LBB18_9:
-; RV32IFD-NEXT:    sltiu a5, s2, -1
-; RV32IFD-NEXT:    or a6, a0, a2
-; RV32IFD-NEXT:    beqz a6, .LBB18_11
+; RV32IFD-NEXT:    add a1, a1, s6
+; RV32IFD-NEXT:    add s5, a0, s5
+; RV32IFD-NEXT:    sltu a2, s5, a0
+; RV32IFD-NEXT:    add a0, s5, s1
+; RV32IFD-NEXT:    add a1, a1, a2
+; RV32IFD-NEXT:    add a1, a1, s7
+; RV32IFD-NEXT:    sltu a2, a0, s5
+; RV32IFD-NEXT:    add a1, a1, a2
+; RV32IFD-NEXT:  .LBB18_6: # %fp-to-i-cleanup
+; RV32IFD-NEXT:    lui a2, 524288
+; RV32IFD-NEXT:    addi a3, a2, -1
+; RV32IFD-NEXT:    beq s3, a3, .LBB18_8
+; RV32IFD-NEXT:  # %bb.7: # %fp-to-i-cleanup
+; RV32IFD-NEXT:    sltu a4, s3, a3
+; RV32IFD-NEXT:    or a5, a0, a1
+; RV32IFD-NEXT:    bnez a5, .LBB18_9
+; RV32IFD-NEXT:    j .LBB18_10
+; RV32IFD-NEXT:  .LBB18_8:
+; RV32IFD-NEXT:    sltiu a4, s2, -1
+; RV32IFD-NEXT:    or a5, a0, a1
+; RV32IFD-NEXT:    beqz a5, .LBB18_10
+; RV32IFD-NEXT:  .LBB18_9: # %fp-to-i-cleanup
+; RV32IFD-NEXT:    srli a4, a1, 31
 ; RV32IFD-NEXT:  .LBB18_10: # %fp-to-i-cleanup
-; RV32IFD-NEXT:    srli a5, a2, 31
-; RV32IFD-NEXT:  .LBB18_11: # %fp-to-i-cleanup
-; RV32IFD-NEXT:    neg a6, a5
-; RV32IFD-NEXT:    addi a7, a5, -1
-; RV32IFD-NEXT:    bnez a5, .LBB18_13
-; RV32IFD-NEXT:  # %bb.12: # %fp-to-i-cleanup
-; RV32IFD-NEXT:    mv a1, a4
-; RV32IFD-NEXT:  .LBB18_13: # %fp-to-i-cleanup
-; RV32IFD-NEXT:    or a4, a7, s2
-; RV32IFD-NEXT:    and a2, a6, a2
-; RV32IFD-NEXT:    and a5, a6, a0
-; RV32IFD-NEXT:    beq a1, a3, .LBB18_15
-; RV32IFD-NEXT:  # %bb.14: # %fp-to-i-cleanup
-; RV32IFD-NEXT:    sltu a0, a3, a1
-; RV32IFD-NEXT:    j .LBB18_16
-; RV32IFD-NEXT:  .LBB18_15:
-; RV32IFD-NEXT:    snez a0, a4
-; RV32IFD-NEXT:  .LBB18_16: # %fp-to-i-cleanup
-; RV32IFD-NEXT:    and a5, a5, a2
-; RV32IFD-NEXT:    li a3, -1
-; RV32IFD-NEXT:    beq a5, a3, .LBB18_18
-; RV32IFD-NEXT:  # %bb.17: # %fp-to-i-cleanup
-; RV32IFD-NEXT:    srli a2, a2, 31
-; RV32IFD-NEXT:    xori a0, a2, 1
-; RV32IFD-NEXT:  .LBB18_18: # %fp-to-i-cleanup
-; RV32IFD-NEXT:    bnez a0, .LBB18_20
-; RV32IFD-NEXT:  # %bb.19: # %fp-to-i-cleanup
-; RV32IFD-NEXT:    lui a1, 524288
-; RV32IFD-NEXT:  .LBB18_20: # %fp-to-i-cleanup
+; RV32IFD-NEXT:    neg a5, a4
+; RV32IFD-NEXT:    addi a6, a4, -1
+; RV32IFD-NEXT:    bnez a4, .LBB18_12
+; RV32IFD-NEXT:  # %bb.11: # %fp-to-i-cleanup
+; RV32IFD-NEXT:    mv s3, a3
+; RV32IFD-NEXT:  .LBB18_12: # %fp-to-i-cleanup
+; RV32IFD-NEXT:    or a3, a6, s2
+; RV32IFD-NEXT:    and a1, a5, a1
+; RV32IFD-NEXT:    and a4, a5, a0
+; RV32IFD-NEXT:    beq s3, a2, .LBB18_14
+; RV32IFD-NEXT:  # %bb.13: # %fp-to-i-cleanup
+; RV32IFD-NEXT:    sltu a0, a2, s3
+; RV32IFD-NEXT:    j .LBB18_15
+; RV32IFD-NEXT:  .LBB18_14:
+; RV32IFD-NEXT:    snez a0, a3
+; RV32IFD-NEXT:  .LBB18_15: # %fp-to-i-cleanup
+; RV32IFD-NEXT:    and a4, a4, a1
+; RV32IFD-NEXT:    li a2, -1
+; RV32IFD-NEXT:    beq a4, a2, .LBB18_17
+; RV32IFD-NEXT:  # %bb.16: # %fp-to-i-cleanup
+; RV32IFD-NEXT:    srli a1, a1, 31
+; RV32IFD-NEXT:    xori a0, a1, 1
+; RV32IFD-NEXT:  .LBB18_17: # %fp-to-i-cleanup
+; RV32IFD-NEXT:    bnez a0, .LBB18_19
+; RV32IFD-NEXT:  # %bb.18: # %fp-to-i-cleanup
+; RV32IFD-NEXT:    lui s3, 524288
+; RV32IFD-NEXT:  .LBB18_19: # %fp-to-i-cleanup
 ; RV32IFD-NEXT:    neg a0, a0
-; RV32IFD-NEXT:    and a0, a0, a4
+; RV32IFD-NEXT:    and a0, a0, a3
+; RV32IFD-NEXT:    mv a1, s3
 ; RV32IFD-NEXT:    lw ra, 140(sp) # 4-byte Folded Reload
 ; RV32IFD-NEXT:    lw s0, 136(sp) # 4-byte Folded Reload
 ; RV32IFD-NEXT:    lw s1, 132(sp) # 4-byte Folded Reload
@@ -1768,6 +1757,15 @@ define i64 @stest_f64i64(double %x) {
 ; RV32IFD-NEXT:    addi sp, sp, 144
 ; RV32IFD-NEXT:    .cfi_def_cfa_offset 0
 ; RV32IFD-NEXT:    ret
+; RV32IFD-NEXT:  .LBB18_20: # %fp-to-i-if-then5
+; RV32IFD-NEXT:    .cfi_restore_state
+; RV32IFD-NEXT:    xori a0, a3, 1
+; RV32IFD-NEXT:    lui a1, 524288
+; RV32IFD-NEXT:    neg s2, a0
+; RV32IFD-NEXT:    sub a1, a1, a0
+; RV32IFD-NEXT:    mv s3, s2
+; RV32IFD-NEXT:    mv a0, s2
+; RV32IFD-NEXT:    j .LBB18_6
 ;
 ; RV64IFD-LABEL: stest_f64i64:
 ; RV64IFD:       # %bb.0: # %entry
@@ -1818,6 +1816,7 @@ define i64 @utest_f64i64(double %x) {
 ; RV32IF-NEXT:    .cfi_offset s9, -44
 ; RV32IF-NEXT:    .cfi_offset s10, -48
 ; RV32IF-NEXT:    .cfi_offset s11, -52
+; RV32IF-NEXT:    .cfi_remember_state
 ; RV32IF-NEXT:    slli a2, a1, 1
 ; RV32IF-NEXT:    srli a2, a2, 21
 ; RV32IF-NEXT:    li a3, 1023
@@ -1827,23 +1826,15 @@ define i64 @utest_f64i64(double %x) {
 ; RV32IF-NEXT:    li s9, 0
 ; RV32IF-NEXT:    li a0, 0
 ; RV32IF-NEXT:    li a1, 0
-; RV32IF-NEXT:    j .LBB19_7
+; RV32IF-NEXT:    j .LBB19_6
 ; RV32IF-NEXT:  .LBB19_2: # %fp-to-i-if-end
 ; RV32IF-NEXT:    addi a3, a2, -1151
 ; RV32IF-NEXT:    sltu a4, a3, a2
 ; RV32IF-NEXT:    sltiu a3, a3, -128
 ; RV32IF-NEXT:    or a4, a4, a3
 ; RV32IF-NEXT:    srli a3, a1, 31
-; RV32IF-NEXT:    beqz a4, .LBB19_4
-; RV32IF-NEXT:  # %bb.3: # %fp-to-i-if-then5
-; RV32IF-NEXT:    xori a0, a3, 1
-; RV32IF-NEXT:    lui a1, 524288
-; RV32IF-NEXT:    neg s2, a0
-; RV32IF-NEXT:    sub a1, a1, a0
-; RV32IF-NEXT:    mv s9, s2
-; RV32IF-NEXT:    mv a0, s2
-; RV32IF-NEXT:    j .LBB19_7
-; RV32IF-NEXT:  .LBB19_4: # %fp-to-i-if-end9
+; RV32IF-NEXT:    bnez a4, .LBB19_7
+; RV32IF-NEXT:  # %bb.3: # %fp-to-i-if-end9
 ; RV32IF-NEXT:    neg s0, a3
 ; RV32IF-NEXT:    slli a1, a1, 12
 ; RV32IF-NEXT:    lui a3, 256
@@ -1851,8 +1842,8 @@ define i64 @utest_f64i64(double %x) {
 ; RV32IF-NEXT:    srli a1, a1, 12
 ; RV32IF-NEXT:    or a1, a1, a3
 ; RV32IF-NEXT:    ori s8, s0, 1
-; RV32IF-NEXT:    bltu a4, a2, .LBB19_6
-; RV32IF-NEXT:  # %bb.5: # %fp-to-i-if-then12
+; RV32IF-NEXT:    bltu a4, a2, .LBB19_5
+; RV32IF-NEXT:  # %bb.4: # %fp-to-i-if-then12
 ; RV32IF-NEXT:    sw zero, 24(sp)
 ; RV32IF-NEXT:    sw zero, 28(sp)
 ; RV32IF-NEXT:    sw zero, 32(sp)
@@ -1944,8 +1935,8 @@ define i64 @utest_f64i64(double %x) {
 ; RV32IF-NEXT:    add a1, a1, a2
 ; RV32IF-NEXT:    add a1, a3, a1
 ; RV32IF-NEXT:    add a1, a1, a5
-; RV32IF-NEXT:    j .LBB19_7
-; RV32IF-NEXT:  .LBB19_6: # %fp-to-i-if-else
+; RV32IF-NEXT:    j .LBB19_6
+; RV32IF-NEXT:  .LBB19_5: # %fp-to-i-if-else
 ; RV32IF-NEXT:    sw a0, 56(sp)
 ; RV32IF-NEXT:    sw a1, 60(sp)
 ; RV32IF-NEXT:    sw zero, 64(sp)
@@ -2031,7 +2022,7 @@ define i64 @utest_f64i64(double %x) {
 ; RV32IF-NEXT:    add a1, a1, s7
 ; RV32IF-NEXT:    sltu a2, a0, s4
 ; RV32IF-NEXT:    add a1, a1, a2
-; RV32IF-NEXT:  .LBB19_7: # %fp-to-i-cleanup
+; RV32IF-NEXT:  .LBB19_6: # %fp-to-i-cleanup
 ; RV32IF-NEXT:    or a2, a1, a0
 ; RV32IF-NEXT:    xori a0, a0, 1
 ; RV32IF-NEXT:    seqz a2, a2
@@ -2071,6 +2062,15 @@ define i64 @utest_f64i64(double %x) {
 ; RV32IF-NEXT:    addi sp, sp, 128
 ; RV32IF-NEXT:    .cfi_def_cfa_offset 0
 ; RV32IF-NEXT:    ret
+; RV32IF-NEXT:  .LBB19_7: # %fp-to-i-if-then5
+; RV32IF-NEXT:    .cfi_restore_state
+; RV32IF-NEXT:    xori a0, a3, 1
+; RV32IF-NEXT:    lui a1, 524288
+; RV32IF-NEXT:    neg s2, a0
+; RV32IF-NEXT:    sub a1, a1, a0
+; RV32IF-NEXT:    mv s9, s2
+; RV32IF-NEXT:    mv a0, s2
+; RV32IF-NEXT:    j .LBB19_6
 ;
 ; RV64-LABEL: utest_f64i64:
 ; RV64:       # %bb.0: # %entry
@@ -2118,6 +2118,7 @@ define i64 @utest_f64i64(double %x) {
 ; RV32IFD-NEXT:    .cfi_offset s9, -44
 ; RV32IFD-NEXT:    .cfi_offset s10, -48
 ; RV32IFD-NEXT:    .cfi_offset s11, -52
+; RV32IFD-NEXT:    .cfi_remember_state
 ; RV32IFD-NEXT:    fsd fa0, 16(sp)
 ; RV32IFD-NEXT:    lw a2, 20(sp)
 ; RV32IFD-NEXT:    slli a0, a2, 1
@@ -2129,23 +2130,15 @@ define i64 @utest_f64i64(double %x) {
 ; RV32IFD-NEXT:    li s9, 0
 ; RV32IFD-NEXT:    li a0, 0
 ; RV32IFD-NEXT:    li a1, 0
-; RV32IFD-NEXT:    j .LBB19_7
+; RV32IFD-NEXT:    j .LBB19_6
 ; RV32IFD-NEXT:  .LBB19_2: # %fp-to-i-if-end
 ; RV32IFD-NEXT:    addi a1, a0, -1151
 ; RV32IFD-NEXT:    sltu a3, a1, a0
 ; RV32IFD-NEXT:    sltiu a1, a1, -128
 ; RV32IFD-NEXT:    or a1, a3, a1
 ; RV32IFD-NEXT:    srli a3, a2, 31
-; RV32IFD-NEXT:    beqz a1, .LBB19_4
-; RV32IFD-NEXT:  # %bb.3: # %fp-to-i-if-then5
-; RV32IFD-NEXT:    xori a0, a3, 1
-; RV32IFD-NEXT:    lui a1, 524288
-; RV32IFD-NEXT:    neg s2, a0
-; RV32IFD-NEXT:    sub a1, a1, a0
-; RV32IFD-NEXT:    mv s9, s2
-; RV32IFD-NEXT:    mv a0, s2
-; RV32IFD-NEXT:    j .LBB19_7
-; RV32IFD-NEXT:  .LBB19_4: # %fp-to-i-if-end9
+; RV32IFD-NEXT:    bnez a1, .LBB19_7
+; RV32IFD-NEXT:  # %bb.3: # %fp-to-i-if-end9
 ; RV32IFD-NEXT:    lw a1, 16(sp)
 ; RV32IFD-NEXT:    neg s0, a3
 ; RV32IFD-NEXT:    slli a2, a2, 12
@@ -2154,8 +2147,8 @@ define i64 @utest_f64i64(double %x) {
 ; RV32IFD-NEXT:    or a2, a2, a3
 ; RV32IFD-NEXT:    li a3, 1074
 ; RV32IFD-NEXT:    ori s8, s0, 1
-; RV32IFD-NEXT:    bltu a3, a0, .LBB19_6
-; RV32IFD-NEXT:  # %bb.5: # %fp-to-i-if-then12
+; RV32IFD-NEXT:    bltu a3, a0, .LBB19_5
+; RV32IFD-NEXT:  # %bb.4: # %fp-to-i-if-then12
 ; RV32IFD-NEXT:    sw zero, 40(sp)
 ; RV32IFD-NEXT:    sw zero, 44(sp)
 ; RV32IFD-NEXT:    sw zero, 48(sp)
@@ -2247,8 +2240,8 @@ define i64 @utest_f64i64(double %x) {
 ; RV32IFD-NEXT:    add a1, a1, a2
 ; RV32IFD-NEXT:    add a1, a3, a1
 ; RV32IFD-NEXT:    add a1, a1, a5
-; RV32IFD-NEXT:    j .LBB19_7
-; RV32IFD-NEXT:  .LBB19_6: # %fp-to-i-if-else
+; RV32IFD-NEXT:    j .LBB19_6
+; RV32IFD-NEXT:  .LBB19_5: # %fp-to-i-if-else
 ; RV32IFD-NEXT:    sw a1, 72(sp)
 ; RV32IFD-NEXT:    sw a2, 76(sp)
 ; RV32IFD-NEXT:    sw zero, 80(sp)
@@ -2334,7 +2327,7 @@ define i64 @utest_f64i64(double %x) {
 ; RV32IFD-NEXT:    add a1, a1, s7
 ; RV32IFD-NEXT:    sltu a2, a0, s4
 ; RV32IFD-NEXT:    add a1, a1, a2
-; RV32IFD-NEXT:  .LBB19_7: # %fp-to-i-cleanup
+; RV32IFD-NEXT:  .LBB19_6: # %fp-to-i-cleanup
 ; RV32IFD-NEXT:    or a2, a1, a0
 ; RV32IFD-NEXT:    xori a0, a0, 1
 ; RV32IFD-NEXT:    seqz a2, a2
@@ -2374,6 +2367,15 @@ define i64 @utest_f64i64(double %x) {
 ; RV32IFD-NEXT:    addi sp, sp, 144
 ; RV32IFD-NEXT:    .cfi_def_cfa_offset 0
 ; RV32IFD-NEXT:    ret
+; RV32IFD-NEXT:  .LBB19_7: # %fp-to-i-if-then5
+; RV32IFD-NEXT:    .cfi_restore_state
+; RV32IFD-NEXT:    xori a0, a3, 1
+; RV32IFD-NEXT:    lui a1, 524288
+; RV32IFD-NEXT:    neg s2, a0
+; RV32IFD-NEXT:    sub a1, a1, a0
+; RV32IFD-NEXT:    mv s9, s2
+; RV32IFD-NEXT:    mv a0, s2
+; RV32IFD-NEXT:    j .LBB19_6
 entry:
   %conv = fptoui double %x to i128
   %0 = icmp ult i128 %conv, 18446744073709551616
@@ -2422,24 +2424,15 @@ define i64 @ustest_f64i64(double %x) {
 ; RV32IF-NEXT:    li s9, 0
 ; RV32IF-NEXT:    li a0, 0
 ; RV32IF-NEXT:    li a1, 0
-; RV32IF-NEXT:    j .LBB20_6
+; RV32IF-NEXT:    j .LBB20_8
 ; RV32IF-NEXT:  .LBB20_2: # %fp-to-i-if-end
 ; RV32IF-NEXT:    addi a3, a2, -1151
 ; RV32IF-NEXT:    sltu a4, a3, a2
 ; RV32IF-NEXT:    sltiu a3, a3, -128
 ; RV32IF-NEXT:    or a4, a4, a3
 ; RV32IF-NEXT:    srli a3, a1, 31
-; RV32IF-NEXT:    beqz a4, .LBB20_4
-; RV32IF-NEXT:  # %bb.3: # %fp-to-i-if-then5
-; RV32IF-NEXT:    xori a0, a3, 1
-; RV32IF-NEXT:    lui a1, 524288
-; RV32IF-NEXT:    neg s2, a0
-; RV32IF-NEXT:    sub a1, a1, a0
-; RV32IF-NEXT:    mv s9, s2
-; RV32IF-NEXT:    mv a0, s2
-; RV32IF-NEXT:    beqz a1, .LBB20_6
-; RV32IF-NEXT:    j .LBB20_8
-; RV32IF-NEXT:  .LBB20_4: # %fp-to-i-if-end9
+; RV32IF-NEXT:    bnez a4, .LBB20_7
+; RV32IF-NEXT:  # %bb.3: # %fp-to-i-if-end9
 ; RV32IF-NEXT:    neg s0, a3
 ; RV32IF-NEXT:    slli a1, a1, 12
 ; RV32IF-NEXT:    lui a3, 256
@@ -2447,8 +2440,8 @@ define i64 @ustest_f64i64(double %x) {
 ; RV32IF-NEXT:    srli a1, a1, 12
 ; RV32IF-NEXT:    or a1, a1, a3
 ; RV32IF-NEXT:    ori s8, s0, 1
-; RV32IF-NEXT:    bltu a4, a2, .LBB20_7
-; RV32IF-NEXT:  # %bb.5: # %fp-to-i-if-then12
+; RV32IF-NEXT:    bltu a4, a2, .LBB20_5
+; RV32IF-NEXT:  # %bb.4: # %fp-to-i-if-then12
 ; RV32IF-NEXT:    sw zero, 24(sp)
 ; RV32IF-NEXT:    sw zero, 28(sp)
 ; RV32IF-NEXT:    sw zero, 32(sp)
@@ -2540,11 +2533,9 @@ define i64 @ustest_f64i64(double %x) {
 ; RV32IF-NEXT:    add a1, a1, a2
 ; RV32IF-NEXT:    add a1, a3, a1
 ; RV32IF-NEXT:    add a1, a1, a5
-; RV32IF-NEXT:    bnez a1, .LBB20_8
-; RV32IF-NEXT:  .LBB20_6:
-; RV32IF-NEXT:    seqz a2, a0
-; RV32IF-NEXT:    j .LBB20_9
-; RV32IF-NEXT:  .LBB20_7: # %fp-to-i-if-else
+; RV32IF-NEXT:    bnez a1, .LBB20_6
+; RV32IF-NEXT:    j .LBB20_8
+; RV32IF-NEXT:  .LBB20_5: # %fp-to-i-if-else
 ; RV32IF-NEXT:    sw a0, 56(sp)
 ; RV32IF-NEXT:    sw a1, 60(sp)
 ; RV32IF-NEXT:    sw zero, 64(sp)
@@ -2630,9 +2621,20 @@ define i64 @ustest_f64i64(double %x) {
 ; RV32IF-NEXT:    add a1, a1, s7
 ; RV32IF-NEXT:    sltu a2, a0, s4
 ; RV32IF-NEXT:    add a1, a1, a2
-; RV32IF-NEXT:    beqz a1, .LBB20_6
-; RV32IF-NEXT:  .LBB20_8: # %fp-to-i-cleanup
+; RV32IF-NEXT:    beqz a1, .LBB20_8
+; RV32IF-NEXT:  .LBB20_6: # %fp-to-i-cleanup
 ; RV32IF-NEXT:    srli a2, a1, 31
+; RV32IF-NEXT:    j .LBB20_9
+; RV32IF-NEXT:  .LBB20_7: # %fp-to-i-if-then5
+; RV32IF-NEXT:    xori a0, a3, 1
+; RV32IF-NEXT:    lui a1, 524288
+; RV32IF-NEXT:    neg s2, a0
+; RV32IF-NEXT:    sub a1, a1, a0
+; RV32IF-NEXT:    mv s9, s2
+; RV32IF-NEXT:    mv a0, s2
+; RV32IF-NEXT:    bnez a1, .LBB20_6
+; RV32IF-NEXT:  .LBB20_8:
+; RV32IF-NEXT:    seqz a2, a0
 ; RV32IF-NEXT:  .LBB20_9: # %fp-to-i-cleanup
 ; RV32IF-NEXT:    xori a3, a0, 1
 ; RV32IF-NEXT:    or a3, a3, a1
@@ -2763,24 +2765,15 @@ define i64 @ustest_f64i64(double %x) {
 ; RV32IFD-NEXT:    li s9, 0
 ; RV32IFD-NEXT:    li a0, 0
 ; RV32IFD-NEXT:    li a1, 0
-; RV32IFD-NEXT:    j .LBB20_6
+; RV32IFD-NEXT:    j .LBB20_8
 ; RV32IFD-NEXT:  .LBB20_2: # %fp-to-i-if-end
 ; RV32IFD-NEXT:    addi a1, a0, -1151
 ; RV32IFD-NEXT:    sltu a3, a1, a0
 ; RV32IFD-NEXT:    sltiu a1, a1, -128
 ; RV32IFD-NEXT:    or a1, a3, a1
 ; RV32IFD-NEXT:    srli a3, a2, 31
-; RV32IFD-NEXT:    beqz a1, .LBB20_4
-; RV32IFD-NEXT:  # %bb.3: # %fp-to-i-if-then5
-; RV32IFD-NEXT:    xori a0, a3, 1
-; RV32IFD-NEXT:    lui a1, 524288
-; RV32IFD-NEXT:    neg s2, a0
-; RV32IFD-NEXT:    sub a1, a1, a0
-; RV32IFD-NEXT:    mv s9, s2
-; RV32IFD-NEXT:    mv a0, s2
-; RV32IFD-NEXT:    beqz a1, .LBB20_6
-; RV32IFD-NEXT:    j .LBB20_8
-; RV32IFD-NEXT:  .LBB20_4: # %fp-to-i-if-end9
+; RV32IFD-NEXT:    bnez a1, .LBB20_7
+; RV32IFD-NEXT:  # %bb.3: # %fp-to-i-if-end9
 ; RV32IFD-NEXT:    lw a1, 16(sp)
 ; RV32IFD-NEXT:    neg s0, a3
 ; RV32IFD-NEXT:    slli a2, a2, 12
@@ -2789,8 +2782,8 @@ define i64 @ustest_f64i64(double %x) {
 ; RV32IFD-NEXT:    or a2, a2, a3
 ; RV32IFD-NEXT:    li a3, 1074
 ; RV32IFD-NEXT:    ori s8, s0, 1
-; RV32IFD-NEXT:    bltu a3, a0, .LBB20_7
-; RV32IFD-NEXT:  # %bb.5: # %fp-to-i-if-then12
+; RV32IFD-NEXT:    bltu a3, a0, .LBB20_5
+; RV32IFD-NEXT:  # %bb.4: # %fp-to-i-if-then12
 ; RV32IFD-NEXT:    sw zero, 40(sp)
 ; RV32IFD-NEXT:    sw zero, 44(sp)
 ; RV32IFD-NEXT:    sw zero, 48(sp)
@@ -2882,11 +2875,9 @@ define i64 @ustest_f64i64(double %x) {
 ; RV32IFD-NEXT:    add a1, a1, a2
 ; RV32IFD-NEXT:    add a1, a3, a1
 ; RV32IFD-NEXT:    add a1, a1, a5
-; RV32IFD-NEXT:    bnez a1, .LBB20_8
-; RV32IFD-NEXT:  .LBB20_6:
-; RV32IFD-NEXT:    seqz a2, a0
-; RV32IFD-NEXT:    j .LBB20_9
-; RV32IFD-NEXT:  .LBB20_7: # %fp-to-i-if-else
+; RV32IFD-NEXT:    bnez a1, .LBB20_6
+; RV32IFD-NEXT:    j .LBB20_8
+; RV32IFD-NEXT:  .LBB20_5: # %fp-to-i-if-else
 ; RV32IFD-NEXT:    sw a1, 72(sp)
 ; RV32IFD-NEXT:    sw a2, 76(sp)
 ; RV32IFD-NEXT:    sw zero, 80(sp)
@@ -2972,9 +2963,20 @@ define i64 @ustest_f64i64(double %x) {
 ; RV32IFD-NEXT:    add a1, a1, s7
 ; RV32IFD-NEXT:    sltu a2, a0, s4
 ; RV32IFD-NEXT:    add a1, a1, a2
-; RV32IFD-NEXT:    beqz a1, .LBB20_6
-; RV32IFD-NEXT:  .LBB20_8: # %fp-to-i-cleanup
+; RV32IFD-NEXT:    beqz a1, .LBB20_8
+; RV32IFD-NEXT:  .LBB20_6: # %fp-to-i-cleanup
 ; RV32IFD-NEXT:    srli a2, a1, 31
+; RV32IFD-NEXT:    j .LBB20_9
+; RV32IFD-NEXT:  .LBB20_7: # %fp-to-i-if-then5
+; RV32IFD-NEXT:    xori a0, a3, 1
+; RV32IFD-NEXT:    lui a1, 524288
+; RV32IFD-NEXT:    neg s2, a0
+; RV32IFD-NEXT:    sub a1, a1, a0
+; RV32IFD-NEXT:    mv s9, s2
+; RV32IFD-NEXT:    mv a0, s2
+; RV32IFD-NEXT:    bnez a1, .LBB20_6
+; RV32IFD-NEXT:  .LBB20_8:
+; RV32IFD-NEXT:    seqz a2, a0
 ; RV32IFD-NEXT:  .LBB20_9: # %fp-to-i-cleanup
 ; RV32IFD-NEXT:    xori a3, a0, 1
 ; RV32IFD-NEXT:    or a3, a3, a1
@@ -3075,6 +3077,7 @@ define i64 @stest_f32i64(float %x) {
 ; RV32-NEXT:    .cfi_offset s9, -44
 ; RV32-NEXT:    .cfi_offset s10, -48
 ; RV32-NEXT:    .cfi_offset s11, -52
+; RV32-NEXT:    .cfi_remember_state
 ; RV32-NEXT:    fmv.x.w a1, fa0
 ; RV32-NEXT:    slli a0, a1, 1
 ; RV32-NEXT:    srli a0, a0, 24
@@ -3082,26 +3085,17 @@ define i64 @stest_f32i64(float %x) {
 ; RV32-NEXT:    bgeu a0, a2, .LBB21_2
 ; RV32-NEXT:  # %bb.1:
 ; RV32-NEXT:    li s2, 0
-; RV32-NEXT:    li a1, 0
+; RV32-NEXT:    li s3, 0
 ; RV32-NEXT:    li a0, 0
-; RV32-NEXT:    li a2, 0
-; RV32-NEXT:    j .LBB21_7
+; RV32-NEXT:    li a1, 0
+; RV32-NEXT:    j .LBB21_6
 ; RV32-NEXT:  .LBB21_2: # %fp-to-i-if-end
 ; RV32-NEXT:    addi a2, a0, -255
 ; RV32-NEXT:    sltu a3, a2, a0
 ; RV32-NEXT:    sltiu a2, a2, -128
 ; RV32-NEXT:    or a2, a3, a2
-; RV32-NEXT:    beqz a2, .LBB21_4
-; RV32-NEXT:  # %bb.3: # %fp-to-i-if-then5
-; RV32-NEXT:    srli a1, a1, 31
-; RV32-NEXT:    lui a0, 524288
-; RV32-NEXT:    xori a2, a1, 1
-; RV32-NEXT:    addi s2, a1, -1
-; RV32-NEXT:    sub a2, a0, a2
-; RV32-NEXT:    mv a1, s2
-; RV32-NEXT:    mv a0, s2
-; RV32-NEXT:    j .LBB21_7
-; RV32-NEXT:  .LBB21_4: # %fp-to-i-if-end9
+; RV32-NEXT:    bnez a2, .LBB21_20
+; RV32-NEXT:  # %bb.3: # %fp-to-i-if-end9
 ; RV32-NEXT:    srai s0, a1, 31
 ; RV32-NEXT:    slli a1, a1, 9
 ; RV32-NEXT:    lui a2, 2048
@@ -3109,8 +3103,8 @@ define i64 @stest_f32i64(float %x) {
 ; RV32-NEXT:    srli a1, a1, 9
 ; RV32-NEXT:    or a1, a1, a2
 ; RV32-NEXT:    ori s8, s0, 1
-; RV32-NEXT:    bltu a3, a0, .LBB21_6
-; RV32-NEXT:  # %bb.5: # %fp-to-i-if-then12
+; RV32-NEXT:    bltu a3, a0, .LBB21_5
+; RV32-NEXT:  # %bb.4: # %fp-to-i-if-then12
 ; RV32-NEXT:    sw zero, 24(sp)
 ; RV32-NEXT:    sw zero, 28(sp)
 ; RV32-NEXT:    sw zero, 32(sp)
@@ -3142,70 +3136,68 @@ define i64 @stest_f32i64(float %x) {
 ; RV32-NEXT:    srl a0, a0, a1
 ; RV32-NEXT:    or a3, a3, a6
 ; RV32-NEXT:    sw a3, 4(sp) # 4-byte Folded Spill
-; RV32-NEXT:    or s5, a2, a7
-; RV32-NEXT:    or s6, a0, a5
-; RV32-NEXT:    srl s4, a4, a1
-; RV32-NEXT:    mv a0, s6
+; RV32-NEXT:    or s6, a2, a7
+; RV32-NEXT:    or s7, a0, a5
+; RV32-NEXT:    srl s5, a4, a1
+; RV32-NEXT:    mv a0, s7
 ; RV32-NEXT:    li a1, 0
 ; RV32-NEXT:    mv a2, s8
 ; RV32-NEXT:    li a3, 0
 ; RV32-NEXT:    call __muldi3
 ; RV32-NEXT:    mv s2, a0
-; RV32-NEXT:    mv s7, a1
-; RV32-NEXT:    mv a0, s5
+; RV32-NEXT:    mv s3, a1
+; RV32-NEXT:    mv a0, s6
 ; RV32-NEXT:    li a1, 0
 ; RV32-NEXT:    mv a2, s8
 ; RV32-NEXT:    li a3, 0
 ; RV32-NEXT:    call __muldi3
-; RV32-NEXT:    add s7, a0, s7
-; RV32-NEXT:    sltu a0, s7, a0
-; RV32-NEXT:    add s9, a1, a0
-; RV32-NEXT:    mv a0, s6
+; RV32-NEXT:    add s3, a0, s3
+; RV32-NEXT:    sltu a0, s3, a0
+; RV32-NEXT:    add s10, a1, a0
+; RV32-NEXT:    mv a0, s7
 ; RV32-NEXT:    li a1, 0
 ; RV32-NEXT:    mv a2, s0
 ; RV32-NEXT:    li a3, 0
 ; RV32-NEXT:    call __muldi3
-; RV32-NEXT:    add s10, a0, s7
-; RV32-NEXT:    sltu a0, s10, a0
+; RV32-NEXT:    add s3, a0, s3
+; RV32-NEXT:    sltu a0, s3, a0
 ; RV32-NEXT:    add a0, a1, a0
-; RV32-NEXT:    add s11, s9, a0
-; RV32-NEXT:    mv a0, s5
+; RV32-NEXT:    add s11, s10, a0
+; RV32-NEXT:    mv a0, s6
 ; RV32-NEXT:    li a1, 0
 ; RV32-NEXT:    mv a2, s0
 ; RV32-NEXT:    li a3, 0
 ; RV32-NEXT:    call __muldi3
 ; RV32-NEXT:    mv s1, a0
-; RV32-NEXT:    mv s3, s8
-; RV32-NEXT:    mv s8, a1
-; RV32-NEXT:    add s7, a0, s11
+; RV32-NEXT:    mv s9, a1
+; RV32-NEXT:    mv s4, s8
+; RV32-NEXT:    add s8, a0, s11
 ; RV32-NEXT:    mv a0, s0
 ; RV32-NEXT:    mv a1, s0
-; RV32-NEXT:    mv a2, s6
-; RV32-NEXT:    mv a3, s5
+; RV32-NEXT:    mv a2, s7
+; RV32-NEXT:    mv a3, s6
 ; RV32-NEXT:    call __muldi3
-; RV32-NEXT:    mv s5, a0
-; RV32-NEXT:    mv s6, a1
-; RV32-NEXT:    mv a0, s3
+; RV32-NEXT:    mv s6, a0
+; RV32-NEXT:    mv s7, a1
+; RV32-NEXT:    mv a0, s4
 ; RV32-NEXT:    mv a1, s0
 ; RV32-NEXT:    lw a2, 4(sp) # 4-byte Folded Reload
-; RV32-NEXT:    mv a3, s4
+; RV32-NEXT:    mv a3, s5
 ; RV32-NEXT:    call __muldi3
-; RV32-NEXT:    mv a2, a1
-; RV32-NEXT:    mv a1, s10
-; RV32-NEXT:    add a3, s5, a0
-; RV32-NEXT:    sltu a4, s7, s1
-; RV32-NEXT:    sltu a5, s11, s9
-; RV32-NEXT:    add a2, s6, a2
-; RV32-NEXT:    add a0, s7, a3
-; RV32-NEXT:    add a5, s8, a5
-; RV32-NEXT:    sltu a3, a3, s5
-; RV32-NEXT:    sltu a6, a0, s7
-; RV32-NEXT:    add a4, a5, a4
-; RV32-NEXT:    add a2, a2, a3
-; RV32-NEXT:    add a2, a4, a2
-; RV32-NEXT:    add a2, a2, a6
-; RV32-NEXT:    j .LBB21_7
-; RV32-NEXT:  .LBB21_6: # %fp-to-i-if-else
+; RV32-NEXT:    add a2, s6, a0
+; RV32-NEXT:    sltu a3, s8, s1
+; RV32-NEXT:    sltu a4, s11, s10
+; RV32-NEXT:    add a1, s7, a1
+; RV32-NEXT:    add a0, s8, a2
+; RV32-NEXT:    add a4, s9, a4
+; RV32-NEXT:    sltu a2, a2, s6
+; RV32-NEXT:    sltu a5, a0, s8
+; RV32-NEXT:    add a3, a4, a3
+; RV32-NEXT:    add a1, a1, a2
+; RV32-NEXT:    add a1, a3, a1
+; RV32-NEXT:    add a1, a1, a5
+; RV32-NEXT:    j .LBB21_6
+; RV32-NEXT:  .LBB21_5: # %fp-to-i-if-else
 ; RV32-NEXT:    sw a1, 56(sp)
 ; RV32-NEXT:    sw zero, 60(sp)
 ; RV32-NEXT:    sw zero, 64(sp)
@@ -3234,10 +3226,10 @@ define i64 @stest_f32i64(float %x) {
 ; RV32-NEXT:    sll a2, a2, a0
 ; RV32-NEXT:    sll a5, a5, a0
 ; RV32-NEXT:    sll a1, a1, a0
-; RV32-NEXT:    or s3, a2, a6
+; RV32-NEXT:    or s4, a2, a6
 ; RV32-NEXT:    or a2, a5, a7
 ; RV32-NEXT:    or a3, a1, a3
-; RV32-NEXT:    sll s4, a4, a0
+; RV32-NEXT:    sll s3, a4, a0
 ; RV32-NEXT:    mv a0, s8
 ; RV32-NEXT:    mv a1, s0
 ; RV32-NEXT:    call __muldi3
@@ -3245,21 +3237,21 @@ define i64 @stest_f32i64(float %x) {
 ; RV32-NEXT:    mv s5, a1
 ; RV32-NEXT:    mv a0, s0
 ; RV32-NEXT:    mv a1, s0
-; RV32-NEXT:    mv a2, s4
-; RV32-NEXT:    mv a3, s3
+; RV32-NEXT:    mv a2, s3
+; RV32-NEXT:    mv a3, s4
 ; RV32-NEXT:    call __muldi3
 ; RV32-NEXT:    add a1, a1, s5
 ; RV32-NEXT:    add s1, a0, s2
 ; RV32-NEXT:    sltu a0, s1, a0
 ; RV32-NEXT:    add s7, a1, a0
-; RV32-NEXT:    mv a0, s3
+; RV32-NEXT:    mv a0, s4
 ; RV32-NEXT:    li a1, 0
 ; RV32-NEXT:    mv a2, s8
 ; RV32-NEXT:    li a3, 0
 ; RV32-NEXT:    call __muldi3
 ; RV32-NEXT:    mv s5, a0
 ; RV32-NEXT:    mv s6, a1
-; RV32-NEXT:    mv a0, s4
+; RV32-NEXT:    mv a0, s3
 ; RV32-NEXT:    li a1, 0
 ; RV32-NEXT:    mv a2, s8
 ; RV32-NEXT:    li a3, 0
@@ -3268,76 +3260,75 @@ define i64 @stest_f32i64(float %x) {
 ; RV32-NEXT:    add s8, s5, a1
 ; RV32-NEXT:    sltu a0, s8, s5
 ; RV32-NEXT:    add s6, s6, a0
-; RV32-NEXT:    mv a0, s4
+; RV32-NEXT:    mv a0, s3
 ; RV32-NEXT:    li a1, 0
 ; RV32-NEXT:    mv a2, s0
 ; RV32-NEXT:    li a3, 0
 ; RV32-NEXT:    call __muldi3
-; RV32-NEXT:    add s8, a0, s8
-; RV32-NEXT:    sltu a0, s8, a0
+; RV32-NEXT:    add s3, a0, s8
+; RV32-NEXT:    sltu a0, s3, a0
 ; RV32-NEXT:    add a0, a1, a0
-; RV32-NEXT:    add s4, s6, a0
-; RV32-NEXT:    sltu s5, s4, s6
-; RV32-NEXT:    mv a0, s3
+; RV32-NEXT:    add s5, s6, a0
+; RV32-NEXT:    sltu s6, s5, s6
+; RV32-NEXT:    mv a0, s4
 ; RV32-NEXT:    li a1, 0
 ; RV32-NEXT:    mv a2, s0
 ; RV32-NEXT:    li a3, 0
 ; RV32-NEXT:    call __muldi3
-; RV32-NEXT:    mv a2, a1
-; RV32-NEXT:    mv a1, s8
-; RV32-NEXT:    add a2, a2, s5
-; RV32-NEXT:    add s4, a0, s4
-; RV32-NEXT:    sltu a3, s4, a0
-; RV32-NEXT:    add a0, s4, s1
-; RV32-NEXT:    add a2, a2, a3
-; RV32-NEXT:    add a2, a2, s7
-; RV32-NEXT:    sltu a3, a0, s4
-; RV32-NEXT:    add a2, a2, a3
-; RV32-NEXT:  .LBB21_7: # %fp-to-i-cleanup
-; RV32-NEXT:    lui a3, 524288
-; RV32-NEXT:    addi a4, a3, -1
-; RV32-NEXT:    beq a1, a4, .LBB21_9
-; RV32-NEXT:  # %bb.8: # %fp-to-i-cleanup
-; RV32-NEXT:    sltu a5, a1, a4
-; RV32-NEXT:    or a6, a0, a2
-; RV32-NEXT:    bnez a6, .LBB21_10
-; RV32-NEXT:    j .LBB21_11
-; RV32-NEXT:  .LBB21_9:
-; RV32-NEXT:    sltiu a5, s2, -1
-; RV32-NEXT:    or a6, a0, a2
-; RV32-NEXT:    beqz a6, .LBB21_11
+; RV32-NEXT:    add a1, a1, s6
+; RV32-NEXT:    add s5, a0, s5
+; RV32-NEXT:    sltu a2, s5, a0
+; RV32-NEXT:    add a0, s5, s1
+; RV32-NEXT:    add a1, a1, a2
+; RV32-NEXT:    add a1, a1, s7
+; RV32-NEXT:    sltu a2, a0, s5
+; RV32-NEXT:    add a1, a1, a2
+; RV32-NEXT:  .LBB21_6: # %fp-to-i-cleanup
+; RV32-NEXT:    lui a2, 524288
+; RV32-NEXT:    addi a3, a2, -1
+; RV32-NEXT:    beq s3, a3, .LBB21_8
+; RV32-NEXT:  # %bb.7: # %fp-to-i-cleanup
+; RV32-NEXT:    sltu a4, s3, a3
+; RV32-NEXT:    or a5, a0, a1
+; RV32-NEXT:    bnez a5, .LBB21_9
+; RV32-NEXT:    j .LBB21_10
+; RV32-NEXT:  .LBB21_8:
+; RV32-NEXT:    sltiu a4, s2, -1
+; RV32-NEXT:    or a5, a0, a1
+; RV32-NEXT:    beqz a5, .LBB21_10
+; RV32-NEXT:  .LBB21_9: # %fp-to-i-cleanup
+; RV32-NEXT:    srli a4, a1, 31
 ; RV32-NEXT:  .LBB21_10: # %fp-to-i-cleanup
-; RV32-NEXT:    srli a5, a2, 31
-; RV32-NEXT:  .LBB21_11: # %fp-to-i-cleanup
-; RV32-NEXT:    neg a6, a5
-; RV32-NEXT:    addi a7, a5, -1
-; RV32-NEXT:    bnez a5, .LBB21_13
-; RV32-NEXT:  # %bb.12: # %fp-to-i-cleanup
-; RV32-NEXT:    mv a1, a4
-; RV32-NEXT:  .LBB21_13: # %fp-to-i-cleanup
-; RV32-NEXT:    or a4, a7, s2
-; RV32-NEXT:    and a2, a6, a2
-; RV32-NEXT:    and a5, a6, a0
-; RV32-NEXT:    beq a1, a3, .LBB21_15
-; RV32-NEXT:  # %bb.14: # %fp-to-i-cleanup
-; RV32-NEXT:    sltu a0, a3, a1
-; RV32-NEXT:    j .LBB21_16
-; RV32-NEXT:  .LBB21_15:
-; RV32-NEXT:    snez a0, a4
-; RV32-NEXT:  .LBB21_16: # %fp-to-i-cleanup
-; RV32-NEXT:    and a5, a5, a2
-; RV32-NEXT:    li a3, -1
-; RV32-NEXT:    beq a5, a3, .LBB21_18
-; RV32-NEXT:  # %bb.17: # %fp-to-i-cleanup
-; RV32-NEXT:    srli a2, a2, 31
-; RV32-NEXT:    xori a0, a2, 1
-; RV32-NEXT:  .LBB21_18: # %fp-to-i-cleanup
-; RV32-NEXT:    bnez a0, .LBB21_20
-; RV32-NEXT:  # %bb.19: # %fp-to-i-cleanup
-; RV32-NEXT:    lui a1, 524288
-; RV32-NEXT:  .LBB21_20: # %fp-to-i-cleanup
+; RV32-NEXT:    neg a5, a4
+; RV32-NEXT:    addi a6, a4, -1
+; RV32-NEXT:    bnez a4, .LBB21_12
+; RV32-NEXT:  # %bb.11: # %fp-to-i-cleanup
+; RV32-NEXT:    mv s3, a3
+; RV32-NEXT:  .LBB21_12: # %fp-to-i-cleanup
+; RV32-NEXT:    or a3, a6, s2
+; RV32-NEXT:    and a1, a5, a1
+; RV32-NEXT:    and a4, a5, a0
+; RV32-NEXT:    beq s3, a2, .LBB21_14
+; RV32-NEXT:  # %bb.13: # %fp-to-i-cleanup
+; RV32-NEXT:    sltu a0, a2, s3
+; RV32-NEXT:    j .LBB21_15
+; RV32-NEXT:  .LBB21_14:
+; RV32-NEXT:    snez a0, a3
+; RV32-NEXT:  .LBB21_15: # %fp-to-i-cleanup
+; RV32-NEXT:    and a4, a4, a1
+; RV32-NEXT:    li a2, -1
+; RV32-NEXT:    beq a4, a2, .LBB21_17
+; RV32-NEXT:  # %bb.16: # %fp-to-i-cleanup
+; RV32-NEXT:    srli a1, a1, 31
+; RV32-NEXT:    xori a0, a1, 1
+; RV32-NEXT:  .LBB21_17: # %fp-to-i-cleanup
+; RV32-NEXT:    bnez a0, .LBB21_19
+; RV32-NEXT:  # %bb.18: # %fp-to-i-cleanup
+; RV32-NEXT:    lui s3, 524288
+; RV32-NEXT:  .LBB21_19: # %fp-to-i-cleanup
 ; RV32-NEXT:    neg a0, a0
-; RV32-NEXT:    and a0, a0, a4
+; RV32-NEXT:    and a0, a0, a3
+; RV32-NEXT:    mv a1, s3
 ; RV32-NEXT:    lw ra, 124(sp) # 4-byte Folded Reload
 ; RV32-NEXT:    lw s0, 120(sp) # 4-byte Folded Reload
 ; RV32-NEXT:    lw s1, 116(sp) # 4-byte Folded Reload
@@ -3367,6 +3358,16 @@ define i64 @stest_f32i64(float %x) {
 ; RV32-NEXT:    addi sp, sp, 128
 ; RV32-NEXT:    .cfi_def_cfa_offset 0
 ; RV32-NEXT:    ret
+; RV32-NEXT:  .LBB21_20: # %fp-to-i-if-then5
+; RV32-NEXT:    .cfi_restore_state
+; RV32-NEXT:    srli a1, a1, 31
+; RV32-NEXT:    lui a0, 524288
+; RV32-NEXT:    xori a2, a1, 1
+; RV32-NEXT:    addi s2, a1, -1
+; RV32-NEXT:    sub a1, a0, a2
+; RV32-NEXT:    mv s3, s2
+; RV32-NEXT:    mv a0, s2
+; RV32-NEXT:    j .LBB21_6
 ;
 ; RV64-LABEL: stest_f32i64:
 ; RV64:       # %bb.0: # %entry
@@ -3417,6 +3418,7 @@ define i64 @utest_f32i64(float %x) {
 ; RV32-NEXT:    .cfi_offset s9, -44
 ; RV32-NEXT:    .cfi_offset s10, -48
 ; RV32-NEXT:    .cfi_offset s11, -52
+; RV32-NEXT:    .cfi_remember_state
 ; RV32-NEXT:    fmv.x.w a1, fa0
 ; RV32-NEXT:    slli a0, a1, 1
 ; RV32-NEXT:    srli a0, a0, 24
@@ -3427,23 +3429,14 @@ define i64 @utest_f32i64(float %x) {
 ; RV32-NEXT:    li s9, 0
 ; RV32-NEXT:    li a0, 0
 ; RV32-NEXT:    li a1, 0
-; RV32-NEXT:    j .LBB22_7
+; RV32-NEXT:    j .LBB22_6
 ; RV32-NEXT:  .LBB22_2: # %fp-to-i-if-end
 ; RV32-NEXT:    addi a2, a0, -255
 ; RV32-NEXT:    sltu a3, a2, a0
 ; RV32-NEXT:    sltiu a2, a2, -128
 ; RV32-NEXT:    or a2, a3, a2
-; RV32-NEXT:    beqz a2, .LBB22_4
-; RV32-NEXT:  # %bb.3: # %fp-to-i-if-then5
-; RV32-NEXT:    srli a1, a1, 31
-; RV32-NEXT:    lui a0, 524288
-; RV32-NEXT:    xori a2, a1, 1
-; RV32-NEXT:    addi s2, a1, -1
-; RV32-NEXT:    sub a1, a0, a2
-; RV32-NEXT:    mv s9, s2
-; RV32-NEXT:    mv a0, s2
-; RV32-NEXT:    j .LBB22_7
-; RV32-NEXT:  .LBB22_4: # %fp-to-i-if-end9
+; RV32-NEXT:    bnez a2, .LBB22_7
+; RV32-NEXT:  # %bb.3: # %fp-to-i-if-end9
 ; RV32-NEXT:    srai s0, a1, 31
 ; RV32-NEXT:    slli a1, a1, 9
 ; RV32-NEXT:    lui a2, 2048
@@ -3451,8 +3444,8 @@ define i64 @utest_f32i64(float %x) {
 ; RV32-NEXT:    srli a1, a1, 9
 ; RV32-NEXT:    or a1, a1, a2
 ; RV32-NEXT:    ori s8, s0, 1
-; RV32-NEXT:    bltu a3, a0, .LBB22_6
-; RV32-NEXT:  # %bb.5: # %fp-to-i-if-then12
+; RV32-NEXT:    bltu a3, a0, .LBB22_5
+; RV32-NEXT:  # %bb.4: # %fp-to-i-if-then12
 ; RV32-NEXT:    sw zero, 24(sp)
 ; RV32-NEXT:    sw zero, 28(sp)
 ; RV32-NEXT:    sw zero, 32(sp)
@@ -3544,8 +3537,8 @@ define i64 @utest_f32i64(float %x) {
 ; RV32-NEXT:    add a1, a1, a2
 ; RV32-NEXT:    add a1, a3, a1
 ; RV32-NEXT:    add a1, a1, a5
-; RV32-NEXT:    j .LBB22_7
-; RV32-NEXT:  .LBB22_6: # %fp-to-i-if-else
+; RV32-NEXT:    j .LBB22_6
+; RV32-NEXT:  .LBB22_5: # %fp-to-i-if-else
 ; RV32-NEXT:    sw a1, 56(sp)
 ; RV32-NEXT:    sw zero, 60(sp)
 ; RV32-NEXT:    sw zero, 64(sp)
@@ -3631,7 +3624,7 @@ define i64 @utest_f32i64(float %x) {
 ; RV32-NEXT:    add a1, a1, s7
 ; RV32-NEXT:    sltu a2, a0, s4
 ; RV32-NEXT:    add a1, a1, a2
-; RV32-NEXT:  .LBB22_7: # %fp-to-i-cleanup
+; RV32-NEXT:  .LBB22_6: # %fp-to-i-cleanup
 ; RV32-NEXT:    or a2, a1, a0
 ; RV32-NEXT:    xori a0, a0, 1
 ; RV32-NEXT:    seqz a2, a2
@@ -3671,6 +3664,16 @@ define i64 @utest_f32i64(float %x) {
 ; RV32-NEXT:    addi sp, sp, 128
 ; RV32-NEXT:    .cfi_def_cfa_offset 0
 ; RV32-NEXT:    ret
+; RV32-NEXT:  .LBB22_7: # %fp-to-i-if-then5
+; RV32-NEXT:    .cfi_restore_state
+; RV32-NEXT:    srli a1, a1, 31
+; RV32-NEXT:    lui a0, 524288
+; RV32-NEXT:    xori a2, a1, 1
+; RV32-NEXT:    addi s2, a1, -1
+; RV32-NEXT:    sub a1, a0, a2
+; RV32-NEXT:    mv s9, s2
+; RV32-NEXT:    mv a0, s2
+; RV32-NEXT:    j .LBB22_6
 ;
 ; RV64-LABEL: utest_f32i64:
 ; RV64:       # %bb.0: # %entry
@@ -3736,24 +3739,14 @@ define i64 @ustest_f32i64(float %x) {
 ; RV32-NEXT:    li s9, 0
 ; RV32-NEXT:    li a0, 0
 ; RV32-NEXT:    li a1, 0
-; RV32-NEXT:    j .LBB23_6
+; RV32-NEXT:    j .LBB23_8
 ; RV32-NEXT:  .LBB23_2: # %fp-to-i-if-end
 ; RV32-NEXT:    addi a2, a0, -255
 ; RV32-NEXT:    sltu a3, a2, a0
 ; RV32-NEXT:    sltiu a2, a2, -128
 ; RV32-NEXT:    or a2, a3, a2
-; RV32-NEXT:    beqz a2, .LBB23_4
-; RV32-NEXT:  # %bb.3: # %fp-to-i-if-then5
-; RV32-NEXT:    srli a1, a1, 31
-; RV32-NEXT:    lui a0, 524288
-; RV32-NEXT:    xori a2, a1, 1
-; RV32-NEXT:    addi s2, a1, -1
-; RV32-NEXT:    sub a1, a0, a2
-; RV32-NEXT:    mv s9, s2
-; RV32-NEXT:    mv a0, s2
-; RV32-NEXT:    beqz a1, .LBB23_6
-; RV32-NEXT:    j .LBB23_8
-; RV32-NEXT:  .LBB23_4: # %fp-to-i-if-end9
+; RV32-NEXT:    bnez a2, .LBB23_7
+; RV32-NEXT:  # %bb.3: # %fp-to-i-if-end9
 ; RV32-NEXT:    srai s0, a1, 31
 ; RV32-NEXT:    slli a1, a1, 9
 ; RV32-NEXT:    lui a2, 2048
@@ -3761,8 +3754,8 @@ define i64 @ustest_f32i64(float %x) {
 ; RV32-NEXT:    srli a1, a1, 9
 ; RV32-NEXT:    or a1, a1, a2
 ; RV32-NEXT:    ori s8, s0, 1
-; RV32-NEXT:    bltu a3, a0, .LBB23_7
-; RV32-NEXT:  # %bb.5: # %fp-to-i-if-then12
+; RV32-NEXT:    bltu a3, a0, .LBB23_5
+; RV32-NEXT:  # %bb.4: # %fp-to-i-if-then12
 ; RV32-NEXT:    sw zero, 24(sp)
 ; RV32-NEXT:    sw zero, 28(sp)
 ; RV32-NEXT:    sw zero, 32(sp)
@@ -3854,11 +3847,9 @@ define i64 @ustest_f32i64(float %x) {
 ; RV32-NEXT:    add a1, a1, a2
 ; RV32-NEXT:    add a1, a3, a1
 ; RV32-NEXT:    add a1, a1, a5
-; RV32-NEXT:    bnez a1, .LBB23_8
-; RV32-NEXT:  .LBB23_6:
-; RV32-NEXT:    seqz a2, a0
-; RV32-NEXT:    j .LBB23_9
-; RV32-NEXT:  .LBB23_7: # %fp-to-i-if-else
+; RV32-NEXT:    bnez a1, .LBB23_6
+; RV32-NEXT:    j .LBB23_8
+; RV32-NEXT:  .LBB23_5: # %fp-to-i-if-else
 ; RV32-NEXT:    sw a1, 56(sp)
 ; RV32-NEXT:    sw zero, 60(sp)
 ; RV32-NEXT:    sw zero, 64(sp)
@@ -3944,9 +3935,21 @@ define i64 @ustest_f32i64(float %x) {
 ; RV32-NEXT:    add a1, a1, s7
 ; RV32-NEXT:    sltu a2, a0, s4
 ; RV32-NEXT:    add a1, a1, a2
-; RV32-NEXT:    beqz a1, .LBB23_6
-; RV32-NEXT:  .LBB23_8: # %fp-to-i-cleanup
+; RV32-NEXT:    beqz a1, .LBB23_8
+; RV32-NEXT:  .LBB23_6: # %fp-to-i-cleanup
 ; RV32-NEXT:    srli a2, a1, 31
+; RV32-NEXT:    j .LBB23_9
+; RV32-NEXT:  .LBB23_7: # %fp-to-i-if-then5
+; RV32-NEXT:    srli a1, a1, 31
+; RV32-NEXT:    lui a0, 524288
+; RV32-NEXT:    xori a2, a1, 1
+; RV32-NEXT:    addi s2, a1, -1
+; RV32-NEXT:    sub a1, a0, a2
+; RV32-NEXT:    mv s9, s2
+; RV32-NEXT:    mv a0, s2
+; RV32-NEXT:    bnez a1, .LBB23_6
+; RV32-NEXT:  .LBB23_8:
+; RV32-NEXT:    seqz a2, a0
 ; RV32-NEXT:  .LBB23_9: # %fp-to-i-cleanup
 ; RV32-NEXT:    xori a3, a0, 1
 ; RV32-NEXT:    or a3, a3, a1
@@ -5337,32 +5340,25 @@ define i64 @stest_f64i64_mm(double %x) {
 ; RV32IF-NEXT:    .cfi_offset s9, -44
 ; RV32IF-NEXT:    .cfi_offset s10, -48
 ; RV32IF-NEXT:    .cfi_offset s11, -52
+; RV32IF-NEXT:    .cfi_remember_state
 ; RV32IF-NEXT:    slli a2, a1, 1
 ; RV32IF-NEXT:    srli a2, a2, 21
 ; RV32IF-NEXT:    li a3, 1023
 ; RV32IF-NEXT:    bgeu a2, a3, .LBB45_2
 ; RV32IF-NEXT:  # %bb.1:
 ; RV32IF-NEXT:    li s2, 0
-; RV32IF-NEXT:    li a1, 0
+; RV32IF-NEXT:    li s3, 0
 ; RV32IF-NEXT:    li a0, 0
-; RV32IF-NEXT:    li a2, 0
-; RV32IF-NEXT:    j .LBB45_7
+; RV32IF-NEXT:    li a1, 0
+; RV32IF-NEXT:    j .LBB45_6
 ; RV32IF-NEXT:  .LBB45_2: # %fp-to-i-if-end
 ; RV32IF-NEXT:    addi a3, a2, -1151
 ; RV32IF-NEXT:    sltu a4, a3, a2
 ; RV32IF-NEXT:    sltiu a3, a3, -128
 ; RV32IF-NEXT:    or a4, a4, a3
 ; RV32IF-NEXT:    srli a3, a1, 31
-; RV32IF-NEXT:    beqz a4, .LBB45_4
-; RV32IF-NEXT:  # %bb.3: # %fp-to-i-if-then5
-; RV32IF-NEXT:    xori a0, a3, 1
-; RV32IF-NEXT:    lui a2, 524288
-; RV32IF-NEXT:    neg s2, a0
-; RV32IF-NEXT:    sub a2, a2, a0
-; RV32IF-NEXT:    mv a1, s2
-; RV32IF-NEXT:    mv a0, s2
-; RV32IF-NEXT:    j .LBB45_7
-; RV32IF-NEXT:  .LBB45_4: # %fp-to-i-if-end9
+; RV32IF-NEXT:    bnez a4, .LBB45_20
+; RV32IF-NEXT:  # %bb.3: # %fp-to-i-if-end9
 ; RV32IF-NEXT:    neg s0, a3
 ; RV32IF-NEXT:    slli a1, a1, 12
 ; RV32IF-NEXT:    lui a3, 256
@@ -5370,8 +5366,8 @@ define i64 @stest_f64i64_mm(double %x) {
 ; RV32IF-NEXT:    srli a1, a1, 12
 ; RV32IF-NEXT:    or a1, a1, a3
 ; RV32IF-NEXT:    ori s8, s0, 1
-; RV32IF-NEXT:    bltu a4, a2, .LBB45_6
-; RV32IF-NEXT:  # %bb.5: # %fp-to-i-if-then12
+; RV32IF-NEXT:    bltu a4, a2, .LBB45_5
+; RV32IF-NEXT:  # %bb.4: # %fp-to-i-if-then12
 ; RV32IF-NEXT:    sw zero, 24(sp)
 ; RV32IF-NEXT:    sw zero, 28(sp)
 ; RV32IF-NEXT:    sw zero, 32(sp)
@@ -5403,70 +5399,68 @@ define i64 @stest_f64i64_mm(double %x) {
 ; RV32IF-NEXT:    srl a1, a1, a0
 ; RV32IF-NEXT:    or a3, a3, a6
 ; RV32IF-NEXT:    sw a3, 4(sp) # 4-byte Folded Spill
-; RV32IF-NEXT:    or s5, a2, a7
-; RV32IF-NEXT:    or s6, a1, a5
-; RV32IF-NEXT:    srl s4, a4, a0
-; RV32IF-NEXT:    mv a0, s6
+; RV32IF-NEXT:    or s6, a2, a7
+; RV32IF-NEXT:    or s7, a1, a5
+; RV32IF-NEXT:    srl s5, a4, a0
+; RV32IF-NEXT:    mv a0, s7
 ; RV32IF-NEXT:    li a1, 0
 ; RV32IF-NEXT:    mv a2, s8
 ; RV32IF-NEXT:    li a3, 0
 ; RV32IF-NEXT:    call __muldi3
 ; RV32IF-NEXT:    mv s2, a0
-; RV32IF-NEXT:    mv s7, a1
-; RV32IF-NEXT:    mv a0, s5
+; RV32IF-NEXT:    mv s3, a1
+; RV32IF-NEXT:    mv a0, s6
 ; RV32IF-NEXT:    li a1, 0
 ; RV32IF-NEXT:    mv a2, s8
 ; RV32IF-NEXT:    li a3, 0
 ; RV32IF-NEXT:    call __muldi3
-; RV32IF-NEXT:    add s7, a0, s7
-; RV32IF-NEXT:    sltu a0, s7, a0
-; RV32IF-NEXT:    add s9, a1, a0
-; RV32IF-NEXT:    mv a0, s6
+; RV32IF-NEXT:    add s3, a0, s3
+; RV32IF-NEXT:    sltu a0, s3, a0
+; RV32IF-NEXT:    add s10, a1, a0
+; RV32IF-NEXT:    mv a0, s7
 ; RV32IF-NEXT:    li a1, 0
 ; RV32IF-NEXT:    mv a2, s0
 ; RV32IF-NEXT:    li a3, 0
 ; RV32IF-NEXT:    call __muldi3
-; RV32IF-NEXT:    add s10, a0, s7
-; RV32IF-NEXT:    sltu a0, s10, a0
+; RV32IF-NEXT:    add s3, a0, s3
+; RV32IF-NEXT:    sltu a0, s3, a0
 ; RV32IF-NEXT:    add a0, a1, a0
-; RV32IF-NEXT:    add s11, s9, a0
-; RV32IF-NEXT:    mv a0, s5
+; RV32IF-NEXT:    add s11, s10, a0
+; RV32IF-NEXT:    mv a0, s6
 ; RV32IF-NEXT:    li a1, 0
 ; RV32IF-NEXT:    mv a2, s0
 ; RV32IF-NEXT:    li a3, 0
 ; RV32IF-NEXT:    call __muldi3
 ; RV32IF-NEXT:    mv s1, a0
-; RV32IF-NEXT:    mv s3, s8
-; RV32IF-NEXT:    mv s8, a1
-; RV32IF-NEXT:    add s7, a0, s11
+; RV32IF-NEXT:    mv s9, a1
+; RV32IF-NEXT:    mv s4, s8
+; RV32IF-NEXT:    add s8, a0, s11
 ; RV32IF-NEXT:    mv a0, s0
 ; RV32IF-NEXT:    mv a1, s0
-; RV32IF-NEXT:    mv a2, s6
-; RV32IF-NEXT:    mv a3, s5
+; RV32IF-NEXT:    mv a2, s7
+; RV32IF-NEXT:    mv a3, s6
 ; RV32IF-NEXT:    call __muldi3
-; RV32IF-NEXT:    mv s5, a0
-; RV32IF-NEXT:    mv s6, a1
-; RV32IF-NEXT:    mv a0, s3
+; RV32IF-NEXT:    mv s6, a0
+; RV32IF-NEXT:    mv s7, a1
+; RV32IF-NEXT:    mv a0, s4
 ; RV32IF-NEXT:    mv a1, s0
 ; RV32IF-NEXT:    lw a2, 4(sp) # 4-byte Folded Reload
-; RV32IF-NEXT:    mv a3, s4
+; RV32IF-NEXT:    mv a3, s5
 ; RV32IF-NEXT:    call __muldi3
-; RV32IF-NEXT:    mv a2, a1
-; RV32IF-NEXT:    mv a1, s10
-; RV32IF-NEXT:    add a3, s5, a0
-; RV32IF-NEXT:    sltu a4, s7, s1
-; RV32IF-NEXT:    sltu a5, s11, s9
-; RV32IF-NEXT:    add a2, s6, a2
-; RV32IF-NEXT:    add a0, s7, a3
-; RV32IF-NEXT:    add a5, s8, a5
-; RV32IF-NEXT:    sltu a3, a3, s5
-; RV32IF-NEXT:    sltu a6, a0, s7
-; RV32IF-NEXT:    add a4, a5, a4
-; RV32IF-NEXT:    add a2, a2, a3
-; RV32IF-NEXT:    add a2, a4, a2
-; RV32IF-NEXT:    add a2, a2, a6
-; RV32IF-NEXT:    j .LBB45_7
-; RV32IF-NEXT:  .LBB45_6: # %fp-to-i-if-else
+; RV32IF-NEXT:    add a2, s6, a0
+; RV32IF-NEXT:    sltu a3, s8, s1
+; RV32IF-NEXT:    sltu a4, s11, s10
+; RV32IF-NEXT:    add a1, s7, a1
+; RV32IF-NEXT:    add a0, s8, a2
+; RV32IF-NEXT:    add a4, s9, a4
+; RV32IF-NEXT:    sltu a2, a2, s6
+; RV32IF-NEXT:    sltu a5, a0, s8
+; RV32IF-NEXT:    add a3, a4, a3
+; RV32IF-NEXT:    add a1, a1, a2
+; RV32IF-NEXT:    add a1, a3, a1
+; RV32IF-NEXT:    add a1, a1, a5
+; RV32IF-NEXT:    j .LBB45_6
+; RV32IF-NEXT:  .LBB45_5: # %fp-to-i-if-else
 ; RV32IF-NEXT:    sw a0, 56(sp)
 ; RV32IF-NEXT:    sw a1, 60(sp)
 ; RV32IF-NEXT:    sw zero, 64(sp)
@@ -5495,10 +5489,10 @@ define i64 @stest_f64i64_mm(double %x) {
 ; RV32IF-NEXT:    sll a2, a2, a0
 ; RV32IF-NEXT:    sll a5, a5, a0
 ; RV32IF-NEXT:    sll a1, a1, a0
-; RV32IF-NEXT:    or s3, a2, a6
+; RV32IF-NEXT:    or s4, a2, a6
 ; RV32IF-NEXT:    or a2, a5, a7
 ; RV32IF-NEXT:    or a3, a1, a3
-; RV32IF-NEXT:    sll s4, a4, a0
+; RV32IF-NEXT:    sll s3, a4, a0
 ; RV32IF-NEXT:    mv a0, s8
 ; RV32IF-NEXT:    mv a1, s0
 ; RV32IF-NEXT:    call __muldi3
@@ -5506,21 +5500,21 @@ define i64 @stest_f64i64_mm(double %x) {
 ; RV32IF-NEXT:    mv s5, a1
 ; RV32IF-NEXT:    mv a0, s0
 ; RV32IF-NEXT:    mv a1, s0
-; RV32IF-NEXT:    mv a2, s4
-; RV32IF-NEXT:    mv a3, s3
+; RV32IF-NEXT:    mv a2, s3
+; RV32IF-NEXT:    mv a3, s4
 ; RV32IF-NEXT:    call __muldi3
 ; RV32IF-NEXT:    add a1, a1, s5
 ; RV32IF-NEXT:    add s1, a0, s2
 ; RV32IF-NEXT:    sltu a0, s1, a0
 ; RV32IF-NEXT:    add s7, a1, a0
-; RV32IF-NEXT:    mv a0, s3
+; RV32IF-NEXT:    mv a0, s4
 ; RV32IF-NEXT:    li a1, 0
 ; RV32IF-NEXT:    mv a2, s8
 ; RV32IF-NEXT:    li a3, 0
 ; RV32IF-NEXT:    call __muldi3
 ; RV32IF-NEXT:    mv s5, a0
 ; RV32IF-NEXT:    mv s6, a1
-; RV32IF-NEXT:    mv a0, s4
+; RV32IF-NEXT:    mv a0, s3
 ; RV32IF-NEXT:    li a1, 0
 ; RV32IF-NEXT:    mv a2, s8
 ; RV32IF-NEXT:    li a3, 0
@@ -5529,76 +5523,75 @@ define i64 @stest_f64i64_mm(double %x) {
 ; RV32IF-NEXT:    add s8, s5, a1
 ; RV32IF-NEXT:    sltu a0, s8, s5
 ; RV32IF-NEXT:    add s6, s6, a0
-; RV32IF-NEXT:    mv a0, s4
+; RV32IF-NEXT:    mv a0, s3
 ; RV32IF-NEXT:    li a1, 0
 ; RV32IF-NEXT:    mv a2, s0
 ; RV32IF-NEXT:    li a3, 0
 ; RV32IF-NEXT:    call __muldi3
-; RV32IF-NEXT:    add s8, a0, s8
-; RV32IF-NEXT:    sltu a0, s8, a0
+; RV32IF-NEXT:    add s3, a0, s8
+; RV32IF-NEXT:    sltu a0, s3, a0
 ; RV32IF-NEXT:    add a0, a1, a0
-; RV32IF-NEXT:    add s4, s6, a0
-; RV32IF-NEXT:    sltu s5, s4, s6
-; RV32IF-NEXT:    mv a0, s3
+; RV32IF-NEXT:    add s5, s6, a0
+; RV32IF-NEXT:    sltu s6, s5, s6
+; RV32IF-NEXT:    mv a0, s4
 ; RV32IF-NEXT:    li a1, 0
 ; RV32IF-NEXT:    mv a2, s0
 ; RV32IF-NEXT:    li a3, 0
 ; RV32IF-NEXT:    call __muldi3
-; RV32IF-NEXT:    mv a2, a1
-; RV32IF-NEXT:    mv a1, s8
-; RV32IF-NEXT:    add a2, a2, s5
-; RV32IF-NEXT:    add s4, a0, s4
-; RV32IF-NEXT:    sltu a3, s4, a0
-; RV32IF-NEXT:    add a0, s4, s1
-; RV32IF-NEXT:    add a2, a2, a3
-; RV32IF-NEXT:    add a2, a2, s7
-; RV32IF-NEXT:    sltu a3, a0, s4
-; RV32IF-NEXT:    add a2, a2, a3
-; RV32IF-NEXT:  .LBB45_7: # %fp-to-i-cleanup
-; RV32IF-NEXT:    lui a3, 524288
-; RV32IF-NEXT:    addi a4, a3, -1
-; RV32IF-NEXT:    beq a1, a4, .LBB45_9
-; RV32IF-NEXT:  # %bb.8: # %fp-to-i-cleanup
-; RV32IF-NEXT:    sltu a5, a1, a4
-; RV32IF-NEXT:    or a6, a0, a2
-; RV32IF-NEXT:    bnez a6, .LBB45_10
-; RV32IF-NEXT:    j .LBB45_11
-; RV32IF-NEXT:  .LBB45_9:
-; RV32IF-NEXT:    sltiu a5, s2, -1
-; RV32IF-NEXT:    or a6, a0, a2
-; RV32IF-NEXT:    beqz a6, .LBB45_11
+; RV32IF-NEXT:    add a1, a1, s6
+; RV32IF-NEXT:    add s5, a0, s5
+; RV32IF-NEXT:    sltu a2, s5, a0
+; RV32IF-NEXT:    add a0, s5, s1
+; RV32IF-NEXT:    add a1, a1, a2
+; RV32IF-NEXT:    add a1, a1, s7
+; RV32IF-NEXT:    sltu a2, a0, s5
+; RV32IF-NEXT:    add a1, a1, a2
+; RV32IF-NEXT:  .LBB45_6: # %fp-to-i-cleanup
+; RV32IF-NEXT:    lui a2, 524288
+; RV32IF-NEXT:    addi a3, a2, -1
+; RV32IF-NEXT:    beq s3, a3, .LBB45_8
+; RV32IF-NEXT:  # %bb.7: # %fp-to-i-cleanup
+; RV32IF-NEXT:    sltu a4, s3, a3
+; RV32IF-NEXT:    or a5, a0, a1
+; RV32IF-NEXT:    bnez a5, .LBB45_9
+; RV32IF-NEXT:    j .LBB45_10
+; RV32IF-NEXT:  .LBB45_8:
+; RV32IF-NEXT:    sltiu a4, s2, -1
+; RV32IF-NEXT:    or a5, a0, a1
+; RV32IF-NEXT:    beqz a5, .LBB45_10
+; RV32IF-NEXT:  .LBB45_9: # %fp-to-i-cleanup
+; RV32IF-NEXT:    srli a4, a1, 31
 ; RV32IF-NEXT:  .LBB45_10: # %fp-to-i-cleanup
-; RV32IF-NEXT:    srli a5, a2, 31
-; RV32IF-NEXT:  .LBB45_11: # %fp-to-i-cleanup
-; RV32IF-NEXT:    neg a6, a5
-; RV32IF-NEXT:    addi a7, a5, -1
-; RV32IF-NEXT:    bnez a5, .LBB45_13
-; RV32IF-NEXT:  # %bb.12: # %fp-to-i-cleanup
-; RV32IF-NEXT:    mv a1, a4
-; RV32IF-NEXT:  .LBB45_13: # %fp-to-i-cleanup
-; RV32IF-NEXT:    or a4, a7, s2
-; RV32IF-NEXT:    and a2, a6, a2
-; RV32IF-NEXT:    and a5, a6, a0
-; RV32IF-NEXT:    beq a1, a3, .LBB45_15
-; RV32IF-NEXT:  # %bb.14: # %fp-to-i-cleanup
-; RV32IF-NEXT:    sltu a0, a3, a1
-; RV32IF-NEXT:    j .LBB45_16
-; RV32IF-NEXT:  .LBB45_15:
-; RV32IF-NEXT:    snez a0, a4
-; RV32IF-NEXT:  .LBB45_16: # %fp-to-i-cleanup
-; RV32IF-NEXT:    and a5, a5, a2
-; RV32IF-NEXT:    li a3, -1
-; RV32IF-NEXT:    beq a5, a3, .LBB45_18
-; RV32IF-NEXT:  # %bb.17: # %fp-to-i-cleanup
-; RV32IF-NEXT:    srli a2, a2, 31
-; RV32IF-NEXT:    xori a0, a2, 1
-; RV32IF-NEXT:  .LBB45_18: # %fp-to-i-cleanup
-; RV32IF-NEXT:    bnez a0, .LBB45_20
-; RV32IF-NEXT:  # %bb.19: # %fp-to-i-cleanup
-; RV32IF-NEXT:    lui a1, 524288
-; RV32IF-NEXT:  .LBB45_20: # %fp-to-i-cleanup
+; RV32IF-NEXT:    neg a5, a4
+; RV32IF-NEXT:    addi a6, a4, -1
+; RV32IF-NEXT:    bnez a4, .LBB45_12
+; RV32IF-NEXT:  # %bb.11: # %fp-to-i-cleanup
+; RV32IF-NEXT:    mv s3, a3
+; RV32IF-NEXT:  .LBB45_12: # %fp-to-i-cleanup
+; RV32IF-NEXT:    or a3, a6, s2
+; RV32IF-NEXT:    and a1, a5, a1
+; RV32IF-NEXT:    and a4, a5, a0
+; RV32IF-NEXT:    beq s3, a2, .LBB45_14
+; RV32IF-NEXT:  # %bb.13: # %fp-to-i-cleanup
+; RV32IF-NEXT:    sltu a0, a2, s3
+; RV32IF-NEXT:    j .LBB45_15
+; RV32IF-NEXT:  .LBB45_14:
+; RV32IF-NEXT:    snez a0, a3
+; RV32IF-NEXT:  .LBB45_15: # %fp-to-i-cleanup
+; RV32IF-NEXT:    and a4, a4, a1
+; RV32IF-NEXT:    li a2, -1
+; RV32IF-NEXT:    beq a4, a2, .LBB45_17
+; RV32IF-NEXT:  # %bb.16: # %fp-to-i-cleanup
+; RV32IF-NEXT:    srli a1, a1, 31
+; RV32IF-NEXT:    xori a0, a1, 1
+; RV32IF-NEXT:  .LBB45_17: # %fp-to-i-cleanup
+; RV32IF-NEXT:    bnez a0, .LBB45_19
+; RV32IF-NEXT:  # %bb.18: # %fp-to-i-cleanup
+; RV32IF-NEXT:    lui s3, 524288
+; RV32IF-NEXT:  .LBB45_19: # %fp-to-i-cleanup
 ; RV32IF-NEXT:    neg a0, a0
-; RV32IF-NEXT:    and a0, a0, a4
+; RV32IF-NEXT:    and a0, a0, a3
+; RV32IF-NEXT:    mv a1, s3
 ; RV32IF-NEXT:    lw ra, 124(sp) # 4-byte Folded Reload
 ; RV32IF-NEXT:    lw s0, 120(sp) # 4-byte Folded Reload
 ; RV32IF-NEXT:    lw s1, 116(sp) # 4-byte Folded Reload
@@ -5628,6 +5621,15 @@ define i64 @stest_f64i64_mm(double %x) {
 ; RV32IF-NEXT:    addi sp, sp, 128
 ; RV32IF-NEXT:    .cfi_def_cfa_offset 0
 ; RV32IF-NEXT:    ret
+; RV32IF-NEXT:  .LBB45_20: # %fp-to-i-if-then5
+; RV32IF-NEXT:    .cfi_restore_state
+; RV32IF-NEXT:    xori a0, a3, 1
+; RV32IF-NEXT:    lui a1, 524288
+; RV32IF-NEXT:    neg s2, a0
+; RV32IF-NEXT:    sub a1, a1, a0
+; RV32IF-NEXT:    mv s3, s2
+; RV32IF-NEXT:    mv a0, s2
+; RV32IF-NEXT:    j .LBB45_6
 ;
 ; RV64IF-LABEL: stest_f64i64_mm:
 ; RV64IF:       # %bb.0: # %entry
@@ -5700,6 +5702,7 @@ define i64 @stest_f64i64_mm(double %x) {
 ; RV32IFD-NEXT:    .cfi_offset s9, -44
 ; RV32IFD-NEXT:    .cfi_offset s10, -48
 ; RV32IFD-NEXT:    .cfi_offset s11, -52
+; RV32IFD-NEXT:    .cfi_remember_state
 ; RV32IFD-NEXT:    fsd fa0, 16(sp)
 ; RV32IFD-NEXT:    lw a2, 20(sp)
 ; RV32IFD-NEXT:    slli a0, a2, 1
@@ -5708,26 +5711,18 @@ define i64 @stest_f64i64_mm(double %x) {
 ; RV32IFD-NEXT:    bgeu a0, a1, .LBB45_2
 ; RV32IFD-NEXT:  # %bb.1:
 ; RV32IFD-NEXT:    li s2, 0
-; RV32IFD-NEXT:    li a1, 0
+; RV32IFD-NEXT:    li s3, 0
 ; RV32IFD-NEXT:    li a0, 0
-; RV32IFD-NEXT:    li a2, 0
-; RV32IFD-NEXT:    j .LBB45_7
+; RV32IFD-NEXT:    li a1, 0
+; RV32IFD-NEXT:    j .LBB45_6
 ; RV32IFD-NEXT:  .LBB45_2: # %fp-to-i-if-end
 ; RV32IFD-NEXT:    addi a1, a0, -1151
 ; RV32IFD-NEXT:    sltu a3, a1, a0
 ; RV32IFD-NEXT:    sltiu a1, a1, -128
 ; RV32IFD-NEXT:    or a1, a3, a1
 ; RV32IFD-NEXT:    srli a3, a2, 31
-; RV32IFD-NEXT:    beqz a1, .LBB45_4
-; RV32IFD-NEXT:  # %bb.3: # %fp-to-i-if-then5
-; RV32IFD-NEXT:    xori a0, a3, 1
-; RV32IFD-NEXT:    lui a2, 524288
-; RV32IFD-NEXT:    neg s2, a0
-; RV32IFD-NEXT:    sub a2, a2, a0
-; RV32IFD-NEXT:    mv a1, s2
-; RV32IFD-NEXT:    mv a0, s2
-; RV32IFD-NEXT:    j .LBB45_7
-; RV32IFD-NEXT:  .LBB45_4: # %fp-to-i-if-end9
+; RV32IFD-NEXT:    bnez a1, .LBB45_20
+; RV32IFD-NEXT:  # %bb.3: # %fp-to-i-if-end9
 ; RV32IFD-NEXT:    lw a1, 16(sp)
 ; RV32IFD-NEXT:    neg s0, a3
 ; RV32IFD-NEXT:    slli a2, a2, 12
@@ -5736,8 +5731,8 @@ define i64 @stest_f64i64_mm(double %x) {
 ; RV32IFD-NEXT:    or a2, a2, a3
 ; RV32IFD-NEXT:    li a3, 1074
 ; RV32IFD-NEXT:    ori s8, s0, 1
-; RV32IFD-NEXT:    bltu a3, a0, .LBB45_6
-; RV32IFD-NEXT:  # %bb.5: # %fp-to-i-if-then12
+; RV32IFD-NEXT:    bltu a3, a0, .LBB45_5
+; RV32IFD-NEXT:  # %bb.4: # %fp-to-i-if-then12
 ; RV32IFD-NEXT:    sw zero, 40(sp)
 ; RV32IFD-NEXT:    sw zero, 44(sp)
 ; RV32IFD-NEXT:    sw zero, 48(sp)
@@ -5769,70 +5764,68 @@ define i64 @stest_f64i64_mm(double %x) {
 ; RV32IFD-NEXT:    srl a0, a0, a1
 ; RV32IFD-NEXT:    or a3, a3, a6
 ; RV32IFD-NEXT:    sw a3, 12(sp) # 4-byte Folded Spill
-; RV32IFD-NEXT:    or s5, a2, a7
-; RV32IFD-NEXT:    or s6, a0, a5
-; RV32IFD-NEXT:    srl s4, a4, a1
-; RV32IFD-NEXT:    mv a0, s6
+; RV32IFD-NEXT:    or s6, a2, a7
+; RV32IFD-NEXT:    or s7, a0, a5
+; RV32IFD-NEXT:    srl s5, a4, a1
+; RV32IFD-NEXT:    mv a0, s7
 ; RV32IFD-NEXT:    li a1, 0
 ; RV32IFD-NEXT:    mv a2, s8
 ; RV32IFD-NEXT:    li a3, 0
 ; RV32IFD-NEXT:    call __muldi3
 ; RV32IFD-NEXT:    mv s2, a0
-; RV32IFD-NEXT:    mv s7, a1
-; RV32IFD-NEXT:    mv a0, s5
+; RV32IFD-NEXT:    mv s3, a1
+; RV32IFD-NEXT:    mv a0, s6
 ; RV32IFD-NEXT:    li a1, 0
 ; RV32IFD-NEXT:    mv a2, s8
 ; RV32IFD-NEXT:    li a3, 0
 ; RV32IFD-NEXT:    call __muldi3
-; RV32IFD-NEXT:    add s7, a0, s7
-; RV32IFD-NEXT:    sltu a0, s7, a0
-; RV32IFD-NEXT:    add s9, a1, a0
-; RV32IFD-NEXT:    mv a0, s6
+; RV32IFD-NEXT:    add s3, a0, s3
+; RV32IFD-NEXT:    sltu a0, s3, a0
+; RV32IFD-NEXT:    add s10, a1, a0
+; RV32IFD-NEXT:    mv a0, s7
 ; RV32IFD-NEXT:    li a1, 0
 ; RV32IFD-NEXT:    mv a2, s0
 ; RV32IFD-NEXT:    li a3, 0
 ; RV32IFD-NEXT:    call __muldi3
-; RV32IFD-NEXT:    add s10, a0, s7
-; RV32IFD-NEXT:    sltu a0, s10, a0
+; RV32IFD-NEXT:    add s3, a0, s3
+; RV32IFD-NEXT:    sltu a0, s3, a0
 ; RV32IFD-NEXT:    add a0, a1, a0
-; RV32IFD-NEXT:    add s11, s9, a0
-; RV32IFD-NEXT:    mv a0, s5
+; RV32IFD-NEXT:    add s11, s10, a0
+; RV32IFD-NEXT:    mv a0, s6
 ; RV32IFD-NEXT:    li a1, 0
 ; RV32IFD-NEXT:    mv a2, s0
 ; RV32IFD-NEXT:    li a3, 0
 ; RV32IFD-NEXT:    call __muldi3
 ; RV32IFD-NEXT:    mv s1, a0
-; RV32IFD-NEXT:    mv s3, s8
-; RV32IFD-NEXT:    mv s8, a1
-; RV32IFD-NEXT:    add s7, a0, s11
+; RV32IFD-NEXT:    mv s9, a1
+; RV32IFD-NEXT:    mv s4, s8
+; RV32IFD-NEXT:    add s8, a0, s11
 ; RV32IFD-NEXT:    mv a0, s0
 ; RV32IFD-NEXT:    mv a1, s0
-; RV32IFD-NEXT:    mv a2, s6
-; RV32IFD-NEXT:    mv a3, s5
+; RV32IFD-NEXT:    mv a2, s7
+; RV32IFD-NEXT:    mv a3, s6
 ; RV32IFD-NEXT:    call __muldi3
-; RV32IFD-NEXT:    mv s5, a0
-; RV32IFD-NEXT:    mv s6, a1
-; RV32IFD-NEXT:    mv a0, s3
+; RV32IFD-NEXT:    mv s6, a0
+; RV32IFD-NEXT:    mv s7, a1
+; RV32IFD-NEXT:    mv a0, s4
 ; RV32IFD-NEXT:    mv a1, s0
 ; RV32IFD-NEXT:    lw a2, 12(sp) # 4-byte Folded Reload
-; RV32IFD-NEXT:    mv a3, s4
+; RV32IFD-NEXT:    mv a3, s5
 ; RV32IFD-NEXT:    call __muldi3
-; RV32IFD-NEXT:    mv a2, a1
-; RV32IFD-NEXT:    mv a1, s10
-; RV32IFD-NEXT:    add a3, s5, a0
-; RV32IFD-NEXT:    sltu a4, s7, s1
-; RV32IFD-NEXT:    sltu a5, s11, s9
-; RV32IFD-NEXT:    add a2, s6, a2
-; RV32IFD-NEXT:    add a0, s7, a3
-; RV32IFD-NEXT:    add a5, s8, a5
-; RV32IFD-NEXT:    sltu a3, a3, s5
-; RV32IFD-NEXT:    sltu a6, a0, s7
-; RV32IFD-NEXT:    add a4, a5, a4
-; RV32IFD-NEXT:    add a2, a2, a3
-; RV32IFD-NEXT:    add a2, a4, a2
-; RV32IFD-NEXT:    add a2, a2, a6
-; RV32IFD-NEXT:    j .LBB45_7
-; RV32IFD-NEXT:  .LBB45_6: # %fp-to-i-if-else
+; RV32IFD-NEXT:    add a2, s6, a0
+; RV32IFD-NEXT:    sltu a3, s8, s1
+; RV32IFD-NEXT:    sltu a4, s11, s10
+; RV32IFD-NEXT:    add a1, s7, a1
+; RV32IFD-NEXT:    add a0, s8, a2
+; RV32IFD-NEXT:    add a4, s9, a4
+; RV32IFD-NEXT:    sltu a2, a2, s6
+; RV32IFD-NEXT:    sltu a5, a0, s8
+; RV32IFD-NEXT:    add a3, a4, a3
+; RV32IFD-NEXT:    add a1, a1, a2
+; RV32IFD-NEXT:    add a1, a3, a1
+; RV32IFD-NEXT:    add a1, a1, a5
+; RV32IFD-NEXT:    j .LBB45_6
+; RV32IFD-NEXT:  .LBB45_5: # %fp-to-i-if-else
 ; RV32IFD-NEXT:    sw a1, 72(sp)
 ; RV32IFD-NEXT:    sw a2, 76(sp)
 ; RV32IFD-NEXT:    sw zero, 80(sp)
@@ -5861,10 +5854,10 @@ define i64 @stest_f64i64_mm(double %x) {
 ; RV32IFD-NEXT:    sll a2, a2, a0
 ; RV32IFD-NEXT:    sll a5, a5, a0
 ; RV32IFD-NEXT:    sll a1, a1, a0
-; RV32IFD-NEXT:    or s3, a2, a6
+; RV32IFD-NEXT:    or s4, a2, a6
 ; RV32IFD-NEXT:    or a2, a5, a7
 ; RV32IFD-NEXT:    or a3, a1, a3
-; RV32IFD-NEXT:    sll s4, a4, a0
+; RV32IFD-NEXT:    sll s3, a4, a0
 ; RV32IFD-NEXT:    mv a0, s8
 ; RV32IFD-NEXT:    mv a1, s0
 ; RV32IFD-NEXT:    call __muldi3
@@ -5872,21 +5865,21 @@ define i64 @stest_f64i64_mm(double %x) {
 ; RV32IFD-NEXT:    mv s5, a1
 ; RV32IFD-NEXT:    mv a0, s0
 ; RV32IFD-NEXT:    mv a1, s0
-; RV32IFD-NEXT:    mv a2, s4
-; RV32IFD-NEXT:    mv a3, s3
+; RV32IFD-NEXT:    mv a2, s3
+; RV32IFD-NEXT:    mv a3, s4
 ; RV32IFD-NEXT:    call __muldi3
 ; RV32IFD-NEXT:    add a1, a1, s5
 ; RV32IFD-NEXT:    add s1, a0, s2
 ; RV32IFD-NEXT:    sltu a0, s1, a0
 ; RV32IFD-NEXT:    add s7, a1, a0
-; RV32IFD-NEXT:    mv a0, s3
+; RV32IFD-NEXT:    mv a0, s4
 ; RV32IFD-NEXT:    li a1, 0
 ; RV32IFD-NEXT:    mv a2, s8
 ; RV32IFD-NEXT:    li a3, 0
 ; RV32IFD-NEXT:    call __muldi3
 ; RV32IFD-NEXT:    mv s5, a0
 ; RV32IFD-NEXT:    mv s6, a1
-; RV32IFD-NEXT:    mv a0, s4
+; RV32IFD-NEXT:    mv a0, s3
 ; RV32IFD-NEXT:    li a1, 0
 ; RV32IFD-NEXT:    mv a2, s8
 ; RV32IFD-NEXT:    li a3, 0
@@ -5895,76 +5888,75 @@ define i64 @stest_f64i64_mm(double %x) {
 ; RV32IFD-NEXT:    add s8, s5, a1
 ; RV32IFD-NEXT:    sltu a0, s8, s5
 ; RV32IFD-NEXT:    add s6, s6, a0
-; RV32IFD-NEXT:    mv a0, s4
+; RV32IFD-NEXT:    mv a0, s3
 ; RV32IFD-NEXT:    li a1, 0
 ; RV32IFD-NEXT:    mv a2, s0
 ; RV32IFD-NEXT:    li a3, 0
 ; RV32IFD-NEXT:    call __muldi3
-; RV32IFD-NEXT:    add s8, a0, s8
-; RV32IFD-NEXT:    sltu a0, s8, a0
+; RV32IFD-NEXT:    add s3, a0, s8
+; RV32IFD-NEXT:    sltu a0, s3, a0
 ; RV32IFD-NEXT:    add a0, a1, a0
-; RV32IFD-NEXT:    add s4, s6, a0
-; RV32IFD-NEXT:    sltu s5, s4, s6
-; RV32IFD-NEXT:    mv a0, s3
+; RV32IFD-NEXT:    add s5, s6, a0
+; RV32IFD-NEXT:    sltu s6, s5, s6
+; RV32IFD-NEXT:    mv a0, s4
 ; RV32IFD-NEXT:    li a1, 0
 ; RV32IFD-NEXT:    mv a2, s0
 ; RV32IFD-NEXT:    li a3, 0
 ; RV32IFD-NEXT:    call __muldi3
-; RV32IFD-NEXT:    mv a2, a1
-; RV32IFD-NEXT:    mv a1, s8
-; RV32IFD-NEXT:    add a2, a2, s5
-; RV32IFD-NEXT:    add s4, a0, s4
-; RV32IFD-NEXT:    sltu a3, s4, a0
-; RV32IFD-NEXT:    add a0, s4, s1
-; RV32IFD-NEXT:    add a2, a2, a3
-; RV32IFD-NEXT:    add a2, a2, s7
-; RV32IFD-NEXT:    sltu a3, a0, s4
-; RV32IFD-NEXT:    add a2, a2, a3
-; RV32IFD-NEXT:  .LBB45_7: # %fp-to-i-cleanup
-; RV32IFD-NEXT:    lui a3, 524288
-; RV32IFD-NEXT:    addi a4, a3, -1
-; RV32IFD-NEXT:    beq a1, a4, .LBB45_9
-; RV32IFD-NEXT:  # %bb.8: # %fp-to-i-cleanup
-; RV32IFD-NEXT:    sltu a5, a1, a4
-; RV32IFD-NEXT:    or a6, a0, a2
-; RV32IFD-NEXT:    bnez a6, .LBB45_10
-; RV32IFD-NEXT:    j .LBB45_11
-; RV32IFD-NEXT:  .LBB45_9:
-; RV32IFD-NEXT:    sltiu a5, s2, -1
-; RV32IFD-NEXT:    or a6, a0, a2
-; RV32IFD-NEXT:    beqz a6, .LBB45_11
+; RV32IFD-NEXT:    add a1, a1, s6
+; RV32IFD-NEXT:    add s5, a0, s5
+; RV32IFD-NEXT:    sltu a2, s5, a0
+; RV32IFD-NEXT:    add a0, s5, s1
+; RV32IFD-NEXT:    add a1, a1, a2
+; RV32IFD-NEXT:    add a1, a1, s7
+; RV32IFD-NEXT:    sltu a2, a0, s5
+; RV32IFD-NEXT:    add a1, a1, a2
+; RV32IFD-NEXT:  .LBB45_6: # %fp-to-i-cleanup
+; RV32IFD-NEXT:    lui a2, 524288
+; RV32IFD-NEXT:    addi a3, a2, -1
+; RV32IFD-NEXT:    beq s3, a3, .LBB45_8
+; RV32IFD-NEXT:  # %bb.7: # %fp-to-i-cleanup
+; RV32IFD-NEXT:    sltu a4, s3, a3
+; RV32IFD-NEXT:    or a5, a0, a1
+; RV32IFD-NEXT:    bnez a5, .LBB45_9
+; RV32IFD-NEXT:    j .LBB45_10
+; RV32IFD-NEXT:  .LBB45_8:
+; RV32IFD-NEXT:    sltiu a4, s2, -1
+; RV32IFD-NEXT:    or a5, a0, a1
+; RV32IFD-NEXT:    beqz a5, .LBB45_10
+; RV32IFD-NEXT:  .LBB45_9: # %fp-to-i-cleanup
+; RV32IFD-NEXT:    srli a4, a1, 31
 ; RV32IFD-NEXT:  .LBB45_10: # %fp-to-i-cleanup
-; RV32IFD-NEXT:    srli a5, a2, 31
-; RV32IFD-NEXT:  .LBB45_11: # %fp-to-i-cleanup
-; RV32IFD-NEXT:    neg a6, a5
-; RV32IFD-NEXT:    addi a7, a5, -1
-; RV32IFD-NEXT:    bnez a5, .LBB45_13
-; RV32IFD-NEXT:  # %bb.12: # %fp-to-i-cleanup
-; RV32IFD-NEXT:    mv a1, a4
-; RV32IFD-NEXT:  .LBB45_13: # %fp-to-i-cleanup
-; RV32IFD-NEXT:    or a4, a7, s2
-; RV32IFD-NEXT:    and a2, a6, a2
-; RV32IFD-NEXT:    and a5, a6, a0
-; RV32IFD-NEXT:    beq a1, a3, .LBB45_15
-; RV32IFD-NEXT:  # %bb.14: # %fp-to-i-cleanup
-; RV32IFD-NEXT:    sltu a0, a3, a1
-; RV32IFD-NEXT:    j .LBB45_16
-; RV32IFD-NEXT:  .LBB45_15:
-; RV32IFD-NEXT:    snez a0, a4
-; RV32IFD-NEXT:  .LBB45_16: # %fp-to-i-cleanup
-; RV32IFD-NEXT:    and a5, a5, a2
-; RV32IFD-NEXT:    li a3, -1
-; RV32IFD-NEXT:    beq a5, a3, .LBB45_18
-; RV32IFD-NEXT:  # %bb.17: # %fp-to-i-cleanup
-; RV32IFD-NEXT:    srli a2, a2, 31
-; RV32IFD-NEXT:    xori a0, a2, 1
-; RV32IFD-NEXT:  .LBB45_18: # %fp-to-i-cleanup
-; RV32IFD-NEXT:    bnez a0, .LBB45_20
-; RV32IFD-NEXT:  # %bb.19: # %fp-to-i-cleanup
-; RV32IFD-NEXT:    lui a1, 524288
-; RV32IFD-NEXT:  .LBB45_20: # %fp-to-i-cleanup
+; RV32IFD-NEXT:    neg a5, a4
+; RV32IFD-NEXT:    addi a6, a4, -1
+; RV32IFD-NEXT:    bnez a4, .LBB45_12
+; RV32IFD-NEXT:  # %bb.11: # %fp-to-i-cleanup
+; RV32IFD-NEXT:    mv s3, a3
+; RV32IFD-NEXT:  .LBB45_12: # %fp-to-i-cleanup
+; RV32IFD-NEXT:    or a3, a6, s2
+; RV32IFD-NEXT:    and a1, a5, a1
+; RV32IFD-NEXT:    and a4, a5, a0
+; RV32IFD-NEXT:    beq s3, a2, .LBB45_14
+; RV32IFD-NEXT:  # %bb.13: # %fp-to-i-cleanup
+; RV32IFD-NEXT:    sltu a0, a2, s3
+; RV32IFD-NEXT:    j .LBB45_15
+; RV32IFD-NEXT:  .LBB45_14:
+; RV32IFD-NEXT:    snez a0, a3
+; RV32IFD-NEXT:  .LBB45_15: # %fp-to-i-cleanup
+; RV32IFD-NEXT:    and a4, a4, a1
+; RV32IFD-NEXT:    li a2, -1
+; RV32IFD-NEXT:    beq a4, a2, .LBB45_17
+; RV32IFD-NEXT:  # %bb.16: # %fp-to-i-cleanup
+; RV32IFD-NEXT:    srli a1, a1, 31
+; RV32IFD-NEXT:    xori a0, a1, 1
+; RV32IFD-NEXT:  .LBB45_17: # %fp-to-i-cleanup
+; RV32IFD-NEXT:    bnez a0, .LBB45_19
+; RV32IFD-NEXT:  # %bb.18: # %fp-to-i-cleanup
+; RV32IFD-NEXT:    lui s3, 524288
+; RV32IFD-NEXT:  .LBB45_19: # %fp-to-i-cleanup
 ; RV32IFD-NEXT:    neg a0, a0
-; RV32IFD-NEXT:    and a0, a0, a4
+; RV32IFD-NEXT:    and a0, a0, a3
+; RV32IFD-NEXT:    mv a1, s3
 ; RV32IFD-NEXT:    lw ra, 140(sp) # 4-byte Folded Reload
 ; RV32IFD-NEXT:    lw s0, 136(sp) # 4-byte Folded Reload
 ; RV32IFD-NEXT:    lw s1, 132(sp) # 4-byte Folded Reload
@@ -5994,6 +5986,15 @@ define i64 @stest_f64i64_mm(double %x) {
 ; RV32IFD-NEXT:    addi sp, sp, 144
 ; RV32IFD-NEXT:    .cfi_def_cfa_offset 0
 ; RV32IFD-NEXT:    ret
+; RV32IFD-NEXT:  .LBB45_20: # %fp-to-i-if-then5
+; RV32IFD-NEXT:    .cfi_restore_state
+; RV32IFD-NEXT:    xori a0, a3, 1
+; RV32IFD-NEXT:    lui a1, 524288
+; RV32IFD-NEXT:    neg s2, a0
+; RV32IFD-NEXT:    sub a1, a1, a0
+; RV32IFD-NEXT:    mv s3, s2
+; RV32IFD-NEXT:    mv a0, s2
+; RV32IFD-NEXT:    j .LBB45_6
 ;
 ; RV64IFD-LABEL: stest_f64i64_mm:
 ; RV64IFD:       # %bb.0: # %entry
@@ -6042,6 +6043,7 @@ define i64 @utest_f64i64_mm(double %x) {
 ; RV32IF-NEXT:    .cfi_offset s9, -44
 ; RV32IF-NEXT:    .cfi_offset s10, -48
 ; RV32IF-NEXT:    .cfi_offset s11, -52
+; RV32IF-NEXT:    .cfi_remember_state
 ; RV32IF-NEXT:    slli a2, a1, 1
 ; RV32IF-NEXT:    srli a2, a2, 21
 ; RV32IF-NEXT:    li a3, 1023
@@ -6051,23 +6053,15 @@ define i64 @utest_f64i64_mm(double %x) {
 ; RV32IF-NEXT:    li s9, 0
 ; RV32IF-NEXT:    li a0, 0
 ; RV32IF-NEXT:    li a1, 0
-; RV32IF-NEXT:    j .LBB46_7
+; RV32IF-NEXT:    j .LBB46_6
 ; RV32IF-NEXT:  .LBB46_2: # %fp-to-i-if-end
 ; RV32IF-NEXT:    addi a3, a2, -1151
 ; RV32IF-NEXT:    sltu a4, a3, a2
 ; RV32IF-NEXT:    sltiu a3, a3, -128
 ; RV32IF-NEXT:    or a4, a4, a3
 ; RV32IF-NEXT:    srli a3, a1, 31
-; RV32IF-NEXT:    beqz a4, .LBB46_4
-; RV32IF-NEXT:  # %bb.3: # %fp-to-i-if-then5
-; RV32IF-NEXT:    xori a0, a3, 1
-; RV32IF-NEXT:    lui a1, 524288
-; RV32IF-NEXT:    neg s2, a0
-; RV32IF-NEXT:    sub a1, a1, a0
-; RV32IF-NEXT:    mv s9, s2
-; RV32IF-NEXT:    mv a0, s2
-; RV32IF-NEXT:    j .LBB46_7
-; RV32IF-NEXT:  .LBB46_4: # %fp-to-i-if-end9
+; RV32IF-NEXT:    bnez a4, .LBB46_7
+; RV32IF-NEXT:  # %bb.3: # %fp-to-i-if-end9
 ; RV32IF-NEXT:    neg s0, a3
 ; RV32IF-NEXT:    slli a1, a1, 12
 ; RV32IF-NEXT:    lui a3, 256
@@ -6075,8 +6069,8 @@ define i64 @utest_f64i64_mm(double %x) {
 ; RV32IF-NEXT:    srli a1, a1, 12
 ; RV32IF-NEXT:    or a1, a1, a3
 ; RV32IF-NEXT:    ori s8, s0, 1
-; RV32IF-NEXT:    bltu a4, a2, .LBB46_6
-; RV32IF-NEXT:  # %bb.5: # %fp-to-i-if-then12
+; RV32IF-NEXT:    bltu a4, a2, .LBB46_5
+; RV32IF-NEXT:  # %bb.4: # %fp-to-i-if-then12
 ; RV32IF-NEXT:    sw zero, 24(sp)
 ; RV32IF-NEXT:    sw zero, 28(sp)
 ; RV32IF-NEXT:    sw zero, 32(sp)
@@ -6168,8 +6162,8 @@ define i64 @utest_f64i64_mm(double %x) {
 ; RV32IF-NEXT:    add a1, a1, a2
 ; RV32IF-NEXT:    add a1, a3, a1
 ; RV32IF-NEXT:    add a1, a1, a5
-; RV32IF-NEXT:    j .LBB46_7
-; RV32IF-NEXT:  .LBB46_6: # %fp-to-i-if-else
+; RV32IF-NEXT:    j .LBB46_6
+; RV32IF-NEXT:  .LBB46_5: # %fp-to-i-if-else
 ; RV32IF-NEXT:    sw a0, 56(sp)
 ; RV32IF-NEXT:    sw a1, 60(sp)
 ; RV32IF-NEXT:    sw zero, 64(sp)
@@ -6255,7 +6249,7 @@ define i64 @utest_f64i64_mm(double %x) {
 ; RV32IF-NEXT:    add a1, a1, s7
 ; RV32IF-NEXT:    sltu a2, a0, s4
 ; RV32IF-NEXT:    add a1, a1, a2
-; RV32IF-NEXT:  .LBB46_7: # %fp-to-i-cleanup
+; RV32IF-NEXT:  .LBB46_6: # %fp-to-i-cleanup
 ; RV32IF-NEXT:    or a2, a1, a0
 ; RV32IF-NEXT:    xori a0, a0, 1
 ; RV32IF-NEXT:    seqz a2, a2
@@ -6295,6 +6289,15 @@ define i64 @utest_f64i64_mm(double %x) {
 ; RV32IF-NEXT:    addi sp, sp, 128
 ; RV32IF-NEXT:    .cfi_def_cfa_offset 0
 ; RV32IF-NEXT:    ret
+; RV32IF-NEXT:  .LBB46_7: # %fp-to-i-if-then5
+; RV32IF-NEXT:    .cfi_restore_state
+; RV32IF-NEXT:    xori a0, a3, 1
+; RV32IF-NEXT:    lui a1, 524288
+; RV32IF-NEXT:    neg s2, a0
+; RV32IF-NEXT:    sub a1, a1, a0
+; RV32IF-NEXT:    mv s9, s2
+; RV32IF-NEXT:    mv a0, s2
+; RV32IF-NEXT:    j .LBB46_6
 ;
 ; RV64-LABEL: utest_f64i64_mm:
 ; RV64:       # %bb.0: # %entry
@@ -6342,6 +6345,7 @@ define i64 @utest_f64i64_mm(double %x) {
 ; RV32IFD-NEXT:    .cfi_offset s9, -44
 ; RV32IFD-NEXT:    .cfi_offset s10, -48
 ; RV32IFD-NEXT:    .cfi_offset s11, -52
+; RV32IFD-NEXT:    .cfi_remember_state
 ; RV32IFD-NEXT:    fsd fa0, 16(sp)
 ; RV32IFD-NEXT:    lw a2, 20(sp)
 ; RV32IFD-NEXT:    slli a0, a2, 1
@@ -6353,23 +6357,15 @@ define i64 @utest_f64i64_mm(double %x) {
 ; RV32IFD-NEXT:    li s9, 0
 ; RV32IFD-NEXT:    li a0, 0
 ; RV32IFD-NEXT:    li a1, 0
-; RV32IFD-NEXT:    j .LBB46_7
+; RV32IFD-NEXT:    j .LBB46_6
 ; RV32IFD-NEXT:  .LBB46_2: # %fp-to-i-if-end
 ; RV32IFD-NEXT:    addi a1, a0, -1151
 ; RV32IFD-NEXT:    sltu a3, a1, a0
 ; RV32IFD-NEXT:    sltiu a1, a1, -128
 ; RV32IFD-NEXT:    or a1, a3, a1
 ; RV32IFD-NEXT:    srli a3, a2, 31
-; RV32IFD-NEXT:    beqz a1, .LBB46_4
-; RV32IFD-NEXT:  # %bb.3: # %fp-to-i-if-then5
-; RV32IFD-NEXT:    xori a0, a3, 1
-; RV32IFD-NEXT:    lui a1, 524288
-; RV32IFD-NEXT:    neg s2, a0
-; RV32IFD-NEXT:    sub a1, a1, a0
-; RV32IFD-NEXT:    mv s9, s2
-; RV32IFD-NEXT:    mv a0, s2
-; RV32IFD-NEXT:    j .LBB46_7
-; RV32IFD-NEXT:  .LBB46_4: # %fp-to-i-if-end9
+; RV32IFD-NEXT:    bnez a1, .LBB46_7
+; RV32IFD-NEXT:  # %bb.3: # %fp-to-i-if-end9
 ; RV32IFD-NEXT:    lw a1, 16(sp)
 ; RV32IFD-NEXT:    neg s0, a3
 ; RV32IFD-NEXT:    slli a2, a2, 12
@@ -6378,8 +6374,8 @@ define i64 @utest_f64i64_mm(double %x) {
 ; RV32IFD-NEXT:    or a2, a2, a3
 ; RV32IFD-NEXT:    li a3, 1074
 ; RV32IFD-NEXT:    ori s8, s0, 1
-; RV32IFD-NEXT:    bltu a3, a0, .LBB46_6
-; RV32IFD-NEXT:  # %bb.5: # %fp-to-i-if-then12
+; RV32IFD-NEXT:    bltu a3, a0, .LBB46_5
+; RV32IFD-NEXT:  # %bb.4: # %fp-to-i-if-then12
 ; RV32IFD-NEXT:    sw zero, 40(sp)
 ; RV32IFD-NEXT:    sw zero, 44(sp)
 ; RV32IFD-NEXT:    sw zero, 48(sp)
@@ -6471,8 +6467,8 @@ define i64 @utest_f64i64_mm(double %x) {
 ; RV32IFD-NEXT:    add a1, a1, a2
 ; RV32IFD-NEXT:    add a1, a3, a1
 ; RV32IFD-NEXT:    add a1, a1, a5
-; RV32IFD-NEXT:    j .LBB46_7
-; RV32IFD-NEXT:  .LBB46_6: # %fp-to-i-if-else
+; RV32IFD-NEXT:    j .LBB46_6
+; RV32IFD-NEXT:  .LBB46_5: # %fp-to-i-if-else
 ; RV32IFD-NEXT:    sw a1, 72(sp)
 ; RV32IFD-NEXT:    sw a2, 76(sp)
 ; RV32IFD-NEXT:    sw zero, 80(sp)
@@ -6558,7 +6554,7 @@ define i64 @utest_f64i64_mm(double %x) {
 ; RV32IFD-NEXT:    add a1, a1, s7
 ; RV32IFD-NEXT:    sltu a2, a0, s4
 ; RV32IFD-NEXT:    add a1, a1, a2
-; RV32IFD-NEXT:  .LBB46_7: # %fp-to-i-cleanup
+; RV32IFD-NEXT:  .LBB46_6: # %fp-to-i-cleanup
 ; RV32IFD-NEXT:    or a2, a1, a0
 ; RV32IFD-NEXT:    xori a0, a0, 1
 ; RV32IFD-NEXT:    seqz a2, a2
@@ -6598,6 +6594,15 @@ define i64 @utest_f64i64_mm(double %x) {
 ; RV32IFD-NEXT:    addi sp, sp, 144
 ; RV32IFD-NEXT:    .cfi_def_cfa_offset 0
 ; RV32IFD-NEXT:    ret
+; RV32IFD-NEXT:  .LBB46_7: # %fp-to-i-if-then5
+; RV32IFD-NEXT:    .cfi_restore_state
+; RV32IFD-NEXT:    xori a0, a3, 1
+; RV32IFD-NEXT:    lui a1, 524288
+; RV32IFD-NEXT:    neg s2, a0
+; RV32IFD-NEXT:    sub a1, a1, a0
+; RV32IFD-NEXT:    mv s9, s2
+; RV32IFD-NEXT:    mv a0, s2
+; RV32IFD-NEXT:    j .LBB46_6
 entry:
   %conv = fptoui double %x to i128
   %spec.store.select = call i128 @llvm.umin.i128(i128 %conv, i128 18446744073709551616)
@@ -6645,24 +6650,15 @@ define i64 @ustest_f64i64_mm(double %x) {
 ; RV32IF-NEXT:    li s9, 0
 ; RV32IF-NEXT:    li a1, 0
 ; RV32IF-NEXT:    li a0, 0
-; RV32IF-NEXT:    j .LBB47_6
+; RV32IF-NEXT:    j .LBB47_8
 ; RV32IF-NEXT:  .LBB47_2: # %fp-to-i-if-end
 ; RV32IF-NEXT:    addi a3, a2, -1151
 ; RV32IF-NEXT:    sltu a4, a3, a2
 ; RV32IF-NEXT:    sltiu a3, a3, -128
 ; RV32IF-NEXT:    or a4, a4, a3
 ; RV32IF-NEXT:    srli a3, a1, 31
-; RV32IF-NEXT:    beqz a4, .LBB47_4
-; RV32IF-NEXT:  # %bb.3: # %fp-to-i-if-then5
-; RV32IF-NEXT:    xori a0, a3, 1
-; RV32IF-NEXT:    lui a1, 524288
-; RV32IF-NEXT:    neg s2, a0
-; RV32IF-NEXT:    sub a0, a1, a0
-; RV32IF-NEXT:    mv s9, s2
-; RV32IF-NEXT:    mv a1, s2
-; RV32IF-NEXT:    beqz a0, .LBB47_6
-; RV32IF-NEXT:    j .LBB47_8
-; RV32IF-NEXT:  .LBB47_4: # %fp-to-i-if-end9
+; RV32IF-NEXT:    bnez a4, .LBB47_7
+; RV32IF-NEXT:  # %bb.3: # %fp-to-i-if-end9
 ; RV32IF-NEXT:    neg s0, a3
 ; RV32IF-NEXT:    slli a1, a1, 12
 ; RV32IF-NEXT:    lui a3, 256
@@ -6670,8 +6666,8 @@ define i64 @ustest_f64i64_mm(double %x) {
 ; RV32IF-NEXT:    srli a1, a1, 12
 ; RV32IF-NEXT:    or a1, a1, a3
 ; RV32IF-NEXT:    ori s8, s0, 1
-; RV32IF-NEXT:    bltu a4, a2, .LBB47_7
-; RV32IF-NEXT:  # %bb.5: # %fp-to-i-if-then12
+; RV32IF-NEXT:    bltu a4, a2, .LBB47_5
+; RV32IF-NEXT:  # %bb.4: # %fp-to-i-if-then12
 ; RV32IF-NEXT:    sw zero, 24(sp)
 ; RV32IF-NEXT:    sw zero, 28(sp)
 ; RV32IF-NEXT:    sw zero, 32(sp)
@@ -6763,11 +6759,9 @@ define i64 @ustest_f64i64_mm(double %x) {
 ; RV32IF-NEXT:    add a0, s6, a0
 ; RV32IF-NEXT:    add a0, a2, a0
 ; RV32IF-NEXT:    add a0, a0, a4
-; RV32IF-NEXT:    bnez a0, .LBB47_8
-; RV32IF-NEXT:  .LBB47_6:
-; RV32IF-NEXT:    seqz a2, a1
-; RV32IF-NEXT:    j .LBB47_9
-; RV32IF-NEXT:  .LBB47_7: # %fp-to-i-if-else
+; RV32IF-NEXT:    bnez a0, .LBB47_6
+; RV32IF-NEXT:    j .LBB47_8
+; RV32IF-NEXT:  .LBB47_5: # %fp-to-i-if-else
 ; RV32IF-NEXT:    sw a0, 56(sp)
 ; RV32IF-NEXT:    sw a1, 60(sp)
 ; RV32IF-NEXT:    sw zero, 64(sp)
@@ -6853,9 +6847,20 @@ define i64 @ustest_f64i64_mm(double %x) {
 ; RV32IF-NEXT:    add a0, a0, s7
 ; RV32IF-NEXT:    sltu a2, a1, s4
 ; RV32IF-NEXT:    add a0, a0, a2
-; RV32IF-NEXT:    beqz a0, .LBB47_6
-; RV32IF-NEXT:  .LBB47_8: # %fp-to-i-cleanup
+; RV32IF-NEXT:    beqz a0, .LBB47_8
+; RV32IF-NEXT:  .LBB47_6: # %fp-to-i-cleanup
 ; RV32IF-NEXT:    srli a2, a0, 31
+; RV32IF-NEXT:    j .LBB47_9
+; RV32IF-NEXT:  .LBB47_7: # %fp-to-i-if-then5
+; RV32IF-NEXT:    xori a0, a3, 1
+; RV32IF-NEXT:    lui a1, 524288
+; RV32IF-NEXT:    neg s2, a0
+; RV32IF-NEXT:    sub a0, a1, a0
+; RV32IF-NEXT:    mv s9, s2
+; RV32IF-NEXT:    mv a1, s2
+; RV32IF-NEXT:    bnez a0, .LBB47_6
+; RV32IF-NEXT:  .LBB47_8:
+; RV32IF-NEXT:    seqz a2, a1
 ; RV32IF-NEXT:  .LBB47_9: # %fp-to-i-cleanup
 ; RV32IF-NEXT:    xori a1, a1, 1
 ; RV32IF-NEXT:    or a1, a1, a0
@@ -6965,24 +6970,15 @@ define i64 @ustest_f64i64_mm(double %x) {
 ; RV32IFD-NEXT:    li s9, 0
 ; RV32IFD-NEXT:    li a1, 0
 ; RV32IFD-NEXT:    li a0, 0
-; RV32IFD-NEXT:    j .LBB47_6
+; RV32IFD-NEXT:    j .LBB47_8
 ; RV32IFD-NEXT:  .LBB47_2: # %fp-to-i-if-end
 ; RV32IFD-NEXT:    addi a1, a0, -1151
 ; RV32IFD-NEXT:    sltu a3, a1, a0
 ; RV32IFD-NEXT:    sltiu a1, a1, -128
 ; RV32IFD-NEXT:    or a1, a3, a1
 ; RV32IFD-NEXT:    srli a3, a2, 31
-; RV32IFD-NEXT:    beqz a1, .LBB47_4
-; RV32IFD-NEXT:  # %bb.3: # %fp-to-i-if-then5
-; RV32IFD-NEXT:    xori a0, a3, 1
-; RV32IFD-NEXT:    lui a1, 524288
-; RV32IFD-NEXT:    neg s2, a0
-; RV32IFD-NEXT:    sub a0, a1, a0
-; RV32IFD-NEXT:    mv s9, s2
-; RV32IFD-NEXT:    mv a1, s2
-; RV32IFD-NEXT:    beqz a0, .LBB47_6
-; RV32IFD-NEXT:    j .LBB47_8
-; RV32IFD-NEXT:  .LBB47_4: # %fp-to-i-if-end9
+; RV32IFD-NEXT:    bnez a1, .LBB47_7
+; RV32IFD-NEXT:  # %bb.3: # %fp-to-i-if-end9
 ; RV32IFD-NEXT:    lw a1, 16(sp)
 ; RV32IFD-NEXT:    neg s0, a3
 ; RV32IFD-NEXT:    slli a2, a2, 12
@@ -6991,8 +6987,8 @@ define i64 @ustest_f64i64_mm(double %x) {
 ; RV32IFD-NEXT:    or a2, a2, a3
 ; RV32IFD-NEXT:    li a3, 1074
 ; RV32IFD-NEXT:    ori s8, s0, 1
-; RV32IFD-NEXT:    bltu a3, a0, .LBB47_7
-; RV32IFD-NEXT:  # %bb.5: # %fp-to-i-if-then12
+; RV32IFD-NEXT:    bltu a3, a0, .LBB47_5
+; RV32IFD-NEXT:  # %bb.4: # %fp-to-i-if-then12
 ; RV32IFD-NEXT:    sw zero, 40(sp)
 ; RV32IFD-NEXT:    sw zero, 44(sp)
 ; RV32IFD-NEXT:    sw zero, 48(sp)
@@ -7084,11 +7080,9 @@ define i64 @ustest_f64i64_mm(double %x) {
 ; RV32IFD-NEXT:    add a0, s6, a0
 ; RV32IFD-NEXT:    add a0, a2, a0
 ; RV32IFD-NEXT:    add a0, a0, a4
-; RV32IFD-NEXT:    bnez a0, .LBB47_8
-; RV32IFD-NEXT:  .LBB47_6:
-; RV32IFD-NEXT:    seqz a2, a1
-; RV32IFD-NEXT:    j .LBB47_9
-; RV32IFD-NEXT:  .LBB47_7: # %fp-to-i-if-else
+; RV32IFD-NEXT:    bnez a0, .LBB47_6
+; RV32IFD-NEXT:    j .LBB47_8
+; RV32IFD-NEXT:  .LBB47_5: # %fp-to-i-if-else
 ; RV32IFD-NEXT:    sw a1, 72(sp)
 ; RV32IFD-NEXT:    sw a2, 76(sp)
 ; RV32IFD-NEXT:    sw zero, 80(sp)
@@ -7174,9 +7168,20 @@ define i64 @ustest_f64i64_mm(double %x) {
 ; RV32IFD-NEXT:    add a0, a0, s7
 ; RV32IFD-NEXT:    sltu a2, a1, s4
 ; RV32IFD-NEXT:    add a0, a0, a2
-; RV32IFD-NEXT:    beqz a0, .LBB47_6
-; RV32IFD-NEXT:  .LBB47_8: # %fp-to-i-cleanup
+; RV32IFD-NEXT:    beqz a0, .LBB47_8
+; RV32IFD-NEXT:  .LBB47_6: # %fp-to-i-cleanup
 ; RV32IFD-NEXT:    srli a2, a0, 31
+; RV32IFD-NEXT:    j .LBB47_9
+; RV32IFD-NEXT:  .LBB47_7: # %fp-to-i-if-then5
+; RV32IFD-NEXT:    xori a0, a3, 1
+; RV32IFD-NEXT:    lui a1, 524288
+; RV32IFD-NEXT:    neg s2, a0
+; RV32IFD-NEXT:    sub a0, a1, a0
+; RV32IFD-NEXT:    mv s9, s2
+; RV32IFD-NEXT:    mv a1, s2
+; RV32IFD-NEXT:    bnez a0, .LBB47_6
+; RV32IFD-NEXT:  .LBB47_8:
+; RV32IFD-NEXT:    seqz a2, a1
 ; RV32IFD-NEXT:  .LBB47_9: # %fp-to-i-cleanup
 ; RV32IFD-NEXT:    xori a1, a1, 1
 ; RV32IFD-NEXT:    or a1, a1, a0
@@ -7259,6 +7264,7 @@ define i64 @stest_f32i64_mm(float %x) {
 ; RV32-NEXT:    .cfi_offset s9, -44
 ; RV32-NEXT:    .cfi_offset s10, -48
 ; RV32-NEXT:    .cfi_offset s11, -52
+; RV32-NEXT:    .cfi_remember_state
 ; RV32-NEXT:    fmv.x.w a1, fa0
 ; RV32-NEXT:    slli a0, a1, 1
 ; RV32-NEXT:    srli a0, a0, 24
@@ -7266,26 +7272,17 @@ define i64 @stest_f32i64_mm(float %x) {
 ; RV32-NEXT:    bgeu a0, a2, .LBB48_2
 ; RV32-NEXT:  # %bb.1:
 ; RV32-NEXT:    li s2, 0
-; RV32-NEXT:    li a1, 0
+; RV32-NEXT:    li s3, 0
 ; RV32-NEXT:    li a0, 0
-; RV32-NEXT:    li a2, 0
-; RV32-NEXT:    j .LBB48_7
+; RV32-NEXT:    li a1, 0
+; RV32-NEXT:    j .LBB48_6
 ; RV32-NEXT:  .LBB48_2: # %fp-to-i-if-end
 ; RV32-NEXT:    addi a2, a0, -255
 ; RV32-NEXT:    sltu a3, a2, a0
 ; RV32-NEXT:    sltiu a2, a2, -128
 ; RV32-NEXT:    or a2, a3, a2
-; RV32-NEXT:    beqz a2, .LBB48_4
-; RV32-NEXT:  # %bb.3: # %fp-to-i-if-then5
-; RV32-NEXT:    srli a1, a1, 31
-; RV32-NEXT:    lui a0, 524288
-; RV32-NEXT:    xori a2, a1, 1
-; RV32-NEXT:    addi s2, a1, -1
-; RV32-NEXT:    sub a2, a0, a2
-; RV32-NEXT:    mv a1, s2
-; RV32-NEXT:    mv a0, s2
-; RV32-NEXT:    j .LBB48_7
-; RV32-NEXT:  .LBB48_4: # %fp-to-i-if-end9
+; RV32-NEXT:    bnez a2, .LBB48_20
+; RV32-NEXT:  # %bb.3: # %fp-to-i-if-end9
 ; RV32-NEXT:    srai s0, a1, 31
 ; RV32-NEXT:    slli a1, a1, 9
 ; RV32-NEXT:    lui a2, 2048
@@ -7293,8 +7290,8 @@ define i64 @stest_f32i64_mm(float %x) {
 ; RV32-NEXT:    srli a1, a1, 9
 ; RV32-NEXT:    or a1, a1, a2
 ; RV32-NEXT:    ori s8, s0, 1
-; RV32-NEXT:    bltu a3, a0, .LBB48_6
-; RV32-NEXT:  # %bb.5: # %fp-to-i-if-then12
+; RV32-NEXT:    bltu a3, a0, .LBB48_5
+; RV32-NEXT:  # %bb.4: # %fp-to-i-if-then12
 ; RV32-NEXT:    sw zero, 24(sp)
 ; RV32-NEXT:    sw zero, 28(sp)
 ; RV32-NEXT:    sw zero, 32(sp)
@@ -7326,70 +7323,68 @@ define i64 @stest_f32i64_mm(float %x) {
 ; RV32-NEXT:    srl a0, a0, a1
 ; RV32-NEXT:    or a3, a3, a6
 ; RV32-NEXT:    sw a3, 4(sp) # 4-byte Folded Spill
-; RV32-NEXT:    or s5, a2, a7
-; RV32-NEXT:    or s6, a0, a5
-; RV32-NEXT:    srl s4, a4, a1
-; RV32-NEXT:    mv a0, s6
+; RV32-NEXT:    or s6, a2, a7
+; RV32-NEXT:    or s7, a0, a5
+; RV32-NEXT:    srl s5, a4, a1
+; RV32-NEXT:    mv a0, s7
 ; RV32-NEXT:    li a1, 0
 ; RV32-NEXT:    mv a2, s8
 ; RV32-NEXT:    li a3, 0
 ; RV32-NEXT:    call __muldi3
 ; RV32-NEXT:    mv s2, a0
-; RV32-NEXT:    mv s7, a1
-; RV32-NEXT:    mv a0, s5
+; RV32-NEXT:    mv s3, a1
+; RV32-NEXT:    mv a0, s6
 ; RV32-NEXT:    li a1, 0
 ; RV32-NEXT:    mv a2, s8
 ; RV32-NEXT:    li a3, 0
 ; RV32-NEXT:    call __muldi3
-; RV32-NEXT:    add s7, a0, s7
-; RV32-NEXT:    sltu a0, s7, a0
-; RV32-NEXT:    add s9, a1, a0
-; RV32-NEXT:    mv a0, s6
+; RV32-NEXT:    add s3, a0, s3
+; RV32-NEXT:    sltu a0, s3, a0
+; RV32-NEXT:    add s10, a1, a0
+; RV32-NEXT:    mv a0, s7
 ; RV32-NEXT:    li a1, 0
 ; RV32-NEXT:    mv a2, s0
 ; RV32-NEXT:    li a3, 0
 ; RV32-NEXT:    call __muldi3
-; RV32-NEXT:    add s10, a0, s7
-; RV32-NEXT:    sltu a0, s10, a0
+; RV32-NEXT:    add s3, a0, s3
+; RV32-NEXT:    sltu a0, s3, a0
 ; RV32-NEXT:    add a0, a1, a0
-; RV32-NEXT:    add s11, s9, a0
-; RV32-NEXT:    mv a0, s5
+; RV32-NEXT:    add s11, s10, a0
+; RV32-NEXT:    mv a0, s6
 ; RV32-NEXT:    li a1, 0
 ; RV32-NEXT:    mv a2, s0
 ; RV32-NEXT:    li a3, 0
 ; RV32-NEXT:    call __muldi3
 ; RV32-NEXT:    mv s1, a0
-; RV32-NEXT:    mv s3, s8
-; RV32-NEXT:    mv s8, a1
-; RV32-NEXT:    add s7, a0, s11
+; RV32-NEXT:    mv s9, a1
+; RV32-NEXT:    mv s4, s8
+; RV32-NEXT:    add s8, a0, s11
 ; RV32-NEXT:    mv a0, s0
 ; RV32-NEXT:    mv a1, s0
-; RV32-NEXT:    mv a2, s6
-; RV32-NEXT:    mv a3, s5
+; RV32-NEXT:    mv a2, s7
+; RV32-NEXT:    mv a3, s6
 ; RV32-NEXT:    call __muldi3
-; RV32-NEXT:    mv s5, a0
-; RV32-NEXT:    mv s6, a1
-; RV32-NEXT:    mv a0, s3
+; RV32-NEXT:    mv s6, a0
+; RV32-NEXT:    mv s7, a1
+; RV32-NEXT:    mv a0, s4
 ; RV32-NEXT:    mv a1, s0
 ; RV32-NEXT:    lw a2, 4(sp) # 4-byte Folded Reload
-; RV32-NEXT:    mv a3, s4
+; RV32-NEXT:    mv a3, s5
 ; RV32-NEXT:    call __muldi3
-; RV32-NEXT:    mv a2, a1
-; RV32-NEXT:    mv a1, s10
-; RV32-NEXT:    add a3, s5, a0
-; RV32-NEXT:    sltu a4, s7, s1
-; RV32-NEXT:    sltu a5, s11, s9
-; RV32-NEXT:    add a2, s6, a2
-; RV32-NEXT:    add a0, s7, a3
-; RV32-NEXT:    add a5, s8, a5
-; RV32-NEXT:    sltu a3, a3, s5
-; RV32-NEXT:    sltu a6, a0, s7
-; RV32-NEXT:    add a4, a5, a4
-; RV32-NEXT:    add a2, a2, a3
-; RV32-NEXT:    add a2, a4, a2
-; RV32-NEXT:    add a2, a2, a6
-; RV32-NEXT:    j .LBB48_7
-; RV32-NEXT:  .LBB48_6: # %fp-to-i-if-else
+; RV32-NEXT:    add a2, s6, a0
+; RV32-NEXT:    sltu a3, s8, s1
+; RV32-NEXT:    sltu a4, s11, s10
+; RV32-NEXT:    add a1, s7, a1
+; RV32-NEXT:    add a0, s8, a2
+; RV32-NEXT:    add a4, s9, a4
+; RV32-NEXT:    sltu a2, a2, s6
+; RV32-NEXT:    sltu a5, a0, s8
+; RV32-NEXT:    add a3, a4, a3
+; RV32-NEXT:    add a1, a1, a2
+; RV32-NEXT:    add a1, a3, a1
+; RV32-NEXT:    add a1, a1, a5
+; RV32-NEXT:    j .LBB48_6
+; RV32-NEXT:  .LBB48_5: # %fp-to-i-if-else
 ; RV32-NEXT:    sw a1, 56(sp)
 ; RV32-NEXT:    sw zero, 60(sp)
 ; RV32-NEXT:    sw zero, 64(sp)
@@ -7418,10 +7413,10 @@ define i64 @stest_f32i64_mm(float %x) {
 ; RV32-NEXT:    sll a2, a2, a0
 ; RV32-NEXT:    sll a5, a5, a0
 ; RV32-NEXT:    sll a1, a1, a0
-; RV32-NEXT:    or s3, a2, a6
+; RV32-NEXT:    or s4, a2, a6
 ; RV32-NEXT:    or a2, a5, a7
 ; RV32-NEXT:    or a3, a1, a3
-; RV32-NEXT:    sll s4, a4, a0
+; RV32-NEXT:    sll s3, a4, a0
 ; RV32-NEXT:    mv a0, s8
 ; RV32-NEXT:    mv a1, s0
 ; RV32-NEXT:    call __muldi3
@@ -7429,21 +7424,21 @@ define i64 @stest_f32i64_mm(float %x) {
 ; RV32-NEXT:    mv s5, a1
 ; RV32-NEXT:    mv a0, s0
 ; RV32-NEXT:    mv a1, s0
-; RV32-NEXT:    mv a2, s4
-; RV32-NEXT:    mv a3, s3
+; RV32-NEXT:    mv a2, s3
+; RV32-NEXT:    mv a3, s4
 ; RV32-NEXT:    call __muldi3
 ; RV32-NEXT:    add a1, a1, s5
 ; RV32-NEXT:    add s1, a0, s2
 ; RV32-NEXT:    sltu a0, s1, a0
 ; RV32-NEXT:    add s7, a1, a0
-; RV32-NEXT:    mv a0, s3
+; RV32-NEXT:    mv a0, s4
 ; RV32-NEXT:    li a1, 0
 ; RV32-NEXT:    mv a2, s8
 ; RV32-NEXT:    li a3, 0
 ; RV32-NEXT:    call __muldi3
 ; RV32-NEXT:    mv s5, a0
 ; RV32-NEXT:    mv s6, a1
-; RV32-NEXT:    mv a0, s4
+; RV32-NEXT:    mv a0, s3
 ; RV32-NEXT:    li a1, 0
 ; RV32-NEXT:    mv a2, s8
 ; RV32-NEXT:    li a3, 0
@@ -7452,76 +7447,75 @@ define i64 @stest_f32i64_mm(float %x) {
 ; RV32-NEXT:    add s8, s5, a1
 ; RV32-NEXT:    sltu a0, s8, s5
 ; RV32-NEXT:    add s6, s6, a0
-; RV32-NEXT:    mv a0, s4
+; RV32-NEXT:    mv a0, s3
 ; RV32-NEXT:    li a1, 0
 ; RV32-NEXT:    mv a2, s0
 ; RV32-NEXT:    li a3, 0
 ; RV32-NEXT:    call __muldi3
-; RV32-NEXT:    add s8, a0, s8
-; RV32-NEXT:    sltu a0, s8, a0
+; RV32-NEXT:    add s3, a0, s8
+; RV32-NEXT:    sltu a0, s3, a0
 ; RV32-NEXT:    add a0, a1, a0
-; RV32-NEXT:    add s4, s6, a0
-; RV32-NEXT:    sltu s5, s4, s6
-; RV32-NEXT:    mv a0, s3
+; RV32-NEXT:    add s5, s6, a0
+; RV32-NEXT:    sltu s6, s5, s6
+; RV32-NEXT:    mv a0, s4
 ; RV32-NEXT:    li a1, 0
 ; RV32-NEXT:    mv a2, s0
 ; RV32-NEXT:    li a3, 0
 ; RV32-NEXT:    call __muldi3
-; RV32-NEXT:    mv a2, a1
-; RV32-NEXT:    mv a1, s8
-; RV32-NEXT:    add a2, a2, s5
-; RV32-NEXT:    add s4, a0, s4
-; RV32-NEXT:    sltu a3, s4, a0
-; RV32-NEXT:    add a0, s4, s1
-; RV32-NEXT:    add a2, a2, a3
-; RV32-NEXT:    add a2, a2, s7
-; RV32-NEXT:    sltu a3, a0, s4
-; RV32-NEXT:    add a2, a2, a3
-; RV32-NEXT:  .LBB48_7: # %fp-to-i-cleanup
-; RV32-NEXT:    lui a3, 524288
-; RV32-NEXT:    addi a4, a3, -1
-; RV32-NEXT:    beq a1, a4, .LBB48_9
-; RV32-NEXT:  # %bb.8: # %fp-to-i-cleanup
-; RV32-NEXT:    sltu a5, a1, a4
-; RV32-NEXT:    or a6, a0, a2
-; RV32-NEXT:    bnez a6, .LBB48_10
-; RV32-NEXT:    j .LBB48_11
-; RV32-NEXT:  .LBB48_9:
-; RV32-NEXT:    sltiu a5, s2, -1
-; RV32-NEXT:    or a6, a0, a2
-; RV32-NEXT:    beqz a6, .LBB48_11
+; RV32-NEXT:    add a1, a1, s6
+; RV32-NEXT:    add s5, a0, s5
+; RV32-NEXT:    sltu a2, s5, a0
+; RV32-NEXT:    add a0, s5, s1
+; RV32-NEXT:    add a1, a1, a2
+; RV32-NEXT:    add a1, a1, s7
+; RV32-NEXT:    sltu a2, a0, s5
+; RV32-NEXT:    add a1, a1, a2
+; RV32-NEXT:  .LBB48_6: # %fp-to-i-cleanup
+; RV32-NEXT:    lui a2, 524288
+; RV32-NEXT:    addi a3, a2, -1
+; RV32-NEXT:    beq s3, a3, .LBB48_8
+; RV32-NEXT:  # %bb.7: # %fp-to-i-cleanup
+; RV32-NEXT:    sltu a4, s3, a3
+; RV32-NEXT:    or a5, a0, a1
+; RV32-NEXT:    bnez a5, .LBB48_9
+; RV32-NEXT:    j .LBB48_10
+; RV32-NEXT:  .LBB48_8:
+; RV32-NEXT:    sltiu a4, s2, -1
+; RV32-NEXT:    or a5, a0, a1
+; RV32-NEXT:    beqz a5, .LBB48_10
+; RV32-NEXT:  .LBB48_9: # %fp-to-i-cleanup
+; RV32-NEXT:    srli a4, a1, 31
 ; RV32-NEXT:  .LBB48_10: # %fp-to-i-cleanup
-; RV32-NEXT:    srli a5, a2, 31
-; RV32-NEXT:  .LBB48_11: # %fp-to-i-cleanup
-; RV32-NEXT:    neg a6, a5
-; RV32-NEXT:    addi a7, a5, -1
-; RV32-NEXT:    bnez a5, .LBB48_13
-; RV32-NEXT:  # %bb.12: # %fp-to-i-cleanup
-; RV32-NEXT:    mv a1, a4
-; RV32-NEXT:  .LBB48_13: # %fp-to-i-cleanup
-; RV32-NEXT:    or a4, a7, s2
-; RV32-NEXT:    and a2, a6, a2
-; RV32-NEXT:    and a5, a6, a0
-; RV32-NEXT:    beq a1, a3, .LBB48_15
-; RV32-NEXT:  # %bb.14: # %fp-to-i-cleanup
-; RV32-NEXT:    sltu a0, a3, a1
-; RV32-NEXT:    j .LBB48_16
-; RV32-NEXT:  .LBB48_15:
-; RV32-NEXT:    snez a0, a4
-; RV32-NEXT:  .LBB48_16: # %fp-to-i-cleanup
-; RV32-NEXT:    and a5, a5, a2
-; RV32-NEXT:    li a3, -1
-; RV32-NEXT:    beq a5, a3, .LBB48_18
-; RV32-NEXT:  # %bb.17: # %fp-to-i-cleanup
-; RV32-NEXT:    srli a2, a2, 31
-; RV32-NEXT:    xori a0, a2, 1
-; RV32-NEXT:  .LBB48_18: # %fp-to-i-cleanup
-; RV32-NEXT:    bnez a0, .LBB48_20
-; RV32-NEXT:  # %bb.19: # %fp-to-i-cleanup
-; RV32-NEXT:    lui a1, 524288
-; RV32-NEXT:  .LBB48_20: # %fp-to-i-cleanup
+; RV32-NEXT:    neg a5, a4
+; RV32-NEXT:    addi a6, a4, -1
+; RV32-NEXT:    bnez a4, .LBB48_12
+; RV32-NEXT:  # %bb.11: # %fp-to-i-cleanup
+; RV32-NEXT:    mv s3, a3
+; RV32-NEXT:  .LBB48_12: # %fp-to-i-cleanup
+; RV32-NEXT:    or a3, a6, s2
+; RV32-NEXT:    and a1, a5, a1
+; RV32-NEXT:    and a4, a5, a0
+; RV32-NEXT:    beq s3, a2, .LBB48_14
+; RV32-NEXT:  # %bb.13: # %fp-to-i-cleanup
+; RV32-NEXT:    sltu a0, a2, s3
+; RV32-NEXT:    j .LBB48_15
+; RV32-NEXT:  .LBB48_14:
+; RV32-NEXT:    snez a0, a3
+; RV32-NEXT:  .LBB48_15: # %fp-to-i-cleanup
+; RV32-NEXT:    and a4, a4, a1
+; RV32-NEXT:    li a2, -1
+; RV32-NEXT:    beq a4, a2, .LBB48_17
+; RV32-NEXT:  # %bb.16: # %fp-to-i-cleanup
+; RV32-NEXT:    srli a1, a1, 31
+; RV32-NEXT:    xori a0, a1, 1
+; RV32-NEXT:  .LBB48_17: # %fp-to-i-cleanup
+; RV32-NEXT:    bnez a0, .LBB48_19
+; RV32-NEXT:  # %bb.18: # %fp-to-i-cleanup
+; RV32-NEXT:    lui s3, 524288
+; RV32-NEXT:  .LBB48_19: # %fp-to-i-cleanup
 ; RV32-NEXT:    neg a0, a0
-; RV32-NEXT:    and a0, a0, a4
+; RV32-NEXT:    and a0, a0, a3
+; RV32-NEXT:    mv a1, s3
 ; RV32-NEXT:    lw ra, 124(sp) # 4-byte Folded Reload
 ; RV32-NEXT:    lw s0, 120(sp) # 4-byte Folded Reload
 ; RV32-NEXT:    lw s1, 116(sp) # 4-byte Folded Reload
@@ -7551,6 +7545,16 @@ define i64 @stest_f32i64_mm(float %x) {
 ; RV32-NEXT:    addi sp, sp, 128
 ; RV32-NEXT:    .cfi_def_cfa_offset 0
 ; RV32-NEXT:    ret
+; RV32-NEXT:  .LBB48_20: # %fp-to-i-if-then5
+; RV32-NEXT:    .cfi_restore_state
+; RV32-NEXT:    srli a1, a1, 31
+; RV32-NEXT:    lui a0, 524288
+; RV32-NEXT:    xori a2, a1, 1
+; RV32-NEXT:    addi s2, a1, -1
+; RV32-NEXT:    sub a1, a0, a2
+; RV32-NEXT:    mv s3, s2
+; RV32-NEXT:    mv a0, s2
+; RV32-NEXT:    j .LBB48_6
 ;
 ; RV64-LABEL: stest_f32i64_mm:
 ; RV64:       # %bb.0: # %entry
@@ -7599,6 +7603,7 @@ define i64 @utest_f32i64_mm(float %x) {
 ; RV32-NEXT:    .cfi_offset s9, -44
 ; RV32-NEXT:    .cfi_offset s10, -48
 ; RV32-NEXT:    .cfi_offset s11, -52
+; RV32-NEXT:    .cfi_remember_state
 ; RV32-NEXT:    fmv.x.w a1, fa0
 ; RV32-NEXT:    slli a0, a1, 1
 ; RV32-NEXT:    srli a0, a0, 24
@@ -7609,23 +7614,14 @@ define i64 @utest_f32i64_mm(float %x) {
 ; RV32-NEXT:    li s9, 0
 ; RV32-NEXT:    li a0, 0
 ; RV32-NEXT:    li a1, 0
-; RV32-NEXT:    j .LBB49_7
+; RV32-NEXT:    j .LBB49_6
 ; RV32-NEXT:  .LBB49_2: # %fp-to-i-if-end
 ; RV32-NEXT:    addi a2, a0, -255
 ; RV32-NEXT:    sltu a3, a2, a0
 ; RV32-NEXT:    sltiu a2, a2, -128
 ; RV32-NEXT:    or a2, a3, a2
-; RV32-NEXT:    beqz a2, .LBB49_4
-; RV32-NEXT:  # %bb.3: # %fp-to-i-if-then5
-; RV32-NEXT:    srli a1, a1, 31
-; RV32-NEXT:    lui a0, 524288
-; RV32-NEXT:    xori a2, a1, 1
-; RV32-NEXT:    addi s2, a1, -1
-; RV32-NEXT:    sub a1, a0, a2
-; RV32-NEXT:    mv s9, s2
-; RV32-NEXT:    mv a0, s2
-; RV32-NEXT:    j .LBB49_7
-; RV32-NEXT:  .LBB49_4: # %fp-to-i-if-end9
+; RV32-NEXT:    bnez a2, .LBB49_7
+; RV32-NEXT:  # %bb.3: # %fp-to-i-if-end9
 ; RV32-NEXT:    srai s0, a1, 31
 ; RV32-NEXT:    slli a1, a1, 9
 ; RV32-NEXT:    lui a2, 2048
@@ -7633,8 +7629,8 @@ define i64 @utest_f32i64_mm(float %x) {
 ; RV32-NEXT:    srli a1, a1, 9
 ; RV32-NEXT:    or a1, a1, a2
 ; RV32-NEXT:    ori s8, s0, 1
-; RV32-NEXT:    bltu a3, a0, .LBB49_6
-; RV32-NEXT:  # %bb.5: # %fp-to-i-if-then12
+; RV32-NEXT:    bltu a3, a0, .LBB49_5
+; RV32-NEXT:  # %bb.4: # %fp-to-i-if-then12
 ; RV32-NEXT:    sw zero, 24(sp)
 ; RV32-NEXT:    sw zero, 28(sp)
 ; RV32-NEXT:    sw zero, 32(sp)
@@ -7726,8 +7722,8 @@ define i64 @utest_f32i64_mm(float %x) {
 ; RV32-NEXT:    add a1, a1, a2
 ; RV32-NEXT:    add a1, a3, a1
 ; RV32-NEXT:    add a1, a1, a5
-; RV32-NEXT:    j .LBB49_7
-; RV32-NEXT:  .LBB49_6: # %fp-to-i-if-else
+; RV32-NEXT:    j .LBB49_6
+; RV32-NEXT:  .LBB49_5: # %fp-to-i-if-else
 ; RV32-NEXT:    sw a1, 56(sp)
 ; RV32-NEXT:    sw zero, 60(sp)
 ; RV32-NEXT:    sw zero, 64(sp)
@@ -7813,7 +7809,7 @@ define i64 @utest_f32i64_mm(float %x) {
 ; RV32-NEXT:    add a1, a1, s7
 ; RV32-NEXT:    sltu a2, a0, s4
 ; RV32-NEXT:    add a1, a1, a2
-; RV32-NEXT:  .LBB49_7: # %fp-to-i-cleanup
+; RV32-NEXT:  .LBB49_6: # %fp-to-i-cleanup
 ; RV32-NEXT:    or a2, a1, a0
 ; RV32-NEXT:    xori a0, a0, 1
 ; RV32-NEXT:    seqz a2, a2
@@ -7853,6 +7849,16 @@ define i64 @utest_f32i64_mm(float %x) {
 ; RV32-NEXT:    addi sp, sp, 128
 ; RV32-NEXT:    .cfi_def_cfa_offset 0
 ; RV32-NEXT:    ret
+; RV32-NEXT:  .LBB49_7: # %fp-to-i-if-then5
+; RV32-NEXT:    .cfi_restore_state
+; RV32-NEXT:    srli a1, a1, 31
+; RV32-NEXT:    lui a0, 524288
+; RV32-NEXT:    xori a2, a1, 1
+; RV32-NEXT:    addi s2, a1, -1
+; RV32-NEXT:    sub a1, a0, a2
+; RV32-NEXT:    mv s9, s2
+; RV32-NEXT:    mv a0, s2
+; RV32-NEXT:    j .LBB49_6
 ;
 ; RV64-LABEL: utest_f32i64_mm:
 ; RV64:       # %bb.0: # %entry
@@ -7917,24 +7923,14 @@ define i64 @ustest_f32i64_mm(float %x) {
 ; RV32-NEXT:    li s9, 0
 ; RV32-NEXT:    li a1, 0
 ; RV32-NEXT:    li a0, 0
-; RV32-NEXT:    j .LBB50_6
+; RV32-NEXT:    j .LBB50_8
 ; RV32-NEXT:  .LBB50_2: # %fp-to-i-if-end
 ; RV32-NEXT:    addi a2, a0, -255
 ; RV32-NEXT:    sltu a3, a2, a0
 ; RV32-NEXT:    sltiu a2, a2, -128
 ; RV32-NEXT:    or a2, a3, a2
-; RV32-NEXT:    beqz a2, .LBB50_4
-; RV32-NEXT:  # %bb.3: # %fp-to-i-if-then5
-; RV32-NEXT:    srli a1, a1, 31
-; RV32-NEXT:    lui a0, 524288
-; RV32-NEXT:    xori a2, a1, 1
-; RV32-NEXT:    addi s2, a1, -1
-; RV32-NEXT:    sub a0, a0, a2
-; RV32-NEXT:    mv s9, s2
-; RV32-NEXT:    mv a1, s2
-; RV32-NEXT:    beqz a0, .LBB50_6
-; RV32-NEXT:    j .LBB50_8
-; RV32-NEXT:  .LBB50_4: # %fp-to-i-if-end9
+; RV32-NEXT:    bnez a2, .LBB50_7
+; RV32-NEXT:  # %bb.3: # %fp-to-i-if-end9
 ; RV32-NEXT:    srai s0, a1, 31
 ; RV32-NEXT:    slli a1, a1, 9
 ; RV32-NEXT:    lui a2, 2048
@@ -7942,8 +7938,8 @@ define i64 @ustest_f32i64_mm(float %x) {
 ; RV32-NEXT:    srli a1, a1, 9
 ; RV32-NEXT:    or a1, a1, a2
 ; RV32-NEXT:    ori s8, s0, 1
-; RV32-NEXT:    bltu a3, a0, .LBB50_7
-; RV32-NEXT:  # %bb.5: # %fp-to-i-if-then12
+; RV32-NEXT:    bltu a3, a0, .LBB50_5
+; RV32-NEXT:  # %bb.4: # %fp-to-i-if-then12
 ; RV32-NEXT:    sw zero, 24(sp)
 ; RV32-NEXT:    sw zero, 28(sp)
 ; RV32-NEXT:    sw zero, 32(sp)
@@ -8035,11 +8031,9 @@ define i64 @ustest_f32i64_mm(float %x) {
 ; RV32-NEXT:    add a0, s6, a0
 ; RV32-NEXT:    add a0, a2, a0
 ; RV32-NEXT:    add a0, a0, a4
-; RV32-NEXT:    bnez a0, .LBB50_8
-; RV32-NEXT:  .LBB50_6:
-; RV32-NEXT:    seqz a2, a1
-; RV32-NEXT:    j .LBB50_9
-; RV32-NEXT:  .LBB50_7: # %fp-to-i-if-else
+; RV32-NEXT:    bnez a0, .LBB50_6
+; RV32-NEXT:    j .LBB50_8
+; RV32-NEXT:  .LBB50_5: # %fp-to-i-if-else
 ; RV32-NEXT:    sw a1, 56(sp)
 ; RV32-NEXT:    sw zero, 60(sp)
 ; RV32-NEXT:    sw zero, 64(sp)
@@ -8125,9 +8119,21 @@ define i64 @ustest_f32i64_mm(float %x) {
 ; RV32-NEXT:    add a0, a0, s7
 ; RV32-NEXT:    sltu a2, a1, s4
 ; RV32-NEXT:    add a0, a0, a2
-; RV32-NEXT:    beqz a0, .LBB50_6
-; RV32-NEXT:  .LBB50_8: # %fp-to-i-cleanup
+; RV32-NEXT:    beqz a0, .LBB50_8
+; RV32-NEXT:  .LBB50_6: # %fp-to-i-cleanup
 ; RV32-NEXT:    srli a2, a0, 31
+; RV32-NEXT:    j .LBB50_9
+; RV32-NEXT:  .LBB50_7: # %fp-to-i-if-then5
+; RV32-NEXT:    srli a1, a1, 31
+; RV32-NEXT:    lui a0, 524288
+; RV32-NEXT:    xori a2, a1, 1
+; RV32-NEXT:    addi s2, a1, -1
+; RV32-NEXT:    sub a0, a0, a2
+; RV32-NEXT:    mv s9, s2
+; RV32-NEXT:    mv a1, s2
+; RV32-NEXT:    bnez a0, .LBB50_6
+; RV32-NEXT:  .LBB50_8:
+; RV32-NEXT:    seqz a2, a1
 ; RV32-NEXT:  .LBB50_9: # %fp-to-i-cleanup
 ; RV32-NEXT:    xori a1, a1, 1
 ; RV32-NEXT:    or a1, a1, a0
diff --git a/llvm/test/Transforms/ExpandIRInsts/X86/expand-large-fp-convert-fptosi129.ll b/llvm/test/Transforms/ExpandIRInsts/X86/expand-large-fp-convert-fptosi129.ll
index d3a77de057ef5..ea893a27be3f8 100644
--- a/llvm/test/Transforms/ExpandIRInsts/X86/expand-large-fp-convert-fptosi129.ll
+++ b/llvm/test/Transforms/ExpandIRInsts/X86/expand-large-fp-convert-fptosi129.ll
@@ -1,8 +1,8 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals
 ; RUN: opt -S -mtriple=x86_64-- --expand-ir-insts < %s | FileCheck %s
 ; RUN: opt -S -mtriple=x86_64-- -passes='require<libcall-lowering-info>,expand-ir-insts' < %s | FileCheck %s
 
-define i129 @halftosi129(half %a) {
+define i129 @halftosi129(half %a) !prof !0 {
 ; CHECK-LABEL: @halftosi129(
 ; CHECK-NEXT:    [[TMP1:%.*]] = fptosi half [[A:%.*]] to i32
 ; CHECK-NEXT:    [[TMP2:%.*]] = sext i32 [[TMP1]] to i129
@@ -12,29 +12,29 @@ define i129 @halftosi129(half %a) {
   ret i129 %conv
 }
 
-define i129 @floattosi129(float %a) {
+define i129 @floattosi129(float %a) !prof !0 {
 ; CHECK-LABEL: @floattosi129(
 ; CHECK-NEXT:  fp-to-i-entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = bitcast float [[A:%.*]] to i32
 ; CHECK-NEXT:    [[TMP1:%.*]] = zext i32 [[TMP0]] to i129
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp sgt i32 [[TMP0]], -1
-; CHECK-NEXT:    [[TMP3:%.*]] = select i1 [[TMP2]], i129 1, i129 -1
+; CHECK-NEXT:    [[TMP3:%.*]] = select i1 [[TMP2]], i129 1, i129 -1, !prof [[PROF1:![0-9]+]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = lshr i129 [[TMP1]], 23
 ; CHECK-NEXT:    [[TMP5:%.*]] = and i129 [[TMP4]], 255
 ; CHECK-NEXT:    [[TMP6:%.*]] = and i129 [[TMP1]], 8388607
 ; CHECK-NEXT:    [[TMP7:%.*]] = or i129 [[TMP6]], 8388608
 ; CHECK-NEXT:    [[TMP8:%.*]] = icmp ult i129 [[TMP5]], 127
-; CHECK-NEXT:    br i1 [[TMP8]], label [[FP_TO_I_CLEANUP:%.*]], label [[FP_TO_I_IF_END:%.*]]
+; CHECK-NEXT:    br i1 [[TMP8]], label [[FP_TO_I_CLEANUP:%.*]], label [[FP_TO_I_IF_END:%.*]], !prof [[PROF1]]
 ; CHECK:       fp-to-i-if-end:
 ; CHECK-NEXT:    [[TMP9:%.*]] = add i129 [[TMP5]], -256
 ; CHECK-NEXT:    [[TMP10:%.*]] = icmp ult i129 [[TMP9]], -129
-; CHECK-NEXT:    br i1 [[TMP10]], label [[FP_TO_I_IF_THEN5:%.*]], label [[FP_TO_I_IF_END9:%.*]]
+; CHECK-NEXT:    br i1 [[TMP10]], label [[FP_TO_I_IF_THEN5:%.*]], label [[FP_TO_I_IF_END9:%.*]], !prof [[PROF2:![0-9]+]]
 ; CHECK:       fp-to-i-if-then5:
-; CHECK-NEXT:    [[TMP11:%.*]] = select i1 [[TMP2]], i129 340282366920938463463374607431768211455, i129 -340282366920938463463374607431768211456
+; CHECK-NEXT:    [[TMP11:%.*]] = select i1 [[TMP2]], i129 340282366920938463463374607431768211455, i129 -340282366920938463463374607431768211456, !prof [[PROF1]]
 ; CHECK-NEXT:    br label [[FP_TO_I_CLEANUP]]
 ; CHECK:       fp-to-i-if-end9:
 ; CHECK-NEXT:    [[TMP12:%.*]] = icmp ult i129 [[TMP5]], 150
-; CHECK-NEXT:    br i1 [[TMP12]], label [[FP_TO_I_IF_THEN12:%.*]], label [[FP_TO_I_IF_ELSE:%.*]]
+; CHECK-NEXT:    br i1 [[TMP12]], label [[FP_TO_I_IF_THEN12:%.*]], label [[FP_TO_I_IF_ELSE:%.*]], !prof [[PROF1]]
 ; CHECK:       fp-to-i-if-then12:
 ; CHECK-NEXT:    [[TMP13:%.*]] = sub i129 150, [[TMP5]]
 ; CHECK-NEXT:    [[TMP14:%.*]] = lshr i129 [[TMP7]], [[TMP13]]
@@ -69,7 +69,7 @@ define i129 @doubletosi129(double %a) {
 ; CHECK:       fp-to-i-if-end:
 ; CHECK-NEXT:    [[TMP9:%.*]] = add i129 [[TMP5]], -1152
 ; CHECK-NEXT:    [[TMP10:%.*]] = icmp ult i129 [[TMP9]], -129
-; CHECK-NEXT:    br i1 [[TMP10]], label [[FP_TO_I_IF_THEN5:%.*]], label [[FP_TO_I_IF_END9:%.*]]
+; CHECK-NEXT:    br i1 [[TMP10]], label [[FP_TO_I_IF_THEN5:%.*]], label [[FP_TO_I_IF_END9:%.*]], !prof [[PROF2]]
 ; CHECK:       fp-to-i-if-then5:
 ; CHECK-NEXT:    [[TMP11:%.*]] = select i1 [[TMP2]], i129 340282366920938463463374607431768211455, i129 -340282366920938463463374607431768211456
 ; CHECK-NEXT:    br label [[FP_TO_I_CLEANUP]]
@@ -111,7 +111,7 @@ define i129 @x86_fp80tosi129(x86_fp80 %a) {
 ; CHECK:       fp-to-i-if-end:
 ; CHECK-NEXT:    [[TMP10:%.*]] = add i129 [[TMP6]], -16512
 ; CHECK-NEXT:    [[TMP11:%.*]] = icmp ult i129 [[TMP10]], -129
-; CHECK-NEXT:    br i1 [[TMP11]], label [[FP_TO_I_IF_THEN5:%.*]], label [[FP_TO_I_IF_END9:%.*]]
+; CHECK-NEXT:    br i1 [[TMP11]], label [[FP_TO_I_IF_THEN5:%.*]], label [[FP_TO_I_IF_END9:%.*]], !prof [[PROF2]]
 ; CHECK:       fp-to-i-if-then5:
 ; CHECK-NEXT:    [[TMP12:%.*]] = select i1 [[TMP3]], i129 340282366920938463463374607431768211455, i129 -340282366920938463463374607431768211456
 ; CHECK-NEXT:    br label [[FP_TO_I_CLEANUP]]
@@ -152,7 +152,7 @@ define i129 @fp128tosi129(fp128 %a) {
 ; CHECK:       fp-to-i-if-end:
 ; CHECK-NEXT:    [[TMP9:%.*]] = add i129 [[TMP5]], -16512
 ; CHECK-NEXT:    [[TMP10:%.*]] = icmp ult i129 [[TMP9]], -129
-; CHECK-NEXT:    br i1 [[TMP10]], label [[FP_TO_I_IF_THEN5:%.*]], label [[FP_TO_I_IF_END9:%.*]]
+; CHECK-NEXT:    br i1 [[TMP10]], label [[FP_TO_I_IF_THEN5:%.*]], label [[FP_TO_I_IF_END9:%.*]], !prof [[PROF2]]
 ; CHECK:       fp-to-i-if-then5:
 ; CHECK-NEXT:    [[TMP11:%.*]] = select i1 [[TMP2]], i129 340282366920938463463374607431768211455, i129 -340282366920938463463374607431768211456
 ; CHECK-NEXT:    br label [[FP_TO_I_CLEANUP]]
@@ -194,7 +194,7 @@ define <2 x i129> @floattosi129v2(<2 x float> %a) {
 ; CHECK:       fp-to-i-if-end2:
 ; CHECK-NEXT:    [[TMP10:%.*]] = add i129 [[TMP6]], -256
 ; CHECK-NEXT:    [[TMP11:%.*]] = icmp ult i129 [[TMP10]], -129
-; CHECK-NEXT:    br i1 [[TMP11]], label [[FP_TO_I_IF_THEN53:%.*]], label [[FP_TO_I_IF_END94:%.*]]
+; CHECK-NEXT:    br i1 [[TMP11]], label [[FP_TO_I_IF_THEN53:%.*]], label [[FP_TO_I_IF_END94:%.*]], !prof [[PROF2]]
 ; CHECK:       fp-to-i-if-then53:
 ; CHECK-NEXT:    [[TMP12:%.*]] = select i1 [[TMP3]], i129 340282366920938463463374607431768211455, i129 -340282366920938463463374607431768211456
 ; CHECK-NEXT:    br label [[FP_TO_I_CLEANUP1]]
@@ -228,7 +228,7 @@ define <2 x i129> @floattosi129v2(<2 x float> %a) {
 ; CHECK:       fp-to-i-if-end:
 ; CHECK-NEXT:    [[TMP32:%.*]] = add i129 [[TMP28]], -256
 ; CHECK-NEXT:    [[TMP33:%.*]] = icmp ult i129 [[TMP32]], -129
-; CHECK-NEXT:    br i1 [[TMP33]], label [[FP_TO_I_IF_THEN5:%.*]], label [[FP_TO_I_IF_END9:%.*]]
+; CHECK-NEXT:    br i1 [[TMP33]], label [[FP_TO_I_IF_THEN5:%.*]], label [[FP_TO_I_IF_END9:%.*]], !prof [[PROF2]]
 ; CHECK:       fp-to-i-if-then5:
 ; CHECK-NEXT:    [[TMP34:%.*]] = select i1 [[TMP25]], i129 340282366920938463463374607431768211455, i129 -340282366920938463463374607431768211456
 ; CHECK-NEXT:    br label [[FP_TO_I_CLEANUP]]
@@ -253,3 +253,10 @@ define <2 x i129> @floattosi129v2(<2 x float> %a) {
   %conv = fptosi <2 x float> %a to <2 x i129>
   ret <2 x i129> %conv
 }
+
+!0 = !{!"function_entry_count", i64 1000}
+;.
+; CHECK: [[META0:![0-9]+]] = !{!"function_entry_count", i64 1000}
+; CHECK: [[PROF1]] = !{!"unknown", !"expand-ir-insts"}
+; CHECK: [[PROF2]] = !{!"branch_weights", i32 1, i32 1048575}
+;.
diff --git a/llvm/test/Transforms/ExpandIRInsts/X86/expand-large-fp-convert-fptoui129.ll b/llvm/test/Transforms/ExpandIRInsts/X86/expand-large-fp-convert-fptoui129.ll
index 07de91d404988..816f2a015e725 100644
--- a/llvm/test/Transforms/ExpandIRInsts/X86/expand-large-fp-convert-fptoui129.ll
+++ b/llvm/test/Transforms/ExpandIRInsts/X86/expand-large-fp-convert-fptoui129.ll
@@ -1,8 +1,8 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals
 ; RUN: opt -S -mtriple=x86_64-- --expand-ir-insts < %s | FileCheck %s
 ; RUN: opt -S -mtriple=x86_64-- -passes='require<libcall-lowering-info>,expand-ir-insts' < %s | FileCheck %s
 
-define i129 @halftoui129(half %a) {
+define i129 @halftoui129(half %a) !prof !0 {
 ; CHECK-LABEL: @halftoui129(
 ; CHECK-NEXT:    [[TMP1:%.*]] = fptoui half [[A:%.*]] to i32
 ; CHECK-NEXT:    [[TMP2:%.*]] = zext i32 [[TMP1]] to i129
@@ -12,29 +12,29 @@ define i129 @halftoui129(half %a) {
   ret i129 %conv
 }
 
-define i129 @floattoui129(float %a) {
+define i129 @floattoui129(float %a)  !prof !0 {
 ; CHECK-LABEL: @floattoui129(
 ; CHECK-NEXT:  fp-to-i-entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = bitcast float [[A:%.*]] to i32
 ; CHECK-NEXT:    [[TMP1:%.*]] = zext i32 [[TMP0]] to i129
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp sgt i32 [[TMP0]], -1
-; CHECK-NEXT:    [[TMP3:%.*]] = select i1 [[TMP2]], i129 1, i129 -1
+; CHECK-NEXT:    [[TMP3:%.*]] = select i1 [[TMP2]], i129 1, i129 -1, !prof [[PROF1:![0-9]+]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = lshr i129 [[TMP1]], 23
 ; CHECK-NEXT:    [[TMP5:%.*]] = and i129 [[TMP4]], 255
 ; CHECK-NEXT:    [[TMP6:%.*]] = and i129 [[TMP1]], 8388607
 ; CHECK-NEXT:    [[TMP7:%.*]] = or i129 [[TMP6]], 8388608
 ; CHECK-NEXT:    [[TMP8:%.*]] = icmp ult i129 [[TMP5]], 127
-; CHECK-NEXT:    br i1 [[TMP8]], label [[FP_TO_I_CLEANUP:%.*]], label [[FP_TO_I_IF_END:%.*]]
+; CHECK-NEXT:    br i1 [[TMP8]], label [[FP_TO_I_CLEANUP:%.*]], label [[FP_TO_I_IF_END:%.*]], !prof [[PROF1]]
 ; CHECK:       fp-to-i-if-end:
 ; CHECK-NEXT:    [[TMP9:%.*]] = add i129 [[TMP5]], -256
 ; CHECK-NEXT:    [[TMP10:%.*]] = icmp ult i129 [[TMP9]], -129
-; CHECK-NEXT:    br i1 [[TMP10]], label [[FP_TO_I_IF_THEN5:%.*]], label [[FP_TO_I_IF_END9:%.*]]
+; CHECK-NEXT:    br i1 [[TMP10]], label [[FP_TO_I_IF_THEN5:%.*]], label [[FP_TO_I_IF_END9:%.*]], !prof [[PROF2:![0-9]+]]
 ; CHECK:       fp-to-i-if-then5:
-; CHECK-NEXT:    [[TMP11:%.*]] = select i1 [[TMP2]], i129 340282366920938463463374607431768211455, i129 -340282366920938463463374607431768211456
+; CHECK-NEXT:    [[TMP11:%.*]] = select i1 [[TMP2]], i129 340282366920938463463374607431768211455, i129 -340282366920938463463374607431768211456, !prof [[PROF1]]
 ; CHECK-NEXT:    br label [[FP_TO_I_CLEANUP]]
 ; CHECK:       fp-to-i-if-end9:
 ; CHECK-NEXT:    [[TMP12:%.*]] = icmp ult i129 [[TMP5]], 150
-; CHECK-NEXT:    br i1 [[TMP12]], label [[FP_TO_I_IF_THEN12:%.*]], label [[FP_TO_I_IF_ELSE:%.*]]
+; CHECK-NEXT:    br i1 [[TMP12]], label [[FP_TO_I_IF_THEN12:%.*]], label [[FP_TO_I_IF_ELSE:%.*]], !prof [[PROF1]]
 ; CHECK:       fp-to-i-if-then12:
 ; CHECK-NEXT:    [[TMP13:%.*]] = sub i129 150, [[TMP5]]
 ; CHECK-NEXT:    [[TMP14:%.*]] = lshr i129 [[TMP7]], [[TMP13]]
@@ -69,7 +69,7 @@ define i129 @doubletoui129(double %a) {
 ; CHECK:       fp-to-i-if-end:
 ; CHECK-NEXT:    [[TMP9:%.*]] = add i129 [[TMP5]], -1152
 ; CHECK-NEXT:    [[TMP10:%.*]] = icmp ult i129 [[TMP9]], -129
-; CHECK-NEXT:    br i1 [[TMP10]], label [[FP_TO_I_IF_THEN5:%.*]], label [[FP_TO_I_IF_END9:%.*]]
+; CHECK-NEXT:    br i1 [[TMP10]], label [[FP_TO_I_IF_THEN5:%.*]], label [[FP_TO_I_IF_END9:%.*]], !prof [[PROF2]]
 ; CHECK:       fp-to-i-if-then5:
 ; CHECK-NEXT:    [[TMP11:%.*]] = select i1 [[TMP2]], i129 340282366920938463463374607431768211455, i129 -340282366920938463463374607431768211456
 ; CHECK-NEXT:    br label [[FP_TO_I_CLEANUP]]
@@ -111,7 +111,7 @@ define i129 @x86_fp80toui129(x86_fp80 %a) {
 ; CHECK:       fp-to-i-if-end:
 ; CHECK-NEXT:    [[TMP10:%.*]] = add i129 [[TMP6]], -16512
 ; CHECK-NEXT:    [[TMP11:%.*]] = icmp ult i129 [[TMP10]], -129
-; CHECK-NEXT:    br i1 [[TMP11]], label [[FP_TO_I_IF_THEN5:%.*]], label [[FP_TO_I_IF_END9:%.*]]
+; CHECK-NEXT:    br i1 [[TMP11]], label [[FP_TO_I_IF_THEN5:%.*]], label [[FP_TO_I_IF_END9:%.*]], !prof [[PROF2]]
 ; CHECK:       fp-to-i-if-then5:
 ; CHECK-NEXT:    [[TMP12:%.*]] = select i1 [[TMP3]], i129 340282366920938463463374607431768211455, i129 -340282366920938463463374607431768211456
 ; CHECK-NEXT:    br label [[FP_TO_I_CLEANUP]]
@@ -152,7 +152,7 @@ define i129 @fp128toui129(fp128 %a) {
 ; CHECK:       fp-to-i-if-end:
 ; CHECK-NEXT:    [[TMP9:%.*]] = add i129 [[TMP5]], -16512
 ; CHECK-NEXT:    [[TMP10:%.*]] = icmp ult i129 [[TMP9]], -129
-; CHECK-NEXT:    br i1 [[TMP10]], label [[FP_TO_I_IF_THEN5:%.*]], label [[FP_TO_I_IF_END9:%.*]]
+; CHECK-NEXT:    br i1 [[TMP10]], label [[FP_TO_I_IF_THEN5:%.*]], label [[FP_TO_I_IF_END9:%.*]], !prof [[PROF2]]
 ; CHECK:       fp-to-i-if-then5:
 ; CHECK-NEXT:    [[TMP11:%.*]] = select i1 [[TMP2]], i129 340282366920938463463374607431768211455, i129 -340282366920938463463374607431768211456
 ; CHECK-NEXT:    br label [[FP_TO_I_CLEANUP]]
@@ -194,7 +194,7 @@ define <2 x i129> @floattoui129v2(<2 x float> %a) {
 ; CHECK:       fp-to-i-if-end2:
 ; CHECK-NEXT:    [[TMP10:%.*]] = add i129 [[TMP6]], -256
 ; CHECK-NEXT:    [[TMP11:%.*]] = icmp ult i129 [[TMP10]], -129
-; CHECK-NEXT:    br i1 [[TMP11]], label [[FP_TO_I_IF_THEN53:%.*]], label [[FP_TO_I_IF_END94:%.*]]
+; CHECK-NEXT:    br i1 [[TMP11]], label [[FP_TO_I_IF_THEN53:%.*]], label [[FP_TO_I_IF_END94:%.*]], !prof [[PROF2]]
 ; CHECK:       fp-to-i-if-then53:
 ; CHECK-NEXT:    [[TMP12:%.*]] = select i1 [[TMP3]], i129 340282366920938463463374607431768211455, i129 -340282366920938463463374607431768211456
 ; CHECK-NEXT:    br label [[FP_TO_I_CLEANUP1]]
@@ -228,7 +228,7 @@ define <2 x i129> @floattoui129v2(<2 x float> %a) {
 ; CHECK:       fp-to-i-if-end:
 ; CHECK-NEXT:    [[TMP32:%.*]] = add i129 [[TMP28]], -256
 ; CHECK-NEXT:    [[TMP33:%.*]] = icmp ult i129 [[TMP32]], -129
-; CHECK-NEXT:    br i1 [[TMP33]], label [[FP_TO_I_IF_THEN5:%.*]], label [[FP_TO_I_IF_END9:%.*]]
+; CHECK-NEXT:    br i1 [[TMP33]], label [[FP_TO_I_IF_THEN5:%.*]], label [[FP_TO_I_IF_END9:%.*]], !prof [[PROF2]]
 ; CHECK:       fp-to-i-if-then5:
 ; CHECK-NEXT:    [[TMP34:%.*]] = select i1 [[TMP25]], i129 340282366920938463463374607431768211455, i129 -340282366920938463463374607431768211456
 ; CHECK-NEXT:    br label [[FP_TO_I_CLEANUP]]
@@ -253,3 +253,10 @@ define <2 x i129> @floattoui129v2(<2 x float> %a) {
   %conv = fptoui <2 x float> %a to <2 x i129>
   ret <2 x i129> %conv
 }
+
+!0 = !{!"function_entry_count", i64 1000}
+;.
+; CHECK: [[META0:![0-9]+]] = !{!"function_entry_count", i64 1000}
+; CHECK: [[PROF1]] = !{!"unknown", !"expand-ir-insts"}
+; CHECK: [[PROF2]] = !{!"branch_weights", i32 1, i32 1048575}
+;.
diff --git a/llvm/test/Transforms/ExpandIRInsts/X86/expand-large-fp-convert-si129tofp.ll b/llvm/test/Transforms/ExpandIRInsts/X86/expand-large-fp-convert-si129tofp.ll
index fab6e431872e7..d035d6e11fc05 100644
--- a/llvm/test/Transforms/ExpandIRInsts/X86/expand-large-fp-convert-si129tofp.ll
+++ b/llvm/test/Transforms/ExpandIRInsts/X86/expand-large-fp-convert-si129tofp.ll
@@ -1,12 +1,12 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals
 ; RUN: opt -S -mtriple=x86_64-- --expand-ir-insts < %s | FileCheck %s
 ; RUN: opt -S -mtriple=x86_64-- -passes='require<libcall-lowering-info>,expand-ir-insts' < %s | FileCheck %s
 
-define half @si129tohalf(i129 %a) {
+define half @si129tohalf(i129 %a) !prof !0 {
 ; CHECK-LABEL: @si129tohalf(
 ; CHECK-NEXT:  itofp-entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = icmp eq i129 [[A:%.*]], 0
-; CHECK-NEXT:    br i1 [[TMP0]], label [[ITOFP_RETURN:%.*]], label [[ITOFP_IF_END:%.*]]
+; CHECK-NEXT:    br i1 [[TMP0]], label [[ITOFP_RETURN:%.*]], label [[ITOFP_IF_END:%.*]], !prof [[PROF1:![0-9]+]]
 ; CHECK:       itofp-if-end:
 ; CHECK-NEXT:    [[TMP1:%.*]] = ashr i129 [[A]], 128
 ; CHECK-NEXT:    [[TMP2:%.*]] = xor i129 [[TMP1]], [[A]]
@@ -16,12 +16,12 @@ define half @si129tohalf(i129 %a) {
 ; CHECK-NEXT:    [[TMP6:%.*]] = sub i32 129, [[TMP5]]
 ; CHECK-NEXT:    [[TMP7:%.*]] = sub i32 128, [[TMP5]]
 ; CHECK-NEXT:    [[TMP8:%.*]] = icmp sgt i32 [[TMP6]], 24
-; CHECK-NEXT:    br i1 [[TMP8]], label [[ITOFP_IF_THEN4:%.*]], label [[ITOFP_IF_ELSE:%.*]]
+; CHECK-NEXT:    br i1 [[TMP8]], label [[ITOFP_IF_THEN4:%.*]], label [[ITOFP_IF_ELSE:%.*]], !prof [[PROF2:![0-9]+]]
 ; CHECK:       itofp-if-then4:
 ; CHECK-NEXT:    switch i32 [[TMP6]], label [[ITOFP_SW_DEFAULT:%.*]] [
 ; CHECK-NEXT:      i32 25, label [[ITOFP_SW_BB:%.*]]
 ; CHECK-NEXT:      i32 26, label [[ITOFP_SW_EPILOG:%.*]]
-; CHECK-NEXT:    ]
+; CHECK-NEXT:    ], !prof [[PROF3:![0-9]+]]
 ; CHECK:       itofp-sw-bb:
 ; CHECK-NEXT:    [[TMP9:%.*]] = shl i129 [[TMP3]], 1
 ; CHECK-NEXT:    br label [[ITOFP_SW_EPILOG]]
@@ -51,7 +51,7 @@ define half @si129tohalf(i129 %a) {
 ; CHECK-NEXT:    [[TMP29:%.*]] = trunc i129 [[TMP27]] to i32
 ; CHECK-NEXT:    [[TMP30:%.*]] = lshr i129 [[TMP27]], 32
 ; CHECK-NEXT:    [[TMP31:%.*]] = trunc i129 [[TMP30]] to i32
-; CHECK-NEXT:    br i1 [[TMP28]], label [[ITOFP_IF_END26:%.*]], label [[ITOFP_IF_THEN20:%.*]]
+; CHECK-NEXT:    br i1 [[TMP28]], label [[ITOFP_IF_END26:%.*]], label [[ITOFP_IF_THEN20:%.*]], !prof [[PROF2]]
 ; CHECK:       itofp-if-then20:
 ; CHECK-NEXT:    [[TMP32:%.*]] = ashr i129 [[TMP26]], 3
 ; CHECK-NEXT:    [[TMP33:%.*]] = trunc i129 [[TMP32]] to i32
@@ -91,7 +91,7 @@ define float @si129tofloat(i129 %a) {
 ; CHECK-LABEL: @si129tofloat(
 ; CHECK-NEXT:  itofp-entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = icmp eq i129 [[A:%.*]], 0
-; CHECK-NEXT:    br i1 [[TMP0]], label [[ITOFP_RETURN:%.*]], label [[ITOFP_IF_END:%.*]]
+; CHECK-NEXT:    br i1 [[TMP0]], label [[ITOFP_RETURN:%.*]], label [[ITOFP_IF_END:%.*]], !prof [[PROF1]]
 ; CHECK:       itofp-if-end:
 ; CHECK-NEXT:    [[TMP1:%.*]] = ashr i129 [[A]], 128
 ; CHECK-NEXT:    [[TMP2:%.*]] = xor i129 [[TMP1]], [[A]]
@@ -101,12 +101,12 @@ define float @si129tofloat(i129 %a) {
 ; CHECK-NEXT:    [[TMP6:%.*]] = sub i32 129, [[TMP5]]
 ; CHECK-NEXT:    [[TMP7:%.*]] = sub i32 128, [[TMP5]]
 ; CHECK-NEXT:    [[TMP8:%.*]] = icmp sgt i32 [[TMP6]], 24
-; CHECK-NEXT:    br i1 [[TMP8]], label [[ITOFP_IF_THEN4:%.*]], label [[ITOFP_IF_ELSE:%.*]]
+; CHECK-NEXT:    br i1 [[TMP8]], label [[ITOFP_IF_THEN4:%.*]], label [[ITOFP_IF_ELSE:%.*]], !prof [[PROF2]]
 ; CHECK:       itofp-if-then4:
 ; CHECK-NEXT:    switch i32 [[TMP6]], label [[ITOFP_SW_DEFAULT:%.*]] [
 ; CHECK-NEXT:      i32 25, label [[ITOFP_SW_BB:%.*]]
 ; CHECK-NEXT:      i32 26, label [[ITOFP_SW_EPILOG:%.*]]
-; CHECK-NEXT:    ]
+; CHECK-NEXT:    ], !prof [[PROF3]]
 ; CHECK:       itofp-sw-bb:
 ; CHECK-NEXT:    [[TMP9:%.*]] = shl i129 [[TMP3]], 1
 ; CHECK-NEXT:    br label [[ITOFP_SW_EPILOG]]
@@ -136,7 +136,7 @@ define float @si129tofloat(i129 %a) {
 ; CHECK-NEXT:    [[TMP29:%.*]] = trunc i129 [[TMP27]] to i32
 ; CHECK-NEXT:    [[TMP30:%.*]] = lshr i129 [[TMP27]], 32
 ; CHECK-NEXT:    [[TMP31:%.*]] = trunc i129 [[TMP30]] to i32
-; CHECK-NEXT:    br i1 [[TMP28]], label [[ITOFP_IF_END26:%.*]], label [[ITOFP_IF_THEN20:%.*]]
+; CHECK-NEXT:    br i1 [[TMP28]], label [[ITOFP_IF_END26:%.*]], label [[ITOFP_IF_THEN20:%.*]], !prof [[PROF2]]
 ; CHECK:       itofp-if-then20:
 ; CHECK-NEXT:    [[TMP32:%.*]] = ashr i129 [[TMP26]], 3
 ; CHECK-NEXT:    [[TMP33:%.*]] = trunc i129 [[TMP32]] to i32
@@ -175,7 +175,7 @@ define double @si129todouble(i129 %a) {
 ; CHECK-LABEL: @si129todouble(
 ; CHECK-NEXT:  itofp-entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = icmp eq i129 [[A:%.*]], 0
-; CHECK-NEXT:    br i1 [[TMP0]], label [[ITOFP_RETURN:%.*]], label [[ITOFP_IF_END:%.*]]
+; CHECK-NEXT:    br i1 [[TMP0]], label [[ITOFP_RETURN:%.*]], label [[ITOFP_IF_END:%.*]], !prof [[PROF1]]
 ; CHECK:       itofp-if-end:
 ; CHECK-NEXT:    [[TMP1:%.*]] = ashr i129 [[A]], 128
 ; CHECK-NEXT:    [[TMP2:%.*]] = xor i129 [[TMP1]], [[A]]
@@ -185,12 +185,12 @@ define double @si129todouble(i129 %a) {
 ; CHECK-NEXT:    [[TMP6:%.*]] = sub i32 129, [[TMP5]]
 ; CHECK-NEXT:    [[TMP7:%.*]] = sub i32 128, [[TMP5]]
 ; CHECK-NEXT:    [[TMP8:%.*]] = icmp sgt i32 [[TMP6]], 53
-; CHECK-NEXT:    br i1 [[TMP8]], label [[ITOFP_IF_THEN4:%.*]], label [[ITOFP_IF_ELSE:%.*]]
+; CHECK-NEXT:    br i1 [[TMP8]], label [[ITOFP_IF_THEN4:%.*]], label [[ITOFP_IF_ELSE:%.*]], !prof [[PROF2]]
 ; CHECK:       itofp-if-then4:
 ; CHECK-NEXT:    switch i32 [[TMP6]], label [[ITOFP_SW_DEFAULT:%.*]] [
 ; CHECK-NEXT:      i32 54, label [[ITOFP_SW_BB:%.*]]
 ; CHECK-NEXT:      i32 55, label [[ITOFP_SW_EPILOG:%.*]]
-; CHECK-NEXT:    ]
+; CHECK-NEXT:    ], !prof [[PROF3]]
 ; CHECK:       itofp-sw-bb:
 ; CHECK-NEXT:    [[TMP9:%.*]] = shl i129 [[TMP3]], 1
 ; CHECK-NEXT:    br label [[ITOFP_SW_EPILOG]]
@@ -220,7 +220,7 @@ define double @si129todouble(i129 %a) {
 ; CHECK-NEXT:    [[TMP29:%.*]] = trunc i129 [[TMP27]] to i64
 ; CHECK-NEXT:    [[TMP30:%.*]] = lshr i129 [[TMP27]], 32
 ; CHECK-NEXT:    [[TMP31:%.*]] = trunc i129 [[TMP30]] to i32
-; CHECK-NEXT:    br i1 [[TMP28]], label [[ITOFP_IF_END26:%.*]], label [[ITOFP_IF_THEN20:%.*]]
+; CHECK-NEXT:    br i1 [[TMP28]], label [[ITOFP_IF_END26:%.*]], label [[ITOFP_IF_THEN20:%.*]], !prof [[PROF2]]
 ; CHECK:       itofp-if-then20:
 ; CHECK-NEXT:    [[TMP32:%.*]] = ashr i129 [[TMP26]], 3
 ; CHECK-NEXT:    [[TMP33:%.*]] = trunc i129 [[TMP32]] to i64
@@ -264,7 +264,7 @@ define x86_fp80 @si129tox86_fp80(i129 %a) {
 ; CHECK-LABEL: @si129tox86_fp80(
 ; CHECK-NEXT:  itofp-entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = icmp eq i129 [[A:%.*]], 0
-; CHECK-NEXT:    br i1 [[TMP0]], label [[ITOFP_RETURN:%.*]], label [[ITOFP_IF_END:%.*]]
+; CHECK-NEXT:    br i1 [[TMP0]], label [[ITOFP_RETURN:%.*]], label [[ITOFP_IF_END:%.*]], !prof [[PROF1]]
 ; CHECK:       itofp-if-end:
 ; CHECK-NEXT:    [[TMP1:%.*]] = ashr i129 [[A]], 128
 ; CHECK-NEXT:    [[TMP2:%.*]] = xor i129 [[TMP1]], [[A]]
@@ -274,12 +274,12 @@ define x86_fp80 @si129tox86_fp80(i129 %a) {
 ; CHECK-NEXT:    [[TMP6:%.*]] = sub i129 129, [[TMP4]]
 ; CHECK-NEXT:    [[TMP7:%.*]] = sub i129 128, [[TMP4]]
 ; CHECK-NEXT:    [[TMP8:%.*]] = icmp sgt i129 [[TMP6]], 113
-; CHECK-NEXT:    br i1 [[TMP8]], label [[ITOFP_IF_THEN4:%.*]], label [[ITOFP_IF_ELSE:%.*]]
+; CHECK-NEXT:    br i1 [[TMP8]], label [[ITOFP_IF_THEN4:%.*]], label [[ITOFP_IF_ELSE:%.*]], !prof [[PROF2]]
 ; CHECK:       itofp-if-then4:
 ; CHECK-NEXT:    switch i129 [[TMP6]], label [[ITOFP_SW_DEFAULT:%.*]] [
 ; CHECK-NEXT:      i129 114, label [[ITOFP_SW_BB:%.*]]
 ; CHECK-NEXT:      i129 115, label [[ITOFP_SW_EPILOG:%.*]]
-; CHECK-NEXT:    ]
+; CHECK-NEXT:    ], !prof [[PROF3]]
 ; CHECK:       itofp-sw-bb:
 ; CHECK-NEXT:    [[TMP9:%.*]] = shl i129 [[TMP3]], 1
 ; CHECK-NEXT:    br label [[ITOFP_SW_EPILOG]]
@@ -307,7 +307,7 @@ define x86_fp80 @si129tox86_fp80(i129 %a) {
 ; CHECK-NEXT:    [[TMP27:%.*]] = trunc i129 [[TMP25]] to i128
 ; CHECK-NEXT:    [[TMP28:%.*]] = lshr i129 [[TMP25]], 32
 ; CHECK-NEXT:    [[TMP29:%.*]] = trunc i129 [[TMP7]] to i64
-; CHECK-NEXT:    br i1 [[TMP26]], label [[ITOFP_IF_END26:%.*]], label [[ITOFP_IF_THEN20:%.*]]
+; CHECK-NEXT:    br i1 [[TMP26]], label [[ITOFP_IF_END26:%.*]], label [[ITOFP_IF_THEN20:%.*]], !prof [[PROF2]]
 ; CHECK:       itofp-if-then20:
 ; CHECK-NEXT:    [[TMP30:%.*]] = ashr i129 [[TMP24]], 3
 ; CHECK-NEXT:    [[TMP31:%.*]] = trunc i129 [[TMP30]] to i128
@@ -348,7 +348,7 @@ define fp128 @si129tofp128(i129 %a) {
 ; CHECK-LABEL: @si129tofp128(
 ; CHECK-NEXT:  itofp-entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = icmp eq i129 [[A:%.*]], 0
-; CHECK-NEXT:    br i1 [[TMP0]], label [[ITOFP_RETURN:%.*]], label [[ITOFP_IF_END:%.*]]
+; CHECK-NEXT:    br i1 [[TMP0]], label [[ITOFP_RETURN:%.*]], label [[ITOFP_IF_END:%.*]], !prof [[PROF1]]
 ; CHECK:       itofp-if-end:
 ; CHECK-NEXT:    [[TMP1:%.*]] = ashr i129 [[A]], 128
 ; CHECK-NEXT:    [[TMP2:%.*]] = xor i129 [[TMP1]], [[A]]
@@ -358,12 +358,12 @@ define fp128 @si129tofp128(i129 %a) {
 ; CHECK-NEXT:    [[TMP6:%.*]] = sub i129 129, [[TMP4]]
 ; CHECK-NEXT:    [[TMP7:%.*]] = sub i129 128, [[TMP4]]
 ; CHECK-NEXT:    [[TMP8:%.*]] = icmp sgt i129 [[TMP6]], 113
-; CHECK-NEXT:    br i1 [[TMP8]], label [[ITOFP_IF_THEN4:%.*]], label [[ITOFP_IF_ELSE:%.*]]
+; CHECK-NEXT:    br i1 [[TMP8]], label [[ITOFP_IF_THEN4:%.*]], label [[ITOFP_IF_ELSE:%.*]], !prof [[PROF2]]
 ; CHECK:       itofp-if-then4:
 ; CHECK-NEXT:    switch i129 [[TMP6]], label [[ITOFP_SW_DEFAULT:%.*]] [
 ; CHECK-NEXT:      i129 114, label [[ITOFP_SW_BB:%.*]]
 ; CHECK-NEXT:      i129 115, label [[ITOFP_SW_EPILOG:%.*]]
-; CHECK-NEXT:    ]
+; CHECK-NEXT:    ], !prof [[PROF3]]
 ; CHECK:       itofp-sw-bb:
 ; CHECK-NEXT:    [[TMP9:%.*]] = shl i129 [[TMP3]], 1
 ; CHECK-NEXT:    br label [[ITOFP_SW_EPILOG]]
@@ -391,7 +391,7 @@ define fp128 @si129tofp128(i129 %a) {
 ; CHECK-NEXT:    [[TMP27:%.*]] = trunc i129 [[TMP25]] to i128
 ; CHECK-NEXT:    [[TMP28:%.*]] = lshr i129 [[TMP25]], 32
 ; CHECK-NEXT:    [[TMP29:%.*]] = trunc i129 [[TMP7]] to i64
-; CHECK-NEXT:    br i1 [[TMP26]], label [[ITOFP_IF_END26:%.*]], label [[ITOFP_IF_THEN20:%.*]]
+; CHECK-NEXT:    br i1 [[TMP26]], label [[ITOFP_IF_END26:%.*]], label [[ITOFP_IF_THEN20:%.*]], !prof [[PROF2]]
 ; CHECK:       itofp-if-then20:
 ; CHECK-NEXT:    [[TMP30:%.*]] = ashr i129 [[TMP24]], 3
 ; CHECK-NEXT:    [[TMP31:%.*]] = trunc i129 [[TMP30]] to i128
@@ -432,7 +432,7 @@ define <2 x float> @si129tofloatv2(<2 x i129> %a) {
 ; CHECK-NEXT:  itofp-entryitofp-entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <2 x i129> [[A:%.*]], i64 0
 ; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i129 [[TMP0]], 0
-; CHECK-NEXT:    br i1 [[TMP1]], label [[ITOFP_RETURN1:%.*]], label [[ITOFP_IF_END2:%.*]]
+; CHECK-NEXT:    br i1 [[TMP1]], label [[ITOFP_RETURN1:%.*]], label [[ITOFP_IF_END2:%.*]], !prof [[PROF1]]
 ; CHECK:       itofp-if-end2:
 ; CHECK-NEXT:    [[TMP2:%.*]] = ashr i129 [[TMP0]], 128
 ; CHECK-NEXT:    [[TMP3:%.*]] = xor i129 [[TMP2]], [[TMP0]]
@@ -442,12 +442,12 @@ define <2 x float> @si129tofloatv2(<2 x i129> %a) {
 ; CHECK-NEXT:    [[TMP7:%.*]] = sub i32 129, [[TMP6]]
 ; CHECK-NEXT:    [[TMP8:%.*]] = sub i32 128, [[TMP6]]
 ; CHECK-NEXT:    [[TMP9:%.*]] = icmp sgt i32 [[TMP7]], 24
-; CHECK-NEXT:    br i1 [[TMP9]], label [[ITOFP_IF_THEN43:%.*]], label [[ITOFP_IF_ELSE8:%.*]]
+; CHECK-NEXT:    br i1 [[TMP9]], label [[ITOFP_IF_THEN43:%.*]], label [[ITOFP_IF_ELSE8:%.*]], !prof [[PROF2]]
 ; CHECK:       itofp-if-then43:
 ; CHECK-NEXT:    switch i32 [[TMP7]], label [[ITOFP_SW_DEFAULT5:%.*]] [
 ; CHECK-NEXT:      i32 25, label [[ITOFP_SW_BB4:%.*]]
 ; CHECK-NEXT:      i32 26, label [[ITOFP_SW_EPILOG6:%.*]]
-; CHECK-NEXT:    ]
+; CHECK-NEXT:    ], !prof [[PROF3]]
 ; CHECK:       itofp-sw-bb4:
 ; CHECK-NEXT:    [[TMP10:%.*]] = shl i129 [[TMP4]], 1
 ; CHECK-NEXT:    br label [[ITOFP_SW_EPILOG6]]
@@ -477,7 +477,7 @@ define <2 x float> @si129tofloatv2(<2 x i129> %a) {
 ; CHECK-NEXT:    [[TMP30:%.*]] = trunc i129 [[TMP28]] to i32
 ; CHECK-NEXT:    [[TMP31:%.*]] = lshr i129 [[TMP28]], 32
 ; CHECK-NEXT:    [[TMP32:%.*]] = trunc i129 [[TMP31]] to i32
-; CHECK-NEXT:    br i1 [[TMP29]], label [[ITOFP_IF_END269:%.*]], label [[ITOFP_IF_THEN207:%.*]]
+; CHECK-NEXT:    br i1 [[TMP29]], label [[ITOFP_IF_END269:%.*]], label [[ITOFP_IF_THEN207:%.*]], !prof [[PROF2]]
 ; CHECK:       itofp-if-then207:
 ; CHECK-NEXT:    [[TMP33:%.*]] = ashr i129 [[TMP27]], 3
 ; CHECK-NEXT:    [[TMP34:%.*]] = trunc i129 [[TMP33]] to i32
@@ -509,7 +509,7 @@ define <2 x float> @si129tofloatv2(<2 x i129> %a) {
 ; CHECK-NEXT:    [[TMP54:%.*]] = insertelement <2 x float> poison, float [[TMP53]], i64 0
 ; CHECK-NEXT:    [[TMP55:%.*]] = extractelement <2 x i129> [[A]], i64 1
 ; CHECK-NEXT:    [[TMP56:%.*]] = icmp eq i129 [[TMP55]], 0
-; CHECK-NEXT:    br i1 [[TMP56]], label [[ITOFP_RETURN:%.*]], label [[ITOFP_IF_END:%.*]]
+; CHECK-NEXT:    br i1 [[TMP56]], label [[ITOFP_RETURN:%.*]], label [[ITOFP_IF_END:%.*]], !prof [[PROF1]]
 ; CHECK:       itofp-if-end:
 ; CHECK-NEXT:    [[TMP57:%.*]] = ashr i129 [[TMP55]], 128
 ; CHECK-NEXT:    [[TMP58:%.*]] = xor i129 [[TMP57]], [[TMP55]]
@@ -519,12 +519,12 @@ define <2 x float> @si129tofloatv2(<2 x i129> %a) {
 ; CHECK-NEXT:    [[TMP62:%.*]] = sub i32 129, [[TMP61]]
 ; CHECK-NEXT:    [[TMP63:%.*]] = sub i32 128, [[TMP61]]
 ; CHECK-NEXT:    [[TMP64:%.*]] = icmp sgt i32 [[TMP62]], 24
-; CHECK-NEXT:    br i1 [[TMP64]], label [[ITOFP_IF_THEN4:%.*]], label [[ITOFP_IF_ELSE:%.*]]
+; CHECK-NEXT:    br i1 [[TMP64]], label [[ITOFP_IF_THEN4:%.*]], label [[ITOFP_IF_ELSE:%.*]], !prof [[PROF2]]
 ; CHECK:       itofp-if-then4:
 ; CHECK-NEXT:    switch i32 [[TMP62]], label [[ITOFP_SW_DEFAULT:%.*]] [
 ; CHECK-NEXT:      i32 25, label [[ITOFP_SW_BB:%.*]]
 ; CHECK-NEXT:      i32 26, label [[ITOFP_SW_EPILOG:%.*]]
-; CHECK-NEXT:    ]
+; CHECK-NEXT:    ], !prof [[PROF3]]
 ; CHECK:       itofp-sw-bb:
 ; CHECK-NEXT:    [[TMP65:%.*]] = shl i129 [[TMP59]], 1
 ; CHECK-NEXT:    br label [[ITOFP_SW_EPILOG]]
@@ -554,7 +554,7 @@ define <2 x float> @si129tofloatv2(<2 x i129> %a) {
 ; CHECK-NEXT:    [[TMP85:%.*]] = trunc i129 [[TMP83]] to i32
 ; CHECK-NEXT:    [[TMP86:%.*]] = lshr i129 [[TMP83]], 32
 ; CHECK-NEXT:    [[TMP87:%.*]] = trunc i129 [[TMP86]] to i32
-; CHECK-NEXT:    br i1 [[TMP84]], label [[ITOFP_IF_END26:%.*]], label [[ITOFP_IF_THEN20:%.*]]
+; CHECK-NEXT:    br i1 [[TMP84]], label [[ITOFP_IF_END26:%.*]], label [[ITOFP_IF_THEN20:%.*]], !prof [[PROF2]]
 ; CHECK:       itofp-if-then20:
 ; CHECK-NEXT:    [[TMP88:%.*]] = ashr i129 [[TMP82]], 3
 ; CHECK-NEXT:    [[TMP89:%.*]] = trunc i129 [[TMP88]] to i32
@@ -589,3 +589,13 @@ define <2 x float> @si129tofloatv2(<2 x i129> %a) {
   %conv = sitofp <2 x i129> %a to <2 x float>
   ret <2 x float> %conv
 }
+
+!0 = !{!"function_entry_count", i64 1000}
+;.
+; CHECK: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+;.
+; CHECK: [[META0:![0-9]+]] = !{!"function_entry_count", i64 1000}
+; CHECK: [[PROF1]] = !{!"branch_weights", i32 1, i32 1048575}
+; CHECK: [[PROF2]] = !{!"branch_weights", i32 1048575, i32 1}
+; CHECK: [[PROF3]] = !{!"branch_weights", i32 1048575, i32 1, i32 1}
+;.
diff --git a/llvm/test/Transforms/ExpandIRInsts/X86/expand-large-fp-convert-ui129tofp.ll b/llvm/test/Transforms/ExpandIRInsts/X86/expand-large-fp-convert-ui129tofp.ll
index 3a3a8e40ea8d1..562200989ae8c 100644
--- a/llvm/test/Transforms/ExpandIRInsts/X86/expand-large-fp-convert-ui129tofp.ll
+++ b/llvm/test/Transforms/ExpandIRInsts/X86/expand-large-fp-convert-ui129tofp.ll
@@ -1,12 +1,12 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals
 ; RUN: opt -S -mtriple=x86_64-- --expand-ir-insts < %s | FileCheck %s
 ; RUN: opt -S -mtriple=x86_64-- -passes='require<libcall-lowering-info>,expand-ir-insts' < %s | FileCheck %s
 
-define half @ui129tohalf(i129 %a) {
+define half @ui129tohalf(i129 %a) !prof !0 {
 ; CHECK-LABEL: @ui129tohalf(
 ; CHECK-NEXT:  itofp-entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = icmp eq i129 [[A:%.*]], 0
-; CHECK-NEXT:    br i1 [[TMP0]], label [[ITOFP_RETURN:%.*]], label [[ITOFP_IF_END:%.*]]
+; CHECK-NEXT:    br i1 [[TMP0]], label [[ITOFP_RETURN:%.*]], label [[ITOFP_IF_END:%.*]], !prof [[PROF1:![0-9]+]]
 ; CHECK:       itofp-if-end:
 ; CHECK-NEXT:    [[TMP1:%.*]] = ashr i129 [[A]], 128
 ; CHECK-NEXT:    [[TMP2:%.*]] = xor i129 [[TMP1]], [[A]]
@@ -16,12 +16,12 @@ define half @ui129tohalf(i129 %a) {
 ; CHECK-NEXT:    [[TMP6:%.*]] = sub i32 129, [[TMP5]]
 ; CHECK-NEXT:    [[TMP7:%.*]] = sub i32 128, [[TMP5]]
 ; CHECK-NEXT:    [[TMP8:%.*]] = icmp sgt i32 [[TMP6]], 24
-; CHECK-NEXT:    br i1 [[TMP8]], label [[ITOFP_IF_THEN4:%.*]], label [[ITOFP_IF_ELSE:%.*]]
+; CHECK-NEXT:    br i1 [[TMP8]], label [[ITOFP_IF_THEN4:%.*]], label [[ITOFP_IF_ELSE:%.*]], !prof [[PROF2:![0-9]+]]
 ; CHECK:       itofp-if-then4:
 ; CHECK-NEXT:    switch i32 [[TMP6]], label [[ITOFP_SW_DEFAULT:%.*]] [
 ; CHECK-NEXT:      i32 25, label [[ITOFP_SW_BB:%.*]]
 ; CHECK-NEXT:      i32 26, label [[ITOFP_SW_EPILOG:%.*]]
-; CHECK-NEXT:    ]
+; CHECK-NEXT:    ], !prof [[PROF3:![0-9]+]]
 ; CHECK:       itofp-sw-bb:
 ; CHECK-NEXT:    [[TMP9:%.*]] = shl i129 [[A]], 1
 ; CHECK-NEXT:    br label [[ITOFP_SW_EPILOG]]
@@ -51,7 +51,7 @@ define half @ui129tohalf(i129 %a) {
 ; CHECK-NEXT:    [[TMP29:%.*]] = trunc i129 [[TMP27]] to i32
 ; CHECK-NEXT:    [[TMP30:%.*]] = lshr i129 [[TMP27]], 32
 ; CHECK-NEXT:    [[TMP31:%.*]] = trunc i129 [[TMP30]] to i32
-; CHECK-NEXT:    br i1 [[TMP28]], label [[ITOFP_IF_END26:%.*]], label [[ITOFP_IF_THEN20:%.*]]
+; CHECK-NEXT:    br i1 [[TMP28]], label [[ITOFP_IF_END26:%.*]], label [[ITOFP_IF_THEN20:%.*]], !prof [[PROF2]]
 ; CHECK:       itofp-if-then20:
 ; CHECK-NEXT:    [[TMP32:%.*]] = lshr i129 [[TMP26]], 3
 ; CHECK-NEXT:    [[TMP33:%.*]] = trunc i129 [[TMP32]] to i32
@@ -91,7 +91,7 @@ define float @ui129tofloat(i129 %a) {
 ; CHECK-LABEL: @ui129tofloat(
 ; CHECK-NEXT:  itofp-entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = icmp eq i129 [[A:%.*]], 0
-; CHECK-NEXT:    br i1 [[TMP0]], label [[ITOFP_RETURN:%.*]], label [[ITOFP_IF_END:%.*]]
+; CHECK-NEXT:    br i1 [[TMP0]], label [[ITOFP_RETURN:%.*]], label [[ITOFP_IF_END:%.*]], !prof [[PROF1]]
 ; CHECK:       itofp-if-end:
 ; CHECK-NEXT:    [[TMP1:%.*]] = ashr i129 [[A]], 128
 ; CHECK-NEXT:    [[TMP2:%.*]] = xor i129 [[TMP1]], [[A]]
@@ -101,12 +101,12 @@ define float @ui129tofloat(i129 %a) {
 ; CHECK-NEXT:    [[TMP6:%.*]] = sub i32 129, [[TMP5]]
 ; CHECK-NEXT:    [[TMP7:%.*]] = sub i32 128, [[TMP5]]
 ; CHECK-NEXT:    [[TMP8:%.*]] = icmp sgt i32 [[TMP6]], 24
-; CHECK-NEXT:    br i1 [[TMP8]], label [[ITOFP_IF_THEN4:%.*]], label [[ITOFP_IF_ELSE:%.*]]
+; CHECK-NEXT:    br i1 [[TMP8]], label [[ITOFP_IF_THEN4:%.*]], label [[ITOFP_IF_ELSE:%.*]], !prof [[PROF2]]
 ; CHECK:       itofp-if-then4:
 ; CHECK-NEXT:    switch i32 [[TMP6]], label [[ITOFP_SW_DEFAULT:%.*]] [
 ; CHECK-NEXT:      i32 25, label [[ITOFP_SW_BB:%.*]]
 ; CHECK-NEXT:      i32 26, label [[ITOFP_SW_EPILOG:%.*]]
-; CHECK-NEXT:    ]
+; CHECK-NEXT:    ], !prof [[PROF3]]
 ; CHECK:       itofp-sw-bb:
 ; CHECK-NEXT:    [[TMP9:%.*]] = shl i129 [[A]], 1
 ; CHECK-NEXT:    br label [[ITOFP_SW_EPILOG]]
@@ -136,7 +136,7 @@ define float @ui129tofloat(i129 %a) {
 ; CHECK-NEXT:    [[TMP29:%.*]] = trunc i129 [[TMP27]] to i32
 ; CHECK-NEXT:    [[TMP30:%.*]] = lshr i129 [[TMP27]], 32
 ; CHECK-NEXT:    [[TMP31:%.*]] = trunc i129 [[TMP30]] to i32
-; CHECK-NEXT:    br i1 [[TMP28]], label [[ITOFP_IF_END26:%.*]], label [[ITOFP_IF_THEN20:%.*]]
+; CHECK-NEXT:    br i1 [[TMP28]], label [[ITOFP_IF_END26:%.*]], label [[ITOFP_IF_THEN20:%.*]], !prof [[PROF2]]
 ; CHECK:       itofp-if-then20:
 ; CHECK-NEXT:    [[TMP32:%.*]] = lshr i129 [[TMP26]], 3
 ; CHECK-NEXT:    [[TMP33:%.*]] = trunc i129 [[TMP32]] to i32
@@ -175,7 +175,7 @@ define double @ui129todouble(i129 %a) {
 ; CHECK-LABEL: @ui129todouble(
 ; CHECK-NEXT:  itofp-entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = icmp eq i129 [[A:%.*]], 0
-; CHECK-NEXT:    br i1 [[TMP0]], label [[ITOFP_RETURN:%.*]], label [[ITOFP_IF_END:%.*]]
+; CHECK-NEXT:    br i1 [[TMP0]], label [[ITOFP_RETURN:%.*]], label [[ITOFP_IF_END:%.*]], !prof [[PROF1]]
 ; CHECK:       itofp-if-end:
 ; CHECK-NEXT:    [[TMP1:%.*]] = ashr i129 [[A]], 128
 ; CHECK-NEXT:    [[TMP2:%.*]] = xor i129 [[TMP1]], [[A]]
@@ -185,12 +185,12 @@ define double @ui129todouble(i129 %a) {
 ; CHECK-NEXT:    [[TMP6:%.*]] = sub i32 129, [[TMP5]]
 ; CHECK-NEXT:    [[TMP7:%.*]] = sub i32 128, [[TMP5]]
 ; CHECK-NEXT:    [[TMP8:%.*]] = icmp sgt i32 [[TMP6]], 53
-; CHECK-NEXT:    br i1 [[TMP8]], label [[ITOFP_IF_THEN4:%.*]], label [[ITOFP_IF_ELSE:%.*]]
+; CHECK-NEXT:    br i1 [[TMP8]], label [[ITOFP_IF_THEN4:%.*]], label [[ITOFP_IF_ELSE:%.*]], !prof [[PROF2]]
 ; CHECK:       itofp-if-then4:
 ; CHECK-NEXT:    switch i32 [[TMP6]], label [[ITOFP_SW_DEFAULT:%.*]] [
 ; CHECK-NEXT:      i32 54, label [[ITOFP_SW_BB:%.*]]
 ; CHECK-NEXT:      i32 55, label [[ITOFP_SW_EPILOG:%.*]]
-; CHECK-NEXT:    ]
+; CHECK-NEXT:    ], !prof [[PROF3]]
 ; CHECK:       itofp-sw-bb:
 ; CHECK-NEXT:    [[TMP9:%.*]] = shl i129 [[A]], 1
 ; CHECK-NEXT:    br label [[ITOFP_SW_EPILOG]]
@@ -220,7 +220,7 @@ define double @ui129todouble(i129 %a) {
 ; CHECK-NEXT:    [[TMP29:%.*]] = trunc i129 [[TMP27]] to i64
 ; CHECK-NEXT:    [[TMP30:%.*]] = lshr i129 [[TMP27]], 32
 ; CHECK-NEXT:    [[TMP31:%.*]] = trunc i129 [[TMP30]] to i32
-; CHECK-NEXT:    br i1 [[TMP28]], label [[ITOFP_IF_END26:%.*]], label [[ITOFP_IF_THEN20:%.*]]
+; CHECK-NEXT:    br i1 [[TMP28]], label [[ITOFP_IF_END26:%.*]], label [[ITOFP_IF_THEN20:%.*]], !prof [[PROF2]]
 ; CHECK:       itofp-if-then20:
 ; CHECK-NEXT:    [[TMP32:%.*]] = lshr i129 [[TMP26]], 3
 ; CHECK-NEXT:    [[TMP33:%.*]] = trunc i129 [[TMP32]] to i64
@@ -264,7 +264,7 @@ define x86_fp80 @ui129tox86_fp80(i129 %a) {
 ; CHECK-LABEL: @ui129tox86_fp80(
 ; CHECK-NEXT:  itofp-entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = icmp eq i129 [[A:%.*]], 0
-; CHECK-NEXT:    br i1 [[TMP0]], label [[ITOFP_RETURN:%.*]], label [[ITOFP_IF_END:%.*]]
+; CHECK-NEXT:    br i1 [[TMP0]], label [[ITOFP_RETURN:%.*]], label [[ITOFP_IF_END:%.*]], !prof [[PROF1]]
 ; CHECK:       itofp-if-end:
 ; CHECK-NEXT:    [[TMP1:%.*]] = ashr i129 [[A]], 128
 ; CHECK-NEXT:    [[TMP2:%.*]] = xor i129 [[TMP1]], [[A]]
@@ -274,12 +274,12 @@ define x86_fp80 @ui129tox86_fp80(i129 %a) {
 ; CHECK-NEXT:    [[TMP6:%.*]] = sub i129 129, [[TMP4]]
 ; CHECK-NEXT:    [[TMP7:%.*]] = sub i129 128, [[TMP4]]
 ; CHECK-NEXT:    [[TMP8:%.*]] = icmp sgt i129 [[TMP6]], 113
-; CHECK-NEXT:    br i1 [[TMP8]], label [[ITOFP_IF_THEN4:%.*]], label [[ITOFP_IF_ELSE:%.*]]
+; CHECK-NEXT:    br i1 [[TMP8]], label [[ITOFP_IF_THEN4:%.*]], label [[ITOFP_IF_ELSE:%.*]], !prof [[PROF2]]
 ; CHECK:       itofp-if-then4:
 ; CHECK-NEXT:    switch i129 [[TMP6]], label [[ITOFP_SW_DEFAULT:%.*]] [
 ; CHECK-NEXT:      i129 114, label [[ITOFP_SW_BB:%.*]]
 ; CHECK-NEXT:      i129 115, label [[ITOFP_SW_EPILOG:%.*]]
-; CHECK-NEXT:    ]
+; CHECK-NEXT:    ], !prof [[PROF3]]
 ; CHECK:       itofp-sw-bb:
 ; CHECK-NEXT:    [[TMP9:%.*]] = shl i129 [[A]], 1
 ; CHECK-NEXT:    br label [[ITOFP_SW_EPILOG]]
@@ -307,7 +307,7 @@ define x86_fp80 @ui129tox86_fp80(i129 %a) {
 ; CHECK-NEXT:    [[TMP27:%.*]] = trunc i129 [[TMP25]] to i128
 ; CHECK-NEXT:    [[TMP28:%.*]] = lshr i129 [[TMP25]], 32
 ; CHECK-NEXT:    [[TMP29:%.*]] = trunc i129 [[TMP7]] to i64
-; CHECK-NEXT:    br i1 [[TMP26]], label [[ITOFP_IF_END26:%.*]], label [[ITOFP_IF_THEN20:%.*]]
+; CHECK-NEXT:    br i1 [[TMP26]], label [[ITOFP_IF_END26:%.*]], label [[ITOFP_IF_THEN20:%.*]], !prof [[PROF2]]
 ; CHECK:       itofp-if-then20:
 ; CHECK-NEXT:    [[TMP30:%.*]] = lshr i129 [[TMP24]], 3
 ; CHECK-NEXT:    [[TMP31:%.*]] = trunc i129 [[TMP30]] to i128
@@ -348,7 +348,7 @@ define fp128 @ui129tofp128(i129 %a) {
 ; CHECK-LABEL: @ui129tofp128(
 ; CHECK-NEXT:  itofp-entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = icmp eq i129 [[A:%.*]], 0
-; CHECK-NEXT:    br i1 [[TMP0]], label [[ITOFP_RETURN:%.*]], label [[ITOFP_IF_END:%.*]]
+; CHECK-NEXT:    br i1 [[TMP0]], label [[ITOFP_RETURN:%.*]], label [[ITOFP_IF_END:%.*]], !prof [[PROF1]]
 ; CHECK:       itofp-if-end:
 ; CHECK-NEXT:    [[TMP1:%.*]] = ashr i129 [[A]], 128
 ; CHECK-NEXT:    [[TMP2:%.*]] = xor i129 [[TMP1]], [[A]]
@@ -358,12 +358,12 @@ define fp128 @ui129tofp128(i129 %a) {
 ; CHECK-NEXT:    [[TMP6:%.*]] = sub i129 129, [[TMP4]]
 ; CHECK-NEXT:    [[TMP7:%.*]] = sub i129 128, [[TMP4]]
 ; CHECK-NEXT:    [[TMP8:%.*]] = icmp sgt i129 [[TMP6]], 113
-; CHECK-NEXT:    br i1 [[TMP8]], label [[ITOFP_IF_THEN4:%.*]], label [[ITOFP_IF_ELSE:%.*]]
+; CHECK-NEXT:    br i1 [[TMP8]], label [[ITOFP_IF_THEN4:%.*]], label [[ITOFP_IF_ELSE:%.*]], !prof [[PROF2]]
 ; CHECK:       itofp-if-then4:
 ; CHECK-NEXT:    switch i129 [[TMP6]], label [[ITOFP_SW_DEFAULT:%.*]] [
 ; CHECK-NEXT:      i129 114, label [[ITOFP_SW_BB:%.*]]
 ; CHECK-NEXT:      i129 115, label [[ITOFP_SW_EPILOG:%.*]]
-; CHECK-NEXT:    ]
+; CHECK-NEXT:    ], !prof [[PROF3]]
 ; CHECK:       itofp-sw-bb:
 ; CHECK-NEXT:    [[TMP9:%.*]] = shl i129 [[A]], 1
 ; CHECK-NEXT:    br label [[ITOFP_SW_EPILOG]]
@@ -391,7 +391,7 @@ define fp128 @ui129tofp128(i129 %a) {
 ; CHECK-NEXT:    [[TMP27:%.*]] = trunc i129 [[TMP25]] to i128
 ; CHECK-NEXT:    [[TMP28:%.*]] = lshr i129 [[TMP25]], 32
 ; CHECK-NEXT:    [[TMP29:%.*]] = trunc i129 [[TMP7]] to i64
-; CHECK-NEXT:    br i1 [[TMP26]], label [[ITOFP_IF_END26:%.*]], label [[ITOFP_IF_THEN20:%.*]]
+; CHECK-NEXT:    br i1 [[TMP26]], label [[ITOFP_IF_END26:%.*]], label [[ITOFP_IF_THEN20:%.*]], !prof [[PROF2]]
 ; CHECK:       itofp-if-then20:
 ; CHECK-NEXT:    [[TMP30:%.*]] = lshr i129 [[TMP24]], 3
 ; CHECK-NEXT:    [[TMP31:%.*]] = trunc i129 [[TMP30]] to i128
@@ -432,7 +432,7 @@ define <2 x float> @ui129tofloatv2(<2 x i129> %a) {
 ; CHECK-NEXT:  itofp-entryitofp-entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <2 x i129> [[A:%.*]], i64 0
 ; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i129 [[TMP0]], 0
-; CHECK-NEXT:    br i1 [[TMP1]], label [[ITOFP_RETURN1:%.*]], label [[ITOFP_IF_END2:%.*]]
+; CHECK-NEXT:    br i1 [[TMP1]], label [[ITOFP_RETURN1:%.*]], label [[ITOFP_IF_END2:%.*]], !prof [[PROF1]]
 ; CHECK:       itofp-if-end2:
 ; CHECK-NEXT:    [[TMP2:%.*]] = ashr i129 [[TMP0]], 128
 ; CHECK-NEXT:    [[TMP3:%.*]] = xor i129 [[TMP2]], [[TMP0]]
@@ -442,12 +442,12 @@ define <2 x float> @ui129tofloatv2(<2 x i129> %a) {
 ; CHECK-NEXT:    [[TMP7:%.*]] = sub i32 129, [[TMP6]]
 ; CHECK-NEXT:    [[TMP8:%.*]] = sub i32 128, [[TMP6]]
 ; CHECK-NEXT:    [[TMP9:%.*]] = icmp sgt i32 [[TMP7]], 24
-; CHECK-NEXT:    br i1 [[TMP9]], label [[ITOFP_IF_THEN43:%.*]], label [[ITOFP_IF_ELSE8:%.*]]
+; CHECK-NEXT:    br i1 [[TMP9]], label [[ITOFP_IF_THEN43:%.*]], label [[ITOFP_IF_ELSE8:%.*]], !prof [[PROF2]]
 ; CHECK:       itofp-if-then43:
 ; CHECK-NEXT:    switch i32 [[TMP7]], label [[ITOFP_SW_DEFAULT5:%.*]] [
 ; CHECK-NEXT:      i32 25, label [[ITOFP_SW_BB4:%.*]]
 ; CHECK-NEXT:      i32 26, label [[ITOFP_SW_EPILOG6:%.*]]
-; CHECK-NEXT:    ]
+; CHECK-NEXT:    ], !prof [[PROF3]]
 ; CHECK:       itofp-sw-bb4:
 ; CHECK-NEXT:    [[TMP10:%.*]] = shl i129 [[TMP0]], 1
 ; CHECK-NEXT:    br label [[ITOFP_SW_EPILOG6]]
@@ -477,7 +477,7 @@ define <2 x float> @ui129tofloatv2(<2 x i129> %a) {
 ; CHECK-NEXT:    [[TMP30:%.*]] = trunc i129 [[TMP28]] to i32
 ; CHECK-NEXT:    [[TMP31:%.*]] = lshr i129 [[TMP28]], 32
 ; CHECK-NEXT:    [[TMP32:%.*]] = trunc i129 [[TMP31]] to i32
-; CHECK-NEXT:    br i1 [[TMP29]], label [[ITOFP_IF_END269:%.*]], label [[ITOFP_IF_THEN207:%.*]]
+; CHECK-NEXT:    br i1 [[TMP29]], label [[ITOFP_IF_END269:%.*]], label [[ITOFP_IF_THEN207:%.*]], !prof [[PROF2]]
 ; CHECK:       itofp-if-then207:
 ; CHECK-NEXT:    [[TMP33:%.*]] = lshr i129 [[TMP27]], 3
 ; CHECK-NEXT:    [[TMP34:%.*]] = trunc i129 [[TMP33]] to i32
@@ -509,7 +509,7 @@ define <2 x float> @ui129tofloatv2(<2 x i129> %a) {
 ; CHECK-NEXT:    [[TMP54:%.*]] = insertelement <2 x float> poison, float [[TMP53]], i64 0
 ; CHECK-NEXT:    [[TMP55:%.*]] = extractelement <2 x i129> [[A]], i64 1
 ; CHECK-NEXT:    [[TMP56:%.*]] = icmp eq i129 [[TMP55]], 0
-; CHECK-NEXT:    br i1 [[TMP56]], label [[ITOFP_RETURN:%.*]], label [[ITOFP_IF_END:%.*]]
+; CHECK-NEXT:    br i1 [[TMP56]], label [[ITOFP_RETURN:%.*]], label [[ITOFP_IF_END:%.*]], !prof [[PROF1]]
 ; CHECK:       itofp-if-end:
 ; CHECK-NEXT:    [[TMP57:%.*]] = ashr i129 [[TMP55]], 128
 ; CHECK-NEXT:    [[TMP58:%.*]] = xor i129 [[TMP57]], [[TMP55]]
@@ -519,12 +519,12 @@ define <2 x float> @ui129tofloatv2(<2 x i129> %a) {
 ; CHECK-NEXT:    [[TMP62:%.*]] = sub i32 129, [[TMP61]]
 ; CHECK-NEXT:    [[TMP63:%.*]] = sub i32 128, [[TMP61]]
 ; CHECK-NEXT:    [[TMP64:%.*]] = icmp sgt i32 [[TMP62]], 24
-; CHECK-NEXT:    br i1 [[TMP64]], label [[ITOFP_IF_THEN4:%.*]], label [[ITOFP_IF_ELSE:%.*]]
+; CHECK-NEXT:    br i1 [[TMP64]], label [[ITOFP_IF_THEN4:%.*]], label [[ITOFP_IF_ELSE:%.*]], !prof [[PROF2]]
 ; CHECK:       itofp-if-then4:
 ; CHECK-NEXT:    switch i32 [[TMP62]], label [[ITOFP_SW_DEFAULT:%.*]] [
 ; CHECK-NEXT:      i32 25, label [[ITOFP_SW_BB:%.*]]
 ; CHECK-NEXT:      i32 26, label [[ITOFP_SW_EPILOG:%.*]]
-; CHECK-NEXT:    ]
+; CHECK-NEXT:    ], !prof [[PROF3]]
 ; CHECK:       itofp-sw-bb:
 ; CHECK-NEXT:    [[TMP65:%.*]] = shl i129 [[TMP55]], 1
 ; CHECK-NEXT:    br label [[ITOFP_SW_EPILOG]]
@@ -554,7 +554,7 @@ define <2 x float> @ui129tofloatv2(<2 x i129> %a) {
 ; CHECK-NEXT:    [[TMP85:%.*]] = trunc i129 [[TMP83]] to i32
 ; CHECK-NEXT:    [[TMP86:%.*]] = lshr i129 [[TMP83]], 32
 ; CHECK-NEXT:    [[TMP87:%.*]] = trunc i129 [[TMP86]] to i32
-; CHECK-NEXT:    br i1 [[TMP84]], label [[ITOFP_IF_END26:%.*]], label [[ITOFP_IF_THEN20:%.*]]
+; CHECK-NEXT:    br i1 [[TMP84]], label [[ITOFP_IF_END26:%.*]], label [[ITOFP_IF_THEN20:%.*]], !prof [[PROF2]]
 ; CHECK:       itofp-if-then20:
 ; CHECK-NEXT:    [[TMP88:%.*]] = lshr i129 [[TMP82]], 3
 ; CHECK-NEXT:    [[TMP89:%.*]] = trunc i129 [[TMP88]] to i32
@@ -589,3 +589,13 @@ define <2 x float> @ui129tofloatv2(<2 x i129> %a) {
   %conv = uitofp <2 x i129> %a to <2 x float>
   ret <2 x float> %conv
 }
+
+!0 = !{!"function_entry_count", i64 1000}
+;.
+; CHECK: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+;.
+; CHECK: [[META0:![0-9]+]] = !{!"function_entry_count", i64 1000}
+; CHECK: [[PROF1]] = !{!"branch_weights", i32 1, i32 1048575}
+; CHECK: [[PROF2]] = !{!"branch_weights", i32 1048575, i32 1}
+; CHECK: [[PROF3]] = !{!"branch_weights", i32 1048575, i32 1, i32 1}
+;.
diff --git a/llvm/test/Transforms/ExpandIRInsts/X86/expand-large-fp-optnone.ll b/llvm/test/Transforms/ExpandIRInsts/X86/expand-large-fp-optnone.ll
index 5b622c1ad77eb..919fb0aa13c50 100644
--- a/llvm/test/Transforms/ExpandIRInsts/X86/expand-large-fp-optnone.ll
+++ b/llvm/test/Transforms/ExpandIRInsts/X86/expand-large-fp-optnone.ll
@@ -1,16 +1,16 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 5
 ; RUN: opt -S -mtriple=x86_64-- --expand-ir-insts < %s | FileCheck %s
 ; RUN: opt -S -mtriple=x86_64-- -passes='require<libcall-lowering-info>,expand-ir-insts' < %s | FileCheck %s
 
 ; expand-ir-insts must also run with optnone
 
 ; Function Attrs: noinline optnone
-define double @main(i224 %0) #0 {
+define double @main(i224 %0) #0 !prof !0 {
 ; CHECK-LABEL: define double @main(
-; CHECK-SAME: i224 [[TMP0:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-SAME: i224 [[TMP0:%.*]]) #[[ATTR0:[0-9]+]] !prof [[PROF0:![0-9]+]] {
 ; CHECK-NEXT:  [[ENTRYITOFP_ENTRY:.*]]:
 ; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i224 [[TMP0]], 0
-; CHECK-NEXT:    br i1 [[TMP1]], label %[[ITOFP_RETURN:.*]], label %[[ITOFP_IF_END:.*]]
+; CHECK-NEXT:    br i1 [[TMP1]], label %[[ITOFP_RETURN:.*]], label %[[ITOFP_IF_END:.*]], !prof [[PROF1:![0-9]+]]
 ; CHECK:       [[ITOFP_IF_END]]:
 ; CHECK-NEXT:    [[TMP2:%.*]] = ashr i224 [[TMP0]], 223
 ; CHECK-NEXT:    [[TMP3:%.*]] = xor i224 [[TMP2]], [[TMP0]]
@@ -20,12 +20,12 @@ define double @main(i224 %0) #0 {
 ; CHECK-NEXT:    [[TMP7:%.*]] = sub i32 224, [[TMP6]]
 ; CHECK-NEXT:    [[TMP8:%.*]] = sub i32 223, [[TMP6]]
 ; CHECK-NEXT:    [[TMP9:%.*]] = icmp sgt i32 [[TMP7]], 53
-; CHECK-NEXT:    br i1 [[TMP9]], label %[[ITOFP_IF_THEN4:.*]], label %[[ITOFP_IF_ELSE:.*]]
+; CHECK-NEXT:    br i1 [[TMP9]], label %[[ITOFP_IF_THEN4:.*]], label %[[ITOFP_IF_ELSE:.*]], !prof [[PROF2:![0-9]+]]
 ; CHECK:       [[ITOFP_IF_THEN4]]:
 ; CHECK-NEXT:    switch i32 [[TMP7]], label %[[ITOFP_SW_DEFAULT:.*]] [
 ; CHECK-NEXT:      i32 54, label %[[ITOFP_SW_BB:.*]]
 ; CHECK-NEXT:      i32 55, label %[[ITOFP_SW_EPILOG:.*]]
-; CHECK-NEXT:    ]
+; CHECK-NEXT:    ], !prof [[PROF3:![0-9]+]]
 ; CHECK:       [[ITOFP_SW_BB]]:
 ; CHECK-NEXT:    [[TMP10:%.*]] = shl i224 [[TMP4]], 1
 ; CHECK-NEXT:    br label %[[ITOFP_SW_EPILOG]]
@@ -55,7 +55,7 @@ define double @main(i224 %0) #0 {
 ; CHECK-NEXT:    [[TMP30:%.*]] = trunc i224 [[TMP28]] to i64
 ; CHECK-NEXT:    [[TMP31:%.*]] = lshr i224 [[TMP28]], 32
 ; CHECK-NEXT:    [[TMP32:%.*]] = trunc i224 [[TMP31]] to i32
-; CHECK-NEXT:    br i1 [[TMP29]], label %[[ITOFP_IF_END26:.*]], label %[[ITOFP_IF_THEN20:.*]]
+; CHECK-NEXT:    br i1 [[TMP29]], label %[[ITOFP_IF_END26:.*]], label %[[ITOFP_IF_THEN20:.*]], !prof [[PROF2]]
 ; CHECK:       [[ITOFP_IF_THEN20]]:
 ; CHECK-NEXT:    [[TMP33:%.*]] = ashr i224 [[TMP27]], 3
 ; CHECK-NEXT:    [[TMP34:%.*]] = trunc i224 [[TMP33]] to i64
@@ -97,3 +97,14 @@ entry:
 }
 
 attributes #0 = { noinline optnone }
+
+!0 = !{!"function_entry_count", i64 1000}
+;.
+; CHECK: attributes #[[ATTR0]] = { noinline optnone }
+; CHECK: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+;.
+; CHECK: [[PROF0]] = !{!"function_entry_count", i64 1000}
+; CHECK: [[PROF1]] = !{!"branch_weights", i32 1, i32 1048575}
+; CHECK: [[PROF2]] = !{!"branch_weights", i32 1048575, i32 1}
+; CHECK: [[PROF3]] = !{!"branch_weights", i32 1048575, i32 1, i32 1}
+;.
diff --git a/mlir/lib/Analysis/AliasAnalysis/LocalAliasAnalysis.cpp b/mlir/lib/Analysis/AliasAnalysis/LocalAliasAnalysis.cpp
index b698756dd75e9..5a4679ef31422 100644
--- a/mlir/lib/Analysis/AliasAnalysis/LocalAliasAnalysis.cpp
+++ b/mlir/lib/Analysis/AliasAnalysis/LocalAliasAnalysis.cpp
@@ -170,24 +170,16 @@ static void collectUnderlyingAddressValues(BlockArgument arg, unsigned maxDepth,
     // the entry block.
     SmallVector<RegionSuccessor> successors;
     branch.getSuccessorRegions(RegionBranchPoint::parent(), successors);
-    RegionSuccessor regionSuccessor(region);
-    bool found = false;
     for (RegionSuccessor &successor : successors) {
       if (successor.getSuccessor() == region) {
         LDBG() << "  Found matching region successor: " << successor;
-        found = true;
-        regionSuccessor = successor;
-        break;
+        return collectUnderlyingAddressValues2(
+            branch, successor, arg, argNumber, maxDepth, visited, output);
       }
     }
-    if (!found) {
-      LDBG()
-          << "  No matching region successor found, adding argument to output";
-      output.push_back(arg);
-      return;
-    }
-    return collectUnderlyingAddressValues2(
-        branch, regionSuccessor, arg, argNumber, maxDepth, visited, output);
+    LDBG() << "  No matching region successor found, adding argument to output";
+    output.push_back(arg);
+    return;
   }
 
   LDBG()



More information about the Mlir-commits mailing list