[llvm] r356741 - [RISCV] Optimize emission of SELECT sequences

Alex Bradbury via llvm-commits llvm-commits at lists.llvm.org
Fri Mar 22 03:45:04 PDT 2019


Author: asb
Date: Fri Mar 22 03:45:03 2019
New Revision: 356741

URL: http://llvm.org/viewvc/llvm-project?rev=356741&view=rev
Log:
[RISCV] Optimize emission of SELECT sequences

This patch optimizes the emission of a sequence of SELECTs with the same
condition, avoiding the insertion of unnecessary control flow. Such a sequence
often occurs when a SELECT of values wider than XLEN is legalized into two
SELECTs with legal types. We have identified several use cases where the
SELECTs could be interleaved with other instructions. Therefore, we extend the
sequence to include non-SELECT instructions if we are able to detect that the
non-SELECT instructions do not impact the optimization.

This patch supersedes https://reviews.llvm.org/D59096, which attempted to
address this issue by introducing a new SelectionDAG node. Hat tip to Eli
Friedman for his feedback on how to best handle this issue.

Differential Revision: https://reviews.llvm.org/D59355
Patch by Luís Marques.


Added:
    llvm/trunk/test/CodeGen/RISCV/select-optimize-multiple.ll
    llvm/trunk/test/CodeGen/RISCV/select-optimize-multiple.mir
Modified:
    llvm/trunk/lib/Target/RISCV/RISCVISelLowering.cpp
    llvm/trunk/test/CodeGen/RISCV/atomic-rmw.ll

Modified: llvm/trunk/lib/Target/RISCV/RISCVISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/RISCV/RISCVISelLowering.cpp?rev=356741&r1=356740&r2=356741&view=diff
==============================================================================
--- llvm/trunk/lib/Target/RISCV/RISCVISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/RISCV/RISCVISelLowering.cpp Fri Mar 22 03:45:03 2019
@@ -17,6 +17,7 @@
 #include "RISCVRegisterInfo.h"
 #include "RISCVSubtarget.h"
 #include "RISCVTargetMachine.h"
+#include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/CodeGen/CallingConvLower.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
@@ -787,10 +788,21 @@ static MachineBasicBlock *emitBuildPairF
   return BB;
 }
 
+static bool isSelectPseudo(MachineInstr &MI) {
+  switch (MI.getOpcode()) {
+  default:
+    return false;
+  case RISCV::Select_GPR_Using_CC_GPR:
+  case RISCV::Select_FPR32_Using_CC_GPR:
+  case RISCV::Select_FPR64_Using_CC_GPR:
+    return true;
+  }
+}
+
 static MachineBasicBlock *emitSelectPseudo(MachineInstr &MI,
                                            MachineBasicBlock *BB) {
-  // To "insert" a SELECT instruction, we actually have to insert the triangle
-  // control-flow pattern.  The incoming instruction knows the destination vreg
+  // To "insert" Select_* instructions, we actually have to insert the triangle
+  // control-flow pattern.  The incoming instructions know the destination vreg
   // to set, the condition code register to branch on, the true/false values to
   // select between, and the condcode to use to select the appropriate branch.
   //
@@ -800,6 +812,54 @@ static MachineBasicBlock *emitSelectPseu
   //     |  IfFalseMBB
   //     | /
   //    TailMBB
+  //
+  // When we find a sequence of selects we attempt to optimize their emission
+  // by sharing the control flow. Currently we only handle cases where we have
+  // multiple selects with the exact same condition (same LHS, RHS and CC).
+  // The selects may be interleaved with other instructions if the other
+  // instructions meet some requirements we deem safe:
+  // - They are debug instructions. Otherwise,
+  // - They do not have side-effects, do not access memory and their inputs do
+  //   not depend on the results of the select pseudo-instructions.
+  // The TrueV/FalseV operands of the selects cannot depend on the result of
+  // previous selects in the sequence.
+  // These conditions could be further relaxed. See the X86 target for a
+  // related approach and more information.
+  unsigned LHS = MI.getOperand(1).getReg();
+  unsigned RHS = MI.getOperand(2).getReg();
+  auto CC = static_cast<ISD::CondCode>(MI.getOperand(3).getImm());
+
+  SmallVector<MachineInstr *, 4> SelectDebugValues;
+  SmallSet<unsigned, 4> SelectDests;
+  SelectDests.insert(MI.getOperand(0).getReg());
+
+  MachineInstr *LastSelectPseudo = &MI;
+
+  for (auto E = BB->end(), SequenceMBBI = MachineBasicBlock::iterator(MI);
+       SequenceMBBI != E; ++SequenceMBBI) {
+    if (SequenceMBBI->isDebugInstr())
+      continue;
+    else if (isSelectPseudo(*SequenceMBBI)) {
+      if (SequenceMBBI->getOperand(1).getReg() != LHS ||
+          SequenceMBBI->getOperand(2).getReg() != RHS ||
+          SequenceMBBI->getOperand(3).getImm() != CC ||
+          SelectDests.count(SequenceMBBI->getOperand(4).getReg()) ||
+          SelectDests.count(SequenceMBBI->getOperand(5).getReg()))
+        break;
+      LastSelectPseudo = &*SequenceMBBI;
+      SequenceMBBI->collectDebugValues(SelectDebugValues);
+      SelectDests.insert(SequenceMBBI->getOperand(0).getReg());
+    } else {
+      if (SequenceMBBI->hasUnmodeledSideEffects() ||
+          SequenceMBBI->mayLoadOrStore())
+        break;
+      if (llvm::any_of(SequenceMBBI->operands(), [&](MachineOperand &MO) {
+            return MO.isReg() && MO.isUse() && SelectDests.count(MO.getReg());
+          }))
+        break;
+    }
+  }
+
   const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo();
   const BasicBlock *LLVM_BB = BB->getBasicBlock();
   DebugLoc DL = MI.getDebugLoc();
@@ -812,20 +872,23 @@ static MachineBasicBlock *emitSelectPseu
 
   F->insert(I, IfFalseMBB);
   F->insert(I, TailMBB);
-  // Move all remaining instructions to TailMBB.
-  TailMBB->splice(TailMBB->begin(), HeadMBB, std::next(MI.getIterator()),
-                  HeadMBB->end());
+
+  // Transfer debug instructions associated with the selects to TailMBB.
+  for (MachineInstr *DebugInstr : SelectDebugValues) {
+    TailMBB->push_back(DebugInstr->removeFromParent());
+  }
+
+  // Move all instructions after the sequence to TailMBB.
+  TailMBB->splice(TailMBB->end(), HeadMBB,
+                  std::next(LastSelectPseudo->getIterator()), HeadMBB->end());
   // Update machine-CFG edges by transferring all successors of the current
-  // block to the new block which will contain the Phi node for the select.
+  // block to the new block which will contain the Phi nodes for the selects.
   TailMBB->transferSuccessorsAndUpdatePHIs(HeadMBB);
   // Set the successors for HeadMBB.
   HeadMBB->addSuccessor(IfFalseMBB);
   HeadMBB->addSuccessor(TailMBB);
 
   // Insert appropriate branch.
-  unsigned LHS = MI.getOperand(1).getReg();
-  unsigned RHS = MI.getOperand(2).getReg();
-  auto CC = static_cast<ISD::CondCode>(MI.getOperand(3).getImm());
   unsigned Opcode = getBranchOpcodeForIntCondCode(CC);
 
   BuildMI(HeadMBB, DL, TII.get(Opcode))
@@ -836,15 +899,25 @@ static MachineBasicBlock *emitSelectPseu
   // IfFalseMBB just falls through to TailMBB.
   IfFalseMBB->addSuccessor(TailMBB);
 
-  // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ]
-  BuildMI(*TailMBB, TailMBB->begin(), DL, TII.get(RISCV::PHI),
-          MI.getOperand(0).getReg())
-      .addReg(MI.getOperand(4).getReg())
-      .addMBB(HeadMBB)
-      .addReg(MI.getOperand(5).getReg())
-      .addMBB(IfFalseMBB);
+  // Create PHIs for all of the select pseudo-instructions.
+  auto SelectMBBI = MI.getIterator();
+  auto SelectEnd = std::next(LastSelectPseudo->getIterator());
+  auto InsertionPoint = TailMBB->begin();
+  while (SelectMBBI != SelectEnd) {
+    auto Next = std::next(SelectMBBI);
+    if (isSelectPseudo(*SelectMBBI)) {
+      // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ]
+      BuildMI(*TailMBB, InsertionPoint, SelectMBBI->getDebugLoc(),
+              TII.get(RISCV::PHI), SelectMBBI->getOperand(0).getReg())
+          .addReg(SelectMBBI->getOperand(4).getReg())
+          .addMBB(HeadMBB)
+          .addReg(SelectMBBI->getOperand(5).getReg())
+          .addMBB(IfFalseMBB);
+      SelectMBBI->eraseFromParent();
+    }
+    SelectMBBI = Next;
+  }
 
-  MI.eraseFromParent(); // The pseudo instruction is gone now.
   return TailMBB;
 }
 

Modified: llvm/trunk/test/CodeGen/RISCV/atomic-rmw.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/RISCV/atomic-rmw.ll?rev=356741&r1=356740&r2=356741&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/RISCV/atomic-rmw.ll (original)
+++ llvm/trunk/test/CodeGen/RISCV/atomic-rmw.ll Fri Mar 22 03:45:03 2019
@@ -14574,24 +14574,19 @@ define i64 @atomicrmw_max_i64_monotonic(
 ; RV32I-NEXT:  # %bb.2: # %atomicrmw.start
 ; RV32I-NEXT:    # in Loop: Header=BB200_1 Depth=1
 ; RV32I-NEXT:    slt a0, s0, a1
-; RV32I-NEXT:    sw a2, 0(sp)
-; RV32I-NEXT:    beqz a0, .LBB200_4
-; RV32I-NEXT:    j .LBB200_5
+; RV32I-NEXT:    j .LBB200_4
 ; RV32I-NEXT:  .LBB200_3: # in Loop: Header=BB200_1 Depth=1
 ; RV32I-NEXT:    sltu a0, s2, a2
-; RV32I-NEXT:    sw a2, 0(sp)
-; RV32I-NEXT:    bnez a0, .LBB200_5
 ; RV32I-NEXT:  .LBB200_4: # %atomicrmw.start
 ; RV32I-NEXT:    # in Loop: Header=BB200_1 Depth=1
-; RV32I-NEXT:    mv a2, s2
-; RV32I-NEXT:  .LBB200_5: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB200_1 Depth=1
+; RV32I-NEXT:    sw a2, 0(sp)
 ; RV32I-NEXT:    mv a3, a1
-; RV32I-NEXT:    bnez a0, .LBB200_7
-; RV32I-NEXT:  # %bb.6: # %atomicrmw.start
+; RV32I-NEXT:    bnez a0, .LBB200_6
+; RV32I-NEXT:  # %bb.5: # %atomicrmw.start
 ; RV32I-NEXT:    # in Loop: Header=BB200_1 Depth=1
+; RV32I-NEXT:    mv a2, s2
 ; RV32I-NEXT:    mv a3, s0
-; RV32I-NEXT:  .LBB200_7: # %atomicrmw.start
+; RV32I-NEXT:  .LBB200_6: # %atomicrmw.start
 ; RV32I-NEXT:    # in Loop: Header=BB200_1 Depth=1
 ; RV32I-NEXT:    sw a1, 4(sp)
 ; RV32I-NEXT:    mv a0, s1
@@ -14602,7 +14597,7 @@ define i64 @atomicrmw_max_i64_monotonic(
 ; RV32I-NEXT:    lw a1, 4(sp)
 ; RV32I-NEXT:    lw a2, 0(sp)
 ; RV32I-NEXT:    beqz a0, .LBB200_1
-; RV32I-NEXT:  # %bb.8: # %atomicrmw.end
+; RV32I-NEXT:  # %bb.7: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a2
 ; RV32I-NEXT:    lw s3, 12(sp)
 ; RV32I-NEXT:    lw s2, 16(sp)
@@ -14632,24 +14627,19 @@ define i64 @atomicrmw_max_i64_monotonic(
 ; RV32IA-NEXT:  # %bb.2: # %atomicrmw.start
 ; RV32IA-NEXT:    # in Loop: Header=BB200_1 Depth=1
 ; RV32IA-NEXT:    slt a0, s0, a1
-; RV32IA-NEXT:    sw a2, 0(sp)
-; RV32IA-NEXT:    beqz a0, .LBB200_4
-; RV32IA-NEXT:    j .LBB200_5
+; RV32IA-NEXT:    j .LBB200_4
 ; RV32IA-NEXT:  .LBB200_3: # in Loop: Header=BB200_1 Depth=1
 ; RV32IA-NEXT:    sltu a0, s2, a2
-; RV32IA-NEXT:    sw a2, 0(sp)
-; RV32IA-NEXT:    bnez a0, .LBB200_5
 ; RV32IA-NEXT:  .LBB200_4: # %atomicrmw.start
 ; RV32IA-NEXT:    # in Loop: Header=BB200_1 Depth=1
-; RV32IA-NEXT:    mv a2, s2
-; RV32IA-NEXT:  .LBB200_5: # %atomicrmw.start
-; RV32IA-NEXT:    # in Loop: Header=BB200_1 Depth=1
+; RV32IA-NEXT:    sw a2, 0(sp)
 ; RV32IA-NEXT:    mv a3, a1
-; RV32IA-NEXT:    bnez a0, .LBB200_7
-; RV32IA-NEXT:  # %bb.6: # %atomicrmw.start
+; RV32IA-NEXT:    bnez a0, .LBB200_6
+; RV32IA-NEXT:  # %bb.5: # %atomicrmw.start
 ; RV32IA-NEXT:    # in Loop: Header=BB200_1 Depth=1
+; RV32IA-NEXT:    mv a2, s2
 ; RV32IA-NEXT:    mv a3, s0
-; RV32IA-NEXT:  .LBB200_7: # %atomicrmw.start
+; RV32IA-NEXT:  .LBB200_6: # %atomicrmw.start
 ; RV32IA-NEXT:    # in Loop: Header=BB200_1 Depth=1
 ; RV32IA-NEXT:    sw a1, 4(sp)
 ; RV32IA-NEXT:    mv a0, s1
@@ -14660,7 +14650,7 @@ define i64 @atomicrmw_max_i64_monotonic(
 ; RV32IA-NEXT:    lw a1, 4(sp)
 ; RV32IA-NEXT:    lw a2, 0(sp)
 ; RV32IA-NEXT:    beqz a0, .LBB200_1
-; RV32IA-NEXT:  # %bb.8: # %atomicrmw.end
+; RV32IA-NEXT:  # %bb.7: # %atomicrmw.end
 ; RV32IA-NEXT:    mv a0, a2
 ; RV32IA-NEXT:    lw s3, 12(sp)
 ; RV32IA-NEXT:    lw s2, 16(sp)
@@ -14735,24 +14725,19 @@ define i64 @atomicrmw_max_i64_acquire(i6
 ; RV32I-NEXT:  # %bb.2: # %atomicrmw.start
 ; RV32I-NEXT:    # in Loop: Header=BB201_1 Depth=1
 ; RV32I-NEXT:    slt a0, s0, a1
-; RV32I-NEXT:    sw a2, 0(sp)
-; RV32I-NEXT:    beqz a0, .LBB201_4
-; RV32I-NEXT:    j .LBB201_5
+; RV32I-NEXT:    j .LBB201_4
 ; RV32I-NEXT:  .LBB201_3: # in Loop: Header=BB201_1 Depth=1
 ; RV32I-NEXT:    sltu a0, s2, a2
-; RV32I-NEXT:    sw a2, 0(sp)
-; RV32I-NEXT:    bnez a0, .LBB201_5
 ; RV32I-NEXT:  .LBB201_4: # %atomicrmw.start
 ; RV32I-NEXT:    # in Loop: Header=BB201_1 Depth=1
-; RV32I-NEXT:    mv a2, s2
-; RV32I-NEXT:  .LBB201_5: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB201_1 Depth=1
+; RV32I-NEXT:    sw a2, 0(sp)
 ; RV32I-NEXT:    mv a3, a1
-; RV32I-NEXT:    bnez a0, .LBB201_7
-; RV32I-NEXT:  # %bb.6: # %atomicrmw.start
+; RV32I-NEXT:    bnez a0, .LBB201_6
+; RV32I-NEXT:  # %bb.5: # %atomicrmw.start
 ; RV32I-NEXT:    # in Loop: Header=BB201_1 Depth=1
+; RV32I-NEXT:    mv a2, s2
 ; RV32I-NEXT:    mv a3, s0
-; RV32I-NEXT:  .LBB201_7: # %atomicrmw.start
+; RV32I-NEXT:  .LBB201_6: # %atomicrmw.start
 ; RV32I-NEXT:    # in Loop: Header=BB201_1 Depth=1
 ; RV32I-NEXT:    sw a1, 4(sp)
 ; RV32I-NEXT:    mv a0, s1
@@ -14763,7 +14748,7 @@ define i64 @atomicrmw_max_i64_acquire(i6
 ; RV32I-NEXT:    lw a1, 4(sp)
 ; RV32I-NEXT:    lw a2, 0(sp)
 ; RV32I-NEXT:    beqz a0, .LBB201_1
-; RV32I-NEXT:  # %bb.8: # %atomicrmw.end
+; RV32I-NEXT:  # %bb.7: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a2
 ; RV32I-NEXT:    lw s3, 12(sp)
 ; RV32I-NEXT:    lw s2, 16(sp)
@@ -14793,24 +14778,19 @@ define i64 @atomicrmw_max_i64_acquire(i6
 ; RV32IA-NEXT:  # %bb.2: # %atomicrmw.start
 ; RV32IA-NEXT:    # in Loop: Header=BB201_1 Depth=1
 ; RV32IA-NEXT:    slt a0, s0, a1
-; RV32IA-NEXT:    sw a2, 0(sp)
-; RV32IA-NEXT:    beqz a0, .LBB201_4
-; RV32IA-NEXT:    j .LBB201_5
+; RV32IA-NEXT:    j .LBB201_4
 ; RV32IA-NEXT:  .LBB201_3: # in Loop: Header=BB201_1 Depth=1
 ; RV32IA-NEXT:    sltu a0, s2, a2
-; RV32IA-NEXT:    sw a2, 0(sp)
-; RV32IA-NEXT:    bnez a0, .LBB201_5
 ; RV32IA-NEXT:  .LBB201_4: # %atomicrmw.start
 ; RV32IA-NEXT:    # in Loop: Header=BB201_1 Depth=1
-; RV32IA-NEXT:    mv a2, s2
-; RV32IA-NEXT:  .LBB201_5: # %atomicrmw.start
-; RV32IA-NEXT:    # in Loop: Header=BB201_1 Depth=1
+; RV32IA-NEXT:    sw a2, 0(sp)
 ; RV32IA-NEXT:    mv a3, a1
-; RV32IA-NEXT:    bnez a0, .LBB201_7
-; RV32IA-NEXT:  # %bb.6: # %atomicrmw.start
+; RV32IA-NEXT:    bnez a0, .LBB201_6
+; RV32IA-NEXT:  # %bb.5: # %atomicrmw.start
 ; RV32IA-NEXT:    # in Loop: Header=BB201_1 Depth=1
+; RV32IA-NEXT:    mv a2, s2
 ; RV32IA-NEXT:    mv a3, s0
-; RV32IA-NEXT:  .LBB201_7: # %atomicrmw.start
+; RV32IA-NEXT:  .LBB201_6: # %atomicrmw.start
 ; RV32IA-NEXT:    # in Loop: Header=BB201_1 Depth=1
 ; RV32IA-NEXT:    sw a1, 4(sp)
 ; RV32IA-NEXT:    mv a0, s1
@@ -14821,7 +14801,7 @@ define i64 @atomicrmw_max_i64_acquire(i6
 ; RV32IA-NEXT:    lw a1, 4(sp)
 ; RV32IA-NEXT:    lw a2, 0(sp)
 ; RV32IA-NEXT:    beqz a0, .LBB201_1
-; RV32IA-NEXT:  # %bb.8: # %atomicrmw.end
+; RV32IA-NEXT:  # %bb.7: # %atomicrmw.end
 ; RV32IA-NEXT:    mv a0, a2
 ; RV32IA-NEXT:    lw s3, 12(sp)
 ; RV32IA-NEXT:    lw s2, 16(sp)
@@ -14896,24 +14876,19 @@ define i64 @atomicrmw_max_i64_release(i6
 ; RV32I-NEXT:  # %bb.2: # %atomicrmw.start
 ; RV32I-NEXT:    # in Loop: Header=BB202_1 Depth=1
 ; RV32I-NEXT:    slt a0, s0, a1
-; RV32I-NEXT:    sw a2, 0(sp)
-; RV32I-NEXT:    beqz a0, .LBB202_4
-; RV32I-NEXT:    j .LBB202_5
+; RV32I-NEXT:    j .LBB202_4
 ; RV32I-NEXT:  .LBB202_3: # in Loop: Header=BB202_1 Depth=1
 ; RV32I-NEXT:    sltu a0, s2, a2
-; RV32I-NEXT:    sw a2, 0(sp)
-; RV32I-NEXT:    bnez a0, .LBB202_5
 ; RV32I-NEXT:  .LBB202_4: # %atomicrmw.start
 ; RV32I-NEXT:    # in Loop: Header=BB202_1 Depth=1
-; RV32I-NEXT:    mv a2, s2
-; RV32I-NEXT:  .LBB202_5: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB202_1 Depth=1
+; RV32I-NEXT:    sw a2, 0(sp)
 ; RV32I-NEXT:    mv a3, a1
-; RV32I-NEXT:    bnez a0, .LBB202_7
-; RV32I-NEXT:  # %bb.6: # %atomicrmw.start
+; RV32I-NEXT:    bnez a0, .LBB202_6
+; RV32I-NEXT:  # %bb.5: # %atomicrmw.start
 ; RV32I-NEXT:    # in Loop: Header=BB202_1 Depth=1
+; RV32I-NEXT:    mv a2, s2
 ; RV32I-NEXT:    mv a3, s0
-; RV32I-NEXT:  .LBB202_7: # %atomicrmw.start
+; RV32I-NEXT:  .LBB202_6: # %atomicrmw.start
 ; RV32I-NEXT:    # in Loop: Header=BB202_1 Depth=1
 ; RV32I-NEXT:    sw a1, 4(sp)
 ; RV32I-NEXT:    mv a0, s1
@@ -14924,7 +14899,7 @@ define i64 @atomicrmw_max_i64_release(i6
 ; RV32I-NEXT:    lw a1, 4(sp)
 ; RV32I-NEXT:    lw a2, 0(sp)
 ; RV32I-NEXT:    beqz a0, .LBB202_1
-; RV32I-NEXT:  # %bb.8: # %atomicrmw.end
+; RV32I-NEXT:  # %bb.7: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a2
 ; RV32I-NEXT:    lw s3, 12(sp)
 ; RV32I-NEXT:    lw s2, 16(sp)
@@ -14954,24 +14929,19 @@ define i64 @atomicrmw_max_i64_release(i6
 ; RV32IA-NEXT:  # %bb.2: # %atomicrmw.start
 ; RV32IA-NEXT:    # in Loop: Header=BB202_1 Depth=1
 ; RV32IA-NEXT:    slt a0, s0, a1
-; RV32IA-NEXT:    sw a2, 0(sp)
-; RV32IA-NEXT:    beqz a0, .LBB202_4
-; RV32IA-NEXT:    j .LBB202_5
+; RV32IA-NEXT:    j .LBB202_4
 ; RV32IA-NEXT:  .LBB202_3: # in Loop: Header=BB202_1 Depth=1
 ; RV32IA-NEXT:    sltu a0, s2, a2
-; RV32IA-NEXT:    sw a2, 0(sp)
-; RV32IA-NEXT:    bnez a0, .LBB202_5
 ; RV32IA-NEXT:  .LBB202_4: # %atomicrmw.start
 ; RV32IA-NEXT:    # in Loop: Header=BB202_1 Depth=1
-; RV32IA-NEXT:    mv a2, s2
-; RV32IA-NEXT:  .LBB202_5: # %atomicrmw.start
-; RV32IA-NEXT:    # in Loop: Header=BB202_1 Depth=1
+; RV32IA-NEXT:    sw a2, 0(sp)
 ; RV32IA-NEXT:    mv a3, a1
-; RV32IA-NEXT:    bnez a0, .LBB202_7
-; RV32IA-NEXT:  # %bb.6: # %atomicrmw.start
+; RV32IA-NEXT:    bnez a0, .LBB202_6
+; RV32IA-NEXT:  # %bb.5: # %atomicrmw.start
 ; RV32IA-NEXT:    # in Loop: Header=BB202_1 Depth=1
+; RV32IA-NEXT:    mv a2, s2
 ; RV32IA-NEXT:    mv a3, s0
-; RV32IA-NEXT:  .LBB202_7: # %atomicrmw.start
+; RV32IA-NEXT:  .LBB202_6: # %atomicrmw.start
 ; RV32IA-NEXT:    # in Loop: Header=BB202_1 Depth=1
 ; RV32IA-NEXT:    sw a1, 4(sp)
 ; RV32IA-NEXT:    mv a0, s1
@@ -14982,7 +14952,7 @@ define i64 @atomicrmw_max_i64_release(i6
 ; RV32IA-NEXT:    lw a1, 4(sp)
 ; RV32IA-NEXT:    lw a2, 0(sp)
 ; RV32IA-NEXT:    beqz a0, .LBB202_1
-; RV32IA-NEXT:  # %bb.8: # %atomicrmw.end
+; RV32IA-NEXT:  # %bb.7: # %atomicrmw.end
 ; RV32IA-NEXT:    mv a0, a2
 ; RV32IA-NEXT:    lw s3, 12(sp)
 ; RV32IA-NEXT:    lw s2, 16(sp)
@@ -15057,24 +15027,19 @@ define i64 @atomicrmw_max_i64_acq_rel(i6
 ; RV32I-NEXT:  # %bb.2: # %atomicrmw.start
 ; RV32I-NEXT:    # in Loop: Header=BB203_1 Depth=1
 ; RV32I-NEXT:    slt a0, s0, a1
-; RV32I-NEXT:    sw a2, 0(sp)
-; RV32I-NEXT:    beqz a0, .LBB203_4
-; RV32I-NEXT:    j .LBB203_5
+; RV32I-NEXT:    j .LBB203_4
 ; RV32I-NEXT:  .LBB203_3: # in Loop: Header=BB203_1 Depth=1
 ; RV32I-NEXT:    sltu a0, s2, a2
-; RV32I-NEXT:    sw a2, 0(sp)
-; RV32I-NEXT:    bnez a0, .LBB203_5
 ; RV32I-NEXT:  .LBB203_4: # %atomicrmw.start
 ; RV32I-NEXT:    # in Loop: Header=BB203_1 Depth=1
-; RV32I-NEXT:    mv a2, s2
-; RV32I-NEXT:  .LBB203_5: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB203_1 Depth=1
+; RV32I-NEXT:    sw a2, 0(sp)
 ; RV32I-NEXT:    mv a3, a1
-; RV32I-NEXT:    bnez a0, .LBB203_7
-; RV32I-NEXT:  # %bb.6: # %atomicrmw.start
+; RV32I-NEXT:    bnez a0, .LBB203_6
+; RV32I-NEXT:  # %bb.5: # %atomicrmw.start
 ; RV32I-NEXT:    # in Loop: Header=BB203_1 Depth=1
+; RV32I-NEXT:    mv a2, s2
 ; RV32I-NEXT:    mv a3, s0
-; RV32I-NEXT:  .LBB203_7: # %atomicrmw.start
+; RV32I-NEXT:  .LBB203_6: # %atomicrmw.start
 ; RV32I-NEXT:    # in Loop: Header=BB203_1 Depth=1
 ; RV32I-NEXT:    sw a1, 4(sp)
 ; RV32I-NEXT:    mv a0, s1
@@ -15085,7 +15050,7 @@ define i64 @atomicrmw_max_i64_acq_rel(i6
 ; RV32I-NEXT:    lw a1, 4(sp)
 ; RV32I-NEXT:    lw a2, 0(sp)
 ; RV32I-NEXT:    beqz a0, .LBB203_1
-; RV32I-NEXT:  # %bb.8: # %atomicrmw.end
+; RV32I-NEXT:  # %bb.7: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a2
 ; RV32I-NEXT:    lw s3, 12(sp)
 ; RV32I-NEXT:    lw s2, 16(sp)
@@ -15115,24 +15080,19 @@ define i64 @atomicrmw_max_i64_acq_rel(i6
 ; RV32IA-NEXT:  # %bb.2: # %atomicrmw.start
 ; RV32IA-NEXT:    # in Loop: Header=BB203_1 Depth=1
 ; RV32IA-NEXT:    slt a0, s0, a1
-; RV32IA-NEXT:    sw a2, 0(sp)
-; RV32IA-NEXT:    beqz a0, .LBB203_4
-; RV32IA-NEXT:    j .LBB203_5
+; RV32IA-NEXT:    j .LBB203_4
 ; RV32IA-NEXT:  .LBB203_3: # in Loop: Header=BB203_1 Depth=1
 ; RV32IA-NEXT:    sltu a0, s2, a2
-; RV32IA-NEXT:    sw a2, 0(sp)
-; RV32IA-NEXT:    bnez a0, .LBB203_5
 ; RV32IA-NEXT:  .LBB203_4: # %atomicrmw.start
 ; RV32IA-NEXT:    # in Loop: Header=BB203_1 Depth=1
-; RV32IA-NEXT:    mv a2, s2
-; RV32IA-NEXT:  .LBB203_5: # %atomicrmw.start
-; RV32IA-NEXT:    # in Loop: Header=BB203_1 Depth=1
+; RV32IA-NEXT:    sw a2, 0(sp)
 ; RV32IA-NEXT:    mv a3, a1
-; RV32IA-NEXT:    bnez a0, .LBB203_7
-; RV32IA-NEXT:  # %bb.6: # %atomicrmw.start
+; RV32IA-NEXT:    bnez a0, .LBB203_6
+; RV32IA-NEXT:  # %bb.5: # %atomicrmw.start
 ; RV32IA-NEXT:    # in Loop: Header=BB203_1 Depth=1
+; RV32IA-NEXT:    mv a2, s2
 ; RV32IA-NEXT:    mv a3, s0
-; RV32IA-NEXT:  .LBB203_7: # %atomicrmw.start
+; RV32IA-NEXT:  .LBB203_6: # %atomicrmw.start
 ; RV32IA-NEXT:    # in Loop: Header=BB203_1 Depth=1
 ; RV32IA-NEXT:    sw a1, 4(sp)
 ; RV32IA-NEXT:    mv a0, s1
@@ -15143,7 +15103,7 @@ define i64 @atomicrmw_max_i64_acq_rel(i6
 ; RV32IA-NEXT:    lw a1, 4(sp)
 ; RV32IA-NEXT:    lw a2, 0(sp)
 ; RV32IA-NEXT:    beqz a0, .LBB203_1
-; RV32IA-NEXT:  # %bb.8: # %atomicrmw.end
+; RV32IA-NEXT:  # %bb.7: # %atomicrmw.end
 ; RV32IA-NEXT:    mv a0, a2
 ; RV32IA-NEXT:    lw s3, 12(sp)
 ; RV32IA-NEXT:    lw s2, 16(sp)
@@ -15218,24 +15178,19 @@ define i64 @atomicrmw_max_i64_seq_cst(i6
 ; RV32I-NEXT:  # %bb.2: # %atomicrmw.start
 ; RV32I-NEXT:    # in Loop: Header=BB204_1 Depth=1
 ; RV32I-NEXT:    slt a0, s0, a1
-; RV32I-NEXT:    sw a2, 0(sp)
-; RV32I-NEXT:    beqz a0, .LBB204_4
-; RV32I-NEXT:    j .LBB204_5
+; RV32I-NEXT:    j .LBB204_4
 ; RV32I-NEXT:  .LBB204_3: # in Loop: Header=BB204_1 Depth=1
 ; RV32I-NEXT:    sltu a0, s2, a2
-; RV32I-NEXT:    sw a2, 0(sp)
-; RV32I-NEXT:    bnez a0, .LBB204_5
 ; RV32I-NEXT:  .LBB204_4: # %atomicrmw.start
 ; RV32I-NEXT:    # in Loop: Header=BB204_1 Depth=1
-; RV32I-NEXT:    mv a2, s2
-; RV32I-NEXT:  .LBB204_5: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB204_1 Depth=1
+; RV32I-NEXT:    sw a2, 0(sp)
 ; RV32I-NEXT:    mv a3, a1
-; RV32I-NEXT:    bnez a0, .LBB204_7
-; RV32I-NEXT:  # %bb.6: # %atomicrmw.start
+; RV32I-NEXT:    bnez a0, .LBB204_6
+; RV32I-NEXT:  # %bb.5: # %atomicrmw.start
 ; RV32I-NEXT:    # in Loop: Header=BB204_1 Depth=1
+; RV32I-NEXT:    mv a2, s2
 ; RV32I-NEXT:    mv a3, s0
-; RV32I-NEXT:  .LBB204_7: # %atomicrmw.start
+; RV32I-NEXT:  .LBB204_6: # %atomicrmw.start
 ; RV32I-NEXT:    # in Loop: Header=BB204_1 Depth=1
 ; RV32I-NEXT:    sw a1, 4(sp)
 ; RV32I-NEXT:    mv a0, s1
@@ -15246,7 +15201,7 @@ define i64 @atomicrmw_max_i64_seq_cst(i6
 ; RV32I-NEXT:    lw a1, 4(sp)
 ; RV32I-NEXT:    lw a2, 0(sp)
 ; RV32I-NEXT:    beqz a0, .LBB204_1
-; RV32I-NEXT:  # %bb.8: # %atomicrmw.end
+; RV32I-NEXT:  # %bb.7: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a2
 ; RV32I-NEXT:    lw s3, 12(sp)
 ; RV32I-NEXT:    lw s2, 16(sp)
@@ -15276,24 +15231,19 @@ define i64 @atomicrmw_max_i64_seq_cst(i6
 ; RV32IA-NEXT:  # %bb.2: # %atomicrmw.start
 ; RV32IA-NEXT:    # in Loop: Header=BB204_1 Depth=1
 ; RV32IA-NEXT:    slt a0, s0, a1
-; RV32IA-NEXT:    sw a2, 0(sp)
-; RV32IA-NEXT:    beqz a0, .LBB204_4
-; RV32IA-NEXT:    j .LBB204_5
+; RV32IA-NEXT:    j .LBB204_4
 ; RV32IA-NEXT:  .LBB204_3: # in Loop: Header=BB204_1 Depth=1
 ; RV32IA-NEXT:    sltu a0, s2, a2
-; RV32IA-NEXT:    sw a2, 0(sp)
-; RV32IA-NEXT:    bnez a0, .LBB204_5
 ; RV32IA-NEXT:  .LBB204_4: # %atomicrmw.start
 ; RV32IA-NEXT:    # in Loop: Header=BB204_1 Depth=1
-; RV32IA-NEXT:    mv a2, s2
-; RV32IA-NEXT:  .LBB204_5: # %atomicrmw.start
-; RV32IA-NEXT:    # in Loop: Header=BB204_1 Depth=1
+; RV32IA-NEXT:    sw a2, 0(sp)
 ; RV32IA-NEXT:    mv a3, a1
-; RV32IA-NEXT:    bnez a0, .LBB204_7
-; RV32IA-NEXT:  # %bb.6: # %atomicrmw.start
+; RV32IA-NEXT:    bnez a0, .LBB204_6
+; RV32IA-NEXT:  # %bb.5: # %atomicrmw.start
 ; RV32IA-NEXT:    # in Loop: Header=BB204_1 Depth=1
+; RV32IA-NEXT:    mv a2, s2
 ; RV32IA-NEXT:    mv a3, s0
-; RV32IA-NEXT:  .LBB204_7: # %atomicrmw.start
+; RV32IA-NEXT:  .LBB204_6: # %atomicrmw.start
 ; RV32IA-NEXT:    # in Loop: Header=BB204_1 Depth=1
 ; RV32IA-NEXT:    sw a1, 4(sp)
 ; RV32IA-NEXT:    mv a0, s1
@@ -15304,7 +15254,7 @@ define i64 @atomicrmw_max_i64_seq_cst(i6
 ; RV32IA-NEXT:    lw a1, 4(sp)
 ; RV32IA-NEXT:    lw a2, 0(sp)
 ; RV32IA-NEXT:    beqz a0, .LBB204_1
-; RV32IA-NEXT:  # %bb.8: # %atomicrmw.end
+; RV32IA-NEXT:  # %bb.7: # %atomicrmw.end
 ; RV32IA-NEXT:    mv a0, a2
 ; RV32IA-NEXT:    lw s3, 12(sp)
 ; RV32IA-NEXT:    lw s2, 16(sp)
@@ -15386,18 +15336,13 @@ define i64 @atomicrmw_min_i64_monotonic(
 ; RV32I-NEXT:    # in Loop: Header=BB205_1 Depth=1
 ; RV32I-NEXT:    xori a0, a0, 1
 ; RV32I-NEXT:    sw a2, 0(sp)
+; RV32I-NEXT:    mv a3, a1
 ; RV32I-NEXT:    bnez a0, .LBB205_6
 ; RV32I-NEXT:  # %bb.5: # %atomicrmw.start
 ; RV32I-NEXT:    # in Loop: Header=BB205_1 Depth=1
 ; RV32I-NEXT:    mv a2, s2
-; RV32I-NEXT:  .LBB205_6: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB205_1 Depth=1
-; RV32I-NEXT:    mv a3, a1
-; RV32I-NEXT:    bnez a0, .LBB205_8
-; RV32I-NEXT:  # %bb.7: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB205_1 Depth=1
 ; RV32I-NEXT:    mv a3, s0
-; RV32I-NEXT:  .LBB205_8: # %atomicrmw.start
+; RV32I-NEXT:  .LBB205_6: # %atomicrmw.start
 ; RV32I-NEXT:    # in Loop: Header=BB205_1 Depth=1
 ; RV32I-NEXT:    sw a1, 4(sp)
 ; RV32I-NEXT:    mv a0, s1
@@ -15408,7 +15353,7 @@ define i64 @atomicrmw_min_i64_monotonic(
 ; RV32I-NEXT:    lw a1, 4(sp)
 ; RV32I-NEXT:    lw a2, 0(sp)
 ; RV32I-NEXT:    beqz a0, .LBB205_1
-; RV32I-NEXT:  # %bb.9: # %atomicrmw.end
+; RV32I-NEXT:  # %bb.7: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a2
 ; RV32I-NEXT:    lw s3, 12(sp)
 ; RV32I-NEXT:    lw s2, 16(sp)
@@ -15445,18 +15390,13 @@ define i64 @atomicrmw_min_i64_monotonic(
 ; RV32IA-NEXT:    # in Loop: Header=BB205_1 Depth=1
 ; RV32IA-NEXT:    xori a0, a0, 1
 ; RV32IA-NEXT:    sw a2, 0(sp)
+; RV32IA-NEXT:    mv a3, a1
 ; RV32IA-NEXT:    bnez a0, .LBB205_6
 ; RV32IA-NEXT:  # %bb.5: # %atomicrmw.start
 ; RV32IA-NEXT:    # in Loop: Header=BB205_1 Depth=1
 ; RV32IA-NEXT:    mv a2, s2
-; RV32IA-NEXT:  .LBB205_6: # %atomicrmw.start
-; RV32IA-NEXT:    # in Loop: Header=BB205_1 Depth=1
-; RV32IA-NEXT:    mv a3, a1
-; RV32IA-NEXT:    bnez a0, .LBB205_8
-; RV32IA-NEXT:  # %bb.7: # %atomicrmw.start
-; RV32IA-NEXT:    # in Loop: Header=BB205_1 Depth=1
 ; RV32IA-NEXT:    mv a3, s0
-; RV32IA-NEXT:  .LBB205_8: # %atomicrmw.start
+; RV32IA-NEXT:  .LBB205_6: # %atomicrmw.start
 ; RV32IA-NEXT:    # in Loop: Header=BB205_1 Depth=1
 ; RV32IA-NEXT:    sw a1, 4(sp)
 ; RV32IA-NEXT:    mv a0, s1
@@ -15467,7 +15407,7 @@ define i64 @atomicrmw_min_i64_monotonic(
 ; RV32IA-NEXT:    lw a1, 4(sp)
 ; RV32IA-NEXT:    lw a2, 0(sp)
 ; RV32IA-NEXT:    beqz a0, .LBB205_1
-; RV32IA-NEXT:  # %bb.9: # %atomicrmw.end
+; RV32IA-NEXT:  # %bb.7: # %atomicrmw.end
 ; RV32IA-NEXT:    mv a0, a2
 ; RV32IA-NEXT:    lw s3, 12(sp)
 ; RV32IA-NEXT:    lw s2, 16(sp)
@@ -15549,18 +15489,13 @@ define i64 @atomicrmw_min_i64_acquire(i6
 ; RV32I-NEXT:    # in Loop: Header=BB206_1 Depth=1
 ; RV32I-NEXT:    xori a0, a0, 1
 ; RV32I-NEXT:    sw a2, 0(sp)
+; RV32I-NEXT:    mv a3, a1
 ; RV32I-NEXT:    bnez a0, .LBB206_6
 ; RV32I-NEXT:  # %bb.5: # %atomicrmw.start
 ; RV32I-NEXT:    # in Loop: Header=BB206_1 Depth=1
 ; RV32I-NEXT:    mv a2, s2
-; RV32I-NEXT:  .LBB206_6: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB206_1 Depth=1
-; RV32I-NEXT:    mv a3, a1
-; RV32I-NEXT:    bnez a0, .LBB206_8
-; RV32I-NEXT:  # %bb.7: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB206_1 Depth=1
 ; RV32I-NEXT:    mv a3, s0
-; RV32I-NEXT:  .LBB206_8: # %atomicrmw.start
+; RV32I-NEXT:  .LBB206_6: # %atomicrmw.start
 ; RV32I-NEXT:    # in Loop: Header=BB206_1 Depth=1
 ; RV32I-NEXT:    sw a1, 4(sp)
 ; RV32I-NEXT:    mv a0, s1
@@ -15571,7 +15506,7 @@ define i64 @atomicrmw_min_i64_acquire(i6
 ; RV32I-NEXT:    lw a1, 4(sp)
 ; RV32I-NEXT:    lw a2, 0(sp)
 ; RV32I-NEXT:    beqz a0, .LBB206_1
-; RV32I-NEXT:  # %bb.9: # %atomicrmw.end
+; RV32I-NEXT:  # %bb.7: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a2
 ; RV32I-NEXT:    lw s3, 12(sp)
 ; RV32I-NEXT:    lw s2, 16(sp)
@@ -15608,18 +15543,13 @@ define i64 @atomicrmw_min_i64_acquire(i6
 ; RV32IA-NEXT:    # in Loop: Header=BB206_1 Depth=1
 ; RV32IA-NEXT:    xori a0, a0, 1
 ; RV32IA-NEXT:    sw a2, 0(sp)
+; RV32IA-NEXT:    mv a3, a1
 ; RV32IA-NEXT:    bnez a0, .LBB206_6
 ; RV32IA-NEXT:  # %bb.5: # %atomicrmw.start
 ; RV32IA-NEXT:    # in Loop: Header=BB206_1 Depth=1
 ; RV32IA-NEXT:    mv a2, s2
-; RV32IA-NEXT:  .LBB206_6: # %atomicrmw.start
-; RV32IA-NEXT:    # in Loop: Header=BB206_1 Depth=1
-; RV32IA-NEXT:    mv a3, a1
-; RV32IA-NEXT:    bnez a0, .LBB206_8
-; RV32IA-NEXT:  # %bb.7: # %atomicrmw.start
-; RV32IA-NEXT:    # in Loop: Header=BB206_1 Depth=1
 ; RV32IA-NEXT:    mv a3, s0
-; RV32IA-NEXT:  .LBB206_8: # %atomicrmw.start
+; RV32IA-NEXT:  .LBB206_6: # %atomicrmw.start
 ; RV32IA-NEXT:    # in Loop: Header=BB206_1 Depth=1
 ; RV32IA-NEXT:    sw a1, 4(sp)
 ; RV32IA-NEXT:    mv a0, s1
@@ -15630,7 +15560,7 @@ define i64 @atomicrmw_min_i64_acquire(i6
 ; RV32IA-NEXT:    lw a1, 4(sp)
 ; RV32IA-NEXT:    lw a2, 0(sp)
 ; RV32IA-NEXT:    beqz a0, .LBB206_1
-; RV32IA-NEXT:  # %bb.9: # %atomicrmw.end
+; RV32IA-NEXT:  # %bb.7: # %atomicrmw.end
 ; RV32IA-NEXT:    mv a0, a2
 ; RV32IA-NEXT:    lw s3, 12(sp)
 ; RV32IA-NEXT:    lw s2, 16(sp)
@@ -15712,18 +15642,13 @@ define i64 @atomicrmw_min_i64_release(i6
 ; RV32I-NEXT:    # in Loop: Header=BB207_1 Depth=1
 ; RV32I-NEXT:    xori a0, a0, 1
 ; RV32I-NEXT:    sw a2, 0(sp)
+; RV32I-NEXT:    mv a3, a1
 ; RV32I-NEXT:    bnez a0, .LBB207_6
 ; RV32I-NEXT:  # %bb.5: # %atomicrmw.start
 ; RV32I-NEXT:    # in Loop: Header=BB207_1 Depth=1
 ; RV32I-NEXT:    mv a2, s2
-; RV32I-NEXT:  .LBB207_6: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB207_1 Depth=1
-; RV32I-NEXT:    mv a3, a1
-; RV32I-NEXT:    bnez a0, .LBB207_8
-; RV32I-NEXT:  # %bb.7: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB207_1 Depth=1
 ; RV32I-NEXT:    mv a3, s0
-; RV32I-NEXT:  .LBB207_8: # %atomicrmw.start
+; RV32I-NEXT:  .LBB207_6: # %atomicrmw.start
 ; RV32I-NEXT:    # in Loop: Header=BB207_1 Depth=1
 ; RV32I-NEXT:    sw a1, 4(sp)
 ; RV32I-NEXT:    mv a0, s1
@@ -15734,7 +15659,7 @@ define i64 @atomicrmw_min_i64_release(i6
 ; RV32I-NEXT:    lw a1, 4(sp)
 ; RV32I-NEXT:    lw a2, 0(sp)
 ; RV32I-NEXT:    beqz a0, .LBB207_1
-; RV32I-NEXT:  # %bb.9: # %atomicrmw.end
+; RV32I-NEXT:  # %bb.7: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a2
 ; RV32I-NEXT:    lw s3, 12(sp)
 ; RV32I-NEXT:    lw s2, 16(sp)
@@ -15771,18 +15696,13 @@ define i64 @atomicrmw_min_i64_release(i6
 ; RV32IA-NEXT:    # in Loop: Header=BB207_1 Depth=1
 ; RV32IA-NEXT:    xori a0, a0, 1
 ; RV32IA-NEXT:    sw a2, 0(sp)
+; RV32IA-NEXT:    mv a3, a1
 ; RV32IA-NEXT:    bnez a0, .LBB207_6
 ; RV32IA-NEXT:  # %bb.5: # %atomicrmw.start
 ; RV32IA-NEXT:    # in Loop: Header=BB207_1 Depth=1
 ; RV32IA-NEXT:    mv a2, s2
-; RV32IA-NEXT:  .LBB207_6: # %atomicrmw.start
-; RV32IA-NEXT:    # in Loop: Header=BB207_1 Depth=1
-; RV32IA-NEXT:    mv a3, a1
-; RV32IA-NEXT:    bnez a0, .LBB207_8
-; RV32IA-NEXT:  # %bb.7: # %atomicrmw.start
-; RV32IA-NEXT:    # in Loop: Header=BB207_1 Depth=1
 ; RV32IA-NEXT:    mv a3, s0
-; RV32IA-NEXT:  .LBB207_8: # %atomicrmw.start
+; RV32IA-NEXT:  .LBB207_6: # %atomicrmw.start
 ; RV32IA-NEXT:    # in Loop: Header=BB207_1 Depth=1
 ; RV32IA-NEXT:    sw a1, 4(sp)
 ; RV32IA-NEXT:    mv a0, s1
@@ -15793,7 +15713,7 @@ define i64 @atomicrmw_min_i64_release(i6
 ; RV32IA-NEXT:    lw a1, 4(sp)
 ; RV32IA-NEXT:    lw a2, 0(sp)
 ; RV32IA-NEXT:    beqz a0, .LBB207_1
-; RV32IA-NEXT:  # %bb.9: # %atomicrmw.end
+; RV32IA-NEXT:  # %bb.7: # %atomicrmw.end
 ; RV32IA-NEXT:    mv a0, a2
 ; RV32IA-NEXT:    lw s3, 12(sp)
 ; RV32IA-NEXT:    lw s2, 16(sp)
@@ -15875,18 +15795,13 @@ define i64 @atomicrmw_min_i64_acq_rel(i6
 ; RV32I-NEXT:    # in Loop: Header=BB208_1 Depth=1
 ; RV32I-NEXT:    xori a0, a0, 1
 ; RV32I-NEXT:    sw a2, 0(sp)
+; RV32I-NEXT:    mv a3, a1
 ; RV32I-NEXT:    bnez a0, .LBB208_6
 ; RV32I-NEXT:  # %bb.5: # %atomicrmw.start
 ; RV32I-NEXT:    # in Loop: Header=BB208_1 Depth=1
 ; RV32I-NEXT:    mv a2, s2
-; RV32I-NEXT:  .LBB208_6: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB208_1 Depth=1
-; RV32I-NEXT:    mv a3, a1
-; RV32I-NEXT:    bnez a0, .LBB208_8
-; RV32I-NEXT:  # %bb.7: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB208_1 Depth=1
 ; RV32I-NEXT:    mv a3, s0
-; RV32I-NEXT:  .LBB208_8: # %atomicrmw.start
+; RV32I-NEXT:  .LBB208_6: # %atomicrmw.start
 ; RV32I-NEXT:    # in Loop: Header=BB208_1 Depth=1
 ; RV32I-NEXT:    sw a1, 4(sp)
 ; RV32I-NEXT:    mv a0, s1
@@ -15897,7 +15812,7 @@ define i64 @atomicrmw_min_i64_acq_rel(i6
 ; RV32I-NEXT:    lw a1, 4(sp)
 ; RV32I-NEXT:    lw a2, 0(sp)
 ; RV32I-NEXT:    beqz a0, .LBB208_1
-; RV32I-NEXT:  # %bb.9: # %atomicrmw.end
+; RV32I-NEXT:  # %bb.7: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a2
 ; RV32I-NEXT:    lw s3, 12(sp)
 ; RV32I-NEXT:    lw s2, 16(sp)
@@ -15934,18 +15849,13 @@ define i64 @atomicrmw_min_i64_acq_rel(i6
 ; RV32IA-NEXT:    # in Loop: Header=BB208_1 Depth=1
 ; RV32IA-NEXT:    xori a0, a0, 1
 ; RV32IA-NEXT:    sw a2, 0(sp)
+; RV32IA-NEXT:    mv a3, a1
 ; RV32IA-NEXT:    bnez a0, .LBB208_6
 ; RV32IA-NEXT:  # %bb.5: # %atomicrmw.start
 ; RV32IA-NEXT:    # in Loop: Header=BB208_1 Depth=1
 ; RV32IA-NEXT:    mv a2, s2
-; RV32IA-NEXT:  .LBB208_6: # %atomicrmw.start
-; RV32IA-NEXT:    # in Loop: Header=BB208_1 Depth=1
-; RV32IA-NEXT:    mv a3, a1
-; RV32IA-NEXT:    bnez a0, .LBB208_8
-; RV32IA-NEXT:  # %bb.7: # %atomicrmw.start
-; RV32IA-NEXT:    # in Loop: Header=BB208_1 Depth=1
 ; RV32IA-NEXT:    mv a3, s0
-; RV32IA-NEXT:  .LBB208_8: # %atomicrmw.start
+; RV32IA-NEXT:  .LBB208_6: # %atomicrmw.start
 ; RV32IA-NEXT:    # in Loop: Header=BB208_1 Depth=1
 ; RV32IA-NEXT:    sw a1, 4(sp)
 ; RV32IA-NEXT:    mv a0, s1
@@ -15956,7 +15866,7 @@ define i64 @atomicrmw_min_i64_acq_rel(i6
 ; RV32IA-NEXT:    lw a1, 4(sp)
 ; RV32IA-NEXT:    lw a2, 0(sp)
 ; RV32IA-NEXT:    beqz a0, .LBB208_1
-; RV32IA-NEXT:  # %bb.9: # %atomicrmw.end
+; RV32IA-NEXT:  # %bb.7: # %atomicrmw.end
 ; RV32IA-NEXT:    mv a0, a2
 ; RV32IA-NEXT:    lw s3, 12(sp)
 ; RV32IA-NEXT:    lw s2, 16(sp)
@@ -16038,18 +15948,13 @@ define i64 @atomicrmw_min_i64_seq_cst(i6
 ; RV32I-NEXT:    # in Loop: Header=BB209_1 Depth=1
 ; RV32I-NEXT:    xori a0, a0, 1
 ; RV32I-NEXT:    sw a2, 0(sp)
+; RV32I-NEXT:    mv a3, a1
 ; RV32I-NEXT:    bnez a0, .LBB209_6
 ; RV32I-NEXT:  # %bb.5: # %atomicrmw.start
 ; RV32I-NEXT:    # in Loop: Header=BB209_1 Depth=1
 ; RV32I-NEXT:    mv a2, s2
-; RV32I-NEXT:  .LBB209_6: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB209_1 Depth=1
-; RV32I-NEXT:    mv a3, a1
-; RV32I-NEXT:    bnez a0, .LBB209_8
-; RV32I-NEXT:  # %bb.7: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB209_1 Depth=1
 ; RV32I-NEXT:    mv a3, s0
-; RV32I-NEXT:  .LBB209_8: # %atomicrmw.start
+; RV32I-NEXT:  .LBB209_6: # %atomicrmw.start
 ; RV32I-NEXT:    # in Loop: Header=BB209_1 Depth=1
 ; RV32I-NEXT:    sw a1, 4(sp)
 ; RV32I-NEXT:    mv a0, s1
@@ -16060,7 +15965,7 @@ define i64 @atomicrmw_min_i64_seq_cst(i6
 ; RV32I-NEXT:    lw a1, 4(sp)
 ; RV32I-NEXT:    lw a2, 0(sp)
 ; RV32I-NEXT:    beqz a0, .LBB209_1
-; RV32I-NEXT:  # %bb.9: # %atomicrmw.end
+; RV32I-NEXT:  # %bb.7: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a2
 ; RV32I-NEXT:    lw s3, 12(sp)
 ; RV32I-NEXT:    lw s2, 16(sp)
@@ -16097,18 +16002,13 @@ define i64 @atomicrmw_min_i64_seq_cst(i6
 ; RV32IA-NEXT:    # in Loop: Header=BB209_1 Depth=1
 ; RV32IA-NEXT:    xori a0, a0, 1
 ; RV32IA-NEXT:    sw a2, 0(sp)
+; RV32IA-NEXT:    mv a3, a1
 ; RV32IA-NEXT:    bnez a0, .LBB209_6
 ; RV32IA-NEXT:  # %bb.5: # %atomicrmw.start
 ; RV32IA-NEXT:    # in Loop: Header=BB209_1 Depth=1
 ; RV32IA-NEXT:    mv a2, s2
-; RV32IA-NEXT:  .LBB209_6: # %atomicrmw.start
-; RV32IA-NEXT:    # in Loop: Header=BB209_1 Depth=1
-; RV32IA-NEXT:    mv a3, a1
-; RV32IA-NEXT:    bnez a0, .LBB209_8
-; RV32IA-NEXT:  # %bb.7: # %atomicrmw.start
-; RV32IA-NEXT:    # in Loop: Header=BB209_1 Depth=1
 ; RV32IA-NEXT:    mv a3, s0
-; RV32IA-NEXT:  .LBB209_8: # %atomicrmw.start
+; RV32IA-NEXT:  .LBB209_6: # %atomicrmw.start
 ; RV32IA-NEXT:    # in Loop: Header=BB209_1 Depth=1
 ; RV32IA-NEXT:    sw a1, 4(sp)
 ; RV32IA-NEXT:    mv a0, s1
@@ -16119,7 +16019,7 @@ define i64 @atomicrmw_min_i64_seq_cst(i6
 ; RV32IA-NEXT:    lw a1, 4(sp)
 ; RV32IA-NEXT:    lw a2, 0(sp)
 ; RV32IA-NEXT:    beqz a0, .LBB209_1
-; RV32IA-NEXT:  # %bb.9: # %atomicrmw.end
+; RV32IA-NEXT:  # %bb.7: # %atomicrmw.end
 ; RV32IA-NEXT:    mv a0, a2
 ; RV32IA-NEXT:    lw s3, 12(sp)
 ; RV32IA-NEXT:    lw s2, 16(sp)
@@ -16194,24 +16094,19 @@ define i64 @atomicrmw_umax_i64_monotonic
 ; RV32I-NEXT:  # %bb.2: # %atomicrmw.start
 ; RV32I-NEXT:    # in Loop: Header=BB210_1 Depth=1
 ; RV32I-NEXT:    sltu a0, s0, a1
-; RV32I-NEXT:    sw a2, 0(sp)
-; RV32I-NEXT:    beqz a0, .LBB210_4
-; RV32I-NEXT:    j .LBB210_5
+; RV32I-NEXT:    j .LBB210_4
 ; RV32I-NEXT:  .LBB210_3: # in Loop: Header=BB210_1 Depth=1
 ; RV32I-NEXT:    sltu a0, s2, a2
-; RV32I-NEXT:    sw a2, 0(sp)
-; RV32I-NEXT:    bnez a0, .LBB210_5
 ; RV32I-NEXT:  .LBB210_4: # %atomicrmw.start
 ; RV32I-NEXT:    # in Loop: Header=BB210_1 Depth=1
-; RV32I-NEXT:    mv a2, s2
-; RV32I-NEXT:  .LBB210_5: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB210_1 Depth=1
+; RV32I-NEXT:    sw a2, 0(sp)
 ; RV32I-NEXT:    mv a3, a1
-; RV32I-NEXT:    bnez a0, .LBB210_7
-; RV32I-NEXT:  # %bb.6: # %atomicrmw.start
+; RV32I-NEXT:    bnez a0, .LBB210_6
+; RV32I-NEXT:  # %bb.5: # %atomicrmw.start
 ; RV32I-NEXT:    # in Loop: Header=BB210_1 Depth=1
+; RV32I-NEXT:    mv a2, s2
 ; RV32I-NEXT:    mv a3, s0
-; RV32I-NEXT:  .LBB210_7: # %atomicrmw.start
+; RV32I-NEXT:  .LBB210_6: # %atomicrmw.start
 ; RV32I-NEXT:    # in Loop: Header=BB210_1 Depth=1
 ; RV32I-NEXT:    sw a1, 4(sp)
 ; RV32I-NEXT:    mv a0, s1
@@ -16222,7 +16117,7 @@ define i64 @atomicrmw_umax_i64_monotonic
 ; RV32I-NEXT:    lw a1, 4(sp)
 ; RV32I-NEXT:    lw a2, 0(sp)
 ; RV32I-NEXT:    beqz a0, .LBB210_1
-; RV32I-NEXT:  # %bb.8: # %atomicrmw.end
+; RV32I-NEXT:  # %bb.7: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a2
 ; RV32I-NEXT:    lw s3, 12(sp)
 ; RV32I-NEXT:    lw s2, 16(sp)
@@ -16252,24 +16147,19 @@ define i64 @atomicrmw_umax_i64_monotonic
 ; RV32IA-NEXT:  # %bb.2: # %atomicrmw.start
 ; RV32IA-NEXT:    # in Loop: Header=BB210_1 Depth=1
 ; RV32IA-NEXT:    sltu a0, s0, a1
-; RV32IA-NEXT:    sw a2, 0(sp)
-; RV32IA-NEXT:    beqz a0, .LBB210_4
-; RV32IA-NEXT:    j .LBB210_5
+; RV32IA-NEXT:    j .LBB210_4
 ; RV32IA-NEXT:  .LBB210_3: # in Loop: Header=BB210_1 Depth=1
 ; RV32IA-NEXT:    sltu a0, s2, a2
-; RV32IA-NEXT:    sw a2, 0(sp)
-; RV32IA-NEXT:    bnez a0, .LBB210_5
 ; RV32IA-NEXT:  .LBB210_4: # %atomicrmw.start
 ; RV32IA-NEXT:    # in Loop: Header=BB210_1 Depth=1
-; RV32IA-NEXT:    mv a2, s2
-; RV32IA-NEXT:  .LBB210_5: # %atomicrmw.start
-; RV32IA-NEXT:    # in Loop: Header=BB210_1 Depth=1
+; RV32IA-NEXT:    sw a2, 0(sp)
 ; RV32IA-NEXT:    mv a3, a1
-; RV32IA-NEXT:    bnez a0, .LBB210_7
-; RV32IA-NEXT:  # %bb.6: # %atomicrmw.start
+; RV32IA-NEXT:    bnez a0, .LBB210_6
+; RV32IA-NEXT:  # %bb.5: # %atomicrmw.start
 ; RV32IA-NEXT:    # in Loop: Header=BB210_1 Depth=1
+; RV32IA-NEXT:    mv a2, s2
 ; RV32IA-NEXT:    mv a3, s0
-; RV32IA-NEXT:  .LBB210_7: # %atomicrmw.start
+; RV32IA-NEXT:  .LBB210_6: # %atomicrmw.start
 ; RV32IA-NEXT:    # in Loop: Header=BB210_1 Depth=1
 ; RV32IA-NEXT:    sw a1, 4(sp)
 ; RV32IA-NEXT:    mv a0, s1
@@ -16280,7 +16170,7 @@ define i64 @atomicrmw_umax_i64_monotonic
 ; RV32IA-NEXT:    lw a1, 4(sp)
 ; RV32IA-NEXT:    lw a2, 0(sp)
 ; RV32IA-NEXT:    beqz a0, .LBB210_1
-; RV32IA-NEXT:  # %bb.8: # %atomicrmw.end
+; RV32IA-NEXT:  # %bb.7: # %atomicrmw.end
 ; RV32IA-NEXT:    mv a0, a2
 ; RV32IA-NEXT:    lw s3, 12(sp)
 ; RV32IA-NEXT:    lw s2, 16(sp)
@@ -16355,24 +16245,19 @@ define i64 @atomicrmw_umax_i64_acquire(i
 ; RV32I-NEXT:  # %bb.2: # %atomicrmw.start
 ; RV32I-NEXT:    # in Loop: Header=BB211_1 Depth=1
 ; RV32I-NEXT:    sltu a0, s0, a1
-; RV32I-NEXT:    sw a2, 0(sp)
-; RV32I-NEXT:    beqz a0, .LBB211_4
-; RV32I-NEXT:    j .LBB211_5
+; RV32I-NEXT:    j .LBB211_4
 ; RV32I-NEXT:  .LBB211_3: # in Loop: Header=BB211_1 Depth=1
 ; RV32I-NEXT:    sltu a0, s2, a2
-; RV32I-NEXT:    sw a2, 0(sp)
-; RV32I-NEXT:    bnez a0, .LBB211_5
 ; RV32I-NEXT:  .LBB211_4: # %atomicrmw.start
 ; RV32I-NEXT:    # in Loop: Header=BB211_1 Depth=1
-; RV32I-NEXT:    mv a2, s2
-; RV32I-NEXT:  .LBB211_5: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB211_1 Depth=1
+; RV32I-NEXT:    sw a2, 0(sp)
 ; RV32I-NEXT:    mv a3, a1
-; RV32I-NEXT:    bnez a0, .LBB211_7
-; RV32I-NEXT:  # %bb.6: # %atomicrmw.start
+; RV32I-NEXT:    bnez a0, .LBB211_6
+; RV32I-NEXT:  # %bb.5: # %atomicrmw.start
 ; RV32I-NEXT:    # in Loop: Header=BB211_1 Depth=1
+; RV32I-NEXT:    mv a2, s2
 ; RV32I-NEXT:    mv a3, s0
-; RV32I-NEXT:  .LBB211_7: # %atomicrmw.start
+; RV32I-NEXT:  .LBB211_6: # %atomicrmw.start
 ; RV32I-NEXT:    # in Loop: Header=BB211_1 Depth=1
 ; RV32I-NEXT:    sw a1, 4(sp)
 ; RV32I-NEXT:    mv a0, s1
@@ -16383,7 +16268,7 @@ define i64 @atomicrmw_umax_i64_acquire(i
 ; RV32I-NEXT:    lw a1, 4(sp)
 ; RV32I-NEXT:    lw a2, 0(sp)
 ; RV32I-NEXT:    beqz a0, .LBB211_1
-; RV32I-NEXT:  # %bb.8: # %atomicrmw.end
+; RV32I-NEXT:  # %bb.7: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a2
 ; RV32I-NEXT:    lw s3, 12(sp)
 ; RV32I-NEXT:    lw s2, 16(sp)
@@ -16413,24 +16298,19 @@ define i64 @atomicrmw_umax_i64_acquire(i
 ; RV32IA-NEXT:  # %bb.2: # %atomicrmw.start
 ; RV32IA-NEXT:    # in Loop: Header=BB211_1 Depth=1
 ; RV32IA-NEXT:    sltu a0, s0, a1
-; RV32IA-NEXT:    sw a2, 0(sp)
-; RV32IA-NEXT:    beqz a0, .LBB211_4
-; RV32IA-NEXT:    j .LBB211_5
+; RV32IA-NEXT:    j .LBB211_4
 ; RV32IA-NEXT:  .LBB211_3: # in Loop: Header=BB211_1 Depth=1
 ; RV32IA-NEXT:    sltu a0, s2, a2
-; RV32IA-NEXT:    sw a2, 0(sp)
-; RV32IA-NEXT:    bnez a0, .LBB211_5
 ; RV32IA-NEXT:  .LBB211_4: # %atomicrmw.start
 ; RV32IA-NEXT:    # in Loop: Header=BB211_1 Depth=1
-; RV32IA-NEXT:    mv a2, s2
-; RV32IA-NEXT:  .LBB211_5: # %atomicrmw.start
-; RV32IA-NEXT:    # in Loop: Header=BB211_1 Depth=1
+; RV32IA-NEXT:    sw a2, 0(sp)
 ; RV32IA-NEXT:    mv a3, a1
-; RV32IA-NEXT:    bnez a0, .LBB211_7
-; RV32IA-NEXT:  # %bb.6: # %atomicrmw.start
+; RV32IA-NEXT:    bnez a0, .LBB211_6
+; RV32IA-NEXT:  # %bb.5: # %atomicrmw.start
 ; RV32IA-NEXT:    # in Loop: Header=BB211_1 Depth=1
+; RV32IA-NEXT:    mv a2, s2
 ; RV32IA-NEXT:    mv a3, s0
-; RV32IA-NEXT:  .LBB211_7: # %atomicrmw.start
+; RV32IA-NEXT:  .LBB211_6: # %atomicrmw.start
 ; RV32IA-NEXT:    # in Loop: Header=BB211_1 Depth=1
 ; RV32IA-NEXT:    sw a1, 4(sp)
 ; RV32IA-NEXT:    mv a0, s1
@@ -16441,7 +16321,7 @@ define i64 @atomicrmw_umax_i64_acquire(i
 ; RV32IA-NEXT:    lw a1, 4(sp)
 ; RV32IA-NEXT:    lw a2, 0(sp)
 ; RV32IA-NEXT:    beqz a0, .LBB211_1
-; RV32IA-NEXT:  # %bb.8: # %atomicrmw.end
+; RV32IA-NEXT:  # %bb.7: # %atomicrmw.end
 ; RV32IA-NEXT:    mv a0, a2
 ; RV32IA-NEXT:    lw s3, 12(sp)
 ; RV32IA-NEXT:    lw s2, 16(sp)
@@ -16516,24 +16396,19 @@ define i64 @atomicrmw_umax_i64_release(i
 ; RV32I-NEXT:  # %bb.2: # %atomicrmw.start
 ; RV32I-NEXT:    # in Loop: Header=BB212_1 Depth=1
 ; RV32I-NEXT:    sltu a0, s0, a1
-; RV32I-NEXT:    sw a2, 0(sp)
-; RV32I-NEXT:    beqz a0, .LBB212_4
-; RV32I-NEXT:    j .LBB212_5
+; RV32I-NEXT:    j .LBB212_4
 ; RV32I-NEXT:  .LBB212_3: # in Loop: Header=BB212_1 Depth=1
 ; RV32I-NEXT:    sltu a0, s2, a2
-; RV32I-NEXT:    sw a2, 0(sp)
-; RV32I-NEXT:    bnez a0, .LBB212_5
 ; RV32I-NEXT:  .LBB212_4: # %atomicrmw.start
 ; RV32I-NEXT:    # in Loop: Header=BB212_1 Depth=1
-; RV32I-NEXT:    mv a2, s2
-; RV32I-NEXT:  .LBB212_5: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB212_1 Depth=1
+; RV32I-NEXT:    sw a2, 0(sp)
 ; RV32I-NEXT:    mv a3, a1
-; RV32I-NEXT:    bnez a0, .LBB212_7
-; RV32I-NEXT:  # %bb.6: # %atomicrmw.start
+; RV32I-NEXT:    bnez a0, .LBB212_6
+; RV32I-NEXT:  # %bb.5: # %atomicrmw.start
 ; RV32I-NEXT:    # in Loop: Header=BB212_1 Depth=1
+; RV32I-NEXT:    mv a2, s2
 ; RV32I-NEXT:    mv a3, s0
-; RV32I-NEXT:  .LBB212_7: # %atomicrmw.start
+; RV32I-NEXT:  .LBB212_6: # %atomicrmw.start
 ; RV32I-NEXT:    # in Loop: Header=BB212_1 Depth=1
 ; RV32I-NEXT:    sw a1, 4(sp)
 ; RV32I-NEXT:    mv a0, s1
@@ -16544,7 +16419,7 @@ define i64 @atomicrmw_umax_i64_release(i
 ; RV32I-NEXT:    lw a1, 4(sp)
 ; RV32I-NEXT:    lw a2, 0(sp)
 ; RV32I-NEXT:    beqz a0, .LBB212_1
-; RV32I-NEXT:  # %bb.8: # %atomicrmw.end
+; RV32I-NEXT:  # %bb.7: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a2
 ; RV32I-NEXT:    lw s3, 12(sp)
 ; RV32I-NEXT:    lw s2, 16(sp)
@@ -16574,24 +16449,19 @@ define i64 @atomicrmw_umax_i64_release(i
 ; RV32IA-NEXT:  # %bb.2: # %atomicrmw.start
 ; RV32IA-NEXT:    # in Loop: Header=BB212_1 Depth=1
 ; RV32IA-NEXT:    sltu a0, s0, a1
-; RV32IA-NEXT:    sw a2, 0(sp)
-; RV32IA-NEXT:    beqz a0, .LBB212_4
-; RV32IA-NEXT:    j .LBB212_5
+; RV32IA-NEXT:    j .LBB212_4
 ; RV32IA-NEXT:  .LBB212_3: # in Loop: Header=BB212_1 Depth=1
 ; RV32IA-NEXT:    sltu a0, s2, a2
-; RV32IA-NEXT:    sw a2, 0(sp)
-; RV32IA-NEXT:    bnez a0, .LBB212_5
 ; RV32IA-NEXT:  .LBB212_4: # %atomicrmw.start
 ; RV32IA-NEXT:    # in Loop: Header=BB212_1 Depth=1
-; RV32IA-NEXT:    mv a2, s2
-; RV32IA-NEXT:  .LBB212_5: # %atomicrmw.start
-; RV32IA-NEXT:    # in Loop: Header=BB212_1 Depth=1
+; RV32IA-NEXT:    sw a2, 0(sp)
 ; RV32IA-NEXT:    mv a3, a1
-; RV32IA-NEXT:    bnez a0, .LBB212_7
-; RV32IA-NEXT:  # %bb.6: # %atomicrmw.start
+; RV32IA-NEXT:    bnez a0, .LBB212_6
+; RV32IA-NEXT:  # %bb.5: # %atomicrmw.start
 ; RV32IA-NEXT:    # in Loop: Header=BB212_1 Depth=1
+; RV32IA-NEXT:    mv a2, s2
 ; RV32IA-NEXT:    mv a3, s0
-; RV32IA-NEXT:  .LBB212_7: # %atomicrmw.start
+; RV32IA-NEXT:  .LBB212_6: # %atomicrmw.start
 ; RV32IA-NEXT:    # in Loop: Header=BB212_1 Depth=1
 ; RV32IA-NEXT:    sw a1, 4(sp)
 ; RV32IA-NEXT:    mv a0, s1
@@ -16602,7 +16472,7 @@ define i64 @atomicrmw_umax_i64_release(i
 ; RV32IA-NEXT:    lw a1, 4(sp)
 ; RV32IA-NEXT:    lw a2, 0(sp)
 ; RV32IA-NEXT:    beqz a0, .LBB212_1
-; RV32IA-NEXT:  # %bb.8: # %atomicrmw.end
+; RV32IA-NEXT:  # %bb.7: # %atomicrmw.end
 ; RV32IA-NEXT:    mv a0, a2
 ; RV32IA-NEXT:    lw s3, 12(sp)
 ; RV32IA-NEXT:    lw s2, 16(sp)
@@ -16677,24 +16547,19 @@ define i64 @atomicrmw_umax_i64_acq_rel(i
 ; RV32I-NEXT:  # %bb.2: # %atomicrmw.start
 ; RV32I-NEXT:    # in Loop: Header=BB213_1 Depth=1
 ; RV32I-NEXT:    sltu a0, s0, a1
-; RV32I-NEXT:    sw a2, 0(sp)
-; RV32I-NEXT:    beqz a0, .LBB213_4
-; RV32I-NEXT:    j .LBB213_5
+; RV32I-NEXT:    j .LBB213_4
 ; RV32I-NEXT:  .LBB213_3: # in Loop: Header=BB213_1 Depth=1
 ; RV32I-NEXT:    sltu a0, s2, a2
-; RV32I-NEXT:    sw a2, 0(sp)
-; RV32I-NEXT:    bnez a0, .LBB213_5
 ; RV32I-NEXT:  .LBB213_4: # %atomicrmw.start
 ; RV32I-NEXT:    # in Loop: Header=BB213_1 Depth=1
-; RV32I-NEXT:    mv a2, s2
-; RV32I-NEXT:  .LBB213_5: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB213_1 Depth=1
+; RV32I-NEXT:    sw a2, 0(sp)
 ; RV32I-NEXT:    mv a3, a1
-; RV32I-NEXT:    bnez a0, .LBB213_7
-; RV32I-NEXT:  # %bb.6: # %atomicrmw.start
+; RV32I-NEXT:    bnez a0, .LBB213_6
+; RV32I-NEXT:  # %bb.5: # %atomicrmw.start
 ; RV32I-NEXT:    # in Loop: Header=BB213_1 Depth=1
+; RV32I-NEXT:    mv a2, s2
 ; RV32I-NEXT:    mv a3, s0
-; RV32I-NEXT:  .LBB213_7: # %atomicrmw.start
+; RV32I-NEXT:  .LBB213_6: # %atomicrmw.start
 ; RV32I-NEXT:    # in Loop: Header=BB213_1 Depth=1
 ; RV32I-NEXT:    sw a1, 4(sp)
 ; RV32I-NEXT:    mv a0, s1
@@ -16705,7 +16570,7 @@ define i64 @atomicrmw_umax_i64_acq_rel(i
 ; RV32I-NEXT:    lw a1, 4(sp)
 ; RV32I-NEXT:    lw a2, 0(sp)
 ; RV32I-NEXT:    beqz a0, .LBB213_1
-; RV32I-NEXT:  # %bb.8: # %atomicrmw.end
+; RV32I-NEXT:  # %bb.7: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a2
 ; RV32I-NEXT:    lw s3, 12(sp)
 ; RV32I-NEXT:    lw s2, 16(sp)
@@ -16735,24 +16600,19 @@ define i64 @atomicrmw_umax_i64_acq_rel(i
 ; RV32IA-NEXT:  # %bb.2: # %atomicrmw.start
 ; RV32IA-NEXT:    # in Loop: Header=BB213_1 Depth=1
 ; RV32IA-NEXT:    sltu a0, s0, a1
-; RV32IA-NEXT:    sw a2, 0(sp)
-; RV32IA-NEXT:    beqz a0, .LBB213_4
-; RV32IA-NEXT:    j .LBB213_5
+; RV32IA-NEXT:    j .LBB213_4
 ; RV32IA-NEXT:  .LBB213_3: # in Loop: Header=BB213_1 Depth=1
 ; RV32IA-NEXT:    sltu a0, s2, a2
-; RV32IA-NEXT:    sw a2, 0(sp)
-; RV32IA-NEXT:    bnez a0, .LBB213_5
 ; RV32IA-NEXT:  .LBB213_4: # %atomicrmw.start
 ; RV32IA-NEXT:    # in Loop: Header=BB213_1 Depth=1
-; RV32IA-NEXT:    mv a2, s2
-; RV32IA-NEXT:  .LBB213_5: # %atomicrmw.start
-; RV32IA-NEXT:    # in Loop: Header=BB213_1 Depth=1
+; RV32IA-NEXT:    sw a2, 0(sp)
 ; RV32IA-NEXT:    mv a3, a1
-; RV32IA-NEXT:    bnez a0, .LBB213_7
-; RV32IA-NEXT:  # %bb.6: # %atomicrmw.start
+; RV32IA-NEXT:    bnez a0, .LBB213_6
+; RV32IA-NEXT:  # %bb.5: # %atomicrmw.start
 ; RV32IA-NEXT:    # in Loop: Header=BB213_1 Depth=1
+; RV32IA-NEXT:    mv a2, s2
 ; RV32IA-NEXT:    mv a3, s0
-; RV32IA-NEXT:  .LBB213_7: # %atomicrmw.start
+; RV32IA-NEXT:  .LBB213_6: # %atomicrmw.start
 ; RV32IA-NEXT:    # in Loop: Header=BB213_1 Depth=1
 ; RV32IA-NEXT:    sw a1, 4(sp)
 ; RV32IA-NEXT:    mv a0, s1
@@ -16763,7 +16623,7 @@ define i64 @atomicrmw_umax_i64_acq_rel(i
 ; RV32IA-NEXT:    lw a1, 4(sp)
 ; RV32IA-NEXT:    lw a2, 0(sp)
 ; RV32IA-NEXT:    beqz a0, .LBB213_1
-; RV32IA-NEXT:  # %bb.8: # %atomicrmw.end
+; RV32IA-NEXT:  # %bb.7: # %atomicrmw.end
 ; RV32IA-NEXT:    mv a0, a2
 ; RV32IA-NEXT:    lw s3, 12(sp)
 ; RV32IA-NEXT:    lw s2, 16(sp)
@@ -16838,24 +16698,19 @@ define i64 @atomicrmw_umax_i64_seq_cst(i
 ; RV32I-NEXT:  # %bb.2: # %atomicrmw.start
 ; RV32I-NEXT:    # in Loop: Header=BB214_1 Depth=1
 ; RV32I-NEXT:    sltu a0, s0, a1
-; RV32I-NEXT:    sw a2, 0(sp)
-; RV32I-NEXT:    beqz a0, .LBB214_4
-; RV32I-NEXT:    j .LBB214_5
+; RV32I-NEXT:    j .LBB214_4
 ; RV32I-NEXT:  .LBB214_3: # in Loop: Header=BB214_1 Depth=1
 ; RV32I-NEXT:    sltu a0, s2, a2
-; RV32I-NEXT:    sw a2, 0(sp)
-; RV32I-NEXT:    bnez a0, .LBB214_5
 ; RV32I-NEXT:  .LBB214_4: # %atomicrmw.start
 ; RV32I-NEXT:    # in Loop: Header=BB214_1 Depth=1
-; RV32I-NEXT:    mv a2, s2
-; RV32I-NEXT:  .LBB214_5: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB214_1 Depth=1
+; RV32I-NEXT:    sw a2, 0(sp)
 ; RV32I-NEXT:    mv a3, a1
-; RV32I-NEXT:    bnez a0, .LBB214_7
-; RV32I-NEXT:  # %bb.6: # %atomicrmw.start
+; RV32I-NEXT:    bnez a0, .LBB214_6
+; RV32I-NEXT:  # %bb.5: # %atomicrmw.start
 ; RV32I-NEXT:    # in Loop: Header=BB214_1 Depth=1
+; RV32I-NEXT:    mv a2, s2
 ; RV32I-NEXT:    mv a3, s0
-; RV32I-NEXT:  .LBB214_7: # %atomicrmw.start
+; RV32I-NEXT:  .LBB214_6: # %atomicrmw.start
 ; RV32I-NEXT:    # in Loop: Header=BB214_1 Depth=1
 ; RV32I-NEXT:    sw a1, 4(sp)
 ; RV32I-NEXT:    mv a0, s1
@@ -16866,7 +16721,7 @@ define i64 @atomicrmw_umax_i64_seq_cst(i
 ; RV32I-NEXT:    lw a1, 4(sp)
 ; RV32I-NEXT:    lw a2, 0(sp)
 ; RV32I-NEXT:    beqz a0, .LBB214_1
-; RV32I-NEXT:  # %bb.8: # %atomicrmw.end
+; RV32I-NEXT:  # %bb.7: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a2
 ; RV32I-NEXT:    lw s3, 12(sp)
 ; RV32I-NEXT:    lw s2, 16(sp)
@@ -16896,24 +16751,19 @@ define i64 @atomicrmw_umax_i64_seq_cst(i
 ; RV32IA-NEXT:  # %bb.2: # %atomicrmw.start
 ; RV32IA-NEXT:    # in Loop: Header=BB214_1 Depth=1
 ; RV32IA-NEXT:    sltu a0, s0, a1
-; RV32IA-NEXT:    sw a2, 0(sp)
-; RV32IA-NEXT:    beqz a0, .LBB214_4
-; RV32IA-NEXT:    j .LBB214_5
+; RV32IA-NEXT:    j .LBB214_4
 ; RV32IA-NEXT:  .LBB214_3: # in Loop: Header=BB214_1 Depth=1
 ; RV32IA-NEXT:    sltu a0, s2, a2
-; RV32IA-NEXT:    sw a2, 0(sp)
-; RV32IA-NEXT:    bnez a0, .LBB214_5
 ; RV32IA-NEXT:  .LBB214_4: # %atomicrmw.start
 ; RV32IA-NEXT:    # in Loop: Header=BB214_1 Depth=1
-; RV32IA-NEXT:    mv a2, s2
-; RV32IA-NEXT:  .LBB214_5: # %atomicrmw.start
-; RV32IA-NEXT:    # in Loop: Header=BB214_1 Depth=1
+; RV32IA-NEXT:    sw a2, 0(sp)
 ; RV32IA-NEXT:    mv a3, a1
-; RV32IA-NEXT:    bnez a0, .LBB214_7
-; RV32IA-NEXT:  # %bb.6: # %atomicrmw.start
+; RV32IA-NEXT:    bnez a0, .LBB214_6
+; RV32IA-NEXT:  # %bb.5: # %atomicrmw.start
 ; RV32IA-NEXT:    # in Loop: Header=BB214_1 Depth=1
+; RV32IA-NEXT:    mv a2, s2
 ; RV32IA-NEXT:    mv a3, s0
-; RV32IA-NEXT:  .LBB214_7: # %atomicrmw.start
+; RV32IA-NEXT:  .LBB214_6: # %atomicrmw.start
 ; RV32IA-NEXT:    # in Loop: Header=BB214_1 Depth=1
 ; RV32IA-NEXT:    sw a1, 4(sp)
 ; RV32IA-NEXT:    mv a0, s1
@@ -16924,7 +16774,7 @@ define i64 @atomicrmw_umax_i64_seq_cst(i
 ; RV32IA-NEXT:    lw a1, 4(sp)
 ; RV32IA-NEXT:    lw a2, 0(sp)
 ; RV32IA-NEXT:    beqz a0, .LBB214_1
-; RV32IA-NEXT:  # %bb.8: # %atomicrmw.end
+; RV32IA-NEXT:  # %bb.7: # %atomicrmw.end
 ; RV32IA-NEXT:    mv a0, a2
 ; RV32IA-NEXT:    lw s3, 12(sp)
 ; RV32IA-NEXT:    lw s2, 16(sp)
@@ -17006,18 +16856,13 @@ define i64 @atomicrmw_umin_i64_monotonic
 ; RV32I-NEXT:    # in Loop: Header=BB215_1 Depth=1
 ; RV32I-NEXT:    xori a0, a0, 1
 ; RV32I-NEXT:    sw a2, 0(sp)
+; RV32I-NEXT:    mv a3, a1
 ; RV32I-NEXT:    bnez a0, .LBB215_6
 ; RV32I-NEXT:  # %bb.5: # %atomicrmw.start
 ; RV32I-NEXT:    # in Loop: Header=BB215_1 Depth=1
 ; RV32I-NEXT:    mv a2, s2
-; RV32I-NEXT:  .LBB215_6: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB215_1 Depth=1
-; RV32I-NEXT:    mv a3, a1
-; RV32I-NEXT:    bnez a0, .LBB215_8
-; RV32I-NEXT:  # %bb.7: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB215_1 Depth=1
 ; RV32I-NEXT:    mv a3, s0
-; RV32I-NEXT:  .LBB215_8: # %atomicrmw.start
+; RV32I-NEXT:  .LBB215_6: # %atomicrmw.start
 ; RV32I-NEXT:    # in Loop: Header=BB215_1 Depth=1
 ; RV32I-NEXT:    sw a1, 4(sp)
 ; RV32I-NEXT:    mv a0, s1
@@ -17028,7 +16873,7 @@ define i64 @atomicrmw_umin_i64_monotonic
 ; RV32I-NEXT:    lw a1, 4(sp)
 ; RV32I-NEXT:    lw a2, 0(sp)
 ; RV32I-NEXT:    beqz a0, .LBB215_1
-; RV32I-NEXT:  # %bb.9: # %atomicrmw.end
+; RV32I-NEXT:  # %bb.7: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a2
 ; RV32I-NEXT:    lw s3, 12(sp)
 ; RV32I-NEXT:    lw s2, 16(sp)
@@ -17065,18 +16910,13 @@ define i64 @atomicrmw_umin_i64_monotonic
 ; RV32IA-NEXT:    # in Loop: Header=BB215_1 Depth=1
 ; RV32IA-NEXT:    xori a0, a0, 1
 ; RV32IA-NEXT:    sw a2, 0(sp)
+; RV32IA-NEXT:    mv a3, a1
 ; RV32IA-NEXT:    bnez a0, .LBB215_6
 ; RV32IA-NEXT:  # %bb.5: # %atomicrmw.start
 ; RV32IA-NEXT:    # in Loop: Header=BB215_1 Depth=1
 ; RV32IA-NEXT:    mv a2, s2
-; RV32IA-NEXT:  .LBB215_6: # %atomicrmw.start
-; RV32IA-NEXT:    # in Loop: Header=BB215_1 Depth=1
-; RV32IA-NEXT:    mv a3, a1
-; RV32IA-NEXT:    bnez a0, .LBB215_8
-; RV32IA-NEXT:  # %bb.7: # %atomicrmw.start
-; RV32IA-NEXT:    # in Loop: Header=BB215_1 Depth=1
 ; RV32IA-NEXT:    mv a3, s0
-; RV32IA-NEXT:  .LBB215_8: # %atomicrmw.start
+; RV32IA-NEXT:  .LBB215_6: # %atomicrmw.start
 ; RV32IA-NEXT:    # in Loop: Header=BB215_1 Depth=1
 ; RV32IA-NEXT:    sw a1, 4(sp)
 ; RV32IA-NEXT:    mv a0, s1
@@ -17087,7 +16927,7 @@ define i64 @atomicrmw_umin_i64_monotonic
 ; RV32IA-NEXT:    lw a1, 4(sp)
 ; RV32IA-NEXT:    lw a2, 0(sp)
 ; RV32IA-NEXT:    beqz a0, .LBB215_1
-; RV32IA-NEXT:  # %bb.9: # %atomicrmw.end
+; RV32IA-NEXT:  # %bb.7: # %atomicrmw.end
 ; RV32IA-NEXT:    mv a0, a2
 ; RV32IA-NEXT:    lw s3, 12(sp)
 ; RV32IA-NEXT:    lw s2, 16(sp)
@@ -17169,18 +17009,13 @@ define i64 @atomicrmw_umin_i64_acquire(i
 ; RV32I-NEXT:    # in Loop: Header=BB216_1 Depth=1
 ; RV32I-NEXT:    xori a0, a0, 1
 ; RV32I-NEXT:    sw a2, 0(sp)
+; RV32I-NEXT:    mv a3, a1
 ; RV32I-NEXT:    bnez a0, .LBB216_6
 ; RV32I-NEXT:  # %bb.5: # %atomicrmw.start
 ; RV32I-NEXT:    # in Loop: Header=BB216_1 Depth=1
 ; RV32I-NEXT:    mv a2, s2
-; RV32I-NEXT:  .LBB216_6: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB216_1 Depth=1
-; RV32I-NEXT:    mv a3, a1
-; RV32I-NEXT:    bnez a0, .LBB216_8
-; RV32I-NEXT:  # %bb.7: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB216_1 Depth=1
 ; RV32I-NEXT:    mv a3, s0
-; RV32I-NEXT:  .LBB216_8: # %atomicrmw.start
+; RV32I-NEXT:  .LBB216_6: # %atomicrmw.start
 ; RV32I-NEXT:    # in Loop: Header=BB216_1 Depth=1
 ; RV32I-NEXT:    sw a1, 4(sp)
 ; RV32I-NEXT:    mv a0, s1
@@ -17191,7 +17026,7 @@ define i64 @atomicrmw_umin_i64_acquire(i
 ; RV32I-NEXT:    lw a1, 4(sp)
 ; RV32I-NEXT:    lw a2, 0(sp)
 ; RV32I-NEXT:    beqz a0, .LBB216_1
-; RV32I-NEXT:  # %bb.9: # %atomicrmw.end
+; RV32I-NEXT:  # %bb.7: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a2
 ; RV32I-NEXT:    lw s3, 12(sp)
 ; RV32I-NEXT:    lw s2, 16(sp)
@@ -17228,18 +17063,13 @@ define i64 @atomicrmw_umin_i64_acquire(i
 ; RV32IA-NEXT:    # in Loop: Header=BB216_1 Depth=1
 ; RV32IA-NEXT:    xori a0, a0, 1
 ; RV32IA-NEXT:    sw a2, 0(sp)
+; RV32IA-NEXT:    mv a3, a1
 ; RV32IA-NEXT:    bnez a0, .LBB216_6
 ; RV32IA-NEXT:  # %bb.5: # %atomicrmw.start
 ; RV32IA-NEXT:    # in Loop: Header=BB216_1 Depth=1
 ; RV32IA-NEXT:    mv a2, s2
-; RV32IA-NEXT:  .LBB216_6: # %atomicrmw.start
-; RV32IA-NEXT:    # in Loop: Header=BB216_1 Depth=1
-; RV32IA-NEXT:    mv a3, a1
-; RV32IA-NEXT:    bnez a0, .LBB216_8
-; RV32IA-NEXT:  # %bb.7: # %atomicrmw.start
-; RV32IA-NEXT:    # in Loop: Header=BB216_1 Depth=1
 ; RV32IA-NEXT:    mv a3, s0
-; RV32IA-NEXT:  .LBB216_8: # %atomicrmw.start
+; RV32IA-NEXT:  .LBB216_6: # %atomicrmw.start
 ; RV32IA-NEXT:    # in Loop: Header=BB216_1 Depth=1
 ; RV32IA-NEXT:    sw a1, 4(sp)
 ; RV32IA-NEXT:    mv a0, s1
@@ -17250,7 +17080,7 @@ define i64 @atomicrmw_umin_i64_acquire(i
 ; RV32IA-NEXT:    lw a1, 4(sp)
 ; RV32IA-NEXT:    lw a2, 0(sp)
 ; RV32IA-NEXT:    beqz a0, .LBB216_1
-; RV32IA-NEXT:  # %bb.9: # %atomicrmw.end
+; RV32IA-NEXT:  # %bb.7: # %atomicrmw.end
 ; RV32IA-NEXT:    mv a0, a2
 ; RV32IA-NEXT:    lw s3, 12(sp)
 ; RV32IA-NEXT:    lw s2, 16(sp)
@@ -17332,18 +17162,13 @@ define i64 @atomicrmw_umin_i64_release(i
 ; RV32I-NEXT:    # in Loop: Header=BB217_1 Depth=1
 ; RV32I-NEXT:    xori a0, a0, 1
 ; RV32I-NEXT:    sw a2, 0(sp)
+; RV32I-NEXT:    mv a3, a1
 ; RV32I-NEXT:    bnez a0, .LBB217_6
 ; RV32I-NEXT:  # %bb.5: # %atomicrmw.start
 ; RV32I-NEXT:    # in Loop: Header=BB217_1 Depth=1
 ; RV32I-NEXT:    mv a2, s2
-; RV32I-NEXT:  .LBB217_6: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB217_1 Depth=1
-; RV32I-NEXT:    mv a3, a1
-; RV32I-NEXT:    bnez a0, .LBB217_8
-; RV32I-NEXT:  # %bb.7: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB217_1 Depth=1
 ; RV32I-NEXT:    mv a3, s0
-; RV32I-NEXT:  .LBB217_8: # %atomicrmw.start
+; RV32I-NEXT:  .LBB217_6: # %atomicrmw.start
 ; RV32I-NEXT:    # in Loop: Header=BB217_1 Depth=1
 ; RV32I-NEXT:    sw a1, 4(sp)
 ; RV32I-NEXT:    mv a0, s1
@@ -17354,7 +17179,7 @@ define i64 @atomicrmw_umin_i64_release(i
 ; RV32I-NEXT:    lw a1, 4(sp)
 ; RV32I-NEXT:    lw a2, 0(sp)
 ; RV32I-NEXT:    beqz a0, .LBB217_1
-; RV32I-NEXT:  # %bb.9: # %atomicrmw.end
+; RV32I-NEXT:  # %bb.7: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a2
 ; RV32I-NEXT:    lw s3, 12(sp)
 ; RV32I-NEXT:    lw s2, 16(sp)
@@ -17391,18 +17216,13 @@ define i64 @atomicrmw_umin_i64_release(i
 ; RV32IA-NEXT:    # in Loop: Header=BB217_1 Depth=1
 ; RV32IA-NEXT:    xori a0, a0, 1
 ; RV32IA-NEXT:    sw a2, 0(sp)
+; RV32IA-NEXT:    mv a3, a1
 ; RV32IA-NEXT:    bnez a0, .LBB217_6
 ; RV32IA-NEXT:  # %bb.5: # %atomicrmw.start
 ; RV32IA-NEXT:    # in Loop: Header=BB217_1 Depth=1
 ; RV32IA-NEXT:    mv a2, s2
-; RV32IA-NEXT:  .LBB217_6: # %atomicrmw.start
-; RV32IA-NEXT:    # in Loop: Header=BB217_1 Depth=1
-; RV32IA-NEXT:    mv a3, a1
-; RV32IA-NEXT:    bnez a0, .LBB217_8
-; RV32IA-NEXT:  # %bb.7: # %atomicrmw.start
-; RV32IA-NEXT:    # in Loop: Header=BB217_1 Depth=1
 ; RV32IA-NEXT:    mv a3, s0
-; RV32IA-NEXT:  .LBB217_8: # %atomicrmw.start
+; RV32IA-NEXT:  .LBB217_6: # %atomicrmw.start
 ; RV32IA-NEXT:    # in Loop: Header=BB217_1 Depth=1
 ; RV32IA-NEXT:    sw a1, 4(sp)
 ; RV32IA-NEXT:    mv a0, s1
@@ -17413,7 +17233,7 @@ define i64 @atomicrmw_umin_i64_release(i
 ; RV32IA-NEXT:    lw a1, 4(sp)
 ; RV32IA-NEXT:    lw a2, 0(sp)
 ; RV32IA-NEXT:    beqz a0, .LBB217_1
-; RV32IA-NEXT:  # %bb.9: # %atomicrmw.end
+; RV32IA-NEXT:  # %bb.7: # %atomicrmw.end
 ; RV32IA-NEXT:    mv a0, a2
 ; RV32IA-NEXT:    lw s3, 12(sp)
 ; RV32IA-NEXT:    lw s2, 16(sp)
@@ -17495,18 +17315,13 @@ define i64 @atomicrmw_umin_i64_acq_rel(i
 ; RV32I-NEXT:    # in Loop: Header=BB218_1 Depth=1
 ; RV32I-NEXT:    xori a0, a0, 1
 ; RV32I-NEXT:    sw a2, 0(sp)
+; RV32I-NEXT:    mv a3, a1
 ; RV32I-NEXT:    bnez a0, .LBB218_6
 ; RV32I-NEXT:  # %bb.5: # %atomicrmw.start
 ; RV32I-NEXT:    # in Loop: Header=BB218_1 Depth=1
 ; RV32I-NEXT:    mv a2, s2
-; RV32I-NEXT:  .LBB218_6: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB218_1 Depth=1
-; RV32I-NEXT:    mv a3, a1
-; RV32I-NEXT:    bnez a0, .LBB218_8
-; RV32I-NEXT:  # %bb.7: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB218_1 Depth=1
 ; RV32I-NEXT:    mv a3, s0
-; RV32I-NEXT:  .LBB218_8: # %atomicrmw.start
+; RV32I-NEXT:  .LBB218_6: # %atomicrmw.start
 ; RV32I-NEXT:    # in Loop: Header=BB218_1 Depth=1
 ; RV32I-NEXT:    sw a1, 4(sp)
 ; RV32I-NEXT:    mv a0, s1
@@ -17517,7 +17332,7 @@ define i64 @atomicrmw_umin_i64_acq_rel(i
 ; RV32I-NEXT:    lw a1, 4(sp)
 ; RV32I-NEXT:    lw a2, 0(sp)
 ; RV32I-NEXT:    beqz a0, .LBB218_1
-; RV32I-NEXT:  # %bb.9: # %atomicrmw.end
+; RV32I-NEXT:  # %bb.7: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a2
 ; RV32I-NEXT:    lw s3, 12(sp)
 ; RV32I-NEXT:    lw s2, 16(sp)
@@ -17554,18 +17369,13 @@ define i64 @atomicrmw_umin_i64_acq_rel(i
 ; RV32IA-NEXT:    # in Loop: Header=BB218_1 Depth=1
 ; RV32IA-NEXT:    xori a0, a0, 1
 ; RV32IA-NEXT:    sw a2, 0(sp)
+; RV32IA-NEXT:    mv a3, a1
 ; RV32IA-NEXT:    bnez a0, .LBB218_6
 ; RV32IA-NEXT:  # %bb.5: # %atomicrmw.start
 ; RV32IA-NEXT:    # in Loop: Header=BB218_1 Depth=1
 ; RV32IA-NEXT:    mv a2, s2
-; RV32IA-NEXT:  .LBB218_6: # %atomicrmw.start
-; RV32IA-NEXT:    # in Loop: Header=BB218_1 Depth=1
-; RV32IA-NEXT:    mv a3, a1
-; RV32IA-NEXT:    bnez a0, .LBB218_8
-; RV32IA-NEXT:  # %bb.7: # %atomicrmw.start
-; RV32IA-NEXT:    # in Loop: Header=BB218_1 Depth=1
 ; RV32IA-NEXT:    mv a3, s0
-; RV32IA-NEXT:  .LBB218_8: # %atomicrmw.start
+; RV32IA-NEXT:  .LBB218_6: # %atomicrmw.start
 ; RV32IA-NEXT:    # in Loop: Header=BB218_1 Depth=1
 ; RV32IA-NEXT:    sw a1, 4(sp)
 ; RV32IA-NEXT:    mv a0, s1
@@ -17576,7 +17386,7 @@ define i64 @atomicrmw_umin_i64_acq_rel(i
 ; RV32IA-NEXT:    lw a1, 4(sp)
 ; RV32IA-NEXT:    lw a2, 0(sp)
 ; RV32IA-NEXT:    beqz a0, .LBB218_1
-; RV32IA-NEXT:  # %bb.9: # %atomicrmw.end
+; RV32IA-NEXT:  # %bb.7: # %atomicrmw.end
 ; RV32IA-NEXT:    mv a0, a2
 ; RV32IA-NEXT:    lw s3, 12(sp)
 ; RV32IA-NEXT:    lw s2, 16(sp)
@@ -17658,18 +17468,13 @@ define i64 @atomicrmw_umin_i64_seq_cst(i
 ; RV32I-NEXT:    # in Loop: Header=BB219_1 Depth=1
 ; RV32I-NEXT:    xori a0, a0, 1
 ; RV32I-NEXT:    sw a2, 0(sp)
+; RV32I-NEXT:    mv a3, a1
 ; RV32I-NEXT:    bnez a0, .LBB219_6
 ; RV32I-NEXT:  # %bb.5: # %atomicrmw.start
 ; RV32I-NEXT:    # in Loop: Header=BB219_1 Depth=1
 ; RV32I-NEXT:    mv a2, s2
-; RV32I-NEXT:  .LBB219_6: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB219_1 Depth=1
-; RV32I-NEXT:    mv a3, a1
-; RV32I-NEXT:    bnez a0, .LBB219_8
-; RV32I-NEXT:  # %bb.7: # %atomicrmw.start
-; RV32I-NEXT:    # in Loop: Header=BB219_1 Depth=1
 ; RV32I-NEXT:    mv a3, s0
-; RV32I-NEXT:  .LBB219_8: # %atomicrmw.start
+; RV32I-NEXT:  .LBB219_6: # %atomicrmw.start
 ; RV32I-NEXT:    # in Loop: Header=BB219_1 Depth=1
 ; RV32I-NEXT:    sw a1, 4(sp)
 ; RV32I-NEXT:    mv a0, s1
@@ -17680,7 +17485,7 @@ define i64 @atomicrmw_umin_i64_seq_cst(i
 ; RV32I-NEXT:    lw a1, 4(sp)
 ; RV32I-NEXT:    lw a2, 0(sp)
 ; RV32I-NEXT:    beqz a0, .LBB219_1
-; RV32I-NEXT:  # %bb.9: # %atomicrmw.end
+; RV32I-NEXT:  # %bb.7: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a2
 ; RV32I-NEXT:    lw s3, 12(sp)
 ; RV32I-NEXT:    lw s2, 16(sp)
@@ -17717,18 +17522,13 @@ define i64 @atomicrmw_umin_i64_seq_cst(i
 ; RV32IA-NEXT:    # in Loop: Header=BB219_1 Depth=1
 ; RV32IA-NEXT:    xori a0, a0, 1
 ; RV32IA-NEXT:    sw a2, 0(sp)
+; RV32IA-NEXT:    mv a3, a1
 ; RV32IA-NEXT:    bnez a0, .LBB219_6
 ; RV32IA-NEXT:  # %bb.5: # %atomicrmw.start
 ; RV32IA-NEXT:    # in Loop: Header=BB219_1 Depth=1
 ; RV32IA-NEXT:    mv a2, s2
-; RV32IA-NEXT:  .LBB219_6: # %atomicrmw.start
-; RV32IA-NEXT:    # in Loop: Header=BB219_1 Depth=1
-; RV32IA-NEXT:    mv a3, a1
-; RV32IA-NEXT:    bnez a0, .LBB219_8
-; RV32IA-NEXT:  # %bb.7: # %atomicrmw.start
-; RV32IA-NEXT:    # in Loop: Header=BB219_1 Depth=1
 ; RV32IA-NEXT:    mv a3, s0
-; RV32IA-NEXT:  .LBB219_8: # %atomicrmw.start
+; RV32IA-NEXT:  .LBB219_6: # %atomicrmw.start
 ; RV32IA-NEXT:    # in Loop: Header=BB219_1 Depth=1
 ; RV32IA-NEXT:    sw a1, 4(sp)
 ; RV32IA-NEXT:    mv a0, s1
@@ -17739,7 +17539,7 @@ define i64 @atomicrmw_umin_i64_seq_cst(i
 ; RV32IA-NEXT:    lw a1, 4(sp)
 ; RV32IA-NEXT:    lw a2, 0(sp)
 ; RV32IA-NEXT:    beqz a0, .LBB219_1
-; RV32IA-NEXT:  # %bb.9: # %atomicrmw.end
+; RV32IA-NEXT:  # %bb.7: # %atomicrmw.end
 ; RV32IA-NEXT:    mv a0, a2
 ; RV32IA-NEXT:    lw s3, 12(sp)
 ; RV32IA-NEXT:    lw s2, 16(sp)

Added: llvm/trunk/test/CodeGen/RISCV/select-optimize-multiple.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/RISCV/select-optimize-multiple.ll?rev=356741&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/RISCV/select-optimize-multiple.ll (added)
+++ llvm/trunk/test/CodeGen/RISCV/select-optimize-multiple.ll Fri Mar 22 03:45:03 2019
@@ -0,0 +1,323 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+d -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s -check-prefix=RV32I
+; RUN: llc -mtriple=riscv64 -mattr=+d -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s -check-prefix=RV64I
+
+; Selects of wide values are split into two selects, which can easily cause
+; unnecessary control flow. Here we check some cases where we can currently
+; emit a sequence of selects with shared control flow.
+
+define i64 @cmovcc64(i32 signext %a, i64 %b, i64 %c) nounwind {
+; RV32I-LABEL: cmovcc64:
+; RV32I:       # %bb.0: # %entry
+; RV32I-NEXT:    addi a5, zero, 123
+; RV32I-NEXT:    beq a0, a5, .LBB0_2
+; RV32I-NEXT:  # %bb.1: # %entry
+; RV32I-NEXT:    mv a1, a3
+; RV32I-NEXT:    mv a2, a4
+; RV32I-NEXT:  .LBB0_2: # %entry
+; RV32I-NEXT:    mv a0, a1
+; RV32I-NEXT:    mv a1, a2
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: cmovcc64:
+; RV64I:       # %bb.0: # %entry
+; RV64I-NEXT:    addi a3, zero, 123
+; RV64I-NEXT:    beq a0, a3, .LBB0_2
+; RV64I-NEXT:  # %bb.1: # %entry
+; RV64I-NEXT:    mv a1, a2
+; RV64I-NEXT:  .LBB0_2: # %entry
+; RV64I-NEXT:    mv a0, a1
+; RV64I-NEXT:    ret
+entry:
+  %cmp = icmp eq i32 %a, 123
+  %cond = select i1 %cmp, i64 %b, i64 %c
+  ret i64 %cond
+}
+
+define i128 @cmovcc128(i64 signext %a, i128 %b, i128 %c) nounwind {
+; RV32I-LABEL: cmovcc128:
+; RV32I:       # %bb.0: # %entry
+; RV32I-NEXT:    xori a1, a1, 123
+; RV32I-NEXT:    or a1, a1, a2
+; RV32I-NEXT:    beqz a1, .LBB1_2
+; RV32I-NEXT:  # %bb.1: # %entry
+; RV32I-NEXT:    addi a1, a4, 4
+; RV32I-NEXT:    addi a2, a4, 8
+; RV32I-NEXT:    addi a5, a4, 12
+; RV32I-NEXT:    mv a3, a4
+; RV32I-NEXT:    j .LBB1_3
+; RV32I-NEXT:  .LBB1_2:
+; RV32I-NEXT:    addi a1, a3, 4
+; RV32I-NEXT:    addi a2, a3, 8
+; RV32I-NEXT:    addi a5, a3, 12
+; RV32I-NEXT:  .LBB1_3: # %entry
+; RV32I-NEXT:    lw a4, 0(a5)
+; RV32I-NEXT:    sw a4, 12(a0)
+; RV32I-NEXT:    lw a2, 0(a2)
+; RV32I-NEXT:    sw a2, 8(a0)
+; RV32I-NEXT:    lw a1, 0(a1)
+; RV32I-NEXT:    sw a1, 4(a0)
+; RV32I-NEXT:    lw a1, 0(a3)
+; RV32I-NEXT:    sw a1, 0(a0)
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: cmovcc128:
+; RV64I:       # %bb.0: # %entry
+; RV64I-NEXT:    addi a5, zero, 123
+; RV64I-NEXT:    beq a0, a5, .LBB1_2
+; RV64I-NEXT:  # %bb.1: # %entry
+; RV64I-NEXT:    mv a1, a3
+; RV64I-NEXT:    mv a2, a4
+; RV64I-NEXT:  .LBB1_2: # %entry
+; RV64I-NEXT:    mv a0, a1
+; RV64I-NEXT:    mv a1, a2
+; RV64I-NEXT:    ret
+entry:
+  %cmp = icmp eq i64 %a, 123
+  %cond = select i1 %cmp, i128 %b, i128 %c
+  ret i128 %cond
+}
+
+define i64 @cmov64(i1 %a, i64 %b, i64 %c) nounwind {
+; RV32I-LABEL: cmov64:
+; RV32I:       # %bb.0: # %entry
+; RV32I-NEXT:    andi a0, a0, 1
+; RV32I-NEXT:    bnez a0, .LBB2_2
+; RV32I-NEXT:  # %bb.1: # %entry
+; RV32I-NEXT:    mv a1, a3
+; RV32I-NEXT:    mv a2, a4
+; RV32I-NEXT:  .LBB2_2: # %entry
+; RV32I-NEXT:    mv a0, a1
+; RV32I-NEXT:    mv a1, a2
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: cmov64:
+; RV64I:       # %bb.0: # %entry
+; RV64I-NEXT:    andi a0, a0, 1
+; RV64I-NEXT:    bnez a0, .LBB2_2
+; RV64I-NEXT:  # %bb.1: # %entry
+; RV64I-NEXT:    mv a1, a2
+; RV64I-NEXT:  .LBB2_2: # %entry
+; RV64I-NEXT:    mv a0, a1
+; RV64I-NEXT:    ret
+entry:
+  %cond = select i1 %a, i64 %b, i64 %c
+  ret i64 %cond
+}
+
+define i128 @cmov128(i1 %a, i128 %b, i128 %c) nounwind {
+; RV32I-LABEL: cmov128:
+; RV32I:       # %bb.0: # %entry
+; RV32I-NEXT:    andi a1, a1, 1
+; RV32I-NEXT:    bnez a1, .LBB3_2
+; RV32I-NEXT:  # %bb.1: # %entry
+; RV32I-NEXT:    addi a1, a3, 4
+; RV32I-NEXT:    addi a4, a3, 8
+; RV32I-NEXT:    addi a5, a3, 12
+; RV32I-NEXT:    mv a2, a3
+; RV32I-NEXT:    j .LBB3_3
+; RV32I-NEXT:  .LBB3_2:
+; RV32I-NEXT:    addi a1, a2, 4
+; RV32I-NEXT:    addi a4, a2, 8
+; RV32I-NEXT:    addi a5, a2, 12
+; RV32I-NEXT:  .LBB3_3: # %entry
+; RV32I-NEXT:    lw a3, 0(a5)
+; RV32I-NEXT:    sw a3, 12(a0)
+; RV32I-NEXT:    lw a3, 0(a4)
+; RV32I-NEXT:    sw a3, 8(a0)
+; RV32I-NEXT:    lw a1, 0(a1)
+; RV32I-NEXT:    sw a1, 4(a0)
+; RV32I-NEXT:    lw a1, 0(a2)
+; RV32I-NEXT:    sw a1, 0(a0)
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: cmov128:
+; RV64I:       # %bb.0: # %entry
+; RV64I-NEXT:    andi a0, a0, 1
+; RV64I-NEXT:    bnez a0, .LBB3_2
+; RV64I-NEXT:  # %bb.1: # %entry
+; RV64I-NEXT:    mv a1, a3
+; RV64I-NEXT:    mv a2, a4
+; RV64I-NEXT:  .LBB3_2: # %entry
+; RV64I-NEXT:    mv a0, a1
+; RV64I-NEXT:    mv a1, a2
+; RV64I-NEXT:    ret
+entry:
+  %cond = select i1 %a, i128 %b, i128 %c
+  ret i128 %cond
+}
+
+define float @cmovfloat(i1 %a, float %b, float %c, float %d, float %e) nounwind {
+; RV32I-LABEL: cmovfloat:
+; RV32I:       # %bb.0: # %entry
+; RV32I-NEXT:    andi a0, a0, 1
+; RV32I-NEXT:    bnez a0, .LBB4_2
+; RV32I-NEXT:  # %bb.1: # %entry
+; RV32I-NEXT:    fmv.w.x ft0, a4
+; RV32I-NEXT:    fmv.w.x ft1, a2
+; RV32I-NEXT:    j .LBB4_3
+; RV32I-NEXT:  .LBB4_2:
+; RV32I-NEXT:    fmv.w.x ft0, a3
+; RV32I-NEXT:    fmv.w.x ft1, a1
+; RV32I-NEXT:  .LBB4_3: # %entry
+; RV32I-NEXT:    fadd.s ft0, ft1, ft0
+; RV32I-NEXT:    fmv.x.w a0, ft0
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: cmovfloat:
+; RV64I:       # %bb.0: # %entry
+; RV64I-NEXT:    andi a0, a0, 1
+; RV64I-NEXT:    bnez a0, .LBB4_2
+; RV64I-NEXT:  # %bb.1: # %entry
+; RV64I-NEXT:    fmv.w.x ft0, a4
+; RV64I-NEXT:    fmv.w.x ft1, a2
+; RV64I-NEXT:    j .LBB4_3
+; RV64I-NEXT:  .LBB4_2:
+; RV64I-NEXT:    fmv.w.x ft0, a3
+; RV64I-NEXT:    fmv.w.x ft1, a1
+; RV64I-NEXT:  .LBB4_3: # %entry
+; RV64I-NEXT:    fadd.s ft0, ft1, ft0
+; RV64I-NEXT:    fmv.x.w a0, ft0
+; RV64I-NEXT:    ret
+entry:
+  %cond1 = select i1 %a, float %b, float %c
+  %cond2 = select i1 %a, float %d, float %e
+  %ret = fadd float %cond1, %cond2
+  ret float %ret
+}
+
+define double @cmovdouble(i1 %a, double %b, double %c) nounwind {
+; RV32I-LABEL: cmovdouble:
+; RV32I:       # %bb.0: # %entry
+; RV32I-NEXT:    addi sp, sp, -16
+; RV32I-NEXT:    sw a3, 8(sp)
+; RV32I-NEXT:    sw a4, 12(sp)
+; RV32I-NEXT:    fld ft0, 8(sp)
+; RV32I-NEXT:    sw a1, 8(sp)
+; RV32I-NEXT:    sw a2, 12(sp)
+; RV32I-NEXT:    fld ft1, 8(sp)
+; RV32I-NEXT:    andi a0, a0, 1
+; RV32I-NEXT:    bnez a0, .LBB5_2
+; RV32I-NEXT:  # %bb.1: # %entry
+; RV32I-NEXT:    fmv.d ft1, ft0
+; RV32I-NEXT:  .LBB5_2: # %entry
+; RV32I-NEXT:    fsd ft1, 8(sp)
+; RV32I-NEXT:    lw a0, 8(sp)
+; RV32I-NEXT:    lw a1, 12(sp)
+; RV32I-NEXT:    addi sp, sp, 16
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: cmovdouble:
+; RV64I:       # %bb.0: # %entry
+; RV64I-NEXT:    andi a0, a0, 1
+; RV64I-NEXT:    bnez a0, .LBB5_2
+; RV64I-NEXT:  # %bb.1: # %entry
+; RV64I-NEXT:    fmv.d.x ft0, a2
+; RV64I-NEXT:    fmv.x.d a0, ft0
+; RV64I-NEXT:    ret
+; RV64I-NEXT:  .LBB5_2:
+; RV64I-NEXT:    fmv.d.x ft0, a1
+; RV64I-NEXT:    fmv.x.d a0, ft0
+; RV64I-NEXT:    ret
+entry:
+  %cond = select i1 %a, double %b, double %c
+  ret double %cond
+}
+
+; Check that selects with dependencies on previous ones aren't incorrectly
+; optimized.
+
+define i32 @cmovccdep(i32 signext %a, i32 %b, i32 %c, i32 %d) nounwind {
+; RV32I-LABEL: cmovccdep:
+; RV32I:       # %bb.0: # %entry
+; RV32I-NEXT:    addi a4, zero, 123
+; RV32I-NEXT:    bne a0, a4, .LBB6_3
+; RV32I-NEXT:  # %bb.1: # %entry
+; RV32I-NEXT:    mv a2, a1
+; RV32I-NEXT:    bne a0, a4, .LBB6_4
+; RV32I-NEXT:  .LBB6_2: # %entry
+; RV32I-NEXT:    add a0, a1, a2
+; RV32I-NEXT:    ret
+; RV32I-NEXT:  .LBB6_3: # %entry
+; RV32I-NEXT:    mv a1, a2
+; RV32I-NEXT:    mv a2, a1
+; RV32I-NEXT:    beq a0, a4, .LBB6_2
+; RV32I-NEXT:  .LBB6_4: # %entry
+; RV32I-NEXT:    mv a2, a3
+; RV32I-NEXT:    add a0, a1, a2
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: cmovccdep:
+; RV64I:       # %bb.0: # %entry
+; RV64I-NEXT:    addi a4, zero, 123
+; RV64I-NEXT:    bne a0, a4, .LBB6_3
+; RV64I-NEXT:  # %bb.1: # %entry
+; RV64I-NEXT:    mv a2, a1
+; RV64I-NEXT:    bne a0, a4, .LBB6_4
+; RV64I-NEXT:  .LBB6_2: # %entry
+; RV64I-NEXT:    add a0, a1, a2
+; RV64I-NEXT:    ret
+; RV64I-NEXT:  .LBB6_3: # %entry
+; RV64I-NEXT:    mv a1, a2
+; RV64I-NEXT:    mv a2, a1
+; RV64I-NEXT:    beq a0, a4, .LBB6_2
+; RV64I-NEXT:  .LBB6_4: # %entry
+; RV64I-NEXT:    mv a2, a3
+; RV64I-NEXT:    add a0, a1, a2
+; RV64I-NEXT:    ret
+entry:
+  %cmp = icmp eq i32 %a, 123
+  %cond1 = select i1 %cmp, i32 %b, i32 %c
+  %cond2 = select i1 %cmp, i32 %cond1, i32 %d
+  %ret = add i32 %cond1, %cond2
+  ret i32 %ret
+}
+
+; Check that selects with different conditions aren't incorrectly optimized.
+
+define i32 @cmovdiffcc(i1 %a, i1 %b, i32 %c, i32 %d, i32 %e, i32 %f) nounwind {
+; RV32I-LABEL: cmovdiffcc:
+; RV32I:       # %bb.0: # %entry
+; RV32I-NEXT:    andi a1, a1, 1
+; RV32I-NEXT:    beqz a1, .LBB7_3
+; RV32I-NEXT:  # %bb.1: # %entry
+; RV32I-NEXT:    andi a0, a0, 1
+; RV32I-NEXT:    beqz a0, .LBB7_4
+; RV32I-NEXT:  .LBB7_2: # %entry
+; RV32I-NEXT:    add a0, a2, a4
+; RV32I-NEXT:    ret
+; RV32I-NEXT:  .LBB7_3: # %entry
+; RV32I-NEXT:    mv a4, a5
+; RV32I-NEXT:    andi a0, a0, 1
+; RV32I-NEXT:    bnez a0, .LBB7_2
+; RV32I-NEXT:  .LBB7_4: # %entry
+; RV32I-NEXT:    mv a2, a3
+; RV32I-NEXT:    add a0, a2, a4
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: cmovdiffcc:
+; RV64I:       # %bb.0: # %entry
+; RV64I-NEXT:    andi a1, a1, 1
+; RV64I-NEXT:    beqz a1, .LBB7_3
+; RV64I-NEXT:  # %bb.1: # %entry
+; RV64I-NEXT:    andi a0, a0, 1
+; RV64I-NEXT:    beqz a0, .LBB7_4
+; RV64I-NEXT:  .LBB7_2: # %entry
+; RV64I-NEXT:    add a0, a2, a4
+; RV64I-NEXT:    ret
+; RV64I-NEXT:  .LBB7_3: # %entry
+; RV64I-NEXT:    mv a4, a5
+; RV64I-NEXT:    andi a0, a0, 1
+; RV64I-NEXT:    bnez a0, .LBB7_2
+; RV64I-NEXT:  .LBB7_4: # %entry
+; RV64I-NEXT:    mv a2, a3
+; RV64I-NEXT:    add a0, a2, a4
+; RV64I-NEXT:    ret
+entry:
+  %cond1 = select i1 %a, i32 %c, i32 %d
+  %cond2 = select i1 %b, i32 %e, i32 %f
+  %ret = add i32 %cond1, %cond2
+  ret i32 %ret
+}

Added: llvm/trunk/test/CodeGen/RISCV/select-optimize-multiple.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/RISCV/select-optimize-multiple.mir?rev=356741&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/RISCV/select-optimize-multiple.mir (added)
+++ llvm/trunk/test/CodeGen/RISCV/select-optimize-multiple.mir Fri Mar 22 03:45:03 2019
@@ -0,0 +1,191 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=riscv32 -run-pass=expand-isel-pseudos -simplify-mir -o - %s \
+# RUN:  | FileCheck -check-prefix=RV32I %s
+# RUN: llc -mtriple=riscv64 -run-pass=expand-isel-pseudos -simplify-mir -o - %s \
+# RUN:  | FileCheck -check-prefix=RV64I %s
+
+# Provide dummy definitions of functions and just enough metadata to create a
+# DBG_VALUE.
+--- |
+  define void @cmov_interleaved_bad() {
+    ret void
+  }
+  define void @cmov_interleaved_debug_value() {
+    ret void
+  }
+  !1 = !DIExpression()
+...
+---
+# Here we have a sequence of select instructions with a non-select instruction
+# in the middle. Because the non-select depends on the result of a previous
+# select, we cannot optimize the sequence to share control-flow.
+name:            cmov_interleaved_bad
+alignment:       2
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: gpr }
+  - { id: 1, class: gpr }
+  - { id: 2, class: gpr }
+  - { id: 3, class: gpr }
+  - { id: 4, class: gpr }
+  - { id: 5, class: gpr }
+  - { id: 6, class: gpr }
+  - { id: 7, class: gpr }
+  - { id: 8, class: gpr }
+  - { id: 9, class: gpr }
+  - { id: 10, class: gpr }
+liveins:
+  - { reg: '$x10', virtual-reg: '%0' }
+  - { reg: '$x11', virtual-reg: '%1' }
+  - { reg: '$x12', virtual-reg: '%2' }
+  - { reg: '$x13', virtual-reg: '%3' }
+body:             |
+  bb.0:
+    liveins: $x10, $x11, $x12, $x13
+
+    ; RV32I-LABEL: name: cmov_interleaved_bad
+    ; RV32I: successors: %bb.1, %bb.2
+    ; RV32I: liveins: $x10, $x11, $x12, $x13
+    ; RV32I: [[COPY:%[0-9]+]]:gpr = COPY $x13
+    ; RV32I: [[COPY1:%[0-9]+]]:gpr = COPY $x12
+    ; RV32I: [[COPY2:%[0-9]+]]:gpr = COPY $x11
+    ; RV32I: [[COPY3:%[0-9]+]]:gpr = COPY $x10
+    ; RV32I: [[ANDI:%[0-9]+]]:gpr = ANDI [[COPY3]], 1
+    ; RV32I: [[COPY4:%[0-9]+]]:gpr = COPY $x0
+    ; RV32I: BNE [[ANDI]], [[COPY4]], %bb.2
+    ; RV32I: .1:
+    ; RV32I: .2:
+    ; RV32I: successors: %bb.3, %bb.4
+    ; RV32I: [[PHI:%[0-9]+]]:gpr = PHI [[COPY2]], %bb.0, [[COPY1]], %bb.1
+    ; RV32I: [[ADDI:%[0-9]+]]:gpr = ADDI [[PHI]], 1
+    ; RV32I: BNE [[ANDI]], [[COPY4]], %bb.4
+    ; RV32I: .3:
+    ; RV32I: .4:
+    ; RV32I: [[PHI1:%[0-9]+]]:gpr = PHI [[COPY]], %bb.2, [[COPY1]], %bb.3
+    ; RV32I: [[ADD:%[0-9]+]]:gpr = ADD [[PHI]], killed [[PHI1]]
+    ; RV32I: $x10 = COPY [[ADD]]
+    ; RV32I: PseudoRET implicit $x10
+    ; RV64I-LABEL: name: cmov_interleaved_bad
+    ; RV64I: successors: %bb.1, %bb.2
+    ; RV64I: liveins: $x10, $x11, $x12, $x13
+    ; RV64I: [[COPY:%[0-9]+]]:gpr = COPY $x13
+    ; RV64I: [[COPY1:%[0-9]+]]:gpr = COPY $x12
+    ; RV64I: [[COPY2:%[0-9]+]]:gpr = COPY $x11
+    ; RV64I: [[COPY3:%[0-9]+]]:gpr = COPY $x10
+    ; RV64I: [[ANDI:%[0-9]+]]:gpr = ANDI [[COPY3]], 1
+    ; RV64I: [[COPY4:%[0-9]+]]:gpr = COPY $x0
+    ; RV64I: BNE [[ANDI]], [[COPY4]], %bb.2
+    ; RV64I: .1:
+    ; RV64I: .2:
+    ; RV64I: successors: %bb.3, %bb.4
+    ; RV64I: [[PHI:%[0-9]+]]:gpr = PHI [[COPY2]], %bb.0, [[COPY1]], %bb.1
+    ; RV64I: [[ADDI:%[0-9]+]]:gpr = ADDI [[PHI]], 1
+    ; RV64I: BNE [[ANDI]], [[COPY4]], %bb.4
+    ; RV64I: .3:
+    ; RV64I: .4:
+    ; RV64I: [[PHI1:%[0-9]+]]:gpr = PHI [[COPY]], %bb.2, [[COPY1]], %bb.3
+    ; RV64I: [[ADD:%[0-9]+]]:gpr = ADD [[PHI]], killed [[PHI1]]
+    ; RV64I: $x10 = COPY [[ADD]]
+    ; RV64I: PseudoRET implicit $x10
+    %3:gpr = COPY $x13
+    %2:gpr = COPY $x12
+    %1:gpr = COPY $x11
+    %0:gpr = COPY $x10
+    %5:gpr = ANDI %0, 1
+    %6:gpr = COPY $x0
+    %7:gpr = Select_GPR_Using_CC_GPR %5, %6, 22, %1, %2
+    %8:gpr = ADDI %7, 1
+    %9:gpr = Select_GPR_Using_CC_GPR %5, %6, 22, %3, %2
+    %10:gpr = ADD %7, killed %9
+    $x10 = COPY %10
+    PseudoRET implicit $x10
+
+...
+---
+# Demonstrate that debug info associated with selects is correctly moved to
+# the tail basic block, while debug info associated with non-selects is left
+# in the head basic block.
+name:            cmov_interleaved_debug_value
+alignment:       2
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: gpr }
+  - { id: 1, class: gpr }
+  - { id: 2, class: gpr }
+  - { id: 3, class: gpr }
+  - { id: 4, class: gpr }
+  - { id: 5, class: gpr }
+  - { id: 6, class: gpr }
+  - { id: 7, class: gpr }
+  - { id: 8, class: gpr }
+  - { id: 9, class: gpr }
+  - { id: 10, class: gpr }
+liveins:
+  - { reg: '$x10', virtual-reg: '%0' }
+  - { reg: '$x11', virtual-reg: '%1' }
+  - { reg: '$x12', virtual-reg: '%2' }
+  - { reg: '$x13', virtual-reg: '%3' }
+body:             |
+  bb.0:
+    liveins: $x10, $x11, $x12, $x13
+
+    ; RV32I-LABEL: name: cmov_interleaved_debug_value
+    ; RV32I: successors: %bb.1, %bb.2
+    ; RV32I: liveins: $x10, $x11, $x12, $x13
+    ; RV32I: [[COPY:%[0-9]+]]:gpr = COPY $x13
+    ; RV32I: [[COPY1:%[0-9]+]]:gpr = COPY $x12
+    ; RV32I: [[COPY2:%[0-9]+]]:gpr = COPY $x11
+    ; RV32I: [[COPY3:%[0-9]+]]:gpr = COPY $x10
+    ; RV32I: [[ANDI:%[0-9]+]]:gpr = ANDI [[COPY3]], 1
+    ; RV32I: [[COPY4:%[0-9]+]]:gpr = COPY $x0
+    ; RV32I: [[ADDI:%[0-9]+]]:gpr = ADDI [[COPY3]], 1
+    ; RV32I: DBG_VALUE [[ADDI]], $noreg, !DIExpression(), !DIExpression()
+    ; RV32I: BNE [[ANDI]], [[COPY4]], %bb.2
+    ; RV32I: .1:
+    ; RV32I: .2:
+    ; RV32I: [[PHI:%[0-9]+]]:gpr = PHI [[COPY2]], %bb.0, [[COPY1]], %bb.1
+    ; RV32I: [[PHI1:%[0-9]+]]:gpr = PHI [[COPY]], %bb.0, [[COPY1]], %bb.1
+    ; RV32I: DBG_VALUE [[PHI]], $noreg, !DIExpression(), !DIExpression()
+    ; RV32I: DBG_VALUE [[PHI1]], $noreg, !DIExpression(), !DIExpression()
+    ; RV32I: [[ADD:%[0-9]+]]:gpr = ADD [[PHI]], killed [[PHI1]]
+    ; RV32I: $x10 = COPY [[ADD]]
+    ; RV32I: PseudoRET implicit $x10
+    ; RV64I-LABEL: name: cmov_interleaved_debug_value
+    ; RV64I: successors: %bb.1, %bb.2
+    ; RV64I: liveins: $x10, $x11, $x12, $x13
+    ; RV64I: [[COPY:%[0-9]+]]:gpr = COPY $x13
+    ; RV64I: [[COPY1:%[0-9]+]]:gpr = COPY $x12
+    ; RV64I: [[COPY2:%[0-9]+]]:gpr = COPY $x11
+    ; RV64I: [[COPY3:%[0-9]+]]:gpr = COPY $x10
+    ; RV64I: [[ANDI:%[0-9]+]]:gpr = ANDI [[COPY3]], 1
+    ; RV64I: [[COPY4:%[0-9]+]]:gpr = COPY $x0
+    ; RV64I: [[ADDI:%[0-9]+]]:gpr = ADDI [[COPY3]], 1
+    ; RV64I: DBG_VALUE [[ADDI]], $noreg, !DIExpression(), !DIExpression()
+    ; RV64I: BNE [[ANDI]], [[COPY4]], %bb.2
+    ; RV64I: .1:
+    ; RV64I: .2:
+    ; RV64I: [[PHI:%[0-9]+]]:gpr = PHI [[COPY2]], %bb.0, [[COPY1]], %bb.1
+    ; RV64I: [[PHI1:%[0-9]+]]:gpr = PHI [[COPY]], %bb.0, [[COPY1]], %bb.1
+    ; RV64I: DBG_VALUE [[PHI]], $noreg, !DIExpression(), !DIExpression()
+    ; RV64I: DBG_VALUE [[PHI1]], $noreg, !DIExpression(), !DIExpression()
+    ; RV64I: [[ADD:%[0-9]+]]:gpr = ADD [[PHI]], killed [[PHI1]]
+    ; RV64I: $x10 = COPY [[ADD]]
+    ; RV64I: PseudoRET implicit $x10
+    %3:gpr = COPY $x13
+    %2:gpr = COPY $x12
+    %1:gpr = COPY $x11
+    %0:gpr = COPY $x10
+    %5:gpr = ANDI %0, 1
+    %6:gpr = COPY $x0
+    %7:gpr = Select_GPR_Using_CC_GPR %5, %6, 22, %1, %2
+    DBG_VALUE %7, $noreg, !1, !1
+    %8:gpr = ADDI %0, 1
+    DBG_VALUE %8, $noreg, !1, !1
+    %9:gpr = Select_GPR_Using_CC_GPR %5, %6, 22, %3, %2
+    DBG_VALUE %9, $noreg, !1, !1
+    %10:gpr = ADD %7, killed %9
+    $x10 = COPY %10
+    PseudoRET implicit $x10
+
+...
+---




More information about the llvm-commits mailing list