[llvm] 109aa58 - [RISCV] Add an experimental pseudoinstruction to represent a rematerializable constant materialization sequence. (#69983)

via llvm-commits llvm-commits at lists.llvm.org
Wed Oct 25 17:20:36 PDT 2023


Author: Craig Topper
Date: 2023-10-25T17:20:32-07:00
New Revision: 109aa586f073d27120b6c07afe673f30f58d9879

URL: https://github.com/llvm/llvm-project/commit/109aa586f073d27120b6c07afe673f30f58d9879
DIFF: https://github.com/llvm/llvm-project/commit/109aa586f073d27120b6c07afe673f30f58d9879.diff

LOG: [RISCV] Add an experimental pseudoinstruction to represent a rematerializable constant materialization sequence. (#69983)

Rematerialization during register allocation is currently limited to a
single instruction with no inputs.
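
For example, a plain li of a small constant is a single instruction with
no inputs (other than the constant register x0) and can be
rematerialized, but a two-instruction sequence like the one below (the
values are taken from the imm.ll checks further down) cannot be
recomputed as a unit, so the value may be spilled and reloaded instead:

  lui   a0, 423811
  addiw a0, a0, -1297  ; materializes 1735928559 on RV64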

This patch introduces a pseudoinstruction that represents the
materialization of a constant. I've started with a sequence of 2
instructions for now, which covers at least the common LUI+ADDI(W) case.
This instruction will be expanded into real instructions immediately
after register allocation using a new pass. This gives the post-RA
scheduler a chance to separate the 2 instructions to improve ILP.
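
A rough sketch of the flow, with the MIR spelling approximated: after
instruction selection the constant is one opaque, rematerializable
machine instruction, and the new pass expands it with
RISCVMatInt::generateInstSeq once registers have been assigned:

  ; after isel (single rematerializable instruction):
  $x10 = PseudoMovImm 1735928559
  ; after the post-RA expansion pass (RV64):
  lui   a0, 423811
  addiw a0, a0, -1297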

I believe this matches the approach used by AArch64.

Unfortunately, this loses some CSE opportunities when an LUI value is used
by multiple constants with different LSBs.
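
For example (the second constant is hypothetical, chosen to share the
upper 20 bits with the first), today the two materializations can share
one LUI:

  lui  a0, 74565
  addi a1, a0, 1656   ; 0x12345678
  addi a2, a0, 257    ; 0x12345101

With PseudoMovImm each constant stays a single opaque node until after
register allocation, so each one expands to its own LUI.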

This feature is off by default and a new backend command line option is
added to enable it for testing.
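
For example, mirroring the RUN lines added to imm.ll below (input.ll
stands in for any IR file):

  llc -mtriple=riscv64 -riscv-use-rematerializable-movimm < input.ll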

This avoids the spills and reloads reported in #69586.

Added: 
    llvm/lib/Target/RISCV/RISCVPostRAExpandPseudoInsts.cpp
    llvm/test/CodeGen/RISCV/pr69586.ll

Modified: 
    llvm/lib/Target/RISCV/CMakeLists.txt
    llvm/lib/Target/RISCV/RISCV.h
    llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
    llvm/lib/Target/RISCV/RISCVInstrInfo.td
    llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
    llvm/test/CodeGen/RISCV/O0-pipeline.ll
    llvm/test/CodeGen/RISCV/O3-pipeline.ll
    llvm/test/CodeGen/RISCV/imm.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/RISCV/CMakeLists.txt b/llvm/lib/Target/RISCV/CMakeLists.txt
index fd5a5244486ab18..4d5fa79389ea68b 100644
--- a/llvm/lib/Target/RISCV/CMakeLists.txt
+++ b/llvm/lib/Target/RISCV/CMakeLists.txt
@@ -44,6 +44,7 @@ add_llvm_target(RISCVCodeGen
   RISCVMacroFusion.cpp
   RISCVMergeBaseOffset.cpp
   RISCVOptWInstrs.cpp
+  RISCVPostRAExpandPseudoInsts.cpp
   RISCVRedundantCopyElimination.cpp
   RISCVMoveMerger.cpp
   RISCVPushPopOptimizer.cpp

diff --git a/llvm/lib/Target/RISCV/RISCV.h b/llvm/lib/Target/RISCV/RISCV.h
index 0efc915ea52c550..3d8e33dc716ea44 100644
--- a/llvm/lib/Target/RISCV/RISCV.h
+++ b/llvm/lib/Target/RISCV/RISCV.h
@@ -63,6 +63,8 @@ void initializeRISCVExpandAtomicPseudoPass(PassRegistry &);
 FunctionPass *createRISCVInsertVSETVLIPass();
 void initializeRISCVInsertVSETVLIPass(PassRegistry &);
 
+FunctionPass *createRISCVPostRAExpandPseudoPass();
+void initializeRISCVPostRAExpandPseudoPass(PassRegistry &);
 FunctionPass *createRISCVInsertReadWriteCSRPass();
 void initializeRISCVInsertReadWriteCSRPass(PassRegistry &);
 

diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
index 81a1304cf1f405e..6c156057ccd7d0e 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -29,6 +29,12 @@ using namespace llvm;
 #define DEBUG_TYPE "riscv-isel"
 #define PASS_NAME "RISC-V DAG->DAG Pattern Instruction Selection"
 
+static cl::opt<bool> UsePseudoMovImm(
+    "riscv-use-rematerializable-movimm", cl::Hidden,
+    cl::desc("Use a rematerializable pseudoinstruction for 2 instruction "
+             "constant materialization"),
+    cl::init(false));
+
 namespace llvm::RISCV {
 #define GET_RISCVVSSEGTable_IMPL
 #define GET_RISCVVLSEGTable_IMPL
@@ -195,6 +201,13 @@ static SDValue selectImm(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT,
   RISCVMatInt::InstSeq Seq =
       RISCVMatInt::generateInstSeq(Imm, Subtarget.getFeatureBits());
 
+  // Use a rematerializable pseudo instruction for short sequences if enabled.
+  if (Seq.size() == 2 && UsePseudoMovImm)
+    return SDValue(
+        CurDAG->getMachineNode(RISCV::PseudoMovImm, DL, VT,
+                               CurDAG->getTargetConstant(Imm, DL, VT)),
+        0);
+
   // See if we can create this constant as (ADD (SLLI X, C), X) where X is at
   // worst an LUI+ADDIW. This will require an extra register, but avoids a
   // constant pool.

diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
index 460f43bf60a25f9..1a9242cff0b445d 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
@@ -1664,6 +1664,16 @@ def PseudoJump : Pseudo<(outs GPR:$rd), (ins pseudo_jump_symbol:$target), [],
                         "jump", "$target, $rd">,
                  Sched<[WriteIALU, WriteJalr, ReadJalr]>;
 
+// Pseudo for a rematerializable constant materialization sequence.
+// This is an experimental feature enabled by
+// -riscv-use-rematerializable-movimm in RISCVISelDAGToDAG.cpp
+// It will be expanded after register allocation.
+// FIXME: The scheduling information does not reflect the multiple instructions.
+let hasSideEffects = 0, mayLoad = 0, mayStore = 0, Size = 8, isCodeGenOnly = 1,
+    isPseudo = 1, isReMaterializable = 1, IsSignExtendingOpW = 1 in
+def PseudoMovImm : Pseudo<(outs GPR:$dst), (ins i32imm:$imm), []>,
+                   Sched<[WriteIALU]>;
+
 let hasSideEffects = 0, mayLoad = 0, mayStore = 0, Size = 8, isCodeGenOnly = 0,
     isAsmParserOnly = 1 in
 def PseudoLLA : Pseudo<(outs GPR:$dst), (ins bare_symbol:$src), [],

diff --git a/llvm/lib/Target/RISCV/RISCVPostRAExpandPseudoInsts.cpp b/llvm/lib/Target/RISCV/RISCVPostRAExpandPseudoInsts.cpp
new file mode 100644
index 000000000000000..407e7cfd6fef830
--- /dev/null
+++ b/llvm/lib/Target/RISCV/RISCVPostRAExpandPseudoInsts.cpp
@@ -0,0 +1,155 @@
+//===-- RISCVPostRAExpandPseudoInsts.cpp - Expand pseudo instrs ----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a pass that expands the PseudoMovImm pseudo
+// instruction into target instructions. This pass should be run during
+// the post-regalloc passes, before post-RA scheduling.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/RISCVMatInt.h"
+#include "RISCV.h"
+#include "RISCVInstrInfo.h"
+#include "RISCVTargetMachine.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+
+using namespace llvm;
+
+#define RISCV_POST_RA_EXPAND_PSEUDO_NAME                                       \
+  "RISC-V post-regalloc pseudo instruction expansion pass"
+
+namespace {
+
+class RISCVPostRAExpandPseudo : public MachineFunctionPass {
+public:
+  const RISCVInstrInfo *TII;
+  static char ID;
+
+  RISCVPostRAExpandPseudo() : MachineFunctionPass(ID) {
+    initializeRISCVPostRAExpandPseudoPass(*PassRegistry::getPassRegistry());
+  }
+
+  bool runOnMachineFunction(MachineFunction &MF) override;
+
+  StringRef getPassName() const override {
+    return RISCV_POST_RA_EXPAND_PSEUDO_NAME;
+  }
+
+private:
+  bool expandMBB(MachineBasicBlock &MBB);
+  bool expandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+                MachineBasicBlock::iterator &NextMBBI);
+  bool expandMovImm(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI);
+};
+
+char RISCVPostRAExpandPseudo::ID = 0;
+
+bool RISCVPostRAExpandPseudo::runOnMachineFunction(MachineFunction &MF) {
+  TII = static_cast<const RISCVInstrInfo *>(MF.getSubtarget().getInstrInfo());
+  bool Modified = false;
+  for (auto &MBB : MF)
+    Modified |= expandMBB(MBB);
+  return Modified;
+}
+
+bool RISCVPostRAExpandPseudo::expandMBB(MachineBasicBlock &MBB) {
+  bool Modified = false;
+
+  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
+  while (MBBI != E) {
+    MachineBasicBlock::iterator NMBBI = std::next(MBBI);
+    Modified |= expandMI(MBB, MBBI, NMBBI);
+    MBBI = NMBBI;
+  }
+
+  return Modified;
+}
+
+bool RISCVPostRAExpandPseudo::expandMI(MachineBasicBlock &MBB,
+                                       MachineBasicBlock::iterator MBBI,
+                                       MachineBasicBlock::iterator &NextMBBI) {
+  switch (MBBI->getOpcode()) {
+  case RISCV::PseudoMovImm:
+    return expandMovImm(MBB, MBBI);
+  default:
+    return false;
+  }
+}
+
+bool RISCVPostRAExpandPseudo::expandMovImm(MachineBasicBlock &MBB,
+                                           MachineBasicBlock::iterator MBBI) {
+  DebugLoc DL = MBBI->getDebugLoc();
+
+  int64_t Val = MBBI->getOperand(1).getImm();
+
+  RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(
+      Val, MBB.getParent()->getSubtarget().getFeatureBits());
+  assert(!Seq.empty());
+
+  Register SrcReg = RISCV::X0;
+  Register DstReg = MBBI->getOperand(0).getReg();
+  bool DstIsDead = MBBI->getOperand(0).isDead();
+  bool Renamable = MBBI->getOperand(0).isRenamable();
+  bool SrcRenamable = false;
+  unsigned Num = 0;
+
+  for (RISCVMatInt::Inst &Inst : Seq) {
+    bool LastItem = ++Num == Seq.size();
+    switch (Inst.getOpndKind()) {
+    case RISCVMatInt::Imm:
+      BuildMI(MBB, MBBI, DL, TII->get(Inst.getOpcode()))
+          .addReg(DstReg, RegState::Define |
+                              getDeadRegState(DstIsDead && LastItem) |
+                              getRenamableRegState(Renamable))
+          .addImm(Inst.getImm());
+      break;
+    case RISCVMatInt::RegX0:
+      BuildMI(MBB, MBBI, DL, TII->get(Inst.getOpcode()))
+          .addReg(DstReg, RegState::Define |
+                              getDeadRegState(DstIsDead && LastItem) |
+                              getRenamableRegState(Renamable))
+          .addReg(SrcReg, RegState::Kill | getRenamableRegState(SrcRenamable))
+          .addReg(RISCV::X0);
+      break;
+    case RISCVMatInt::RegReg:
+      BuildMI(MBB, MBBI, DL, TII->get(Inst.getOpcode()))
+          .addReg(DstReg, RegState::Define |
+                              getDeadRegState(DstIsDead && LastItem) |
+                              getRenamableRegState(Renamable))
+          .addReg(SrcReg, RegState::Kill | getRenamableRegState(SrcRenamable))
+          .addReg(SrcReg, RegState::Kill | getRenamableRegState(SrcRenamable));
+      break;
+    case RISCVMatInt::RegImm:
+      BuildMI(MBB, MBBI, DL, TII->get(Inst.getOpcode()))
+          .addReg(DstReg, RegState::Define |
+                              getDeadRegState(DstIsDead && LastItem) |
+                              getRenamableRegState(Renamable))
+          .addReg(SrcReg, RegState::Kill | getRenamableRegState(SrcRenamable))
+          .addImm(Inst.getImm());
+      break;
+    }
+    // Only the first instruction has X0 as its source.
+    SrcReg = DstReg;
+    SrcRenamable = Renamable;
+  }
+  MBBI->eraseFromParent();
+  return true;
+}
+
+} // end of anonymous namespace
+
+INITIALIZE_PASS(RISCVPostRAExpandPseudo, "riscv-expand-pseudolisimm32",
+                RISCV_POST_RA_EXPAND_PSEUDO_NAME, false, false)
+namespace llvm {
+
+FunctionPass *createRISCVPostRAExpandPseudoPass() {
+  return new RISCVPostRAExpandPseudo();
+}
+
+} // end of namespace llvm

diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
index 651d24bae57263d..953ac097b915044 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
@@ -96,6 +96,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeRISCVTarget() {
   initializeRISCVMakeCompressibleOptPass(*PR);
   initializeRISCVGatherScatterLoweringPass(*PR);
   initializeRISCVCodeGenPreparePass(*PR);
+  initializeRISCVPostRAExpandPseudoPass(*PR);
   initializeRISCVMergeBaseOffsetOptPass(*PR);
   initializeRISCVOptWInstrsPass(*PR);
   initializeRISCVPreRAExpandPseudoPass(*PR);
@@ -372,6 +373,8 @@ bool RISCVPassConfig::addGlobalInstructionSelect() {
 }
 
 void RISCVPassConfig::addPreSched2() {
+  addPass(createRISCVPostRAExpandPseudoPass());
+
   // Emit KCFI checks for indirect calls.
   addPass(createKCFIPass());
 }

diff --git a/llvm/test/CodeGen/RISCV/O0-pipeline.ll b/llvm/test/CodeGen/RISCV/O0-pipeline.ll
index 01c7613201854a6..1d9af9df2f718f0 100644
--- a/llvm/test/CodeGen/RISCV/O0-pipeline.ll
+++ b/llvm/test/CodeGen/RISCV/O0-pipeline.ll
@@ -52,6 +52,7 @@
 ; CHECK-NEXT:       Machine Optimization Remark Emitter
 ; CHECK-NEXT:       Prologue/Epilogue Insertion & Frame Finalization
 ; CHECK-NEXT:       Post-RA pseudo instruction expansion pass
+; CHECK-NEXT:       RISC-V post-regalloc pseudo instruction expansion pass
 ; CHECK-NEXT:       Insert KCFI indirect call checks
 ; CHECK-NEXT:       Analyze Machine Code For Garbage Collection
 ; CHECK-NEXT:       Insert fentry calls

diff --git a/llvm/test/CodeGen/RISCV/O3-pipeline.ll b/llvm/test/CodeGen/RISCV/O3-pipeline.ll
index 30b6e1e541394d0..cf0826096bd41f8 100644
--- a/llvm/test/CodeGen/RISCV/O3-pipeline.ll
+++ b/llvm/test/CodeGen/RISCV/O3-pipeline.ll
@@ -156,6 +156,7 @@
 ; CHECK-NEXT:       Tail Duplication
 ; CHECK-NEXT:       Machine Copy Propagation Pass
 ; CHECK-NEXT:       Post-RA pseudo instruction expansion pass
+; CHECK-NEXT:       RISC-V post-regalloc pseudo instruction expansion pass
 ; CHECK-NEXT:       Insert KCFI indirect call checks
 ; CHECK-NEXT:       MachineDominator Tree Construction
 ; CHECK-NEXT:       Machine Natural Loop Construction

diff --git a/llvm/test/CodeGen/RISCV/imm.ll b/llvm/test/CodeGen/RISCV/imm.ll
index e191933b42338aa..cafcf72c022ff4a 100644
--- a/llvm/test/CodeGen/RISCV/imm.ll
+++ b/llvm/test/CodeGen/RISCV/imm.ll
@@ -14,6 +14,11 @@
 ; RUN: llc -mtriple=riscv64 -riscv-disable-using-constant-pool-for-large-ints -mattr=+xtheadbb \
 ; RUN:   -verify-machineinstrs < %s | FileCheck %s -check-prefix=RV64IXTHEADBB
 
+; RUN: llc -mtriple=riscv32 -riscv-disable-using-constant-pool-for-large-ints -verify-machineinstrs < %s \
+; RUN:   -riscv-use-rematerializable-movimm | FileCheck %s -check-prefix=RV32-REMAT
+; RUN: llc -mtriple=riscv64 -riscv-disable-using-constant-pool-for-large-ints -verify-machineinstrs < %s \
+; RUN:   -riscv-use-rematerializable-movimm | FileCheck %s -check-prefix=RV64-REMAT
+
 ; Materializing constants
 
 ; TODO: It would be preferable if anyext constant returns were sign rather
@@ -50,6 +55,16 @@ define signext i32 @zero() nounwind {
 ; RV64IXTHEADBB:       # %bb.0:
 ; RV64IXTHEADBB-NEXT:    li a0, 0
 ; RV64IXTHEADBB-NEXT:    ret
+;
+; RV32-REMAT-LABEL: zero:
+; RV32-REMAT:       # %bb.0:
+; RV32-REMAT-NEXT:    li a0, 0
+; RV32-REMAT-NEXT:    ret
+;
+; RV64-REMAT-LABEL: zero:
+; RV64-REMAT:       # %bb.0:
+; RV64-REMAT-NEXT:    li a0, 0
+; RV64-REMAT-NEXT:    ret
   ret i32 0
 }
 
@@ -83,6 +98,16 @@ define signext i32 @pos_small() nounwind {
 ; RV64IXTHEADBB:       # %bb.0:
 ; RV64IXTHEADBB-NEXT:    li a0, 2047
 ; RV64IXTHEADBB-NEXT:    ret
+;
+; RV32-REMAT-LABEL: pos_small:
+; RV32-REMAT:       # %bb.0:
+; RV32-REMAT-NEXT:    li a0, 2047
+; RV32-REMAT-NEXT:    ret
+;
+; RV64-REMAT-LABEL: pos_small:
+; RV64-REMAT:       # %bb.0:
+; RV64-REMAT-NEXT:    li a0, 2047
+; RV64-REMAT-NEXT:    ret
   ret i32 2047
 }
 
@@ -116,6 +141,16 @@ define signext i32 @neg_small() nounwind {
 ; RV64IXTHEADBB:       # %bb.0:
 ; RV64IXTHEADBB-NEXT:    li a0, -2048
 ; RV64IXTHEADBB-NEXT:    ret
+;
+; RV32-REMAT-LABEL: neg_small:
+; RV32-REMAT:       # %bb.0:
+; RV32-REMAT-NEXT:    li a0, -2048
+; RV32-REMAT-NEXT:    ret
+;
+; RV64-REMAT-LABEL: neg_small:
+; RV64-REMAT:       # %bb.0:
+; RV64-REMAT-NEXT:    li a0, -2048
+; RV64-REMAT-NEXT:    ret
   ret i32 -2048
 }
 
@@ -155,6 +190,18 @@ define signext i32 @pos_i32() nounwind {
 ; RV64IXTHEADBB-NEXT:    lui a0, 423811
 ; RV64IXTHEADBB-NEXT:    addiw a0, a0, -1297
 ; RV64IXTHEADBB-NEXT:    ret
+;
+; RV32-REMAT-LABEL: pos_i32:
+; RV32-REMAT:       # %bb.0:
+; RV32-REMAT-NEXT:    lui a0, 423811
+; RV32-REMAT-NEXT:    addi a0, a0, -1297
+; RV32-REMAT-NEXT:    ret
+;
+; RV64-REMAT-LABEL: pos_i32:
+; RV64-REMAT:       # %bb.0:
+; RV64-REMAT-NEXT:    lui a0, 423811
+; RV64-REMAT-NEXT:    addiw a0, a0, -1297
+; RV64-REMAT-NEXT:    ret
   ret i32 1735928559
 }
 
@@ -194,6 +241,18 @@ define signext i32 @neg_i32() nounwind {
 ; RV64IXTHEADBB-NEXT:    lui a0, 912092
 ; RV64IXTHEADBB-NEXT:    addiw a0, a0, -273
 ; RV64IXTHEADBB-NEXT:    ret
+;
+; RV32-REMAT-LABEL: neg_i32:
+; RV32-REMAT:       # %bb.0:
+; RV32-REMAT-NEXT:    lui a0, 912092
+; RV32-REMAT-NEXT:    addi a0, a0, -273
+; RV32-REMAT-NEXT:    ret
+;
+; RV64-REMAT-LABEL: neg_i32:
+; RV64-REMAT:       # %bb.0:
+; RV64-REMAT-NEXT:    lui a0, 912092
+; RV64-REMAT-NEXT:    addiw a0, a0, -273
+; RV64-REMAT-NEXT:    ret
   ret i32 -559038737
 }
 
@@ -227,6 +286,16 @@ define signext i32 @pos_i32_hi20_only() nounwind {
 ; RV64IXTHEADBB:       # %bb.0:
 ; RV64IXTHEADBB-NEXT:    lui a0, 16
 ; RV64IXTHEADBB-NEXT:    ret
+;
+; RV32-REMAT-LABEL: pos_i32_hi20_only:
+; RV32-REMAT:       # %bb.0:
+; RV32-REMAT-NEXT:    lui a0, 16
+; RV32-REMAT-NEXT:    ret
+;
+; RV64-REMAT-LABEL: pos_i32_hi20_only:
+; RV64-REMAT:       # %bb.0:
+; RV64-REMAT-NEXT:    lui a0, 16
+; RV64-REMAT-NEXT:    ret
   ret i32 65536 ; 0x10000
 }
 
@@ -260,6 +329,16 @@ define signext i32 @neg_i32_hi20_only() nounwind {
 ; RV64IXTHEADBB:       # %bb.0:
 ; RV64IXTHEADBB-NEXT:    lui a0, 1048560
 ; RV64IXTHEADBB-NEXT:    ret
+;
+; RV32-REMAT-LABEL: neg_i32_hi20_only:
+; RV32-REMAT:       # %bb.0:
+; RV32-REMAT-NEXT:    lui a0, 1048560
+; RV32-REMAT-NEXT:    ret
+;
+; RV64-REMAT-LABEL: neg_i32_hi20_only:
+; RV64-REMAT:       # %bb.0:
+; RV64-REMAT-NEXT:    lui a0, 1048560
+; RV64-REMAT-NEXT:    ret
   ret i32 -65536 ; -0x10000
 }
 
@@ -301,6 +380,18 @@ define signext i32 @imm_left_shifted_addi() nounwind {
 ; RV64IXTHEADBB-NEXT:    lui a0, 32
 ; RV64IXTHEADBB-NEXT:    addiw a0, a0, -64
 ; RV64IXTHEADBB-NEXT:    ret
+;
+; RV32-REMAT-LABEL: imm_left_shifted_addi:
+; RV32-REMAT:       # %bb.0:
+; RV32-REMAT-NEXT:    lui a0, 32
+; RV32-REMAT-NEXT:    addi a0, a0, -64
+; RV32-REMAT-NEXT:    ret
+;
+; RV64-REMAT-LABEL: imm_left_shifted_addi:
+; RV64-REMAT:       # %bb.0:
+; RV64-REMAT-NEXT:    lui a0, 32
+; RV64-REMAT-NEXT:    addiw a0, a0, -64
+; RV64-REMAT-NEXT:    ret
   ret i32 131008 ; 0x1FFC0
 }
 
@@ -342,6 +433,18 @@ define signext i32 @imm_right_shifted_addi() nounwind {
 ; RV64IXTHEADBB-NEXT:    lui a0, 524288
 ; RV64IXTHEADBB-NEXT:    addiw a0, a0, -1
 ; RV64IXTHEADBB-NEXT:    ret
+;
+; RV32-REMAT-LABEL: imm_right_shifted_addi:
+; RV32-REMAT:       # %bb.0:
+; RV32-REMAT-NEXT:    lui a0, 524288
+; RV32-REMAT-NEXT:    addi a0, a0, -1
+; RV32-REMAT-NEXT:    ret
+;
+; RV64-REMAT-LABEL: imm_right_shifted_addi:
+; RV64-REMAT:       # %bb.0:
+; RV64-REMAT-NEXT:    lui a0, 524288
+; RV64-REMAT-NEXT:    addiw a0, a0, -1
+; RV64-REMAT-NEXT:    ret
   ret i32 2147483647 ; 0x7FFFFFFF
 }
 
@@ -383,6 +486,18 @@ define signext i32 @imm_right_shifted_lui() nounwind {
 ; RV64IXTHEADBB-NEXT:    lui a0, 56
 ; RV64IXTHEADBB-NEXT:    addiw a0, a0, 580
 ; RV64IXTHEADBB-NEXT:    ret
+;
+; RV32-REMAT-LABEL: imm_right_shifted_lui:
+; RV32-REMAT:       # %bb.0:
+; RV32-REMAT-NEXT:    lui a0, 56
+; RV32-REMAT-NEXT:    addi a0, a0, 580
+; RV32-REMAT-NEXT:    ret
+;
+; RV64-REMAT-LABEL: imm_right_shifted_lui:
+; RV64-REMAT:       # %bb.0:
+; RV64-REMAT-NEXT:    lui a0, 56
+; RV64-REMAT-NEXT:    addiw a0, a0, 580
+; RV64-REMAT-NEXT:    ret
   ret i32 229956 ; 0x38244
 }
 
@@ -421,6 +536,18 @@ define i64 @imm64_1() nounwind {
 ; RV64IXTHEADBB-NEXT:    li a0, 1
 ; RV64IXTHEADBB-NEXT:    slli a0, a0, 31
 ; RV64IXTHEADBB-NEXT:    ret
+;
+; RV32-REMAT-LABEL: imm64_1:
+; RV32-REMAT:       # %bb.0:
+; RV32-REMAT-NEXT:    lui a0, 524288
+; RV32-REMAT-NEXT:    li a1, 0
+; RV32-REMAT-NEXT:    ret
+;
+; RV64-REMAT-LABEL: imm64_1:
+; RV64-REMAT:       # %bb.0:
+; RV64-REMAT-NEXT:    li a0, 1
+; RV64-REMAT-NEXT:    slli a0, a0, 31
+; RV64-REMAT-NEXT:    ret
   ret i64 2147483648 ; 0x8000_0000
 }
 
@@ -460,6 +587,18 @@ define i64 @imm64_2() nounwind {
 ; RV64IXTHEADBB-NEXT:    li a0, -1
 ; RV64IXTHEADBB-NEXT:    srli a0, a0, 32
 ; RV64IXTHEADBB-NEXT:    ret
+;
+; RV32-REMAT-LABEL: imm64_2:
+; RV32-REMAT:       # %bb.0:
+; RV32-REMAT-NEXT:    li a0, -1
+; RV32-REMAT-NEXT:    li a1, 0
+; RV32-REMAT-NEXT:    ret
+;
+; RV64-REMAT-LABEL: imm64_2:
+; RV64-REMAT:       # %bb.0:
+; RV64-REMAT-NEXT:    li a0, -1
+; RV64-REMAT-NEXT:    srli a0, a0, 32
+; RV64-REMAT-NEXT:    ret
   ret i64 4294967295 ; 0xFFFF_FFFF
 }
 
@@ -498,6 +637,18 @@ define i64 @imm64_3() nounwind {
 ; RV64IXTHEADBB-NEXT:    li a0, 1
 ; RV64IXTHEADBB-NEXT:    slli a0, a0, 32
 ; RV64IXTHEADBB-NEXT:    ret
+;
+; RV32-REMAT-LABEL: imm64_3:
+; RV32-REMAT:       # %bb.0:
+; RV32-REMAT-NEXT:    li a1, 1
+; RV32-REMAT-NEXT:    li a0, 0
+; RV32-REMAT-NEXT:    ret
+;
+; RV64-REMAT-LABEL: imm64_3:
+; RV64-REMAT:       # %bb.0:
+; RV64-REMAT-NEXT:    li a0, 1
+; RV64-REMAT-NEXT:    slli a0, a0, 32
+; RV64-REMAT-NEXT:    ret
   ret i64 4294967296 ; 0x1_0000_0000
 }
 
@@ -536,6 +687,18 @@ define i64 @imm64_4() nounwind {
 ; RV64IXTHEADBB-NEXT:    li a0, -1
 ; RV64IXTHEADBB-NEXT:    slli a0, a0, 63
 ; RV64IXTHEADBB-NEXT:    ret
+;
+; RV32-REMAT-LABEL: imm64_4:
+; RV32-REMAT:       # %bb.0:
+; RV32-REMAT-NEXT:    lui a1, 524288
+; RV32-REMAT-NEXT:    li a0, 0
+; RV32-REMAT-NEXT:    ret
+;
+; RV64-REMAT-LABEL: imm64_4:
+; RV64-REMAT:       # %bb.0:
+; RV64-REMAT-NEXT:    li a0, -1
+; RV64-REMAT-NEXT:    slli a0, a0, 63
+; RV64-REMAT-NEXT:    ret
   ret i64 9223372036854775808 ; 0x8000_0000_0000_0000
 }
 
@@ -574,6 +737,18 @@ define i64 @imm64_5() nounwind {
 ; RV64IXTHEADBB-NEXT:    li a0, -1
 ; RV64IXTHEADBB-NEXT:    slli a0, a0, 63
 ; RV64IXTHEADBB-NEXT:    ret
+;
+; RV32-REMAT-LABEL: imm64_5:
+; RV32-REMAT:       # %bb.0:
+; RV32-REMAT-NEXT:    lui a1, 524288
+; RV32-REMAT-NEXT:    li a0, 0
+; RV32-REMAT-NEXT:    ret
+;
+; RV64-REMAT-LABEL: imm64_5:
+; RV64-REMAT:       # %bb.0:
+; RV64-REMAT-NEXT:    li a0, -1
+; RV64-REMAT-NEXT:    slli a0, a0, 63
+; RV64-REMAT-NEXT:    ret
   ret i64 -9223372036854775808 ; 0x8000_0000_0000_0000
 }
 
@@ -619,6 +794,20 @@ define i64 @imm64_6() nounwind {
 ; RV64IXTHEADBB-NEXT:    addi a0, a0, -1329
 ; RV64IXTHEADBB-NEXT:    slli a0, a0, 35
 ; RV64IXTHEADBB-NEXT:    ret
+;
+; RV32-REMAT-LABEL: imm64_6:
+; RV32-REMAT:       # %bb.0:
+; RV32-REMAT-NEXT:    lui a1, 74565
+; RV32-REMAT-NEXT:    addi a1, a1, 1656
+; RV32-REMAT-NEXT:    li a0, 0
+; RV32-REMAT-NEXT:    ret
+;
+; RV64-REMAT-LABEL: imm64_6:
+; RV64-REMAT:       # %bb.0:
+; RV64-REMAT-NEXT:    lui a0, 9321
+; RV64-REMAT-NEXT:    addi a0, a0, -1329
+; RV64-REMAT-NEXT:    slli a0, a0, 35
+; RV64-REMAT-NEXT:    ret
   ret i64 1311768464867721216 ; 0x1234_5678_0000_0000
 }
 
@@ -674,6 +863,22 @@ define i64 @imm64_7() nounwind {
 ; RV64IXTHEADBB-NEXT:    slli a0, a0, 24
 ; RV64IXTHEADBB-NEXT:    addi a0, a0, 15
 ; RV64IXTHEADBB-NEXT:    ret
+;
+; RV32-REMAT-LABEL: imm64_7:
+; RV32-REMAT:       # %bb.0:
+; RV32-REMAT-NEXT:    lui a0, 45056
+; RV32-REMAT-NEXT:    addi a0, a0, 15
+; RV32-REMAT-NEXT:    lui a1, 458752
+; RV32-REMAT-NEXT:    ret
+;
+; RV64-REMAT-LABEL: imm64_7:
+; RV64-REMAT:       # %bb.0:
+; RV64-REMAT-NEXT:    li a0, 7
+; RV64-REMAT-NEXT:    slli a0, a0, 36
+; RV64-REMAT-NEXT:    addi a0, a0, 11
+; RV64-REMAT-NEXT:    slli a0, a0, 24
+; RV64-REMAT-NEXT:    addi a0, a0, 15
+; RV64-REMAT-NEXT:    ret
   ret i64 8070450532432478223 ; 0x7000_0000_0B00_000F
 }
 
@@ -752,6 +957,26 @@ define i64 @imm64_8() nounwind {
 ; RV64IXTHEADBB-NEXT:    slli a0, a0, 13
 ; RV64IXTHEADBB-NEXT:    addi a0, a0, -272
 ; RV64IXTHEADBB-NEXT:    ret
+;
+; RV32-REMAT-LABEL: imm64_8:
+; RV32-REMAT:       # %bb.0:
+; RV32-REMAT-NEXT:    lui a0, 633806
+; RV32-REMAT-NEXT:    addi a0, a0, -272
+; RV32-REMAT-NEXT:    lui a1, 74565
+; RV32-REMAT-NEXT:    addi a1, a1, 1656
+; RV32-REMAT-NEXT:    ret
+;
+; RV64-REMAT-LABEL: imm64_8:
+; RV64-REMAT:       # %bb.0:
+; RV64-REMAT-NEXT:    lui a0, 583
+; RV64-REMAT-NEXT:    addiw a0, a0, -1875
+; RV64-REMAT-NEXT:    slli a0, a0, 14
+; RV64-REMAT-NEXT:    addi a0, a0, -947
+; RV64-REMAT-NEXT:    slli a0, a0, 12
+; RV64-REMAT-NEXT:    addi a0, a0, 1511
+; RV64-REMAT-NEXT:    slli a0, a0, 13
+; RV64-REMAT-NEXT:    addi a0, a0, -272
+; RV64-REMAT-NEXT:    ret
   ret i64 1311768467463790320 ; 0x1234_5678_9ABC_DEF0
 }
 
@@ -786,6 +1011,17 @@ define i64 @imm64_9() nounwind {
 ; RV64IXTHEADBB:       # %bb.0:
 ; RV64IXTHEADBB-NEXT:    li a0, -1
 ; RV64IXTHEADBB-NEXT:    ret
+;
+; RV32-REMAT-LABEL: imm64_9:
+; RV32-REMAT:       # %bb.0:
+; RV32-REMAT-NEXT:    li a0, -1
+; RV32-REMAT-NEXT:    li a1, -1
+; RV32-REMAT-NEXT:    ret
+;
+; RV64-REMAT-LABEL: imm64_9:
+; RV64-REMAT:       # %bb.0:
+; RV64-REMAT-NEXT:    li a0, -1
+; RV64-REMAT-NEXT:    ret
   ret i64 -1
 }
 
@@ -828,6 +1064,18 @@ define i64 @imm_left_shifted_lui_1() nounwind {
 ; RV64IXTHEADBB-NEXT:    lui a0, 262145
 ; RV64IXTHEADBB-NEXT:    slli a0, a0, 1
 ; RV64IXTHEADBB-NEXT:    ret
+;
+; RV32-REMAT-LABEL: imm_left_shifted_lui_1:
+; RV32-REMAT:       # %bb.0:
+; RV32-REMAT-NEXT:    lui a0, 524290
+; RV32-REMAT-NEXT:    li a1, 0
+; RV32-REMAT-NEXT:    ret
+;
+; RV64-REMAT-LABEL: imm_left_shifted_lui_1:
+; RV64-REMAT:       # %bb.0:
+; RV64-REMAT-NEXT:    lui a0, 262145
+; RV64-REMAT-NEXT:    slli a0, a0, 1
+; RV64-REMAT-NEXT:    ret
   ret i64 2147491840 ; 0x8000_2000
 }
 
@@ -867,6 +1115,18 @@ define i64 @imm_left_shifted_lui_2() nounwind {
 ; RV64IXTHEADBB-NEXT:    lui a0, 262145
 ; RV64IXTHEADBB-NEXT:    slli a0, a0, 2
 ; RV64IXTHEADBB-NEXT:    ret
+;
+; RV32-REMAT-LABEL: imm_left_shifted_lui_2:
+; RV32-REMAT:       # %bb.0:
+; RV32-REMAT-NEXT:    lui a0, 4
+; RV32-REMAT-NEXT:    li a1, 1
+; RV32-REMAT-NEXT:    ret
+;
+; RV64-REMAT-LABEL: imm_left_shifted_lui_2:
+; RV64-REMAT:       # %bb.0:
+; RV64-REMAT-NEXT:    lui a0, 262145
+; RV64-REMAT-NEXT:    slli a0, a0, 2
+; RV64-REMAT-NEXT:    ret
   ret i64 4294983680 ; 0x1_0000_4000
 }
 
@@ -907,6 +1167,19 @@ define i64 @imm_left_shifted_lui_3() nounwind {
 ; RV64IXTHEADBB-NEXT:    lui a0, 4097
 ; RV64IXTHEADBB-NEXT:    slli a0, a0, 20
 ; RV64IXTHEADBB-NEXT:    ret
+;
+; RV32-REMAT-LABEL: imm_left_shifted_lui_3:
+; RV32-REMAT:       # %bb.0:
+; RV32-REMAT-NEXT:    lui a1, 1
+; RV32-REMAT-NEXT:    addi a1, a1, 1
+; RV32-REMAT-NEXT:    li a0, 0
+; RV32-REMAT-NEXT:    ret
+;
+; RV64-REMAT-LABEL: imm_left_shifted_lui_3:
+; RV64-REMAT:       # %bb.0:
+; RV64-REMAT-NEXT:    lui a0, 4097
+; RV64-REMAT-NEXT:    slli a0, a0, 20
+; RV64-REMAT-NEXT:    ret
   ret i64 17596481011712 ; 0x1001_0000_0000
 }
 
@@ -951,6 +1224,20 @@ define i64 @imm_right_shifted_lui_1() nounwind {
 ; RV64IXTHEADBB-NEXT:    lui a0, 983056
 ; RV64IXTHEADBB-NEXT:    srli a0, a0, 16
 ; RV64IXTHEADBB-NEXT:    ret
+;
+; RV32-REMAT-LABEL: imm_right_shifted_lui_1:
+; RV32-REMAT:       # %bb.0:
+; RV32-REMAT-NEXT:    lui a0, 1048575
+; RV32-REMAT-NEXT:    addi a0, a0, 1
+; RV32-REMAT-NEXT:    lui a1, 16
+; RV32-REMAT-NEXT:    addi a1, a1, -1
+; RV32-REMAT-NEXT:    ret
+;
+; RV64-REMAT-LABEL: imm_right_shifted_lui_1:
+; RV64-REMAT:       # %bb.0:
+; RV64-REMAT-NEXT:    lui a0, 983056
+; RV64-REMAT-NEXT:    srli a0, a0, 16
+; RV64-REMAT-NEXT:    ret
   ret i64 281474976706561 ; 0xFFFF_FFFF_F001
 }
 
@@ -996,6 +1283,20 @@ define i64 @imm_right_shifted_lui_2() nounwind {
 ; RV64IXTHEADBB-NEXT:    slli a0, a0, 12
 ; RV64IXTHEADBB-NEXT:    srli a0, a0, 24
 ; RV64IXTHEADBB-NEXT:    ret
+;
+; RV32-REMAT-LABEL: imm_right_shifted_lui_2:
+; RV32-REMAT:       # %bb.0:
+; RV32-REMAT-NEXT:    lui a0, 1048575
+; RV32-REMAT-NEXT:    addi a0, a0, 1
+; RV32-REMAT-NEXT:    li a1, 255
+; RV32-REMAT-NEXT:    ret
+;
+; RV64-REMAT-LABEL: imm_right_shifted_lui_2:
+; RV64-REMAT:       # %bb.0:
+; RV64-REMAT-NEXT:    lui a0, 1044481
+; RV64-REMAT-NEXT:    slli a0, a0, 12
+; RV64-REMAT-NEXT:    srli a0, a0, 24
+; RV64-REMAT-NEXT:    ret
   ret i64 1099511623681 ; 0xFF_FFFF_F001
 }
 
@@ -1043,6 +1344,19 @@ define i64 @imm_decoupled_lui_addi() nounwind {
 ; RV64IXTHEADBB-NEXT:    slli a0, a0, 20
 ; RV64IXTHEADBB-NEXT:    addi a0, a0, -3
 ; RV64IXTHEADBB-NEXT:    ret
+;
+; RV32-REMAT-LABEL: imm_decoupled_lui_addi:
+; RV32-REMAT:       # %bb.0:
+; RV32-REMAT-NEXT:    li a0, -3
+; RV32-REMAT-NEXT:    lui a1, 1
+; RV32-REMAT-NEXT:    ret
+;
+; RV64-REMAT-LABEL: imm_decoupled_lui_addi:
+; RV64-REMAT:       # %bb.0:
+; RV64-REMAT-NEXT:    lui a0, 4097
+; RV64-REMAT-NEXT:    slli a0, a0, 20
+; RV64-REMAT-NEXT:    addi a0, a0, -3
+; RV64-REMAT-NEXT:    ret
   ret i64 17596481011709 ; 0x1000_FFFF_FFFD
 }
 
@@ -1090,6 +1404,20 @@ define i64 @imm_end_xori_1() nounwind {
 ; RV64IXTHEADBB-NEXT:    srli a0, a0, 3
 ; RV64IXTHEADBB-NEXT:    not a0, a0
 ; RV64IXTHEADBB-NEXT:    ret
+;
+; RV32-REMAT-LABEL: imm_end_xori_1:
+; RV32-REMAT:       # %bb.0:
+; RV32-REMAT-NEXT:    lui a0, 8192
+; RV32-REMAT-NEXT:    addi a0, a0, -1
+; RV32-REMAT-NEXT:    lui a1, 917504
+; RV32-REMAT-NEXT:    ret
+;
+; RV64-REMAT-LABEL: imm_end_xori_1:
+; RV64-REMAT:       # %bb.0:
+; RV64-REMAT-NEXT:    lui a0, 983040
+; RV64-REMAT-NEXT:    srli a0, a0, 3
+; RV64-REMAT-NEXT:    not a0, a0
+; RV64-REMAT-NEXT:    ret
   ret i64 -2305843009180139521 ; 0xE000_0000_01FF_FFFF
 }
 
@@ -1143,6 +1471,22 @@ define i64 @imm_end_2addi_1() nounwind {
 ; RV64IXTHEADBB-NEXT:    addi a0, a0, -2048
 ; RV64IXTHEADBB-NEXT:    addi a0, a0, -1
 ; RV64IXTHEADBB-NEXT:    ret
+;
+; RV32-REMAT-LABEL: imm_end_2addi_1:
+; RV32-REMAT:       # %bb.0:
+; RV32-REMAT-NEXT:    lui a0, 1048575
+; RV32-REMAT-NEXT:    addi a0, a0, 2047
+; RV32-REMAT-NEXT:    lui a1, 1048512
+; RV32-REMAT-NEXT:    addi a1, a1, 127
+; RV32-REMAT-NEXT:    ret
+;
+; RV64-REMAT-LABEL: imm_end_2addi_1:
+; RV64-REMAT:       # %bb.0:
+; RV64-REMAT-NEXT:    li a0, -2047
+; RV64-REMAT-NEXT:    slli a0, a0, 39
+; RV64-REMAT-NEXT:    addi a0, a0, -2048
+; RV64-REMAT-NEXT:    addi a0, a0, -1
+; RV64-REMAT-NEXT:    ret
   ret i64 -1125350151030785 ; 0xFFFC_007F_FFFF_F7FF
 }
 
@@ -1196,6 +1540,21 @@ define i64 @imm_2reg_1() nounwind {
 ; RV64IXTHEADBB-NEXT:    slli a1, a0, 57
 ; RV64IXTHEADBB-NEXT:    add a0, a0, a1
 ; RV64IXTHEADBB-NEXT:    ret
+;
+; RV32-REMAT-LABEL: imm_2reg_1:
+; RV32-REMAT:       # %bb.0:
+; RV32-REMAT-NEXT:    lui a0, 74565
+; RV32-REMAT-NEXT:    addi a0, a0, 1656
+; RV32-REMAT-NEXT:    lui a1, 983040
+; RV32-REMAT-NEXT:    ret
+;
+; RV64-REMAT-LABEL: imm_2reg_1:
+; RV64-REMAT:       # %bb.0:
+; RV64-REMAT-NEXT:    lui a0, 74565
+; RV64-REMAT-NEXT:    addiw a0, a0, 1656
+; RV64-REMAT-NEXT:    slli a1, a0, 57
+; RV64-REMAT-NEXT:    add a0, a0, a1
+; RV64-REMAT-NEXT:    ret
   ret i64 -1152921504301427080 ; 0xF000_0000_1234_5678
 }
 
@@ -1236,6 +1595,18 @@ define void @imm_store_i16_neg1(ptr %p) nounwind {
 ; RV64IXTHEADBB-NEXT:    li a1, -1
 ; RV64IXTHEADBB-NEXT:    sh a1, 0(a0)
 ; RV64IXTHEADBB-NEXT:    ret
+;
+; RV32-REMAT-LABEL: imm_store_i16_neg1:
+; RV32-REMAT:       # %bb.0:
+; RV32-REMAT-NEXT:    li a1, -1
+; RV32-REMAT-NEXT:    sh a1, 0(a0)
+; RV32-REMAT-NEXT:    ret
+;
+; RV64-REMAT-LABEL: imm_store_i16_neg1:
+; RV64-REMAT:       # %bb.0:
+; RV64-REMAT-NEXT:    li a1, -1
+; RV64-REMAT-NEXT:    sh a1, 0(a0)
+; RV64-REMAT-NEXT:    ret
   store i16 -1, ptr %p
   ret void
 }
@@ -1277,6 +1648,18 @@ define void @imm_store_i32_neg1(ptr %p) nounwind {
 ; RV64IXTHEADBB-NEXT:    li a1, -1
 ; RV64IXTHEADBB-NEXT:    sw a1, 0(a0)
 ; RV64IXTHEADBB-NEXT:    ret
+;
+; RV32-REMAT-LABEL: imm_store_i32_neg1:
+; RV32-REMAT:       # %bb.0:
+; RV32-REMAT-NEXT:    li a1, -1
+; RV32-REMAT-NEXT:    sw a1, 0(a0)
+; RV32-REMAT-NEXT:    ret
+;
+; RV64-REMAT-LABEL: imm_store_i32_neg1:
+; RV64-REMAT:       # %bb.0:
+; RV64-REMAT-NEXT:    li a1, -1
+; RV64-REMAT-NEXT:    sw a1, 0(a0)
+; RV64-REMAT-NEXT:    ret
   store i32 -1, ptr %p
   ret void
 }
@@ -1326,6 +1709,21 @@ define i64 @imm_5372288229() {
 ; RV64IXTHEADBB-NEXT:    slli a0, a0, 13
 ; RV64IXTHEADBB-NEXT:    addi a0, a0, -795
 ; RV64IXTHEADBB-NEXT:    ret
+;
+; RV32-REMAT-LABEL: imm_5372288229:
+; RV32-REMAT:       # %bb.0:
+; RV32-REMAT-NEXT:    lui a0, 263018
+; RV32-REMAT-NEXT:    addi a0, a0, -795
+; RV32-REMAT-NEXT:    li a1, 1
+; RV32-REMAT-NEXT:    ret
+;
+; RV64-REMAT-LABEL: imm_5372288229:
+; RV64-REMAT:       # %bb.0:
+; RV64-REMAT-NEXT:    lui a0, 160
+; RV64-REMAT-NEXT:    addiw a0, a0, 437
+; RV64-REMAT-NEXT:    slli a0, a0, 13
+; RV64-REMAT-NEXT:    addi a0, a0, -795
+; RV64-REMAT-NEXT:    ret
   ret i64 5372288229
 }
 
@@ -1374,6 +1772,21 @@ define i64 @imm_neg_5372288229() {
 ; RV64IXTHEADBB-NEXT:    slli a0, a0, 13
 ; RV64IXTHEADBB-NEXT:    addi a0, a0, 795
 ; RV64IXTHEADBB-NEXT:    ret
+;
+; RV32-REMAT-LABEL: imm_neg_5372288229:
+; RV32-REMAT:       # %bb.0:
+; RV32-REMAT-NEXT:    lui a0, 785558
+; RV32-REMAT-NEXT:    addi a0, a0, 795
+; RV32-REMAT-NEXT:    li a1, -2
+; RV32-REMAT-NEXT:    ret
+;
+; RV64-REMAT-LABEL: imm_neg_5372288229:
+; RV64-REMAT:       # %bb.0:
+; RV64-REMAT-NEXT:    lui a0, 1048416
+; RV64-REMAT-NEXT:    addiw a0, a0, -437
+; RV64-REMAT-NEXT:    slli a0, a0, 13
+; RV64-REMAT-NEXT:    addi a0, a0, 795
+; RV64-REMAT-NEXT:    ret
   ret i64 -5372288229
 }
 
@@ -1422,6 +1835,21 @@ define i64 @imm_8953813715() {
 ; RV64IXTHEADBB-NEXT:    slli a0, a0, 13
 ; RV64IXTHEADBB-NEXT:    addi a0, a0, -1325
 ; RV64IXTHEADBB-NEXT:    ret
+;
+; RV32-REMAT-LABEL: imm_8953813715:
+; RV32-REMAT:       # %bb.0:
+; RV32-REMAT-NEXT:    lui a0, 88838
+; RV32-REMAT-NEXT:    addi a0, a0, -1325
+; RV32-REMAT-NEXT:    li a1, 2
+; RV32-REMAT-NEXT:    ret
+;
+; RV64-REMAT-LABEL: imm_8953813715:
+; RV64-REMAT:       # %bb.0:
+; RV64-REMAT-NEXT:    lui a0, 267
+; RV64-REMAT-NEXT:    addiw a0, a0, -637
+; RV64-REMAT-NEXT:    slli a0, a0, 13
+; RV64-REMAT-NEXT:    addi a0, a0, -1325
+; RV64-REMAT-NEXT:    ret
   ret i64 8953813715
 }
 
@@ -1470,6 +1898,21 @@ define i64 @imm_neg_8953813715() {
 ; RV64IXTHEADBB-NEXT:    slli a0, a0, 13
 ; RV64IXTHEADBB-NEXT:    addi a0, a0, 1325
 ; RV64IXTHEADBB-NEXT:    ret
+;
+; RV32-REMAT-LABEL: imm_neg_8953813715:
+; RV32-REMAT:       # %bb.0:
+; RV32-REMAT-NEXT:    lui a0, 959738
+; RV32-REMAT-NEXT:    addi a0, a0, 1325
+; RV32-REMAT-NEXT:    li a1, -3
+; RV32-REMAT-NEXT:    ret
+;
+; RV64-REMAT-LABEL: imm_neg_8953813715:
+; RV64-REMAT:       # %bb.0:
+; RV64-REMAT-NEXT:    lui a0, 1048309
+; RV64-REMAT-NEXT:    addiw a0, a0, 637
+; RV64-REMAT-NEXT:    slli a0, a0, 13
+; RV64-REMAT-NEXT:    addi a0, a0, 1325
+; RV64-REMAT-NEXT:    ret
   ret i64 -8953813715
 }
 
@@ -1519,6 +1962,21 @@ define i64 @imm_16116864687() {
 ; RV64IXTHEADBB-NEXT:    slli a0, a0, 12
 ; RV64IXTHEADBB-NEXT:    addi a0, a0, 1711
 ; RV64IXTHEADBB-NEXT:    ret
+;
+; RV32-REMAT-LABEL: imm_16116864687:
+; RV32-REMAT:       # %bb.0:
+; RV32-REMAT-NEXT:    lui a0, 789053
+; RV32-REMAT-NEXT:    addi a0, a0, 1711
+; RV32-REMAT-NEXT:    li a1, 3
+; RV32-REMAT-NEXT:    ret
+;
+; RV64-REMAT-LABEL: imm_16116864687:
+; RV64-REMAT:       # %bb.0:
+; RV64-REMAT-NEXT:    lui a0, 961
+; RV64-REMAT-NEXT:    addiw a0, a0, -1475
+; RV64-REMAT-NEXT:    slli a0, a0, 12
+; RV64-REMAT-NEXT:    addi a0, a0, 1711
+; RV64-REMAT-NEXT:    ret
   ret i64 16116864687
 }
 
@@ -1568,6 +2026,21 @@ define i64 @imm_neg_16116864687() {
 ; RV64IXTHEADBB-NEXT:    slli a0, a0, 12
 ; RV64IXTHEADBB-NEXT:    addi a0, a0, -1711
 ; RV64IXTHEADBB-NEXT:    ret
+;
+; RV32-REMAT-LABEL: imm_neg_16116864687:
+; RV32-REMAT:       # %bb.0:
+; RV32-REMAT-NEXT:    lui a0, 259523
+; RV32-REMAT-NEXT:    addi a0, a0, -1711
+; RV32-REMAT-NEXT:    li a1, -4
+; RV32-REMAT-NEXT:    ret
+;
+; RV64-REMAT-LABEL: imm_neg_16116864687:
+; RV64-REMAT:       # %bb.0:
+; RV64-REMAT-NEXT:    lui a0, 1047615
+; RV64-REMAT-NEXT:    addiw a0, a0, 1475
+; RV64-REMAT-NEXT:    slli a0, a0, 12
+; RV64-REMAT-NEXT:    addi a0, a0, -1711
+; RV64-REMAT-NEXT:    ret
   ret i64 -16116864687
 }
 
@@ -1613,6 +2086,20 @@ define i64 @imm_2344336315() {
 ; RV64IXTHEADBB-NEXT:    slli a0, a0, 2
 ; RV64IXTHEADBB-NEXT:    addi a0, a0, -1093
 ; RV64IXTHEADBB-NEXT:    ret
+;
+; RV32-REMAT-LABEL: imm_2344336315:
+; RV32-REMAT:       # %bb.0:
+; RV32-REMAT-NEXT:    lui a0, 572348
+; RV32-REMAT-NEXT:    addi a0, a0, -1093
+; RV32-REMAT-NEXT:    li a1, 0
+; RV32-REMAT-NEXT:    ret
+;
+; RV64-REMAT-LABEL: imm_2344336315:
+; RV64-REMAT:       # %bb.0:
+; RV64-REMAT-NEXT:    lui a0, 143087
+; RV64-REMAT-NEXT:    slli a0, a0, 2
+; RV64-REMAT-NEXT:    addi a0, a0, -1093
+; RV64-REMAT-NEXT:    ret
   ret i64 2344336315 ; 0x8bbbbbbb
 }
 
@@ -1676,6 +2163,23 @@ define i64 @imm_70370820078523() {
 ; RV64IXTHEADBB-NEXT:    slli a0, a0, 14
 ; RV64IXTHEADBB-NEXT:    addi a0, a0, -1093
 ; RV64IXTHEADBB-NEXT:    ret
+;
+; RV32-REMAT-LABEL: imm_70370820078523:
+; RV32-REMAT:       # %bb.0:
+; RV32-REMAT-NEXT:    lui a0, 506812
+; RV32-REMAT-NEXT:    addi a0, a0, -1093
+; RV32-REMAT-NEXT:    lui a1, 4
+; RV32-REMAT-NEXT:    ret
+;
+; RV64-REMAT-LABEL: imm_70370820078523:
+; RV64-REMAT:       # %bb.0:
+; RV64-REMAT-NEXT:    lui a0, 256
+; RV64-REMAT-NEXT:    addiw a0, a0, 31
+; RV64-REMAT-NEXT:    slli a0, a0, 12
+; RV64-REMAT-NEXT:    addi a0, a0, -273
+; RV64-REMAT-NEXT:    slli a0, a0, 14
+; RV64-REMAT-NEXT:    addi a0, a0, -1093
+; RV64-REMAT-NEXT:    ret
   ret i64 70370820078523 ; 0x40007bbbbbbb
 }
 
@@ -1725,6 +2229,21 @@ define i64 @imm_neg_9223372034778874949() {
 ; RV64IXTHEADBB-NEXT:    slli a1, a0, 63
 ; RV64IXTHEADBB-NEXT:    add a0, a0, a1
 ; RV64IXTHEADBB-NEXT:    ret
+;
+; RV32-REMAT-LABEL: imm_neg_9223372034778874949:
+; RV32-REMAT:       # %bb.0:
+; RV32-REMAT-NEXT:    lui a0, 506812
+; RV32-REMAT-NEXT:    addi a0, a0, -1093
+; RV32-REMAT-NEXT:    lui a1, 524288
+; RV32-REMAT-NEXT:    ret
+;
+; RV64-REMAT-LABEL: imm_neg_9223372034778874949:
+; RV64-REMAT:       # %bb.0:
+; RV64-REMAT-NEXT:    lui a0, 506812
+; RV64-REMAT-NEXT:    addiw a0, a0, -1093
+; RV64-REMAT-NEXT:    slli a1, a0, 63
+; RV64-REMAT-NEXT:    add a0, a0, a1
+; RV64-REMAT-NEXT:    ret
   ret i64 -9223372034778874949 ; 0x800000007bbbbbbb
 }
 
@@ -1793,6 +2312,24 @@ define i64 @imm_neg_9223301666034697285() {
 ; RV64IXTHEADBB-NEXT:    slli a0, a0, 14
 ; RV64IXTHEADBB-NEXT:    addi a0, a0, -1093
 ; RV64IXTHEADBB-NEXT:    ret
+;
+; RV32-REMAT-LABEL: imm_neg_9223301666034697285:
+; RV32-REMAT:       # %bb.0:
+; RV32-REMAT-NEXT:    lui a0, 506812
+; RV32-REMAT-NEXT:    addi a0, a0, -1093
+; RV32-REMAT-NEXT:    lui a1, 524292
+; RV32-REMAT-NEXT:    ret
+;
+; RV64-REMAT-LABEL: imm_neg_9223301666034697285:
+; RV64-REMAT:       # %bb.0:
+; RV64-REMAT-NEXT:    lui a0, 917505
+; RV64-REMAT-NEXT:    slli a0, a0, 8
+; RV64-REMAT-NEXT:    addi a0, a0, 31
+; RV64-REMAT-NEXT:    slli a0, a0, 12
+; RV64-REMAT-NEXT:    addi a0, a0, -273
+; RV64-REMAT-NEXT:    slli a0, a0, 14
+; RV64-REMAT-NEXT:    addi a0, a0, -1093
+; RV64-REMAT-NEXT:    ret
   ret i64 -9223301666034697285 ; 0x800040007bbbbbbb
 }
 
@@ -1838,6 +2375,20 @@ define i64 @imm_neg_2219066437() {
 ; RV64IXTHEADBB-NEXT:    slli a0, a0, 2
 ; RV64IXTHEADBB-NEXT:    addi a0, a0, -1093
 ; RV64IXTHEADBB-NEXT:    ret
+;
+; RV32-REMAT-LABEL: imm_neg_2219066437:
+; RV32-REMAT:       # %bb.0:
+; RV32-REMAT-NEXT:    lui a0, 506812
+; RV32-REMAT-NEXT:    addi a0, a0, -1093
+; RV32-REMAT-NEXT:    li a1, -1
+; RV32-REMAT-NEXT:    ret
+;
+; RV64-REMAT-LABEL: imm_neg_2219066437:
+; RV64-REMAT:       # %bb.0:
+; RV64-REMAT-NEXT:    lui a0, 913135
+; RV64-REMAT-NEXT:    slli a0, a0, 2
+; RV64-REMAT-NEXT:    addi a0, a0, -1093
+; RV64-REMAT-NEXT:    ret
   ret i64 -2219066437 ; 0xffffffff7bbbbbbb
 }
 
@@ -1888,6 +2439,22 @@ define i64 @imm_neg_8798043653189() {
 ; RV64IXTHEADBB-NEXT:    slli a0, a0, 14
 ; RV64IXTHEADBB-NEXT:    addi a0, a0, -1093
 ; RV64IXTHEADBB-NEXT:    ret
+;
+; RV32-REMAT-LABEL: imm_neg_8798043653189:
+; RV32-REMAT:       # %bb.0:
+; RV32-REMAT-NEXT:    lui a0, 572348
+; RV32-REMAT-NEXT:    addi a0, a0, -1093
+; RV32-REMAT-NEXT:    lui a1, 1048575
+; RV32-REMAT-NEXT:    addi a1, a1, 2047
+; RV32-REMAT-NEXT:    ret
+;
+; RV64-REMAT-LABEL: imm_neg_8798043653189:
+; RV64-REMAT:       # %bb.0:
+; RV64-REMAT-NEXT:    lui a0, 917475
+; RV64-REMAT-NEXT:    addiw a0, a0, -273
+; RV64-REMAT-NEXT:    slli a0, a0, 14
+; RV64-REMAT-NEXT:    addi a0, a0, -1093
+; RV64-REMAT-NEXT:    ret
   ret i64 -8798043653189 ; 0xfffff7ff8bbbbbbb
 }
 
@@ -1938,6 +2505,22 @@ define i64 @imm_9223372034904144827() {
 ; RV64IXTHEADBB-NEXT:    slli a1, a0, 63
 ; RV64IXTHEADBB-NEXT:    add a0, a0, a1
 ; RV64IXTHEADBB-NEXT:    ret
+;
+; RV32-REMAT-LABEL: imm_9223372034904144827:
+; RV32-REMAT:       # %bb.0:
+; RV32-REMAT-NEXT:    lui a0, 572348
+; RV32-REMAT-NEXT:    addi a0, a0, -1093
+; RV32-REMAT-NEXT:    lui a1, 524288
+; RV32-REMAT-NEXT:    addi a1, a1, -1
+; RV32-REMAT-NEXT:    ret
+;
+; RV64-REMAT-LABEL: imm_9223372034904144827:
+; RV64-REMAT:       # %bb.0:
+; RV64-REMAT-NEXT:    lui a0, 572348
+; RV64-REMAT-NEXT:    addiw a0, a0, -1093
+; RV64-REMAT-NEXT:    slli a1, a0, 63
+; RV64-REMAT-NEXT:    add a0, a0, a1
+; RV64-REMAT-NEXT:    ret
   ret i64 9223372034904144827 ; 0x7fffffff8bbbbbbb
 }
 
@@ -2007,6 +2590,25 @@ define i64 @imm_neg_9223354442718100411() {
 ; RV64IXTHEADBB-NEXT:    slli a0, a0, 14
 ; RV64IXTHEADBB-NEXT:    addi a0, a0, -1093
 ; RV64IXTHEADBB-NEXT:    ret
+;
+; RV32-REMAT-LABEL: imm_neg_9223354442718100411:
+; RV32-REMAT:       # %bb.0:
+; RV32-REMAT-NEXT:    lui a0, 572348
+; RV32-REMAT-NEXT:    addi a0, a0, -1093
+; RV32-REMAT-NEXT:    lui a1, 524287
+; RV32-REMAT-NEXT:    addi a1, a1, -1
+; RV32-REMAT-NEXT:    ret
+;
+; RV64-REMAT-LABEL: imm_neg_9223354442718100411:
+; RV64-REMAT:       # %bb.0:
+; RV64-REMAT-NEXT:    lui a0, 524287
+; RV64-REMAT-NEXT:    slli a0, a0, 6
+; RV64-REMAT-NEXT:    addi a0, a0, -29
+; RV64-REMAT-NEXT:    slli a0, a0, 12
+; RV64-REMAT-NEXT:    addi a0, a0, -273
+; RV64-REMAT-NEXT:    slli a0, a0, 14
+; RV64-REMAT-NEXT:    addi a0, a0, -1093
+; RV64-REMAT-NEXT:    ret
   ret i64 9223354442718100411 ; 0x7fffefff8bbbbbbb
 }
 
@@ -2052,6 +2654,20 @@ define i64 @imm_2863311530() {
 ; RV64IXTHEADBB-NEXT:    addiw a0, a0, 1365
 ; RV64IXTHEADBB-NEXT:    slli a0, a0, 1
 ; RV64IXTHEADBB-NEXT:    ret
+;
+; RV32-REMAT-LABEL: imm_2863311530:
+; RV32-REMAT:       # %bb.0:
+; RV32-REMAT-NEXT:    lui a0, 699051
+; RV32-REMAT-NEXT:    addi a0, a0, -1366
+; RV32-REMAT-NEXT:    li a1, 0
+; RV32-REMAT-NEXT:    ret
+;
+; RV64-REMAT-LABEL: imm_2863311530:
+; RV64-REMAT:       # %bb.0:
+; RV64-REMAT-NEXT:    lui a0, 349525
+; RV64-REMAT-NEXT:    addiw a0, a0, 1365
+; RV64-REMAT-NEXT:    slli a0, a0, 1
+; RV64-REMAT-NEXT:    ret
 	ret i64 2863311530 ; #0xaaaaaaaa
 }
 
@@ -2097,6 +2713,20 @@ define i64 @imm_neg_2863311530() {
 ; RV64IXTHEADBB-NEXT:    addiw a0, a0, -1365
 ; RV64IXTHEADBB-NEXT:    slli a0, a0, 1
 ; RV64IXTHEADBB-NEXT:    ret
+;
+; RV32-REMAT-LABEL: imm_neg_2863311530:
+; RV32-REMAT:       # %bb.0:
+; RV32-REMAT-NEXT:    lui a0, 349525
+; RV32-REMAT-NEXT:    addi a0, a0, 1366
+; RV32-REMAT-NEXT:    li a1, -1
+; RV32-REMAT-NEXT:    ret
+;
+; RV64-REMAT-LABEL: imm_neg_2863311530:
+; RV64-REMAT:       # %bb.0:
+; RV64-REMAT-NEXT:    lui a0, 699051
+; RV64-REMAT-NEXT:    addiw a0, a0, -1365
+; RV64-REMAT-NEXT:    slli a0, a0, 1
+; RV64-REMAT-NEXT:    ret
 	ret i64 -2863311530 ; #0xffffffff55555556
 }
 
@@ -2141,6 +2771,20 @@ define i64 @imm_2147486378() {
 ; RV64IXTHEADBB-NEXT:    slli a0, a0, 31
 ; RV64IXTHEADBB-NEXT:    addi a0, a0, 1365
 ; RV64IXTHEADBB-NEXT:    ret
+;
+; RV32-REMAT-LABEL: imm_2147486378:
+; RV32-REMAT:       # %bb.0:
+; RV32-REMAT-NEXT:    lui a0, 524288
+; RV32-REMAT-NEXT:    addi a0, a0, 1365
+; RV32-REMAT-NEXT:    li a1, 0
+; RV32-REMAT-NEXT:    ret
+;
+; RV64-REMAT-LABEL: imm_2147486378:
+; RV64-REMAT:       # %bb.0:
+; RV64-REMAT-NEXT:    li a0, 1
+; RV64-REMAT-NEXT:    slli a0, a0, 31
+; RV64-REMAT-NEXT:    addi a0, a0, 1365
+; RV64-REMAT-NEXT:    ret
   ret i64 2147485013
 }
 
@@ -2181,6 +2825,19 @@ define i64 @imm_neg_2147485013() {
 ; RV64IXTHEADBB-NEXT:    lui a0, 524288
 ; RV64IXTHEADBB-NEXT:    addi a0, a0, -1365
 ; RV64IXTHEADBB-NEXT:    ret
+;
+; RV32-REMAT-LABEL: imm_neg_2147485013:
+; RV32-REMAT:       # %bb.0:
+; RV32-REMAT-NEXT:    lui a0, 524288
+; RV32-REMAT-NEXT:    addi a0, a0, -1365
+; RV32-REMAT-NEXT:    li a1, -1
+; RV32-REMAT-NEXT:    ret
+;
+; RV64-REMAT-LABEL: imm_neg_2147485013:
+; RV64-REMAT:       # %bb.0:
+; RV64-REMAT-NEXT:    lui a0, 524288
+; RV64-REMAT-NEXT:    addi a0, a0, -1365
+; RV64-REMAT-NEXT:    ret
   ret i64 -2147485013
 }
 
@@ -2231,6 +2888,22 @@ define i64 @imm_12900924131259() {
 ; RV64IXTHEADBB-NEXT:    slli a0, a0, 24
 ; RV64IXTHEADBB-NEXT:    addi a0, a0, 1979
 ; RV64IXTHEADBB-NEXT:    ret
+;
+; RV32-REMAT-LABEL: imm_12900924131259:
+; RV32-REMAT:       # %bb.0:
+; RV32-REMAT-NEXT:    lui a0, 765952
+; RV32-REMAT-NEXT:    addi a0, a0, 1979
+; RV32-REMAT-NEXT:    lui a1, 1
+; RV32-REMAT-NEXT:    addi a1, a1, -1093
+; RV32-REMAT-NEXT:    ret
+;
+; RV64-REMAT-LABEL: imm_12900924131259:
+; RV64-REMAT:       # %bb.0:
+; RV64-REMAT-NEXT:    lui a0, 188
+; RV64-REMAT-NEXT:    addiw a0, a0, -1093
+; RV64-REMAT-NEXT:    slli a0, a0, 24
+; RV64-REMAT-NEXT:    addi a0, a0, 1979
+; RV64-REMAT-NEXT:    ret
   ret i64 12900924131259
 }
 
@@ -2274,6 +2947,19 @@ define i64 @imm_50394234880() {
 ; RV64IXTHEADBB-NEXT:    addiw a0, a0, -1093
 ; RV64IXTHEADBB-NEXT:    slli a0, a0, 16
 ; RV64IXTHEADBB-NEXT:    ret
+;
+; RV32-REMAT-LABEL: imm_50394234880:
+; RV32-REMAT:       # %bb.0:
+; RV32-REMAT-NEXT:    lui a0, 768944
+; RV32-REMAT-NEXT:    li a1, 11
+; RV32-REMAT-NEXT:    ret
+;
+; RV64-REMAT-LABEL: imm_50394234880:
+; RV64-REMAT:       # %bb.0:
+; RV64-REMAT-NEXT:    lui a0, 188
+; RV64-REMAT-NEXT:    addiw a0, a0, -1093
+; RV64-REMAT-NEXT:    slli a0, a0, 16
+; RV64-REMAT-NEXT:    ret
   ret i64 50394234880
 }
 
@@ -2329,6 +3015,23 @@ define i64 @imm_12900936431479() {
 ; RV64IXTHEADBB-NEXT:    slli a0, a0, 12
 ; RV64IXTHEADBB-NEXT:    addi a0, a0, 1911
 ; RV64IXTHEADBB-NEXT:    ret
+;
+; RV32-REMAT-LABEL: imm_12900936431479:
+; RV32-REMAT:       # %bb.0:
+; RV32-REMAT-NEXT:    lui a0, 768955
+; RV32-REMAT-NEXT:    addi a0, a0, 1911
+; RV32-REMAT-NEXT:    lui a1, 1
+; RV32-REMAT-NEXT:    addi a1, a1, -1093
+; RV32-REMAT-NEXT:    ret
+;
+; RV64-REMAT-LABEL: imm_12900936431479:
+; RV64-REMAT:       # %bb.0:
+; RV64-REMAT-NEXT:    lui a0, 192239
+; RV64-REMAT-NEXT:    slli a0, a0, 2
+; RV64-REMAT-NEXT:    addi a0, a0, -1093
+; RV64-REMAT-NEXT:    slli a0, a0, 12
+; RV64-REMAT-NEXT:    addi a0, a0, 1911
+; RV64-REMAT-NEXT:    ret
   ret i64 12900936431479
 }
 
@@ -2384,6 +3087,23 @@ define i64 @imm_12900918536874() {
 ; RV64IXTHEADBB-NEXT:    addi a0, a0, 1365
 ; RV64IXTHEADBB-NEXT:    slli a0, a0, 1
 ; RV64IXTHEADBB-NEXT:    ret
+;
+; RV32-REMAT-LABEL: imm_12900918536874:
+; RV32-REMAT:       # %bb.0:
+; RV32-REMAT-NEXT:    lui a0, 764587
+; RV32-REMAT-NEXT:    addi a0, a0, -1366
+; RV32-REMAT-NEXT:    lui a1, 1
+; RV32-REMAT-NEXT:    addi a1, a1, -1093
+; RV32-REMAT-NEXT:    ret
+;
+; RV64-REMAT-LABEL: imm_12900918536874:
+; RV64-REMAT:       # %bb.0:
+; RV64-REMAT-NEXT:    lui a0, 384477
+; RV64-REMAT-NEXT:    addiw a0, a0, 1365
+; RV64-REMAT-NEXT:    slli a0, a0, 12
+; RV64-REMAT-NEXT:    addi a0, a0, 1365
+; RV64-REMAT-NEXT:    slli a0, a0, 1
+; RV64-REMAT-NEXT:    ret
   ret i64 12900918536874
 }
 
@@ -2439,6 +3159,23 @@ define i64 @imm_12900925247761() {
 ; RV64IXTHEADBB-NEXT:    addi a0, a0, -2048
 ; RV64IXTHEADBB-NEXT:    addi a0, a0, -1775
 ; RV64IXTHEADBB-NEXT:    ret
+;
+; RV32-REMAT-LABEL: imm_12900925247761:
+; RV32-REMAT:       # %bb.0:
+; RV32-REMAT-NEXT:    lui a0, 766225
+; RV32-REMAT-NEXT:    addi a0, a0, 273
+; RV32-REMAT-NEXT:    lui a1, 1
+; RV32-REMAT-NEXT:    addi a1, a1, -1093
+; RV32-REMAT-NEXT:    ret
+;
+; RV64-REMAT-LABEL: imm_12900925247761:
+; RV64-REMAT:       # %bb.0:
+; RV64-REMAT-NEXT:    lui a0, 384478
+; RV64-REMAT-NEXT:    addiw a0, a0, -1911
+; RV64-REMAT-NEXT:    slli a0, a0, 13
+; RV64-REMAT-NEXT:    addi a0, a0, -2048
+; RV64-REMAT-NEXT:    addi a0, a0, -1775
+; RV64-REMAT-NEXT:    ret
   ret i64 12900925247761
 }
 
@@ -2488,6 +3225,21 @@ define i64 @imm_7158272001() {
 ; RV64IXTHEADBB-NEXT:    slli a0, a0, 12
 ; RV64IXTHEADBB-NEXT:    addi a0, a0, 1
 ; RV64IXTHEADBB-NEXT:    ret
+;
+; RV32-REMAT-LABEL: imm_7158272001:
+; RV32-REMAT:       # %bb.0:
+; RV32-REMAT-NEXT:    lui a0, 699049
+; RV32-REMAT-NEXT:    addi a0, a0, 1
+; RV32-REMAT-NEXT:    li a1, 1
+; RV32-REMAT-NEXT:    ret
+;
+; RV64-REMAT-LABEL: imm_7158272001:
+; RV64-REMAT:       # %bb.0:
+; RV64-REMAT-NEXT:    lui a0, 427
+; RV64-REMAT-NEXT:    addiw a0, a0, -1367
+; RV64-REMAT-NEXT:    slli a0, a0, 12
+; RV64-REMAT-NEXT:    addi a0, a0, 1
+; RV64-REMAT-NEXT:    ret
   ret i64 7158272001 ; 0x0000_0001_aaaa_9001
 }
 
@@ -2537,6 +3289,21 @@ define i64 @imm_12884889601() {
 ; RV64IXTHEADBB-NEXT:    slli a0, a0, 12
 ; RV64IXTHEADBB-NEXT:    addi a0, a0, 1
 ; RV64IXTHEADBB-NEXT:    ret
+;
+; RV32-REMAT-LABEL: imm_12884889601:
+; RV32-REMAT:       # %bb.0:
+; RV32-REMAT-NEXT:    lui a0, 1048573
+; RV32-REMAT-NEXT:    addi a0, a0, 1
+; RV32-REMAT-NEXT:    li a1, 2
+; RV32-REMAT-NEXT:    ret
+;
+; RV64-REMAT-LABEL: imm_12884889601:
+; RV64-REMAT:       # %bb.0:
+; RV64-REMAT-NEXT:    lui a0, 768
+; RV64-REMAT-NEXT:    addiw a0, a0, -3
+; RV64-REMAT-NEXT:    slli a0, a0, 12
+; RV64-REMAT-NEXT:    addi a0, a0, 1
+; RV64-REMAT-NEXT:    ret
   ret i64 12884889601 ; 0x0000_0002_ffff_d001
 }
 
@@ -2585,6 +3352,21 @@ define i64 @imm_neg_3435982847() {
 ; RV64IXTHEADBB-NEXT:    slli a0, a0, 12
 ; RV64IXTHEADBB-NEXT:    addi a0, a0, 1
 ; RV64IXTHEADBB-NEXT:    ret
+;
+; RV32-REMAT-LABEL: imm_neg_3435982847:
+; RV32-REMAT:       # %bb.0:
+; RV32-REMAT-NEXT:    lui a0, 209713
+; RV32-REMAT-NEXT:    addi a0, a0, 1
+; RV32-REMAT-NEXT:    li a1, -1
+; RV32-REMAT-NEXT:    ret
+;
+; RV64-REMAT-LABEL: imm_neg_3435982847:
+; RV64-REMAT:       # %bb.0:
+; RV64-REMAT-NEXT:    lui a0, 1048371
+; RV64-REMAT-NEXT:    addiw a0, a0, 817
+; RV64-REMAT-NEXT:    slli a0, a0, 12
+; RV64-REMAT-NEXT:    addi a0, a0, 1
+; RV64-REMAT-NEXT:    ret
   ret i64 -3435982847 ; 0xffff_ffff_3333_1001
 }
 
@@ -2633,6 +3415,21 @@ define i64 @imm_neg_5726842879() {
 ; RV64IXTHEADBB-NEXT:    slli a0, a0, 12
 ; RV64IXTHEADBB-NEXT:    addi a0, a0, 1
 ; RV64IXTHEADBB-NEXT:    ret
+;
+; RV32-REMAT-LABEL: imm_neg_5726842879:
+; RV32-REMAT:       # %bb.0:
+; RV32-REMAT-NEXT:    lui a0, 698997
+; RV32-REMAT-NEXT:    addi a0, a0, 1
+; RV32-REMAT-NEXT:    li a1, -2
+; RV32-REMAT-NEXT:    ret
+;
+; RV64-REMAT-LABEL: imm_neg_5726842879:
+; RV64-REMAT:       # %bb.0:
+; RV64-REMAT-NEXT:    lui a0, 1048235
+; RV64-REMAT-NEXT:    addiw a0, a0, -1419
+; RV64-REMAT-NEXT:    slli a0, a0, 12
+; RV64-REMAT-NEXT:    addi a0, a0, 1
+; RV64-REMAT-NEXT:    ret
   ret i64 -5726842879 ; 0xffff_fffe_aaa7_5001
 }
 
@@ -2681,6 +3478,21 @@ define i64 @imm_neg_10307948543() {
 ; RV64IXTHEADBB-NEXT:    slli a0, a0, 12
 ; RV64IXTHEADBB-NEXT:    addi a0, a0, 1
 ; RV64IXTHEADBB-NEXT:    ret
+;
+; RV32-REMAT-LABEL: imm_neg_10307948543:
+; RV32-REMAT:       # %bb.0:
+; RV32-REMAT-NEXT:    lui a0, 629139
+; RV32-REMAT-NEXT:    addi a0, a0, 1
+; RV32-REMAT-NEXT:    li a1, -3
+; RV32-REMAT-NEXT:    ret
+;
+; RV64-REMAT-LABEL: imm_neg_10307948543:
+; RV64-REMAT:       # %bb.0:
+; RV64-REMAT-NEXT:    lui a0, 1047962
+; RV64-REMAT-NEXT:    addiw a0, a0, -1645
+; RV64-REMAT-NEXT:    slli a0, a0, 12
+; RV64-REMAT-NEXT:    addi a0, a0, 1
+; RV64-REMAT-NEXT:    ret
   ret i64 -10307948543 ; 0xffff_fffd_9999_3001
 }
 
@@ -2724,6 +3536,20 @@ define i64 @li_rori_1() {
 ; RV64IXTHEADBB-NEXT:    li a0, -18
 ; RV64IXTHEADBB-NEXT:    th.srri a0, a0, 21
 ; RV64IXTHEADBB-NEXT:    ret
+;
+; RV32-REMAT-LABEL: li_rori_1:
+; RV32-REMAT:       # %bb.0:
+; RV32-REMAT-NEXT:    lui a1, 1048567
+; RV32-REMAT-NEXT:    addi a1, a1, 2047
+; RV32-REMAT-NEXT:    li a0, -1
+; RV32-REMAT-NEXT:    ret
+;
+; RV64-REMAT-LABEL: li_rori_1:
+; RV64-REMAT:       # %bb.0:
+; RV64-REMAT-NEXT:    li a0, -17
+; RV64-REMAT-NEXT:    slli a0, a0, 43
+; RV64-REMAT-NEXT:    addi a0, a0, -1
+; RV64-REMAT-NEXT:    ret
   ret i64 -149533581377537
 }
 
@@ -2767,6 +3593,20 @@ define i64 @li_rori_2() {
 ; RV64IXTHEADBB-NEXT:    li a0, -86
 ; RV64IXTHEADBB-NEXT:    th.srri a0, a0, 4
 ; RV64IXTHEADBB-NEXT:    ret
+;
+; RV32-REMAT-LABEL: li_rori_2:
+; RV32-REMAT:       # %bb.0:
+; RV32-REMAT-NEXT:    lui a1, 720896
+; RV32-REMAT-NEXT:    addi a1, a1, -1
+; RV32-REMAT-NEXT:    li a0, -6
+; RV32-REMAT-NEXT:    ret
+;
+; RV64-REMAT-LABEL: li_rori_2:
+; RV64-REMAT:       # %bb.0:
+; RV64-REMAT-NEXT:    li a0, -5
+; RV64-REMAT-NEXT:    slli a0, a0, 60
+; RV64-REMAT-NEXT:    addi a0, a0, -6
+; RV64-REMAT-NEXT:    ret
   ret i64 -5764607523034234886
 }
 
@@ -2810,6 +3650,20 @@ define i64 @li_rori_3() {
 ; RV64IXTHEADBB-NEXT:    li a0, -18
 ; RV64IXTHEADBB-NEXT:    th.srri a0, a0, 37
 ; RV64IXTHEADBB-NEXT:    ret
+;
+; RV32-REMAT-LABEL: li_rori_3:
+; RV32-REMAT:       # %bb.0:
+; RV32-REMAT-NEXT:    lui a0, 491520
+; RV32-REMAT-NEXT:    addi a0, a0, -1
+; RV32-REMAT-NEXT:    li a1, -1
+; RV32-REMAT-NEXT:    ret
+;
+; RV64-REMAT-LABEL: li_rori_3:
+; RV64-REMAT:       # %bb.0:
+; RV64-REMAT-NEXT:    li a0, -17
+; RV64-REMAT-NEXT:    slli a0, a0, 27
+; RV64-REMAT-NEXT:    addi a0, a0, -1
+; RV64-REMAT-NEXT:    ret
   ret i64 -2281701377
 }
 
@@ -2853,6 +3707,19 @@ define i64 @PR54812() {
 ; RV64IXTHEADBB-NEXT:    addiw a0, a0, 1407
 ; RV64IXTHEADBB-NEXT:    slli a0, a0, 12
 ; RV64IXTHEADBB-NEXT:    ret
+;
+; RV32-REMAT-LABEL: PR54812:
+; RV32-REMAT:       # %bb.0:
+; RV32-REMAT-NEXT:    lui a0, 521599
+; RV32-REMAT-NEXT:    li a1, -1
+; RV32-REMAT-NEXT:    ret
+;
+; RV64-REMAT-LABEL: PR54812:
+; RV64-REMAT:       # %bb.0:
+; RV64-REMAT-NEXT:    lui a0, 1048447
+; RV64-REMAT-NEXT:    addiw a0, a0, 1407
+; RV64-REMAT-NEXT:    slli a0, a0, 12
+; RV64-REMAT-NEXT:    ret
   ret i64 -2158497792;
 }
 
@@ -2891,6 +3758,18 @@ define signext i32 @pos_2048() nounwind {
 ; RV64IXTHEADBB-NEXT:    li a0, 1
 ; RV64IXTHEADBB-NEXT:    slli a0, a0, 11
 ; RV64IXTHEADBB-NEXT:    ret
+;
+; RV32-REMAT-LABEL: pos_2048:
+; RV32-REMAT:       # %bb.0:
+; RV32-REMAT-NEXT:    li a0, 1
+; RV32-REMAT-NEXT:    slli a0, a0, 11
+; RV32-REMAT-NEXT:    ret
+;
+; RV64-REMAT-LABEL: pos_2048:
+; RV64-REMAT:       # %bb.0:
+; RV64-REMAT-NEXT:    li a0, 1
+; RV64-REMAT-NEXT:    slli a0, a0, 11
+; RV64-REMAT-NEXT:    ret
   ret i32 2048
 }
 
@@ -2941,6 +3820,21 @@ define i64 @imm64_same_lo_hi() nounwind {
 ; RV64IXTHEADBB-NEXT:    slli a1, a0, 32
 ; RV64IXTHEADBB-NEXT:    add a0, a0, a1
 ; RV64IXTHEADBB-NEXT:    ret
+;
+; RV32-REMAT-LABEL: imm64_same_lo_hi:
+; RV32-REMAT:       # %bb.0:
+; RV32-REMAT-NEXT:    lui a0, 65793
+; RV32-REMAT-NEXT:    addi a0, a0, 16
+; RV32-REMAT-NEXT:    mv a1, a0
+; RV32-REMAT-NEXT:    ret
+;
+; RV64-REMAT-LABEL: imm64_same_lo_hi:
+; RV64-REMAT:       # %bb.0:
+; RV64-REMAT-NEXT:    lui a0, 65793
+; RV64-REMAT-NEXT:    addiw a0, a0, 16
+; RV64-REMAT-NEXT:    slli a1, a0, 32
+; RV64-REMAT-NEXT:    add a0, a0, a1
+; RV64-REMAT-NEXT:    ret
   ret i64 1157442765409226768 ; 0x0101010101010101
 }
 
@@ -2998,6 +3892,21 @@ define i64 @imm64_same_lo_hi_optsize() nounwind optsize {
 ; RV64IXTHEADBB-NEXT:    slli a1, a0, 32
 ; RV64IXTHEADBB-NEXT:    add a0, a0, a1
 ; RV64IXTHEADBB-NEXT:    ret
+;
+; RV32-REMAT-LABEL: imm64_same_lo_hi_optsize:
+; RV32-REMAT:       # %bb.0:
+; RV32-REMAT-NEXT:    lui a0, 65793
+; RV32-REMAT-NEXT:    addi a0, a0, 16
+; RV32-REMAT-NEXT:    mv a1, a0
+; RV32-REMAT-NEXT:    ret
+;
+; RV64-REMAT-LABEL: imm64_same_lo_hi_optsize:
+; RV64-REMAT:       # %bb.0:
+; RV64-REMAT-NEXT:    lui a0, 65793
+; RV64-REMAT-NEXT:    addiw a0, a0, 16
+; RV64-REMAT-NEXT:    slli a1, a0, 32
+; RV64-REMAT-NEXT:    add a0, a0, a1
+; RV64-REMAT-NEXT:    ret
   ret i64 1157442765409226768 ; 0x0101010101010101
 }
 
@@ -3067,5 +3976,23 @@ define i64 @imm64_same_lo_hi_negative() nounwind {
 ; RV64IXTHEADBB-NEXT:    slli a0, a0, 15
 ; RV64IXTHEADBB-NEXT:    addi a0, a0, 128
 ; RV64IXTHEADBB-NEXT:    ret
+;
+; RV32-REMAT-LABEL: imm64_same_lo_hi_negative:
+; RV32-REMAT:       # %bb.0:
+; RV32-REMAT-NEXT:    lui a0, 526344
+; RV32-REMAT-NEXT:    addi a0, a0, 128
+; RV32-REMAT-NEXT:    mv a1, a0
+; RV32-REMAT-NEXT:    ret
+;
+; RV64-REMAT-LABEL: imm64_same_lo_hi_negative:
+; RV64-REMAT:       # %bb.0:
+; RV64-REMAT-NEXT:    lui a0, 983297
+; RV64-REMAT-NEXT:    slli a0, a0, 4
+; RV64-REMAT-NEXT:    addi a0, a0, 257
+; RV64-REMAT-NEXT:    slli a0, a0, 16
+; RV64-REMAT-NEXT:    addi a0, a0, 257
+; RV64-REMAT-NEXT:    slli a0, a0, 15
+; RV64-REMAT-NEXT:    addi a0, a0, 128
+; RV64-REMAT-NEXT:    ret
   ret i64 9259542123273814144 ; 0x8080808080808080
 }

diff --git a/llvm/test/CodeGen/RISCV/pr69586.ll b/llvm/test/CodeGen/RISCV/pr69586.ll
new file mode 100644
index 000000000000000..ef91334c5ff0044
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/pr69586.ll
@@ -0,0 +1,1980 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
+; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+v,+xsfvcp \
+; RUN:   -riscv-use-rematerializable-movimm=false | FileCheck %s --check-prefix=NOREMAT
+; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+v,+xsfvcp \
+; RUN:   -riscv-use-rematerializable-movimm=true | FileCheck %s --check-prefix=REMAT
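+;
+; The two RUN lines compile the same function with the experimental
+; rematerializable constant-materialization pseudo disabled (NOREMAT) and
+; enabled (REMAT). When it is disabled, the materialized address offsets are
+; spilled and reloaded around the sf.vc.vv uses; when it is enabled, they are
+; rematerialized at each use instead, shrinking the frame from 368 to 112
+; bytes.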
+
+define void @test(ptr %0, ptr %1, i64 %2) {
+; NOREMAT-LABEL: test:
+; NOREMAT:       # %bb.0:
+; NOREMAT-NEXT:    addi sp, sp, -368
+; NOREMAT-NEXT:    .cfi_def_cfa_offset 368
+; NOREMAT-NEXT:    sd ra, 360(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT:    sd s0, 352(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT:    sd s1, 344(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT:    sd s2, 336(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT:    sd s3, 328(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT:    sd s4, 320(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT:    sd s5, 312(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT:    sd s6, 304(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT:    sd s7, 296(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT:    sd s8, 288(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT:    sd s9, 280(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT:    sd s10, 272(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT:    sd s11, 264(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT:    .cfi_offset ra, -8
+; NOREMAT-NEXT:    .cfi_offset s0, -16
+; NOREMAT-NEXT:    .cfi_offset s1, -24
+; NOREMAT-NEXT:    .cfi_offset s2, -32
+; NOREMAT-NEXT:    .cfi_offset s3, -40
+; NOREMAT-NEXT:    .cfi_offset s4, -48
+; NOREMAT-NEXT:    .cfi_offset s5, -56
+; NOREMAT-NEXT:    .cfi_offset s6, -64
+; NOREMAT-NEXT:    .cfi_offset s7, -72
+; NOREMAT-NEXT:    .cfi_offset s8, -80
+; NOREMAT-NEXT:    .cfi_offset s9, -88
+; NOREMAT-NEXT:    .cfi_offset s10, -96
+; NOREMAT-NEXT:    .cfi_offset s11, -104
+; NOREMAT-NEXT:    li a2, 32
+; NOREMAT-NEXT:    vsetvli zero, a2, e32, m2, ta, ma
+; NOREMAT-NEXT:    vle32.v v8, (a0)
+; NOREMAT-NEXT:    addi a2, a0, 512
+; NOREMAT-NEXT:    vle32.v v10, (a2)
+; NOREMAT-NEXT:    addi a2, a0, 1024
+; NOREMAT-NEXT:    vle32.v v12, (a2)
+; NOREMAT-NEXT:    sf.vc.vv 3, 0, v8, v10
+; NOREMAT-NEXT:    vle32.v v8, (a2)
+; NOREMAT-NEXT:    addi a2, a0, 1536
+; NOREMAT-NEXT:    vle32.v v14, (a2)
+; NOREMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
+; NOREMAT-NEXT:    vle32.v v10, (a2)
+; NOREMAT-NEXT:    li a2, 1
+; NOREMAT-NEXT:    slli a2, a2, 11
+; NOREMAT-NEXT:    sd a2, 256(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT:    add a2, a0, a2
+; NOREMAT-NEXT:    vle32.v v12, (a2)
+; NOREMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
+; NOREMAT-NEXT:    vle32.v v8, (a2)
+; NOREMAT-NEXT:    li a4, 5
+; NOREMAT-NEXT:    slli a2, a4, 9
+; NOREMAT-NEXT:    sd a2, 248(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT:    add a2, a0, a2
+; NOREMAT-NEXT:    vle32.v v14, (a2)
+; NOREMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
+; NOREMAT-NEXT:    vle32.v v10, (a2)
+; NOREMAT-NEXT:    li a5, 3
+; NOREMAT-NEXT:    slli a2, a5, 10
+; NOREMAT-NEXT:    sd a2, 240(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT:    add a2, a0, a2
+; NOREMAT-NEXT:    vle32.v v12, (a2)
+; NOREMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
+; NOREMAT-NEXT:    vle32.v v8, (a2)
+; NOREMAT-NEXT:    li a3, 7
+; NOREMAT-NEXT:    slli a2, a3, 9
+; NOREMAT-NEXT:    sd a2, 232(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT:    add a2, a0, a2
+; NOREMAT-NEXT:    vle32.v v14, (a2)
+; NOREMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
+; NOREMAT-NEXT:    vle32.v v10, (a2)
+; NOREMAT-NEXT:    lui a2, 1
+; NOREMAT-NEXT:    add a2, a0, a2
+; NOREMAT-NEXT:    vle32.v v12, (a2)
+; NOREMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
+; NOREMAT-NEXT:    vle32.v v8, (a2)
+; NOREMAT-NEXT:    li a2, 9
+; NOREMAT-NEXT:    slli a6, a2, 9
+; NOREMAT-NEXT:    sd a6, 224(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT:    add a6, a0, a6
+; NOREMAT-NEXT:    vle32.v v14, (a6)
+; NOREMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
+; NOREMAT-NEXT:    vle32.v v10, (a6)
+; NOREMAT-NEXT:    slli a6, a4, 10
+; NOREMAT-NEXT:    sd a6, 216(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT:    add a6, a0, a6
+; NOREMAT-NEXT:    vle32.v v12, (a6)
+; NOREMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
+; NOREMAT-NEXT:    vle32.v v8, (a6)
+; NOREMAT-NEXT:    li s8, 11
+; NOREMAT-NEXT:    slli a6, s8, 9
+; NOREMAT-NEXT:    sd a6, 208(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT:    add a6, a0, a6
+; NOREMAT-NEXT:    vle32.v v14, (a6)
+; NOREMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
+; NOREMAT-NEXT:    vle32.v v10, (a6)
+; NOREMAT-NEXT:    slli a5, a5, 11
+; NOREMAT-NEXT:    sd a5, 200(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT:    add a5, a0, a5
+; NOREMAT-NEXT:    vle32.v v12, (a5)
+; NOREMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
+; NOREMAT-NEXT:    vle32.v v8, (a5)
+; NOREMAT-NEXT:    li s2, 13
+; NOREMAT-NEXT:    slli a5, s2, 9
+; NOREMAT-NEXT:    sd a5, 192(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT:    add a5, a0, a5
+; NOREMAT-NEXT:    vle32.v v14, (a5)
+; NOREMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
+; NOREMAT-NEXT:    vle32.v v10, (a5)
+; NOREMAT-NEXT:    slli a5, a3, 10
+; NOREMAT-NEXT:    sd a5, 184(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT:    add a5, a0, a5
+; NOREMAT-NEXT:    vle32.v v12, (a5)
+; NOREMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
+; NOREMAT-NEXT:    vle32.v v8, (a5)
+; NOREMAT-NEXT:    li t0, 15
+; NOREMAT-NEXT:    slli a5, t0, 9
+; NOREMAT-NEXT:    sd a5, 176(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT:    add a5, a0, a5
+; NOREMAT-NEXT:    vle32.v v14, (a5)
+; NOREMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
+; NOREMAT-NEXT:    vle32.v v10, (a5)
+; NOREMAT-NEXT:    lui a5, 2
+; NOREMAT-NEXT:    add a5, a0, a5
+; NOREMAT-NEXT:    vle32.v v12, (a5)
+; NOREMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
+; NOREMAT-NEXT:    vle32.v v8, (a5)
+; NOREMAT-NEXT:    li a5, 17
+; NOREMAT-NEXT:    slli a5, a5, 9
+; NOREMAT-NEXT:    sd a5, 168(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT:    li a7, 17
+; NOREMAT-NEXT:    add a5, a0, a5
+; NOREMAT-NEXT:    vle32.v v14, (a5)
+; NOREMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
+; NOREMAT-NEXT:    vle32.v v10, (a5)
+; NOREMAT-NEXT:    slli a5, a2, 10
+; NOREMAT-NEXT:    sd a5, 160(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT:    add a5, a0, a5
+; NOREMAT-NEXT:    vle32.v v12, (a5)
+; NOREMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
+; NOREMAT-NEXT:    vle32.v v8, (a5)
+; NOREMAT-NEXT:    li a5, 19
+; NOREMAT-NEXT:    slli a5, a5, 9
+; NOREMAT-NEXT:    sd a5, 152(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT:    li a6, 19
+; NOREMAT-NEXT:    add a5, a0, a5
+; NOREMAT-NEXT:    vle32.v v14, (a5)
+; NOREMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
+; NOREMAT-NEXT:    vle32.v v10, (a5)
+; NOREMAT-NEXT:    slli a4, a4, 11
+; NOREMAT-NEXT:    sd a4, 144(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT:    add a4, a0, a4
+; NOREMAT-NEXT:    vle32.v v12, (a4)
+; NOREMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
+; NOREMAT-NEXT:    vle32.v v8, (a4)
+; NOREMAT-NEXT:    li s10, 21
+; NOREMAT-NEXT:    slli a4, s10, 9
+; NOREMAT-NEXT:    sd a4, 136(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT:    add a4, a0, a4
+; NOREMAT-NEXT:    vle32.v v14, (a4)
+; NOREMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
+; NOREMAT-NEXT:    vle32.v v10, (a4)
+; NOREMAT-NEXT:    slli a4, s8, 10
+; NOREMAT-NEXT:    sd a4, 128(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT:    add a4, a0, a4
+; NOREMAT-NEXT:    vle32.v v12, (a4)
+; NOREMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
+; NOREMAT-NEXT:    vle32.v v8, (a4)
+; NOREMAT-NEXT:    li s6, 23
+; NOREMAT-NEXT:    slli a4, s6, 9
+; NOREMAT-NEXT:    sd a4, 120(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT:    add a4, a0, a4
+; NOREMAT-NEXT:    vle32.v v14, (a4)
+; NOREMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
+; NOREMAT-NEXT:    vle32.v v10, (a4)
+; NOREMAT-NEXT:    lui a4, 3
+; NOREMAT-NEXT:    add a4, a0, a4
+; NOREMAT-NEXT:    vle32.v v12, (a4)
+; NOREMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
+; NOREMAT-NEXT:    vle32.v v8, (a4)
+; NOREMAT-NEXT:    li s3, 25
+; NOREMAT-NEXT:    slli a4, s3, 9
+; NOREMAT-NEXT:    sd a4, 112(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT:    add a4, a0, a4
+; NOREMAT-NEXT:    vle32.v v14, (a4)
+; NOREMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
+; NOREMAT-NEXT:    vle32.v v10, (a4)
+; NOREMAT-NEXT:    slli a4, s2, 10
+; NOREMAT-NEXT:    sd a4, 104(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT:    add a4, a0, a4
+; NOREMAT-NEXT:    vle32.v v12, (a4)
+; NOREMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
+; NOREMAT-NEXT:    vle32.v v8, (a4)
+; NOREMAT-NEXT:    li t5, 27
+; NOREMAT-NEXT:    slli a4, t5, 9
+; NOREMAT-NEXT:    sd a4, 96(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT:    add a4, a0, a4
+; NOREMAT-NEXT:    vle32.v v14, (a4)
+; NOREMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
+; NOREMAT-NEXT:    vle32.v v10, (a4)
+; NOREMAT-NEXT:    slli a3, a3, 11
+; NOREMAT-NEXT:    sd a3, 88(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT:    add a3, a0, a3
+; NOREMAT-NEXT:    vle32.v v12, (a3)
+; NOREMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
+; NOREMAT-NEXT:    vle32.v v8, (a3)
+; NOREMAT-NEXT:    li t2, 29
+; NOREMAT-NEXT:    slli a3, t2, 9
+; NOREMAT-NEXT:    sd a3, 80(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT:    add a3, a0, a3
+; NOREMAT-NEXT:    vle32.v v14, (a3)
+; NOREMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
+; NOREMAT-NEXT:    vle32.v v10, (a3)
+; NOREMAT-NEXT:    slli a3, t0, 10
+; NOREMAT-NEXT:    sd a3, 72(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT:    add a3, a0, a3
+; NOREMAT-NEXT:    vle32.v v12, (a3)
+; NOREMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
+; NOREMAT-NEXT:    vle32.v v8, (a3)
+; NOREMAT-NEXT:    li a5, 31
+; NOREMAT-NEXT:    slli a3, a5, 9
+; NOREMAT-NEXT:    sd a3, 64(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT:    add a3, a0, a3
+; NOREMAT-NEXT:    vle32.v v14, (a3)
+; NOREMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
+; NOREMAT-NEXT:    vle32.v v10, (a3)
+; NOREMAT-NEXT:    lui a4, 4
+; NOREMAT-NEXT:    add a3, a0, a4
+; NOREMAT-NEXT:    vle32.v v12, (a3)
+; NOREMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
+; NOREMAT-NEXT:    vle32.v v8, (a3)
+; NOREMAT-NEXT:    addiw a3, a4, 512
+; NOREMAT-NEXT:    sd a3, 56(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT:    add a3, a0, a3
+; NOREMAT-NEXT:    vle32.v v14, (a3)
+; NOREMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
+; NOREMAT-NEXT:    vle32.v v10, (a3)
+; NOREMAT-NEXT:    slli a3, a7, 10
+; NOREMAT-NEXT:    sd a3, 48(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT:    add a3, a0, a3
+; NOREMAT-NEXT:    vle32.v v12, (a3)
+; NOREMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
+; NOREMAT-NEXT:    vle32.v v8, (a3)
+; NOREMAT-NEXT:    addiw a3, a4, 1536
+; NOREMAT-NEXT:    sd a3, 40(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT:    add a3, a0, a3
+; NOREMAT-NEXT:    vle32.v v14, (a3)
+; NOREMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
+; NOREMAT-NEXT:    vle32.v v10, (a3)
+; NOREMAT-NEXT:    slli a2, a2, 11
+; NOREMAT-NEXT:    sd a2, 32(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT:    add a2, a0, a2
+; NOREMAT-NEXT:    vle32.v v12, (a2)
+; NOREMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
+; NOREMAT-NEXT:    vle32.v v8, (a2)
+; NOREMAT-NEXT:    lui s1, 5
+; NOREMAT-NEXT:    addiw a2, s1, -1536
+; NOREMAT-NEXT:    sd a2, 24(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT:    add a2, a0, a2
+; NOREMAT-NEXT:    vle32.v v14, (a2)
+; NOREMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
+; NOREMAT-NEXT:    vle32.v v10, (a2)
+; NOREMAT-NEXT:    slli a2, a6, 10
+; NOREMAT-NEXT:    sd a2, 16(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT:    add a2, a0, a2
+; NOREMAT-NEXT:    vle32.v v12, (a2)
+; NOREMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
+; NOREMAT-NEXT:    vle32.v v8, (a2)
+; NOREMAT-NEXT:    addiw a2, s1, -512
+; NOREMAT-NEXT:    sd a2, 8(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT:    add a2, a0, a2
+; NOREMAT-NEXT:    vle32.v v14, (a2)
+; NOREMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
+; NOREMAT-NEXT:    vle32.v v10, (a2)
+; NOREMAT-NEXT:    add a2, a0, s1
+; NOREMAT-NEXT:    vle32.v v12, (a2)
+; NOREMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
+; NOREMAT-NEXT:    vle32.v v8, (a2)
+; NOREMAT-NEXT:    addiw ra, s1, 512
+; NOREMAT-NEXT:    add a2, a0, ra
+; NOREMAT-NEXT:    vle32.v v14, (a2)
+; NOREMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
+; NOREMAT-NEXT:    vle32.v v10, (a2)
+; NOREMAT-NEXT:    slli s11, s10, 10
+; NOREMAT-NEXT:    add a2, a0, s11
+; NOREMAT-NEXT:    vle32.v v12, (a2)
+; NOREMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
+; NOREMAT-NEXT:    vle32.v v8, (a2)
+; NOREMAT-NEXT:    addiw s10, s1, 1536
+; NOREMAT-NEXT:    add a2, a0, s10
+; NOREMAT-NEXT:    vle32.v v14, (a2)
+; NOREMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
+; NOREMAT-NEXT:    vle32.v v10, (a2)
+; NOREMAT-NEXT:    slli s9, s8, 11
+; NOREMAT-NEXT:    add a2, a0, s9
+; NOREMAT-NEXT:    vle32.v v12, (a2)
+; NOREMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
+; NOREMAT-NEXT:    vle32.v v8, (a2)
+; NOREMAT-NEXT:    lui t1, 6
+; NOREMAT-NEXT:    addiw s8, t1, -1536
+; NOREMAT-NEXT:    add a2, a0, s8
+; NOREMAT-NEXT:    vle32.v v14, (a2)
+; NOREMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
+; NOREMAT-NEXT:    vle32.v v10, (a2)
+; NOREMAT-NEXT:    slli s7, s6, 10
+; NOREMAT-NEXT:    add a2, a0, s7
+; NOREMAT-NEXT:    vle32.v v12, (a2)
+; NOREMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
+; NOREMAT-NEXT:    vle32.v v8, (a2)
+; NOREMAT-NEXT:    addiw s6, t1, -512
+; NOREMAT-NEXT:    add a2, a0, s6
+; NOREMAT-NEXT:    vle32.v v14, (a2)
+; NOREMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
+; NOREMAT-NEXT:    vle32.v v10, (a2)
+; NOREMAT-NEXT:    add a2, a0, t1
+; NOREMAT-NEXT:    vle32.v v12, (a2)
+; NOREMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
+; NOREMAT-NEXT:    vle32.v v8, (a2)
+; NOREMAT-NEXT:    addiw s5, t1, 512
+; NOREMAT-NEXT:    add a2, a0, s5
+; NOREMAT-NEXT:    vle32.v v14, (a2)
+; NOREMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
+; NOREMAT-NEXT:    vle32.v v10, (a2)
+; NOREMAT-NEXT:    slli s4, s3, 10
+; NOREMAT-NEXT:    add a2, a0, s4
+; NOREMAT-NEXT:    vle32.v v12, (a2)
+; NOREMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
+; NOREMAT-NEXT:    vle32.v v8, (a2)
+; NOREMAT-NEXT:    addiw s3, t1, 1536
+; NOREMAT-NEXT:    add a2, a0, s3
+; NOREMAT-NEXT:    vle32.v v14, (a2)
+; NOREMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
+; NOREMAT-NEXT:    vle32.v v10, (a2)
+; NOREMAT-NEXT:    slli s2, s2, 11
+; NOREMAT-NEXT:    add a2, a0, s2
+; NOREMAT-NEXT:    vle32.v v12, (a2)
+; NOREMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
+; NOREMAT-NEXT:    vle32.v v8, (a2)
+; NOREMAT-NEXT:    lui a3, 7
+; NOREMAT-NEXT:    addiw s0, a3, -1536
+; NOREMAT-NEXT:    add a2, a0, s0
+; NOREMAT-NEXT:    vle32.v v14, (a2)
+; NOREMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
+; NOREMAT-NEXT:    vle32.v v10, (a2)
+; NOREMAT-NEXT:    slli t6, t5, 10
+; NOREMAT-NEXT:    add a2, a0, t6
+; NOREMAT-NEXT:    vle32.v v12, (a2)
+; NOREMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
+; NOREMAT-NEXT:    vle32.v v8, (a2)
+; NOREMAT-NEXT:    addiw t5, a3, -512
+; NOREMAT-NEXT:    add a2, a0, t5
+; NOREMAT-NEXT:    vle32.v v14, (a2)
+; NOREMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
+; NOREMAT-NEXT:    vle32.v v10, (a2)
+; NOREMAT-NEXT:    add a2, a0, a3
+; NOREMAT-NEXT:    vle32.v v12, (a2)
+; NOREMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
+; NOREMAT-NEXT:    vle32.v v8, (a2)
+; NOREMAT-NEXT:    addiw t4, a3, 512
+; NOREMAT-NEXT:    add a2, a0, t4
+; NOREMAT-NEXT:    vle32.v v14, (a2)
+; NOREMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
+; NOREMAT-NEXT:    vle32.v v10, (a2)
+; NOREMAT-NEXT:    slli t3, t2, 10
+; NOREMAT-NEXT:    add a2, a0, t3
+; NOREMAT-NEXT:    vle32.v v12, (a2)
+; NOREMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
+; NOREMAT-NEXT:    vle32.v v8, (a2)
+; NOREMAT-NEXT:    addiw t2, a3, 1536
+; NOREMAT-NEXT:    add a2, a0, t2
+; NOREMAT-NEXT:    vle32.v v14, (a2)
+; NOREMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
+; NOREMAT-NEXT:    vle32.v v10, (a2)
+; NOREMAT-NEXT:    slli t0, t0, 11
+; NOREMAT-NEXT:    add a2, a0, t0
+; NOREMAT-NEXT:    vle32.v v12, (a2)
+; NOREMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
+; NOREMAT-NEXT:    vle32.v v8, (a2)
+; NOREMAT-NEXT:    lui a2, 8
+; NOREMAT-NEXT:    addiw a7, a2, -1536
+; NOREMAT-NEXT:    add a4, a0, a7
+; NOREMAT-NEXT:    vle32.v v14, (a4)
+; NOREMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
+; NOREMAT-NEXT:    vle32.v v10, (a4)
+; NOREMAT-NEXT:    slli a6, a5, 10
+; NOREMAT-NEXT:    add a4, a0, a6
+; NOREMAT-NEXT:    vle32.v v12, (a4)
+; NOREMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
+; NOREMAT-NEXT:    vle32.v v8, (a4)
+; NOREMAT-NEXT:    addiw a5, a2, -512
+; NOREMAT-NEXT:    add a4, a0, a5
+; NOREMAT-NEXT:    vle32.v v14, (a4)
+; NOREMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
+; NOREMAT-NEXT:    vle32.v v10, (a4)
+; NOREMAT-NEXT:    add a0, a0, a2
+; NOREMAT-NEXT:    vle32.v v12, (a0)
+; NOREMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
+; NOREMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
+; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
+; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
+; NOREMAT-NEXT:    addi a0, a1, 1024
+; NOREMAT-NEXT:    vse32.v v8, (a0)
+; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
+; NOREMAT-NEXT:    addi a0, a1, 1536
+; NOREMAT-NEXT:    vse32.v v10, (a0)
+; NOREMAT-NEXT:    ld a0, 256(sp) # 8-byte Folded Reload
+; NOREMAT-NEXT:    add a0, a1, a0
+; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
+; NOREMAT-NEXT:    vse32.v v8, (a0)
+; NOREMAT-NEXT:    ld a0, 248(sp) # 8-byte Folded Reload
+; NOREMAT-NEXT:    add a0, a1, a0
+; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
+; NOREMAT-NEXT:    vse32.v v10, (a0)
+; NOREMAT-NEXT:    ld a0, 240(sp) # 8-byte Folded Reload
+; NOREMAT-NEXT:    add a0, a1, a0
+; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
+; NOREMAT-NEXT:    vse32.v v8, (a0)
+; NOREMAT-NEXT:    ld a0, 232(sp) # 8-byte Folded Reload
+; NOREMAT-NEXT:    add a0, a1, a0
+; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
+; NOREMAT-NEXT:    vse32.v v10, (a0)
+; NOREMAT-NEXT:    lui a0, 1
+; NOREMAT-NEXT:    add a0, a1, a0
+; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
+; NOREMAT-NEXT:    vse32.v v8, (a0)
+; NOREMAT-NEXT:    ld a0, 224(sp) # 8-byte Folded Reload
+; NOREMAT-NEXT:    add a0, a1, a0
+; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
+; NOREMAT-NEXT:    vse32.v v10, (a0)
+; NOREMAT-NEXT:    ld a0, 216(sp) # 8-byte Folded Reload
+; NOREMAT-NEXT:    add a0, a1, a0
+; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
+; NOREMAT-NEXT:    vse32.v v8, (a0)
+; NOREMAT-NEXT:    ld a0, 208(sp) # 8-byte Folded Reload
+; NOREMAT-NEXT:    add a0, a1, a0
+; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
+; NOREMAT-NEXT:    vse32.v v10, (a0)
+; NOREMAT-NEXT:    ld a0, 200(sp) # 8-byte Folded Reload
+; NOREMAT-NEXT:    add a0, a1, a0
+; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
+; NOREMAT-NEXT:    vse32.v v8, (a0)
+; NOREMAT-NEXT:    ld a0, 192(sp) # 8-byte Folded Reload
+; NOREMAT-NEXT:    add a0, a1, a0
+; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
+; NOREMAT-NEXT:    vse32.v v10, (a0)
+; NOREMAT-NEXT:    ld a0, 184(sp) # 8-byte Folded Reload
+; NOREMAT-NEXT:    add a0, a1, a0
+; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
+; NOREMAT-NEXT:    vse32.v v8, (a0)
+; NOREMAT-NEXT:    ld a0, 176(sp) # 8-byte Folded Reload
+; NOREMAT-NEXT:    add a0, a1, a0
+; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
+; NOREMAT-NEXT:    vse32.v v10, (a0)
+; NOREMAT-NEXT:    lui a0, 2
+; NOREMAT-NEXT:    add a0, a1, a0
+; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
+; NOREMAT-NEXT:    vse32.v v8, (a0)
+; NOREMAT-NEXT:    ld a0, 168(sp) # 8-byte Folded Reload
+; NOREMAT-NEXT:    add a0, a1, a0
+; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
+; NOREMAT-NEXT:    vse32.v v10, (a0)
+; NOREMAT-NEXT:    ld a0, 160(sp) # 8-byte Folded Reload
+; NOREMAT-NEXT:    add a0, a1, a0
+; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
+; NOREMAT-NEXT:    vse32.v v8, (a0)
+; NOREMAT-NEXT:    ld a0, 152(sp) # 8-byte Folded Reload
+; NOREMAT-NEXT:    add a0, a1, a0
+; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
+; NOREMAT-NEXT:    vse32.v v10, (a0)
+; NOREMAT-NEXT:    ld a0, 144(sp) # 8-byte Folded Reload
+; NOREMAT-NEXT:    add a0, a1, a0
+; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
+; NOREMAT-NEXT:    vse32.v v8, (a0)
+; NOREMAT-NEXT:    ld a0, 136(sp) # 8-byte Folded Reload
+; NOREMAT-NEXT:    add a0, a1, a0
+; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
+; NOREMAT-NEXT:    vse32.v v10, (a0)
+; NOREMAT-NEXT:    ld a0, 128(sp) # 8-byte Folded Reload
+; NOREMAT-NEXT:    add a0, a1, a0
+; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
+; NOREMAT-NEXT:    vse32.v v8, (a0)
+; NOREMAT-NEXT:    ld a0, 120(sp) # 8-byte Folded Reload
+; NOREMAT-NEXT:    add a0, a1, a0
+; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
+; NOREMAT-NEXT:    vse32.v v10, (a0)
+; NOREMAT-NEXT:    lui a0, 3
+; NOREMAT-NEXT:    add a0, a1, a0
+; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
+; NOREMAT-NEXT:    vse32.v v8, (a0)
+; NOREMAT-NEXT:    ld a0, 112(sp) # 8-byte Folded Reload
+; NOREMAT-NEXT:    add a0, a1, a0
+; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
+; NOREMAT-NEXT:    vse32.v v10, (a0)
+; NOREMAT-NEXT:    ld a0, 104(sp) # 8-byte Folded Reload
+; NOREMAT-NEXT:    add a0, a1, a0
+; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
+; NOREMAT-NEXT:    vse32.v v8, (a0)
+; NOREMAT-NEXT:    ld a0, 96(sp) # 8-byte Folded Reload
+; NOREMAT-NEXT:    add a0, a1, a0
+; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
+; NOREMAT-NEXT:    vse32.v v10, (a0)
+; NOREMAT-NEXT:    ld a0, 88(sp) # 8-byte Folded Reload
+; NOREMAT-NEXT:    add a0, a1, a0
+; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
+; NOREMAT-NEXT:    vse32.v v8, (a0)
+; NOREMAT-NEXT:    ld a0, 80(sp) # 8-byte Folded Reload
+; NOREMAT-NEXT:    add a0, a1, a0
+; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
+; NOREMAT-NEXT:    vse32.v v10, (a0)
+; NOREMAT-NEXT:    ld a0, 72(sp) # 8-byte Folded Reload
+; NOREMAT-NEXT:    add a0, a1, a0
+; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
+; NOREMAT-NEXT:    vse32.v v8, (a0)
+; NOREMAT-NEXT:    ld a0, 64(sp) # 8-byte Folded Reload
+; NOREMAT-NEXT:    add a0, a1, a0
+; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
+; NOREMAT-NEXT:    vse32.v v10, (a0)
+; NOREMAT-NEXT:    lui a0, 4
+; NOREMAT-NEXT:    add a0, a1, a0
+; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
+; NOREMAT-NEXT:    vse32.v v8, (a0)
+; NOREMAT-NEXT:    ld a0, 56(sp) # 8-byte Folded Reload
+; NOREMAT-NEXT:    add a0, a1, a0
+; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
+; NOREMAT-NEXT:    vse32.v v10, (a0)
+; NOREMAT-NEXT:    ld a0, 48(sp) # 8-byte Folded Reload
+; NOREMAT-NEXT:    add a0, a1, a0
+; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
+; NOREMAT-NEXT:    vse32.v v8, (a0)
+; NOREMAT-NEXT:    ld a0, 40(sp) # 8-byte Folded Reload
+; NOREMAT-NEXT:    add a0, a1, a0
+; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
+; NOREMAT-NEXT:    vse32.v v10, (a0)
+; NOREMAT-NEXT:    ld a0, 32(sp) # 8-byte Folded Reload
+; NOREMAT-NEXT:    add a0, a1, a0
+; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
+; NOREMAT-NEXT:    vse32.v v8, (a0)
+; NOREMAT-NEXT:    ld a0, 24(sp) # 8-byte Folded Reload
+; NOREMAT-NEXT:    add a0, a1, a0
+; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
+; NOREMAT-NEXT:    vse32.v v10, (a0)
+; NOREMAT-NEXT:    ld a0, 16(sp) # 8-byte Folded Reload
+; NOREMAT-NEXT:    add a0, a1, a0
+; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
+; NOREMAT-NEXT:    vse32.v v8, (a0)
+; NOREMAT-NEXT:    ld a0, 8(sp) # 8-byte Folded Reload
+; NOREMAT-NEXT:    add a0, a1, a0
+; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
+; NOREMAT-NEXT:    vse32.v v10, (a0)
+; NOREMAT-NEXT:    add s1, a1, s1
+; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
+; NOREMAT-NEXT:    vse32.v v8, (s1)
+; NOREMAT-NEXT:    add ra, a1, ra
+; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
+; NOREMAT-NEXT:    vse32.v v10, (ra)
+; NOREMAT-NEXT:    add s11, a1, s11
+; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
+; NOREMAT-NEXT:    vse32.v v8, (s11)
+; NOREMAT-NEXT:    add s10, a1, s10
+; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
+; NOREMAT-NEXT:    vse32.v v10, (s10)
+; NOREMAT-NEXT:    add s9, a1, s9
+; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
+; NOREMAT-NEXT:    vse32.v v8, (s9)
+; NOREMAT-NEXT:    add s8, a1, s8
+; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
+; NOREMAT-NEXT:    vse32.v v10, (s8)
+; NOREMAT-NEXT:    add s7, a1, s7
+; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
+; NOREMAT-NEXT:    vse32.v v8, (s7)
+; NOREMAT-NEXT:    add s6, a1, s6
+; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
+; NOREMAT-NEXT:    vse32.v v10, (s6)
+; NOREMAT-NEXT:    add t1, a1, t1
+; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
+; NOREMAT-NEXT:    vse32.v v8, (t1)
+; NOREMAT-NEXT:    add s5, a1, s5
+; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
+; NOREMAT-NEXT:    vse32.v v10, (s5)
+; NOREMAT-NEXT:    add s4, a1, s4
+; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
+; NOREMAT-NEXT:    vse32.v v8, (s4)
+; NOREMAT-NEXT:    add s3, a1, s3
+; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
+; NOREMAT-NEXT:    vse32.v v10, (s3)
+; NOREMAT-NEXT:    add s2, a1, s2
+; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
+; NOREMAT-NEXT:    vse32.v v8, (s2)
+; NOREMAT-NEXT:    add s0, a1, s0
+; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
+; NOREMAT-NEXT:    vse32.v v10, (s0)
+; NOREMAT-NEXT:    add t6, a1, t6
+; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
+; NOREMAT-NEXT:    vse32.v v8, (t6)
+; NOREMAT-NEXT:    add t5, a1, t5
+; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
+; NOREMAT-NEXT:    vse32.v v10, (t5)
+; NOREMAT-NEXT:    add a3, a1, a3
+; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
+; NOREMAT-NEXT:    vse32.v v8, (a3)
+; NOREMAT-NEXT:    add t4, a1, t4
+; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
+; NOREMAT-NEXT:    vse32.v v10, (t4)
+; NOREMAT-NEXT:    add t3, a1, t3
+; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
+; NOREMAT-NEXT:    vse32.v v8, (t3)
+; NOREMAT-NEXT:    add t2, a1, t2
+; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
+; NOREMAT-NEXT:    vse32.v v10, (t2)
+; NOREMAT-NEXT:    add t0, a1, t0
+; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
+; NOREMAT-NEXT:    vse32.v v8, (t0)
+; NOREMAT-NEXT:    add a7, a1, a7
+; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
+; NOREMAT-NEXT:    vse32.v v10, (a7)
+; NOREMAT-NEXT:    add a6, a1, a6
+; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
+; NOREMAT-NEXT:    vse32.v v8, (a6)
+; NOREMAT-NEXT:    add a5, a1, a5
+; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
+; NOREMAT-NEXT:    vse32.v v10, (a5)
+; NOREMAT-NEXT:    add a0, a1, a2
+; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
+; NOREMAT-NEXT:    vse32.v v8, (a0)
+; NOREMAT-NEXT:    addiw a0, a2, 512
+; NOREMAT-NEXT:    add a0, a1, a0
+; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
+; NOREMAT-NEXT:    vse32.v v10, (a0)
+; NOREMAT-NEXT:    addiw a0, a2, 1024
+; NOREMAT-NEXT:    add a0, a1, a0
+; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
+; NOREMAT-NEXT:    vse32.v v8, (a0)
+; NOREMAT-NEXT:    addiw a0, a2, 1536
+; NOREMAT-NEXT:    add a0, a1, a0
+; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
+; NOREMAT-NEXT:    vse32.v v10, (a0)
+; NOREMAT-NEXT:    li a0, 17
+; NOREMAT-NEXT:    slli a0, a0, 11
+; NOREMAT-NEXT:    add a0, a1, a0
+; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
+; NOREMAT-NEXT:    vse32.v v8, (a0)
+; NOREMAT-NEXT:    lui a0, 9
+; NOREMAT-NEXT:    addiw a2, a0, -1536
+; NOREMAT-NEXT:    add a2, a1, a2
+; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
+; NOREMAT-NEXT:    vse32.v v10, (a2)
+; NOREMAT-NEXT:    addiw a2, a0, -1024
+; NOREMAT-NEXT:    add a2, a1, a2
+; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
+; NOREMAT-NEXT:    vse32.v v8, (a2)
+; NOREMAT-NEXT:    addiw a2, a0, -512
+; NOREMAT-NEXT:    add a2, a1, a2
+; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
+; NOREMAT-NEXT:    vse32.v v10, (a2)
+; NOREMAT-NEXT:    add a2, a1, a0
+; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
+; NOREMAT-NEXT:    vse32.v v8, (a2)
+; NOREMAT-NEXT:    addiw a2, a0, 512
+; NOREMAT-NEXT:    add a2, a1, a2
+; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
+; NOREMAT-NEXT:    vse32.v v10, (a2)
+; NOREMAT-NEXT:    addiw a2, a0, 1024
+; NOREMAT-NEXT:    add a2, a1, a2
+; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
+; NOREMAT-NEXT:    vse32.v v8, (a2)
+; NOREMAT-NEXT:    addiw a0, a0, 1536
+; NOREMAT-NEXT:    add a0, a1, a0
+; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
+; NOREMAT-NEXT:    vse32.v v10, (a0)
+; NOREMAT-NEXT:    li a0, 19
+; NOREMAT-NEXT:    slli a0, a0, 11
+; NOREMAT-NEXT:    add a0, a1, a0
+; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
+; NOREMAT-NEXT:    vse32.v v8, (a0)
+; NOREMAT-NEXT:    lui a0, 10
+; NOREMAT-NEXT:    addiw a2, a0, -1536
+; NOREMAT-NEXT:    add a2, a1, a2
+; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
+; NOREMAT-NEXT:    vse32.v v10, (a2)
+; NOREMAT-NEXT:    addiw a2, a0, -1024
+; NOREMAT-NEXT:    add a2, a1, a2
+; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
+; NOREMAT-NEXT:    vse32.v v8, (a2)
+; NOREMAT-NEXT:    addiw a2, a0, -512
+; NOREMAT-NEXT:    add a2, a1, a2
+; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
+; NOREMAT-NEXT:    vse32.v v10, (a2)
+; NOREMAT-NEXT:    add a2, a1, a0
+; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
+; NOREMAT-NEXT:    vse32.v v8, (a2)
+; NOREMAT-NEXT:    addiw a0, a0, 512
+; NOREMAT-NEXT:    add a0, a1, a0
+; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
+; NOREMAT-NEXT:    vse32.v v10, (a0)
+; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
+; NOREMAT-NEXT:    ld ra, 360(sp) # 8-byte Folded Reload
+; NOREMAT-NEXT:    ld s0, 352(sp) # 8-byte Folded Reload
+; NOREMAT-NEXT:    ld s1, 344(sp) # 8-byte Folded Reload
+; NOREMAT-NEXT:    ld s2, 336(sp) # 8-byte Folded Reload
+; NOREMAT-NEXT:    ld s3, 328(sp) # 8-byte Folded Reload
+; NOREMAT-NEXT:    ld s4, 320(sp) # 8-byte Folded Reload
+; NOREMAT-NEXT:    ld s5, 312(sp) # 8-byte Folded Reload
+; NOREMAT-NEXT:    ld s6, 304(sp) # 8-byte Folded Reload
+; NOREMAT-NEXT:    ld s7, 296(sp) # 8-byte Folded Reload
+; NOREMAT-NEXT:    ld s8, 288(sp) # 8-byte Folded Reload
+; NOREMAT-NEXT:    ld s9, 280(sp) # 8-byte Folded Reload
+; NOREMAT-NEXT:    ld s10, 272(sp) # 8-byte Folded Reload
+; NOREMAT-NEXT:    ld s11, 264(sp) # 8-byte Folded Reload
+; NOREMAT-NEXT:    addi sp, sp, 368
+; NOREMAT-NEXT:    ret
+;
+; REMAT-LABEL: test:
+; REMAT:       # %bb.0:
+; REMAT-NEXT:    addi sp, sp, -112
+; REMAT-NEXT:    .cfi_def_cfa_offset 112
+; REMAT-NEXT:    sd ra, 104(sp) # 8-byte Folded Spill
+; REMAT-NEXT:    sd s0, 96(sp) # 8-byte Folded Spill
+; REMAT-NEXT:    sd s1, 88(sp) # 8-byte Folded Spill
+; REMAT-NEXT:    sd s2, 80(sp) # 8-byte Folded Spill
+; REMAT-NEXT:    sd s3, 72(sp) # 8-byte Folded Spill
+; REMAT-NEXT:    sd s4, 64(sp) # 8-byte Folded Spill
+; REMAT-NEXT:    sd s5, 56(sp) # 8-byte Folded Spill
+; REMAT-NEXT:    sd s6, 48(sp) # 8-byte Folded Spill
+; REMAT-NEXT:    sd s7, 40(sp) # 8-byte Folded Spill
+; REMAT-NEXT:    sd s8, 32(sp) # 8-byte Folded Spill
+; REMAT-NEXT:    sd s9, 24(sp) # 8-byte Folded Spill
+; REMAT-NEXT:    sd s10, 16(sp) # 8-byte Folded Spill
+; REMAT-NEXT:    sd s11, 8(sp) # 8-byte Folded Spill
+; REMAT-NEXT:    .cfi_offset ra, -8
+; REMAT-NEXT:    .cfi_offset s0, -16
+; REMAT-NEXT:    .cfi_offset s1, -24
+; REMAT-NEXT:    .cfi_offset s2, -32
+; REMAT-NEXT:    .cfi_offset s3, -40
+; REMAT-NEXT:    .cfi_offset s4, -48
+; REMAT-NEXT:    .cfi_offset s5, -56
+; REMAT-NEXT:    .cfi_offset s6, -64
+; REMAT-NEXT:    .cfi_offset s7, -72
+; REMAT-NEXT:    .cfi_offset s8, -80
+; REMAT-NEXT:    .cfi_offset s9, -88
+; REMAT-NEXT:    .cfi_offset s10, -96
+; REMAT-NEXT:    .cfi_offset s11, -104
+; REMAT-NEXT:    li a2, 32
+; REMAT-NEXT:    vsetvli zero, a2, e32, m2, ta, ma
+; REMAT-NEXT:    vle32.v v8, (a0)
+; REMAT-NEXT:    addi a2, a0, 512
+; REMAT-NEXT:    vle32.v v10, (a2)
+; REMAT-NEXT:    addi a2, a0, 1024
+; REMAT-NEXT:    vle32.v v12, (a2)
+; REMAT-NEXT:    sf.vc.vv 3, 0, v8, v10
+; REMAT-NEXT:    vle32.v v8, (a2)
+; REMAT-NEXT:    addi a2, a0, 1536
+; REMAT-NEXT:    vle32.v v14, (a2)
+; REMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
+; REMAT-NEXT:    vle32.v v10, (a2)
+; REMAT-NEXT:    li a2, 1
+; REMAT-NEXT:    slli a2, a2, 11
+; REMAT-NEXT:    add a2, a0, a2
+; REMAT-NEXT:    vle32.v v12, (a2)
+; REMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
+; REMAT-NEXT:    vle32.v v8, (a2)
+; REMAT-NEXT:    li a2, 5
+; REMAT-NEXT:    slli a2, a2, 9
+; REMAT-NEXT:    add a2, a0, a2
+; REMAT-NEXT:    vle32.v v14, (a2)
+; REMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
+; REMAT-NEXT:    vle32.v v10, (a2)
+; REMAT-NEXT:    li a2, 3
+; REMAT-NEXT:    slli a2, a2, 10
+; REMAT-NEXT:    add a2, a0, a2
+; REMAT-NEXT:    vle32.v v12, (a2)
+; REMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
+; REMAT-NEXT:    vle32.v v8, (a2)
+; REMAT-NEXT:    li a2, 7
+; REMAT-NEXT:    slli a2, a2, 9
+; REMAT-NEXT:    add a2, a0, a2
+; REMAT-NEXT:    vle32.v v14, (a2)
+; REMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
+; REMAT-NEXT:    vle32.v v10, (a2)
+; REMAT-NEXT:    lui a2, 1
+; REMAT-NEXT:    add a2, a0, a2
+; REMAT-NEXT:    vle32.v v12, (a2)
+; REMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
+; REMAT-NEXT:    vle32.v v8, (a2)
+; REMAT-NEXT:    li a2, 9
+; REMAT-NEXT:    slli a2, a2, 9
+; REMAT-NEXT:    add a2, a0, a2
+; REMAT-NEXT:    vle32.v v14, (a2)
+; REMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
+; REMAT-NEXT:    vle32.v v10, (a2)
+; REMAT-NEXT:    li a2, 5
+; REMAT-NEXT:    slli a2, a2, 10
+; REMAT-NEXT:    add a2, a0, a2
+; REMAT-NEXT:    vle32.v v12, (a2)
+; REMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
+; REMAT-NEXT:    vle32.v v8, (a2)
+; REMAT-NEXT:    li a2, 11
+; REMAT-NEXT:    slli a2, a2, 9
+; REMAT-NEXT:    add a2, a0, a2
+; REMAT-NEXT:    vle32.v v14, (a2)
+; REMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
+; REMAT-NEXT:    vle32.v v10, (a2)
+; REMAT-NEXT:    li a2, 3
+; REMAT-NEXT:    slli a2, a2, 11
+; REMAT-NEXT:    add a2, a0, a2
+; REMAT-NEXT:    vle32.v v12, (a2)
+; REMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
+; REMAT-NEXT:    vle32.v v8, (a2)
+; REMAT-NEXT:    li a2, 13
+; REMAT-NEXT:    slli a2, a2, 9
+; REMAT-NEXT:    add a2, a0, a2
+; REMAT-NEXT:    vle32.v v14, (a2)
+; REMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
+; REMAT-NEXT:    vle32.v v10, (a2)
+; REMAT-NEXT:    li a2, 7
+; REMAT-NEXT:    slli a2, a2, 10
+; REMAT-NEXT:    add a2, a0, a2
+; REMAT-NEXT:    vle32.v v12, (a2)
+; REMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
+; REMAT-NEXT:    vle32.v v8, (a2)
+; REMAT-NEXT:    li a2, 15
+; REMAT-NEXT:    slli a2, a2, 9
+; REMAT-NEXT:    add a2, a0, a2
+; REMAT-NEXT:    vle32.v v14, (a2)
+; REMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
+; REMAT-NEXT:    vle32.v v10, (a2)
+; REMAT-NEXT:    lui a2, 2
+; REMAT-NEXT:    add a2, a0, a2
+; REMAT-NEXT:    vle32.v v12, (a2)
+; REMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
+; REMAT-NEXT:    vle32.v v8, (a2)
+; REMAT-NEXT:    li a2, 17
+; REMAT-NEXT:    slli a2, a2, 9
+; REMAT-NEXT:    add a2, a0, a2
+; REMAT-NEXT:    vle32.v v14, (a2)
+; REMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
+; REMAT-NEXT:    vle32.v v10, (a2)
+; REMAT-NEXT:    li a2, 9
+; REMAT-NEXT:    slli a2, a2, 10
+; REMAT-NEXT:    add a2, a0, a2
+; REMAT-NEXT:    vle32.v v12, (a2)
+; REMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
+; REMAT-NEXT:    vle32.v v8, (a2)
+; REMAT-NEXT:    li a2, 19
+; REMAT-NEXT:    slli a2, a2, 9
+; REMAT-NEXT:    add a2, a0, a2
+; REMAT-NEXT:    vle32.v v14, (a2)
+; REMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
+; REMAT-NEXT:    vle32.v v10, (a2)
+; REMAT-NEXT:    li a2, 5
+; REMAT-NEXT:    slli a2, a2, 11
+; REMAT-NEXT:    add a2, a0, a2
+; REMAT-NEXT:    vle32.v v12, (a2)
+; REMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
+; REMAT-NEXT:    vle32.v v8, (a2)
+; REMAT-NEXT:    li a2, 21
+; REMAT-NEXT:    slli a2, a2, 9
+; REMAT-NEXT:    add a2, a0, a2
+; REMAT-NEXT:    vle32.v v14, (a2)
+; REMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
+; REMAT-NEXT:    vle32.v v10, (a2)
+; REMAT-NEXT:    li a2, 11
+; REMAT-NEXT:    slli a2, a2, 10
+; REMAT-NEXT:    add a2, a0, a2
+; REMAT-NEXT:    vle32.v v12, (a2)
+; REMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
+; REMAT-NEXT:    vle32.v v8, (a2)
+; REMAT-NEXT:    li a2, 23
+; REMAT-NEXT:    slli a2, a2, 9
+; REMAT-NEXT:    add a2, a0, a2
+; REMAT-NEXT:    vle32.v v14, (a2)
+; REMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
+; REMAT-NEXT:    vle32.v v10, (a2)
+; REMAT-NEXT:    lui a2, 3
+; REMAT-NEXT:    add a2, a0, a2
+; REMAT-NEXT:    vle32.v v12, (a2)
+; REMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
+; REMAT-NEXT:    vle32.v v8, (a2)
+; REMAT-NEXT:    li a2, 25
+; REMAT-NEXT:    slli a2, a2, 9
+; REMAT-NEXT:    add a2, a0, a2
+; REMAT-NEXT:    vle32.v v14, (a2)
+; REMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
+; REMAT-NEXT:    vle32.v v10, (a2)
+; REMAT-NEXT:    li a2, 13
+; REMAT-NEXT:    slli a2, a2, 10
+; REMAT-NEXT:    add a2, a0, a2
+; REMAT-NEXT:    vle32.v v12, (a2)
+; REMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
+; REMAT-NEXT:    vle32.v v8, (a2)
+; REMAT-NEXT:    li a2, 27
+; REMAT-NEXT:    slli a2, a2, 9
+; REMAT-NEXT:    add a2, a0, a2
+; REMAT-NEXT:    vle32.v v14, (a2)
+; REMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
+; REMAT-NEXT:    vle32.v v10, (a2)
+; REMAT-NEXT:    li a2, 7
+; REMAT-NEXT:    slli a2, a2, 11
+; REMAT-NEXT:    add a2, a0, a2
+; REMAT-NEXT:    vle32.v v12, (a2)
+; REMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
+; REMAT-NEXT:    vle32.v v8, (a2)
+; REMAT-NEXT:    li a2, 29
+; REMAT-NEXT:    slli a2, a2, 9
+; REMAT-NEXT:    add a2, a0, a2
+; REMAT-NEXT:    vle32.v v14, (a2)
+; REMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
+; REMAT-NEXT:    vle32.v v10, (a2)
+; REMAT-NEXT:    li a2, 15
+; REMAT-NEXT:    slli a2, a2, 10
+; REMAT-NEXT:    add a2, a0, a2
+; REMAT-NEXT:    vle32.v v12, (a2)
+; REMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
+; REMAT-NEXT:    vle32.v v8, (a2)
+; REMAT-NEXT:    li a2, 31
+; REMAT-NEXT:    slli a2, a2, 9
+; REMAT-NEXT:    add a2, a0, a2
+; REMAT-NEXT:    vle32.v v14, (a2)
+; REMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
+; REMAT-NEXT:    vle32.v v10, (a2)
+; REMAT-NEXT:    lui a2, 4
+; REMAT-NEXT:    add a2, a0, a2
+; REMAT-NEXT:    vle32.v v12, (a2)
+; REMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
+; REMAT-NEXT:    vle32.v v8, (a2)
+; REMAT-NEXT:    lui a2, 4
+; REMAT-NEXT:    addiw a2, a2, 512
+; REMAT-NEXT:    add a2, a0, a2
+; REMAT-NEXT:    vle32.v v14, (a2)
+; REMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
+; REMAT-NEXT:    vle32.v v10, (a2)
+; REMAT-NEXT:    li a2, 17
+; REMAT-NEXT:    slli a2, a2, 10
+; REMAT-NEXT:    add a2, a0, a2
+; REMAT-NEXT:    vle32.v v12, (a2)
+; REMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
+; REMAT-NEXT:    vle32.v v8, (a2)
+; REMAT-NEXT:    lui a2, 4
+; REMAT-NEXT:    addiw a2, a2, 1536
+; REMAT-NEXT:    add a2, a0, a2
+; REMAT-NEXT:    vle32.v v14, (a2)
+; REMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
+; REMAT-NEXT:    vle32.v v10, (a2)
+; REMAT-NEXT:    li a2, 9
+; REMAT-NEXT:    slli a2, a2, 11
+; REMAT-NEXT:    add a2, a0, a2
+; REMAT-NEXT:    vle32.v v12, (a2)
+; REMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
+; REMAT-NEXT:    vle32.v v8, (a2)
+; REMAT-NEXT:    lui a2, 5
+; REMAT-NEXT:    addiw a2, a2, -1536
+; REMAT-NEXT:    add a2, a0, a2
+; REMAT-NEXT:    vle32.v v14, (a2)
+; REMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
+; REMAT-NEXT:    vle32.v v10, (a2)
+; REMAT-NEXT:    li a2, 19
+; REMAT-NEXT:    slli a2, a2, 10
+; REMAT-NEXT:    add a2, a0, a2
+; REMAT-NEXT:    vle32.v v12, (a2)
+; REMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
+; REMAT-NEXT:    vle32.v v8, (a2)
+; REMAT-NEXT:    lui ra, 5
+; REMAT-NEXT:    addiw ra, ra, -512
+; REMAT-NEXT:    add a2, a0, ra
+; REMAT-NEXT:    vle32.v v14, (a2)
+; REMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
+; REMAT-NEXT:    vle32.v v10, (a2)
+; REMAT-NEXT:    lui s11, 5
+; REMAT-NEXT:    add a2, a0, s11
+; REMAT-NEXT:    vle32.v v12, (a2)
+; REMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
+; REMAT-NEXT:    vle32.v v8, (a2)
+; REMAT-NEXT:    lui s10, 5
+; REMAT-NEXT:    addiw s10, s10, 512
+; REMAT-NEXT:    add a2, a0, s10
+; REMAT-NEXT:    vle32.v v14, (a2)
+; REMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
+; REMAT-NEXT:    vle32.v v10, (a2)
+; REMAT-NEXT:    li s9, 21
+; REMAT-NEXT:    slli s9, s9, 10
+; REMAT-NEXT:    add a2, a0, s9
+; REMAT-NEXT:    vle32.v v12, (a2)
+; REMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
+; REMAT-NEXT:    vle32.v v8, (a2)
+; REMAT-NEXT:    lui s8, 5
+; REMAT-NEXT:    addiw s8, s8, 1536
+; REMAT-NEXT:    add a2, a0, s8
+; REMAT-NEXT:    vle32.v v14, (a2)
+; REMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
+; REMAT-NEXT:    vle32.v v10, (a2)
+; REMAT-NEXT:    li s7, 11
+; REMAT-NEXT:    slli s7, s7, 11
+; REMAT-NEXT:    add a2, a0, s7
+; REMAT-NEXT:    vle32.v v12, (a2)
+; REMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
+; REMAT-NEXT:    vle32.v v8, (a2)
+; REMAT-NEXT:    lui s6, 6
+; REMAT-NEXT:    addiw s6, s6, -1536
+; REMAT-NEXT:    add a2, a0, s6
+; REMAT-NEXT:    vle32.v v14, (a2)
+; REMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
+; REMAT-NEXT:    vle32.v v10, (a2)
+; REMAT-NEXT:    li s5, 23
+; REMAT-NEXT:    slli s5, s5, 10
+; REMAT-NEXT:    add a2, a0, s5
+; REMAT-NEXT:    vle32.v v12, (a2)
+; REMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
+; REMAT-NEXT:    vle32.v v8, (a2)
+; REMAT-NEXT:    lui s4, 6
+; REMAT-NEXT:    addiw s4, s4, -512
+; REMAT-NEXT:    add a2, a0, s4
+; REMAT-NEXT:    vle32.v v14, (a2)
+; REMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
+; REMAT-NEXT:    vle32.v v10, (a2)
+; REMAT-NEXT:    lui s3, 6
+; REMAT-NEXT:    add a2, a0, s3
+; REMAT-NEXT:    vle32.v v12, (a2)
+; REMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
+; REMAT-NEXT:    vle32.v v8, (a2)
+; REMAT-NEXT:    lui s2, 6
+; REMAT-NEXT:    addiw s2, s2, 512
+; REMAT-NEXT:    add a2, a0, s2
+; REMAT-NEXT:    vle32.v v14, (a2)
+; REMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
+; REMAT-NEXT:    vle32.v v10, (a2)
+; REMAT-NEXT:    li s1, 25
+; REMAT-NEXT:    slli s1, s1, 10
+; REMAT-NEXT:    add a2, a0, s1
+; REMAT-NEXT:    vle32.v v12, (a2)
+; REMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
+; REMAT-NEXT:    vle32.v v8, (a2)
+; REMAT-NEXT:    lui s0, 6
+; REMAT-NEXT:    addiw s0, s0, 1536
+; REMAT-NEXT:    add a2, a0, s0
+; REMAT-NEXT:    vle32.v v14, (a2)
+; REMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
+; REMAT-NEXT:    vle32.v v10, (a2)
+; REMAT-NEXT:    li t6, 13
+; REMAT-NEXT:    slli t6, t6, 11
+; REMAT-NEXT:    add a2, a0, t6
+; REMAT-NEXT:    vle32.v v12, (a2)
+; REMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
+; REMAT-NEXT:    vle32.v v8, (a2)
+; REMAT-NEXT:    lui t5, 7
+; REMAT-NEXT:    addiw t5, t5, -1536
+; REMAT-NEXT:    add a2, a0, t5
+; REMAT-NEXT:    vle32.v v14, (a2)
+; REMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
+; REMAT-NEXT:    vle32.v v10, (a2)
+; REMAT-NEXT:    li t4, 27
+; REMAT-NEXT:    slli t4, t4, 10
+; REMAT-NEXT:    add a2, a0, t4
+; REMAT-NEXT:    vle32.v v12, (a2)
+; REMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
+; REMAT-NEXT:    vle32.v v8, (a2)
+; REMAT-NEXT:    lui t3, 7
+; REMAT-NEXT:    addiw t3, t3, -512
+; REMAT-NEXT:    add a2, a0, t3
+; REMAT-NEXT:    vle32.v v14, (a2)
+; REMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
+; REMAT-NEXT:    vle32.v v10, (a2)
+; REMAT-NEXT:    lui t2, 7
+; REMAT-NEXT:    add a2, a0, t2
+; REMAT-NEXT:    vle32.v v12, (a2)
+; REMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
+; REMAT-NEXT:    vle32.v v8, (a2)
+; REMAT-NEXT:    lui t1, 7
+; REMAT-NEXT:    addiw t1, t1, 512
+; REMAT-NEXT:    add a2, a0, t1
+; REMAT-NEXT:    vle32.v v14, (a2)
+; REMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
+; REMAT-NEXT:    vle32.v v10, (a2)
+; REMAT-NEXT:    li t0, 29
+; REMAT-NEXT:    slli t0, t0, 10
+; REMAT-NEXT:    add a2, a0, t0
+; REMAT-NEXT:    vle32.v v12, (a2)
+; REMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
+; REMAT-NEXT:    vle32.v v8, (a2)
+; REMAT-NEXT:    lui a7, 7
+; REMAT-NEXT:    addiw a7, a7, 1536
+; REMAT-NEXT:    add a2, a0, a7
+; REMAT-NEXT:    vle32.v v14, (a2)
+; REMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
+; REMAT-NEXT:    vle32.v v10, (a2)
+; REMAT-NEXT:    li a6, 15
+; REMAT-NEXT:    slli a6, a6, 11
+; REMAT-NEXT:    add a2, a0, a6
+; REMAT-NEXT:    vle32.v v12, (a2)
+; REMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
+; REMAT-NEXT:    vle32.v v8, (a2)
+; REMAT-NEXT:    lui a5, 8
+; REMAT-NEXT:    addiw a5, a5, -1536
+; REMAT-NEXT:    add a2, a0, a5
+; REMAT-NEXT:    vle32.v v14, (a2)
+; REMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
+; REMAT-NEXT:    vle32.v v10, (a2)
+; REMAT-NEXT:    li a4, 31
+; REMAT-NEXT:    slli a4, a4, 10
+; REMAT-NEXT:    add a2, a0, a4
+; REMAT-NEXT:    vle32.v v12, (a2)
+; REMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
+; REMAT-NEXT:    vle32.v v8, (a2)
+; REMAT-NEXT:    lui a3, 8
+; REMAT-NEXT:    addiw a3, a3, -512
+; REMAT-NEXT:    add a2, a0, a3
+; REMAT-NEXT:    vle32.v v14, (a2)
+; REMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
+; REMAT-NEXT:    vle32.v v10, (a2)
+; REMAT-NEXT:    lui a2, 8
+; REMAT-NEXT:    add a0, a0, a2
+; REMAT-NEXT:    vle32.v v12, (a0)
+; REMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
+; REMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
+; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
+; REMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
+; REMAT-NEXT:    addi a0, a1, 1024
+; REMAT-NEXT:    vse32.v v8, (a0)
+; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
+; REMAT-NEXT:    addi a0, a1, 1536
+; REMAT-NEXT:    vse32.v v10, (a0)
+; REMAT-NEXT:    li a0, 1
+; REMAT-NEXT:    slli a0, a0, 11
+; REMAT-NEXT:    add a0, a1, a0
+; REMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
+; REMAT-NEXT:    vse32.v v8, (a0)
+; REMAT-NEXT:    li a0, 5
+; REMAT-NEXT:    slli a0, a0, 9
+; REMAT-NEXT:    add a0, a1, a0
+; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
+; REMAT-NEXT:    vse32.v v10, (a0)
+; REMAT-NEXT:    li a0, 3
+; REMAT-NEXT:    slli a0, a0, 10
+; REMAT-NEXT:    add a0, a1, a0
+; REMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
+; REMAT-NEXT:    vse32.v v8, (a0)
+; REMAT-NEXT:    li a0, 7
+; REMAT-NEXT:    slli a0, a0, 9
+; REMAT-NEXT:    add a0, a1, a0
+; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
+; REMAT-NEXT:    vse32.v v10, (a0)
+; REMAT-NEXT:    lui a0, 1
+; REMAT-NEXT:    add a0, a1, a0
+; REMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
+; REMAT-NEXT:    vse32.v v8, (a0)
+; REMAT-NEXT:    li a0, 9
+; REMAT-NEXT:    slli a0, a0, 9
+; REMAT-NEXT:    add a0, a1, a0
+; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
+; REMAT-NEXT:    vse32.v v10, (a0)
+; REMAT-NEXT:    li a0, 5
+; REMAT-NEXT:    slli a0, a0, 10
+; REMAT-NEXT:    add a0, a1, a0
+; REMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
+; REMAT-NEXT:    vse32.v v8, (a0)
+; REMAT-NEXT:    li a0, 11
+; REMAT-NEXT:    slli a0, a0, 9
+; REMAT-NEXT:    add a0, a1, a0
+; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
+; REMAT-NEXT:    vse32.v v10, (a0)
+; REMAT-NEXT:    li a0, 3
+; REMAT-NEXT:    slli a0, a0, 11
+; REMAT-NEXT:    add a0, a1, a0
+; REMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
+; REMAT-NEXT:    vse32.v v8, (a0)
+; REMAT-NEXT:    li a0, 13
+; REMAT-NEXT:    slli a0, a0, 9
+; REMAT-NEXT:    add a0, a1, a0
+; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
+; REMAT-NEXT:    vse32.v v10, (a0)
+; REMAT-NEXT:    li a0, 7
+; REMAT-NEXT:    slli a0, a0, 10
+; REMAT-NEXT:    add a0, a1, a0
+; REMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
+; REMAT-NEXT:    vse32.v v8, (a0)
+; REMAT-NEXT:    li a0, 15
+; REMAT-NEXT:    slli a0, a0, 9
+; REMAT-NEXT:    add a0, a1, a0
+; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
+; REMAT-NEXT:    vse32.v v10, (a0)
+; REMAT-NEXT:    lui a0, 2
+; REMAT-NEXT:    add a0, a1, a0
+; REMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
+; REMAT-NEXT:    vse32.v v8, (a0)
+; REMAT-NEXT:    li a0, 17
+; REMAT-NEXT:    slli a0, a0, 9
+; REMAT-NEXT:    add a0, a1, a0
+; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
+; REMAT-NEXT:    vse32.v v10, (a0)
+; REMAT-NEXT:    li a0, 9
+; REMAT-NEXT:    slli a0, a0, 10
+; REMAT-NEXT:    add a0, a1, a0
+; REMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
+; REMAT-NEXT:    vse32.v v8, (a0)
+; REMAT-NEXT:    li a0, 19
+; REMAT-NEXT:    slli a0, a0, 9
+; REMAT-NEXT:    add a0, a1, a0
+; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
+; REMAT-NEXT:    vse32.v v10, (a0)
+; REMAT-NEXT:    li a0, 5
+; REMAT-NEXT:    slli a0, a0, 11
+; REMAT-NEXT:    add a0, a1, a0
+; REMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
+; REMAT-NEXT:    vse32.v v8, (a0)
+; REMAT-NEXT:    li a0, 21
+; REMAT-NEXT:    slli a0, a0, 9
+; REMAT-NEXT:    add a0, a1, a0
+; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
+; REMAT-NEXT:    vse32.v v10, (a0)
+; REMAT-NEXT:    li a0, 11
+; REMAT-NEXT:    slli a0, a0, 10
+; REMAT-NEXT:    add a0, a1, a0
+; REMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
+; REMAT-NEXT:    vse32.v v8, (a0)
+; REMAT-NEXT:    li a0, 23
+; REMAT-NEXT:    slli a0, a0, 9
+; REMAT-NEXT:    add a0, a1, a0
+; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
+; REMAT-NEXT:    vse32.v v10, (a0)
+; REMAT-NEXT:    lui a0, 3
+; REMAT-NEXT:    add a0, a1, a0
+; REMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
+; REMAT-NEXT:    vse32.v v8, (a0)
+; REMAT-NEXT:    li a0, 25
+; REMAT-NEXT:    slli a0, a0, 9
+; REMAT-NEXT:    add a0, a1, a0
+; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
+; REMAT-NEXT:    vse32.v v10, (a0)
+; REMAT-NEXT:    li a0, 13
+; REMAT-NEXT:    slli a0, a0, 10
+; REMAT-NEXT:    add a0, a1, a0
+; REMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
+; REMAT-NEXT:    vse32.v v8, (a0)
+; REMAT-NEXT:    li a0, 27
+; REMAT-NEXT:    slli a0, a0, 9
+; REMAT-NEXT:    add a0, a1, a0
+; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
+; REMAT-NEXT:    vse32.v v10, (a0)
+; REMAT-NEXT:    li a0, 7
+; REMAT-NEXT:    slli a0, a0, 11
+; REMAT-NEXT:    add a0, a1, a0
+; REMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
+; REMAT-NEXT:    vse32.v v8, (a0)
+; REMAT-NEXT:    li a0, 29
+; REMAT-NEXT:    slli a0, a0, 9
+; REMAT-NEXT:    add a0, a1, a0
+; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
+; REMAT-NEXT:    vse32.v v10, (a0)
+; REMAT-NEXT:    li a0, 15
+; REMAT-NEXT:    slli a0, a0, 10
+; REMAT-NEXT:    add a0, a1, a0
+; REMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
+; REMAT-NEXT:    vse32.v v8, (a0)
+; REMAT-NEXT:    li a0, 31
+; REMAT-NEXT:    slli a0, a0, 9
+; REMAT-NEXT:    add a0, a1, a0
+; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
+; REMAT-NEXT:    vse32.v v10, (a0)
+; REMAT-NEXT:    lui a0, 4
+; REMAT-NEXT:    add a0, a1, a0
+; REMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
+; REMAT-NEXT:    vse32.v v8, (a0)
+; REMAT-NEXT:    lui a0, 4
+; REMAT-NEXT:    addiw a0, a0, 512
+; REMAT-NEXT:    add a0, a1, a0
+; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
+; REMAT-NEXT:    vse32.v v10, (a0)
+; REMAT-NEXT:    li a0, 17
+; REMAT-NEXT:    slli a0, a0, 10
+; REMAT-NEXT:    add a0, a1, a0
+; REMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
+; REMAT-NEXT:    vse32.v v8, (a0)
+; REMAT-NEXT:    lui a0, 4
+; REMAT-NEXT:    addiw a0, a0, 1536
+; REMAT-NEXT:    add a0, a1, a0
+; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
+; REMAT-NEXT:    vse32.v v10, (a0)
+; REMAT-NEXT:    li a0, 9
+; REMAT-NEXT:    slli a0, a0, 11
+; REMAT-NEXT:    add a0, a1, a0
+; REMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
+; REMAT-NEXT:    vse32.v v8, (a0)
+; REMAT-NEXT:    lui a0, 5
+; REMAT-NEXT:    addiw a0, a0, -1536
+; REMAT-NEXT:    add a0, a1, a0
+; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
+; REMAT-NEXT:    vse32.v v10, (a0)
+; REMAT-NEXT:    li a0, 19
+; REMAT-NEXT:    slli a0, a0, 10
+; REMAT-NEXT:    add a0, a1, a0
+; REMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
+; REMAT-NEXT:    vse32.v v8, (a0)
+; REMAT-NEXT:    add ra, a1, ra
+; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
+; REMAT-NEXT:    vse32.v v10, (ra)
+; REMAT-NEXT:    add s11, a1, s11
+; REMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
+; REMAT-NEXT:    vse32.v v8, (s11)
+; REMAT-NEXT:    add s10, a1, s10
+; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
+; REMAT-NEXT:    vse32.v v10, (s10)
+; REMAT-NEXT:    add s9, a1, s9
+; REMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
+; REMAT-NEXT:    vse32.v v8, (s9)
+; REMAT-NEXT:    add s8, a1, s8
+; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
+; REMAT-NEXT:    vse32.v v10, (s8)
+; REMAT-NEXT:    add s7, a1, s7
+; REMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
+; REMAT-NEXT:    vse32.v v8, (s7)
+; REMAT-NEXT:    add s6, a1, s6
+; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
+; REMAT-NEXT:    vse32.v v10, (s6)
+; REMAT-NEXT:    add s5, a1, s5
+; REMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
+; REMAT-NEXT:    vse32.v v8, (s5)
+; REMAT-NEXT:    add s4, a1, s4
+; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
+; REMAT-NEXT:    vse32.v v10, (s4)
+; REMAT-NEXT:    add s3, a1, s3
+; REMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
+; REMAT-NEXT:    vse32.v v8, (s3)
+; REMAT-NEXT:    add s2, a1, s2
+; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
+; REMAT-NEXT:    vse32.v v10, (s2)
+; REMAT-NEXT:    add s1, a1, s1
+; REMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
+; REMAT-NEXT:    vse32.v v8, (s1)
+; REMAT-NEXT:    add s0, a1, s0
+; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
+; REMAT-NEXT:    vse32.v v10, (s0)
+; REMAT-NEXT:    add t6, a1, t6
+; REMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
+; REMAT-NEXT:    vse32.v v8, (t6)
+; REMAT-NEXT:    add t5, a1, t5
+; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
+; REMAT-NEXT:    vse32.v v10, (t5)
+; REMAT-NEXT:    add t4, a1, t4
+; REMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
+; REMAT-NEXT:    vse32.v v8, (t4)
+; REMAT-NEXT:    add t3, a1, t3
+; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
+; REMAT-NEXT:    vse32.v v10, (t3)
+; REMAT-NEXT:    add t2, a1, t2
+; REMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
+; REMAT-NEXT:    vse32.v v8, (t2)
+; REMAT-NEXT:    add t1, a1, t1
+; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
+; REMAT-NEXT:    vse32.v v10, (t1)
+; REMAT-NEXT:    add t0, a1, t0
+; REMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
+; REMAT-NEXT:    vse32.v v8, (t0)
+; REMAT-NEXT:    add a7, a1, a7
+; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
+; REMAT-NEXT:    vse32.v v10, (a7)
+; REMAT-NEXT:    add a6, a1, a6
+; REMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
+; REMAT-NEXT:    vse32.v v8, (a6)
+; REMAT-NEXT:    add a5, a1, a5
+; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
+; REMAT-NEXT:    vse32.v v10, (a5)
+; REMAT-NEXT:    add a4, a1, a4
+; REMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
+; REMAT-NEXT:    vse32.v v8, (a4)
+; REMAT-NEXT:    add a3, a1, a3
+; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
+; REMAT-NEXT:    vse32.v v10, (a3)
+; REMAT-NEXT:    add a2, a1, a2
+; REMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
+; REMAT-NEXT:    vse32.v v8, (a2)
+; REMAT-NEXT:    lui a0, 8
+; REMAT-NEXT:    addiw a0, a0, 512
+; REMAT-NEXT:    add a0, a1, a0
+; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
+; REMAT-NEXT:    vse32.v v10, (a0)
+; REMAT-NEXT:    lui a0, 8
+; REMAT-NEXT:    addiw a0, a0, 1024
+; REMAT-NEXT:    add a0, a1, a0
+; REMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
+; REMAT-NEXT:    vse32.v v8, (a0)
+; REMAT-NEXT:    lui a0, 8
+; REMAT-NEXT:    addiw a0, a0, 1536
+; REMAT-NEXT:    add a0, a1, a0
+; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
+; REMAT-NEXT:    vse32.v v10, (a0)
+; REMAT-NEXT:    li a0, 17
+; REMAT-NEXT:    slli a0, a0, 11
+; REMAT-NEXT:    add a0, a1, a0
+; REMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
+; REMAT-NEXT:    vse32.v v8, (a0)
+; REMAT-NEXT:    lui a0, 9
+; REMAT-NEXT:    addiw a0, a0, -1536
+; REMAT-NEXT:    add a0, a1, a0
+; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
+; REMAT-NEXT:    vse32.v v10, (a0)
+; REMAT-NEXT:    lui a0, 9
+; REMAT-NEXT:    addiw a0, a0, -1024
+; REMAT-NEXT:    add a0, a1, a0
+; REMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
+; REMAT-NEXT:    vse32.v v8, (a0)
+; REMAT-NEXT:    lui a0, 9
+; REMAT-NEXT:    addiw a0, a0, -512
+; REMAT-NEXT:    add a0, a1, a0
+; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
+; REMAT-NEXT:    vse32.v v10, (a0)
+; REMAT-NEXT:    lui a0, 9
+; REMAT-NEXT:    add a0, a1, a0
+; REMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
+; REMAT-NEXT:    vse32.v v8, (a0)
+; REMAT-NEXT:    lui a0, 9
+; REMAT-NEXT:    addiw a0, a0, 512
+; REMAT-NEXT:    add a0, a1, a0
+; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
+; REMAT-NEXT:    vse32.v v10, (a0)
+; REMAT-NEXT:    lui a0, 9
+; REMAT-NEXT:    addiw a0, a0, 1024
+; REMAT-NEXT:    add a0, a1, a0
+; REMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
+; REMAT-NEXT:    vse32.v v8, (a0)
+; REMAT-NEXT:    lui a0, 9
+; REMAT-NEXT:    addiw a0, a0, 1536
+; REMAT-NEXT:    add a0, a1, a0
+; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
+; REMAT-NEXT:    vse32.v v10, (a0)
+; REMAT-NEXT:    li a0, 19
+; REMAT-NEXT:    slli a0, a0, 11
+; REMAT-NEXT:    add a0, a1, a0
+; REMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
+; REMAT-NEXT:    vse32.v v8, (a0)
+; REMAT-NEXT:    lui a0, 10
+; REMAT-NEXT:    addiw a0, a0, -1536
+; REMAT-NEXT:    add a0, a1, a0
+; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
+; REMAT-NEXT:    vse32.v v10, (a0)
+; REMAT-NEXT:    lui a0, 10
+; REMAT-NEXT:    addiw a0, a0, -1024
+; REMAT-NEXT:    add a0, a1, a0
+; REMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
+; REMAT-NEXT:    vse32.v v8, (a0)
+; REMAT-NEXT:    lui a0, 10
+; REMAT-NEXT:    addiw a0, a0, -512
+; REMAT-NEXT:    add a0, a1, a0
+; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
+; REMAT-NEXT:    vse32.v v10, (a0)
+; REMAT-NEXT:    lui a0, 10
+; REMAT-NEXT:    add a0, a1, a0
+; REMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
+; REMAT-NEXT:    vse32.v v8, (a0)
+; REMAT-NEXT:    lui a0, 10
+; REMAT-NEXT:    addiw a0, a0, 512
+; REMAT-NEXT:    add a0, a1, a0
+; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
+; REMAT-NEXT:    vse32.v v10, (a0)
+; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
+; REMAT-NEXT:    ld ra, 104(sp) # 8-byte Folded Reload
+; REMAT-NEXT:    ld s0, 96(sp) # 8-byte Folded Reload
+; REMAT-NEXT:    ld s1, 88(sp) # 8-byte Folded Reload
+; REMAT-NEXT:    ld s2, 80(sp) # 8-byte Folded Reload
+; REMAT-NEXT:    ld s3, 72(sp) # 8-byte Folded Reload
+; REMAT-NEXT:    ld s4, 64(sp) # 8-byte Folded Reload
+; REMAT-NEXT:    ld s5, 56(sp) # 8-byte Folded Reload
+; REMAT-NEXT:    ld s6, 48(sp) # 8-byte Folded Reload
+; REMAT-NEXT:    ld s7, 40(sp) # 8-byte Folded Reload
+; REMAT-NEXT:    ld s8, 32(sp) # 8-byte Folded Reload
+; REMAT-NEXT:    ld s9, 24(sp) # 8-byte Folded Reload
+; REMAT-NEXT:    ld s10, 16(sp) # 8-byte Folded Reload
+; REMAT-NEXT:    ld s11, 8(sp) # 8-byte Folded Reload
+; REMAT-NEXT:    addi sp, sp, 112
+; REMAT-NEXT:    ret
+  %4 = tail call i64 @llvm.riscv.vsetvli.i64(i64 32, i64 2, i64 1)
+  %5 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %0, i64 %4)
+  %6 = getelementptr inbounds i32, ptr %0, i64 128
+  %7 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %6, i64 %4)
+  %8 = getelementptr inbounds i32, ptr %0, i64 256
+  %9 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %8, i64 %4)
+  tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %5, <vscale x 4 x i32> %7, i64 %4)
+  %10 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %8, i64 %4)
+  %11 = getelementptr inbounds i32, ptr %0, i64 384
+  %12 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %11, i64 %4)
+  tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %7, <vscale x 4 x i32> %9, i64 %4)
+  %13 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %11, i64 %4)
+  %14 = getelementptr inbounds i32, ptr %0, i64 512
+  %15 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %14, i64 %4)
+  tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %10, <vscale x 4 x i32> %12, i64 %4)
+  %16 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %14, i64 %4)
+  %17 = getelementptr inbounds i32, ptr %0, i64 640
+  %18 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %17, i64 %4)
+  tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %13, <vscale x 4 x i32> %15, i64 %4)
+  %19 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %17, i64 %4)
+  %20 = getelementptr inbounds i32, ptr %0, i64 768
+  %21 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %20, i64 %4)
+  tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %16, <vscale x 4 x i32> %18, i64 %4)
+  %22 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %20, i64 %4)
+  %23 = getelementptr inbounds i32, ptr %0, i64 896
+  %24 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %23, i64 %4)
+  tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %19, <vscale x 4 x i32> %21, i64 %4)
+  %25 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %23, i64 %4)
+  %26 = getelementptr inbounds i32, ptr %0, i64 1024
+  %27 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %26, i64 %4)
+  tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %22, <vscale x 4 x i32> %24, i64 %4)
+  %28 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %26, i64 %4)
+  %29 = getelementptr inbounds i32, ptr %0, i64 1152
+  %30 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %29, i64 %4)
+  tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %25, <vscale x 4 x i32> %27, i64 %4)
+  %31 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %29, i64 %4)
+  %32 = getelementptr inbounds i32, ptr %0, i64 1280
+  %33 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %32, i64 %4)
+  tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %28, <vscale x 4 x i32> %30, i64 %4)
+  %34 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %32, i64 %4)
+  %35 = getelementptr inbounds i32, ptr %0, i64 1408
+  %36 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %35, i64 %4)
+  tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %31, <vscale x 4 x i32> %33, i64 %4)
+  %37 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %35, i64 %4)
+  %38 = getelementptr inbounds i32, ptr %0, i64 1536
+  %39 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %38, i64 %4)
+  tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %34, <vscale x 4 x i32> %36, i64 %4)
+  %40 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %38, i64 %4)
+  %41 = getelementptr inbounds i32, ptr %0, i64 1664
+  %42 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %41, i64 %4)
+  tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %37, <vscale x 4 x i32> %39, i64 %4)
+  %43 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %41, i64 %4)
+  %44 = getelementptr inbounds i32, ptr %0, i64 1792
+  %45 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %44, i64 %4)
+  tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %40, <vscale x 4 x i32> %42, i64 %4)
+  %46 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %44, i64 %4)
+  %47 = getelementptr inbounds i32, ptr %0, i64 1920
+  %48 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %47, i64 %4)
+  tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %43, <vscale x 4 x i32> %45, i64 %4)
+  %49 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %47, i64 %4)
+  %50 = getelementptr inbounds i32, ptr %0, i64 2048
+  %51 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %50, i64 %4)
+  tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %46, <vscale x 4 x i32> %48, i64 %4)
+  %52 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %50, i64 %4)
+  %53 = getelementptr inbounds i32, ptr %0, i64 2176
+  %54 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %53, i64 %4)
+  tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %49, <vscale x 4 x i32> %51, i64 %4)
+  %55 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %53, i64 %4)
+  %56 = getelementptr inbounds i32, ptr %0, i64 2304
+  %57 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %56, i64 %4)
+  tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %52, <vscale x 4 x i32> %54, i64 %4)
+  %58 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %56, i64 %4)
+  %59 = getelementptr inbounds i32, ptr %0, i64 2432
+  %60 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %59, i64 %4)
+  tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %55, <vscale x 4 x i32> %57, i64 %4)
+  %61 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %59, i64 %4)
+  %62 = getelementptr inbounds i32, ptr %0, i64 2560
+  %63 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %62, i64 %4)
+  tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %58, <vscale x 4 x i32> %60, i64 %4)
+  %64 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %62, i64 %4)
+  %65 = getelementptr inbounds i32, ptr %0, i64 2688
+  %66 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %65, i64 %4)
+  tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %61, <vscale x 4 x i32> %63, i64 %4)
+  %67 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %65, i64 %4)
+  %68 = getelementptr inbounds i32, ptr %0, i64 2816
+  %69 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %68, i64 %4)
+  tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %64, <vscale x 4 x i32> %66, i64 %4)
+  %70 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %68, i64 %4)
+  %71 = getelementptr inbounds i32, ptr %0, i64 2944
+  %72 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %71, i64 %4)
+  tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %67, <vscale x 4 x i32> %69, i64 %4)
+  %73 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %71, i64 %4)
+  %74 = getelementptr inbounds i32, ptr %0, i64 3072
+  %75 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %74, i64 %4)
+  tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %70, <vscale x 4 x i32> %72, i64 %4)
+  %76 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %74, i64 %4)
+  %77 = getelementptr inbounds i32, ptr %0, i64 3200
+  %78 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %77, i64 %4)
+  tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %73, <vscale x 4 x i32> %75, i64 %4)
+  %79 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %77, i64 %4)
+  %80 = getelementptr inbounds i32, ptr %0, i64 3328
+  %81 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %80, i64 %4)
+  tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %76, <vscale x 4 x i32> %78, i64 %4)
+  %82 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %80, i64 %4)
+  %83 = getelementptr inbounds i32, ptr %0, i64 3456
+  %84 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %83, i64 %4)
+  tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %79, <vscale x 4 x i32> %81, i64 %4)
+  %85 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %83, i64 %4)
+  %86 = getelementptr inbounds i32, ptr %0, i64 3584
+  %87 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %86, i64 %4)
+  tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %82, <vscale x 4 x i32> %84, i64 %4)
+  %88 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %86, i64 %4)
+  %89 = getelementptr inbounds i32, ptr %0, i64 3712
+  %90 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %89, i64 %4)
+  tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %85, <vscale x 4 x i32> %87, i64 %4)
+  %91 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %89, i64 %4)
+  %92 = getelementptr inbounds i32, ptr %0, i64 3840
+  %93 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %92, i64 %4)
+  tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %88, <vscale x 4 x i32> %90, i64 %4)
+  %94 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %92, i64 %4)
+  %95 = getelementptr inbounds i32, ptr %0, i64 3968
+  %96 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %95, i64 %4)
+  tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %91, <vscale x 4 x i32> %93, i64 %4)
+  %97 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %95, i64 %4)
+  %98 = getelementptr inbounds i32, ptr %0, i64 4096
+  %99 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %98, i64 %4)
+  tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %94, <vscale x 4 x i32> %96, i64 %4)
+  %100 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %98, i64 %4)
+  %101 = getelementptr inbounds i32, ptr %0, i64 4224
+  %102 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %101, i64 %4)
+  tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %97, <vscale x 4 x i32> %99, i64 %4)
+  %103 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %101, i64 %4)
+  %104 = getelementptr inbounds i32, ptr %0, i64 4352
+  %105 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %104, i64 %4)
+  tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %100, <vscale x 4 x i32> %102, i64 %4)
+  %106 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %104, i64 %4)
+  %107 = getelementptr inbounds i32, ptr %0, i64 4480
+  %108 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %107, i64 %4)
+  tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %103, <vscale x 4 x i32> %105, i64 %4)
+  %109 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %107, i64 %4)
+  %110 = getelementptr inbounds i32, ptr %0, i64 4608
+  %111 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %110, i64 %4)
+  tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %106, <vscale x 4 x i32> %108, i64 %4)
+  %112 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %110, i64 %4)
+  %113 = getelementptr inbounds i32, ptr %0, i64 4736
+  %114 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %113, i64 %4)
+  tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %109, <vscale x 4 x i32> %111, i64 %4)
+  %115 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %113, i64 %4)
+  %116 = getelementptr inbounds i32, ptr %0, i64 4864
+  %117 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %116, i64 %4)
+  tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %112, <vscale x 4 x i32> %114, i64 %4)
+  %118 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %116, i64 %4)
+  %119 = getelementptr inbounds i32, ptr %0, i64 4992
+  %120 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %119, i64 %4)
+  tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %115, <vscale x 4 x i32> %117, i64 %4)
+  %121 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %119, i64 %4)
+  %122 = getelementptr inbounds i32, ptr %0, i64 5120
+  %123 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %122, i64 %4)
+  tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %118, <vscale x 4 x i32> %120, i64 %4)
+  %124 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %122, i64 %4)
+  %125 = getelementptr inbounds i32, ptr %0, i64 5248
+  %126 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %125, i64 %4)
+  tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %121, <vscale x 4 x i32> %123, i64 %4)
+  %127 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %125, i64 %4)
+  %128 = getelementptr inbounds i32, ptr %0, i64 5376
+  %129 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %128, i64 %4)
+  tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %124, <vscale x 4 x i32> %126, i64 %4)
+  %130 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %128, i64 %4)
+  %131 = getelementptr inbounds i32, ptr %0, i64 5504
+  %132 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %131, i64 %4)
+  tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %127, <vscale x 4 x i32> %129, i64 %4)
+  %133 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %131, i64 %4)
+  %134 = getelementptr inbounds i32, ptr %0, i64 5632
+  %135 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %134, i64 %4)
+  tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %130, <vscale x 4 x i32> %132, i64 %4)
+  %136 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %134, i64 %4)
+  %137 = getelementptr inbounds i32, ptr %0, i64 5760
+  %138 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %137, i64 %4)
+  tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %133, <vscale x 4 x i32> %135, i64 %4)
+  %139 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %137, i64 %4)
+  %140 = getelementptr inbounds i32, ptr %0, i64 5888
+  %141 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %140, i64 %4)
+  tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %136, <vscale x 4 x i32> %138, i64 %4)
+  %142 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %140, i64 %4)
+  %143 = getelementptr inbounds i32, ptr %0, i64 6016
+  %144 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %143, i64 %4)
+  tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %139, <vscale x 4 x i32> %141, i64 %4)
+  %145 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %143, i64 %4)
+  %146 = getelementptr inbounds i32, ptr %0, i64 6144
+  %147 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %146, i64 %4)
+  tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %142, <vscale x 4 x i32> %144, i64 %4)
+  %148 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %146, i64 %4)
+  %149 = getelementptr inbounds i32, ptr %0, i64 6272
+  %150 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %149, i64 %4)
+  tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %145, <vscale x 4 x i32> %147, i64 %4)
+  %151 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %149, i64 %4)
+  %152 = getelementptr inbounds i32, ptr %0, i64 6400
+  %153 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %152, i64 %4)
+  tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %148, <vscale x 4 x i32> %150, i64 %4)
+  %154 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %152, i64 %4)
+  %155 = getelementptr inbounds i32, ptr %0, i64 6528
+  %156 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %155, i64 %4)
+  tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %151, <vscale x 4 x i32> %153, i64 %4)
+  %157 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %155, i64 %4)
+  %158 = getelementptr inbounds i32, ptr %0, i64 6656
+  %159 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %158, i64 %4)
+  tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %154, <vscale x 4 x i32> %156, i64 %4)
+  %160 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %158, i64 %4)
+  %161 = getelementptr inbounds i32, ptr %0, i64 6784
+  %162 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %161, i64 %4)
+  tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %157, <vscale x 4 x i32> %159, i64 %4)
+  %163 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %161, i64 %4)
+  %164 = getelementptr inbounds i32, ptr %0, i64 6912
+  %165 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %164, i64 %4)
+  tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %160, <vscale x 4 x i32> %162, i64 %4)
+  %166 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %164, i64 %4)
+  %167 = getelementptr inbounds i32, ptr %0, i64 7040
+  %168 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %167, i64 %4)
+  tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %163, <vscale x 4 x i32> %165, i64 %4)
+  %169 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %167, i64 %4)
+  %170 = getelementptr inbounds i32, ptr %0, i64 7168
+  %171 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %170, i64 %4)
+  tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %166, <vscale x 4 x i32> %168, i64 %4)
+  %172 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %170, i64 %4)
+  %173 = getelementptr inbounds i32, ptr %0, i64 7296
+  %174 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %173, i64 %4)
+  tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %169, <vscale x 4 x i32> %171, i64 %4)
+  %175 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %173, i64 %4)
+  %176 = getelementptr inbounds i32, ptr %0, i64 7424
+  %177 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %176, i64 %4)
+  tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %172, <vscale x 4 x i32> %174, i64 %4)
+  %178 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %176, i64 %4)
+  %179 = getelementptr inbounds i32, ptr %0, i64 7552
+  %180 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %179, i64 %4)
+  tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %175, <vscale x 4 x i32> %177, i64 %4)
+  %181 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %179, i64 %4)
+  %182 = getelementptr inbounds i32, ptr %0, i64 7680
+  %183 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %182, i64 %4)
+  tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %178, <vscale x 4 x i32> %180, i64 %4)
+  %184 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %182, i64 %4)
+  %185 = getelementptr inbounds i32, ptr %0, i64 7808
+  %186 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %185, i64 %4)
+  tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %181, <vscale x 4 x i32> %183, i64 %4)
+  %187 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %185, i64 %4)
+  %188 = getelementptr inbounds i32, ptr %0, i64 7936
+  %189 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %188, i64 %4)
+  tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %184, <vscale x 4 x i32> %186, i64 %4)
+  %190 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %188, i64 %4)
+  %191 = getelementptr inbounds i32, ptr %0, i64 8064
+  %192 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %191, i64 %4)
+  tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %187, <vscale x 4 x i32> %189, i64 %4)
+  %193 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %191, i64 %4)
+  %194 = getelementptr inbounds i32, ptr %0, i64 8192
+  %195 = tail call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> poison, ptr %194, i64 %4)
+  tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %190, <vscale x 4 x i32> %192, i64 %4)
+  tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64 3, i64 0, <vscale x 4 x i32> %193, <vscale x 4 x i32> %195, i64 %4)
+  %196 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
+  %197 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
+  %198 = getelementptr inbounds i32, ptr %1, i64 256
+  tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %196, ptr %198, i64 %4)
+  %199 = getelementptr inbounds i32, ptr %1, i64 384
+  %200 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
+  tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %197, ptr %199, i64 %4)
+  %201 = getelementptr inbounds i32, ptr %1, i64 512
+  %202 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
+  tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %200, ptr %201, i64 %4)
+  %203 = getelementptr inbounds i32, ptr %1, i64 640
+  %204 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
+  tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %202, ptr %203, i64 %4)
+  %205 = getelementptr inbounds i32, ptr %1, i64 768
+  %206 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
+  tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %204, ptr %205, i64 %4)
+  %207 = getelementptr inbounds i32, ptr %1, i64 896
+  %208 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
+  tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %206, ptr %207, i64 %4)
+  %209 = getelementptr inbounds i32, ptr %1, i64 1024
+  %210 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
+  tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %208, ptr %209, i64 %4)
+  %211 = getelementptr inbounds i32, ptr %1, i64 1152
+  %212 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
+  tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %210, ptr %211, i64 %4)
+  %213 = getelementptr inbounds i32, ptr %1, i64 1280
+  %214 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
+  tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %212, ptr %213, i64 %4)
+  %215 = getelementptr inbounds i32, ptr %1, i64 1408
+  %216 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
+  tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %214, ptr %215, i64 %4)
+  %217 = getelementptr inbounds i32, ptr %1, i64 1536
+  %218 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
+  tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %216, ptr %217, i64 %4)
+  %219 = getelementptr inbounds i32, ptr %1, i64 1664
+  %220 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
+  tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %218, ptr %219, i64 %4)
+  %221 = getelementptr inbounds i32, ptr %1, i64 1792
+  %222 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
+  tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %220, ptr %221, i64 %4)
+  %223 = getelementptr inbounds i32, ptr %1, i64 1920
+  %224 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
+  tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %222, ptr %223, i64 %4)
+  %225 = getelementptr inbounds i32, ptr %1, i64 2048
+  %226 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
+  tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %224, ptr %225, i64 %4)
+  %227 = getelementptr inbounds i32, ptr %1, i64 2176
+  %228 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
+  tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %226, ptr %227, i64 %4)
+  %229 = getelementptr inbounds i32, ptr %1, i64 2304
+  %230 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
+  tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %228, ptr %229, i64 %4)
+  %231 = getelementptr inbounds i32, ptr %1, i64 2432
+  %232 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
+  tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %230, ptr %231, i64 %4)
+  %233 = getelementptr inbounds i32, ptr %1, i64 2560
+  %234 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
+  tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %232, ptr %233, i64 %4)
+  %235 = getelementptr inbounds i32, ptr %1, i64 2688
+  %236 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
+  tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %234, ptr %235, i64 %4)
+  %237 = getelementptr inbounds i32, ptr %1, i64 2816
+  %238 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
+  tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %236, ptr %237, i64 %4)
+  %239 = getelementptr inbounds i32, ptr %1, i64 2944
+  %240 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
+  tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %238, ptr %239, i64 %4)
+  %241 = getelementptr inbounds i32, ptr %1, i64 3072
+  %242 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
+  tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %240, ptr %241, i64 %4)
+  %243 = getelementptr inbounds i32, ptr %1, i64 3200
+  %244 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
+  tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %242, ptr %243, i64 %4)
+  %245 = getelementptr inbounds i32, ptr %1, i64 3328
+  %246 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
+  tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %244, ptr %245, i64 %4)
+  %247 = getelementptr inbounds i32, ptr %1, i64 3456
+  %248 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
+  tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %246, ptr %247, i64 %4)
+  %249 = getelementptr inbounds i32, ptr %1, i64 3584
+  %250 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
+  tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %248, ptr %249, i64 %4)
+  %251 = getelementptr inbounds i32, ptr %1, i64 3712
+  %252 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
+  tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %250, ptr %251, i64 %4)
+  %253 = getelementptr inbounds i32, ptr %1, i64 3840
+  %254 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
+  tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %252, ptr %253, i64 %4)
+  %255 = getelementptr inbounds i32, ptr %1, i64 3968
+  %256 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
+  tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %254, ptr %255, i64 %4)
+  %257 = getelementptr inbounds i32, ptr %1, i64 4096
+  %258 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
+  tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %256, ptr %257, i64 %4)
+  %259 = getelementptr inbounds i32, ptr %1, i64 4224
+  %260 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
+  tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %258, ptr %259, i64 %4)
+  %261 = getelementptr inbounds i32, ptr %1, i64 4352
+  %262 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
+  tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %260, ptr %261, i64 %4)
+  %263 = getelementptr inbounds i32, ptr %1, i64 4480
+  %264 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
+  tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %262, ptr %263, i64 %4)
+  %265 = getelementptr inbounds i32, ptr %1, i64 4608
+  %266 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
+  tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %264, ptr %265, i64 %4)
+  %267 = getelementptr inbounds i32, ptr %1, i64 4736
+  %268 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
+  tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %266, ptr %267, i64 %4)
+  %269 = getelementptr inbounds i32, ptr %1, i64 4864
+  %270 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
+  tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %268, ptr %269, i64 %4)
+  %271 = getelementptr inbounds i32, ptr %1, i64 4992
+  %272 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
+  tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %270, ptr %271, i64 %4)
+  %273 = getelementptr inbounds i32, ptr %1, i64 5120
+  %274 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
+  tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %272, ptr %273, i64 %4)
+  %275 = getelementptr inbounds i32, ptr %1, i64 5248
+  %276 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
+  tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %274, ptr %275, i64 %4)
+  %277 = getelementptr inbounds i32, ptr %1, i64 5376
+  %278 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
+  tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %276, ptr %277, i64 %4)
+  %279 = getelementptr inbounds i32, ptr %1, i64 5504
+  %280 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
+  tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %278, ptr %279, i64 %4)
+  %281 = getelementptr inbounds i32, ptr %1, i64 5632
+  %282 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
+  tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %280, ptr %281, i64 %4)
+  %283 = getelementptr inbounds i32, ptr %1, i64 5760
+  %284 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
+  tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %282, ptr %283, i64 %4)
+  %285 = getelementptr inbounds i32, ptr %1, i64 5888
+  %286 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
+  tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %284, ptr %285, i64 %4)
+  %287 = getelementptr inbounds i32, ptr %1, i64 6016
+  %288 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
+  tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %286, ptr %287, i64 %4)
+  %289 = getelementptr inbounds i32, ptr %1, i64 6144
+  %290 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
+  tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %288, ptr %289, i64 %4)
+  %291 = getelementptr inbounds i32, ptr %1, i64 6272
+  %292 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
+  tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %290, ptr %291, i64 %4)
+  %293 = getelementptr inbounds i32, ptr %1, i64 6400
+  %294 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
+  tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %292, ptr %293, i64 %4)
+  %295 = getelementptr inbounds i32, ptr %1, i64 6528
+  %296 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
+  tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %294, ptr %295, i64 %4)
+  %297 = getelementptr inbounds i32, ptr %1, i64 6656
+  %298 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
+  tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %296, ptr %297, i64 %4)
+  %299 = getelementptr inbounds i32, ptr %1, i64 6784
+  %300 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
+  tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %298, ptr %299, i64 %4)
+  %301 = getelementptr inbounds i32, ptr %1, i64 6912
+  %302 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
+  tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %300, ptr %301, i64 %4)
+  %303 = getelementptr inbounds i32, ptr %1, i64 7040
+  %304 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
+  tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %302, ptr %303, i64 %4)
+  %305 = getelementptr inbounds i32, ptr %1, i64 7168
+  %306 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
+  tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %304, ptr %305, i64 %4)
+  %307 = getelementptr inbounds i32, ptr %1, i64 7296
+  %308 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
+  tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %306, ptr %307, i64 %4)
+  %309 = getelementptr inbounds i32, ptr %1, i64 7424
+  %310 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
+  tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %308, ptr %309, i64 %4)
+  %311 = getelementptr inbounds i32, ptr %1, i64 7552
+  %312 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
+  tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %310, ptr %311, i64 %4)
+  %313 = getelementptr inbounds i32, ptr %1, i64 7680
+  %314 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
+  tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %312, ptr %313, i64 %4)
+  %315 = getelementptr inbounds i32, ptr %1, i64 7808
+  %316 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
+  tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %314, ptr %315, i64 %4)
+  %317 = getelementptr inbounds i32, ptr %1, i64 7936
+  %318 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
+  tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %316, ptr %317, i64 %4)
+  %319 = getelementptr inbounds i32, ptr %1, i64 8064
+  %320 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
+  tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %318, ptr %319, i64 %4)
+  %321 = getelementptr inbounds i32, ptr %1, i64 8192
+  %322 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
+  tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %320, ptr %321, i64 %4)
+  %323 = getelementptr inbounds i32, ptr %1, i64 8320
+  %324 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
+  tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %322, ptr %323, i64 %4)
+  %325 = getelementptr inbounds i32, ptr %1, i64 8448
+  %326 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
+  tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %324, ptr %325, i64 %4)
+  %327 = getelementptr inbounds i32, ptr %1, i64 8576
+  %328 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
+  tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %326, ptr %327, i64 %4)
+  %329 = getelementptr inbounds i32, ptr %1, i64 8704
+  %330 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
+  tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %328, ptr %329, i64 %4)
+  %331 = getelementptr inbounds i32, ptr %1, i64 8832
+  %332 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
+  tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %330, ptr %331, i64 %4)
+  %333 = getelementptr inbounds i32, ptr %1, i64 8960
+  %334 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
+  tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %332, ptr %333, i64 %4)
+  %335 = getelementptr inbounds i32, ptr %1, i64 9088
+  %336 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
+  tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %334, ptr %335, i64 %4)
+  %337 = getelementptr inbounds i32, ptr %1, i64 9216
+  %338 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
+  tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %336, ptr %337, i64 %4)
+  %339 = getelementptr inbounds i32, ptr %1, i64 9344
+  %340 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
+  tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %338, ptr %339, i64 %4)
+  %341 = getelementptr inbounds i32, ptr %1, i64 9472
+  %342 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
+  tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %340, ptr %341, i64 %4)
+  %343 = getelementptr inbounds i32, ptr %1, i64 9600
+  %344 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
+  tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %342, ptr %343, i64 %4)
+  %345 = getelementptr inbounds i32, ptr %1, i64 9728
+  %346 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
+  tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %344, ptr %345, i64 %4)
+  %347 = getelementptr inbounds i32, ptr %1, i64 9856
+  %348 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
+  tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %346, ptr %347, i64 %4)
+  %349 = getelementptr inbounds i32, ptr %1, i64 9984
+  %350 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
+  tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %348, ptr %349, i64 %4)
+  %351 = getelementptr inbounds i32, ptr %1, i64 10112
+  %352 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
+  tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %350, ptr %351, i64 %4)
+  %353 = getelementptr inbounds i32, ptr %1, i64 10240
+  %354 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
+  tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %352, ptr %353, i64 %4)
+  %355 = getelementptr inbounds i32, ptr %1, i64 10368
+  %356 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
+  tail call void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32> %354, ptr %355, i64 %4)
+  %357 = tail call <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64 2, i64 0, i64 0, i64 %4)
+  ret void
+}
+
+declare i64 @llvm.riscv.vsetvli.i64(i64, i64, i64)
+declare <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32>, ptr, i64)
+declare void @llvm.riscv.sf.vc.vv.se.i64.nxv4i32.nxv4i32.i64(i64, i64, <vscale x 4 x i32>, <vscale x 4 x i32>, i64)
+declare <vscale x 4 x i32> @llvm.riscv.sf.vc.v.i.se.nxv4i32.i64.i64.i64(i64, i64, i64, i64)
+declare void @llvm.riscv.vse.nxv4i32.i64(<vscale x 4 x i32>, ptr, i64)
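+
+; The REMAT output above is produced by compiling this test with the
+; experimental flag added in this patch. A minimal sketch of such an
+; invocation, assuming riscv64 with the V and XSfvcp (SiFive VCIX)
+; extensions enabled -- the exact RUN lines and -mattr string used by
+; pr69586.ll are assumptions here, not quoted from the file:
+;
+;   llc -mtriple=riscv64 -mattr=+v,+xsfvcp \
+;       -riscv-use-rematerializable-movimm \
+;       llvm/test/CodeGen/RISCV/pr69586.ll -o -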