[llvm] Greedy: Make eviction broken hint cost use CopyCost units (PR #160084)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Sep 22 05:23:29 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-regalloc
Author: Matt Arsenault (arsenm)
<details>
<summary>Changes</summary>
Change the eviction advisor's heuristic cost, based on the number of broken
hints, to work in units of copy cost rather than the magic number 1.
The intent is to allow breaking hints for cheap subregisters in favor
of more expensive register tuples.
The llvm.amdgcn.image.dim.gfx90a.ll change shows a simple example of
the case I am attempting to solve. Use of tuples in ABI contexts ends up
looking like this:
%argN = COPY $vgprN
%tuple = inst %argN
$vgpr0 = COPY %tuple.sub0
$vgpr1 = COPY %tuple.sub1
$vgpr2 = COPY %tuple.sub2
$vgpr3 = COPY %tuple.sub3
Since there are physreg copies in the input and output sequence,
both have hints to a physreg. The wider tuple hint on the output
should win though, since this satisfies 4 hints instead of 1.
This is the obvious part of a larger change to better handle
subregister interference with register tuples, and is not sufficient
to handle the original case I am looking at. There are several bugs here
that are proving tricky to untangle. In particular, there is a double
counting bug for all registers with multiple regunits; the cost of breaking
the interfering hint is added once per interfering virtual register, and those
registers are visited repeatedly across regunits. Fixing this badly regresses
a number of RISCV tests, which seem to rely on overestimating the cost
in tryFindEvictionCandidate to avoid early-exiting the eviction
candidate loop.
---
Patch is 2.40 MiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/160084.diff
7 Files Affected:
- (modified) llvm/lib/CodeGen/RegAllocEvictionAdvisor.cpp (+5-3)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.dim.gfx90a.ll (+8-11)
- (modified) llvm/test/CodeGen/RISCV/rvv/vloxseg-rv32.ll (+1556-1556)
- (modified) llvm/test/CodeGen/RISCV/rvv/vloxseg-rv64.ll (+1892-1892)
- (modified) llvm/test/CodeGen/RISCV/rvv/vluxseg-rv32.ll (+1556-1556)
- (modified) llvm/test/CodeGen/RISCV/rvv/vluxseg-rv64.ll (+1906-1906)
- (modified) llvm/test/CodeGen/RISCV/zilsd.ll (+3-4)
``````````diff
diff --git a/llvm/lib/CodeGen/RegAllocEvictionAdvisor.cpp b/llvm/lib/CodeGen/RegAllocEvictionAdvisor.cpp
index 0d44ddc428570..f2c2f74755ace 100644
--- a/llvm/lib/CodeGen/RegAllocEvictionAdvisor.cpp
+++ b/llvm/lib/CodeGen/RegAllocEvictionAdvisor.cpp
@@ -224,7 +224,7 @@ bool DefaultEvictionAdvisor::canEvictHintInterference(
const LiveInterval &VirtReg, MCRegister PhysReg,
const SmallVirtRegSet &FixedRegisters) const {
EvictionCost MaxCost;
- MaxCost.setBrokenHints(1);
+ MaxCost.setBrokenHints(MRI->getRegClass(VirtReg.reg())->getCopyCost());
return canEvictInterferenceBasedOnCost(VirtReg, PhysReg, true, MaxCost,
FixedRegisters);
}
@@ -300,12 +300,14 @@ bool DefaultEvictionAdvisor::canEvictInterferenceBasedOnCost(
return false;
// We permit breaking cascades for urgent evictions. It should be the
// last resort, though, so make it really expensive.
- Cost.BrokenHints += 10;
+ Cost.BrokenHints += 10 * MRI->getRegClass(Intf->reg())->getCopyCost();
}
// Would this break a satisfied hint?
bool BreaksHint = VRM->hasPreferredPhys(Intf->reg());
// Update eviction cost.
- Cost.BrokenHints += BreaksHint;
+ if (BreaksHint)
+ Cost.BrokenHints += MRI->getRegClass(Intf->reg())->getCopyCost();
+
Cost.MaxWeight = std::max(Cost.MaxWeight, Intf->weight());
// Abort if this would be too expensive.
if (Cost >= MaxCost)
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.dim.gfx90a.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.dim.gfx90a.ll
index bb4a607fc62d0..44a4e8171ff33 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.dim.gfx90a.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.dim.gfx90a.ll
@@ -18,22 +18,19 @@ define amdgpu_ps <4 x float> @load_1d_lwe(<8 x i32> inreg %rsrc, ptr addrspace(1
; GCN-LABEL: load_1d_lwe:
; GCN: ; %bb.0: ; %main_body
; GCN-NEXT: v_mov_b32_e32 v8, 0
+; GCN-NEXT: v_mov_b32_e32 v6, v0
; GCN-NEXT: v_mov_b32_e32 v9, v8
; GCN-NEXT: v_mov_b32_e32 v10, v8
; GCN-NEXT: v_mov_b32_e32 v11, v8
; GCN-NEXT: v_mov_b32_e32 v12, v8
-; GCN-NEXT: v_mov_b32_e32 v2, v8
-; GCN-NEXT: v_mov_b32_e32 v3, v9
-; GCN-NEXT: v_mov_b32_e32 v4, v10
-; GCN-NEXT: v_mov_b32_e32 v5, v11
-; GCN-NEXT: v_mov_b32_e32 v6, v12
-; GCN-NEXT: image_load v[2:6], v0, s[0:7] dmask:0xf unorm lwe
+; GCN-NEXT: v_mov_b32_e32 v0, v8
+; GCN-NEXT: v_mov_b32_e32 v1, v9
+; GCN-NEXT: v_mov_b32_e32 v2, v10
+; GCN-NEXT: v_mov_b32_e32 v3, v11
+; GCN-NEXT: v_mov_b32_e32 v4, v12
+; GCN-NEXT: image_load v[0:4], v6, s[0:7] dmask:0xf unorm lwe
; GCN-NEXT: s_waitcnt vmcnt(0)
-; GCN-NEXT: v_mov_b32_e32 v0, v2
-; GCN-NEXT: v_mov_b32_e32 v1, v3
-; GCN-NEXT: v_mov_b32_e32 v2, v4
-; GCN-NEXT: v_mov_b32_e32 v3, v5
-; GCN-NEXT: global_store_dword v8, v6, s[8:9]
+; GCN-NEXT: global_store_dword v8, v4, s[8:9]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: ; return to shader part epilog
main_body:
diff --git a/llvm/test/CodeGen/RISCV/rvv/vloxseg-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vloxseg-rv32.ll
index 68d3f85cd1f23..6d70d191ba8b6 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vloxseg-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vloxseg-rv32.ll
@@ -9,8 +9,8 @@ define <vscale x 1 x i8> @test_vloxseg2_nxv1i8_triscv.vector.tuple_nxv1i8_2t_nxv
; CHECK-LABEL: test_vloxseg2_nxv1i8_triscv.vector.tuple_nxv1i8_2t_nxv1i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
-; CHECK-NEXT: vloxseg2ei8.v v9, (a0), v8
-; CHECK-NEXT: vmv1r.v v8, v10
+; CHECK-NEXT: vmv1r.v v9, v8
+; CHECK-NEXT: vloxseg2ei8.v v7, (a0), v9
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 1 x i8>, 2) @llvm.riscv.vloxseg2.triscv.vector.tuple_nxv1i8_2t.nxv1i8(target("riscv.vector.tuple", <vscale x 1 x i8>, 2) poison, ptr %base, <vscale x 1 x i8> %index, i32 %vl, i32 3)
@@ -22,8 +22,8 @@ define <vscale x 1 x i8> @test_vloxseg2_mask_nxv1i8_triscv.vector.tuple_nxv1i8_2
; CHECK-LABEL: test_vloxseg2_mask_nxv1i8_triscv.vector.tuple_nxv1i8_2t_nxv1i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
-; CHECK-NEXT: vloxseg2ei8.v v9, (a0), v8, v0.t
-; CHECK-NEXT: vmv1r.v v8, v10
+; CHECK-NEXT: vmv1r.v v9, v8
+; CHECK-NEXT: vloxseg2ei8.v v7, (a0), v9, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 1 x i8>, 2) @llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv1i8_2t.nxv1i1.nxv1i8(target("riscv.vector.tuple", <vscale x 1 x i8>, 2) poison, ptr %base, <vscale x 1 x i8> %index, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 3)
@@ -38,8 +38,8 @@ define <vscale x 1 x i8> @test_vloxseg2_nxv1i8_triscv.vector.tuple_nxv1i8_2t_nxv
; CHECK-LABEL: test_vloxseg2_nxv1i8_triscv.vector.tuple_nxv1i8_2t_nxv1i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
-; CHECK-NEXT: vloxseg2ei16.v v9, (a0), v8
-; CHECK-NEXT: vmv1r.v v8, v10
+; CHECK-NEXT: vmv1r.v v9, v8
+; CHECK-NEXT: vloxseg2ei16.v v7, (a0), v9
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 1 x i8>, 2) @llvm.riscv.vloxseg2.triscv.vector.tuple_nxv1i8_2t.nxv1i16(target("riscv.vector.tuple", <vscale x 1 x i8>, 2) poison, ptr %base, <vscale x 1 x i16> %index, i32 %vl, i32 3)
@@ -51,8 +51,8 @@ define <vscale x 1 x i8> @test_vloxseg2_mask_nxv1i8_triscv.vector.tuple_nxv1i8_2
; CHECK-LABEL: test_vloxseg2_mask_nxv1i8_triscv.vector.tuple_nxv1i8_2t_nxv1i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
-; CHECK-NEXT: vloxseg2ei16.v v9, (a0), v8, v0.t
-; CHECK-NEXT: vmv1r.v v8, v10
+; CHECK-NEXT: vmv1r.v v9, v8
+; CHECK-NEXT: vloxseg2ei16.v v7, (a0), v9, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 1 x i8>, 2) @llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv1i8_2t.nxv1i1.nxv1i16(target("riscv.vector.tuple", <vscale x 1 x i8>, 2) poison, ptr %base, <vscale x 1 x i16> %index, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 3)
@@ -67,8 +67,8 @@ define <vscale x 1 x i8> @test_vloxseg2_nxv1i8_triscv.vector.tuple_nxv1i8_2t_nxv
; CHECK-LABEL: test_vloxseg2_nxv1i8_triscv.vector.tuple_nxv1i8_2t_nxv1i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
-; CHECK-NEXT: vloxseg2ei32.v v9, (a0), v8
-; CHECK-NEXT: vmv1r.v v8, v10
+; CHECK-NEXT: vmv1r.v v9, v8
+; CHECK-NEXT: vloxseg2ei32.v v7, (a0), v9
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 1 x i8>, 2) @llvm.riscv.vloxseg2.triscv.vector.tuple_nxv1i8_2t.nxv1i32(target("riscv.vector.tuple", <vscale x 1 x i8>, 2) poison, ptr %base, <vscale x 1 x i32> %index, i32 %vl, i32 3)
@@ -80,8 +80,8 @@ define <vscale x 1 x i8> @test_vloxseg2_mask_nxv1i8_triscv.vector.tuple_nxv1i8_2
; CHECK-LABEL: test_vloxseg2_mask_nxv1i8_triscv.vector.tuple_nxv1i8_2t_nxv1i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
-; CHECK-NEXT: vloxseg2ei32.v v9, (a0), v8, v0.t
-; CHECK-NEXT: vmv1r.v v8, v10
+; CHECK-NEXT: vmv1r.v v9, v8
+; CHECK-NEXT: vloxseg2ei32.v v7, (a0), v9, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 1 x i8>, 2) @llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv1i8_2t.nxv1i1.nxv1i32(target("riscv.vector.tuple", <vscale x 1 x i8>, 2) poison, ptr %base, <vscale x 1 x i32> %index, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 3)
@@ -96,8 +96,8 @@ define <vscale x 2 x i8> @test_vloxseg2_nxv2i8_triscv.vector.tuple_nxv2i8_2t_nxv
; CHECK-LABEL: test_vloxseg2_nxv2i8_triscv.vector.tuple_nxv2i8_2t_nxv2i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
-; CHECK-NEXT: vloxseg2ei8.v v9, (a0), v8
-; CHECK-NEXT: vmv1r.v v8, v10
+; CHECK-NEXT: vmv1r.v v9, v8
+; CHECK-NEXT: vloxseg2ei8.v v7, (a0), v9
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 2) @llvm.riscv.vloxseg2.triscv.vector.tuple_nxv2i8_2t.nxv2i8(target("riscv.vector.tuple", <vscale x 2 x i8>, 2) poison, ptr %base, <vscale x 2 x i8> %index, i32 %vl, i32 3)
@@ -109,8 +109,8 @@ define <vscale x 2 x i8> @test_vloxseg2_mask_nxv2i8_triscv.vector.tuple_nxv2i8_2
; CHECK-LABEL: test_vloxseg2_mask_nxv2i8_triscv.vector.tuple_nxv2i8_2t_nxv2i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
-; CHECK-NEXT: vloxseg2ei8.v v9, (a0), v8, v0.t
-; CHECK-NEXT: vmv1r.v v8, v10
+; CHECK-NEXT: vmv1r.v v9, v8
+; CHECK-NEXT: vloxseg2ei8.v v7, (a0), v9, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 2) @llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv2i8_2t.nxv2i1.nxv2i8(target("riscv.vector.tuple", <vscale x 2 x i8>, 2) poison, ptr %base, <vscale x 2 x i8> %index, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 3)
@@ -125,8 +125,8 @@ define <vscale x 2 x i8> @test_vloxseg2_nxv2i8_triscv.vector.tuple_nxv2i8_2t_nxv
; CHECK-LABEL: test_vloxseg2_nxv2i8_triscv.vector.tuple_nxv2i8_2t_nxv2i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
-; CHECK-NEXT: vloxseg2ei16.v v9, (a0), v8
-; CHECK-NEXT: vmv1r.v v8, v10
+; CHECK-NEXT: vmv1r.v v9, v8
+; CHECK-NEXT: vloxseg2ei16.v v7, (a0), v9
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 2) @llvm.riscv.vloxseg2.triscv.vector.tuple_nxv2i8_2t.nxv2i16(target("riscv.vector.tuple", <vscale x 2 x i8>, 2) poison, ptr %base, <vscale x 2 x i16> %index, i32 %vl, i32 3)
@@ -138,8 +138,8 @@ define <vscale x 2 x i8> @test_vloxseg2_mask_nxv2i8_triscv.vector.tuple_nxv2i8_2
; CHECK-LABEL: test_vloxseg2_mask_nxv2i8_triscv.vector.tuple_nxv2i8_2t_nxv2i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
-; CHECK-NEXT: vloxseg2ei16.v v9, (a0), v8, v0.t
-; CHECK-NEXT: vmv1r.v v8, v10
+; CHECK-NEXT: vmv1r.v v9, v8
+; CHECK-NEXT: vloxseg2ei16.v v7, (a0), v9, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 2) @llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv2i8_2t.nxv2i1.nxv2i16(target("riscv.vector.tuple", <vscale x 2 x i8>, 2) poison, ptr %base, <vscale x 2 x i16> %index, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 3)
@@ -154,8 +154,8 @@ define <vscale x 2 x i8> @test_vloxseg2_nxv2i8_triscv.vector.tuple_nxv2i8_2t_nxv
; CHECK-LABEL: test_vloxseg2_nxv2i8_triscv.vector.tuple_nxv2i8_2t_nxv2i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
-; CHECK-NEXT: vloxseg2ei32.v v9, (a0), v8
-; CHECK-NEXT: vmv1r.v v8, v10
+; CHECK-NEXT: vmv1r.v v9, v8
+; CHECK-NEXT: vloxseg2ei32.v v7, (a0), v9
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 2) @llvm.riscv.vloxseg2.triscv.vector.tuple_nxv2i8_2t.nxv2i32(target("riscv.vector.tuple", <vscale x 2 x i8>, 2) poison, ptr %base, <vscale x 2 x i32> %index, i32 %vl, i32 3)
@@ -167,8 +167,8 @@ define <vscale x 2 x i8> @test_vloxseg2_mask_nxv2i8_triscv.vector.tuple_nxv2i8_2
; CHECK-LABEL: test_vloxseg2_mask_nxv2i8_triscv.vector.tuple_nxv2i8_2t_nxv2i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
-; CHECK-NEXT: vloxseg2ei32.v v9, (a0), v8, v0.t
-; CHECK-NEXT: vmv1r.v v8, v10
+; CHECK-NEXT: vmv1r.v v9, v8
+; CHECK-NEXT: vloxseg2ei32.v v7, (a0), v9, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 2) @llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv2i8_2t.nxv2i1.nxv2i32(target("riscv.vector.tuple", <vscale x 2 x i8>, 2) poison, ptr %base, <vscale x 2 x i32> %index, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 3)
@@ -183,8 +183,8 @@ define <vscale x 4 x i8> @test_vloxseg2_nxv4i8_triscv.vector.tuple_nxv4i8_2t_nxv
; CHECK-LABEL: test_vloxseg2_nxv4i8_triscv.vector.tuple_nxv4i8_2t_nxv4i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
-; CHECK-NEXT: vloxseg2ei8.v v9, (a0), v8
-; CHECK-NEXT: vmv1r.v v8, v10
+; CHECK-NEXT: vmv1r.v v9, v8
+; CHECK-NEXT: vloxseg2ei8.v v7, (a0), v9
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @llvm.riscv.vloxseg2.triscv.vector.tuple_nxv4i8_2t.nxv4i8(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) poison, ptr %base, <vscale x 4 x i8> %index, i32 %vl, i32 3)
@@ -196,8 +196,8 @@ define <vscale x 4 x i8> @test_vloxseg2_mask_nxv4i8_triscv.vector.tuple_nxv4i8_2
; CHECK-LABEL: test_vloxseg2_mask_nxv4i8_triscv.vector.tuple_nxv4i8_2t_nxv4i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
-; CHECK-NEXT: vloxseg2ei8.v v9, (a0), v8, v0.t
-; CHECK-NEXT: vmv1r.v v8, v10
+; CHECK-NEXT: vmv1r.v v9, v8
+; CHECK-NEXT: vloxseg2ei8.v v7, (a0), v9, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv4i8_2t.nxv4i1.nxv4i8(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) poison, ptr %base, <vscale x 4 x i8> %index, <vscale x 4 x i1> %mask, i32 %vl, i32 1, i32 3)
@@ -212,8 +212,8 @@ define <vscale x 4 x i8> @test_vloxseg2_nxv4i8_triscv.vector.tuple_nxv4i8_2t_nxv
; CHECK-LABEL: test_vloxseg2_nxv4i8_triscv.vector.tuple_nxv4i8_2t_nxv4i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
-; CHECK-NEXT: vloxseg2ei16.v v9, (a0), v8
-; CHECK-NEXT: vmv1r.v v8, v10
+; CHECK-NEXT: vmv1r.v v9, v8
+; CHECK-NEXT: vloxseg2ei16.v v7, (a0), v9
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @llvm.riscv.vloxseg2.triscv.vector.tuple_nxv4i8_2t.nxv4i16(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) poison, ptr %base, <vscale x 4 x i16> %index, i32 %vl, i32 3)
@@ -225,8 +225,8 @@ define <vscale x 4 x i8> @test_vloxseg2_mask_nxv4i8_triscv.vector.tuple_nxv4i8_2
; CHECK-LABEL: test_vloxseg2_mask_nxv4i8_triscv.vector.tuple_nxv4i8_2t_nxv4i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
-; CHECK-NEXT: vloxseg2ei16.v v9, (a0), v8, v0.t
-; CHECK-NEXT: vmv1r.v v8, v10
+; CHECK-NEXT: vmv1r.v v9, v8
+; CHECK-NEXT: vloxseg2ei16.v v7, (a0), v9, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv4i8_2t.nxv4i1.nxv4i16(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) poison, ptr %base, <vscale x 4 x i16> %index, <vscale x 4 x i1> %mask, i32 %vl, i32 1, i32 3)
@@ -270,8 +270,8 @@ define <vscale x 8 x i8> @test_vloxseg2_nxv8i8_triscv.vector.tuple_nxv8i8_2t_nxv
; CHECK-LABEL: test_vloxseg2_nxv8i8_triscv.vector.tuple_nxv8i8_2t_nxv8i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma
-; CHECK-NEXT: vloxseg2ei8.v v9, (a0), v8
-; CHECK-NEXT: vmv1r.v v8, v10
+; CHECK-NEXT: vmv1r.v v9, v8
+; CHECK-NEXT: vloxseg2ei8.v v7, (a0), v9
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @llvm.riscv.vloxseg2.triscv.vector.tuple_nxv8i8_2t.nxv8i8(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) poison, ptr %base, <vscale x 8 x i8> %index, i32 %vl, i32 3)
@@ -283,8 +283,8 @@ define <vscale x 8 x i8> @test_vloxseg2_mask_nxv8i8_triscv.vector.tuple_nxv8i8_2
; CHECK-LABEL: test_vloxseg2_mask_nxv8i8_triscv.vector.tuple_nxv8i8_2t_nxv8i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma
-; CHECK-NEXT: vloxseg2ei8.v v9, (a0), v8, v0.t
-; CHECK-NEXT: vmv1r.v v8, v10
+; CHECK-NEXT: vmv1r.v v9, v8
+; CHECK-NEXT: vloxseg2ei8.v v7, (a0), v9, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @llvm.riscv.vloxseg2.mask.triscv.vector.tuple_nxv8i8_2t.nxv8i1.nxv8i8(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) poison, ptr %base, <vscale x 8 x i8> %index, <vscale x 8 x i1> %mask, i32 %vl, i32 1, i32 3)
@@ -502,8 +502,8 @@ define <vscale x 1 x i8> @test_vloxseg3_nxv1i8_triscv.vector.tuple_nxv1i8_3t_nxv
; CHECK-LABEL: test_vloxseg3_nxv1i8_triscv.vector.tuple_nxv1i8_3t_nxv1i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
-; CHECK-NEXT: vloxseg3ei8.v v9, (a0), v8
-; CHECK-NEXT: vmv1r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vloxseg3ei8.v v7, (a0), v10
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 1 x i8>, 3) @llvm.riscv.vloxseg3.triscv.vector.tuple_nxv1i8_3t.nxv1i8(target("riscv.vector.tuple", <vscale x 1 x i8>, 3) poison, ptr %base, <vscale x 1 x i8> %index, i32 %vl, i32 3)
@@ -515,8 +515,8 @@ define <vscale x 1 x i8> @test_vloxseg3_mask_nxv1i8_triscv.vector.tuple_nxv1i8_3
; CHECK-LABEL: test_vloxseg3_mask_nxv1i8_triscv.vector.tuple_nxv1i8_3t_nxv1i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
-; CHECK-NEXT: vloxseg3ei8.v v9, (a0), v8, v0.t
-; CHECK-NEXT: vmv1r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vloxseg3ei8.v v7, (a0), v10, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 1 x i8>, 3) @llvm.riscv.vloxseg3.mask.triscv.vector.tuple_nxv1i8_3t.nxv1i1.nxv1i8(target("riscv.vector.tuple", <vscale x 1 x i8>, 3) poison, ptr %base, <vscale x 1 x i8> %index, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 3)
@@ -531,8 +531,8 @@ define <vscale x 1 x i8> @test_vloxseg3_nxv1i8_triscv.vector.tuple_nxv1i8_3t_nxv
; CHECK-LABEL: test_vloxseg3_nxv1i8_triscv.vector.tuple_nxv1i8_3t_nxv1i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
-; CHECK-NEXT: vloxseg3ei16.v v9, (a0), v8
-; CHECK-NEXT: vmv1r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vloxseg3ei16.v v7, (a0), v10
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 1 x i8>, 3) @llvm.riscv.vloxseg3.triscv.vector.tuple_nxv1i8_3t.nxv1i16(target("riscv.vector.tuple", <vscale x 1 x i8>, 3) poison, ptr %base, <vscale x 1 x i16> %index, i32 %vl, i32 3)
@@ -544,8 +544,8 @@ define <vscale x 1 x i8> @test_vloxseg3_mask_nxv1i8_triscv.vector.tuple_nxv1i8_3
; CHECK-LABEL: test_vloxseg3_mask_nxv1i8_triscv.vector.tuple_nxv1i8_3t_nxv1i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
-; CHECK-NEXT: vloxseg3ei16.v v9, (a0), v8, v0.t
-; CHECK-NEXT: vmv1r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vloxseg3ei16.v v7, (a0), v10, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 1 x i8>, 3) @llvm.riscv.vloxseg3.mask.triscv.vector.tuple_nxv1i8_3t.nxv1i1.nxv1i16(target("riscv.vector.tuple", <vscale x 1 x i8>, 3) poison, ptr %base, <vscale x 1 x i16> %index, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 3)
@@ -560,8 +560,8 @@ define <vscale x 1 x i8> @test_vloxseg3_nxv1i8_triscv.vector.tuple_nxv1i8_3t_nxv
; CHECK-LABEL: test_vloxseg3_nxv1i8_triscv.vector.tuple_nxv1i8_3t_nxv1i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
-; CHECK-NEXT: vloxseg3ei32.v v9, (a0), v8
-; CHECK-NEXT: vmv1r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vloxseg3ei32.v v7, (a0), v10
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 1 x i8>, 3) @llvm.riscv.vloxseg3.triscv.vector.tuple_nxv1i8_3t.nxv1i32(target("riscv.vector.tuple", <vscale x 1 x i8>, 3) poison, ptr %base, <vscale x 1 x i32> %index, i32 %vl, i32 3)
@@ -573,8 +573,8 @@ define <vscale x 1 x i8> @test_vloxseg3_mask_nxv1i8_triscv.vector.tuple_nxv1i8_3
; CHECK-LABEL: test_vloxseg3_mask_nxv1i8_triscv.vector.tuple_nxv1i8_3t_nxv1i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
-; CHECK-NEXT: vloxseg3ei32.v v9, (a0), v8, v0.t
-; CHECK-NEXT: vmv1r.v v8, v10
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vloxseg3ei32.v v7, (a0), v10, v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 1 x i8>, 3) @llvm.riscv.vloxseg3.mask.triscv.vector.tuple_nxv1i8_3t.nxv1i1.nxv1i32(target("riscv....
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/160084
More information about the llvm-commits
mailing list