[llvm] [RISCV] Correct the limit of RegPressureSet `GPRAll` (PR #118473)

via llvm-commits llvm-commits at lists.llvm.org
Tue Dec 3 03:44:10 PST 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-risc-v

Author: Pengcheng Wang (wangpc-pp)

<details>
<summary>Changes</summary>

The generated limit is 33, which is the total number of scalar registers
plus 1 (for `DUMMY_REG_PAIR_WITH_X0`).

This is not right, as not all scalar registers can be used. There are 4-6
reserved registers, so we need to reduce the limit by the number of reserved registers.

This change affects instruction scheduling, MachineLICM, etc.

Here are the statistics of spills/reloads on `llvm-test-suite` with
`-O3 -march=rva23u64`:

```
Metric: regalloc.NumSpills,regalloc.NumReloads

Program                                       regalloc.NumSpills                  regalloc.NumReloads
                                              baseline           after    diff    baseline            after    diff
External/S...NT2017rate/502.gcc_r/502.gcc_r   11812.00           11338.00 -474.00 26813.00            25751.00 -1062.00
External/S...T2017speed/602.gcc_s/602.gcc_s   11812.00           11338.00 -474.00 26813.00            25751.00 -1062.00
External/S...te/526.blender_r/526.blender_r   13514.00           13228.00 -286.00 27456.00            27260.00  -196.00
External/S...00.perlbench_s/600.perlbench_s    4398.00            4274.00 -124.00  9745.00             9341.00  -404.00
External/S...00.perlbench_r/500.perlbench_r    4398.00            4274.00 -124.00  9745.00             9341.00  -404.00
SingleSour...nchmarks/Adobe-C++/loop_unroll    1533.00            1413.00 -120.00  2943.00             2633.00  -310.00
External/S...rate/510.parest_r/510.parest_r   43985.00           43879.00 -106.00 87409.00            87309.00  -100.00
External/S...te/538.imagick_r/538.imagick_r    4160.00            4060.00 -100.00 10338.00            10244.00   -94.00
External/S...ed/638.imagick_s/638.imagick_s    4160.00            4060.00 -100.00 10338.00            10244.00   -94.00
MultiSourc...e/Applications/ClamAV/clamscan    2120.00            2023.00  -97.00  5035.00             4901.00  -134.00
MultiSourc...sumer-typeset/consumer-typeset    1218.00            1129.00  -89.00  3041.00             2887.00  -154.00
MultiSourc.../Applications/JM/ldecod/ldecod    1341.00            1263.00  -78.00  2316.00             2238.00   -78.00
External/S...rate/511.povray_r/511.povray_r    1734.00            1659.00  -75.00  3413.00             3246.00  -167.00
MultiSource/Applications/SPASS/SPASS           1442.00            1376.00  -66.00  2954.00             2837.00  -117.00
MultiSourc.../DOE-ProxyApps-C++/CLAMR/CLAMR    1628.00            1568.00  -60.00  3026.00             2958.00   -68.00
      regalloc.NumSpills                            regalloc.NumReloads
run             baseline         after         diff            baseline         after         diff
mean   86.725206          85.041122    -1.684083     1363.122137         1342.900383  -3.212869
```

Co-authored-by: BoyaoWang430 <wangboyao@<!-- -->bytedance.com>


---

Patch is 375.48 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/118473.diff


9 Files Affected:

- (modified) llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp (+14) 
- (modified) llvm/lib/Target/RISCV/RISCVRegisterInfo.h (+2) 
- (modified) llvm/test/CodeGen/RISCV/pr69586.ll (+368-453) 
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll (+27-51) 
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-fp-vp.ll (+1104-1104) 
- (modified) llvm/test/CodeGen/RISCV/rvv/intrinsic-vector-match.ll (+312-388) 
- (modified) llvm/test/CodeGen/RISCV/rvv/vxrm-insert-out-of-loop.ll (+3-2) 
- (modified) llvm/test/CodeGen/RISCV/wide-scalar-shift-by-byte-multiple-legalization.ll (+1608-1632) 
- (modified) llvm/test/CodeGen/RISCV/wide-scalar-shift-legalization.ll (+317-329) 


``````````diff
diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
index cfcc3119257f65..a73bd1621a739d 100644
--- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
@@ -934,3 +934,17 @@ bool RISCVRegisterInfo::getRegAllocationHints(
 
   return BaseImplRetVal;
 }
+
+unsigned RISCVRegisterInfo::getRegPressureSetLimit(const MachineFunction &MF,
+                                                   unsigned Idx) const {
+  if (Idx == RISCV::RegisterPressureSets::GPRAll) {
+    unsigned Reserved = 0;
+    BitVector ReservedRegs = getReservedRegs(MF);
+    for (MCPhysReg Reg = RISCV::X0_H; Reg <= RISCV::X31_H; Reg++)
+      if (ReservedRegs.test(Reg))
+        Reserved++;
+
+    return 32 - Reserved;
+  }
+  return RISCVGenRegisterInfo::getRegPressureSetLimit(MF, Idx);
+}
diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.h b/llvm/lib/Target/RISCV/RISCVRegisterInfo.h
index 3ab79694e175c8..ca4934de2f52d2 100644
--- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.h
@@ -144,6 +144,8 @@ struct RISCVRegisterInfo : public RISCVGenRegisterInfo {
   static bool isRVVRegClass(const TargetRegisterClass *RC) {
     return RISCVRI::isVRegClass(RC->TSFlags);
   }
+  unsigned getRegPressureSetLimit(const MachineFunction &MF,
+                                  unsigned Idx) const override;
 };
 } // namespace llvm
 
diff --git a/llvm/test/CodeGen/RISCV/pr69586.ll b/llvm/test/CodeGen/RISCV/pr69586.ll
index 9fc9a3c42867e7..21e64ada7061aa 100644
--- a/llvm/test/CodeGen/RISCV/pr69586.ll
+++ b/llvm/test/CodeGen/RISCV/pr69586.ll
@@ -44,59 +44,50 @@ define void @test(ptr %0, ptr %1, i64 %2) {
 ; NOREMAT-NEXT:    addi a5, a7, 512
 ; NOREMAT-NEXT:    addi a4, a7, 1024
 ; NOREMAT-NEXT:    addi a6, a7, 1536
-; NOREMAT-NEXT:    li t4, 1
-; NOREMAT-NEXT:    li a2, 5
-; NOREMAT-NEXT:    li t1, 3
-; NOREMAT-NEXT:    li t0, 7
-; NOREMAT-NEXT:    lui t5, 1
-; NOREMAT-NEXT:    li s4, 9
-; NOREMAT-NEXT:    li s6, 11
-; NOREMAT-NEXT:    li s9, 13
-; NOREMAT-NEXT:    li ra, 15
-; NOREMAT-NEXT:    lui t2, 2
-; NOREMAT-NEXT:    lui s1, 3
-; NOREMAT-NEXT:    lui t3, 4
-; NOREMAT-NEXT:    lui s0, 5
-; NOREMAT-NEXT:    lui s3, 6
-; NOREMAT-NEXT:    lui s7, 7
+; NOREMAT-NEXT:    li t1, 1
+; NOREMAT-NEXT:    li a3, 5
+; NOREMAT-NEXT:    li t0, 3
+; NOREMAT-NEXT:    li a2, 7
+; NOREMAT-NEXT:    lui t2, 1
+; NOREMAT-NEXT:    li s5, 9
+; NOREMAT-NEXT:    li s8, 11
+; NOREMAT-NEXT:    lui s1, 2
+; NOREMAT-NEXT:    lui t5, 3
+; NOREMAT-NEXT:    lui s11, 4
+; NOREMAT-NEXT:    lui ra, 5
+; NOREMAT-NEXT:    lui t3, 6
+; NOREMAT-NEXT:    lui s0, 7
 ; NOREMAT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; NOREMAT-NEXT:    slli t4, t4, 11
-; NOREMAT-NEXT:    sd t4, 512(sp) # 8-byte Folded Spill
-; NOREMAT-NEXT:    slli a3, a2, 9
-; NOREMAT-NEXT:    sd a3, 504(sp) # 8-byte Folded Spill
-; NOREMAT-NEXT:    slli t6, t1, 10
-; NOREMAT-NEXT:    slli s2, t0, 9
-; NOREMAT-NEXT:    add a0, a7, t5
-; NOREMAT-NEXT:    lui s11, 1
-; NOREMAT-NEXT:    slli s4, s4, 9
-; NOREMAT-NEXT:    slli s5, a2, 10
-; NOREMAT-NEXT:    slli s6, s6, 9
-; NOREMAT-NEXT:    slli s8, t1, 11
+; NOREMAT-NEXT:    slli t4, t1, 11
+; NOREMAT-NEXT:    slli t6, a3, 9
+; NOREMAT-NEXT:    slli s2, t0, 10
+; NOREMAT-NEXT:    slli s4, a2, 9
+; NOREMAT-NEXT:    add a0, a7, t2
 ; NOREMAT-NEXT:    vle32.v v8, (a5)
-; NOREMAT-NEXT:    slli s9, s9, 9
-; NOREMAT-NEXT:    li t5, 13
+; NOREMAT-NEXT:    slli s5, s5, 9
 ; NOREMAT-NEXT:    vle32.v v10, (a4)
 ; NOREMAT-NEXT:    vle32.v v2, (a4)
-; NOREMAT-NEXT:    slli s10, t0, 10
+; NOREMAT-NEXT:    slli s6, a3, 10
 ; NOREMAT-NEXT:    vle32.v v0, (a6)
 ; NOREMAT-NEXT:    vle32.v v12, (a6)
-; NOREMAT-NEXT:    slli ra, ra, 9
+; NOREMAT-NEXT:    slli s8, s8, 9
+; NOREMAT-NEXT:    slli s9, t0, 11
 ; NOREMAT-NEXT:    vle32.v v4, (a0)
 ; NOREMAT-NEXT:    vle32.v v20, (a0)
-; NOREMAT-NEXT:    add a4, a7, t2
+; NOREMAT-NEXT:    add a4, a7, s1
 ; NOREMAT-NEXT:    vle32.v v6, (a4)
 ; NOREMAT-NEXT:    vle32.v v30, (a4)
-; NOREMAT-NEXT:    add a4, a7, s1
+; NOREMAT-NEXT:    add a4, a7, t5
 ; NOREMAT-NEXT:    vle32.v v28, (a4)
 ; NOREMAT-NEXT:    vle32.v v26, (a4)
-; NOREMAT-NEXT:    add a4, a7, t3
+; NOREMAT-NEXT:    add a4, a7, s11
 ; NOREMAT-NEXT:    vle32.v v24, (a4)
 ; NOREMAT-NEXT:    vle32.v v22, (a4)
-; NOREMAT-NEXT:    add a4, a7, s0
+; NOREMAT-NEXT:    add a4, a7, ra
 ; NOREMAT-NEXT:    vle32.v v14, (a7)
 ; NOREMAT-NEXT:    vle32.v v18, (a4)
 ; NOREMAT-NEXT:    vle32.v v16, (a4)
-; NOREMAT-NEXT:    add a4, a7, s3
+; NOREMAT-NEXT:    add a4, a7, t3
 ; NOREMAT-NEXT:    sf.vc.vv 3, 0, v14, v8
 ; NOREMAT-NEXT:    vle32.v v14, (a4)
 ; NOREMAT-NEXT:    sf.vc.vv 3, 0, v8, v10
@@ -107,78 +98,86 @@ define void @test(ptr %0, ptr %1, i64 %2) {
 ; NOREMAT-NEXT:    vle32.v v10, (a4)
 ; NOREMAT-NEXT:    sf.vc.vv 3, 0, v2, v0
 ; NOREMAT-NEXT:    vle32.v v2, (a4)
-; NOREMAT-NEXT:    add a4, a7, a3
+; NOREMAT-NEXT:    add a4, a7, t6
 ; NOREMAT-NEXT:    vle32.v v0, (a4)
 ; NOREMAT-NEXT:    sf.vc.vv 3, 0, v12, v10
 ; NOREMAT-NEXT:    vle32.v v10, (a4)
-; NOREMAT-NEXT:    add a4, a7, t6
+; NOREMAT-NEXT:    add a4, a7, s2
 ; NOREMAT-NEXT:    vle32.v v12, (a4)
 ; NOREMAT-NEXT:    sf.vc.vv 3, 0, v2, v0
 ; NOREMAT-NEXT:    vle32.v v2, (a4)
-; NOREMAT-NEXT:    add a4, a7, s2
+; NOREMAT-NEXT:    add a4, a7, s4
 ; NOREMAT-NEXT:    vle32.v v8, (a4)
 ; NOREMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
 ; NOREMAT-NEXT:    vle32.v v12, (a4)
-; NOREMAT-NEXT:    add a4, a7, s7
+; NOREMAT-NEXT:    add a4, a7, s0
 ; NOREMAT-NEXT:    vle32.v v0, (a4)
 ; NOREMAT-NEXT:    sf.vc.vv 3, 0, v2, v8
 ; NOREMAT-NEXT:    vle32.v v10, (a4)
-; NOREMAT-NEXT:    add a4, a7, s4
+; NOREMAT-NEXT:    add a4, a7, s5
 ; NOREMAT-NEXT:    vle32.v v8, (a4)
 ; NOREMAT-NEXT:    sf.vc.vv 3, 0, v12, v4
 ; NOREMAT-NEXT:    vle32.v v12, (a4)
-; NOREMAT-NEXT:    add a4, a7, s5
+; NOREMAT-NEXT:    add a4, a7, s6
 ; NOREMAT-NEXT:    vle32.v v4, (a4)
 ; NOREMAT-NEXT:    sf.vc.vv 3, 0, v20, v8
 ; NOREMAT-NEXT:    vle32.v v8, (a4)
-; NOREMAT-NEXT:    add a4, a7, s6
+; NOREMAT-NEXT:    add a4, a7, s8
 ; NOREMAT-NEXT:    vle32.v v20, (a4)
 ; NOREMAT-NEXT:    sf.vc.vv 3, 0, v12, v4
 ; NOREMAT-NEXT:    vle32.v v12, (a4)
-; NOREMAT-NEXT:    add a4, a7, s8
+; NOREMAT-NEXT:    add a4, a7, s9
 ; NOREMAT-NEXT:    vle32.v v4, (a4)
 ; NOREMAT-NEXT:    sf.vc.vv 3, 0, v8, v20
 ; NOREMAT-NEXT:    vle32.v v8, (a4)
-; NOREMAT-NEXT:    add a4, a7, s9
+; NOREMAT-NEXT:    li t5, 13
+; NOREMAT-NEXT:    slli a4, t5, 9
+; NOREMAT-NEXT:    sd a4, 624(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT:    add a4, a7, a4
 ; NOREMAT-NEXT:    vle32.v v20, (a4)
 ; NOREMAT-NEXT:    sf.vc.vv 3, 0, v12, v4
 ; NOREMAT-NEXT:    vle32.v v12, (a4)
-; NOREMAT-NEXT:    add a4, a7, s10
+; NOREMAT-NEXT:    slli a4, a2, 10
+; NOREMAT-NEXT:    sd a4, 616(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT:    add a4, a7, a4
 ; NOREMAT-NEXT:    vle32.v v4, (a4)
 ; NOREMAT-NEXT:    sf.vc.vv 3, 0, v8, v20
 ; NOREMAT-NEXT:    vle32.v v8, (a4)
-; NOREMAT-NEXT:    add a4, a7, ra
+; NOREMAT-NEXT:    li a6, 15
+; NOREMAT-NEXT:    slli a4, a6, 9
+; NOREMAT-NEXT:    sd a4, 608(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT:    add a4, a7, a4
 ; NOREMAT-NEXT:    vle32.v v2, (a4)
 ; NOREMAT-NEXT:    sf.vc.vv 3, 0, v12, v4
-; NOREMAT-NEXT:    lui t4, 8
-; NOREMAT-NEXT:    add a5, a7, t4
+; NOREMAT-NEXT:    lui t1, 8
+; NOREMAT-NEXT:    add a5, a7, t1
 ; NOREMAT-NEXT:    vle32.v v20, (a5)
 ; NOREMAT-NEXT:    vle32.v v12, (a4)
 ; NOREMAT-NEXT:    sf.vc.vv 3, 0, v8, v2
 ; NOREMAT-NEXT:    li a4, 17
 ; NOREMAT-NEXT:    slli a4, a4, 9
-; NOREMAT-NEXT:    li s1, 17
-; NOREMAT-NEXT:    sd a4, 624(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT:    li t2, 17
+; NOREMAT-NEXT:    sd a4, 600(sp) # 8-byte Folded Spill
 ; NOREMAT-NEXT:    add a4, a7, a4
 ; NOREMAT-NEXT:    vle32.v v8, (a4)
 ; NOREMAT-NEXT:    vle32.v v4, (a4)
 ; NOREMAT-NEXT:    sf.vc.vv 3, 0, v12, v6
 ; NOREMAT-NEXT:    li a5, 9
 ; NOREMAT-NEXT:    slli a4, a5, 10
-; NOREMAT-NEXT:    sd a4, 616(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT:    sd a4, 592(sp) # 8-byte Folded Spill
 ; NOREMAT-NEXT:    add a4, a7, a4
 ; NOREMAT-NEXT:    vle32.v v12, (a4)
 ; NOREMAT-NEXT:    vle32.v v6, (a4)
 ; NOREMAT-NEXT:    sf.vc.vv 3, 0, v30, v8
 ; NOREMAT-NEXT:    li a4, 19
 ; NOREMAT-NEXT:    slli a4, a4, 9
-; NOREMAT-NEXT:    li t2, 19
-; NOREMAT-NEXT:    sd a4, 608(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT:    li s1, 19
+; NOREMAT-NEXT:    sd a4, 584(sp) # 8-byte Folded Spill
 ; NOREMAT-NEXT:    add a4, a7, a4
 ; NOREMAT-NEXT:    vle32.v v8, (a4)
 ; NOREMAT-NEXT:    vle32.v v30, (a4)
-; NOREMAT-NEXT:    slli a3, a2, 11
-; NOREMAT-NEXT:    sd a3, 600(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT:    slli a3, a3, 11
+; NOREMAT-NEXT:    sd a3, 576(sp) # 8-byte Folded Spill
 ; NOREMAT-NEXT:    sf.vc.vv 3, 0, v4, v12
 ; NOREMAT-NEXT:    add a3, a7, a3
 ; NOREMAT-NEXT:    vle32.v v12, (a3)
@@ -186,46 +185,45 @@ define void @test(ptr %0, ptr %1, i64 %2) {
 ; NOREMAT-NEXT:    sf.vc.vv 3, 0, v6, v8
 ; NOREMAT-NEXT:    li s7, 21
 ; NOREMAT-NEXT:    slli a3, s7, 9
-; NOREMAT-NEXT:    sd a3, 592(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT:    sd a3, 568(sp) # 8-byte Folded Spill
 ; NOREMAT-NEXT:    add a3, a7, a3
 ; NOREMAT-NEXT:    vle32.v v8, (a3)
 ; NOREMAT-NEXT:    vle32.v v6, (a3)
 ; NOREMAT-NEXT:    sf.vc.vv 3, 0, v30, v12
-; NOREMAT-NEXT:    li a6, 11
-; NOREMAT-NEXT:    slli a3, a6, 10
-; NOREMAT-NEXT:    sd a3, 584(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT:    li a4, 11
+; NOREMAT-NEXT:    slli a3, a4, 10
+; NOREMAT-NEXT:    sd a3, 560(sp) # 8-byte Folded Spill
 ; NOREMAT-NEXT:    add a3, a7, a3
 ; NOREMAT-NEXT:    vle32.v v12, (a3)
 ; NOREMAT-NEXT:    vle32.v v30, (a3)
 ; NOREMAT-NEXT:    sf.vc.vv 3, 0, v4, v8
 ; NOREMAT-NEXT:    li s3, 23
-; NOREMAT-NEXT:    slli a3, s3, 9
-; NOREMAT-NEXT:    sd a3, 576(sp) # 8-byte Folded Spill
-; NOREMAT-NEXT:    add a3, a7, a3
+; NOREMAT-NEXT:    slli s10, s3, 9
+; NOREMAT-NEXT:    add a3, a7, s10
 ; NOREMAT-NEXT:    vle32.v v8, (a3)
 ; NOREMAT-NEXT:    vle32.v v4, (a3)
 ; NOREMAT-NEXT:    sf.vc.vv 3, 0, v6, v12
 ; NOREMAT-NEXT:    li s0, 25
 ; NOREMAT-NEXT:    slli a3, s0, 9
-; NOREMAT-NEXT:    sd a3, 568(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT:    sd a3, 552(sp) # 8-byte Folded Spill
 ; NOREMAT-NEXT:    add a3, a7, a3
 ; NOREMAT-NEXT:    vle32.v v12, (a3)
 ; NOREMAT-NEXT:    vle32.v v6, (a3)
 ; NOREMAT-NEXT:    sf.vc.vv 3, 0, v30, v8
 ; NOREMAT-NEXT:    slli a3, t5, 10
-; NOREMAT-NEXT:    sd a3, 560(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT:    sd a3, 544(sp) # 8-byte Folded Spill
 ; NOREMAT-NEXT:    add a3, a7, a3
 ; NOREMAT-NEXT:    vle32.v v8, (a3)
 ; NOREMAT-NEXT:    vle32.v v30, (a3)
 ; NOREMAT-NEXT:    sf.vc.vv 3, 0, v4, v28
 ; NOREMAT-NEXT:    li t3, 27
 ; NOREMAT-NEXT:    slli a3, t3, 9
-; NOREMAT-NEXT:    sd a3, 552(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT:    sd a3, 536(sp) # 8-byte Folded Spill
 ; NOREMAT-NEXT:    add a3, a7, a3
 ; NOREMAT-NEXT:    vle32.v v28, (a3)
 ; NOREMAT-NEXT:    vle32.v v4, (a3)
-; NOREMAT-NEXT:    slli a2, t0, 11
-; NOREMAT-NEXT:    sd a2, 544(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT:    slli a2, a2, 11
+; NOREMAT-NEXT:    sd a2, 528(sp) # 8-byte Folded Spill
 ; NOREMAT-NEXT:    sf.vc.vv 3, 0, v26, v12
 ; NOREMAT-NEXT:    add a2, a7, a2
 ; NOREMAT-NEXT:    vle32.v v12, (a2)
@@ -233,39 +231,37 @@ define void @test(ptr %0, ptr %1, i64 %2) {
 ; NOREMAT-NEXT:    sf.vc.vv 3, 0, v6, v8
 ; NOREMAT-NEXT:    li t0, 29
 ; NOREMAT-NEXT:    slli a2, t0, 9
-; NOREMAT-NEXT:    sd a2, 536(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT:    sd a2, 520(sp) # 8-byte Folded Spill
 ; NOREMAT-NEXT:    add a2, a7, a2
 ; NOREMAT-NEXT:    vle32.v v8, (a2)
 ; NOREMAT-NEXT:    vle32.v v6, (a2)
 ; NOREMAT-NEXT:    sf.vc.vv 3, 0, v30, v28
-; NOREMAT-NEXT:    li a3, 15
-; NOREMAT-NEXT:    slli a2, a3, 10
-; NOREMAT-NEXT:    sd a2, 528(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT:    slli a2, a6, 10
+; NOREMAT-NEXT:    sd a2, 512(sp) # 8-byte Folded Spill
 ; NOREMAT-NEXT:    add a2, a7, a2
 ; NOREMAT-NEXT:    vle32.v v28, (a2)
 ; NOREMAT-NEXT:    vle32.v v30, (a2)
 ; NOREMAT-NEXT:    sf.vc.vv 3, 0, v4, v12
-; NOREMAT-NEXT:    li t1, 31
-; NOREMAT-NEXT:    slli a2, t1, 9
-; NOREMAT-NEXT:    sd a2, 520(sp) # 8-byte Folded Spill
-; NOREMAT-NEXT:    add a2, a7, a2
-; NOREMAT-NEXT:    vle32.v v12, (a2)
-; NOREMAT-NEXT:    vle32.v v4, (a2)
-; NOREMAT-NEXT:    sf.vc.vv 3, 0, v26, v8
-; NOREMAT-NEXT:    lui a4, 4
-; NOREMAT-NEXT:    addiw a0, a4, 512
-; NOREMAT-NEXT:    sd a0, 496(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT:    li a3, 31
+; NOREMAT-NEXT:    slli a0, a3, 9
+; NOREMAT-NEXT:    sd a0, 504(sp) # 8-byte Folded Spill
 ; NOREMAT-NEXT:    add a0, a7, a0
-; NOREMAT-NEXT:    vle32.v v8, (a0)
-; NOREMAT-NEXT:    vle32.v v26, (a0)
+; NOREMAT-NEXT:    vle32.v v12, (a0)
+; NOREMAT-NEXT:    vle32.v v4, (a0)
+; NOREMAT-NEXT:    sf.vc.vv 3, 0, v26, v8
+; NOREMAT-NEXT:    addiw a2, s11, 512
+; NOREMAT-NEXT:    sd a2, 496(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT:    add a2, a7, a2
+; NOREMAT-NEXT:    vle32.v v8, (a2)
+; NOREMAT-NEXT:    vle32.v v26, (a2)
 ; NOREMAT-NEXT:    sf.vc.vv 3, 0, v6, v28
-; NOREMAT-NEXT:    slli a2, s1, 10
+; NOREMAT-NEXT:    slli a2, t2, 10
 ; NOREMAT-NEXT:    sd a2, 488(sp) # 8-byte Folded Spill
 ; NOREMAT-NEXT:    add a2, a7, a2
 ; NOREMAT-NEXT:    vle32.v v28, (a2)
 ; NOREMAT-NEXT:    vle32.v v6, (a2)
 ; NOREMAT-NEXT:    sf.vc.vv 3, 0, v30, v12
-; NOREMAT-NEXT:    addiw a2, a4, 1536
+; NOREMAT-NEXT:    addiw a2, s11, 1536
 ; NOREMAT-NEXT:    sd a2, 480(sp) # 8-byte Folded Spill
 ; NOREMAT-NEXT:    add a2, a7, a2
 ; NOREMAT-NEXT:    vle32.v v12, (a2)
@@ -277,27 +273,25 @@ define void @test(ptr %0, ptr %1, i64 %2) {
 ; NOREMAT-NEXT:    vle32.v v24, (a2)
 ; NOREMAT-NEXT:    vle32.v v4, (a2)
 ; NOREMAT-NEXT:    sf.vc.vv 3, 0, v22, v8
-; NOREMAT-NEXT:    lui a5, 5
-; NOREMAT-NEXT:    addiw a2, a5, -1536
+; NOREMAT-NEXT:    addiw a2, ra, -1536
 ; NOREMAT-NEXT:    sd a2, 464(sp) # 8-byte Folded Spill
 ; NOREMAT-NEXT:    add a2, a7, a2
 ; NOREMAT-NEXT:    vle32.v v8, (a2)
 ; NOREMAT-NEXT:    vle32.v v22, (a2)
 ; NOREMAT-NEXT:    sf.vc.vv 3, 0, v26, v28
-; NOREMAT-NEXT:    slli a2, t2, 10
+; NOREMAT-NEXT:    slli a2, s1, 10
 ; NOREMAT-NEXT:    sd a2, 456(sp) # 8-byte Folded Spill
-; NOREMAT-NEXT:    li t2, 19
 ; NOREMAT-NEXT:    add a2, a7, a2
 ; NOREMAT-NEXT:    vle32.v v26, (a2)
 ; NOREMAT-NEXT:    vle32.v v28, (a2)
 ; NOREMAT-NEXT:    sf.vc.vv 3, 0, v6, v12
-; NOREMAT-NEXT:    addiw a2, a5, -512
+; NOREMAT-NEXT:    addiw a2, ra, -512
 ; NOREMAT-NEXT:    sd a2, 448(sp) # 8-byte Folded Spill
 ; NOREMAT-NEXT:    add a2, a7, a2
 ; NOREMAT-NEXT:    vle32.v v12, (a2)
 ; NOREMAT-NEXT:    vle32.v v6, (a2)
 ; NOREMAT-NEXT:    sf.vc.vv 3, 0, v30, v24
-; NOREMAT-NEXT:    addiw a2, a5, 512
+; NOREMAT-NEXT:    addiw a2, ra, 512
 ; NOREMAT-NEXT:    sd a2, 440(sp) # 8-byte Folded Spill
 ; NOREMAT-NEXT:    add a2, a7, a2
 ; NOREMAT-NEXT:    vle32.v v24, (a2)
@@ -309,20 +303,20 @@ define void @test(ptr %0, ptr %1, i64 %2) {
 ; NOREMAT-NEXT:    vle32.v v8, (a2)
 ; NOREMAT-NEXT:    vle32.v v4, (a2)
 ; NOREMAT-NEXT:    sf.vc.vv 3, 0, v22, v26
-; NOREMAT-NEXT:    addiw a2, a5, 1536
+; NOREMAT-NEXT:    addiw a2, ra, 1536
 ; NOREMAT-NEXT:    sd a2, 424(sp) # 8-byte Folded Spill
 ; NOREMAT-NEXT:    add a2, a7, a2
 ; NOREMAT-NEXT:    vle32.v v22, (a2)
 ; NOREMAT-NEXT:    vle32.v v26, (a2)
-; NOREMAT-NEXT:    slli a2, a6, 11
+; NOREMAT-NEXT:    slli a2, a4, 11
 ; NOREMAT-NEXT:    sd a2, 416(sp) # 8-byte Folded Spill
 ; NOREMAT-NEXT:    sf.vc.vv 3, 0, v28, v12
 ; NOREMAT-NEXT:    add a2, a7, a2
 ; NOREMAT-NEXT:    vle32.v v12, (a2)
 ; NOREMAT-NEXT:    vle32.v v28, (a2)
 ; NOREMAT-NEXT:    sf.vc.vv 3, 0, v6, v18
-; NOREMAT-NEXT:    lui a6, 6
-; NOREMAT-NEXT:    addiw a2, a6, -1536
+; NOREMAT-NEXT:    lui a4, 6
+; NOREMAT-NEXT:    addiw a2, a4, -1536
 ; NOREMAT-NEXT:    sd a2, 408(sp) # 8-byte Folded Spill
 ; NOREMAT-NEXT:    add a2, a7, a2
 ; NOREMAT-NEXT:    vle32.v v18, (a2)
@@ -334,13 +328,13 @@ define void @test(ptr %0, ptr %1, i64 %2) {
 ; NOREMAT-NEXT:    vle32.v v16, (a2)
 ; NOREMAT-NEXT:    vle32.v v24, (a2)
 ; NOREMAT-NEXT:    sf.vc.vv 3, 0, v30, v8
-; NOREMAT-NEXT:    addiw a2, a6, -512
+; NOREMAT-NEXT:    addiw a2, a4, -512
 ; NOREMAT-NEXT:    sd a2, 392(sp) # 8-byte Folded Spill
 ; NOREMAT-NEXT:    add a2, a7, a2
 ; NOREMAT-NEXT:    vle32.v v8, (a2)
 ; NOREMAT-NEXT:    vle32.v v30, (a2)
 ; NOREMAT-NEXT:    sf.vc.vv 3, 0, v4, v22
-; NOREMAT-NEXT:    addiw a2, a6, 512
+; NOREMAT-NEXT:    addiw a2, a4, 512
 ; NOREMAT-NEXT:    sd a2, 384(sp) # 8-byte Folded Spill
 ; NOREMAT-NEXT:    add a2, a7, a2
 ; NOREMAT-NEXT:    vle32.v v22, (a2)
@@ -352,7 +346,7 @@ define void @test(ptr %0, ptr %1, i64 %2) {
 ; NOREMAT-NEXT:    vle32.v v26, (a2)
 ; NOREMAT-NEXT:    vle32.v v2, (a2)
 ; NOREMAT-NEXT:    sf.vc.vv 3, 0, v28, v18
-; NOREMAT-NEXT:    addiw a2, a6, 1536
+; NOREMAT-NEXT:    addiw a2, a4, 1536
 ; NOREMAT-NEXT:    sd a2, 368(sp) # 8-byte Folded Spill
 ; NOREMAT-NEXT:    add a2, a7, a2
 ; NOREMAT-NEXT:    vle32.v v18, (a2)
@@ -364,8 +358,8 @@ define void @test(ptr %0, ptr %1, i64 %2) {
 ; NOREMAT-NEXT:    vle32.v v16, (a2)
 ; NOREMAT-NEXT:    vle32.v v6, (a2)
 ; NOREMAT-NEXT:    sf.vc.vv 3, 0, v24, v8
-; NOREMAT-NEXT:    lui s0, 7
-; NOREMAT-NEXT:    addiw a2, s0, -1536
+; NOREMAT-NEXT:    lui a5, 7
+; NOREMAT-NEXT:    addiw a2, a5, -1536
 ; NOREMAT-NEXT:    sd a2, 352(sp) # 8-byte Folded Spill
 ; NOREMAT-NEXT:    add a2, a7, a2
 ; NOREMAT-NEXT:    vle32.v v8, (a2)
@@ -379,15 +373,14 @@ define void @test(ptr %0, ptr %1, i64 %2) {
 ; NOREMAT-NEXT:    addi a0, sp, 640
 ; NOREMAT-NEXT:    vl2r.v v12, (a0) # Unknown-size Folded Reload
 ; NOREMAT-NEXT:    sf.vc.vv 3, 0, v12, v22
-; NOREMAT-NEXT:    addiw a2, s0, -512
+; NOREMAT-NEXT:    addiw a2, a5, -512
 ; NOREMAT-NEXT:    sd a2, 336(sp) # 8-byte Folded Spill
 ; NOREMAT-NEXT:    add a2, a7, a2
 ; NOREMAT-NEXT:    vle32.v v22, (a2)
 ; NOREMAT-NEXT:    vle32.v v12, (a2)
 ; NOREMAT-NEXT:    sf.vc.vv 3, 0, v4, v26
-; NOREMAT-NEXT:    addiw a2, s0, 512
+; NOREMAT-NEXT:    addiw a2, a5, 512
 ; NOREMAT-NEXT:    sd a2, 328(sp) # 8-byte Folded Spill
-; NOREMAT-NEXT:    lui t3, 7
 ; NOREMAT-NEXT:    add a2, a7, a2
 ; NOREMAT-NEXT:    vle32.v v26, (a2)
 ; NOREMAT-NEXT:    vle32.v v4, (a2)
@@ -398,30 +391,30 @@ define void @test(ptr %0, ptr %1, i64 %2) {
 ; NOREMAT-NEXT:    vle32.v v18, (a2)
 ; NOREMAT-NEXT:    vle32.v v2, (a2)
 ; NOREMAT-NEXT:    sf.vc.vv 3, 0, v28, v16
-; NOREMAT-NEXT:    addiw a2, t3, 1536
+; NOREMAT-NEXT:    addiw a2, a5, 1536
 ; NOREMAT-NEXT:    sd a2, 312(sp) # 8-byte Folded Spill
 ; NOREMAT-NEXT:    add a2, a7, a2
 ; NOREMAT-NEXT:    vle32.v v16, (a2)
 ; NOREMAT-NEXT:    vle32.v v28, (a2)
-; NOREMAT-NEXT:    slli a2, a3, 11
+; NOREMAT-NEXT:    slli a2, a6, 11
 ; NOREMAT-NEXT:    sd a2, 304(sp) # 8-byte Folded Spill
 ; NOREMAT-NEXT:    sf.vc.vv 3, 0, v6, v8
 ; NOREMAT-NEXT:    add a2, a7, a2
 ; NOREMAT-NEXT:    vle32.v v8, (a2)
 ; NOREMAT-NEXT:    vle32.v v6, (a2)
 ; NOREMAT-NEXT:    sf.vc.vv 3, 0, v24, v14
-; NOREMAT-NEXT:    addiw a2, t4, -1536
+; NOREMAT-NEXT:    addiw a2, t1, -1536
 ; NOREMAT-NEXT:    sd a2, 296(sp) # 8-byte Folded Spill
 ; NOREMAT-NEXT:    add a2, a7, a2
 ; NOREMAT-NEXT:    vle32.v v14, (a2)
 ; NOREMAT-NEXT:    vle32.v v24, (a2)
-; NOREMAT-NEXT:    slli a2, t1, 10
+; NOREMAT-NEXT:    slli a2, a3, 10
 ; NOREMAT-NEXT:    sd a2, 288(sp) # 8-byte Folded Spill
 ; NOREMAT-NEXT:    sf.vc.vv 3, 0, v30, v22
 ; NOREMAT-NEXT:    add a2, a7, a2
 ; NOREMAT-NEXT:    vle32.v v22, (a2)
 ; NOREMAT-NEXT:    vle32.v v30, (a2)
-; NOREMAT-NEXT:    addiw a0, t4, -512
+; NOREMAT-NEXT:    addiw a0, t1, -512
 ; NOREMAT-NEXT:    sd a0, 280(sp) # 8-byte Folded Spill
 ; NOREMAT-NEXT:    add a0, a7, a0
 ; NOREMAT-NEXT:    sf.vc.vv 3, 0, v12, v0
@@ -438,32 +431,33 @@ define void @test(ptr %0, ptr %1, i64 %2) {
 ; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
 ; NOREMAT-NEXT:    addi a0, a1, 1024
 ; NOREMAT-NEXT:    vse32.v v8, (a0)
-; NOREMAT-NEXT:    add s11, a1, s11
-; NOREMAT-NEXT:    sd s11, 272(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT:    lui a0, 1
+; NOREMAT-NEXT:    add a0, a1, a0
+; NOREMAT-NEXT:    sd a0, 272(sp) # 8-byte Folded Spill
 ; NOREMAT-NEXT:    lui a0, 2
 ; NOREMAT-NEXT:    add a0, a1, a0
 ; NOREMAT-NEXT:    sd a0, 264(sp) # 8-byte Folded Spill
 ; NOREMAT-NEXT:    lui a0, 3
 ; NOREMAT-NEXT:    add a0, a1, a0
 ; NOREMAT-NEXT:    sd a0, 256(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT:    add s11, a1, s11
+; NOREMAT-NEXT:    sd s11, 248(sp) # 8-byte Folded Spill
+; NOREM...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/118473


More information about the llvm-commits mailing list