[llvm] [RISCV] Rematerialize vmv.s.x and vfmv.s.f (PR #108012)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Sep 10 05:12:34 PDT 2024
llvmbot wrote:
@llvm/pr-subscribers-backend-risc-v
Author: Luke Lau (lukel97)
Changes:
Continuing from #107993 and #108007, this handles the last of the main rematerializable vector instructions.
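For context: marking these pseudos rematerializable lets the register allocator re-execute the cheap scalar-to-vector move at its use point instead of carrying it across a high-pressure region with a spill/reload pair. A hand-written sketch of the effect (illustrative registers and offsets, not taken from this patch's tests):

```asm
# Without remat: the vmv.s.x result is live across a high-pressure region,
# so the allocator has to spill it and reload it later.
        vsetivli zero, 1, e32, m1, ta, ma
        vmv.s.x  v8, a0
        vs1r.v   v8, (sp)            # spill
        # ... v8 is needed for other values in between ...
        vl1r.v   v8, (sp)            # reload

# With remat: assuming a0 is still live, the allocator can instead just
# re-execute the move right before the use, avoiding the stack traffic.
        # ... other work, v8 freely reused in between ...
        vsetivli zero, 1, e32, m1, ta, ma
        vmv.s.x  v8, a0              # rematerialized
```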
Program             regalloc.NumSpills          regalloc.NumReloads         regalloc.NumRemats
                    before    after     diff    before    after     diff    before    after     diff
508.namd_r          6598.00   6598.00   0.0%    15509.00  15509.00  0.0%    2387.00   2387.00   0.0%
505.mcf_r           141.00    141.00    0.0%    372.00    372.00    0.0%    36.00     36.00     0.0%
641.leela_s         356.00    356.00    0.0%    525.00    525.00    0.0%    117.00    117.00    0.0%
631.deepsjeng_s     353.00    353.00    0.0%    682.00    682.00    0.0%    124.00    124.00    0.0%
623.xalancbmk_s     1548.00   1548.00   0.0%    2466.00   2466.00   0.0%    620.00    620.00    0.0%
620.omnetpp_s       946.00    946.00    0.0%    1485.00   1485.00   0.0%    1178.00   1178.00   0.0%
605.mcf_s           141.00    141.00    0.0%    372.00    372.00    0.0%    36.00     36.00     0.0%
557.xz_r            289.00    289.00    0.0%    505.00    505.00    0.0%    172.00    172.00    0.0%
541.leela_r         356.00    356.00    0.0%    525.00    525.00    0.0%    117.00    117.00    0.0%
531.deepsjeng_r     353.00    353.00    0.0%    682.00    682.00    0.0%    124.00    124.00    0.0%
520.omnetpp_r       946.00    946.00    0.0%    1485.00   1485.00   0.0%    1178.00   1178.00   0.0%
523.xalancbmk_r     1548.00   1548.00   0.0%    2466.00   2466.00   0.0%    620.00    620.00    0.0%
619.lbm_s           68.00     68.00     0.0%    70.00     70.00     0.0%    1.00      1.00      0.0%
519.lbm_r           73.00     73.00     0.0%    75.00     75.00     0.0%    1.00      1.00      0.0%
657.xz_s            289.00    289.00    0.0%    505.00    505.00    0.0%    172.00    172.00    0.0%
511.povray_r        1937.00   1936.00   -0.1%   3629.00   3628.00   -0.0%   517.00    518.00    0.2%
502.gcc_r           12450.00  12442.00  -0.1%   27328.00  27317.00  -0.0%   9409.00   9409.00   0.0%
602.gcc_s           12450.00  12442.00  -0.1%   27328.00  27317.00  -0.0%   9409.00   9409.00   0.0%
638.imagick_s       4181.00   4178.00   -0.1%   11342.00  11338.00  -0.0%   3366.00   3368.00   0.1%
538.imagick_r       4181.00   4178.00   -0.1%   11342.00  11338.00  -0.0%   3366.00   3368.00   0.1%
500.perlbench_r     4178.00   4175.00   -0.1%   9162.00   9159.00   -0.0%   2410.00   2410.00   0.0%
600.perlbench_s     4178.00   4175.00   -0.1%   9162.00   9159.00   -0.0%   2410.00   2410.00   0.0%
525.x264_r          1886.00   1884.00   -0.1%   4561.00   4559.00   -0.0%   471.00    471.00    0.0%
625.x264_s          1886.00   1884.00   -0.1%   4561.00   4559.00   -0.0%   471.00    471.00    0.0%
510.parest_r        42740.00  42689.00  -0.1%   82400.00  82252.00  -0.2%   5612.00   5620.00   0.1%
644.nab_s           753.00    752.00    -0.1%   1183.00   1182.00   -0.1%   318.00    318.00    0.0%
544.nab_r           753.00    752.00    -0.1%   1183.00   1182.00   -0.1%   318.00    318.00    0.0%
526.blender_r       13105.00  13084.00  -0.2%   26478.00  26442.00  -0.1%   18991.00  18989.00  -0.0%
Geomean difference                      -0.0%                       -0.0%                       0.0%
There's an extra spill in one of the test cases, but it's most likely noise from the spill-weight heuristics and isn't an issue in practice.
---
Patch is 53.19 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/108012.diff
4 Files Affected:
- (modified) llvm/lib/Target/RISCV/RISCVInstrInfo.cpp (+2)
- (modified) llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td (+2-2)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll (+330-310)
- (modified) llvm/test/CodeGen/RISCV/rvv/remat.ll (+130)
``````````diff
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index 325a50c9f48a1c..d9ab1257863d53 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -169,6 +169,8 @@ Register RISCVInstrInfo::isStoreToStackSlot(const MachineInstr &MI,
bool RISCVInstrInfo::isReallyTriviallyReMaterializable(
const MachineInstr &MI) const {
switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
+ case RISCV::VMV_S_X:
+ case RISCV::VFMV_S_F:
case RISCV::VMV_V_I:
case RISCV::VID_V:
if (MI.getOperand(1).isUndef() &&
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
index e11f176bfe6041..1e398166adeb32 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
@@ -6762,7 +6762,7 @@ let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in {
Pseudo<(outs GPR:$rd), (ins VR:$rs2, ixlenimm:$sew), []>,
Sched<[WriteVMovXS, ReadVMovXS]>,
RISCVVPseudo;
- let HasVLOp = 1, HasSEWOp = 1, BaseInstr = VMV_S_X,
+ let HasVLOp = 1, HasSEWOp = 1, BaseInstr = VMV_S_X, isReMaterializable = 1,
Constraints = "$rd = $rs1" in
def PseudoVMV_S_X: Pseudo<(outs VR:$rd),
(ins VR:$rs1, GPR:$rs2, AVL:$vl, ixlenimm:$sew),
@@ -6785,7 +6785,7 @@ let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in {
(ins VR:$rs2, ixlenimm:$sew), []>,
Sched<[WriteVMovFS, ReadVMovFS]>,
RISCVVPseudo;
- let HasVLOp = 1, HasSEWOp = 1, BaseInstr = VFMV_S_F,
+ let HasVLOp = 1, HasSEWOp = 1, BaseInstr = VFMV_S_F, isReMaterializable = 1,
Constraints = "$rd = $rs1" in
def "PseudoVFMV_S_" # f.FX :
Pseudo<(outs VR:$rd),
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll
index bc3e135a588a6f..eff56e408d6d51 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll
@@ -159,296 +159,308 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
; RV32-NEXT: csrr a2, vlenb
-; RV32-NEXT: li a3, 80
+; RV32-NEXT: li a3, 84
; RV32-NEXT: mul a2, a2, a3
; RV32-NEXT: sub sp, sp, a2
-; RV32-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0xd0, 0x00, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 80 * vlenb
+; RV32-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0xd4, 0x00, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 84 * vlenb
; RV32-NEXT: addi a3, a1, 256
; RV32-NEXT: li a2, 32
; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma
-; RV32-NEXT: vle32.v v16, (a3)
+; RV32-NEXT: vle32.v v8, (a3)
; RV32-NEXT: csrr a3, vlenb
-; RV32-NEXT: slli a3, a3, 6
+; RV32-NEXT: li a4, 76
+; RV32-NEXT: mul a3, a3, a4
; RV32-NEXT: add a3, sp, a3
; RV32-NEXT: addi a3, a3, 16
-; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill
; RV32-NEXT: addi a3, a1, 128
; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma
-; RV32-NEXT: vslideup.vi v8, v16, 4
+; RV32-NEXT: vslideup.vi v4, v8, 4
; RV32-NEXT: csrr a4, vlenb
; RV32-NEXT: li a5, 40
; RV32-NEXT: mul a4, a4, a5
; RV32-NEXT: add a4, sp, a4
; RV32-NEXT: addi a4, a4, 16
-; RV32-NEXT: vs4r.v v8, (a4) # Unknown-size Folded Spill
+; RV32-NEXT: vs4r.v v4, (a4) # Unknown-size Folded Spill
; RV32-NEXT: lui a4, 12
; RV32-NEXT: vmv.s.x v0, a4
-; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma
-; RV32-NEXT: vslidedown.vi v16, v16, 16
; RV32-NEXT: csrr a4, vlenb
-; RV32-NEXT: li a5, 56
+; RV32-NEXT: li a5, 24
; RV32-NEXT: mul a4, a4, a5
; RV32-NEXT: add a4, sp, a4
; RV32-NEXT: addi a4, a4, 16
-; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill
-; RV32-NEXT: vmv1r.v v3, v0
-; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, mu
-; RV32-NEXT: vslideup.vi v8, v16, 10, v0.t
+; RV32-NEXT: vs1r.v v0, (a4) # Unknown-size Folded Spill
+; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma
+; RV32-NEXT: vslidedown.vi v8, v8, 16
; RV32-NEXT: csrr a4, vlenb
-; RV32-NEXT: li a5, 44
+; RV32-NEXT: li a5, 48
; RV32-NEXT: mul a4, a4, a5
; RV32-NEXT: add a4, sp, a4
; RV32-NEXT: addi a4, a4, 16
-; RV32-NEXT: vs4r.v v8, (a4) # Unknown-size Folded Spill
+; RV32-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill
+; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, mu
+; RV32-NEXT: vslideup.vi v4, v8, 10, v0.t
; RV32-NEXT: lui a4, %hi(.LCPI6_0)
; RV32-NEXT: addi a4, a4, %lo(.LCPI6_0)
; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, mu
-; RV32-NEXT: vle16.v v8, (a4)
-; RV32-NEXT: csrr a4, vlenb
-; RV32-NEXT: slli a4, a4, 5
-; RV32-NEXT: add a4, sp, a4
-; RV32-NEXT: addi a4, a4, 16
-; RV32-NEXT: vs4r.v v8, (a4) # Unknown-size Folded Spill
+; RV32-NEXT: vle16.v v0, (a4)
; RV32-NEXT: lui a4, %hi(.LCPI6_1)
; RV32-NEXT: addi a4, a4, %lo(.LCPI6_1)
; RV32-NEXT: lui a5, 1
; RV32-NEXT: vle16.v v8, (a4)
; RV32-NEXT: csrr a4, vlenb
-; RV32-NEXT: li a6, 24
+; RV32-NEXT: li a6, 56
; RV32-NEXT: mul a4, a4, a6
; RV32-NEXT: add a4, sp, a4
; RV32-NEXT: addi a4, a4, 16
; RV32-NEXT: vs4r.v v8, (a4) # Unknown-size Folded Spill
; RV32-NEXT: vle32.v v8, (a1)
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a4, 72
+; RV32-NEXT: li a4, 68
; RV32-NEXT: mul a1, a1, a4
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
; RV32-NEXT: vle32.v v24, (a3)
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a3, 48
+; RV32-NEXT: li a3, 60
; RV32-NEXT: mul a1, a1, a3
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill
; RV32-NEXT: addi a1, a5, -64
-; RV32-NEXT: vmv.s.x v0, a1
+; RV32-NEXT: vmv.s.x v16, a1
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a3, 36
+; RV32-NEXT: li a3, 44
; RV32-NEXT: mul a1, a1, a3
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
-; RV32-NEXT: vs1r.v v0, (a1) # Unknown-size Folded Spill
-; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: slli a1, a1, 5
-; RV32-NEXT: add a1, sp, a1
-; RV32-NEXT: addi a1, a1, 16
-; RV32-NEXT: vl4r.v v4, (a1) # Unknown-size Folded Reload
-; RV32-NEXT: vrgatherei16.vv v16, v8, v4
+; RV32-NEXT: vs1r.v v16, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: vrgatherei16.vv v16, v8, v0
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a3, 24
+; RV32-NEXT: li a3, 44
; RV32-NEXT: mul a1, a1, a3
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
-; RV32-NEXT: vl4r.v v8, (a1) # Unknown-size Folded Reload
-; RV32-NEXT: vrgatherei16.vv v16, v24, v8, v0.t
+; RV32-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a3, 44
+; RV32-NEXT: li a3, 56
; RV32-NEXT: mul a1, a1, a3
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vl4r.v v8, (a1) # Unknown-size Folded Reload
+; RV32-NEXT: vrgatherei16.vv v16, v24, v8, v0.t
; RV32-NEXT: vsetivli zero, 12, e32, m4, tu, ma
-; RV32-NEXT: vmv.v.v v8, v16
+; RV32-NEXT: vmv.v.v v4, v16
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a3, 44
+; RV32-NEXT: li a3, 36
; RV32-NEXT: mul a1, a1, a3
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
-; RV32-NEXT: vs4r.v v8, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: vs4r.v v4, (a1) # Unknown-size Folded Spill
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: slli a1, a1, 6
+; RV32-NEXT: li a3, 76
+; RV32-NEXT: mul a1, a1, a3
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, mu
; RV32-NEXT: vslideup.vi v12, v8, 2
-; RV32-NEXT: vmv1r.v v8, v3
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: slli a1, a1, 4
+; RV32-NEXT: li a3, 24
+; RV32-NEXT: mul a1, a1, a3
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
-; RV32-NEXT: vs1r.v v3, (a1) # Unknown-size Folded Spill
-; RV32-NEXT: vmv1r.v v0, v3
+; RV32-NEXT: vl1r.v v1, (a1) # Unknown-size Folded Reload
+; RV32-NEXT: vmv1r.v v0, v1
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a3, 56
+; RV32-NEXT: li a3, 48
; RV32-NEXT: mul a1, a1, a3
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
; RV32-NEXT: vslideup.vi v12, v16, 8, v0.t
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a3, 56
+; RV32-NEXT: mul a1, a1, a3
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs4r.v v12, (a1) # Unknown-size Folded Spill
; RV32-NEXT: lui a1, %hi(.LCPI6_2)
; RV32-NEXT: addi a1, a1, %lo(.LCPI6_2)
; RV32-NEXT: lui a3, %hi(.LCPI6_3)
; RV32-NEXT: addi a3, a3, %lo(.LCPI6_3)
; RV32-NEXT: vsetvli zero, a2, e16, m4, ta, ma
-; RV32-NEXT: vle16.v v0, (a1)
-; RV32-NEXT: vle16.v v4, (a3)
+; RV32-NEXT: vle16.v v12, (a1)
+; RV32-NEXT: vle16.v v8, (a3)
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a3, 28
+; RV32-NEXT: mul a1, a1, a3
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs4r.v v8, (a1) # Unknown-size Folded Spill
; RV32-NEXT: lui a1, %hi(.LCPI6_4)
; RV32-NEXT: addi a1, a1, %lo(.LCPI6_4)
; RV32-NEXT: vsetivli zero, 16, e16, m2, ta, ma
-; RV32-NEXT: vle16.v v10, (a1)
+; RV32-NEXT: vle16.v v2, (a1)
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a3, 72
+; RV32-NEXT: li a3, 68
; RV32-NEXT: mul a1, a1, a3
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, mu
-; RV32-NEXT: vrgatherei16.vv v24, v16, v0
+; RV32-NEXT: vrgatherei16.vv v24, v16, v12
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a3, 36
+; RV32-NEXT: li a3, 44
; RV32-NEXT: mul a1, a1, a3
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a3, 48
+; RV32-NEXT: li a3, 60
; RV32-NEXT: mul a1, a1, a3
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
-; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
-; RV32-NEXT: vrgatherei16.vv v24, v16, v4, v0.t
+; RV32-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a3, 28
+; RV32-NEXT: mul a1, a1, a3
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vl4r.v v4, (a1) # Unknown-size Folded Reload
+; RV32-NEXT: vrgatherei16.vv v24, v8, v4, v0.t
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a3, 56
+; RV32-NEXT: mul a1, a1, a3
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vl4r.v v8, (a1) # Unknown-size Folded Reload
; RV32-NEXT: vsetivli zero, 12, e32, m4, tu, ma
-; RV32-NEXT: vmv.v.v v12, v24
+; RV32-NEXT: vmv.v.v v8, v24
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a3, 36
+; RV32-NEXT: li a3, 56
; RV32-NEXT: mul a1, a1, a3
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
-; RV32-NEXT: vs4r.v v12, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: vs4r.v v8, (a1) # Unknown-size Folded Spill
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: slli a1, a1, 6
+; RV32-NEXT: li a3, 76
+; RV32-NEXT: mul a1, a1, a3
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, mu
-; RV32-NEXT: vrgatherei16.vv v12, v24, v10
-; RV32-NEXT: vmv1r.v v0, v8
+; RV32-NEXT: vrgatherei16.vv v8, v24, v2
+; RV32-NEXT: vmv1r.v v0, v1
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a3, 56
+; RV32-NEXT: li a3, 48
; RV32-NEXT: mul a1, a1, a3
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
-; RV32-NEXT: vslideup.vi v12, v24, 6, v0.t
+; RV32-NEXT: vslideup.vi v8, v24, 6, v0.t
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: slli a1, a1, 5
+; RV32-NEXT: li a3, 44
+; RV32-NEXT: mul a1, a1, a3
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
-; RV32-NEXT: vs4r.v v12, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: vs4r.v v8, (a1) # Unknown-size Folded Spill
; RV32-NEXT: lui a1, %hi(.LCPI6_5)
; RV32-NEXT: addi a1, a1, %lo(.LCPI6_5)
; RV32-NEXT: lui a3, %hi(.LCPI6_6)
; RV32-NEXT: addi a3, a3, %lo(.LCPI6_6)
; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, mu
-; RV32-NEXT: vle16.v v12, (a1)
-; RV32-NEXT: vle16.v v8, (a3)
-; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a3, 12
-; RV32-NEXT: mul a1, a1, a3
-; RV32-NEXT: add a1, sp, a1
-; RV32-NEXT: addi a1, a1, 16
-; RV32-NEXT: vs4r.v v8, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: vle16.v v24, (a1)
+; RV32-NEXT: vle16.v v4, (a3)
; RV32-NEXT: li a1, 960
-; RV32-NEXT: vmv.s.x v8, a1
+; RV32-NEXT: vmv.s.x v0, a1
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a3, 72
-; RV32-NEXT: mul a1, a1, a3
+; RV32-NEXT: slli a1, a1, 4
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
-; RV32-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload
-; RV32-NEXT: vrgatherei16.vv v24, v0, v12
-; RV32-NEXT: vmv1r.v v3, v8
-; RV32-NEXT: vmv1r.v v0, v8
+; RV32-NEXT: vs1r.v v0, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: vrgatherei16.vv v8, v16, v24
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a3, 12
+; RV32-NEXT: li a3, 60
; RV32-NEXT: mul a1, a1, a3
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
-; RV32-NEXT: vl4r.v v8, (a1) # Unknown-size Folded Reload
-; RV32-NEXT: vrgatherei16.vv v24, v16, v8, v0.t
+; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
+; RV32-NEXT: vrgatherei16.vv v8, v24, v4, v0.t
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a3, 24
+; RV32-NEXT: li a3, 28
; RV32-NEXT: mul a1, a1, a3
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
-; RV32-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
; RV32-NEXT: lui a1, %hi(.LCPI6_7)
; RV32-NEXT: addi a1, a1, %lo(.LCPI6_7)
; RV32-NEXT: lui a3, %hi(.LCPI6_8)
; RV32-NEXT: addi a3, a3, %lo(.LCPI6_8)
; RV32-NEXT: vsetivli zero, 16, e16, m2, ta, ma
-; RV32-NEXT: vle16.v v8, (a1)
+; RV32-NEXT: vle16.v v16, (a1)
; RV32-NEXT: lui a1, %hi(.LCPI6_9)
; RV32-NEXT: addi a1, a1, %lo(.LCPI6_9)
; RV32-NEXT: vsetvli zero, a2, e16, m4, ta, ma
-; RV32-NEXT: vle16.v v4, (a3)
-; RV32-NEXT: vle16.v v12, (a1)
+; RV32-NEXT: vle16.v v8, (a3)
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a3, a3, 2
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs4r.v v8, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: vle16.v v8, (a1)
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: slli a1, a1, 3
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
-; RV32-NEXT: vs4r.v v12, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: vs4r.v v8, (a1) # Unknown-size Folded Spill
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: slli a1, a1, 6
+; RV32-NEXT: li a3, 76
+; RV32-NEXT: mul a1, a1, a3
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
-; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
+; RV32-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, mu
-; RV32-NEXT: vrgatherei16.vv v12, v24, v8
+; RV32-NEXT: vrgatherei16.vv v12, v8, v16
+; RV32-NEXT: vmv1r.v v0, v1
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: slli a1, a1, 4
-; RV32-NEXT: add a1, sp, a1
-; RV32-NEXT: addi a1, a1, 16
-; RV32-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload
-; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a3, 56
+; RV32-NEXT: li a3, 48
; RV32-NEXT: mul a1, a1, a3
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
-; RV32-NEXT: vmv4r.v v24, v16
; RV32-NEXT: vslideup.vi v12, v16, 4, v0.t
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a3, 12
+; RV32-NEXT: li a3, 24
; RV32-NEXT: mul a1, a1, a3
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vs4r.v v12, (a1) # Unknown-size Folded Spill
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a3, 72
+; RV32-NEXT: li a3, 68
; RV32-NEXT: mul a1, a1, a3
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
-; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
+; RV32-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a1, a1, 2
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vl4r.v v20, (a1) # Unknown-size Folded Reload
; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, mu
-; RV32-NEXT: vrgatherei16.vv v8, v16, v4
-; RV32-NEXT: vmv1r.v v0, v3
+; RV32-NEXT: vrgatherei16.vv v8, v0, v20
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a3, 48
-; RV32-NEXT: mul a1, a1, a3
+; RV32-NEXT: slli a1, a1, 4
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
-; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
+; RV32-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: slli a1, a1, 3
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
-; RV32-NEXT: vl4r.v v28, (a1) # Unknown-size Folded Reload
-; RV32-NEXT: vrgatherei16.vv v8, v16, v28, v0.t
+; RV32-NEXT: vl4r.v v20, (a1) # Unknown-size Folded Reload
+; RV32-NEXT: vrgatherei16.vv v8, v24, v20, v0.t
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: slli a1, a1, 4
; RV32-NEXT: add a1, sp, a1
@@ -461,48 +473,51 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
; RV32-NEXT: lui a1, 15
; RV32-NEXT: vmv.s.x v3, a1
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: slli a1, a1, 6
+; RV32-NEXT: li a3, 76
+; RV32-NEXT: mul a1, a1, a3
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
-; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
-; RV32-NEXT: vslideup.vi v8, v16, 6
+; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
+; RV32-NEXT: vslideup.vi v8, v24, 6
; RV32-NEXT: vmv1r.v v0, v3
-; RV32-NEXT: vrgatherei16.vv v8, v24, v12, v0.t
+; RV32-NEXT: vrgatherei16.vv v8, v16, v12, v0.t
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: slli a1, a1, 2
+; RV32-NEXT: li a3, 76
+; RV32-NEXT: mul a1, a1, a3
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vs4r.v v8, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: vmv4r.v v24, v16
; RV32-NEXT: lui a1, %hi(.LCPI6_11)
; RV32-NEXT: addi a1, a1, %lo(.LCPI6_11)
; RV32-NEXT: lui a3, %hi(.LCPI6_12)
; RV32-NEXT: addi a3, a3, %lo(.LCPI6_12)
; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, mu
-; RV32-NEXT: vle16.v v24, (a1)
+; RV32-NEXT: vle16.v v28, (a1)
; RV32-NEXT: vle16.v v4, (a3)
; RV32-NEXT: li a1, 1008
; RV32-NEXT: vmv.s.x v0, a1
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: slli a1, a1, 3
+; RV32-NEXT: slli a1, a1, 2
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vs1r.v v0, (a1) # Unknown-size Folded Spill
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a3, 72
+; RV32-NEXT: li a3, 68
; RV32-NEXT: mul a1, a1, a3
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
-; RV32-NEXT: vrgatherei16.vv v8, v16, v24
+; RV32-NEXT: vrgatherei16.vv v8, v16, v28
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a3, 48
+; RV32-NEXT: li a3, 60
; RV32-NEXT: ...
[truncated]
``````````
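A note on the isReallyTriviallyReMaterializable hunk above: the new cases only fire when operand 1, the passthru tied to the destination ("$rd = $rs1" in the pseudo definitions), is undef. A sketch of why that restriction is needed (illustrative, not from the patch):

```asm
# vmv.s.x only writes element 0; the remaining elements of the destination
# come from the passthru (here shown with a tail-undisturbed policy).
        vsetivli zero, 1, e32, m1, tu, ma
        vmv.s.x  v8, a0              # v8[0] = a0, v8[1..] = old v8

# Re-executing this at another program point would need the passthru's
# original contents, which may have been clobbered by then. So remat is
# only trivially safe when the passthru is undef and no lanes are preserved.
```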
https://github.com/llvm/llvm-project/pull/108012
More information about the llvm-commits mailing list