[llvm] [RISCV] Mark subvector extracts from index 0 as cheap (PR #134101)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Apr 2 08:41:13 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-risc-v
Author: Luke Lau (lukel97)
<details>
<summary>Changes</summary>
Previously we only marked fixed length vector extracts as cheap, so this extends it to any extract at index 0 which should just be a subreg extract.
This allows extracts of i1 vectors to be considered for DAG combines, but also scalable vectors too.
This causes some slight improvements with large legalized fixed-length vectors, but the underlying motivation for this is to prevent an unprofitable DAG combine on a scalable vector in an upcoming patch.
---
Patch is 748.12 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/134101.diff
17 Files Affected:
- (modified) llvm/lib/Target/RISCV/RISCVISelLowering.cpp (+4)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll (+275-284)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll (+2654-2937)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll (+2308-2635)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int.ll (+24-24)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-deinterleave.ll (+12-14)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfwadd.ll (+25-46)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfwmul.ll (+25-46)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfwsub.ll (+25-46)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpgather.ll (+16-17)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwadd.ll (+29-60)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwaddu.ll (+29-60)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmul.ll (+32-63)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmulsu.ll (+32-63)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmulu.ll (+32-63)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsub.ll (+29-60)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsubu.ll (+29-60)
``````````diff
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 967a6cf82433f..cc4ad13395e61 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -2250,6 +2250,10 @@ bool RISCVTargetLowering::isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
if (!isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, ResVT))
return false;
+ // Extracts from index 0 are just subreg extracts.
+ if (Index == 0)
+ return true;
+
// Only support extracting a fixed from a fixed vector for now.
if (ResVT.isScalableVector() || SrcVT.isScalableVector())
return false;
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll
index 8dfa79a0f1596..f6bdd45330384 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll
@@ -183,461 +183,452 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
; RV32-NEXT: csrr a2, vlenb
-; RV32-NEXT: li a3, 96
+; RV32-NEXT: li a3, 100
; RV32-NEXT: mul a2, a2, a3
; RV32-NEXT: sub sp, sp, a2
-; RV32-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0xe0, 0x00, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 96 * vlenb
+; RV32-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0xe4, 0x00, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 100 * vlenb
; RV32-NEXT: addi a4, a1, 128
; RV32-NEXT: addi a5, a1, 256
; RV32-NEXT: li a2, 32
; RV32-NEXT: lui a3, 12
+; RV32-NEXT: lui a6, 12291
+; RV32-NEXT: lui a7, %hi(.LCPI8_0)
+; RV32-NEXT: addi a7, a7, %lo(.LCPI8_0)
; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma
-; RV32-NEXT: vle32.v v16, (a5)
-; RV32-NEXT: lui a5, 12291
-; RV32-NEXT: vmv.s.x v3, a3
-; RV32-NEXT: vle32.v v24, (a1)
+; RV32-NEXT: vle32.v v24, (a5)
+; RV32-NEXT: vmv.s.x v0, a3
+; RV32-NEXT: vle32.v v8, (a1)
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a6, 80
-; RV32-NEXT: mul a1, a1, a6
+; RV32-NEXT: slli a1, a1, 6
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
-; RV32-NEXT: vs8r.v v24, (a1) # vscale x 64-byte Folded Spill
+; RV32-NEXT: vs8r.v v8, (a1) # vscale x 64-byte Folded Spill
+; RV32-NEXT: addi a6, a6, 3
; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma
-; RV32-NEXT: vslideup.vi v8, v16, 4
-; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a6, 56
-; RV32-NEXT: mul a1, a1, a6
-; RV32-NEXT: add a1, sp, a1
-; RV32-NEXT: addi a1, a1, 16
-; RV32-NEXT: vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
+; RV32-NEXT: vslideup.vi v16, v24, 4
; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma
-; RV32-NEXT: vslidedown.vi v16, v16, 16
+; RV32-NEXT: vslidedown.vi v8, v24, 16
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a6, 88
-; RV32-NEXT: mul a1, a1, a6
+; RV32-NEXT: li a5, 76
+; RV32-NEXT: mul a1, a1, a5
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
-; RV32-NEXT: vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
-; RV32-NEXT: vmv1r.v v0, v3
+; RV32-NEXT: vs8r.v v24, (a1) # vscale x 64-byte Folded Spill
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a6, 88
-; RV32-NEXT: mul a1, a1, a6
+; RV32-NEXT: li a5, 92
+; RV32-NEXT: mul a1, a1, a5
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
-; RV32-NEXT: vl8r.v v16, (a1) # vscale x 64-byte Folded Reload
+; RV32-NEXT: vs8r.v v8, (a1) # vscale x 64-byte Folded Spill
+; RV32-NEXT: vmv1r.v v30, v0
; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, mu
-; RV32-NEXT: vslideup.vi v8, v16, 10, v0.t
+; RV32-NEXT: vslideup.vi v16, v8, 10, v0.t
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a6, 68
-; RV32-NEXT: mul a1, a1, a6
+; RV32-NEXT: li a5, 72
+; RV32-NEXT: mul a1, a1, a5
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
-; RV32-NEXT: vs4r.v v8, (a1) # vscale x 32-byte Folded Spill
+; RV32-NEXT: vs4r.v v16, (a1) # vscale x 32-byte Folded Spill
; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma
; RV32-NEXT: vle32.v v8, (a4)
-; RV32-NEXT: addi a5, a5, 3
-; RV32-NEXT: vmv.s.x v0, a5
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a4, 56
+; RV32-NEXT: li a4, 84
; RV32-NEXT: mul a1, a1, a4
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
-; RV32-NEXT: vl8r.v v16, (a1) # vscale x 64-byte Folded Reload
+; RV32-NEXT: vs8r.v v8, (a1) # vscale x 64-byte Folded Spill
; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma
-; RV32-NEXT: vslideup.vi v4, v16, 2
-; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma
-; RV32-NEXT: vmerge.vvm v24, v8, v24, v0
-; RV32-NEXT: vmv1r.v v0, v3
-; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a4, 88
-; RV32-NEXT: mul a1, a1, a4
-; RV32-NEXT: add a1, sp, a1
-; RV32-NEXT: addi a1, a1, 16
-; RV32-NEXT: vl8r.v v16, (a1) # vscale x 64-byte Folded Reload
-; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, mu
-; RV32-NEXT: vslideup.vi v4, v16, 8, v0.t
+; RV32-NEXT: vle16.v v28, (a7)
+; RV32-NEXT: vmv.s.x v0, a6
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: slli a1, a1, 6
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
-; RV32-NEXT: vs4r.v v4, (a1) # vscale x 32-byte Folded Spill
-; RV32-NEXT: lui a1, %hi(.LCPI8_0)
-; RV32-NEXT: addi a1, a1, %lo(.LCPI8_0)
-; RV32-NEXT: lui a4, 49164
-; RV32-NEXT: lui a5, %hi(.LCPI8_1)
-; RV32-NEXT: addi a5, a5, %lo(.LCPI8_1)
-; RV32-NEXT: vle16.v v6, (a1)
-; RV32-NEXT: addi a4, a4, 12
-; RV32-NEXT: vle16.v v4, (a5)
-; RV32-NEXT: vmv.s.x v0, a4
-; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma
-; RV32-NEXT: vrgatherei16.vv v16, v24, v6
+; RV32-NEXT: vl8r.v v16, (a1) # vscale x 64-byte Folded Reload
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a4, 48
+; RV32-NEXT: li a4, 84
; RV32-NEXT: mul a1, a1, a4
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
-; RV32-NEXT: vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
-; RV32-NEXT: vmv8r.v v16, v8
+; RV32-NEXT: vl8r.v v8, (a1) # vscale x 64-byte Folded Reload
+; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma
+; RV32-NEXT: vmerge.vvm v16, v8, v16, v0
+; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
+; RV32-NEXT: vrgatherei16.vv v0, v16, v28
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a4, 72
+; RV32-NEXT: li a4, 52
; RV32-NEXT: mul a1, a1, a4
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
-; RV32-NEXT: vs8r.v v8, (a1) # vscale x 64-byte Folded Spill
+; RV32-NEXT: vs8r.v v0, (a1) # vscale x 64-byte Folded Spill
+; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, mu
+; RV32-NEXT: vslideup.vi v8, v24, 2
+; RV32-NEXT: vmv1r.v v0, v30
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a4, 80
+; RV32-NEXT: li a4, 92
; RV32-NEXT: mul a1, a1, a4
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
-; RV32-NEXT: vl8r.v v8, (a1) # vscale x 64-byte Folded Reload
-; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma
-; RV32-NEXT: vmerge.vvm v8, v16, v8, v0
-; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT: vrgatherei16.vv v16, v8, v4
+; RV32-NEXT: vl8r.v v16, (a1) # vscale x 64-byte Folded Reload
+; RV32-NEXT: vslideup.vi v8, v16, 8, v0.t
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a4, 40
+; RV32-NEXT: li a4, 60
; RV32-NEXT: mul a1, a1, a4
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
-; RV32-NEXT: vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
-; RV32-NEXT: lui a5, 196656
-; RV32-NEXT: lui a1, %hi(.LCPI8_2)
-; RV32-NEXT: addi a1, a1, %lo(.LCPI8_2)
-; RV32-NEXT: lui a6, 3
-; RV32-NEXT: lui a7, 786624
-; RV32-NEXT: lui t0, 768
-; RV32-NEXT: li a4, 48
-; RV32-NEXT: addi a5, a5, 48
-; RV32-NEXT: vmv.s.x v0, a5
-; RV32-NEXT: addi a6, a6, 3
-; RV32-NEXT: csrr a5, vlenb
-; RV32-NEXT: li t1, 80
-; RV32-NEXT: mul a5, a5, t1
-; RV32-NEXT: add a5, sp, a5
-; RV32-NEXT: addi a5, a5, 16
-; RV32-NEXT: vl8r.v v8, (a5) # vscale x 64-byte Folded Reload
-; RV32-NEXT: csrr a5, vlenb
-; RV32-NEXT: li t1, 72
-; RV32-NEXT: mul a5, a5, t1
-; RV32-NEXT: add a5, sp, a5
-; RV32-NEXT: addi a5, a5, 16
-; RV32-NEXT: vl8r.v v16, (a5) # vscale x 64-byte Folded Reload
+; RV32-NEXT: vs4r.v v8, (a1) # vscale x 32-byte Folded Spill
+; RV32-NEXT: lui a7, 49164
+; RV32-NEXT: lui a1, %hi(.LCPI8_1)
+; RV32-NEXT: addi a1, a1, %lo(.LCPI8_1)
+; RV32-NEXT: lui t2, 3
+; RV32-NEXT: lui t1, 196656
+; RV32-NEXT: lui a4, %hi(.LCPI8_3)
+; RV32-NEXT: addi a4, a4, %lo(.LCPI8_3)
+; RV32-NEXT: lui t0, 786624
+; RV32-NEXT: li a5, 48
+; RV32-NEXT: lui a6, 768
+; RV32-NEXT: addi a7, a7, 12
+; RV32-NEXT: vmv.s.x v0, a7
+; RV32-NEXT: addi t2, t2, 3
+; RV32-NEXT: csrr a7, vlenb
+; RV32-NEXT: li t3, 84
+; RV32-NEXT: mul a7, a7, t3
+; RV32-NEXT: add a7, sp, a7
+; RV32-NEXT: addi a7, a7, 16
+; RV32-NEXT: vl8r.v v16, (a7) # vscale x 64-byte Folded Reload
+; RV32-NEXT: csrr a7, vlenb
+; RV32-NEXT: slli a7, a7, 6
+; RV32-NEXT: add a7, sp, a7
+; RV32-NEXT: addi a7, a7, 16
+; RV32-NEXT: vl8r.v v8, (a7) # vscale x 64-byte Folded Reload
; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma
; RV32-NEXT: vmerge.vvm v8, v16, v8, v0
-; RV32-NEXT: csrr a5, vlenb
-; RV32-NEXT: slli a5, a5, 3
-; RV32-NEXT: add a5, sp, a5
-; RV32-NEXT: addi a5, a5, 16
-; RV32-NEXT: vs8r.v v8, (a5) # vscale x 64-byte Folded Spill
-; RV32-NEXT: vmv.s.x v0, a6
-; RV32-NEXT: addi a5, a7, 192
+; RV32-NEXT: csrr a7, vlenb
+; RV32-NEXT: li t3, 36
+; RV32-NEXT: mul a7, a7, t3
+; RV32-NEXT: add a7, sp, a7
+; RV32-NEXT: addi a7, a7, 16
+; RV32-NEXT: vs8r.v v8, (a7) # vscale x 64-byte Folded Spill
+; RV32-NEXT: vmv.s.x v0, t2
+; RV32-NEXT: addi a7, t1, 48
+; RV32-NEXT: csrr t1, vlenb
+; RV32-NEXT: li t2, 92
+; RV32-NEXT: mul t1, t1, t2
+; RV32-NEXT: add t1, sp, t1
+; RV32-NEXT: addi t1, t1, 16
+; RV32-NEXT: vl8r.v v24, (t1) # vscale x 64-byte Folded Reload
+; RV32-NEXT: csrr t1, vlenb
+; RV32-NEXT: li t2, 76
+; RV32-NEXT: mul t1, t1, t2
+; RV32-NEXT: add t1, sp, t1
+; RV32-NEXT: addi t1, t1, 16
+; RV32-NEXT: vl8r.v v8, (t1) # vscale x 64-byte Folded Reload
+; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma
+; RV32-NEXT: vmerge.vvm v8, v24, v8, v0
+; RV32-NEXT: addi t1, sp, 16
+; RV32-NEXT: vs4r.v v8, (t1) # vscale x 32-byte Folded Spill
+; RV32-NEXT: vmv.s.x v0, a7
+; RV32-NEXT: addi a3, a3, 12
+; RV32-NEXT: csrr a7, vlenb
+; RV32-NEXT: slli a7, a7, 6
+; RV32-NEXT: add a7, sp, a7
+; RV32-NEXT: addi a7, a7, 16
+; RV32-NEXT: vl8r.v v24, (a7) # vscale x 64-byte Folded Reload
+; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma
+; RV32-NEXT: vmerge.vvm v8, v16, v24, v0
+; RV32-NEXT: csrr a7, vlenb
+; RV32-NEXT: li t1, 20
+; RV32-NEXT: mul a7, a7, t1
+; RV32-NEXT: add a7, sp, a7
+; RV32-NEXT: addi a7, a7, 16
+; RV32-NEXT: vs8r.v v8, (a7) # vscale x 64-byte Folded Spill
+; RV32-NEXT: vmv8r.v v16, v24
+; RV32-NEXT: vmv.s.x v0, a3
+; RV32-NEXT: addi a3, t0, 192
+; RV32-NEXT: csrr a7, vlenb
+; RV32-NEXT: li t0, 92
+; RV32-NEXT: mul a7, a7, t0
+; RV32-NEXT: add a7, sp, a7
+; RV32-NEXT: addi a7, a7, 16
+; RV32-NEXT: vl8r.v v24, (a7) # vscale x 64-byte Folded Reload
+; RV32-NEXT: csrr a7, vlenb
+; RV32-NEXT: li t0, 76
+; RV32-NEXT: mul a7, a7, t0
+; RV32-NEXT: add a7, sp, a7
+; RV32-NEXT: addi a7, a7, 16
+; RV32-NEXT: vl8r.v v8, (a7) # vscale x 64-byte Folded Reload
+; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma
+; RV32-NEXT: vmerge.vvm v8, v24, v8, v0
+; RV32-NEXT: csrr a7, vlenb
+; RV32-NEXT: li t0, 48
+; RV32-NEXT: mul a7, a7, t0
+; RV32-NEXT: add a7, sp, a7
+; RV32-NEXT: addi a7, a7, 16
+; RV32-NEXT: vs4r.v v8, (a7) # vscale x 32-byte Folded Spill
+; RV32-NEXT: vmv.s.x v0, a3
+; RV32-NEXT: li a3, 192
+; RV32-NEXT: csrr a7, vlenb
+; RV32-NEXT: li t0, 84
+; RV32-NEXT: mul a7, a7, t0
+; RV32-NEXT: add a7, sp, a7
+; RV32-NEXT: addi a7, a7, 16
+; RV32-NEXT: vl8r.v v8, (a7) # vscale x 64-byte Folded Reload
+; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma
+; RV32-NEXT: vmerge.vvm v8, v8, v16, v0
+; RV32-NEXT: csrr a7, vlenb
+; RV32-NEXT: li t0, 28
+; RV32-NEXT: mul a7, a7, t0
+; RV32-NEXT: add a7, sp, a7
+; RV32-NEXT: addi a7, a7, 16
+; RV32-NEXT: vs8r.v v8, (a7) # vscale x 64-byte Folded Spill
+; RV32-NEXT: vmv.s.x v0, a5
+; RV32-NEXT: addi a5, a6, 768
; RV32-NEXT: csrr a6, vlenb
-; RV32-NEXT: li a7, 88
+; RV32-NEXT: li a7, 92
; RV32-NEXT: mul a6, a6, a7
; RV32-NEXT: add a6, sp, a6
; RV32-NEXT: addi a6, a6, 16
-; RV32-NEXT: vl8r.v v8, (a6) # vscale x 64-byte Folded Reload
+; RV32-NEXT: vl8r.v v24, (a6) # vscale x 64-byte Folded Reload
; RV32-NEXT: csrr a6, vlenb
-; RV32-NEXT: li a7, 56
+; RV32-NEXT: li a7, 76
; RV32-NEXT: mul a6, a6, a7
; RV32-NEXT: add a6, sp, a6
; RV32-NEXT: addi a6, a6, 16
-; RV32-NEXT: vl8r.v v24, (a6) # vscale x 64-byte Folded Reload
+; RV32-NEXT: vl8r.v v8, (a6) # vscale x 64-byte Folded Reload
; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma
-; RV32-NEXT: vmerge.vvm v4, v8, v24, v0
+; RV32-NEXT: vmerge.vvm v8, v24, v8, v0
+; RV32-NEXT: csrr a6, vlenb
+; RV32-NEXT: li a7, 44
+; RV32-NEXT: mul a6, a6, a7
+; RV32-NEXT: add a6, sp, a6
+; RV32-NEXT: addi a6, a6, 16
+; RV32-NEXT: vs4r.v v8, (a6) # vscale x 32-byte Folded Spill
; RV32-NEXT: vmv.s.x v0, a5
-; RV32-NEXT: addi a3, a3, 12
-; RV32-NEXT: csrr a5, vlenb
-; RV32-NEXT: li a6, 80
-; RV32-NEXT: mul a5, a5, a6
-; RV32-NEXT: add a5, sp, a5
-; RV32-NEXT: addi a5, a5, 16
-; RV32-NEXT: vl8r.v v16, (a5) # vscale x 64-byte Folded Reload
-; RV32-NEXT: csrr a5, vlenb
-; RV32-NEXT: li a6, 72
-; RV32-NEXT: mul a5, a5, a6
-; RV32-NEXT: add a5, sp, a5
-; RV32-NEXT: addi a5, a5, 16
-; RV32-NEXT: vl8r.v v8, (a5) # vscale x 64-byte Folded Reload
+; RV32-NEXT: vle16.v v6, (a1)
+; RV32-NEXT: vle16.v v2, (a4)
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a4, 84
+; RV32-NEXT: mul a1, a1, a4
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vl8r.v v8, (a1) # vscale x 64-byte Folded Reload
; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma
; RV32-NEXT: vmerge.vvm v8, v8, v16, v0
-; RV32-NEXT: csrr a5, vlenb
-; RV32-NEXT: slli a5, a5, 5
-; RV32-NEXT: add a5, sp, a5
-; RV32-NEXT: addi a5, a5, 16
-; RV32-NEXT: vs8r.v v8, (a5) # vscale x 64-byte Folded Spill
-; RV32-NEXT: vmv.s.x v0, a3
-; RV32-NEXT: addi a3, t0, 768
-; RV32-NEXT: csrr a5, vlenb
-; RV32-NEXT: li a6, 88
-; RV32-NEXT: mul a5, a5, a6
-; RV32-NEXT: add a5, sp, a5
-; RV32-NEXT: addi a5, a5, 16
-; RV32-NEXT: vl8r.v v8, (a5) # vscale x 64-byte Folded Reload
-; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma
-; RV32-NEXT: vmerge.vvm v8, v8, v24, v0
-; RV32-NEXT: csrr a5, vlenb
-; RV32-NEXT: li a6, 28
-; RV32-NEXT: mul a5, a5, a6
-; RV32-NEXT: add a5, sp, a5
-; RV32-NEXT: addi a5, a5, 16
-; RV32-NEXT: vs4r.v v8, (a5) # vscale x 32-byte Folded Spill
-; RV32-NEXT: vmv.s.x v0, a3
-; RV32-NEXT: lui a3, 3073
-; RV32-NEXT: csrr a5, vlenb
-; RV32-NEXT: li a6, 80
-; RV32-NEXT: mul a5, a5, a6
-; RV32-NEXT: add a5, sp, a5
-; RV32-NEXT: addi a5, a5, 16
-; RV32-NEXT: vl8r.v v16, (a5) # vscale x 64-byte Folded Reload
-; RV32-NEXT: csrr a5, vlenb
-; RV32-NEXT: li a6, 72
-; RV32-NEXT: mul a5, a5, a6
-; RV32-NEXT: add a5, sp, a5
-; RV32-NEXT: addi a5, a5, 16
-; RV32-NEXT: vl8r.v v8, (a5) # vscale x 64-byte Folded Reload
-; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma
-; RV32-NEXT: vmerge.vvm v16, v8, v16, v0
-; RV32-NEXT: csrr a5, vlenb
-; RV32-NEXT: slli a5, a5, 4
-; RV32-NEXT: add a5, sp, a5
-; RV32-NEXT: addi a5, a5, 16
-; RV32-NEXT: vs8r.v v16, (a5) # vscale x 64-byte Folded Spill
-; RV32-NEXT: vmv.s.x v0, a4
-; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma
-; RV32-NEXT: vle16.v v28, (a1)
-; RV32-NEXT: addi a1, a3, -1024
-; RV32-NEXT: vmv4r.v v8, v24
-; RV32-NEXT: csrr a3, vlenb
-; RV32-NEXT: li a4, 88
-; RV32-NEXT: mul a3, a3, a4
-; RV32-NEXT: add a3, sp, a3
-; RV32-NEXT: addi a3, a3, 16
-; RV32-NEXT: vl8r.v v16, (a3) # vscale x 64-byte Folded Reload
-; RV32-NEXT: vmerge.vvm v16, v16, v24, v0
-; RV32-NEXT: csrr a3, vlenb
-; RV32-NEXT: li a4, 24
-; RV32-NEXT: mul a3, a3, a4
-; RV32-NEXT: add a3, sp, a3
-; RV32-NEXT: addi a3, a3, 16
-; RV32-NEXT: vs4r.v v16, (a3) # vscale x 32-byte Folded Spill
-; RV32-NEXT: vmv.s.x v0, a1
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: slli a1, a1, 3
+; RV32-NEXT: li a4, 12
+; RV32-NEXT: mul a1, a1, a4
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
-; RV32-NEXT: vl8r.v v16, (a1) # vscale x 64-byte Folded Reload
-; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma
-; RV32-NEXT: vrgatherei16.vv v8, v16, v28
-; RV32-NEXT: addi a1, sp, 16
; RV32-NEXT: vs8r.v v8, (a1) # vscale x 64-byte Folded Spill
+; RV32-NEXT: vmv.s.x v0, a3
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a3, 80
+; RV32-NEXT: li a3, 36
; RV32-NEXT: mul a1, a1, a3
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
-; RV32-NEXT: vl8r.v v16, (a1) # vscale x 64-byte Folded Reload
+; RV32-NEXT: vl8r.v v8, (a1) # vscale x 64-byte Folded Reload
+; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
+; RV32-NEXT: vrgatherei16.vv v24, v8, v6
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a3, 72
+; RV32-NEXT: slli a1, a1, 2
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs8r.v v24, (a1) # vscale x 64-byte Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a3, 92
+; RV32-NEXT: mul a1, a1, a3
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vl8r.v v24, (a1) # vscale x 64-byte Folded Reload
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a3, 76
; RV32-NEXT: mul a1, a1, a3
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vl8r.v v8, (a1) # vscale x 64-byte Folded Reload
-; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma
-; RV32-NEXT: vmerge.vvm v16, v8, v16, v0
+; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; RV32-NEXT: vmerge.vvm v8, v24, v8, v0
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a2, 80
-; RV32-NEXT: mul a1, a1, a2
+; RV32-NEXT: li a3, 92
+; RV32-NEXT: mul a1, a1, a3
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs4r.v v8, (a1) # vscale x 32-byte Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a3, 20
+; RV32-NEXT: mul a1, a1, a3
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
-; RV32-NEXT: vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
-; RV32-NEXT: lui a1, %hi(.LCPI8_3)
-; RV32-NEXT: addi a1, a1, %lo(.LCPI8_3)
-; RV32-NEXT: li a2, 192
-; RV32-NEXT: vmv.s.x v0, a2
+; RV32-NEXT: vl8r.v v8, (a1) # vscale x 64-byte Folded Reload
+; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma
+; RV32-NEXT: vrgatherei16.vv v24, v8, v2
+; RV32-NEXT: lui a1, %hi(.LCPI8_2)
+; RV32-NEXT: addi a1, a1, %lo(.LCPI8_2)
+; RV32-NEXT: lui a3, 3073
+; RV32-NEXT: addi a3, a3, -1024
+; RV32-NEXT: vmv.s.x v0, a3
; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV32-NEXT: vle16.v v12, (a1)
+; RV32-NEXT: vle16.v v3, (a1)
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a2, 88
-; RV32-NEXT: mul a1, a1, a2
+; RV32-NEXT: li a3, 84
+; RV32-NEXT: mul a1, a1, a3
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
-; RV32-NEXT: vl8r.v v16, (a1) # vscale x 64-byte Folded Reload
-; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma
-; RV32-NEXT: vmerge.vvm v8, v16, v24, v0
+; RV32-NEXT: vl8r.v v8, (a1) # vscale x 64-byte Folded Reload
+; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma
+; RV32-NEXT: vmerge.vvm v8, v8, v16, v0
; RV32-NEXT: ...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/134101
More information about the llvm-commits
mailing list