[llvm] 502edeb - [ValueTypes][RISCV] Cap RVV fixed-length vectors by size

Fraser Cormack via llvm-commits llvm-commits at lists.llvm.org
Wed Jun 9 04:23:55 PDT 2021


Author: Fraser Cormack
Date: 2021-06-09T12:15:37+01:00
New Revision: 502edebd9d6eb4665add9fd5edaa2dd1b1b24298

URL: https://github.com/llvm/llvm-project/commit/502edebd9d6eb4665add9fd5edaa2dd1b1b24298
DIFF: https://github.com/llvm/llvm-project/commit/502edebd9d6eb4665add9fd5edaa2dd1b1b24298.diff

LOG: [ValueTypes][RISCV] Cap RVV fixed-length vectors by size

This patch changes RVV's policy for its supported list of fixed-length
vector types, capping it by vector size rather than by element count. Now
all 1024-byte vectors (of supported element types) are supported, rather
than all 256-element vectors.
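
As a rough sketch (names taken from the RISCVISelLowering.cpp hunk below),
the check in useRVVForFixedLengthVectorVT changes from an element-count cap
to a size cap:

    // Before: cap by element count.
    if (VT.getVectorNumElements() > 256)
      return false;

    // After: cap by total fixed size (1024 bytes == 8192 bits).
    if (VT.getFixedSizeInBits() > 1024 * 8)
      return false;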

This is a more natural fit for the architecture, and allows us, for
example, to improve support for vector bitcasts.
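
For instance, the bitcast_1024B test updated below operates on a
<256 x i16> and a <512 x i8>, which are the same size (4096 bits); with
v512i8 now a simple type, the bitcast can stay in vector registers, which
is why the bulky VLEN512 scalar spill sequence disappears from the
regenerated checks. A minimal illustrative check of the size equivalence
(hypothetical helper, not part of the patch):

    #include "llvm/Support/MachineValueType.h"

    // Both simple types describe 4096-bit vectors, so a bitcast between
    // them is a pure reinterpretation of the same bits.
    static bool sameSize() {
      return llvm::MVT(llvm::MVT::v256i16).getFixedSizeInBits() ==
             llvm::MVT(llvm::MVT::v512i8).getFixedSizeInBits();
    }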

This change necessitated adding some new simple types to avoid "regressing"
on the number of currently-supported vectors. We round out the 1024-byte
types by adding `v512i8`, `v1024i8`, `v512i16` and `v512f16`.
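
A small illustrative snippet (assumed usage, not from the patch) showing
that the new element counts now map onto simple types via MVT::getVectorVT,
mirroring the MachineValueType.h hunk below:

    #include "llvm/Support/MachineValueType.h"
    using namespace llvm;

    // Each call now finds a matching simple type for the requested
    // element count.
    void newSimpleTypes() {
      MVT A = MVT::getVectorVT(MVT::i8, 512);    // MVT::v512i8
      MVT B = MVT::getVectorVT(MVT::i8, 1024);   // MVT::v1024i8
      MVT C = MVT::getVectorVT(MVT::i16, 512);   // MVT::v512i16
      MVT D = MVT::getVectorVT(MVT::f16, 512);   // MVT::v512f16
      (void)A; (void)B; (void)C; (void)D;
    }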

Reviewed By: craig.topper

Differential Revision: https://reviews.llvm.org/D103884

Added: 
    

Modified: 
    llvm/include/llvm/CodeGen/ValueTypes.td
    llvm/include/llvm/Support/MachineValueType.h
    llvm/lib/CodeGen/ValueTypes.cpp
    llvm/lib/Target/RISCV/RISCVISelLowering.cpp
    llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitcast-large-vector.ll
    llvm/test/CodeGen/RISCV/rvv/interleave-crash.ll
    llvm/utils/TableGen/CodeGenTarget.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/CodeGen/ValueTypes.td b/llvm/include/llvm/CodeGen/ValueTypes.td
index 72581f505e2a0..c797039b78faa 100644
--- a/llvm/include/llvm/CodeGen/ValueTypes.td
+++ b/llvm/include/llvm/CodeGen/ValueTypes.td
@@ -46,166 +46,170 @@ def v256i1  : ValueType<256,  23>;  //  256 x i1 vector value
 def v512i1  : ValueType<512,  24>;  //  512 x i1 vector value
 def v1024i1 : ValueType<1024, 25>;  // 1024 x i1 vector value
 
-def v1i8   : ValueType<8,    26>;  //   1 x i8  vector value
-def v2i8   : ValueType<16,   27>;  //   2 x i8  vector value
-def v4i8   : ValueType<32,   28>;  //   4 x i8  vector value
-def v8i8   : ValueType<64,   29>;  //   8 x i8  vector value
-def v16i8  : ValueType<128,  30>;  //  16 x i8  vector value
-def v32i8  : ValueType<256,  31>;  //  32 x i8  vector value
-def v64i8  : ValueType<512,  32>;  //  64 x i8  vector value
-def v128i8 : ValueType<1024, 33>;  // 128 x i8  vector value
-def v256i8 : ValueType<2048, 34>;  // 256 x i8  vector value
-
-def v1i16   : ValueType<16,   35>;  //   1 x i16 vector value
-def v2i16   : ValueType<32,   36>;  //   2 x i16 vector value
-def v3i16   : ValueType<48,   37>;  //   3 x i16 vector value
-def v4i16   : ValueType<64,   38>;  //   4 x i16 vector value
-def v8i16   : ValueType<128,  39>;  //   8 x i16 vector value
-def v16i16  : ValueType<256,  40>;  //  16 x i16 vector value
-def v32i16  : ValueType<512,  41>;  //  32 x i16 vector value
-def v64i16  : ValueType<1024, 42>;  //  64 x i16 vector value
-def v128i16 : ValueType<2048, 43>;  // 128 x i16 vector value
-def v256i16 : ValueType<4096, 44>;  // 256 x i16 vector value
-
-def v1i32    : ValueType<32,    45>;  //    1 x i32 vector value
-def v2i32    : ValueType<64,    46>;  //    2 x i32 vector value
-def v3i32    : ValueType<96,    47>;  //    3 x i32 vector value
-def v4i32    : ValueType<128,   48>;  //    4 x i32 vector value
-def v5i32    : ValueType<160,   49>;  //    5 x i32 vector value
-def v8i32    : ValueType<256,   50>;  //    8 x i32 vector value
-def v16i32   : ValueType<512,   51>;  //   16 x i32 vector value
-def v32i32   : ValueType<1024,  52>;  //   32 x i32 vector value
-def v64i32   : ValueType<2048,  53>;  //   64 x i32 vector value
-def v128i32  : ValueType<4096,  54>;  //  128 x i32 vector value
-def v256i32  : ValueType<8192,  55>;  //  256 x i32 vector value
-def v512i32  : ValueType<16384, 56>;  //  512 x i32 vector value
-def v1024i32 : ValueType<32768, 57>;  // 1024 x i32 vector value
-def v2048i32 : ValueType<65536, 58>;  // 2048 x i32 vector value
-
-def v1i64   : ValueType<64,    59>;  //   1 x i64 vector value
-def v2i64   : ValueType<128,   60>;  //   2 x i64 vector value
-def v4i64   : ValueType<256,   61>;  //   4 x i64 vector value
-def v8i64   : ValueType<512,   62>;  //   8 x i64 vector value
-def v16i64  : ValueType<1024,  63>;  //  16 x i64 vector value
-def v32i64  : ValueType<2048,  64>;  //  32 x i64 vector value
-def v64i64  : ValueType<4096,  65>;  //  64 x i64 vector value
-def v128i64 : ValueType<8192,  66>;  // 128 x i64 vector value
-def v256i64 : ValueType<16384, 67>;  // 256 x i64 vector value
-
-def v1i128 : ValueType<128, 68>;  //  1 x i128 vector value
-
-def v1f16    : ValueType<16,     69>;  //    1 x f16 vector value
-def v2f16    : ValueType<32,     70>;  //    2 x f16 vector value
-def v3f16    : ValueType<48,     71>;  //    3 x f16 vector value
-def v4f16    : ValueType<64,     72>;  //    4 x f16 vector value
-def v8f16    : ValueType<128,    73>;  //    8 x f16 vector value
-def v16f16   : ValueType<256,    74>;  //   16 x f16 vector value
-def v32f16   : ValueType<512,    75>;  //   32 x f16 vector value
-def v64f16   : ValueType<1024,   76>;  //   64 x f16 vector value
-def v128f16  : ValueType<2048,   77>;  //  128 x f16 vector value
-def v256f16  : ValueType<4096,   78>;  //  256 x f16 vector value
-
-def v2bf16   : ValueType<32,     79>;  //    2 x bf16 vector value
-def v3bf16   : ValueType<48,     80>;  //    3 x bf16 vector value
-def v4bf16   : ValueType<64,     81>;  //    4 x bf16 vector value
-def v8bf16   : ValueType<128,    82>;  //    8 x bf16 vector value
-def v16bf16  : ValueType<256,    83>;  //   16 x bf16 vector value
-def v32bf16  : ValueType<512,    84>;  //   32 x bf16 vector value
-def v64bf16  : ValueType<1024,   85>;  //   64 x bf16 vector value
-def v128bf16 : ValueType<2048,   86>;  //  128 x bf16 vector value
-
-def v1f32    : ValueType<32,     87>;  //    1 x f32 vector value
-def v2f32    : ValueType<64,     88>;  //    2 x f32 vector value
-def v3f32    : ValueType<96,     89>;  //    3 x f32 vector value
-def v4f32    : ValueType<128,    90>;  //    4 x f32 vector value
-def v5f32    : ValueType<160,    91>;  //    5 x f32 vector value
-def v8f32    : ValueType<256,    92>;  //    8 x f32 vector value
-def v16f32   : ValueType<512,    93>;  //   16 x f32 vector value
-def v32f32   : ValueType<1024,   94>;  //   32 x f32 vector value
-def v64f32   : ValueType<2048,   95>;  //   64 x f32 vector value
-def v128f32  : ValueType<4096,   96>;  //  128 x f32 vector value
-def v256f32  : ValueType<8192,   97>;  //  256 x f32 vector value
-def v512f32  : ValueType<16384,  98>;  //  512 x f32 vector value
-def v1024f32 : ValueType<32768,  99>;  // 1024 x f32 vector value
-def v2048f32 : ValueType<65536, 100>;  // 2048 x f32 vector value
-
-def v1f64    : ValueType<64,    101>;  //    1 x f64 vector value
-def v2f64    : ValueType<128,   102>;  //    2 x f64 vector value
-def v4f64    : ValueType<256,   103>;  //    4 x f64 vector value
-def v8f64    : ValueType<512,   104>;  //    8 x f64 vector value
-def v16f64   : ValueType<1024,  105>;  //   16 x f64 vector value
-def v32f64   : ValueType<2048,  106>;  //   32 x f64 vector value
-def v64f64   : ValueType<4096,  107>;  //   64 x f64 vector value
-def v128f64  : ValueType<8192,  108>;  //  128 x f64 vector value
-def v256f64  : ValueType<16384, 109>;  //  256 x f64 vector value
-
-def nxv1i1  : ValueType<1,  110>;  // n x  1 x i1  vector value
-def nxv2i1  : ValueType<2,  111>;  // n x  2 x i1  vector value
-def nxv4i1  : ValueType<4,  112>;  // n x  4 x i1  vector value
-def nxv8i1  : ValueType<8,  113>;  // n x  8 x i1  vector value
-def nxv16i1 : ValueType<16, 114>;  // n x 16 x i1  vector value
-def nxv32i1 : ValueType<32, 115>;  // n x 32 x i1  vector value
-def nxv64i1 : ValueType<64, 116>;  // n x 64 x i1  vector value
-
-def nxv1i8  : ValueType<8,   117>;  // n x  1 x i8  vector value
-def nxv2i8  : ValueType<16,  118>;  // n x  2 x i8  vector value
-def nxv4i8  : ValueType<32,  119>;  // n x  4 x i8  vector value
-def nxv8i8  : ValueType<64,  120>;  // n x  8 x i8  vector value
-def nxv16i8 : ValueType<128, 121>;  // n x 16 x i8  vector value
-def nxv32i8 : ValueType<256, 122>;  // n x 32 x i8  vector value
-def nxv64i8 : ValueType<512, 123>;  // n x 64 x i8  vector value
-
-def nxv1i16  : ValueType<16,  124>;  // n x  1 x i16 vector value
-def nxv2i16  : ValueType<32,  125>;  // n x  2 x i16 vector value
-def nxv4i16  : ValueType<64,  126>;  // n x  4 x i16 vector value
-def nxv8i16  : ValueType<128, 127>;  // n x  8 x i16 vector value
-def nxv16i16 : ValueType<256, 128>;  // n x 16 x i16 vector value
-def nxv32i16 : ValueType<512, 129>;  // n x 32 x i16 vector value
-
-def nxv1i32  : ValueType<32,   130>;  // n x  1 x i32 vector value
-def nxv2i32  : ValueType<64,   131>;  // n x  2 x i32 vector value
-def nxv4i32  : ValueType<128,  132>;  // n x  4 x i32 vector value
-def nxv8i32  : ValueType<256,  133>;  // n x  8 x i32 vector value
-def nxv16i32 : ValueType<512,  134>;  // n x 16 x i32 vector value
-def nxv32i32 : ValueType<1024, 135>;  // n x 32 x i32 vector value
-
-def nxv1i64  : ValueType<64,   136>;  // n x  1 x i64 vector value
-def nxv2i64  : ValueType<128,  137>;  // n x  2 x i64 vector value
-def nxv4i64  : ValueType<256,  138>;  // n x  4 x i64 vector value
-def nxv8i64  : ValueType<512,  139>;  // n x  8 x i64 vector value
-def nxv16i64 : ValueType<1024, 140>;  // n x 16 x i64 vector value
-def nxv32i64 : ValueType<2048, 141>;  // n x 32 x i64 vector value
-
-def nxv1f16  : ValueType<16,  142>;  // n x  1 x  f16 vector value
-def nxv2f16  : ValueType<32,  143>;  // n x  2 x  f16 vector value
-def nxv4f16  : ValueType<64,  144>;  // n x  4 x  f16 vector value
-def nxv8f16  : ValueType<128, 145>;  // n x  8 x  f16 vector value
-def nxv16f16 : ValueType<256, 146>;  // n x 16 x  f16 vector value
-def nxv32f16 : ValueType<512, 147>;  // n x 32 x  f16 vector value
-
-def nxv1bf16 : ValueType<16,  148>;  // n x  1 x bf16 vector value
-def nxv2bf16 : ValueType<32,  149>;  // n x  2 x bf16 vector value
-def nxv4bf16 : ValueType<64,  150>;  // n x  4 x bf16 vector value
-def nxv8bf16 : ValueType<128, 151>;  // n x  8 x bf16 vector value
-
-def nxv1f32  : ValueType<32,  152>;  // n x  1 x  f32 vector value
-def nxv2f32  : ValueType<64,  153>;  // n x  2 x  f32 vector value
-def nxv4f32  : ValueType<128, 154>;  // n x  4 x  f32 vector value
-def nxv8f32  : ValueType<256, 155>;  // n x  8 x  f32 vector value
-def nxv16f32 : ValueType<512, 156>;  // n x 16 x  f32 vector value
-
-def nxv1f64  : ValueType<64,  157>;  // n x  1 x  f64 vector value
-def nxv2f64  : ValueType<128, 158>;  // n x  2 x  f64 vector value
-def nxv4f64  : ValueType<256, 159>;  // n x  4 x  f64 vector value
-def nxv8f64  : ValueType<512, 160>;  // n x  8 x  f64 vector value
-
-def x86mmx    : ValueType<64,   161>;  // X86 MMX value
-def FlagVT    : ValueType<0,    162>;  // Pre-RA sched glue
-def isVoid    : ValueType<0,    163>;  // Produces no value
-def untyped   : ValueType<8,    164>;  // Produces an untyped value
-def funcref   : ValueType<0,    165>;  // WebAssembly's funcref type
-def externref : ValueType<0,    166>;  // WebAssembly's externref type
-def x86amx    : ValueType<8192, 167>;  // X86 AMX value
+def v1i8    : ValueType<8,    26>;  //    1 x i8 vector value
+def v2i8    : ValueType<16,   27>;  //    2 x i8 vector value
+def v4i8    : ValueType<32,   28>;  //    4 x i8 vector value
+def v8i8    : ValueType<64,   29>;  //    8 x i8 vector value
+def v16i8   : ValueType<128,  30>;  //   16 x i8 vector value
+def v32i8   : ValueType<256,  31>;  //   32 x i8 vector value
+def v64i8   : ValueType<512,  32>;  //   64 x i8 vector value
+def v128i8  : ValueType<1024, 33>;  //  128 x i8 vector value
+def v256i8  : ValueType<2048, 34>;  //  256 x i8 vector value
+def v512i8  : ValueType<4096, 35>;  //  512 x i8 vector value
+def v1024i8 : ValueType<8192, 36>;  // 1024 x i8 vector value
+
+def v1i16   : ValueType<16,   37>;  //   1 x i16 vector value
+def v2i16   : ValueType<32,   38>;  //   2 x i16 vector value
+def v3i16   : ValueType<48,   39>;  //   3 x i16 vector value
+def v4i16   : ValueType<64,   40>;  //   4 x i16 vector value
+def v8i16   : ValueType<128,  41>;  //   8 x i16 vector value
+def v16i16  : ValueType<256,  42>;  //  16 x i16 vector value
+def v32i16  : ValueType<512,  43>;  //  32 x i16 vector value
+def v64i16  : ValueType<1024, 44>;  //  64 x i16 vector value
+def v128i16 : ValueType<2048, 45>;  // 128 x i16 vector value
+def v256i16 : ValueType<4096, 46>;  // 256 x i16 vector value
+def v512i16 : ValueType<8192, 47>;  // 512 x i16 vector value
+
+def v1i32    : ValueType<32,    48>;  //    1 x i32 vector value
+def v2i32    : ValueType<64,    49>;  //    2 x i32 vector value
+def v3i32    : ValueType<96,    50>;  //    3 x i32 vector value
+def v4i32    : ValueType<128,   51>;  //    4 x i32 vector value
+def v5i32    : ValueType<160,   52>;  //    5 x i32 vector value
+def v8i32    : ValueType<256,   53>;  //    8 x i32 vector value
+def v16i32   : ValueType<512,   54>;  //   16 x i32 vector value
+def v32i32   : ValueType<1024,  55>;  //   32 x i32 vector value
+def v64i32   : ValueType<2048,  56>;  //   64 x i32 vector value
+def v128i32  : ValueType<4096,  57>;  //  128 x i32 vector value
+def v256i32  : ValueType<8192,  58>;  //  256 x i32 vector value
+def v512i32  : ValueType<16384, 59>;  //  512 x i32 vector value
+def v1024i32 : ValueType<32768, 60>;  // 1024 x i32 vector value
+def v2048i32 : ValueType<65536, 61>;  // 2048 x i32 vector value
+
+def v1i64   : ValueType<64,    62>;  //   1 x i64 vector value
+def v2i64   : ValueType<128,   63>;  //   2 x i64 vector value
+def v4i64   : ValueType<256,   64>;  //   4 x i64 vector value
+def v8i64   : ValueType<512,   65>;  //   8 x i64 vector value
+def v16i64  : ValueType<1024,  66>;  //  16 x i64 vector value
+def v32i64  : ValueType<2048,  67>;  //  32 x i64 vector value
+def v64i64  : ValueType<4096,  68>;  //  64 x i64 vector value
+def v128i64 : ValueType<8192,  69>;  // 128 x i64 vector value
+def v256i64 : ValueType<16384, 70>;  // 256 x i64 vector value
+
+def v1i128 : ValueType<128, 71>;  //  1 x i128 vector value
+
+def v1f16    : ValueType<16,     72>;  //    1 x f16 vector value
+def v2f16    : ValueType<32,     73>;  //    2 x f16 vector value
+def v3f16    : ValueType<48,     74>;  //    3 x f16 vector value
+def v4f16    : ValueType<64,     75>;  //    4 x f16 vector value
+def v8f16    : ValueType<128,    76>;  //    8 x f16 vector value
+def v16f16   : ValueType<256,    77>;  //   16 x f16 vector value
+def v32f16   : ValueType<512,    78>;  //   32 x f16 vector value
+def v64f16   : ValueType<1024,   79>;  //   64 x f16 vector value
+def v128f16  : ValueType<2048,   80>;  //  128 x f16 vector value
+def v256f16  : ValueType<4096,   81>;  //  256 x f16 vector value
+def v512f16  : ValueType<8192,   82>;  //  512 x f16 vector value
+
+def v2bf16   : ValueType<32,     83>;  //    2 x bf16 vector value
+def v3bf16   : ValueType<48,     84>;  //    3 x bf16 vector value
+def v4bf16   : ValueType<64,     85>;  //    4 x bf16 vector value
+def v8bf16   : ValueType<128,    86>;  //    8 x bf16 vector value
+def v16bf16  : ValueType<256,    87>;  //   16 x bf16 vector value
+def v32bf16  : ValueType<512,    88>;  //   32 x bf16 vector value
+def v64bf16  : ValueType<1024,   89>;  //   64 x bf16 vector value
+def v128bf16 : ValueType<2048,   90>;  //  128 x bf16 vector value
+
+def v1f32    : ValueType<32,     91>;  //    1 x f32 vector value
+def v2f32    : ValueType<64,     92>;  //    2 x f32 vector value
+def v3f32    : ValueType<96,     93>;  //    3 x f32 vector value
+def v4f32    : ValueType<128,    94>;  //    4 x f32 vector value
+def v5f32    : ValueType<160,    95>;  //    5 x f32 vector value
+def v8f32    : ValueType<256,    96>;  //    8 x f32 vector value
+def v16f32   : ValueType<512,    97>;  //   16 x f32 vector value
+def v32f32   : ValueType<1024,   98>;  //   32 x f32 vector value
+def v64f32   : ValueType<2048,   99>;  //   64 x f32 vector value
+def v128f32  : ValueType<4096,  100>;  //  128 x f32 vector value
+def v256f32  : ValueType<8192,  101>;  //  256 x f32 vector value
+def v512f32  : ValueType<16384, 102>;  //  512 x f32 vector value
+def v1024f32 : ValueType<32768, 103>;  // 1024 x f32 vector value
+def v2048f32 : ValueType<65536, 104>;  // 2048 x f32 vector value
+
+def v1f64    : ValueType<64,    105>;  //    1 x f64 vector value
+def v2f64    : ValueType<128,   106>;  //    2 x f64 vector value
+def v4f64    : ValueType<256,   107>;  //    4 x f64 vector value
+def v8f64    : ValueType<512,   108>;  //    8 x f64 vector value
+def v16f64   : ValueType<1024,  109>;  //   16 x f64 vector value
+def v32f64   : ValueType<2048,  110>;  //   32 x f64 vector value
+def v64f64   : ValueType<4096,  111>;  //   64 x f64 vector value
+def v128f64  : ValueType<8192,  112>;  //  128 x f64 vector value
+def v256f64  : ValueType<16384, 113>;  //  256 x f64 vector value
+
+def nxv1i1  : ValueType<1,  114>;  // n x  1 x i1  vector value
+def nxv2i1  : ValueType<2,  115>;  // n x  2 x i1  vector value
+def nxv4i1  : ValueType<4,  116>;  // n x  4 x i1  vector value
+def nxv8i1  : ValueType<8,  117>;  // n x  8 x i1  vector value
+def nxv16i1 : ValueType<16, 118>;  // n x 16 x i1  vector value
+def nxv32i1 : ValueType<32, 119>;  // n x 32 x i1  vector value
+def nxv64i1 : ValueType<64, 120>;  // n x 64 x i1  vector value
+
+def nxv1i8  : ValueType<8,   121>;  // n x  1 x i8  vector value
+def nxv2i8  : ValueType<16,  122>;  // n x  2 x i8  vector value
+def nxv4i8  : ValueType<32,  123>;  // n x  4 x i8  vector value
+def nxv8i8  : ValueType<64,  124>;  // n x  8 x i8  vector value
+def nxv16i8 : ValueType<128, 125>;  // n x 16 x i8  vector value
+def nxv32i8 : ValueType<256, 126>;  // n x 32 x i8  vector value
+def nxv64i8 : ValueType<512, 127>;  // n x 64 x i8  vector value
+
+def nxv1i16  : ValueType<16,  128>;  // n x  1 x i16 vector value
+def nxv2i16  : ValueType<32,  129>;  // n x  2 x i16 vector value
+def nxv4i16  : ValueType<64,  130>;  // n x  4 x i16 vector value
+def nxv8i16  : ValueType<128, 131>;  // n x  8 x i16 vector value
+def nxv16i16 : ValueType<256, 132>;  // n x 16 x i16 vector value
+def nxv32i16 : ValueType<512, 133>;  // n x 32 x i16 vector value
+
+def nxv1i32  : ValueType<32,   134>;  // n x  1 x i32 vector value
+def nxv2i32  : ValueType<64,   135>;  // n x  2 x i32 vector value
+def nxv4i32  : ValueType<128,  136>;  // n x  4 x i32 vector value
+def nxv8i32  : ValueType<256,  137>;  // n x  8 x i32 vector value
+def nxv16i32 : ValueType<512,  138>;  // n x 16 x i32 vector value
+def nxv32i32 : ValueType<1024, 139>;  // n x 32 x i32 vector value
+
+def nxv1i64  : ValueType<64,   140>;  // n x  1 x i64 vector value
+def nxv2i64  : ValueType<128,  141>;  // n x  2 x i64 vector value
+def nxv4i64  : ValueType<256,  142>;  // n x  4 x i64 vector value
+def nxv8i64  : ValueType<512,  143>;  // n x  8 x i64 vector value
+def nxv16i64 : ValueType<1024, 144>;  // n x 16 x i64 vector value
+def nxv32i64 : ValueType<2048, 145>;  // n x 32 x i64 vector value
+
+def nxv1f16  : ValueType<16,  146>;  // n x  1 x  f16 vector value
+def nxv2f16  : ValueType<32,  147>;  // n x  2 x  f16 vector value
+def nxv4f16  : ValueType<64,  148>;  // n x  4 x  f16 vector value
+def nxv8f16  : ValueType<128, 149>;  // n x  8 x  f16 vector value
+def nxv16f16 : ValueType<256, 150>;  // n x 16 x  f16 vector value
+def nxv32f16 : ValueType<512, 151>;  // n x 32 x  f16 vector value
+
+def nxv1bf16 : ValueType<16,  152>;  // n x  1 x bf16 vector value
+def nxv2bf16 : ValueType<32,  153>;  // n x  2 x bf16 vector value
+def nxv4bf16 : ValueType<64,  154>;  // n x  4 x bf16 vector value
+def nxv8bf16 : ValueType<128, 155>;  // n x  8 x bf16 vector value
+
+def nxv1f32  : ValueType<32,  156>;  // n x  1 x  f32 vector value
+def nxv2f32  : ValueType<64,  157>;  // n x  2 x  f32 vector value
+def nxv4f32  : ValueType<128, 158>;  // n x  4 x  f32 vector value
+def nxv8f32  : ValueType<256, 159>;  // n x  8 x  f32 vector value
+def nxv16f32 : ValueType<512, 160>;  // n x 16 x  f32 vector value
+
+def nxv1f64  : ValueType<64,  161>;  // n x  1 x  f64 vector value
+def nxv2f64  : ValueType<128, 162>;  // n x  2 x  f64 vector value
+def nxv4f64  : ValueType<256, 163>;  // n x  4 x  f64 vector value
+def nxv8f64  : ValueType<512, 164>;  // n x  8 x  f64 vector value
+
+def x86mmx    : ValueType<64,   165>;  // X86 MMX value
+def FlagVT    : ValueType<0,    166>;  // Pre-RA sched glue
+def isVoid    : ValueType<0,    167>;  // Produces no value
+def untyped   : ValueType<8,    168>;  // Produces an untyped value
+def funcref   : ValueType<0,    169>;  // WebAssembly's funcref type
+def externref : ValueType<0,    170>;  // WebAssembly's externref type
+def x86amx    : ValueType<8192, 171>;  // X86 AMX value
 
 
 def token      : ValueType<0, 248>;  // TokenTy

diff  --git a/llvm/include/llvm/Support/MachineValueType.h b/llvm/include/llvm/Support/MachineValueType.h
index 11c0aa412a33d..88ae8f8e6fc4d 100644
--- a/llvm/include/llvm/Support/MachineValueType.h
+++ b/llvm/include/llvm/Support/MachineValueType.h
@@ -72,101 +72,105 @@ namespace llvm {
       v512i1         =  24,   //  512 x i1
       v1024i1        =  25,   // 1024 x i1
 
-      v1i8           =  26,   //   1 x i8
-      v2i8           =  27,   //   2 x i8
-      v4i8           =  28,   //   4 x i8
-      v8i8           =  29,   //   8 x i8
-      v16i8          =  30,   //  16 x i8
-      v32i8          =  31,   //  32 x i8
-      v64i8          =  32,   //  64 x i8
-      v128i8         =  33,   // 128 x i8
-      v256i8         =  34,   // 256 x i8
-
-      v1i16          =  35,   //   1 x i16
-      v2i16          =  36,   //   2 x i16
-      v3i16          =  37,   //   3 x i16
-      v4i16          =  38,   //   4 x i16
-      v8i16          =  39,   //   8 x i16
-      v16i16         =  40,   //  16 x i16
-      v32i16         =  41,   //  32 x i16
-      v64i16         =  42,   //  64 x i16
-      v128i16        =  43,   // 128 x i16
-      v256i16        =  44,   // 256 x i16
-
-      v1i32          =  45,   //    1 x i32
-      v2i32          =  46,   //    2 x i32
-      v3i32          =  47,   //    3 x i32
-      v4i32          =  48,   //    4 x i32
-      v5i32          =  49,   //    5 x i32
-      v8i32          =  50,   //    8 x i32
-      v16i32         =  51,   //   16 x i32
-      v32i32         =  52,   //   32 x i32
-      v64i32         =  53,   //   64 x i32
-      v128i32        =  54,   //  128 x i32
-      v256i32        =  55,   //  256 x i32
-      v512i32        =  56,   //  512 x i32
-      v1024i32       =  57,   // 1024 x i32
-      v2048i32       =  58,   // 2048 x i32
-
-      v1i64          =  59,   //   1 x i64
-      v2i64          =  60,   //   2 x i64
-      v4i64          =  61,   //   4 x i64
-      v8i64          =  62,   //   8 x i64
-      v16i64         =  63,   //  16 x i64
-      v32i64         =  64,   //  32 x i64
-      v64i64         =  65,   //  64 x i64
-      v128i64        =  66,   // 128 x i64
-      v256i64        =  67,   // 256 x i64
-
-      v1i128         =  68,   //  1 x i128
+      v1i8           =  26,   //    1 x i8
+      v2i8           =  27,   //    2 x i8
+      v4i8           =  28,   //    4 x i8
+      v8i8           =  29,   //    8 x i8
+      v16i8          =  30,   //   16 x i8
+      v32i8          =  31,   //   32 x i8
+      v64i8          =  32,   //   64 x i8
+      v128i8         =  33,   //  128 x i8
+      v256i8         =  34,   //  256 x i8
+      v512i8         =  35,   //  512 x i8
+      v1024i8        =  36,   // 1024 x i8
+
+      v1i16          =  37,   //   1 x i16
+      v2i16          =  38,   //   2 x i16
+      v3i16          =  39,   //   3 x i16
+      v4i16          =  40,   //   4 x i16
+      v8i16          =  41,   //   8 x i16
+      v16i16         =  42,   //  16 x i16
+      v32i16         =  43,   //  32 x i16
+      v64i16         =  44,   //  64 x i16
+      v128i16        =  45,   // 128 x i16
+      v256i16        =  46,   // 256 x i16
+      v512i16        =  47,   // 512 x i16
+
+      v1i32          =  48,   //    1 x i32
+      v2i32          =  49,   //    2 x i32
+      v3i32          =  50,   //    3 x i32
+      v4i32          =  51,   //    4 x i32
+      v5i32          =  52,   //    5 x i32
+      v8i32          =  53,   //    8 x i32
+      v16i32         =  54,   //   16 x i32
+      v32i32         =  55,   //   32 x i32
+      v64i32         =  56,   //   64 x i32
+      v128i32        =  57,   //  128 x i32
+      v256i32        =  58,   //  256 x i32
+      v512i32        =  59,   //  512 x i32
+      v1024i32       =  60,   // 1024 x i32
+      v2048i32       =  61,   // 2048 x i32
+
+      v1i64          =  62,   //   1 x i64
+      v2i64          =  63,   //   2 x i64
+      v4i64          =  64,   //   4 x i64
+      v8i64          =  65,   //   8 x i64
+      v16i64         =  66,   //  16 x i64
+      v32i64         =  67,   //  32 x i64
+      v64i64         =  68,   //  64 x i64
+      v128i64        =  69,   // 128 x i64
+      v256i64        =  70,   // 256 x i64
+
+      v1i128         =  71,   //  1 x i128
 
       FIRST_INTEGER_FIXEDLEN_VECTOR_VALUETYPE = v1i1,
       LAST_INTEGER_FIXEDLEN_VECTOR_VALUETYPE = v1i128,
 
-      v1f16          =  69,   //    1 x f16
-      v2f16          =  70,   //    2 x f16
-      v3f16          =  71,   //    3 x f16
-      v4f16          =  72,   //    4 x f16
-      v8f16          =  73,   //    8 x f16
-      v16f16         =  74,   //   16 x f16
-      v32f16         =  75,   //   32 x f16
-      v64f16         =  76,   //   64 x f16
-      v128f16        =  77,   //  128 x f16
-      v256f16        =  78,   //  256 x f16
-
-      v2bf16         =  79,   //    2 x bf16
-      v3bf16         =  80,   //    3 x bf16
-      v4bf16         =  81,   //    4 x bf16
-      v8bf16         =  82,   //    8 x bf16
-      v16bf16        =  83,   //   16 x bf16
-      v32bf16        =  84,   //   32 x bf16
-      v64bf16        =  85,   //   64 x bf16
-      v128bf16       =  86,   //  128 x bf16
-
-      v1f32          =  87,   //    1 x f32
-      v2f32          =  88,   //    2 x f32
-      v3f32          =  89,   //    3 x f32
-      v4f32          =  90,   //    4 x f32
-      v5f32          =  91,   //    5 x f32
-      v8f32          =  92,   //    8 x f32
-      v16f32         =  93,   //   16 x f32
-      v32f32         =  94,   //   32 x f32
-      v64f32         =  95,   //   64 x f32
-      v128f32        =  96,   //  128 x f32
-      v256f32        =  97,   //  256 x f32
-      v512f32        =  98,   //  512 x f32
-      v1024f32       =  99,   // 1024 x f32
-      v2048f32       = 100,   // 2048 x f32
-
-      v1f64          = 101,   //    1 x f64
-      v2f64          = 102,   //    2 x f64
-      v4f64          = 103,   //    4 x f64
-      v8f64          = 104,   //    8 x f64
-      v16f64         = 105,   //   16 x f64
-      v32f64         = 106,   //   32 x f64
-      v64f64         = 107,   //   64 x f64
-      v128f64        = 108,   //  128 x f64
-      v256f64        = 109,   //  256 x f64
+      v1f16          =  72,   //    1 x f16
+      v2f16          =  73,   //    2 x f16
+      v3f16          =  74,   //    3 x f16
+      v4f16          =  75,   //    4 x f16
+      v8f16          =  76,   //    8 x f16
+      v16f16         =  77,   //   16 x f16
+      v32f16         =  78,   //   32 x f16
+      v64f16         =  79,   //   64 x f16
+      v128f16        =  80,   //  128 x f16
+      v256f16        =  81,   //  256 x f16
+      v512f16        =  82,   //  512 x f16
+
+      v2bf16         =  83,   //    2 x bf16
+      v3bf16         =  84,   //    3 x bf16
+      v4bf16         =  85,   //    4 x bf16
+      v8bf16         =  86,   //    8 x bf16
+      v16bf16        =  87,   //   16 x bf16
+      v32bf16        =  88,   //   32 x bf16
+      v64bf16        =  89,   //   64 x bf16
+      v128bf16       =  90,   //  128 x bf16
+
+      v1f32          =  91,   //    1 x f32
+      v2f32          =  92,   //    2 x f32
+      v3f32          =  93,   //    3 x f32
+      v4f32          =  94,   //    4 x f32
+      v5f32          =  95,   //    5 x f32
+      v8f32          =  96,   //    8 x f32
+      v16f32         =  97,   //   16 x f32
+      v32f32         =  98,   //   32 x f32
+      v64f32         =  99,   //   64 x f32
+      v128f32        = 100,   //  128 x f32
+      v256f32        = 101,   //  256 x f32
+      v512f32        = 102,   //  512 x f32
+      v1024f32       = 103,   // 1024 x f32
+      v2048f32       = 104,   // 2048 x f32
+
+      v1f64          = 105,   //    1 x f64
+      v2f64          = 106,   //    2 x f64
+      v4f64          = 107,   //    4 x f64
+      v8f64          = 108,   //    8 x f64
+      v16f64         = 109,   //   16 x f64
+      v32f64         = 110,   //   32 x f64
+      v64f64         = 111,   //   64 x f64
+      v128f64        = 112,   //  128 x f64
+      v256f64        = 113,   //  256 x f64
 
       FIRST_FP_FIXEDLEN_VECTOR_VALUETYPE = v1f16,
       LAST_FP_FIXEDLEN_VECTOR_VALUETYPE = v256f64,
@@ -174,68 +178,68 @@ namespace llvm {
       FIRST_FIXEDLEN_VECTOR_VALUETYPE = v1i1,
       LAST_FIXEDLEN_VECTOR_VALUETYPE = v256f64,
 
-      nxv1i1         = 110,   // n x  1 x i1
-      nxv2i1         = 111,   // n x  2 x i1
-      nxv4i1         = 112,   // n x  4 x i1
-      nxv8i1         = 113,   // n x  8 x i1
-      nxv16i1        = 114,   // n x 16 x i1
-      nxv32i1        = 115,   // n x 32 x i1
-      nxv64i1        = 116,   // n x 64 x i1
-
-      nxv1i8         = 117,   // n x  1 x i8
-      nxv2i8         = 118,   // n x  2 x i8
-      nxv4i8         = 119,   // n x  4 x i8
-      nxv8i8         = 120,   // n x  8 x i8
-      nxv16i8        = 121,   // n x 16 x i8
-      nxv32i8        = 122,   // n x 32 x i8
-      nxv64i8        = 123,   // n x 64 x i8
-
-      nxv1i16        = 124,  // n x  1 x i16
-      nxv2i16        = 125,  // n x  2 x i16
-      nxv4i16        = 126,  // n x  4 x i16
-      nxv8i16        = 127,  // n x  8 x i16
-      nxv16i16       = 128,  // n x 16 x i16
-      nxv32i16       = 129,  // n x 32 x i16
-
-      nxv1i32        = 130,  // n x  1 x i32
-      nxv2i32        = 131,  // n x  2 x i32
-      nxv4i32        = 132,  // n x  4 x i32
-      nxv8i32        = 133,  // n x  8 x i32
-      nxv16i32       = 134,  // n x 16 x i32
-      nxv32i32       = 135,  // n x 32 x i32
-
-      nxv1i64        = 136,  // n x  1 x i64
-      nxv2i64        = 137,  // n x  2 x i64
-      nxv4i64        = 138,  // n x  4 x i64
-      nxv8i64        = 139,  // n x  8 x i64
-      nxv16i64       = 140,  // n x 16 x i64
-      nxv32i64       = 141,  // n x 32 x i64
+      nxv1i1         = 114,   // n x  1 x i1
+      nxv2i1         = 115,   // n x  2 x i1
+      nxv4i1         = 116,   // n x  4 x i1
+      nxv8i1         = 117,   // n x  8 x i1
+      nxv16i1        = 118,   // n x 16 x i1
+      nxv32i1        = 119,   // n x 32 x i1
+      nxv64i1        = 120,   // n x 64 x i1
+
+      nxv1i8         = 121,   // n x  1 x i8
+      nxv2i8         = 122,   // n x  2 x i8
+      nxv4i8         = 123,   // n x  4 x i8
+      nxv8i8         = 124,   // n x  8 x i8
+      nxv16i8        = 125,   // n x 16 x i8
+      nxv32i8        = 126,   // n x 32 x i8
+      nxv64i8        = 127,   // n x 64 x i8
+
+      nxv1i16        = 128,  // n x  1 x i16
+      nxv2i16        = 129,  // n x  2 x i16
+      nxv4i16        = 130,  // n x  4 x i16
+      nxv8i16        = 131,  // n x  8 x i16
+      nxv16i16       = 132,  // n x 16 x i16
+      nxv32i16       = 133,  // n x 32 x i16
+
+      nxv1i32        = 134,  // n x  1 x i32
+      nxv2i32        = 135,  // n x  2 x i32
+      nxv4i32        = 136,  // n x  4 x i32
+      nxv8i32        = 137,  // n x  8 x i32
+      nxv16i32       = 138,  // n x 16 x i32
+      nxv32i32       = 139,  // n x 32 x i32
+
+      nxv1i64        = 140,  // n x  1 x i64
+      nxv2i64        = 141,  // n x  2 x i64
+      nxv4i64        = 142,  // n x  4 x i64
+      nxv8i64        = 143,  // n x  8 x i64
+      nxv16i64       = 144,  // n x 16 x i64
+      nxv32i64       = 145,  // n x 32 x i64
 
       FIRST_INTEGER_SCALABLE_VECTOR_VALUETYPE = nxv1i1,
       LAST_INTEGER_SCALABLE_VECTOR_VALUETYPE = nxv32i64,
 
-      nxv1f16        = 142,  // n x  1 x f16
-      nxv2f16        = 143,  // n x  2 x f16
-      nxv4f16        = 144,  // n x  4 x f16
-      nxv8f16        = 145,  // n x  8 x f16
-      nxv16f16       = 146,  // n x 16 x f16
-      nxv32f16       = 147,  // n x 32 x f16
-
-      nxv1bf16       = 148,  // n x  1 x bf16
-      nxv2bf16       = 149,  // n x  2 x bf16
-      nxv4bf16       = 150,  // n x  4 x bf16
-      nxv8bf16       = 151,  // n x  8 x bf16
-
-      nxv1f32        = 152,  // n x  1 x f32
-      nxv2f32        = 153,  // n x  2 x f32
-      nxv4f32        = 154,  // n x  4 x f32
-      nxv8f32        = 155,  // n x  8 x f32
-      nxv16f32       = 156,  // n x 16 x f32
-
-      nxv1f64        = 157,  // n x  1 x f64
-      nxv2f64        = 158,  // n x  2 x f64
-      nxv4f64        = 159,  // n x  4 x f64
-      nxv8f64        = 160,  // n x  8 x f64
+      nxv1f16        = 146,  // n x  1 x f16
+      nxv2f16        = 147,  // n x  2 x f16
+      nxv4f16        = 148,  // n x  4 x f16
+      nxv8f16        = 149,  // n x  8 x f16
+      nxv16f16       = 150,  // n x 16 x f16
+      nxv32f16       = 151,  // n x 32 x f16
+
+      nxv1bf16       = 152,  // n x  1 x bf16
+      nxv2bf16       = 153,  // n x  2 x bf16
+      nxv4bf16       = 154,  // n x  4 x bf16
+      nxv8bf16       = 155,  // n x  8 x bf16
+
+      nxv1f32        = 156,  // n x  1 x f32
+      nxv2f32        = 157,  // n x  2 x f32
+      nxv4f32        = 158,  // n x  4 x f32
+      nxv8f32        = 159,  // n x  8 x f32
+      nxv16f32       = 160,  // n x 16 x f32
+
+      nxv1f64        = 161,  // n x  1 x f64
+      nxv2f64        = 162,  // n x  2 x f64
+      nxv4f64        = 163,  // n x  4 x f64
+      nxv8f64        = 164,  // n x  8 x f64
 
       FIRST_FP_SCALABLE_VECTOR_VALUETYPE = nxv1f16,
       LAST_FP_SCALABLE_VECTOR_VALUETYPE = nxv8f64,
@@ -246,19 +250,19 @@ namespace llvm {
       FIRST_VECTOR_VALUETYPE = v1i1,
       LAST_VECTOR_VALUETYPE  = nxv8f64,
 
-      x86mmx         = 161,    // This is an X86 MMX value
+      x86mmx         = 165,    // This is an X86 MMX value
 
-      Glue           = 162,    // This glues nodes together during pre-RA sched
+      Glue           = 166,    // This glues nodes together during pre-RA sched
 
-      isVoid         = 163,    // This has no value
+      isVoid         = 167,    // This has no value
 
-      Untyped        = 164,    // This value takes a register, but has
+      Untyped        = 168,    // This value takes a register, but has
                                // unspecified type.  The register class
                                // will be determined by the opcode.
 
-      funcref        = 165,    // WebAssembly's funcref type
-      externref      = 166,    // WebAssembly's externref type
-      x86amx         = 167,    // This is an X86 AMX value
+      funcref        = 169,    // WebAssembly's funcref type
+      externref      = 170,    // WebAssembly's externref type
+      x86amx         = 171,    // This is an X86 AMX value
 
       FIRST_VALUETYPE =  1,    // This is always the beginning of the list.
       LAST_VALUETYPE = x86amx, // This always remains at the end of the list.
@@ -535,6 +539,8 @@ namespace llvm {
       case v64i8:
       case v128i8:
       case v256i8:
+      case v512i8:
+      case v1024i8:
       case nxv1i8:
       case nxv2i8:
       case nxv4i8:
@@ -552,6 +558,7 @@ namespace llvm {
       case v64i16:
       case v128i16:
       case v256i16:
+      case v512i16:
       case nxv1i16:
       case nxv2i16:
       case nxv4i16:
@@ -604,6 +611,7 @@ namespace llvm {
       case v64f16:
       case v128f16:
       case v256f16:
+      case v512f16:
       case nxv1f16:
       case nxv2f16:
       case nxv4f16:
@@ -665,10 +673,14 @@ namespace llvm {
       case v2048i32:
       case v2048f32: return 2048;
       case v1024i1:
+      case v1024i8:
       case v1024i32:
       case v1024f32: return 1024;
       case v512i1:
+      case v512i8:
+      case v512i16:
       case v512i32:
+      case v512f16:
       case v512f32: return 512;
       case v256i1:
       case v256i8:
@@ -986,14 +998,18 @@ namespace llvm {
       case v64f32:
       case v32f64: return TypeSize::Fixed(2048);
       case nxv32i64: return TypeSize::Scalable(2048);
+      case v512i8:
       case v256i16:
       case v128i32:
       case v64i64:
       case v256f16:
       case v128f32:
       case v64f64:  return TypeSize::Fixed(4096);
+      case v1024i8:
+      case v512i16:
       case v256i32:
       case v128i64:
+      case v512f16:
       case v256f32:
       case x86amx:
       case v128f64:  return TypeSize::Fixed(8192);
@@ -1158,6 +1174,8 @@ namespace llvm {
         if (NumElements == 64)  return MVT::v64i8;
         if (NumElements == 128) return MVT::v128i8;
         if (NumElements == 256) return MVT::v256i8;
+        if (NumElements == 512) return MVT::v512i8;
+        if (NumElements == 1024) return MVT::v1024i8;
         break;
       case MVT::i16:
         if (NumElements == 1)   return MVT::v1i16;
@@ -1170,6 +1188,7 @@ namespace llvm {
         if (NumElements == 64)  return MVT::v64i16;
         if (NumElements == 128) return MVT::v128i16;
         if (NumElements == 256) return MVT::v256i16;
+        if (NumElements == 512) return MVT::v512i16;
         break;
       case MVT::i32:
         if (NumElements == 1)    return MVT::v1i32;
@@ -1212,6 +1231,7 @@ namespace llvm {
         if (NumElements == 64)  return MVT::v64f16;
         if (NumElements == 128) return MVT::v128f16;
         if (NumElements == 256) return MVT::v256f16;
+        if (NumElements == 512) return MVT::v512f16;
         break;
       case MVT::bf16:
         if (NumElements == 2)   return MVT::v2bf16;

diff  --git a/llvm/lib/CodeGen/ValueTypes.cpp b/llvm/lib/CodeGen/ValueTypes.cpp
index 1672e5f77c768..28826bc48e429 100644
--- a/llvm/lib/CodeGen/ValueTypes.cpp
+++ b/llvm/lib/CodeGen/ValueTypes.cpp
@@ -238,6 +238,10 @@ Type *EVT::getTypeForEVT(LLVMContext &Context) const {
     return FixedVectorType::get(Type::getInt8Ty(Context), 128);
   case MVT::v256i8:
     return FixedVectorType::get(Type::getInt8Ty(Context), 256);
+  case MVT::v512i8:
+    return FixedVectorType::get(Type::getInt8Ty(Context), 512);
+  case MVT::v1024i8:
+    return FixedVectorType::get(Type::getInt8Ty(Context), 1024);
   case MVT::v1i16:
     return FixedVectorType::get(Type::getInt16Ty(Context), 1);
   case MVT::v2i16:
@@ -258,6 +262,8 @@ Type *EVT::getTypeForEVT(LLVMContext &Context) const {
     return FixedVectorType::get(Type::getInt16Ty(Context), 128);
   case MVT::v256i16:
     return FixedVectorType::get(Type::getInt16Ty(Context), 256);
+  case MVT::v512i16:
+    return FixedVectorType::get(Type::getInt16Ty(Context), 512);
   case MVT::v1i32:
     return FixedVectorType::get(Type::getInt32Ty(Context), 1);
   case MVT::v2i32:
@@ -326,6 +332,8 @@ Type *EVT::getTypeForEVT(LLVMContext &Context) const {
     return FixedVectorType::get(Type::getHalfTy(Context), 128);
   case MVT::v256f16:
     return FixedVectorType::get(Type::getHalfTy(Context), 256);
+  case MVT::v512f16:
+    return FixedVectorType::get(Type::getHalfTy(Context), 512);
   case MVT::v2bf16:
     return FixedVectorType::get(Type::getBFloatTy(Context), 2);
   case MVT::v3bf16:

diff  --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 8439feae7ea32..94095e7c6a81f 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -1171,11 +1171,11 @@ static bool useRVVForFixedLengthVectorVT(MVT VT,
   if (!Subtarget.useRVVForFixedLengthVectors())
     return false;
 
-  // We only support a set of vector types with an equivalent number of
-  // elements to avoid legalization issues. Therefore -- since we don't have
-  // v512i8/v512i16/etc -- the longest fixed-length vector type we support has
-  // 256 elements.
-  if (VT.getVectorNumElements() > 256)
+  // We only support a set of vector types with a consistent maximum fixed size
+  // across all supported vector element types to avoid legalization issues.
+  // Therefore -- since the largest is v1024i8/v512i16/etc -- the largest
+  // fixed-length vector type we support is 1024 bytes.
+  if (VT.getFixedSizeInBits() > 1024 * 8)
     return false;
 
   unsigned MinVLen = Subtarget.getMinRVVVectorSizeInBits();

diff  --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitcast-large-vector.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitcast-large-vector.ll
index ede12f6396bac..0ecd5de2b8227 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitcast-large-vector.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitcast-large-vector.ll
@@ -1,9 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs -riscv-v-vector-bits-min=256 < %s | FileCheck %s --check-prefix=VLEN256
 ; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs -riscv-v-vector-bits-min=512 < %s | FileCheck %s --check-prefix=VLEN512
+; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs -riscv-v-vector-bits-min=1024 < %s | FileCheck %s --check-prefix=VLEN1024
 
-; FIXME: A larger VLEN is producing worse code.
-; FIXME: v256i16 is legal so v512i8 should be too since they're the same size.
 define <512 x i8> @bitcast_1024B(<256 x i16> %a, <512 x i8> %b) {
 ; VLEN256-LABEL: bitcast_1024B:
 ; VLEN256:       # %bb.0:
@@ -18,1188 +17,17 @@ define <512 x i8> @bitcast_1024B(<256 x i16> %a, <512 x i8> %b) {
 ;
 ; VLEN512-LABEL: bitcast_1024B:
 ; VLEN512:       # %bb.0:
-; VLEN512-NEXT:    addi sp, sp, -1024
-; VLEN512-NEXT:    .cfi_def_cfa_offset 1024
-; VLEN512-NEXT:    sd ra, 1016(sp) # 8-byte Folded Spill
-; VLEN512-NEXT:    sd s0, 1008(sp) # 8-byte Folded Spill
-; VLEN512-NEXT:    sd s1, 1000(sp) # 8-byte Folded Spill
-; VLEN512-NEXT:    sd s2, 992(sp) # 8-byte Folded Spill
-; VLEN512-NEXT:    sd s3, 984(sp) # 8-byte Folded Spill
-; VLEN512-NEXT:    sd s4, 976(sp) # 8-byte Folded Spill
-; VLEN512-NEXT:    sd s5, 968(sp) # 8-byte Folded Spill
-; VLEN512-NEXT:    sd s6, 960(sp) # 8-byte Folded Spill
-; VLEN512-NEXT:    sd s7, 952(sp) # 8-byte Folded Spill
-; VLEN512-NEXT:    sd s8, 944(sp) # 8-byte Folded Spill
-; VLEN512-NEXT:    sd s9, 936(sp) # 8-byte Folded Spill
-; VLEN512-NEXT:    sd s10, 928(sp) # 8-byte Folded Spill
-; VLEN512-NEXT:    sd s11, 920(sp) # 8-byte Folded Spill
-; VLEN512-NEXT:    .cfi_offset ra, -8
-; VLEN512-NEXT:    .cfi_offset s0, -16
-; VLEN512-NEXT:    .cfi_offset s1, -24
-; VLEN512-NEXT:    .cfi_offset s2, -32
-; VLEN512-NEXT:    .cfi_offset s3, -40
-; VLEN512-NEXT:    .cfi_offset s4, -48
-; VLEN512-NEXT:    .cfi_offset s5, -56
-; VLEN512-NEXT:    .cfi_offset s6, -64
-; VLEN512-NEXT:    .cfi_offset s7, -72
-; VLEN512-NEXT:    .cfi_offset s8, -80
-; VLEN512-NEXT:    .cfi_offset s9, -88
-; VLEN512-NEXT:    .cfi_offset s10, -96
-; VLEN512-NEXT:    .cfi_offset s11, -104
-; VLEN512-NEXT:    addi s0, sp, 1024
-; VLEN512-NEXT:    .cfi_def_cfa s0, 0
-; VLEN512-NEXT:    csrr a0, vlenb
-; VLEN512-NEXT:    slli a0, a0, 3
-; VLEN512-NEXT:    sub sp, sp, a0
-; VLEN512-NEXT:    andi sp, sp, -256
-; VLEN512-NEXT:    addi a0, zero, 32
-; VLEN512-NEXT:    vsetivli zero, 1, e64, m8, ta, mu
-; VLEN512-NEXT:    vslidedown.vx v24, v8, a0
-; VLEN512-NEXT:    vmv.x.s a6, v24
-; VLEN512-NEXT:    addi a0, zero, 33
-; VLEN512-NEXT:    vslidedown.vx v24, v8, a0
-; VLEN512-NEXT:    addi a0, sp, 920
-; VLEN512-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
-; VLEN512-NEXT:    addi a0, zero, 34
-; VLEN512-NEXT:    addi a1, zero, 35
-; VLEN512-NEXT:    vslidedown.vx v0, v8, a1
-; VLEN512-NEXT:    vmv.x.s a7, v0
-; VLEN512-NEXT:    addi a1, zero, 36
-; VLEN512-NEXT:    vslidedown.vx v0, v8, a1
-; VLEN512-NEXT:    vmv.x.s t0, v0
-; VLEN512-NEXT:    addi a1, zero, 37
-; VLEN512-NEXT:    vslidedown.vx v0, v8, a1
-; VLEN512-NEXT:    vmv.x.s t1, v0
-; VLEN512-NEXT:    addi a1, zero, 38
-; VLEN512-NEXT:    vslidedown.vx v0, v8, a1
-; VLEN512-NEXT:    vmv.x.s t2, v0
-; VLEN512-NEXT:    addi a1, zero, 39
-; VLEN512-NEXT:    vslidedown.vx v0, v8, a1
-; VLEN512-NEXT:    vmv.x.s t3, v0
-; VLEN512-NEXT:    addi a1, zero, 40
-; VLEN512-NEXT:    vslidedown.vx v0, v8, a1
-; VLEN512-NEXT:    vmv.x.s t4, v0
-; VLEN512-NEXT:    addi a1, zero, 41
-; VLEN512-NEXT:    vslidedown.vx v0, v8, a1
-; VLEN512-NEXT:    vmv.x.s t5, v0
-; VLEN512-NEXT:    addi a1, zero, 42
-; VLEN512-NEXT:    vslidedown.vx v0, v8, a1
-; VLEN512-NEXT:    vmv.x.s t6, v0
-; VLEN512-NEXT:    addi a1, zero, 43
-; VLEN512-NEXT:    vslidedown.vx v0, v8, a1
-; VLEN512-NEXT:    vmv.x.s s2, v0
-; VLEN512-NEXT:    addi a1, zero, 44
-; VLEN512-NEXT:    vslidedown.vx v0, v8, a1
-; VLEN512-NEXT:    vmv.x.s s3, v0
-; VLEN512-NEXT:    addi a1, zero, 45
-; VLEN512-NEXT:    vslidedown.vx v0, v8, a1
-; VLEN512-NEXT:    vmv.x.s s4, v0
-; VLEN512-NEXT:    addi a1, zero, 46
-; VLEN512-NEXT:    vslidedown.vx v0, v8, a1
-; VLEN512-NEXT:    vmv.x.s s5, v0
-; VLEN512-NEXT:    addi a1, zero, 47
-; VLEN512-NEXT:    vslidedown.vx v0, v8, a1
-; VLEN512-NEXT:    vmv.x.s s6, v0
-; VLEN512-NEXT:    addi a1, zero, 48
-; VLEN512-NEXT:    vslidedown.vx v0, v8, a1
-; VLEN512-NEXT:    vmv.x.s s7, v0
-; VLEN512-NEXT:    addi a1, zero, 49
-; VLEN512-NEXT:    vslidedown.vx v0, v8, a1
-; VLEN512-NEXT:    vmv.x.s s8, v0
-; VLEN512-NEXT:    addi a1, zero, 50
-; VLEN512-NEXT:    vslidedown.vx v0, v8, a1
-; VLEN512-NEXT:    vmv.x.s s9, v0
-; VLEN512-NEXT:    addi a1, zero, 51
-; VLEN512-NEXT:    vslidedown.vx v0, v8, a1
-; VLEN512-NEXT:    vmv.x.s s10, v0
-; VLEN512-NEXT:    addi a1, zero, 52
-; VLEN512-NEXT:    vslidedown.vx v0, v8, a1
-; VLEN512-NEXT:    vmv.x.s s11, v0
-; VLEN512-NEXT:    addi a1, zero, 53
-; VLEN512-NEXT:    vslidedown.vx v0, v8, a1
-; VLEN512-NEXT:    vmv.x.s ra, v0
-; VLEN512-NEXT:    addi a1, zero, 54
-; VLEN512-NEXT:    vslidedown.vx v0, v8, a1
-; VLEN512-NEXT:    vmv.x.s a1, v0
-; VLEN512-NEXT:    addi a2, zero, 55
-; VLEN512-NEXT:    vslidedown.vx v0, v8, a2
-; VLEN512-NEXT:    vmv.x.s a2, v0
-; VLEN512-NEXT:    addi a3, zero, 56
-; VLEN512-NEXT:    vslidedown.vx v0, v8, a3
-; VLEN512-NEXT:    vmv.x.s s1, v0
-; VLEN512-NEXT:    addi a3, zero, 57
-; VLEN512-NEXT:    vslidedown.vx v0, v8, a3
-; VLEN512-NEXT:    vmv.x.s a3, v0
-; VLEN512-NEXT:    addi a4, zero, 58
-; VLEN512-NEXT:    vslidedown.vx v0, v8, a4
-; VLEN512-NEXT:    vmv.x.s a4, v0
-; VLEN512-NEXT:    addi a5, zero, 63
-; VLEN512-NEXT:    vslidedown.vx v0, v8, a5
-; VLEN512-NEXT:    vmv.x.s a5, v0
-; VLEN512-NEXT:    vslidedown.vx v0, v8, a0
-; VLEN512-NEXT:    srli a0, a5, 56
-; VLEN512-NEXT:    sb a0, 511(sp)
-; VLEN512-NEXT:    srli a0, a5, 48
-; VLEN512-NEXT:    sb a0, 510(sp)
-; VLEN512-NEXT:    srli a0, a5, 40
-; VLEN512-NEXT:    sb a0, 509(sp)
-; VLEN512-NEXT:    srli a0, a5, 32
-; VLEN512-NEXT:    sb a0, 508(sp)
-; VLEN512-NEXT:    srli a0, a5, 24
-; VLEN512-NEXT:    sb a0, 507(sp)
-; VLEN512-NEXT:    srli a0, a5, 16
-; VLEN512-NEXT:    sb a0, 506(sp)
-; VLEN512-NEXT:    addi a0, zero, 62
-; VLEN512-NEXT:    vslidedown.vx v24, v8, a0
-; VLEN512-NEXT:    vmv.x.s a0, v24
-; VLEN512-NEXT:    sb a5, 504(sp)
-; VLEN512-NEXT:    srli a5, a5, 8
-; VLEN512-NEXT:    sb a5, 505(sp)
-; VLEN512-NEXT:    srli a5, a0, 56
-; VLEN512-NEXT:    sb a5, 503(sp)
-; VLEN512-NEXT:    srli a5, a0, 48
-; VLEN512-NEXT:    sb a5, 502(sp)
-; VLEN512-NEXT:    srli a5, a0, 40
-; VLEN512-NEXT:    sb a5, 501(sp)
-; VLEN512-NEXT:    srli a5, a0, 32
-; VLEN512-NEXT:    sb a5, 500(sp)
-; VLEN512-NEXT:    srli a5, a0, 24
-; VLEN512-NEXT:    sb a5, 499(sp)
-; VLEN512-NEXT:    srli a5, a0, 16
-; VLEN512-NEXT:    sb a5, 498(sp)
-; VLEN512-NEXT:    addi a5, zero, 61
-; VLEN512-NEXT:    vslidedown.vx v24, v8, a5
-; VLEN512-NEXT:    vmv.x.s a5, v24
-; VLEN512-NEXT:    sb a0, 496(sp)
-; VLEN512-NEXT:    srli a0, a0, 8
-; VLEN512-NEXT:    sb a0, 497(sp)
-; VLEN512-NEXT:    srli a0, a5, 56
-; VLEN512-NEXT:    sb a0, 495(sp)
-; VLEN512-NEXT:    srli a0, a5, 48
-; VLEN512-NEXT:    sb a0, 494(sp)
-; VLEN512-NEXT:    srli a0, a5, 40
-; VLEN512-NEXT:    sb a0, 493(sp)
-; VLEN512-NEXT:    srli a0, a5, 32
-; VLEN512-NEXT:    sb a0, 492(sp)
-; VLEN512-NEXT:    srli a0, a5, 24
-; VLEN512-NEXT:    sb a0, 491(sp)
-; VLEN512-NEXT:    srli a0, a5, 16
-; VLEN512-NEXT:    sb a0, 490(sp)
-; VLEN512-NEXT:    addi a0, zero, 60
-; VLEN512-NEXT:    vslidedown.vx v24, v8, a0
-; VLEN512-NEXT:    vmv.x.s a0, v24
-; VLEN512-NEXT:    sb a5, 488(sp)
-; VLEN512-NEXT:    srli a5, a5, 8
-; VLEN512-NEXT:    sb a5, 489(sp)
-; VLEN512-NEXT:    srli a5, a0, 56
-; VLEN512-NEXT:    sb a5, 487(sp)
-; VLEN512-NEXT:    srli a5, a0, 48
-; VLEN512-NEXT:    sb a5, 486(sp)
-; VLEN512-NEXT:    srli a5, a0, 40
-; VLEN512-NEXT:    sb a5, 485(sp)
-; VLEN512-NEXT:    srli a5, a0, 32
-; VLEN512-NEXT:    sb a5, 484(sp)
-; VLEN512-NEXT:    srli a5, a0, 24
-; VLEN512-NEXT:    sb a5, 483(sp)
-; VLEN512-NEXT:    srli a5, a0, 16
-; VLEN512-NEXT:    sb a5, 482(sp)
-; VLEN512-NEXT:    addi a5, zero, 59
-; VLEN512-NEXT:    vslidedown.vx v24, v8, a5
-; VLEN512-NEXT:    vmv.x.s a5, v24
-; VLEN512-NEXT:    sb a0, 480(sp)
-; VLEN512-NEXT:    srli a0, a0, 8
-; VLEN512-NEXT:    sb a0, 481(sp)
-; VLEN512-NEXT:    srli a0, a5, 56
-; VLEN512-NEXT:    sb a0, 479(sp)
-; VLEN512-NEXT:    srli a0, a5, 48
-; VLEN512-NEXT:    sb a0, 478(sp)
-; VLEN512-NEXT:    srli a0, a5, 40
-; VLEN512-NEXT:    sb a0, 477(sp)
-; VLEN512-NEXT:    srli a0, a5, 32
-; VLEN512-NEXT:    sb a0, 476(sp)
-; VLEN512-NEXT:    srli a0, a5, 24
-; VLEN512-NEXT:    sb a0, 475(sp)
-; VLEN512-NEXT:    srli a0, a5, 16
-; VLEN512-NEXT:    sb a0, 474(sp)
-; VLEN512-NEXT:    sb a5, 472(sp)
-; VLEN512-NEXT:    srli a0, a5, 8
-; VLEN512-NEXT:    sb a0, 473(sp)
-; VLEN512-NEXT:    srli a0, a4, 56
-; VLEN512-NEXT:    sb a0, 471(sp)
-; VLEN512-NEXT:    srli a0, a4, 48
-; VLEN512-NEXT:    sb a0, 470(sp)
-; VLEN512-NEXT:    srli a0, a4, 40
-; VLEN512-NEXT:    sb a0, 469(sp)
-; VLEN512-NEXT:    srli a0, a4, 32
-; VLEN512-NEXT:    sb a0, 468(sp)
-; VLEN512-NEXT:    srli a0, a4, 24
-; VLEN512-NEXT:    sb a0, 467(sp)
-; VLEN512-NEXT:    srli a0, a4, 16
-; VLEN512-NEXT:    sb a0, 466(sp)
-; VLEN512-NEXT:    sb a4, 464(sp)
-; VLEN512-NEXT:    srli a0, a4, 8
-; VLEN512-NEXT:    sb a0, 465(sp)
-; VLEN512-NEXT:    srli a0, a3, 56
-; VLEN512-NEXT:    sb a0, 463(sp)
-; VLEN512-NEXT:    srli a0, a3, 48
-; VLEN512-NEXT:    sb a0, 462(sp)
-; VLEN512-NEXT:    srli a0, a3, 40
-; VLEN512-NEXT:    sb a0, 461(sp)
-; VLEN512-NEXT:    srli a0, a3, 32
-; VLEN512-NEXT:    sb a0, 460(sp)
-; VLEN512-NEXT:    srli a0, a3, 24
-; VLEN512-NEXT:    sb a0, 459(sp)
-; VLEN512-NEXT:    srli a0, a3, 16
-; VLEN512-NEXT:    sb a0, 458(sp)
-; VLEN512-NEXT:    sb a3, 456(sp)
-; VLEN512-NEXT:    srli a0, a3, 8
-; VLEN512-NEXT:    sb a0, 457(sp)
-; VLEN512-NEXT:    srli a0, s1, 56
-; VLEN512-NEXT:    sb a0, 455(sp)
-; VLEN512-NEXT:    srli a0, s1, 48
-; VLEN512-NEXT:    sb a0, 454(sp)
-; VLEN512-NEXT:    srli a0, s1, 40
-; VLEN512-NEXT:    sb a0, 453(sp)
-; VLEN512-NEXT:    srli a0, s1, 32
-; VLEN512-NEXT:    sb a0, 452(sp)
-; VLEN512-NEXT:    srli a0, s1, 24
-; VLEN512-NEXT:    sb a0, 451(sp)
-; VLEN512-NEXT:    srli a0, s1, 16
-; VLEN512-NEXT:    sb a0, 450(sp)
-; VLEN512-NEXT:    sb s1, 448(sp)
-; VLEN512-NEXT:    srli a0, s1, 8
-; VLEN512-NEXT:    sb a0, 449(sp)
-; VLEN512-NEXT:    srli a0, a2, 56
-; VLEN512-NEXT:    sb a0, 447(sp)
-; VLEN512-NEXT:    srli a0, a2, 48
-; VLEN512-NEXT:    sb a0, 446(sp)
-; VLEN512-NEXT:    srli a0, a2, 40
-; VLEN512-NEXT:    sb a0, 445(sp)
-; VLEN512-NEXT:    srli a0, a2, 32
-; VLEN512-NEXT:    sb a0, 444(sp)
-; VLEN512-NEXT:    srli a0, a2, 24
-; VLEN512-NEXT:    sb a0, 443(sp)
-; VLEN512-NEXT:    srli a0, a2, 16
-; VLEN512-NEXT:    sb a0, 442(sp)
-; VLEN512-NEXT:    sb a2, 440(sp)
-; VLEN512-NEXT:    srli a0, a2, 8
-; VLEN512-NEXT:    sb a0, 441(sp)
-; VLEN512-NEXT:    srli a0, a1, 56
-; VLEN512-NEXT:    sb a0, 439(sp)
-; VLEN512-NEXT:    srli a0, a1, 48
-; VLEN512-NEXT:    sb a0, 438(sp)
-; VLEN512-NEXT:    srli a0, a1, 40
-; VLEN512-NEXT:    sb a0, 437(sp)
-; VLEN512-NEXT:    srli a0, a1, 32
-; VLEN512-NEXT:    sb a0, 436(sp)
-; VLEN512-NEXT:    srli a0, a1, 24
-; VLEN512-NEXT:    sb a0, 435(sp)
-; VLEN512-NEXT:    srli a0, a1, 16
-; VLEN512-NEXT:    sb a0, 434(sp)
-; VLEN512-NEXT:    sb a1, 432(sp)
-; VLEN512-NEXT:    srli a0, a1, 8
-; VLEN512-NEXT:    sb a0, 433(sp)
-; VLEN512-NEXT:    srli a0, ra, 56
-; VLEN512-NEXT:    sb a0, 431(sp)
-; VLEN512-NEXT:    srli a0, ra, 48
-; VLEN512-NEXT:    sb a0, 430(sp)
-; VLEN512-NEXT:    srli a0, ra, 40
-; VLEN512-NEXT:    sb a0, 429(sp)
-; VLEN512-NEXT:    srli a0, ra, 32
-; VLEN512-NEXT:    sb a0, 428(sp)
-; VLEN512-NEXT:    srli a0, ra, 24
-; VLEN512-NEXT:    sb a0, 427(sp)
-; VLEN512-NEXT:    srli a0, ra, 16
-; VLEN512-NEXT:    sb a0, 426(sp)
-; VLEN512-NEXT:    sb ra, 424(sp)
-; VLEN512-NEXT:    srli a0, ra, 8
-; VLEN512-NEXT:    sb a0, 425(sp)
-; VLEN512-NEXT:    srli a0, s11, 56
-; VLEN512-NEXT:    sb a0, 423(sp)
-; VLEN512-NEXT:    srli a0, s11, 48
-; VLEN512-NEXT:    sb a0, 422(sp)
-; VLEN512-NEXT:    srli a0, s11, 40
-; VLEN512-NEXT:    sb a0, 421(sp)
-; VLEN512-NEXT:    srli a0, s11, 32
-; VLEN512-NEXT:    sb a0, 420(sp)
-; VLEN512-NEXT:    srli a0, s11, 24
-; VLEN512-NEXT:    sb a0, 419(sp)
-; VLEN512-NEXT:    srli a0, s11, 16
-; VLEN512-NEXT:    sb a0, 418(sp)
-; VLEN512-NEXT:    sb s11, 416(sp)
-; VLEN512-NEXT:    srli a0, s11, 8
-; VLEN512-NEXT:    sb a0, 417(sp)
-; VLEN512-NEXT:    srli a0, s10, 56
-; VLEN512-NEXT:    sb a0, 415(sp)
-; VLEN512-NEXT:    srli a0, s10, 48
-; VLEN512-NEXT:    sb a0, 414(sp)
-; VLEN512-NEXT:    srli a0, s10, 40
-; VLEN512-NEXT:    sb a0, 413(sp)
-; VLEN512-NEXT:    srli a0, s10, 32
-; VLEN512-NEXT:    sb a0, 412(sp)
-; VLEN512-NEXT:    srli a0, s10, 24
-; VLEN512-NEXT:    sb a0, 411(sp)
-; VLEN512-NEXT:    srli a0, s10, 16
-; VLEN512-NEXT:    sb a0, 410(sp)
-; VLEN512-NEXT:    sb s10, 408(sp)
-; VLEN512-NEXT:    srli a0, s10, 8
-; VLEN512-NEXT:    sb a0, 409(sp)
-; VLEN512-NEXT:    srli a0, s9, 56
-; VLEN512-NEXT:    sb a0, 407(sp)
-; VLEN512-NEXT:    srli a0, s9, 48
-; VLEN512-NEXT:    sb a0, 406(sp)
-; VLEN512-NEXT:    srli a0, s9, 40
-; VLEN512-NEXT:    sb a0, 405(sp)
-; VLEN512-NEXT:    srli a0, s9, 32
-; VLEN512-NEXT:    sb a0, 404(sp)
-; VLEN512-NEXT:    srli a0, s9, 24
-; VLEN512-NEXT:    sb a0, 403(sp)
-; VLEN512-NEXT:    srli a0, s9, 16
-; VLEN512-NEXT:    sb a0, 402(sp)
-; VLEN512-NEXT:    sb s9, 400(sp)
-; VLEN512-NEXT:    srli a0, s9, 8
-; VLEN512-NEXT:    sb a0, 401(sp)
-; VLEN512-NEXT:    srli a0, s8, 56
-; VLEN512-NEXT:    sb a0, 399(sp)
-; VLEN512-NEXT:    srli a0, s8, 48
-; VLEN512-NEXT:    sb a0, 398(sp)
-; VLEN512-NEXT:    srli a0, s8, 40
-; VLEN512-NEXT:    sb a0, 397(sp)
-; VLEN512-NEXT:    srli a0, s8, 32
-; VLEN512-NEXT:    sb a0, 396(sp)
-; VLEN512-NEXT:    srli a0, s8, 24
-; VLEN512-NEXT:    sb a0, 395(sp)
-; VLEN512-NEXT:    srli a0, s8, 16
-; VLEN512-NEXT:    sb a0, 394(sp)
-; VLEN512-NEXT:    sb s8, 392(sp)
-; VLEN512-NEXT:    srli a0, s8, 8
-; VLEN512-NEXT:    sb a0, 393(sp)
-; VLEN512-NEXT:    srli a0, s7, 56
-; VLEN512-NEXT:    sb a0, 391(sp)
-; VLEN512-NEXT:    srli a0, s7, 48
-; VLEN512-NEXT:    sb a0, 390(sp)
-; VLEN512-NEXT:    srli a0, s7, 40
-; VLEN512-NEXT:    sb a0, 389(sp)
-; VLEN512-NEXT:    srli a0, s7, 32
-; VLEN512-NEXT:    sb a0, 388(sp)
-; VLEN512-NEXT:    srli a0, s7, 24
-; VLEN512-NEXT:    sb a0, 387(sp)
-; VLEN512-NEXT:    srli a0, s7, 16
-; VLEN512-NEXT:    sb a0, 386(sp)
-; VLEN512-NEXT:    sb s7, 384(sp)
-; VLEN512-NEXT:    srli a0, s7, 8
-; VLEN512-NEXT:    sb a0, 385(sp)
-; VLEN512-NEXT:    srli a0, s6, 56
-; VLEN512-NEXT:    sb a0, 383(sp)
-; VLEN512-NEXT:    srli a0, s6, 48
-; VLEN512-NEXT:    sb a0, 382(sp)
-; VLEN512-NEXT:    srli a0, s6, 40
-; VLEN512-NEXT:    sb a0, 381(sp)
-; VLEN512-NEXT:    srli a0, s6, 32
-; VLEN512-NEXT:    sb a0, 380(sp)
-; VLEN512-NEXT:    srli a0, s6, 24
-; VLEN512-NEXT:    sb a0, 379(sp)
-; VLEN512-NEXT:    srli a0, s6, 16
-; VLEN512-NEXT:    sb a0, 378(sp)
-; VLEN512-NEXT:    sb s6, 376(sp)
-; VLEN512-NEXT:    srli a0, s6, 8
-; VLEN512-NEXT:    sb a0, 377(sp)
-; VLEN512-NEXT:    srli a0, s5, 56
-; VLEN512-NEXT:    sb a0, 375(sp)
-; VLEN512-NEXT:    srli a0, s5, 48
-; VLEN512-NEXT:    sb a0, 374(sp)
-; VLEN512-NEXT:    srli a0, s5, 40
-; VLEN512-NEXT:    sb a0, 373(sp)
-; VLEN512-NEXT:    srli a0, s5, 32
-; VLEN512-NEXT:    sb a0, 372(sp)
-; VLEN512-NEXT:    srli a0, s5, 24
-; VLEN512-NEXT:    sb a0, 371(sp)
-; VLEN512-NEXT:    srli a0, s5, 16
-; VLEN512-NEXT:    sb a0, 370(sp)
-; VLEN512-NEXT:    sb s5, 368(sp)
-; VLEN512-NEXT:    srli a0, s5, 8
-; VLEN512-NEXT:    sb a0, 369(sp)
-; VLEN512-NEXT:    srli a0, s4, 56
-; VLEN512-NEXT:    sb a0, 367(sp)
-; VLEN512-NEXT:    srli a0, s4, 48
-; VLEN512-NEXT:    sb a0, 366(sp)
-; VLEN512-NEXT:    srli a0, s4, 40
-; VLEN512-NEXT:    sb a0, 365(sp)
-; VLEN512-NEXT:    srli a0, s4, 32
-; VLEN512-NEXT:    sb a0, 364(sp)
-; VLEN512-NEXT:    srli a0, s4, 24
-; VLEN512-NEXT:    sb a0, 363(sp)
-; VLEN512-NEXT:    srli a0, s4, 16
-; VLEN512-NEXT:    sb a0, 362(sp)
-; VLEN512-NEXT:    sb s4, 360(sp)
-; VLEN512-NEXT:    srli a0, s4, 8
-; VLEN512-NEXT:    sb a0, 361(sp)
-; VLEN512-NEXT:    srli a0, s3, 56
-; VLEN512-NEXT:    sb a0, 359(sp)
-; VLEN512-NEXT:    srli a0, s3, 48
-; VLEN512-NEXT:    sb a0, 358(sp)
-; VLEN512-NEXT:    srli a0, s3, 40
-; VLEN512-NEXT:    sb a0, 357(sp)
-; VLEN512-NEXT:    srli a0, s3, 32
-; VLEN512-NEXT:    sb a0, 356(sp)
-; VLEN512-NEXT:    srli a0, s3, 24
-; VLEN512-NEXT:    sb a0, 355(sp)
-; VLEN512-NEXT:    srli a0, s3, 16
-; VLEN512-NEXT:    sb a0, 354(sp)
-; VLEN512-NEXT:    sb s3, 352(sp)
-; VLEN512-NEXT:    srli a0, s3, 8
-; VLEN512-NEXT:    sb a0, 353(sp)
-; VLEN512-NEXT:    srli a0, s2, 56
-; VLEN512-NEXT:    sb a0, 351(sp)
-; VLEN512-NEXT:    srli a0, s2, 48
-; VLEN512-NEXT:    sb a0, 350(sp)
-; VLEN512-NEXT:    srli a0, s2, 40
-; VLEN512-NEXT:    sb a0, 349(sp)
-; VLEN512-NEXT:    srli a0, s2, 32
-; VLEN512-NEXT:    sb a0, 348(sp)
-; VLEN512-NEXT:    srli a0, s2, 24
-; VLEN512-NEXT:    sb a0, 347(sp)
-; VLEN512-NEXT:    srli a0, s2, 16
-; VLEN512-NEXT:    sb a0, 346(sp)
-; VLEN512-NEXT:    sb s2, 344(sp)
-; VLEN512-NEXT:    srli a0, s2, 8
-; VLEN512-NEXT:    sb a0, 345(sp)
-; VLEN512-NEXT:    srli a0, t6, 56
-; VLEN512-NEXT:    sb a0, 343(sp)
-; VLEN512-NEXT:    srli a0, t6, 48
-; VLEN512-NEXT:    sb a0, 342(sp)
-; VLEN512-NEXT:    srli a0, t6, 40
-; VLEN512-NEXT:    sb a0, 341(sp)
-; VLEN512-NEXT:    srli a0, t6, 32
-; VLEN512-NEXT:    sb a0, 340(sp)
-; VLEN512-NEXT:    srli a0, t6, 24
-; VLEN512-NEXT:    sb a0, 339(sp)
-; VLEN512-NEXT:    srli a0, t6, 16
-; VLEN512-NEXT:    sb a0, 338(sp)
-; VLEN512-NEXT:    sb t6, 336(sp)
-; VLEN512-NEXT:    srli a0, t6, 8
-; VLEN512-NEXT:    sb a0, 337(sp)
-; VLEN512-NEXT:    srli a0, t5, 56
-; VLEN512-NEXT:    sb a0, 335(sp)
-; VLEN512-NEXT:    srli a0, t5, 48
-; VLEN512-NEXT:    sb a0, 334(sp)
-; VLEN512-NEXT:    srli a0, t5, 40
-; VLEN512-NEXT:    sb a0, 333(sp)
-; VLEN512-NEXT:    srli a0, t5, 32
-; VLEN512-NEXT:    sb a0, 332(sp)
-; VLEN512-NEXT:    srli a0, t5, 24
-; VLEN512-NEXT:    sb a0, 331(sp)
-; VLEN512-NEXT:    srli a0, t5, 16
-; VLEN512-NEXT:    sb a0, 330(sp)
-; VLEN512-NEXT:    sb t5, 328(sp)
-; VLEN512-NEXT:    srli a0, t5, 8
-; VLEN512-NEXT:    sb a0, 329(sp)
-; VLEN512-NEXT:    srli a0, t4, 56
-; VLEN512-NEXT:    sb a0, 327(sp)
-; VLEN512-NEXT:    srli a0, t4, 48
-; VLEN512-NEXT:    sb a0, 326(sp)
-; VLEN512-NEXT:    srli a0, t4, 40
-; VLEN512-NEXT:    sb a0, 325(sp)
-; VLEN512-NEXT:    srli a0, t4, 32
-; VLEN512-NEXT:    sb a0, 324(sp)
-; VLEN512-NEXT:    srli a0, t4, 24
-; VLEN512-NEXT:    sb a0, 323(sp)
-; VLEN512-NEXT:    srli a0, t4, 16
-; VLEN512-NEXT:    sb a0, 322(sp)
-; VLEN512-NEXT:    sb t4, 320(sp)
-; VLEN512-NEXT:    srli a0, t4, 8
-; VLEN512-NEXT:    sb a0, 321(sp)
-; VLEN512-NEXT:    srli a0, t3, 56
-; VLEN512-NEXT:    sb a0, 319(sp)
-; VLEN512-NEXT:    srli a0, t3, 48
-; VLEN512-NEXT:    sb a0, 318(sp)
-; VLEN512-NEXT:    srli a0, t3, 40
-; VLEN512-NEXT:    sb a0, 317(sp)
-; VLEN512-NEXT:    srli a0, t3, 32
-; VLEN512-NEXT:    sb a0, 316(sp)
-; VLEN512-NEXT:    srli a0, t3, 24
-; VLEN512-NEXT:    sb a0, 315(sp)
-; VLEN512-NEXT:    srli a0, t3, 16
-; VLEN512-NEXT:    sb a0, 314(sp)
-; VLEN512-NEXT:    sb t3, 312(sp)
-; VLEN512-NEXT:    srli a0, t3, 8
-; VLEN512-NEXT:    sb a0, 313(sp)
-; VLEN512-NEXT:    srli a0, t2, 56
-; VLEN512-NEXT:    sb a0, 311(sp)
-; VLEN512-NEXT:    srli a0, t2, 48
-; VLEN512-NEXT:    sb a0, 310(sp)
-; VLEN512-NEXT:    srli a0, t2, 40
-; VLEN512-NEXT:    sb a0, 309(sp)
-; VLEN512-NEXT:    srli a0, t2, 32
-; VLEN512-NEXT:    sb a0, 308(sp)
-; VLEN512-NEXT:    srli a0, t2, 24
-; VLEN512-NEXT:    sb a0, 307(sp)
-; VLEN512-NEXT:    srli a0, t2, 16
-; VLEN512-NEXT:    sb a0, 306(sp)
-; VLEN512-NEXT:    sb t2, 304(sp)
-; VLEN512-NEXT:    srli a0, t2, 8
-; VLEN512-NEXT:    sb a0, 305(sp)
-; VLEN512-NEXT:    srli a0, t1, 56
-; VLEN512-NEXT:    sb a0, 303(sp)
-; VLEN512-NEXT:    srli a0, t1, 48
-; VLEN512-NEXT:    sb a0, 302(sp)
-; VLEN512-NEXT:    srli a0, t1, 40
-; VLEN512-NEXT:    sb a0, 301(sp)
-; VLEN512-NEXT:    srli a0, t1, 32
-; VLEN512-NEXT:    sb a0, 300(sp)
-; VLEN512-NEXT:    srli a0, t1, 24
-; VLEN512-NEXT:    sb a0, 299(sp)
-; VLEN512-NEXT:    srli a0, t1, 16
-; VLEN512-NEXT:    sb a0, 298(sp)
-; VLEN512-NEXT:    sb t1, 296(sp)
-; VLEN512-NEXT:    srli a0, t1, 8
-; VLEN512-NEXT:    sb a0, 297(sp)
-; VLEN512-NEXT:    srli a0, t0, 56
-; VLEN512-NEXT:    sb a0, 295(sp)
-; VLEN512-NEXT:    srli a0, t0, 48
-; VLEN512-NEXT:    sb a0, 294(sp)
-; VLEN512-NEXT:    srli a0, t0, 40
-; VLEN512-NEXT:    sb a0, 293(sp)
-; VLEN512-NEXT:    srli a0, t0, 32
-; VLEN512-NEXT:    sb a0, 292(sp)
-; VLEN512-NEXT:    srli a0, t0, 24
-; VLEN512-NEXT:    sb a0, 291(sp)
-; VLEN512-NEXT:    srli a0, t0, 16
-; VLEN512-NEXT:    sb a0, 290(sp)
-; VLEN512-NEXT:    sb t0, 288(sp)
-; VLEN512-NEXT:    srli a0, t0, 8
-; VLEN512-NEXT:    sb a0, 289(sp)
-; VLEN512-NEXT:    srli a0, a7, 56
-; VLEN512-NEXT:    sb a0, 287(sp)
-; VLEN512-NEXT:    srli a0, a7, 48
-; VLEN512-NEXT:    sb a0, 286(sp)
-; VLEN512-NEXT:    srli a0, a7, 40
-; VLEN512-NEXT:    sb a0, 285(sp)
-; VLEN512-NEXT:    srli a0, a7, 32
-; VLEN512-NEXT:    sb a0, 284(sp)
-; VLEN512-NEXT:    srli a0, a7, 24
-; VLEN512-NEXT:    sb a0, 283(sp)
-; VLEN512-NEXT:    srli a0, a7, 16
-; VLEN512-NEXT:    sb a0, 282(sp)
-; VLEN512-NEXT:    vmv.x.s a0, v0
-; VLEN512-NEXT:    sb a7, 280(sp)
-; VLEN512-NEXT:    srli a1, a7, 8
-; VLEN512-NEXT:    sb a1, 281(sp)
-; VLEN512-NEXT:    srli a1, a0, 56
-; VLEN512-NEXT:    sb a1, 279(sp)
-; VLEN512-NEXT:    srli a1, a0, 48
-; VLEN512-NEXT:    sb a1, 278(sp)
-; VLEN512-NEXT:    srli a1, a0, 40
-; VLEN512-NEXT:    sb a1, 277(sp)
-; VLEN512-NEXT:    srli a1, a0, 32
-; VLEN512-NEXT:    sb a1, 276(sp)
-; VLEN512-NEXT:    srli a1, a0, 24
-; VLEN512-NEXT:    sb a1, 275(sp)
-; VLEN512-NEXT:    srli a1, a0, 16
-; VLEN512-NEXT:    sb a1, 274(sp)
-; VLEN512-NEXT:    addi a1, sp, 920
-; VLEN512-NEXT:    vl8re8.v v24, (a1) # Unknown-size Folded Reload
-; VLEN512-NEXT:    vmv.x.s a1, v24
-; VLEN512-NEXT:    sb a0, 272(sp)
-; VLEN512-NEXT:    srli a0, a0, 8
-; VLEN512-NEXT:    sb a0, 273(sp)
-; VLEN512-NEXT:    srli a0, a1, 56
-; VLEN512-NEXT:    sb a0, 271(sp)
-; VLEN512-NEXT:    srli a0, a1, 48
-; VLEN512-NEXT:    sb a0, 270(sp)
-; VLEN512-NEXT:    srli a0, a1, 40
-; VLEN512-NEXT:    sb a0, 269(sp)
-; VLEN512-NEXT:    srli a0, a1, 32
-; VLEN512-NEXT:    sb a0, 268(sp)
-; VLEN512-NEXT:    srli a0, a1, 24
-; VLEN512-NEXT:    sb a0, 267(sp)
-; VLEN512-NEXT:    srli a0, a1, 16
-; VLEN512-NEXT:    sb a0, 266(sp)
-; VLEN512-NEXT:    srli a0, a6, 16
-; VLEN512-NEXT:    sb a1, 264(sp)
-; VLEN512-NEXT:    srli a1, a1, 8
-; VLEN512-NEXT:    sb a1, 265(sp)
-; VLEN512-NEXT:    srli a1, a6, 56
-; VLEN512-NEXT:    sb a1, 263(sp)
-; VLEN512-NEXT:    srli a1, a6, 48
-; VLEN512-NEXT:    sb a1, 262(sp)
-; VLEN512-NEXT:    srli a1, a6, 40
-; VLEN512-NEXT:    sb a1, 261(sp)
-; VLEN512-NEXT:    srli a1, a6, 32
-; VLEN512-NEXT:    sb a1, 260(sp)
-; VLEN512-NEXT:    srli a1, a6, 24
-; VLEN512-NEXT:    sb a1, 259(sp)
-; VLEN512-NEXT:    vmv.x.s a1, v8
-; VLEN512-NEXT:    sb a0, 258(sp)
-; VLEN512-NEXT:    srli a0, a1, 56
-; VLEN512-NEXT:    sb a6, 256(sp)
-; VLEN512-NEXT:    srli a2, a6, 8
-; VLEN512-NEXT:    sb a2, 257(sp)
-; VLEN512-NEXT:    vslidedown.vi v24, v8, 31
-; VLEN512-NEXT:    sb a0, 519(sp)
-; VLEN512-NEXT:    srli a0, a1, 48
-; VLEN512-NEXT:    sb a0, 518(sp)
-; VLEN512-NEXT:    srli a0, a1, 40
-; VLEN512-NEXT:    sb a0, 517(sp)
-; VLEN512-NEXT:    srli a0, a1, 32
-; VLEN512-NEXT:    sb a0, 516(sp)
-; VLEN512-NEXT:    srli a0, a1, 24
-; VLEN512-NEXT:    sb a0, 515(sp)
-; VLEN512-NEXT:    srli a0, a1, 16
-; VLEN512-NEXT:    sb a0, 514(sp)
-; VLEN512-NEXT:    vmv.x.s a0, v24
-; VLEN512-NEXT:    vslidedown.vi v24, v8, 30
-; VLEN512-NEXT:    sb a1, 512(sp)
-; VLEN512-NEXT:    srli a1, a1, 8
-; VLEN512-NEXT:    sb a1, 513(sp)
-; VLEN512-NEXT:    srli a1, a0, 56
-; VLEN512-NEXT:    sb a1, 767(sp)
-; VLEN512-NEXT:    srli a1, a0, 48
-; VLEN512-NEXT:    sb a1, 766(sp)
-; VLEN512-NEXT:    srli a1, a0, 40
-; VLEN512-NEXT:    sb a1, 765(sp)
-; VLEN512-NEXT:    srli a1, a0, 32
-; VLEN512-NEXT:    sb a1, 764(sp)
-; VLEN512-NEXT:    srli a1, a0, 24
-; VLEN512-NEXT:    sb a1, 763(sp)
-; VLEN512-NEXT:    srli a1, a0, 16
-; VLEN512-NEXT:    sb a1, 762(sp)
-; VLEN512-NEXT:    vmv.x.s a1, v24
-; VLEN512-NEXT:    vslidedown.vi v24, v8, 29
-; VLEN512-NEXT:    sb a0, 760(sp)
-; VLEN512-NEXT:    srli a0, a0, 8
-; VLEN512-NEXT:    sb a0, 761(sp)
-; VLEN512-NEXT:    srli a0, a1, 56
-; VLEN512-NEXT:    sb a0, 759(sp)
-; VLEN512-NEXT:    srli a0, a1, 48
-; VLEN512-NEXT:    sb a0, 758(sp)
-; VLEN512-NEXT:    srli a0, a1, 40
-; VLEN512-NEXT:    sb a0, 757(sp)
-; VLEN512-NEXT:    srli a0, a1, 32
-; VLEN512-NEXT:    sb a0, 756(sp)
-; VLEN512-NEXT:    srli a0, a1, 24
-; VLEN512-NEXT:    sb a0, 755(sp)
-; VLEN512-NEXT:    srli a0, a1, 16
-; VLEN512-NEXT:    sb a0, 754(sp)
-; VLEN512-NEXT:    vmv.x.s a0, v24
-; VLEN512-NEXT:    vslidedown.vi v24, v8, 28
-; VLEN512-NEXT:    sb a1, 752(sp)
-; VLEN512-NEXT:    srli a1, a1, 8
-; VLEN512-NEXT:    sb a1, 753(sp)
-; VLEN512-NEXT:    srli a1, a0, 56
-; VLEN512-NEXT:    sb a1, 751(sp)
-; VLEN512-NEXT:    srli a1, a0, 48
-; VLEN512-NEXT:    sb a1, 750(sp)
-; VLEN512-NEXT:    srli a1, a0, 40
-; VLEN512-NEXT:    sb a1, 749(sp)
-; VLEN512-NEXT:    srli a1, a0, 32
-; VLEN512-NEXT:    sb a1, 748(sp)
-; VLEN512-NEXT:    srli a1, a0, 24
-; VLEN512-NEXT:    sb a1, 747(sp)
-; VLEN512-NEXT:    srli a1, a0, 16
-; VLEN512-NEXT:    sb a1, 746(sp)
-; VLEN512-NEXT:    vmv.x.s a1, v24
-; VLEN512-NEXT:    vslidedown.vi v24, v8, 27
-; VLEN512-NEXT:    sb a0, 744(sp)
-; VLEN512-NEXT:    srli a0, a0, 8
-; VLEN512-NEXT:    sb a0, 745(sp)
-; VLEN512-NEXT:    srli a0, a1, 56
-; VLEN512-NEXT:    sb a0, 743(sp)
-; VLEN512-NEXT:    srli a0, a1, 48
-; VLEN512-NEXT:    sb a0, 742(sp)
-; VLEN512-NEXT:    srli a0, a1, 40
-; VLEN512-NEXT:    sb a0, 741(sp)
-; VLEN512-NEXT:    srli a0, a1, 32
-; VLEN512-NEXT:    sb a0, 740(sp)
-; VLEN512-NEXT:    srli a0, a1, 24
-; VLEN512-NEXT:    sb a0, 739(sp)
-; VLEN512-NEXT:    srli a0, a1, 16
-; VLEN512-NEXT:    sb a0, 738(sp)
-; VLEN512-NEXT:    vmv.x.s a0, v24
-; VLEN512-NEXT:    vslidedown.vi v24, v8, 26
-; VLEN512-NEXT:    sb a1, 736(sp)
-; VLEN512-NEXT:    srli a1, a1, 8
-; VLEN512-NEXT:    sb a1, 737(sp)
-; VLEN512-NEXT:    srli a1, a0, 56
-; VLEN512-NEXT:    sb a1, 735(sp)
-; VLEN512-NEXT:    srli a1, a0, 48
-; VLEN512-NEXT:    sb a1, 734(sp)
-; VLEN512-NEXT:    srli a1, a0, 40
-; VLEN512-NEXT:    sb a1, 733(sp)
-; VLEN512-NEXT:    srli a1, a0, 32
-; VLEN512-NEXT:    sb a1, 732(sp)
-; VLEN512-NEXT:    srli a1, a0, 24
-; VLEN512-NEXT:    sb a1, 731(sp)
-; VLEN512-NEXT:    srli a1, a0, 16
-; VLEN512-NEXT:    sb a1, 730(sp)
-; VLEN512-NEXT:    vmv.x.s a1, v24
-; VLEN512-NEXT:    vslidedown.vi v24, v8, 25
-; VLEN512-NEXT:    sb a0, 728(sp)
-; VLEN512-NEXT:    srli a0, a0, 8
-; VLEN512-NEXT:    sb a0, 729(sp)
-; VLEN512-NEXT:    srli a0, a1, 56
-; VLEN512-NEXT:    sb a0, 727(sp)
-; VLEN512-NEXT:    srli a0, a1, 48
-; VLEN512-NEXT:    sb a0, 726(sp)
-; VLEN512-NEXT:    srli a0, a1, 40
-; VLEN512-NEXT:    sb a0, 725(sp)
-; VLEN512-NEXT:    srli a0, a1, 32
-; VLEN512-NEXT:    sb a0, 724(sp)
-; VLEN512-NEXT:    srli a0, a1, 24
-; VLEN512-NEXT:    sb a0, 723(sp)
-; VLEN512-NEXT:    srli a0, a1, 16
-; VLEN512-NEXT:    sb a0, 722(sp)
-; VLEN512-NEXT:    vmv.x.s a0, v24
-; VLEN512-NEXT:    vslidedown.vi v24, v8, 24
-; VLEN512-NEXT:    sb a1, 720(sp)
-; VLEN512-NEXT:    srli a1, a1, 8
-; VLEN512-NEXT:    sb a1, 721(sp)
-; VLEN512-NEXT:    srli a1, a0, 56
-; VLEN512-NEXT:    sb a1, 719(sp)
-; VLEN512-NEXT:    srli a1, a0, 48
-; VLEN512-NEXT:    sb a1, 718(sp)
-; VLEN512-NEXT:    srli a1, a0, 40
-; VLEN512-NEXT:    sb a1, 717(sp)
-; VLEN512-NEXT:    srli a1, a0, 32
-; VLEN512-NEXT:    sb a1, 716(sp)
-; VLEN512-NEXT:    srli a1, a0, 24
-; VLEN512-NEXT:    sb a1, 715(sp)
-; VLEN512-NEXT:    srli a1, a0, 16
-; VLEN512-NEXT:    sb a1, 714(sp)
-; VLEN512-NEXT:    vmv.x.s a1, v24
-; VLEN512-NEXT:    vslidedown.vi v24, v8, 23
-; VLEN512-NEXT:    sb a0, 712(sp)
-; VLEN512-NEXT:    srli a0, a0, 8
-; VLEN512-NEXT:    sb a0, 713(sp)
-; VLEN512-NEXT:    srli a0, a1, 56
-; VLEN512-NEXT:    sb a0, 711(sp)
-; VLEN512-NEXT:    srli a0, a1, 48
-; VLEN512-NEXT:    sb a0, 710(sp)
-; VLEN512-NEXT:    srli a0, a1, 40
-; VLEN512-NEXT:    sb a0, 709(sp)
-; VLEN512-NEXT:    srli a0, a1, 32
-; VLEN512-NEXT:    sb a0, 708(sp)
-; VLEN512-NEXT:    srli a0, a1, 24
-; VLEN512-NEXT:    sb a0, 707(sp)
-; VLEN512-NEXT:    srli a0, a1, 16
-; VLEN512-NEXT:    sb a0, 706(sp)
-; VLEN512-NEXT:    vmv.x.s a0, v24
-; VLEN512-NEXT:    vslidedown.vi v24, v8, 22
-; VLEN512-NEXT:    sb a1, 704(sp)
-; VLEN512-NEXT:    srli a1, a1, 8
-; VLEN512-NEXT:    sb a1, 705(sp)
-; VLEN512-NEXT:    srli a1, a0, 56
-; VLEN512-NEXT:    sb a1, 703(sp)
-; VLEN512-NEXT:    srli a1, a0, 48
-; VLEN512-NEXT:    sb a1, 702(sp)
-; VLEN512-NEXT:    srli a1, a0, 40
-; VLEN512-NEXT:    sb a1, 701(sp)
-; VLEN512-NEXT:    srli a1, a0, 32
-; VLEN512-NEXT:    sb a1, 700(sp)
-; VLEN512-NEXT:    srli a1, a0, 24
-; VLEN512-NEXT:    sb a1, 699(sp)
-; VLEN512-NEXT:    srli a1, a0, 16
-; VLEN512-NEXT:    sb a1, 698(sp)
-; VLEN512-NEXT:    vmv.x.s a1, v24
-; VLEN512-NEXT:    vslidedown.vi v24, v8, 21
-; VLEN512-NEXT:    sb a0, 696(sp)
-; VLEN512-NEXT:    srli a0, a0, 8
-; VLEN512-NEXT:    sb a0, 697(sp)
-; VLEN512-NEXT:    srli a0, a1, 56
-; VLEN512-NEXT:    sb a0, 695(sp)
-; VLEN512-NEXT:    srli a0, a1, 48
-; VLEN512-NEXT:    sb a0, 694(sp)
-; VLEN512-NEXT:    srli a0, a1, 40
-; VLEN512-NEXT:    sb a0, 693(sp)
-; VLEN512-NEXT:    srli a0, a1, 32
-; VLEN512-NEXT:    sb a0, 692(sp)
-; VLEN512-NEXT:    srli a0, a1, 24
-; VLEN512-NEXT:    sb a0, 691(sp)
-; VLEN512-NEXT:    srli a0, a1, 16
-; VLEN512-NEXT:    sb a0, 690(sp)
-; VLEN512-NEXT:    vmv.x.s a0, v24
-; VLEN512-NEXT:    vslidedown.vi v24, v8, 20
-; VLEN512-NEXT:    sb a1, 688(sp)
-; VLEN512-NEXT:    srli a1, a1, 8
-; VLEN512-NEXT:    sb a1, 689(sp)
-; VLEN512-NEXT:    srli a1, a0, 56
-; VLEN512-NEXT:    sb a1, 687(sp)
-; VLEN512-NEXT:    srli a1, a0, 48
-; VLEN512-NEXT:    sb a1, 686(sp)
-; VLEN512-NEXT:    srli a1, a0, 40
-; VLEN512-NEXT:    sb a1, 685(sp)
-; VLEN512-NEXT:    srli a1, a0, 32
-; VLEN512-NEXT:    sb a1, 684(sp)
-; VLEN512-NEXT:    srli a1, a0, 24
-; VLEN512-NEXT:    sb a1, 683(sp)
-; VLEN512-NEXT:    srli a1, a0, 16
-; VLEN512-NEXT:    sb a1, 682(sp)
-; VLEN512-NEXT:    vmv.x.s a1, v24
-; VLEN512-NEXT:    vslidedown.vi v24, v8, 19
-; VLEN512-NEXT:    sb a0, 680(sp)
-; VLEN512-NEXT:    srli a0, a0, 8
-; VLEN512-NEXT:    sb a0, 681(sp)
-; VLEN512-NEXT:    srli a0, a1, 56
-; VLEN512-NEXT:    sb a0, 679(sp)
-; VLEN512-NEXT:    srli a0, a1, 48
-; VLEN512-NEXT:    sb a0, 678(sp)
-; VLEN512-NEXT:    srli a0, a1, 40
-; VLEN512-NEXT:    sb a0, 677(sp)
-; VLEN512-NEXT:    srli a0, a1, 32
-; VLEN512-NEXT:    sb a0, 676(sp)
-; VLEN512-NEXT:    srli a0, a1, 24
-; VLEN512-NEXT:    sb a0, 675(sp)
-; VLEN512-NEXT:    srli a0, a1, 16
-; VLEN512-NEXT:    sb a0, 674(sp)
-; VLEN512-NEXT:    vmv.x.s a0, v24
-; VLEN512-NEXT:    vslidedown.vi v24, v8, 18
-; VLEN512-NEXT:    sb a1, 672(sp)
-; VLEN512-NEXT:    srli a1, a1, 8
-; VLEN512-NEXT:    sb a1, 673(sp)
-; VLEN512-NEXT:    srli a1, a0, 56
-; VLEN512-NEXT:    sb a1, 671(sp)
-; VLEN512-NEXT:    srli a1, a0, 48
-; VLEN512-NEXT:    sb a1, 670(sp)
-; VLEN512-NEXT:    srli a1, a0, 40
-; VLEN512-NEXT:    sb a1, 669(sp)
-; VLEN512-NEXT:    srli a1, a0, 32
-; VLEN512-NEXT:    sb a1, 668(sp)
-; VLEN512-NEXT:    srli a1, a0, 24
-; VLEN512-NEXT:    sb a1, 667(sp)
-; VLEN512-NEXT:    srli a1, a0, 16
-; VLEN512-NEXT:    sb a1, 666(sp)
-; VLEN512-NEXT:    vmv.x.s a1, v24
-; VLEN512-NEXT:    vslidedown.vi v24, v8, 17
-; VLEN512-NEXT:    sb a0, 664(sp)
-; VLEN512-NEXT:    srli a0, a0, 8
-; VLEN512-NEXT:    sb a0, 665(sp)
-; VLEN512-NEXT:    srli a0, a1, 56
-; VLEN512-NEXT:    sb a0, 663(sp)
-; VLEN512-NEXT:    srli a0, a1, 48
-; VLEN512-NEXT:    sb a0, 662(sp)
-; VLEN512-NEXT:    srli a0, a1, 40
-; VLEN512-NEXT:    sb a0, 661(sp)
-; VLEN512-NEXT:    srli a0, a1, 32
-; VLEN512-NEXT:    sb a0, 660(sp)
-; VLEN512-NEXT:    srli a0, a1, 24
-; VLEN512-NEXT:    sb a0, 659(sp)
-; VLEN512-NEXT:    srli a0, a1, 16
-; VLEN512-NEXT:    sb a0, 658(sp)
-; VLEN512-NEXT:    vmv.x.s a0, v24
-; VLEN512-NEXT:    vslidedown.vi v24, v8, 16
-; VLEN512-NEXT:    sb a1, 656(sp)
-; VLEN512-NEXT:    srli a1, a1, 8
-; VLEN512-NEXT:    sb a1, 657(sp)
-; VLEN512-NEXT:    srli a1, a0, 56
-; VLEN512-NEXT:    sb a1, 655(sp)
-; VLEN512-NEXT:    srli a1, a0, 48
-; VLEN512-NEXT:    sb a1, 654(sp)
-; VLEN512-NEXT:    srli a1, a0, 40
-; VLEN512-NEXT:    sb a1, 653(sp)
-; VLEN512-NEXT:    srli a1, a0, 32
-; VLEN512-NEXT:    sb a1, 652(sp)
-; VLEN512-NEXT:    srli a1, a0, 24
-; VLEN512-NEXT:    sb a1, 651(sp)
-; VLEN512-NEXT:    srli a1, a0, 16
-; VLEN512-NEXT:    sb a1, 650(sp)
-; VLEN512-NEXT:    vmv.x.s a1, v24
-; VLEN512-NEXT:    vslidedown.vi v24, v8, 15
-; VLEN512-NEXT:    sb a0, 648(sp)
-; VLEN512-NEXT:    srli a0, a0, 8
-; VLEN512-NEXT:    sb a0, 649(sp)
-; VLEN512-NEXT:    srli a0, a1, 56
-; VLEN512-NEXT:    sb a0, 647(sp)
-; VLEN512-NEXT:    srli a0, a1, 48
-; VLEN512-NEXT:    sb a0, 646(sp)
-; VLEN512-NEXT:    srli a0, a1, 40
-; VLEN512-NEXT:    sb a0, 645(sp)
-; VLEN512-NEXT:    srli a0, a1, 32
-; VLEN512-NEXT:    sb a0, 644(sp)
-; VLEN512-NEXT:    srli a0, a1, 24
-; VLEN512-NEXT:    sb a0, 643(sp)
-; VLEN512-NEXT:    srli a0, a1, 16
-; VLEN512-NEXT:    sb a0, 642(sp)
-; VLEN512-NEXT:    vmv.x.s a0, v24
-; VLEN512-NEXT:    vslidedown.vi v24, v8, 14
-; VLEN512-NEXT:    sb a1, 640(sp)
-; VLEN512-NEXT:    srli a1, a1, 8
-; VLEN512-NEXT:    sb a1, 641(sp)
-; VLEN512-NEXT:    srli a1, a0, 56
-; VLEN512-NEXT:    sb a1, 639(sp)
-; VLEN512-NEXT:    srli a1, a0, 48
-; VLEN512-NEXT:    sb a1, 638(sp)
-; VLEN512-NEXT:    srli a1, a0, 40
-; VLEN512-NEXT:    sb a1, 637(sp)
-; VLEN512-NEXT:    srli a1, a0, 32
-; VLEN512-NEXT:    sb a1, 636(sp)
-; VLEN512-NEXT:    srli a1, a0, 24
-; VLEN512-NEXT:    sb a1, 635(sp)
-; VLEN512-NEXT:    srli a1, a0, 16
-; VLEN512-NEXT:    sb a1, 634(sp)
-; VLEN512-NEXT:    vmv.x.s a1, v24
-; VLEN512-NEXT:    vslidedown.vi v24, v8, 13
-; VLEN512-NEXT:    sb a0, 632(sp)
-; VLEN512-NEXT:    srli a0, a0, 8
-; VLEN512-NEXT:    sb a0, 633(sp)
-; VLEN512-NEXT:    srli a0, a1, 56
-; VLEN512-NEXT:    sb a0, 631(sp)
-; VLEN512-NEXT:    srli a0, a1, 48
-; VLEN512-NEXT:    sb a0, 630(sp)
-; VLEN512-NEXT:    srli a0, a1, 40
-; VLEN512-NEXT:    sb a0, 629(sp)
-; VLEN512-NEXT:    srli a0, a1, 32
-; VLEN512-NEXT:    sb a0, 628(sp)
-; VLEN512-NEXT:    srli a0, a1, 24
-; VLEN512-NEXT:    sb a0, 627(sp)
-; VLEN512-NEXT:    srli a0, a1, 16
-; VLEN512-NEXT:    sb a0, 626(sp)
-; VLEN512-NEXT:    vmv.x.s a0, v24
-; VLEN512-NEXT:    vslidedown.vi v24, v8, 12
-; VLEN512-NEXT:    sb a1, 624(sp)
-; VLEN512-NEXT:    srli a1, a1, 8
-; VLEN512-NEXT:    sb a1, 625(sp)
-; VLEN512-NEXT:    srli a1, a0, 56
-; VLEN512-NEXT:    sb a1, 623(sp)
-; VLEN512-NEXT:    srli a1, a0, 48
-; VLEN512-NEXT:    sb a1, 622(sp)
-; VLEN512-NEXT:    srli a1, a0, 40
-; VLEN512-NEXT:    sb a1, 621(sp)
-; VLEN512-NEXT:    srli a1, a0, 32
-; VLEN512-NEXT:    sb a1, 620(sp)
-; VLEN512-NEXT:    srli a1, a0, 24
-; VLEN512-NEXT:    sb a1, 619(sp)
-; VLEN512-NEXT:    srli a1, a0, 16
-; VLEN512-NEXT:    sb a1, 618(sp)
-; VLEN512-NEXT:    vmv.x.s a1, v24
-; VLEN512-NEXT:    vslidedown.vi v24, v8, 11
-; VLEN512-NEXT:    sb a0, 616(sp)
-; VLEN512-NEXT:    srli a0, a0, 8
-; VLEN512-NEXT:    sb a0, 617(sp)
-; VLEN512-NEXT:    srli a0, a1, 56
-; VLEN512-NEXT:    sb a0, 615(sp)
-; VLEN512-NEXT:    srli a0, a1, 48
-; VLEN512-NEXT:    sb a0, 614(sp)
-; VLEN512-NEXT:    srli a0, a1, 40
-; VLEN512-NEXT:    sb a0, 613(sp)
-; VLEN512-NEXT:    srli a0, a1, 32
-; VLEN512-NEXT:    sb a0, 612(sp)
-; VLEN512-NEXT:    srli a0, a1, 24
-; VLEN512-NEXT:    sb a0, 611(sp)
-; VLEN512-NEXT:    srli a0, a1, 16
-; VLEN512-NEXT:    sb a0, 610(sp)
-; VLEN512-NEXT:    vmv.x.s a0, v24
-; VLEN512-NEXT:    vslidedown.vi v24, v8, 10
-; VLEN512-NEXT:    sb a1, 608(sp)
-; VLEN512-NEXT:    srli a1, a1, 8
-; VLEN512-NEXT:    sb a1, 609(sp)
-; VLEN512-NEXT:    srli a1, a0, 56
-; VLEN512-NEXT:    sb a1, 607(sp)
-; VLEN512-NEXT:    srli a1, a0, 48
-; VLEN512-NEXT:    sb a1, 606(sp)
-; VLEN512-NEXT:    srli a1, a0, 40
-; VLEN512-NEXT:    sb a1, 605(sp)
-; VLEN512-NEXT:    srli a1, a0, 32
-; VLEN512-NEXT:    sb a1, 604(sp)
-; VLEN512-NEXT:    srli a1, a0, 24
-; VLEN512-NEXT:    sb a1, 603(sp)
-; VLEN512-NEXT:    srli a1, a0, 16
-; VLEN512-NEXT:    sb a1, 602(sp)
-; VLEN512-NEXT:    vmv.x.s a1, v24
-; VLEN512-NEXT:    vslidedown.vi v24, v8, 9
-; VLEN512-NEXT:    sb a0, 600(sp)
-; VLEN512-NEXT:    srli a0, a0, 8
-; VLEN512-NEXT:    sb a0, 601(sp)
-; VLEN512-NEXT:    srli a0, a1, 56
-; VLEN512-NEXT:    sb a0, 599(sp)
-; VLEN512-NEXT:    srli a0, a1, 48
-; VLEN512-NEXT:    sb a0, 598(sp)
-; VLEN512-NEXT:    srli a0, a1, 40
-; VLEN512-NEXT:    sb a0, 597(sp)
-; VLEN512-NEXT:    srli a0, a1, 32
-; VLEN512-NEXT:    sb a0, 596(sp)
-; VLEN512-NEXT:    srli a0, a1, 24
-; VLEN512-NEXT:    sb a0, 595(sp)
-; VLEN512-NEXT:    srli a0, a1, 16
-; VLEN512-NEXT:    sb a0, 594(sp)
-; VLEN512-NEXT:    vmv.x.s a0, v24
-; VLEN512-NEXT:    vslidedown.vi v24, v8, 8
-; VLEN512-NEXT:    sb a1, 592(sp)
-; VLEN512-NEXT:    srli a1, a1, 8
-; VLEN512-NEXT:    sb a1, 593(sp)
-; VLEN512-NEXT:    srli a1, a0, 56
-; VLEN512-NEXT:    sb a1, 591(sp)
-; VLEN512-NEXT:    srli a1, a0, 48
-; VLEN512-NEXT:    sb a1, 590(sp)
-; VLEN512-NEXT:    srli a1, a0, 40
-; VLEN512-NEXT:    sb a1, 589(sp)
-; VLEN512-NEXT:    srli a1, a0, 32
-; VLEN512-NEXT:    sb a1, 588(sp)
-; VLEN512-NEXT:    srli a1, a0, 24
-; VLEN512-NEXT:    sb a1, 587(sp)
-; VLEN512-NEXT:    srli a1, a0, 16
-; VLEN512-NEXT:    sb a1, 586(sp)
-; VLEN512-NEXT:    vmv.x.s a1, v24
-; VLEN512-NEXT:    vslidedown.vi v24, v8, 7
-; VLEN512-NEXT:    sb a0, 584(sp)
-; VLEN512-NEXT:    srli a0, a0, 8
-; VLEN512-NEXT:    sb a0, 585(sp)
-; VLEN512-NEXT:    srli a0, a1, 56
-; VLEN512-NEXT:    sb a0, 583(sp)
-; VLEN512-NEXT:    srli a0, a1, 48
-; VLEN512-NEXT:    sb a0, 582(sp)
-; VLEN512-NEXT:    srli a0, a1, 40
-; VLEN512-NEXT:    sb a0, 581(sp)
-; VLEN512-NEXT:    srli a0, a1, 32
-; VLEN512-NEXT:    sb a0, 580(sp)
-; VLEN512-NEXT:    srli a0, a1, 24
-; VLEN512-NEXT:    sb a0, 579(sp)
-; VLEN512-NEXT:    srli a0, a1, 16
-; VLEN512-NEXT:    sb a0, 578(sp)
-; VLEN512-NEXT:    vmv.x.s a0, v24
-; VLEN512-NEXT:    vslidedown.vi v24, v8, 6
-; VLEN512-NEXT:    sb a1, 576(sp)
-; VLEN512-NEXT:    srli a1, a1, 8
-; VLEN512-NEXT:    sb a1, 577(sp)
-; VLEN512-NEXT:    srli a1, a0, 56
-; VLEN512-NEXT:    sb a1, 575(sp)
-; VLEN512-NEXT:    srli a1, a0, 48
-; VLEN512-NEXT:    sb a1, 574(sp)
-; VLEN512-NEXT:    srli a1, a0, 40
-; VLEN512-NEXT:    sb a1, 573(sp)
-; VLEN512-NEXT:    srli a1, a0, 32
-; VLEN512-NEXT:    sb a1, 572(sp)
-; VLEN512-NEXT:    srli a1, a0, 24
-; VLEN512-NEXT:    sb a1, 571(sp)
-; VLEN512-NEXT:    srli a1, a0, 16
-; VLEN512-NEXT:    sb a1, 570(sp)
-; VLEN512-NEXT:    vmv.x.s a1, v24
-; VLEN512-NEXT:    vslidedown.vi v24, v8, 5
-; VLEN512-NEXT:    sb a0, 568(sp)
-; VLEN512-NEXT:    srli a0, a0, 8
-; VLEN512-NEXT:    sb a0, 569(sp)
-; VLEN512-NEXT:    srli a0, a1, 56
-; VLEN512-NEXT:    sb a0, 567(sp)
-; VLEN512-NEXT:    srli a0, a1, 48
-; VLEN512-NEXT:    sb a0, 566(sp)
-; VLEN512-NEXT:    srli a0, a1, 40
-; VLEN512-NEXT:    sb a0, 565(sp)
-; VLEN512-NEXT:    srli a0, a1, 32
-; VLEN512-NEXT:    sb a0, 564(sp)
-; VLEN512-NEXT:    srli a0, a1, 24
-; VLEN512-NEXT:    sb a0, 563(sp)
-; VLEN512-NEXT:    srli a0, a1, 16
-; VLEN512-NEXT:    sb a0, 562(sp)
-; VLEN512-NEXT:    vmv.x.s a0, v24
-; VLEN512-NEXT:    vslidedown.vi v24, v8, 4
-; VLEN512-NEXT:    sb a1, 560(sp)
-; VLEN512-NEXT:    srli a1, a1, 8
-; VLEN512-NEXT:    sb a1, 561(sp)
-; VLEN512-NEXT:    srli a1, a0, 56
-; VLEN512-NEXT:    sb a1, 559(sp)
-; VLEN512-NEXT:    srli a1, a0, 48
-; VLEN512-NEXT:    sb a1, 558(sp)
-; VLEN512-NEXT:    srli a1, a0, 40
-; VLEN512-NEXT:    sb a1, 557(sp)
-; VLEN512-NEXT:    srli a1, a0, 32
-; VLEN512-NEXT:    sb a1, 556(sp)
-; VLEN512-NEXT:    srli a1, a0, 24
-; VLEN512-NEXT:    sb a1, 555(sp)
-; VLEN512-NEXT:    srli a1, a0, 16
-; VLEN512-NEXT:    sb a1, 554(sp)
-; VLEN512-NEXT:    vmv.x.s a1, v24
-; VLEN512-NEXT:    vslidedown.vi v24, v8, 3
-; VLEN512-NEXT:    sb a0, 552(sp)
-; VLEN512-NEXT:    srli a0, a0, 8
-; VLEN512-NEXT:    sb a0, 553(sp)
-; VLEN512-NEXT:    srli a0, a1, 56
-; VLEN512-NEXT:    sb a0, 551(sp)
-; VLEN512-NEXT:    srli a0, a1, 48
-; VLEN512-NEXT:    sb a0, 550(sp)
-; VLEN512-NEXT:    srli a0, a1, 40
-; VLEN512-NEXT:    sb a0, 549(sp)
-; VLEN512-NEXT:    srli a0, a1, 32
-; VLEN512-NEXT:    sb a0, 548(sp)
-; VLEN512-NEXT:    srli a0, a1, 24
-; VLEN512-NEXT:    sb a0, 547(sp)
-; VLEN512-NEXT:    srli a0, a1, 16
-; VLEN512-NEXT:    sb a0, 546(sp)
-; VLEN512-NEXT:    vmv.x.s a0, v24
-; VLEN512-NEXT:    vslidedown.vi v24, v8, 1
-; VLEN512-NEXT:    vslidedown.vi v8, v8, 2
-; VLEN512-NEXT:    sb a1, 544(sp)
-; VLEN512-NEXT:    srli a1, a1, 8
-; VLEN512-NEXT:    sb a1, 545(sp)
-; VLEN512-NEXT:    srli a1, a0, 56
-; VLEN512-NEXT:    sb a1, 543(sp)
-; VLEN512-NEXT:    srli a1, a0, 48
-; VLEN512-NEXT:    sb a1, 542(sp)
-; VLEN512-NEXT:    srli a1, a0, 40
-; VLEN512-NEXT:    sb a1, 541(sp)
-; VLEN512-NEXT:    srli a1, a0, 32
-; VLEN512-NEXT:    sb a1, 540(sp)
-; VLEN512-NEXT:    srli a1, a0, 24
-; VLEN512-NEXT:    sb a1, 539(sp)
-; VLEN512-NEXT:    srli a1, a0, 16
-; VLEN512-NEXT:    sb a1, 538(sp)
-; VLEN512-NEXT:    vmv.x.s a1, v8
-; VLEN512-NEXT:    sb a0, 536(sp)
-; VLEN512-NEXT:    srli a0, a0, 8
-; VLEN512-NEXT:    sb a0, 537(sp)
-; VLEN512-NEXT:    srli a0, a1, 56
-; VLEN512-NEXT:    sb a0, 535(sp)
-; VLEN512-NEXT:    srli a0, a1, 48
-; VLEN512-NEXT:    sb a0, 534(sp)
-; VLEN512-NEXT:    srli a0, a1, 40
-; VLEN512-NEXT:    sb a0, 533(sp)
-; VLEN512-NEXT:    srli a0, a1, 32
-; VLEN512-NEXT:    sb a0, 532(sp)
-; VLEN512-NEXT:    srli a0, a1, 24
-; VLEN512-NEXT:    sb a0, 531(sp)
-; VLEN512-NEXT:    srli a0, a1, 16
-; VLEN512-NEXT:    sb a0, 530(sp)
-; VLEN512-NEXT:    vmv.x.s a0, v24
-; VLEN512-NEXT:    sb a1, 528(sp)
-; VLEN512-NEXT:    srli a1, a1, 8
-; VLEN512-NEXT:    sb a1, 529(sp)
-; VLEN512-NEXT:    srli a1, a0, 56
-; VLEN512-NEXT:    sb a1, 527(sp)
-; VLEN512-NEXT:    srli a1, a0, 48
-; VLEN512-NEXT:    sb a1, 526(sp)
-; VLEN512-NEXT:    srli a1, a0, 40
-; VLEN512-NEXT:    sb a1, 525(sp)
-; VLEN512-NEXT:    srli a1, a0, 32
-; VLEN512-NEXT:    sb a1, 524(sp)
-; VLEN512-NEXT:    srli a1, a0, 24
-; VLEN512-NEXT:    sb a1, 523(sp)
-; VLEN512-NEXT:    srli a1, a0, 16
-; VLEN512-NEXT:    sb a1, 522(sp)
-; VLEN512-NEXT:    sb a0, 520(sp)
-; VLEN512-NEXT:    srli a0, a0, 8
-; VLEN512-NEXT:    sb a0, 521(sp)
-; VLEN512-NEXT:    addi a0, zero, 256
-; VLEN512-NEXT:    vsetvli zero, a0, e8, m4, ta, mu
-; VLEN512-NEXT:    addi a0, sp, 512
-; VLEN512-NEXT:    vle8.v v28, (a0)
-; VLEN512-NEXT:    addi a0, sp, 256
-; VLEN512-NEXT:    vle8.v v12, (a0)
-; VLEN512-NEXT:    vadd.vv v8, v16, v28
-; VLEN512-NEXT:    vadd.vv v12, v20, v12
-; VLEN512-NEXT:    addi sp, s0, -1024
-; VLEN512-NEXT:    ld s11, 920(sp) # 8-byte Folded Reload
-; VLEN512-NEXT:    ld s10, 928(sp) # 8-byte Folded Reload
-; VLEN512-NEXT:    ld s9, 936(sp) # 8-byte Folded Reload
-; VLEN512-NEXT:    ld s8, 944(sp) # 8-byte Folded Reload
-; VLEN512-NEXT:    ld s7, 952(sp) # 8-byte Folded Reload
-; VLEN512-NEXT:    ld s6, 960(sp) # 8-byte Folded Reload
-; VLEN512-NEXT:    ld s5, 968(sp) # 8-byte Folded Reload
-; VLEN512-NEXT:    ld s4, 976(sp) # 8-byte Folded Reload
-; VLEN512-NEXT:    ld s3, 984(sp) # 8-byte Folded Reload
-; VLEN512-NEXT:    ld s2, 992(sp) # 8-byte Folded Reload
-; VLEN512-NEXT:    ld s1, 1000(sp) # 8-byte Folded Reload
-; VLEN512-NEXT:    ld s0, 1008(sp) # 8-byte Folded Reload
-; VLEN512-NEXT:    ld ra, 1016(sp) # 8-byte Folded Reload
-; VLEN512-NEXT:    addi sp, sp, 1024
+; VLEN512-NEXT:    addi a0, zero, 512
+; VLEN512-NEXT:    vsetvli zero, a0, e8, m8, ta, mu
+; VLEN512-NEXT:    vadd.vv v8, v16, v8
 ; VLEN512-NEXT:    ret
+;
+; VLEN1024-LABEL: bitcast_1024B:
+; VLEN1024:       # %bb.0:
+; VLEN1024-NEXT:    addi a0, zero, 512
+; VLEN1024-NEXT:    vsetvli zero, a0, e8, m4, ta, mu
+; VLEN1024-NEXT:    vadd.vv v8, v12, v8
+; VLEN1024-NEXT:    ret
   %c = bitcast <256 x i16> %a to <512 x i8>
   %v = add <512 x i8> %b, %c
   ret <512 x i8> %v

diff  --git a/llvm/test/CodeGen/RISCV/rvv/interleave-crash.ll b/llvm/test/CodeGen/RISCV/rvv/interleave-crash.ll
index 9673a88206871..8d0fe18bf1fba 100644
--- a/llvm/test/CodeGen/RISCV/rvv/interleave-crash.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/interleave-crash.ll
@@ -129,849 +129,74 @@ entry:
 define void @interleave512(<512 x i16>* %agg.result, <256 x i16>* %0, <256 x i16>* %1) local_unnamed_addr {
 ; RV64-1024-LABEL: interleave512:
 ; RV64-1024:       # %bb.0: # %entry
-; RV64-1024-NEXT:    addi sp, sp, -2032
-; RV64-1024-NEXT:    .cfi_def_cfa_offset 2032
-; RV64-1024-NEXT:    sd ra, 2024(sp) # 8-byte Folded Spill
-; RV64-1024-NEXT:    sd s0, 2016(sp) # 8-byte Folded Spill
-; RV64-1024-NEXT:    sd s1, 2008(sp) # 8-byte Folded Spill
-; RV64-1024-NEXT:    sd s2, 2000(sp) # 8-byte Folded Spill
-; RV64-1024-NEXT:    sd s3, 1992(sp) # 8-byte Folded Spill
-; RV64-1024-NEXT:    sd s4, 1984(sp) # 8-byte Folded Spill
-; RV64-1024-NEXT:    sd s5, 1976(sp) # 8-byte Folded Spill
-; RV64-1024-NEXT:    sd s6, 1968(sp) # 8-byte Folded Spill
-; RV64-1024-NEXT:    sd s7, 1960(sp) # 8-byte Folded Spill
-; RV64-1024-NEXT:    sd s8, 1952(sp) # 8-byte Folded Spill
-; RV64-1024-NEXT:    sd s9, 1944(sp) # 8-byte Folded Spill
-; RV64-1024-NEXT:    sd s10, 1936(sp) # 8-byte Folded Spill
-; RV64-1024-NEXT:    sd s11, 1928(sp) # 8-byte Folded Spill
-; RV64-1024-NEXT:    .cfi_offset ra, -8
-; RV64-1024-NEXT:    .cfi_offset s0, -16
-; RV64-1024-NEXT:    .cfi_offset s1, -24
-; RV64-1024-NEXT:    .cfi_offset s2, -32
-; RV64-1024-NEXT:    .cfi_offset s3, -40
-; RV64-1024-NEXT:    .cfi_offset s4, -48
-; RV64-1024-NEXT:    .cfi_offset s5, -56
-; RV64-1024-NEXT:    .cfi_offset s6, -64
-; RV64-1024-NEXT:    .cfi_offset s7, -72
-; RV64-1024-NEXT:    .cfi_offset s8, -80
-; RV64-1024-NEXT:    .cfi_offset s9, -88
-; RV64-1024-NEXT:    .cfi_offset s10, -96
-; RV64-1024-NEXT:    .cfi_offset s11, -104
-; RV64-1024-NEXT:    addi s0, sp, 2032
-; RV64-1024-NEXT:    .cfi_def_cfa s0, 0
 ; RV64-1024-NEXT:    addi sp, sp, -16
+; RV64-1024-NEXT:    .cfi_def_cfa_offset 16
 ; RV64-1024-NEXT:    csrr a3, vlenb
-; RV64-1024-NEXT:    addi a4, zero, 12
+; RV64-1024-NEXT:    addi a4, zero, 24
 ; RV64-1024-NEXT:    mul a3, a3, a4
 ; RV64-1024-NEXT:    sub sp, sp, a3
-; RV64-1024-NEXT:    andi sp, sp, -512
 ; RV64-1024-NEXT:    addi a3, zero, 256
 ; RV64-1024-NEXT:    vsetvli zero, a3, e16, m4, ta, mu
-; RV64-1024-NEXT:    addi a3, zero, 256
-; RV64-1024-NEXT:    vle16.v v16, (a1)
-; RV64-1024-NEXT:    lui a1, %hi(.LCPI1_0)
-; RV64-1024-NEXT:    addi a1, a1, %lo(.LCPI1_0)
-; RV64-1024-NEXT:    vle16.v v20, (a1)
-; RV64-1024-NEXT:    vle16.v v28, (a2)
+; RV64-1024-NEXT:    vle16.v v24, (a1)
+; RV64-1024-NEXT:    vle16.v v8, (a2)
 ; RV64-1024-NEXT:    csrr a1, vlenb
-; RV64-1024-NEXT:    slli a1, a1, 3
+; RV64-1024-NEXT:    slli a1, a1, 4
 ; RV64-1024-NEXT:    add a1, sp, a1
-; RV64-1024-NEXT:    addi a1, a1, 1944
-; RV64-1024-NEXT:    vs4r.v v28, (a1) # Unknown-size Folded Spill
-; RV64-1024-NEXT:    vrgather.vv v0, v16, v20
-; RV64-1024-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
+; RV64-1024-NEXT:    addi a1, a1, 16
+; RV64-1024-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
+; RV64-1024-NEXT:    addi a1, zero, 512
+; RV64-1024-NEXT:    vsetvli zero, a1, e16, m8, ta, mu
 ; RV64-1024-NEXT:    vmv.v.i v8, 0
-; RV64-1024-NEXT:    addi a1, zero, 128
-; RV64-1024-NEXT:    vsetvli zero, a1, e32, m8, tu, mu
-; RV64-1024-NEXT:    vslideup.vi v8, v0, 0
+; RV64-1024-NEXT:    vsetvli zero, a3, e16, m8, tu, mu
+; RV64-1024-NEXT:    vmv8r.v v0, v8
+; RV64-1024-NEXT:    vslideup.vi v0, v24, 0
+; RV64-1024-NEXT:    vsetvli zero, a3, e16, m4, ta, mu
+; RV64-1024-NEXT:    vmv.v.i v16, 0
+; RV64-1024-NEXT:    addi a2, sp, 16
+; RV64-1024-NEXT:    vs8r.v v16, (a2) # Unknown-size Folded Spill
+; RV64-1024-NEXT:    vsetvli zero, a1, e16, m8, tu, mu
+; RV64-1024-NEXT:    vslideup.vx v0, v16, a3
+; RV64-1024-NEXT:    lui a2, %hi(.LCPI1_0)
+; RV64-1024-NEXT:    addi a2, a2, %lo(.LCPI1_0)
+; RV64-1024-NEXT:    vsetvli zero, zero, e16, m8, ta, mu
+; RV64-1024-NEXT:    vle16.v v16, (a2)
+; RV64-1024-NEXT:    vrgather.vv v24, v0, v16
+; RV64-1024-NEXT:    csrr a2, vlenb
+; RV64-1024-NEXT:    slli a2, a2, 3
+; RV64-1024-NEXT:    add a2, sp, a2
+; RV64-1024-NEXT:    addi a2, a2, 16
+; RV64-1024-NEXT:    vs8r.v v24, (a2) # Unknown-size Folded Spill
+; RV64-1024-NEXT:    vsetvli zero, a3, e16, m8, tu, mu
+; RV64-1024-NEXT:    csrr a2, vlenb
+; RV64-1024-NEXT:    slli a2, a2, 4
+; RV64-1024-NEXT:    add a2, sp, a2
+; RV64-1024-NEXT:    addi a2, a2, 16
+; RV64-1024-NEXT:    vl8re8.v v16, (a2) # Unknown-size Folded Reload
+; RV64-1024-NEXT:    vslideup.vi v8, v16, 0
+; RV64-1024-NEXT:    vsetvli zero, a1, e16, m8, tu, mu
+; RV64-1024-NEXT:    addi a2, sp, 16
+; RV64-1024-NEXT:    vl8re8.v v16, (a2) # Unknown-size Folded Reload
+; RV64-1024-NEXT:    vslideup.vx v8, v16, a3
 ; RV64-1024-NEXT:    lui a2, %hi(.LCPI1_1)
 ; RV64-1024-NEXT:    addi a2, a2, %lo(.LCPI1_1)
-; RV64-1024-NEXT:    vsetvli zero, a3, e16, m4, ta, mu
+; RV64-1024-NEXT:    vsetvli zero, zero, e16, m8, ta, mu
 ; RV64-1024-NEXT:    vle16.v v24, (a2)
-; RV64-1024-NEXT:    vrgather.vv v0, v16, v24
-; RV64-1024-NEXT:    vrgather.vv v24, v0, v20
-; RV64-1024-NEXT:    vsetvli zero, zero, e32, m8, tu, mu
-; RV64-1024-NEXT:    vslideup.vx v8, v24, a1
-; RV64-1024-NEXT:    addi a1, zero, 127
-; RV64-1024-NEXT:    vsetivli zero, 1, e64, m8, ta, mu
-; RV64-1024-NEXT:    vslidedown.vx v16, v8, a1
-; RV64-1024-NEXT:    vmv.x.s t5, v16
-; RV64-1024-NEXT:    addi a1, zero, 126
-; RV64-1024-NEXT:    vslidedown.vx v16, v8, a1
-; RV64-1024-NEXT:    vmv.x.s t6, v16
-; RV64-1024-NEXT:    addi a1, zero, 125
-; RV64-1024-NEXT:    vslidedown.vx v16, v8, a1
-; RV64-1024-NEXT:    vmv.x.s s2, v16
-; RV64-1024-NEXT:    addi a1, zero, 124
-; RV64-1024-NEXT:    vslidedown.vx v16, v8, a1
-; RV64-1024-NEXT:    vmv.x.s s3, v16
-; RV64-1024-NEXT:    addi a1, zero, 123
-; RV64-1024-NEXT:    vslidedown.vx v16, v8, a1
-; RV64-1024-NEXT:    vmv.x.s s4, v16
-; RV64-1024-NEXT:    addi a1, zero, 122
-; RV64-1024-NEXT:    vslidedown.vx v16, v8, a1
-; RV64-1024-NEXT:    vmv.x.s s5, v16
-; RV64-1024-NEXT:    addi a1, zero, 121
-; RV64-1024-NEXT:    vslidedown.vx v16, v8, a1
-; RV64-1024-NEXT:    vmv.x.s s6, v16
-; RV64-1024-NEXT:    addi a1, zero, 120
-; RV64-1024-NEXT:    vslidedown.vx v16, v8, a1
-; RV64-1024-NEXT:    vmv.x.s s7, v16
-; RV64-1024-NEXT:    addi a1, zero, 119
-; RV64-1024-NEXT:    vslidedown.vx v16, v8, a1
-; RV64-1024-NEXT:    vmv.x.s s8, v16
-; RV64-1024-NEXT:    addi a1, zero, 118
-; RV64-1024-NEXT:    vslidedown.vx v16, v8, a1
-; RV64-1024-NEXT:    vmv.x.s s9, v16
-; RV64-1024-NEXT:    addi a1, zero, 117
-; RV64-1024-NEXT:    vslidedown.vx v16, v8, a1
-; RV64-1024-NEXT:    vmv.x.s s10, v16
-; RV64-1024-NEXT:    addi a1, zero, 116
-; RV64-1024-NEXT:    vslidedown.vx v16, v8, a1
-; RV64-1024-NEXT:    vmv.x.s s11, v16
-; RV64-1024-NEXT:    addi a1, zero, 115
-; RV64-1024-NEXT:    vslidedown.vx v16, v8, a1
-; RV64-1024-NEXT:    vmv.x.s t4, v16
-; RV64-1024-NEXT:    addi a1, zero, 114
-; RV64-1024-NEXT:    vslidedown.vx v16, v8, a1
-; RV64-1024-NEXT:    vmv.x.s ra, v16
-; RV64-1024-NEXT:    addi a1, zero, 113
-; RV64-1024-NEXT:    vslidedown.vx v16, v8, a1
-; RV64-1024-NEXT:    vmv.x.s a6, v16
-; RV64-1024-NEXT:    addi a1, zero, 112
-; RV64-1024-NEXT:    vslidedown.vx v16, v8, a1
-; RV64-1024-NEXT:    vmv.x.s a7, v16
-; RV64-1024-NEXT:    addi a1, zero, 111
-; RV64-1024-NEXT:    vslidedown.vx v16, v8, a1
-; RV64-1024-NEXT:    vmv.x.s t0, v16
-; RV64-1024-NEXT:    addi a1, zero, 110
-; RV64-1024-NEXT:    vslidedown.vx v16, v8, a1
-; RV64-1024-NEXT:    vmv.x.s t1, v16
-; RV64-1024-NEXT:    addi a1, zero, 109
-; RV64-1024-NEXT:    vslidedown.vx v16, v8, a1
-; RV64-1024-NEXT:    vmv.x.s a5, v16
-; RV64-1024-NEXT:    addi a1, zero, 108
-; RV64-1024-NEXT:    vslidedown.vx v16, v8, a1
-; RV64-1024-NEXT:    vmv.x.s a3, v16
-; RV64-1024-NEXT:    addi a1, zero, 107
-; RV64-1024-NEXT:    vslidedown.vx v16, v8, a1
-; RV64-1024-NEXT:    vmv.x.s a1, v16
-; RV64-1024-NEXT:    addi a2, zero, 106
-; RV64-1024-NEXT:    vslidedown.vx v16, v8, a2
-; RV64-1024-NEXT:    vmv.x.s a2, v16
-; RV64-1024-NEXT:    sd a2, 504(sp) # 8-byte Folded Spill
-; RV64-1024-NEXT:    addi a4, zero, 105
-; RV64-1024-NEXT:    vslidedown.vx v16, v8, a4
-; RV64-1024-NEXT:    vmv.x.s a2, v16
-; RV64-1024-NEXT:    addi s1, zero, 104
-; RV64-1024-NEXT:    vslidedown.vx v16, v8, s1
-; RV64-1024-NEXT:    vmv.x.s a4, v16
-; RV64-1024-NEXT:    addi s1, zero, 103
-; RV64-1024-NEXT:    vslidedown.vx v24, v8, s1
-; RV64-1024-NEXT:    addi s1, zero, 102
-; RV64-1024-NEXT:    vslidedown.vx v16, v8, s1
-; RV64-1024-NEXT:    addi s1, zero, 101
-; RV64-1024-NEXT:    vslidedown.vx v0, v8, s1
-; RV64-1024-NEXT:    vmv.x.s s1, v24
-; RV64-1024-NEXT:    sd s1, 496(sp) # 8-byte Folded Spill
-; RV64-1024-NEXT:    sh t5, 1016(sp)
-; RV64-1024-NEXT:    srli s1, t5, 32
-; RV64-1024-NEXT:    sh s1, 1020(sp)
-; RV64-1024-NEXT:    addi s1, zero, 100
-; RV64-1024-NEXT:    vslidedown.vx v24, v8, s1
-; RV64-1024-NEXT:    vmv.x.s t5, v16
-; RV64-1024-NEXT:    sh t6, 1008(sp)
-; RV64-1024-NEXT:    srli s1, t6, 32
-; RV64-1024-NEXT:    sh s1, 1012(sp)
-; RV64-1024-NEXT:    addi s1, zero, 99
-; RV64-1024-NEXT:    vslidedown.vx v16, v8, s1
-; RV64-1024-NEXT:    vmv.x.s t6, v0
-; RV64-1024-NEXT:    sh s2, 1000(sp)
-; RV64-1024-NEXT:    srli s1, s2, 32
-; RV64-1024-NEXT:    sh s1, 1004(sp)
-; RV64-1024-NEXT:    addi s1, zero, 98
-; RV64-1024-NEXT:    vslidedown.vx v0, v8, s1
-; RV64-1024-NEXT:    vmv.x.s s2, v24
-; RV64-1024-NEXT:    sh s3, 992(sp)
-; RV64-1024-NEXT:    srli s1, s3, 32
-; RV64-1024-NEXT:    sh s1, 996(sp)
-; RV64-1024-NEXT:    addi s1, zero, 97
-; RV64-1024-NEXT:    vslidedown.vx v24, v8, s1
-; RV64-1024-NEXT:    vmv.x.s s3, v16
-; RV64-1024-NEXT:    sh s4, 984(sp)
-; RV64-1024-NEXT:    srli s1, s4, 32
-; RV64-1024-NEXT:    sh s1, 988(sp)
-; RV64-1024-NEXT:    addi s1, zero, 96
-; RV64-1024-NEXT:    vslidedown.vx v16, v8, s1
-; RV64-1024-NEXT:    vmv.x.s s4, v0
-; RV64-1024-NEXT:    sh s5, 976(sp)
-; RV64-1024-NEXT:    srli s1, s5, 32
-; RV64-1024-NEXT:    sh s1, 980(sp)
-; RV64-1024-NEXT:    addi s1, zero, 95
-; RV64-1024-NEXT:    vslidedown.vx v0, v8, s1
-; RV64-1024-NEXT:    vmv.x.s s5, v24
-; RV64-1024-NEXT:    sh s6, 968(sp)
-; RV64-1024-NEXT:    srli s1, s6, 32
-; RV64-1024-NEXT:    sh s1, 972(sp)
-; RV64-1024-NEXT:    addi s1, zero, 94
-; RV64-1024-NEXT:    vslidedown.vx v24, v8, s1
-; RV64-1024-NEXT:    vmv.x.s s6, v16
-; RV64-1024-NEXT:    sh s7, 960(sp)
-; RV64-1024-NEXT:    srli s1, s7, 32
-; RV64-1024-NEXT:    sh s1, 964(sp)
-; RV64-1024-NEXT:    addi s1, zero, 93
-; RV64-1024-NEXT:    vslidedown.vx v16, v8, s1
-; RV64-1024-NEXT:    vmv.x.s s7, v0
-; RV64-1024-NEXT:    sh s8, 952(sp)
-; RV64-1024-NEXT:    srli s1, s8, 32
-; RV64-1024-NEXT:    sh s1, 956(sp)
-; RV64-1024-NEXT:    addi s1, zero, 92
-; RV64-1024-NEXT:    vslidedown.vx v0, v8, s1
-; RV64-1024-NEXT:    vmv.x.s s8, v24
-; RV64-1024-NEXT:    sh s9, 944(sp)
-; RV64-1024-NEXT:    srli s1, s9, 32
-; RV64-1024-NEXT:    sh s1, 948(sp)
-; RV64-1024-NEXT:    addi s1, zero, 91
-; RV64-1024-NEXT:    vslidedown.vx v24, v8, s1
-; RV64-1024-NEXT:    vmv.x.s s9, v16
-; RV64-1024-NEXT:    sh s10, 936(sp)
-; RV64-1024-NEXT:    srli s1, s10, 32
-; RV64-1024-NEXT:    sh s1, 940(sp)
-; RV64-1024-NEXT:    addi s1, zero, 90
-; RV64-1024-NEXT:    vslidedown.vx v16, v8, s1
-; RV64-1024-NEXT:    vmv.x.s s10, v0
-; RV64-1024-NEXT:    sh s11, 928(sp)
-; RV64-1024-NEXT:    srli s1, s11, 32
-; RV64-1024-NEXT:    sh s1, 932(sp)
-; RV64-1024-NEXT:    addi s1, zero, 89
-; RV64-1024-NEXT:    vslidedown.vx v0, v8, s1
-; RV64-1024-NEXT:    vmv.x.s s11, v24
-; RV64-1024-NEXT:    sh t4, 920(sp)
-; RV64-1024-NEXT:    srli s1, t4, 32
-; RV64-1024-NEXT:    sh s1, 924(sp)
-; RV64-1024-NEXT:    addi s1, zero, 88
-; RV64-1024-NEXT:    vslidedown.vx v24, v8, s1
-; RV64-1024-NEXT:    vmv.x.s s1, v16
-; RV64-1024-NEXT:    sd s1, 488(sp) # 8-byte Folded Spill
-; RV64-1024-NEXT:    sh ra, 912(sp)
-; RV64-1024-NEXT:    srli s1, ra, 32
-; RV64-1024-NEXT:    sh s1, 916(sp)
-; RV64-1024-NEXT:    addi s1, zero, 87
-; RV64-1024-NEXT:    vslidedown.vx v16, v8, s1
-; RV64-1024-NEXT:    vmv.x.s s1, v0
-; RV64-1024-NEXT:    sd s1, 480(sp) # 8-byte Folded Spill
-; RV64-1024-NEXT:    sh a6, 904(sp)
-; RV64-1024-NEXT:    srli s1, a6, 32
-; RV64-1024-NEXT:    sh s1, 908(sp)
-; RV64-1024-NEXT:    addi s1, zero, 86
-; RV64-1024-NEXT:    vslidedown.vx v0, v8, s1
-; RV64-1024-NEXT:    vmv.x.s s1, v24
-; RV64-1024-NEXT:    sd s1, 472(sp) # 8-byte Folded Spill
-; RV64-1024-NEXT:    sh a7, 896(sp)
-; RV64-1024-NEXT:    srli s1, a7, 32
-; RV64-1024-NEXT:    sh s1, 900(sp)
-; RV64-1024-NEXT:    addi s1, zero, 85
-; RV64-1024-NEXT:    vslidedown.vx v24, v8, s1
-; RV64-1024-NEXT:    vmv.x.s s1, v16
-; RV64-1024-NEXT:    sd s1, 464(sp) # 8-byte Folded Spill
-; RV64-1024-NEXT:    sh t0, 888(sp)
-; RV64-1024-NEXT:    srli s1, t0, 32
-; RV64-1024-NEXT:    sh s1, 892(sp)
-; RV64-1024-NEXT:    addi s1, zero, 84
-; RV64-1024-NEXT:    vslidedown.vx v16, v8, s1
-; RV64-1024-NEXT:    vmv.x.s s1, v0
-; RV64-1024-NEXT:    sd s1, 456(sp) # 8-byte Folded Spill
-; RV64-1024-NEXT:    sh t1, 880(sp)
-; RV64-1024-NEXT:    srli s1, t1, 32
-; RV64-1024-NEXT:    sh s1, 884(sp)
-; RV64-1024-NEXT:    addi s1, zero, 83
-; RV64-1024-NEXT:    vslidedown.vx v0, v8, s1
-; RV64-1024-NEXT:    vmv.x.s t1, v24
-; RV64-1024-NEXT:    sh a5, 872(sp)
-; RV64-1024-NEXT:    srli a5, a5, 32
-; RV64-1024-NEXT:    sh a5, 876(sp)
-; RV64-1024-NEXT:    addi a5, zero, 82
-; RV64-1024-NEXT:    vslidedown.vx v24, v8, a5
-; RV64-1024-NEXT:    vmv.x.s t2, v16
-; RV64-1024-NEXT:    sh a3, 864(sp)
-; RV64-1024-NEXT:    srli a3, a3, 32
-; RV64-1024-NEXT:    sh a3, 868(sp)
-; RV64-1024-NEXT:    addi a3, zero, 81
-; RV64-1024-NEXT:    vslidedown.vx v16, v8, a3
-; RV64-1024-NEXT:    vmv.x.s t3, v0
-; RV64-1024-NEXT:    sh a1, 856(sp)
-; RV64-1024-NEXT:    srli a1, a1, 32
-; RV64-1024-NEXT:    sh a1, 860(sp)
-; RV64-1024-NEXT:    addi a1, zero, 80
-; RV64-1024-NEXT:    vslidedown.vx v0, v8, a1
-; RV64-1024-NEXT:    vmv.x.s t4, v24
-; RV64-1024-NEXT:    ld a1, 504(sp) # 8-byte Folded Reload
-; RV64-1024-NEXT:    sh a1, 848(sp)
-; RV64-1024-NEXT:    srli a1, a1, 32
-; RV64-1024-NEXT:    sh a1, 852(sp)
-; RV64-1024-NEXT:    addi a1, zero, 79
-; RV64-1024-NEXT:    vslidedown.vx v24, v8, a1
-; RV64-1024-NEXT:    vmv.x.s ra, v16
-; RV64-1024-NEXT:    sh a2, 840(sp)
-; RV64-1024-NEXT:    srli a2, a2, 32
-; RV64-1024-NEXT:    sh a2, 844(sp)
-; RV64-1024-NEXT:    addi a2, zero, 78
-; RV64-1024-NEXT:    vslidedown.vx v16, v8, a2
-; RV64-1024-NEXT:    vmv.x.s a6, v0
-; RV64-1024-NEXT:    sh a4, 832(sp)
-; RV64-1024-NEXT:    srli a4, a4, 32
-; RV64-1024-NEXT:    sh a4, 836(sp)
-; RV64-1024-NEXT:    addi a4, zero, 77
-; RV64-1024-NEXT:    vslidedown.vx v0, v8, a4
-; RV64-1024-NEXT:    vmv.x.s a7, v24
-; RV64-1024-NEXT:    ld a1, 496(sp) # 8-byte Folded Reload
-; RV64-1024-NEXT:    sh a1, 824(sp)
-; RV64-1024-NEXT:    srli s1, a1, 32
-; RV64-1024-NEXT:    sh s1, 828(sp)
-; RV64-1024-NEXT:    addi s1, zero, 76
-; RV64-1024-NEXT:    vslidedown.vx v24, v8, s1
-; RV64-1024-NEXT:    vmv.x.s t0, v16
-; RV64-1024-NEXT:    sh t5, 816(sp)
-; RV64-1024-NEXT:    srli a5, t5, 32
-; RV64-1024-NEXT:    sh a5, 820(sp)
-; RV64-1024-NEXT:    addi a5, zero, 75
-; RV64-1024-NEXT:    vslidedown.vx v16, v8, a5
-; RV64-1024-NEXT:    vmv.x.s t5, v0
-; RV64-1024-NEXT:    sh t6, 808(sp)
-; RV64-1024-NEXT:    srli a3, t6, 32
-; RV64-1024-NEXT:    sh a3, 812(sp)
-; RV64-1024-NEXT:    addi a3, zero, 74
-; RV64-1024-NEXT:    vslidedown.vx v0, v8, a3
-; RV64-1024-NEXT:    vmv.x.s t6, v24
-; RV64-1024-NEXT:    sh s2, 800(sp)
-; RV64-1024-NEXT:    srli a1, s2, 32
-; RV64-1024-NEXT:    sh a1, 804(sp)
-; RV64-1024-NEXT:    addi a1, zero, 73
-; RV64-1024-NEXT:    vslidedown.vx v24, v8, a1
-; RV64-1024-NEXT:    vmv.x.s s2, v16
-; RV64-1024-NEXT:    sh s3, 792(sp)
-; RV64-1024-NEXT:    srli a2, s3, 32
-; RV64-1024-NEXT:    sh a2, 796(sp)
-; RV64-1024-NEXT:    addi a2, zero, 72
-; RV64-1024-NEXT:    vslidedown.vx v16, v8, a2
-; RV64-1024-NEXT:    vmv.x.s s3, v0
-; RV64-1024-NEXT:    sh s4, 784(sp)
-; RV64-1024-NEXT:    srli a4, s4, 32
-; RV64-1024-NEXT:    sh a4, 788(sp)
-; RV64-1024-NEXT:    addi a4, zero, 71
-; RV64-1024-NEXT:    vslidedown.vx v0, v8, a4
-; RV64-1024-NEXT:    vmv.x.s s4, v24
-; RV64-1024-NEXT:    sh s5, 776(sp)
-; RV64-1024-NEXT:    srli s1, s5, 32
-; RV64-1024-NEXT:    sh s1, 780(sp)
-; RV64-1024-NEXT:    addi s1, zero, 70
-; RV64-1024-NEXT:    vslidedown.vx v24, v8, s1
-; RV64-1024-NEXT:    vmv.x.s s5, v16
-; RV64-1024-NEXT:    sh s6, 768(sp)
-; RV64-1024-NEXT:    srli a5, s6, 32
-; RV64-1024-NEXT:    sh a5, 772(sp)
-; RV64-1024-NEXT:    addi a5, zero, 69
-; RV64-1024-NEXT:    vslidedown.vx v16, v8, a5
-; RV64-1024-NEXT:    vmv.x.s s6, v0
-; RV64-1024-NEXT:    sh s7, 760(sp)
-; RV64-1024-NEXT:    srli a3, s7, 32
-; RV64-1024-NEXT:    sh a3, 764(sp)
-; RV64-1024-NEXT:    addi a3, zero, 68
-; RV64-1024-NEXT:    vslidedown.vx v0, v8, a3
-; RV64-1024-NEXT:    vmv.x.s s7, v24
-; RV64-1024-NEXT:    sh s8, 752(sp)
-; RV64-1024-NEXT:    srli a1, s8, 32
-; RV64-1024-NEXT:    sh a1, 756(sp)
-; RV64-1024-NEXT:    addi a1, zero, 67
-; RV64-1024-NEXT:    vslidedown.vx v24, v8, a1
-; RV64-1024-NEXT:    vmv.x.s s8, v16
-; RV64-1024-NEXT:    sh s9, 744(sp)
-; RV64-1024-NEXT:    srli a2, s9, 32
-; RV64-1024-NEXT:    sh a2, 748(sp)
-; RV64-1024-NEXT:    addi a2, zero, 66
-; RV64-1024-NEXT:    vslidedown.vx v16, v8, a2
-; RV64-1024-NEXT:    vmv.x.s s9, v0
-; RV64-1024-NEXT:    sh s10, 736(sp)
-; RV64-1024-NEXT:    srli a4, s10, 32
-; RV64-1024-NEXT:    sh a4, 740(sp)
-; RV64-1024-NEXT:    addi a4, zero, 65
-; RV64-1024-NEXT:    vslidedown.vx v0, v8, a4
-; RV64-1024-NEXT:    addi a1, sp, 1944
-; RV64-1024-NEXT:    vs8r.v v0, (a1) # Unknown-size Folded Spill
-; RV64-1024-NEXT:    vmv.x.s s10, v24
-; RV64-1024-NEXT:    sh s11, 728(sp)
-; RV64-1024-NEXT:    srli s1, s11, 32
-; RV64-1024-NEXT:    sh s1, 732(sp)
-; RV64-1024-NEXT:    addi s1, zero, 64
-; RV64-1024-NEXT:    vslidedown.vx v0, v8, s1
-; RV64-1024-NEXT:    vmv.x.s s11, v16
-; RV64-1024-NEXT:    ld a1, 488(sp) # 8-byte Folded Reload
-; RV64-1024-NEXT:    sh a1, 720(sp)
-; RV64-1024-NEXT:    ld a3, 480(sp) # 8-byte Folded Reload
-; RV64-1024-NEXT:    sh a3, 712(sp)
-; RV64-1024-NEXT:    ld a2, 472(sp) # 8-byte Folded Reload
-; RV64-1024-NEXT:    sh a2, 704(sp)
-; RV64-1024-NEXT:    ld a4, 464(sp) # 8-byte Folded Reload
-; RV64-1024-NEXT:    sh a4, 696(sp)
-; RV64-1024-NEXT:    ld s1, 456(sp) # 8-byte Folded Reload
-; RV64-1024-NEXT:    sh s1, 688(sp)
-; RV64-1024-NEXT:    sh t1, 680(sp)
-; RV64-1024-NEXT:    sh t2, 672(sp)
-; RV64-1024-NEXT:    sh t3, 664(sp)
-; RV64-1024-NEXT:    sh t4, 656(sp)
-; RV64-1024-NEXT:    sh ra, 648(sp)
-; RV64-1024-NEXT:    sh a6, 640(sp)
-; RV64-1024-NEXT:    sh a7, 632(sp)
-; RV64-1024-NEXT:    sh t0, 624(sp)
-; RV64-1024-NEXT:    sh t5, 616(sp)
-; RV64-1024-NEXT:    sh t6, 608(sp)
-; RV64-1024-NEXT:    sh s2, 600(sp)
-; RV64-1024-NEXT:    sh s3, 592(sp)
-; RV64-1024-NEXT:    sh s4, 584(sp)
-; RV64-1024-NEXT:    sh s5, 576(sp)
-; RV64-1024-NEXT:    sh s6, 568(sp)
-; RV64-1024-NEXT:    sh s7, 560(sp)
-; RV64-1024-NEXT:    sh s8, 552(sp)
-; RV64-1024-NEXT:    sh s9, 544(sp)
-; RV64-1024-NEXT:    sh s10, 536(sp)
-; RV64-1024-NEXT:    sh s11, 528(sp)
-; RV64-1024-NEXT:    srli a5, a1, 32
-; RV64-1024-NEXT:    sh a5, 724(sp)
-; RV64-1024-NEXT:    addi a1, sp, 1944
-; RV64-1024-NEXT:    vl8re8.v v16, (a1) # Unknown-size Folded Reload
-; RV64-1024-NEXT:    vmv.x.s a5, v16
-; RV64-1024-NEXT:    sh a5, 520(sp)
-; RV64-1024-NEXT:    srli a3, a3, 32
-; RV64-1024-NEXT:    sh a3, 716(sp)
-; RV64-1024-NEXT:    vmv.x.s a3, v0
-; RV64-1024-NEXT:    sh a3, 512(sp)
-; RV64-1024-NEXT:    srli a1, a2, 32
-; RV64-1024-NEXT:    sh a1, 708(sp)
-; RV64-1024-NEXT:    srli a1, a4, 32
-; RV64-1024-NEXT:    sh a1, 700(sp)
-; RV64-1024-NEXT:    srli a1, s1, 32
-; RV64-1024-NEXT:    sh a1, 692(sp)
-; RV64-1024-NEXT:    srli a1, t1, 32
-; RV64-1024-NEXT:    sh a1, 684(sp)
-; RV64-1024-NEXT:    srli a1, t2, 32
-; RV64-1024-NEXT:    sh a1, 676(sp)
-; RV64-1024-NEXT:    srli a1, t3, 32
-; RV64-1024-NEXT:    sh a1, 668(sp)
-; RV64-1024-NEXT:    srli a1, t4, 32
-; RV64-1024-NEXT:    sh a1, 660(sp)
-; RV64-1024-NEXT:    srli a1, ra, 32
-; RV64-1024-NEXT:    sh a1, 652(sp)
-; RV64-1024-NEXT:    srli a1, a6, 32
-; RV64-1024-NEXT:    sh a1, 644(sp)
-; RV64-1024-NEXT:    srli a1, a7, 32
-; RV64-1024-NEXT:    sh a1, 636(sp)
-; RV64-1024-NEXT:    srli a1, t0, 32
-; RV64-1024-NEXT:    sh a1, 628(sp)
-; RV64-1024-NEXT:    srli a1, t5, 32
-; RV64-1024-NEXT:    sh a1, 620(sp)
-; RV64-1024-NEXT:    srli a1, t6, 32
-; RV64-1024-NEXT:    sh a1, 612(sp)
-; RV64-1024-NEXT:    srli a1, s2, 32
-; RV64-1024-NEXT:    sh a1, 604(sp)
-; RV64-1024-NEXT:    srli a1, s3, 32
-; RV64-1024-NEXT:    sh a1, 596(sp)
-; RV64-1024-NEXT:    srli a1, s4, 32
-; RV64-1024-NEXT:    sh a1, 588(sp)
-; RV64-1024-NEXT:    srli a1, s5, 32
-; RV64-1024-NEXT:    sh a1, 580(sp)
-; RV64-1024-NEXT:    srli a1, s6, 32
-; RV64-1024-NEXT:    sh a1, 572(sp)
-; RV64-1024-NEXT:    srli a1, s7, 32
-; RV64-1024-NEXT:    sh a1, 564(sp)
-; RV64-1024-NEXT:    srli a1, s8, 32
-; RV64-1024-NEXT:    sh a1, 556(sp)
-; RV64-1024-NEXT:    srli a1, s9, 32
-; RV64-1024-NEXT:    sh a1, 548(sp)
-; RV64-1024-NEXT:    srli a1, s10, 32
-; RV64-1024-NEXT:    sh a1, 540(sp)
-; RV64-1024-NEXT:    srli a1, s11, 32
-; RV64-1024-NEXT:    sh a1, 532(sp)
-; RV64-1024-NEXT:    srli a1, a5, 32
-; RV64-1024-NEXT:    sh a1, 524(sp)
-; RV64-1024-NEXT:    srli a1, a3, 32
-; RV64-1024-NEXT:    sh a1, 516(sp)
-; RV64-1024-NEXT:    addi a1, zero, 63
-; RV64-1024-NEXT:    vslidedown.vx v16, v8, a1
-; RV64-1024-NEXT:    vmv.x.s s5, v16
-; RV64-1024-NEXT:    addi a1, zero, 62
-; RV64-1024-NEXT:    vslidedown.vx v16, v8, a1
-; RV64-1024-NEXT:    vmv.x.s s4, v16
-; RV64-1024-NEXT:    addi a1, zero, 61
-; RV64-1024-NEXT:    vslidedown.vx v16, v8, a1
-; RV64-1024-NEXT:    vmv.x.s s2, v16
-; RV64-1024-NEXT:    addi a1, zero, 60
-; RV64-1024-NEXT:    vslidedown.vx v16, v8, a1
-; RV64-1024-NEXT:    vmv.x.s t5, v16
-; RV64-1024-NEXT:    addi a1, zero, 59
-; RV64-1024-NEXT:    vslidedown.vx v16, v8, a1
-; RV64-1024-NEXT:    vmv.x.s a1, v16
-; RV64-1024-NEXT:    sd a1, 488(sp) # 8-byte Folded Spill
-; RV64-1024-NEXT:    addi a1, zero, 58
-; RV64-1024-NEXT:    vslidedown.vx v16, v8, a1
-; RV64-1024-NEXT:    vmv.x.s a1, v16
-; RV64-1024-NEXT:    sd a1, 504(sp) # 8-byte Folded Spill
-; RV64-1024-NEXT:    addi a1, zero, 57
-; RV64-1024-NEXT:    vslidedown.vx v16, v8, a1
-; RV64-1024-NEXT:    vmv.x.s a1, v16
-; RV64-1024-NEXT:    sd a1, 496(sp) # 8-byte Folded Spill
-; RV64-1024-NEXT:    addi a1, zero, 56
-; RV64-1024-NEXT:    vslidedown.vx v16, v8, a1
-; RV64-1024-NEXT:    vmv.x.s a1, v16
-; RV64-1024-NEXT:    sd a1, 480(sp) # 8-byte Folded Spill
-; RV64-1024-NEXT:    addi a1, zero, 55
-; RV64-1024-NEXT:    vslidedown.vx v16, v8, a1
-; RV64-1024-NEXT:    vmv.x.s a1, v16
-; RV64-1024-NEXT:    sd a1, 472(sp) # 8-byte Folded Spill
-; RV64-1024-NEXT:    addi a1, zero, 54
-; RV64-1024-NEXT:    vslidedown.vx v16, v8, a1
-; RV64-1024-NEXT:    vmv.x.s t4, v16
-; RV64-1024-NEXT:    addi a1, zero, 53
-; RV64-1024-NEXT:    vslidedown.vx v16, v8, a1
-; RV64-1024-NEXT:    vmv.x.s t6, v16
-; RV64-1024-NEXT:    addi a1, zero, 52
-; RV64-1024-NEXT:    vslidedown.vx v16, v8, a1
-; RV64-1024-NEXT:    vmv.x.s s3, v16
-; RV64-1024-NEXT:    addi a1, zero, 51
-; RV64-1024-NEXT:    vslidedown.vx v16, v8, a1
-; RV64-1024-NEXT:    vmv.x.s s6, v16
-; RV64-1024-NEXT:    addi a1, zero, 50
-; RV64-1024-NEXT:    vslidedown.vx v16, v8, a1
-; RV64-1024-NEXT:    vmv.x.s s7, v16
-; RV64-1024-NEXT:    addi a1, zero, 49
-; RV64-1024-NEXT:    vslidedown.vx v16, v8, a1
-; RV64-1024-NEXT:    vmv.x.s s8, v16
-; RV64-1024-NEXT:    addi a1, zero, 48
-; RV64-1024-NEXT:    vslidedown.vx v16, v8, a1
-; RV64-1024-NEXT:    vmv.x.s s9, v16
-; RV64-1024-NEXT:    addi a1, zero, 47
-; RV64-1024-NEXT:    vslidedown.vx v16, v8, a1
-; RV64-1024-NEXT:    vmv.x.s s10, v16
-; RV64-1024-NEXT:    addi a1, zero, 46
-; RV64-1024-NEXT:    vslidedown.vx v16, v8, a1
-; RV64-1024-NEXT:    vmv.x.s s11, v16
-; RV64-1024-NEXT:    addi a1, zero, 45
-; RV64-1024-NEXT:    vslidedown.vx v16, v8, a1
-; RV64-1024-NEXT:    vmv.x.s ra, v16
-; RV64-1024-NEXT:    addi a1, zero, 44
-; RV64-1024-NEXT:    vslidedown.vx v16, v8, a1
-; RV64-1024-NEXT:    vmv.x.s a6, v16
-; RV64-1024-NEXT:    addi a1, zero, 43
-; RV64-1024-NEXT:    vslidedown.vx v16, v8, a1
-; RV64-1024-NEXT:    vmv.x.s a7, v16
-; RV64-1024-NEXT:    addi a1, zero, 42
-; RV64-1024-NEXT:    vslidedown.vx v16, v8, a1
-; RV64-1024-NEXT:    vmv.x.s t0, v16
-; RV64-1024-NEXT:    addi a1, zero, 41
-; RV64-1024-NEXT:    vslidedown.vx v16, v8, a1
-; RV64-1024-NEXT:    vmv.x.s t1, v16
-; RV64-1024-NEXT:    addi a1, zero, 40
-; RV64-1024-NEXT:    vslidedown.vx v16, v8, a1
-; RV64-1024-NEXT:    vmv.x.s t2, v16
-; RV64-1024-NEXT:    addi s1, zero, 39
-; RV64-1024-NEXT:    vslidedown.vx v16, v8, s1
-; RV64-1024-NEXT:    addi s1, zero, 38
-; RV64-1024-NEXT:    vslidedown.vx v0, v8, s1
-; RV64-1024-NEXT:    vmv.x.s s1, v8
-; RV64-1024-NEXT:    sh s1, 1024(sp)
-; RV64-1024-NEXT:    srli s1, s1, 32
-; RV64-1024-NEXT:    sh s1, 1028(sp)
-; RV64-1024-NEXT:    addi s1, zero, 37
-; RV64-1024-NEXT:    vslidedown.vx v24, v8, s1
-; RV64-1024-NEXT:    vmv.x.s t3, v16
-; RV64-1024-NEXT:    sh s5, 1528(sp)
-; RV64-1024-NEXT:    srli a2, s5, 32
-; RV64-1024-NEXT:    sh a2, 1532(sp)
-; RV64-1024-NEXT:    addi a2, zero, 36
-; RV64-1024-NEXT:    vslidedown.vx v16, v8, a2
-; RV64-1024-NEXT:    vmv.x.s a2, v0
-; RV64-1024-NEXT:    sh s4, 1520(sp)
-; RV64-1024-NEXT:    srli a3, s4, 32
-; RV64-1024-NEXT:    sh a3, 1524(sp)
-; RV64-1024-NEXT:    addi a3, zero, 35
-; RV64-1024-NEXT:    vslidedown.vx v0, v8, a3
-; RV64-1024-NEXT:    vmv.x.s a3, v24
-; RV64-1024-NEXT:    sh s2, 1512(sp)
-; RV64-1024-NEXT:    srli a4, s2, 32
-; RV64-1024-NEXT:    sh a4, 1516(sp)
-; RV64-1024-NEXT:    addi a4, zero, 34
-; RV64-1024-NEXT:    vslidedown.vx v24, v8, a4
-; RV64-1024-NEXT:    vmv.x.s a4, v16
-; RV64-1024-NEXT:    sh t5, 1504(sp)
-; RV64-1024-NEXT:    srli a5, t5, 32
-; RV64-1024-NEXT:    sh a5, 1508(sp)
-; RV64-1024-NEXT:    addi a5, zero, 33
-; RV64-1024-NEXT:    vslidedown.vx v16, v8, a5
-; RV64-1024-NEXT:    vmv.x.s a5, v0
-; RV64-1024-NEXT:    ld a1, 488(sp) # 8-byte Folded Reload
-; RV64-1024-NEXT:    sh a1, 1496(sp)
-; RV64-1024-NEXT:    srli a1, a1, 32
-; RV64-1024-NEXT:    sh a1, 1500(sp)
-; RV64-1024-NEXT:    addi a1, zero, 32
-; RV64-1024-NEXT:    vslidedown.vx v0, v8, a1
-; RV64-1024-NEXT:    vmv.x.s a1, v24
-; RV64-1024-NEXT:    ld s1, 504(sp) # 8-byte Folded Reload
-; RV64-1024-NEXT:    sh s1, 1488(sp)
-; RV64-1024-NEXT:    srli s1, s1, 32
-; RV64-1024-NEXT:    sh s1, 1492(sp)
-; RV64-1024-NEXT:    ld s1, 496(sp) # 8-byte Folded Reload
-; RV64-1024-NEXT:    sh s1, 1480(sp)
-; RV64-1024-NEXT:    srli s1, s1, 32
-; RV64-1024-NEXT:    sh s1, 1484(sp)
-; RV64-1024-NEXT:    ld s1, 480(sp) # 8-byte Folded Reload
-; RV64-1024-NEXT:    sh s1, 1472(sp)
-; RV64-1024-NEXT:    srli s1, s1, 32
-; RV64-1024-NEXT:    sh s1, 1476(sp)
-; RV64-1024-NEXT:    ld s1, 472(sp) # 8-byte Folded Reload
-; RV64-1024-NEXT:    sh s1, 1464(sp)
-; RV64-1024-NEXT:    srli s1, s1, 32
-; RV64-1024-NEXT:    sh s1, 1468(sp)
-; RV64-1024-NEXT:    sh t4, 1456(sp)
-; RV64-1024-NEXT:    srli s1, t4, 32
-; RV64-1024-NEXT:    sh s1, 1460(sp)
-; RV64-1024-NEXT:    sh t6, 1448(sp)
-; RV64-1024-NEXT:    srli s1, t6, 32
-; RV64-1024-NEXT:    sh s1, 1452(sp)
-; RV64-1024-NEXT:    sh s3, 1440(sp)
-; RV64-1024-NEXT:    srli s1, s3, 32
-; RV64-1024-NEXT:    sh s1, 1444(sp)
-; RV64-1024-NEXT:    sh s6, 1432(sp)
-; RV64-1024-NEXT:    srli s1, s6, 32
-; RV64-1024-NEXT:    sh s1, 1436(sp)
-; RV64-1024-NEXT:    sh s7, 1424(sp)
-; RV64-1024-NEXT:    srli s1, s7, 32
-; RV64-1024-NEXT:    sh s1, 1428(sp)
-; RV64-1024-NEXT:    sh s8, 1416(sp)
-; RV64-1024-NEXT:    srli s1, s8, 32
-; RV64-1024-NEXT:    sh s1, 1420(sp)
-; RV64-1024-NEXT:    sh s9, 1408(sp)
-; RV64-1024-NEXT:    srli s1, s9, 32
-; RV64-1024-NEXT:    sh s1, 1412(sp)
-; RV64-1024-NEXT:    sh s10, 1400(sp)
-; RV64-1024-NEXT:    srli s1, s10, 32
-; RV64-1024-NEXT:    sh s1, 1404(sp)
-; RV64-1024-NEXT:    sh s11, 1392(sp)
-; RV64-1024-NEXT:    srli s1, s11, 32
-; RV64-1024-NEXT:    sh s1, 1396(sp)
-; RV64-1024-NEXT:    sh ra, 1384(sp)
-; RV64-1024-NEXT:    srli s1, ra, 32
-; RV64-1024-NEXT:    sh s1, 1388(sp)
-; RV64-1024-NEXT:    sh a6, 1376(sp)
-; RV64-1024-NEXT:    srli s1, a6, 32
-; RV64-1024-NEXT:    sh s1, 1380(sp)
-; RV64-1024-NEXT:    sh a7, 1368(sp)
-; RV64-1024-NEXT:    srli s1, a7, 32
-; RV64-1024-NEXT:    sh s1, 1372(sp)
-; RV64-1024-NEXT:    sh t0, 1360(sp)
-; RV64-1024-NEXT:    srli s1, t0, 32
-; RV64-1024-NEXT:    sh s1, 1364(sp)
-; RV64-1024-NEXT:    sh t1, 1352(sp)
-; RV64-1024-NEXT:    srli s1, t1, 32
-; RV64-1024-NEXT:    sh s1, 1356(sp)
-; RV64-1024-NEXT:    sh t2, 1344(sp)
-; RV64-1024-NEXT:    srli s1, t2, 32
-; RV64-1024-NEXT:    sh s1, 1348(sp)
-; RV64-1024-NEXT:    sh t3, 1336(sp)
-; RV64-1024-NEXT:    srli s1, t3, 32
-; RV64-1024-NEXT:    sh s1, 1340(sp)
-; RV64-1024-NEXT:    sh a2, 1328(sp)
-; RV64-1024-NEXT:    srli a2, a2, 32
-; RV64-1024-NEXT:    sh a2, 1332(sp)
-; RV64-1024-NEXT:    sh a3, 1320(sp)
-; RV64-1024-NEXT:    srli a2, a3, 32
-; RV64-1024-NEXT:    sh a2, 1324(sp)
-; RV64-1024-NEXT:    sh a4, 1312(sp)
-; RV64-1024-NEXT:    srli a2, a4, 32
-; RV64-1024-NEXT:    sh a2, 1316(sp)
-; RV64-1024-NEXT:    sh a5, 1304(sp)
-; RV64-1024-NEXT:    srli a2, a5, 32
-; RV64-1024-NEXT:    sh a2, 1308(sp)
-; RV64-1024-NEXT:    sh a1, 1296(sp)
-; RV64-1024-NEXT:    srli a1, a1, 32
-; RV64-1024-NEXT:    sh a1, 1300(sp)
-; RV64-1024-NEXT:    vmv.x.s a1, v16
-; RV64-1024-NEXT:    sh a1, 1288(sp)
-; RV64-1024-NEXT:    srli a1, a1, 32
-; RV64-1024-NEXT:    sh a1, 1292(sp)
-; RV64-1024-NEXT:    vmv.x.s a1, v0
-; RV64-1024-NEXT:    sh a1, 1280(sp)
-; RV64-1024-NEXT:    vslidedown.vi v16, v8, 31
-; RV64-1024-NEXT:    srli a1, a1, 32
-; RV64-1024-NEXT:    sh a1, 1284(sp)
-; RV64-1024-NEXT:    vmv.x.s a1, v16
-; RV64-1024-NEXT:    sh a1, 1272(sp)
-; RV64-1024-NEXT:    vslidedown.vi v16, v8, 30
-; RV64-1024-NEXT:    srli a1, a1, 32
-; RV64-1024-NEXT:    sh a1, 1276(sp)
-; RV64-1024-NEXT:    vmv.x.s a1, v16
-; RV64-1024-NEXT:    sh a1, 1264(sp)
-; RV64-1024-NEXT:    vslidedown.vi v16, v8, 29
-; RV64-1024-NEXT:    srli a1, a1, 32
-; RV64-1024-NEXT:    sh a1, 1268(sp)
-; RV64-1024-NEXT:    vmv.x.s a1, v16
-; RV64-1024-NEXT:    sh a1, 1256(sp)
-; RV64-1024-NEXT:    vslidedown.vi v16, v8, 28
-; RV64-1024-NEXT:    srli a1, a1, 32
-; RV64-1024-NEXT:    sh a1, 1260(sp)
-; RV64-1024-NEXT:    vmv.x.s a1, v16
-; RV64-1024-NEXT:    sh a1, 1248(sp)
-; RV64-1024-NEXT:    vslidedown.vi v16, v8, 27
-; RV64-1024-NEXT:    srli a1, a1, 32
-; RV64-1024-NEXT:    sh a1, 1252(sp)
-; RV64-1024-NEXT:    vmv.x.s a1, v16
-; RV64-1024-NEXT:    sh a1, 1240(sp)
-; RV64-1024-NEXT:    vslidedown.vi v16, v8, 26
-; RV64-1024-NEXT:    srli a1, a1, 32
-; RV64-1024-NEXT:    sh a1, 1244(sp)
-; RV64-1024-NEXT:    vmv.x.s a1, v16
-; RV64-1024-NEXT:    sh a1, 1232(sp)
-; RV64-1024-NEXT:    vslidedown.vi v16, v8, 25
-; RV64-1024-NEXT:    srli a1, a1, 32
-; RV64-1024-NEXT:    sh a1, 1236(sp)
-; RV64-1024-NEXT:    vmv.x.s a1, v16
-; RV64-1024-NEXT:    sh a1, 1224(sp)
-; RV64-1024-NEXT:    vslidedown.vi v16, v8, 24
-; RV64-1024-NEXT:    srli a1, a1, 32
-; RV64-1024-NEXT:    sh a1, 1228(sp)
-; RV64-1024-NEXT:    vmv.x.s a1, v16
-; RV64-1024-NEXT:    sh a1, 1216(sp)
-; RV64-1024-NEXT:    vslidedown.vi v16, v8, 23
-; RV64-1024-NEXT:    srli a1, a1, 32
-; RV64-1024-NEXT:    sh a1, 1220(sp)
-; RV64-1024-NEXT:    vmv.x.s a1, v16
-; RV64-1024-NEXT:    sh a1, 1208(sp)
-; RV64-1024-NEXT:    vslidedown.vi v16, v8, 22
-; RV64-1024-NEXT:    srli a1, a1, 32
-; RV64-1024-NEXT:    sh a1, 1212(sp)
-; RV64-1024-NEXT:    vmv.x.s a1, v16
-; RV64-1024-NEXT:    sh a1, 1200(sp)
-; RV64-1024-NEXT:    vslidedown.vi v16, v8, 21
-; RV64-1024-NEXT:    srli a1, a1, 32
-; RV64-1024-NEXT:    sh a1, 1204(sp)
-; RV64-1024-NEXT:    vmv.x.s a1, v16
-; RV64-1024-NEXT:    sh a1, 1192(sp)
-; RV64-1024-NEXT:    vslidedown.vi v16, v8, 20
-; RV64-1024-NEXT:    srli a1, a1, 32
-; RV64-1024-NEXT:    sh a1, 1196(sp)
-; RV64-1024-NEXT:    vmv.x.s a1, v16
-; RV64-1024-NEXT:    sh a1, 1184(sp)
-; RV64-1024-NEXT:    vslidedown.vi v16, v8, 19
-; RV64-1024-NEXT:    srli a1, a1, 32
-; RV64-1024-NEXT:    sh a1, 1188(sp)
-; RV64-1024-NEXT:    vmv.x.s a1, v16
-; RV64-1024-NEXT:    sh a1, 1176(sp)
-; RV64-1024-NEXT:    vslidedown.vi v16, v8, 18
-; RV64-1024-NEXT:    srli a1, a1, 32
-; RV64-1024-NEXT:    sh a1, 1180(sp)
-; RV64-1024-NEXT:    vmv.x.s a1, v16
-; RV64-1024-NEXT:    sh a1, 1168(sp)
-; RV64-1024-NEXT:    vslidedown.vi v16, v8, 17
-; RV64-1024-NEXT:    srli a1, a1, 32
-; RV64-1024-NEXT:    sh a1, 1172(sp)
-; RV64-1024-NEXT:    vmv.x.s a1, v16
-; RV64-1024-NEXT:    sh a1, 1160(sp)
-; RV64-1024-NEXT:    vslidedown.vi v16, v8, 16
-; RV64-1024-NEXT:    srli a1, a1, 32
-; RV64-1024-NEXT:    sh a1, 1164(sp)
-; RV64-1024-NEXT:    vmv.x.s a1, v16
-; RV64-1024-NEXT:    sh a1, 1152(sp)
-; RV64-1024-NEXT:    vslidedown.vi v16, v8, 15
-; RV64-1024-NEXT:    srli a1, a1, 32
-; RV64-1024-NEXT:    sh a1, 1156(sp)
-; RV64-1024-NEXT:    vmv.x.s a1, v16
-; RV64-1024-NEXT:    sh a1, 1144(sp)
-; RV64-1024-NEXT:    vslidedown.vi v16, v8, 14
-; RV64-1024-NEXT:    srli a1, a1, 32
-; RV64-1024-NEXT:    sh a1, 1148(sp)
-; RV64-1024-NEXT:    vmv.x.s a1, v16
-; RV64-1024-NEXT:    sh a1, 1136(sp)
-; RV64-1024-NEXT:    vslidedown.vi v16, v8, 13
-; RV64-1024-NEXT:    srli a1, a1, 32
-; RV64-1024-NEXT:    sh a1, 1140(sp)
-; RV64-1024-NEXT:    vmv.x.s a1, v16
-; RV64-1024-NEXT:    sh a1, 1128(sp)
-; RV64-1024-NEXT:    vslidedown.vi v16, v8, 12
-; RV64-1024-NEXT:    srli a1, a1, 32
-; RV64-1024-NEXT:    sh a1, 1132(sp)
-; RV64-1024-NEXT:    vmv.x.s a1, v16
-; RV64-1024-NEXT:    sh a1, 1120(sp)
-; RV64-1024-NEXT:    vslidedown.vi v16, v8, 11
-; RV64-1024-NEXT:    srli a1, a1, 32
-; RV64-1024-NEXT:    sh a1, 1124(sp)
-; RV64-1024-NEXT:    vmv.x.s a1, v16
-; RV64-1024-NEXT:    sh a1, 1112(sp)
-; RV64-1024-NEXT:    vslidedown.vi v16, v8, 10
-; RV64-1024-NEXT:    srli a1, a1, 32
-; RV64-1024-NEXT:    sh a1, 1116(sp)
-; RV64-1024-NEXT:    vmv.x.s a1, v16
-; RV64-1024-NEXT:    sh a1, 1104(sp)
-; RV64-1024-NEXT:    vslidedown.vi v16, v8, 9
-; RV64-1024-NEXT:    srli a1, a1, 32
-; RV64-1024-NEXT:    sh a1, 1108(sp)
-; RV64-1024-NEXT:    vmv.x.s a1, v16
-; RV64-1024-NEXT:    sh a1, 1096(sp)
-; RV64-1024-NEXT:    vslidedown.vi v16, v8, 8
-; RV64-1024-NEXT:    srli a1, a1, 32
-; RV64-1024-NEXT:    sh a1, 1100(sp)
-; RV64-1024-NEXT:    vmv.x.s a1, v16
-; RV64-1024-NEXT:    sh a1, 1088(sp)
-; RV64-1024-NEXT:    vslidedown.vi v16, v8, 7
-; RV64-1024-NEXT:    srli a1, a1, 32
-; RV64-1024-NEXT:    sh a1, 1092(sp)
-; RV64-1024-NEXT:    vmv.x.s a1, v16
-; RV64-1024-NEXT:    sh a1, 1080(sp)
-; RV64-1024-NEXT:    vslidedown.vi v16, v8, 6
-; RV64-1024-NEXT:    srli a1, a1, 32
-; RV64-1024-NEXT:    sh a1, 1084(sp)
-; RV64-1024-NEXT:    vmv.x.s a1, v16
-; RV64-1024-NEXT:    sh a1, 1072(sp)
-; RV64-1024-NEXT:    vslidedown.vi v16, v8, 5
-; RV64-1024-NEXT:    srli a1, a1, 32
-; RV64-1024-NEXT:    sh a1, 1076(sp)
-; RV64-1024-NEXT:    vmv.x.s a1, v16
-; RV64-1024-NEXT:    sh a1, 1064(sp)
-; RV64-1024-NEXT:    vslidedown.vi v16, v8, 4
-; RV64-1024-NEXT:    srli a1, a1, 32
-; RV64-1024-NEXT:    sh a1, 1068(sp)
-; RV64-1024-NEXT:    vmv.x.s a1, v16
-; RV64-1024-NEXT:    sh a1, 1056(sp)
-; RV64-1024-NEXT:    vslidedown.vi v16, v8, 3
-; RV64-1024-NEXT:    srli a1, a1, 32
-; RV64-1024-NEXT:    sh a1, 1060(sp)
-; RV64-1024-NEXT:    vmv.x.s a1, v16
-; RV64-1024-NEXT:    sh a1, 1048(sp)
-; RV64-1024-NEXT:    vslidedown.vi v16, v8, 2
-; RV64-1024-NEXT:    srli a1, a1, 32
-; RV64-1024-NEXT:    sh a1, 1052(sp)
-; RV64-1024-NEXT:    vmv.x.s a1, v16
-; RV64-1024-NEXT:    sh a1, 1040(sp)
-; RV64-1024-NEXT:    vslidedown.vi v8, v8, 1
-; RV64-1024-NEXT:    srli a1, a1, 32
-; RV64-1024-NEXT:    sh a1, 1044(sp)
-; RV64-1024-NEXT:    vmv.x.s a1, v8
-; RV64-1024-NEXT:    sh a1, 1032(sp)
-; RV64-1024-NEXT:    srli a1, a1, 32
-; RV64-1024-NEXT:    sh a1, 1036(sp)
-; RV64-1024-NEXT:    addi a2, zero, 256
-; RV64-1024-NEXT:    vsetvli zero, a2, e16, m4, ta, mu
-; RV64-1024-NEXT:    addi a1, sp, 512
-; RV64-1024-NEXT:    vle16.v v8, (a1)
-; RV64-1024-NEXT:    addi a1, sp, 1024
-; RV64-1024-NEXT:    vle16.v v28, (a1)
-; RV64-1024-NEXT:    lui a1, 1026731
-; RV64-1024-NEXT:    addiw a1, a1, -1365
-; RV64-1024-NEXT:    slli a1, a1, 12
-; RV64-1024-NEXT:    addi a1, a1, -1365
-; RV64-1024-NEXT:    slli a1, a1, 12
-; RV64-1024-NEXT:    addi a1, a1, -1365
-; RV64-1024-NEXT:    slli a1, a1, 12
-; RV64-1024-NEXT:    addi a1, a1, -1366
-; RV64-1024-NEXT:    vsetivli zero, 4, e64, m1, ta, mu
-; RV64-1024-NEXT:    vmv.s.x v25, a1
+; RV64-1024-NEXT:    csrr a2, vlenb
+; RV64-1024-NEXT:    slli a2, a2, 3
+; RV64-1024-NEXT:    add a2, sp, a2
+; RV64-1024-NEXT:    addi a2, a2, 16
+; RV64-1024-NEXT:    vl8re8.v v0, (a2) # Unknown-size Folded Reload
+; RV64-1024-NEXT:    vrgather.vv v16, v0, v24
+; RV64-1024-NEXT:    lui a2, 1026731
+; RV64-1024-NEXT:    addiw a2, a2, -1365
+; RV64-1024-NEXT:    slli a2, a2, 12
+; RV64-1024-NEXT:    addi a2, a2, -1365
+; RV64-1024-NEXT:    slli a2, a2, 12
+; RV64-1024-NEXT:    addi a2, a2, -1365
+; RV64-1024-NEXT:    slli a2, a2, 12
+; RV64-1024-NEXT:    addi a2, a2, -1366
+; RV64-1024-NEXT:    vsetivli zero, 8, e64, m1, ta, mu
+; RV64-1024-NEXT:    vmv.s.x v25, a2
 ; RV64-1024-NEXT:    vsetivli zero, 2, e64, m1, tu, mu
 ; RV64-1024-NEXT:    vmv1r.v v0, v25
 ; RV64-1024-NEXT:    vslideup.vi v0, v25, 1
@@ -979,893 +204,69 @@ define void @interleave512(<512 x i16>* %agg.result, <256 x i16>* %0, <256 x i16
 ; RV64-1024-NEXT:    vslideup.vi v0, v25, 2
 ; RV64-1024-NEXT:    vsetivli zero, 4, e64, m1, tu, mu
 ; RV64-1024-NEXT:    vslideup.vi v0, v25, 3
-; RV64-1024-NEXT:    vsetvli zero, a2, e16, m4, ta, mu
-; RV64-1024-NEXT:    lui a1, %hi(.LCPI1_2)
-; RV64-1024-NEXT:    addi a1, a1, %lo(.LCPI1_2)
-; RV64-1024-NEXT:    vle16.v v12, (a1)
-; RV64-1024-NEXT:    lui a1, %hi(.LCPI1_3)
-; RV64-1024-NEXT:    addi a1, a1, %lo(.LCPI1_3)
-; RV64-1024-NEXT:    vle16.v v16, (a1)
-; RV64-1024-NEXT:    vrgather.vv v20, v28, v12
-; RV64-1024-NEXT:    vsetvli zero, zero, e16, m4, tu, mu
-; RV64-1024-NEXT:    csrr a1, vlenb
-; RV64-1024-NEXT:    slli a1, a1, 3
-; RV64-1024-NEXT:    add a1, sp, a1
-; RV64-1024-NEXT:    addi a1, a1, 1944
-; RV64-1024-NEXT:    vl4re8.v v24, (a1) # Unknown-size Folded Reload
-; RV64-1024-NEXT:    vrgather.vv v20, v24, v16, v0.t
-; RV64-1024-NEXT:    vsetvli zero, zero, e16, m4, ta, mu
-; RV64-1024-NEXT:    lui a1, %hi(.LCPI1_4)
-; RV64-1024-NEXT:    addi a1, a1, %lo(.LCPI1_4)
-; RV64-1024-NEXT:    vle16.v v28, (a1)
-; RV64-1024-NEXT:    vrgather.vv v16, v8, v12
-; RV64-1024-NEXT:    vsetvli zero, zero, e16, m4, tu, mu
-; RV64-1024-NEXT:    vrgather.vv v16, v24, v28, v0.t
-; RV64-1024-NEXT:    addi a1, a0, 512
-; RV64-1024-NEXT:    vsetvli zero, zero, e16, m4, ta, mu
-; RV64-1024-NEXT:    vse16.v v16, (a1)
-; RV64-1024-NEXT:    vse16.v v20, (a0)
-; RV64-1024-NEXT:    addi sp, s0, -2048
+; RV64-1024-NEXT:    vsetivli zero, 5, e64, m1, tu, mu
+; RV64-1024-NEXT:    vslideup.vi v0, v25, 4
+; RV64-1024-NEXT:    vsetivli zero, 6, e64, m1, tu, mu
+; RV64-1024-NEXT:    vslideup.vi v0, v25, 5
+; RV64-1024-NEXT:    vsetivli zero, 7, e64, m1, tu, mu
+; RV64-1024-NEXT:    vslideup.vi v0, v25, 6
+; RV64-1024-NEXT:    vsetivli zero, 8, e64, m1, tu, mu
+; RV64-1024-NEXT:    vslideup.vi v0, v25, 7
+; RV64-1024-NEXT:    lui a2, %hi(.LCPI1_2)
+; RV64-1024-NEXT:    addi a2, a2, %lo(.LCPI1_2)
+; RV64-1024-NEXT:    vsetvli zero, a1, e16, m8, ta, mu
+; RV64-1024-NEXT:    vle16.v v24, (a2)
+; RV64-1024-NEXT:    vsetvli zero, zero, e16, m8, tu, mu
+; RV64-1024-NEXT:    vrgather.vv v16, v8, v24, v0.t
+; RV64-1024-NEXT:    vsetvli zero, zero, e16, m8, ta, mu
+; RV64-1024-NEXT:    vse16.v v16, (a0)
+; RV64-1024-NEXT:    csrr a0, vlenb
+; RV64-1024-NEXT:    addi a1, zero, 24
+; RV64-1024-NEXT:    mul a0, a0, a1
+; RV64-1024-NEXT:    add sp, sp, a0
 ; RV64-1024-NEXT:    addi sp, sp, 16
-; RV64-1024-NEXT:    ld s11, 1928(sp) # 8-byte Folded Reload
-; RV64-1024-NEXT:    ld s10, 1936(sp) # 8-byte Folded Reload
-; RV64-1024-NEXT:    ld s9, 1944(sp) # 8-byte Folded Reload
-; RV64-1024-NEXT:    ld s8, 1952(sp) # 8-byte Folded Reload
-; RV64-1024-NEXT:    ld s7, 1960(sp) # 8-byte Folded Reload
-; RV64-1024-NEXT:    ld s6, 1968(sp) # 8-byte Folded Reload
-; RV64-1024-NEXT:    ld s5, 1976(sp) # 8-byte Folded Reload
-; RV64-1024-NEXT:    ld s4, 1984(sp) # 8-byte Folded Reload
-; RV64-1024-NEXT:    ld s3, 1992(sp) # 8-byte Folded Reload
-; RV64-1024-NEXT:    ld s2, 2000(sp) # 8-byte Folded Reload
-; RV64-1024-NEXT:    ld s1, 2008(sp) # 8-byte Folded Reload
-; RV64-1024-NEXT:    ld s0, 2016(sp) # 8-byte Folded Reload
-; RV64-1024-NEXT:    ld ra, 2024(sp) # 8-byte Folded Reload
-; RV64-1024-NEXT:    addi sp, sp, 2032
 ; RV64-1024-NEXT:    ret
 ;
 ; RV64-2048-LABEL: interleave512:
 ; RV64-2048:       # %bb.0: # %entry
-; RV64-2048-NEXT:    addi sp, sp, -2032
-; RV64-2048-NEXT:    .cfi_def_cfa_offset 2032
-; RV64-2048-NEXT:    sd ra, 2024(sp) # 8-byte Folded Spill
-; RV64-2048-NEXT:    sd s0, 2016(sp) # 8-byte Folded Spill
-; RV64-2048-NEXT:    sd s1, 2008(sp) # 8-byte Folded Spill
-; RV64-2048-NEXT:    sd s2, 2000(sp) # 8-byte Folded Spill
-; RV64-2048-NEXT:    sd s3, 1992(sp) # 8-byte Folded Spill
-; RV64-2048-NEXT:    sd s4, 1984(sp) # 8-byte Folded Spill
-; RV64-2048-NEXT:    sd s5, 1976(sp) # 8-byte Folded Spill
-; RV64-2048-NEXT:    sd s6, 1968(sp) # 8-byte Folded Spill
-; RV64-2048-NEXT:    sd s7, 1960(sp) # 8-byte Folded Spill
-; RV64-2048-NEXT:    sd s8, 1952(sp) # 8-byte Folded Spill
-; RV64-2048-NEXT:    sd s9, 1944(sp) # 8-byte Folded Spill
-; RV64-2048-NEXT:    sd s10, 1936(sp) # 8-byte Folded Spill
-; RV64-2048-NEXT:    sd s11, 1928(sp) # 8-byte Folded Spill
-; RV64-2048-NEXT:    .cfi_offset ra, -8
-; RV64-2048-NEXT:    .cfi_offset s0, -16
-; RV64-2048-NEXT:    .cfi_offset s1, -24
-; RV64-2048-NEXT:    .cfi_offset s2, -32
-; RV64-2048-NEXT:    .cfi_offset s3, -40
-; RV64-2048-NEXT:    .cfi_offset s4, -48
-; RV64-2048-NEXT:    .cfi_offset s5, -56
-; RV64-2048-NEXT:    .cfi_offset s6, -64
-; RV64-2048-NEXT:    .cfi_offset s7, -72
-; RV64-2048-NEXT:    .cfi_offset s8, -80
-; RV64-2048-NEXT:    .cfi_offset s9, -88
-; RV64-2048-NEXT:    .cfi_offset s10, -96
-; RV64-2048-NEXT:    .cfi_offset s11, -104
-; RV64-2048-NEXT:    addi s0, sp, 2032
-; RV64-2048-NEXT:    .cfi_def_cfa s0, 0
-; RV64-2048-NEXT:    addi sp, sp, -16
-; RV64-2048-NEXT:    csrr a3, vlenb
-; RV64-2048-NEXT:    addi a4, zero, 6
-; RV64-2048-NEXT:    mul a3, a3, a4
-; RV64-2048-NEXT:    sub sp, sp, a3
-; RV64-2048-NEXT:    andi sp, sp, -512
 ; RV64-2048-NEXT:    addi a3, zero, 256
 ; RV64-2048-NEXT:    vsetvli zero, a3, e16, m2, ta, mu
-; RV64-2048-NEXT:    addi a3, zero, 256
-; RV64-2048-NEXT:    vle16.v v26, (a1)
-; RV64-2048-NEXT:    lui a1, %hi(.LCPI1_0)
-; RV64-2048-NEXT:    addi a1, a1, %lo(.LCPI1_0)
-; RV64-2048-NEXT:    vle16.v v8, (a1)
-; RV64-2048-NEXT:    vle16.v v28, (a2)
-; RV64-2048-NEXT:    csrr a1, vlenb
-; RV64-2048-NEXT:    slli a1, a1, 2
-; RV64-2048-NEXT:    add a1, sp, a1
-; RV64-2048-NEXT:    addi a1, a1, 1944
-; RV64-2048-NEXT:    vs2r.v v28, (a1) # Unknown-size Folded Spill
-; RV64-2048-NEXT:    vrgather.vv v12, v26, v8
-; RV64-2048-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
+; RV64-2048-NEXT:    vle16.v v12, (a1)
+; RV64-2048-NEXT:    vle16.v v8, (a2)
+; RV64-2048-NEXT:    addi a1, zero, 512
+; RV64-2048-NEXT:    vsetvli zero, a1, e16, m4, ta, mu
 ; RV64-2048-NEXT:    vmv.v.i v28, 0
-; RV64-2048-NEXT:    addi a1, zero, 128
-; RV64-2048-NEXT:    vsetvli zero, a1, e32, m4, tu, mu
-; RV64-2048-NEXT:    vslideup.vi v28, v12, 0
+; RV64-2048-NEXT:    vsetvli zero, a3, e16, m4, tu, mu
+; RV64-2048-NEXT:    vmv4r.v v16, v28
+; RV64-2048-NEXT:    vslideup.vi v16, v12, 0
+; RV64-2048-NEXT:    vsetvli zero, a3, e16, m2, ta, mu
+; RV64-2048-NEXT:    vmv.v.i v12, 0
+; RV64-2048-NEXT:    vsetvli zero, a1, e16, m4, tu, mu
+; RV64-2048-NEXT:    vslideup.vx v16, v12, a3
+; RV64-2048-NEXT:    lui a2, %hi(.LCPI1_0)
+; RV64-2048-NEXT:    addi a2, a2, %lo(.LCPI1_0)
+; RV64-2048-NEXT:    vsetvli zero, zero, e16, m4, ta, mu
+; RV64-2048-NEXT:    vle16.v v20, (a2)
+; RV64-2048-NEXT:    vrgather.vv v24, v16, v20
+; RV64-2048-NEXT:    vsetvli zero, a3, e16, m4, tu, mu
+; RV64-2048-NEXT:    vslideup.vi v28, v8, 0
+; RV64-2048-NEXT:    vsetvli zero, a1, e16, m4, tu, mu
+; RV64-2048-NEXT:    vslideup.vx v28, v12, a3
 ; RV64-2048-NEXT:    lui a2, %hi(.LCPI1_1)
 ; RV64-2048-NEXT:    addi a2, a2, %lo(.LCPI1_1)
-; RV64-2048-NEXT:    vsetvli zero, a3, e16, m2, ta, mu
-; RV64-2048-NEXT:    vle16.v v10, (a2)
-; RV64-2048-NEXT:    vrgather.vv v12, v26, v10
-; RV64-2048-NEXT:    vrgather.vv v16, v12, v8
-; RV64-2048-NEXT:    vsetvli zero, zero, e32, m4, tu, mu
-; RV64-2048-NEXT:    vslideup.vx v28, v16, a1
-; RV64-2048-NEXT:    addi a1, zero, 127
-; RV64-2048-NEXT:    vsetivli zero, 1, e64, m4, ta, mu
-; RV64-2048-NEXT:    vslidedown.vx v8, v28, a1
-; RV64-2048-NEXT:    vmv.x.s a6, v8
-; RV64-2048-NEXT:    addi a1, zero, 126
-; RV64-2048-NEXT:    vslidedown.vx v8, v28, a1
-; RV64-2048-NEXT:    vmv.x.s s3, v8
-; RV64-2048-NEXT:    addi a1, zero, 125
-; RV64-2048-NEXT:    vslidedown.vx v8, v28, a1
-; RV64-2048-NEXT:    vmv.x.s s4, v8
-; RV64-2048-NEXT:    addi a1, zero, 124
-; RV64-2048-NEXT:    vslidedown.vx v8, v28, a1
-; RV64-2048-NEXT:    vmv.x.s s5, v8
-; RV64-2048-NEXT:    addi a1, zero, 123
-; RV64-2048-NEXT:    vslidedown.vx v8, v28, a1
-; RV64-2048-NEXT:    vmv.x.s s6, v8
-; RV64-2048-NEXT:    addi a1, zero, 122
-; RV64-2048-NEXT:    vslidedown.vx v8, v28, a1
-; RV64-2048-NEXT:    vmv.x.s s7, v8
-; RV64-2048-NEXT:    addi a1, zero, 121
-; RV64-2048-NEXT:    vslidedown.vx v8, v28, a1
-; RV64-2048-NEXT:    vmv.x.s s8, v8
-; RV64-2048-NEXT:    addi a1, zero, 120
-; RV64-2048-NEXT:    vslidedown.vx v8, v28, a1
-; RV64-2048-NEXT:    vmv.x.s s9, v8
-; RV64-2048-NEXT:    addi a1, zero, 119
-; RV64-2048-NEXT:    vslidedown.vx v8, v28, a1
-; RV64-2048-NEXT:    vmv.x.s t4, v8
-; RV64-2048-NEXT:    addi a1, zero, 118
-; RV64-2048-NEXT:    vslidedown.vx v8, v28, a1
-; RV64-2048-NEXT:    vmv.x.s t5, v8
-; RV64-2048-NEXT:    addi a1, zero, 117
-; RV64-2048-NEXT:    vslidedown.vx v8, v28, a1
-; RV64-2048-NEXT:    vmv.x.s t6, v8
-; RV64-2048-NEXT:    addi a1, zero, 116
-; RV64-2048-NEXT:    vslidedown.vx v8, v28, a1
-; RV64-2048-NEXT:    vmv.x.s s10, v8
-; RV64-2048-NEXT:    addi a1, zero, 115
-; RV64-2048-NEXT:    vslidedown.vx v8, v28, a1
-; RV64-2048-NEXT:    vmv.x.s s11, v8
-; RV64-2048-NEXT:    addi a1, zero, 114
-; RV64-2048-NEXT:    vslidedown.vx v8, v28, a1
-; RV64-2048-NEXT:    vmv.x.s t0, v8
-; RV64-2048-NEXT:    addi a1, zero, 113
-; RV64-2048-NEXT:    vslidedown.vx v8, v28, a1
-; RV64-2048-NEXT:    vmv.x.s t1, v8
-; RV64-2048-NEXT:    addi a1, zero, 112
-; RV64-2048-NEXT:    vslidedown.vx v8, v28, a1
-; RV64-2048-NEXT:    vmv.x.s t2, v8
-; RV64-2048-NEXT:    addi a1, zero, 111
-; RV64-2048-NEXT:    vslidedown.vx v8, v28, a1
-; RV64-2048-NEXT:    vmv.x.s a3, v8
-; RV64-2048-NEXT:    addi a1, zero, 110
-; RV64-2048-NEXT:    vslidedown.vx v8, v28, a1
-; RV64-2048-NEXT:    vmv.x.s a1, v8
-; RV64-2048-NEXT:    addi a2, zero, 109
-; RV64-2048-NEXT:    vslidedown.vx v8, v28, a2
-; RV64-2048-NEXT:    vmv.x.s a2, v8
-; RV64-2048-NEXT:    addi a4, zero, 108
-; RV64-2048-NEXT:    vslidedown.vx v8, v28, a4
-; RV64-2048-NEXT:    vmv.x.s a4, v8
-; RV64-2048-NEXT:    addi s1, zero, 107
-; RV64-2048-NEXT:    vslidedown.vx v8, v28, s1
-; RV64-2048-NEXT:    vmv.x.s s2, v8
-; RV64-2048-NEXT:    addi a5, zero, 106
-; RV64-2048-NEXT:    vslidedown.vx v8, v28, a5
-; RV64-2048-NEXT:    vmv.x.s a5, v8
-; RV64-2048-NEXT:    sd a5, 504(sp) # 8-byte Folded Spill
-; RV64-2048-NEXT:    addi a5, zero, 105
-; RV64-2048-NEXT:    vslidedown.vx v8, v28, a5
-; RV64-2048-NEXT:    vmv.x.s a5, v8
-; RV64-2048-NEXT:    sd a5, 496(sp) # 8-byte Folded Spill
-; RV64-2048-NEXT:    addi a5, zero, 104
-; RV64-2048-NEXT:    vslidedown.vx v8, v28, a5
-; RV64-2048-NEXT:    vmv.x.s a5, v8
-; RV64-2048-NEXT:    sd a5, 488(sp) # 8-byte Folded Spill
-; RV64-2048-NEXT:    addi a5, zero, 103
-; RV64-2048-NEXT:    vslidedown.vx v24, v28, a5
-; RV64-2048-NEXT:    addi a5, zero, 102
-; RV64-2048-NEXT:    vslidedown.vx v8, v28, a5
-; RV64-2048-NEXT:    addi a5, zero, 101
-; RV64-2048-NEXT:    vslidedown.vx v12, v28, a5
-; RV64-2048-NEXT:    addi a5, zero, 100
-; RV64-2048-NEXT:    vslidedown.vx v16, v28, a5
-; RV64-2048-NEXT:    addi a5, zero, 99
-; RV64-2048-NEXT:    vslidedown.vx v20, v28, a5
-; RV64-2048-NEXT:    addi a5, zero, 98
-; RV64-2048-NEXT:    vslidedown.vx v0, v28, a5
-; RV64-2048-NEXT:    addi a5, zero, 97
-; RV64-2048-NEXT:    vslidedown.vx v4, v28, a5
-; RV64-2048-NEXT:    vmv.x.s a5, v24
-; RV64-2048-NEXT:    sd a5, 480(sp) # 8-byte Folded Spill
-; RV64-2048-NEXT:    sh a6, 1016(sp)
-; RV64-2048-NEXT:    srli a5, a6, 32
-; RV64-2048-NEXT:    sh a5, 1020(sp)
-; RV64-2048-NEXT:    addi a5, zero, 96
-; RV64-2048-NEXT:    vslidedown.vx v24, v28, a5
-; RV64-2048-NEXT:    vmv.x.s s1, v8
-; RV64-2048-NEXT:    sh s3, 1008(sp)
-; RV64-2048-NEXT:    srli a5, s3, 32
-; RV64-2048-NEXT:    sh a5, 1012(sp)
-; RV64-2048-NEXT:    addi a5, zero, 95
-; RV64-2048-NEXT:    vslidedown.vx v8, v28, a5
-; RV64-2048-NEXT:    vmv.x.s ra, v12
-; RV64-2048-NEXT:    sh s4, 1000(sp)
-; RV64-2048-NEXT:    srli a5, s4, 32
-; RV64-2048-NEXT:    sh a5, 1004(sp)
-; RV64-2048-NEXT:    addi a5, zero, 94
-; RV64-2048-NEXT:    vslidedown.vx v12, v28, a5
-; RV64-2048-NEXT:    vmv.x.s a6, v16
-; RV64-2048-NEXT:    sh s5, 992(sp)
-; RV64-2048-NEXT:    srli a5, s5, 32
-; RV64-2048-NEXT:    sh a5, 996(sp)
-; RV64-2048-NEXT:    addi a5, zero, 93
-; RV64-2048-NEXT:    vslidedown.vx v16, v28, a5
-; RV64-2048-NEXT:    vmv.x.s s5, v20
-; RV64-2048-NEXT:    sh s6, 984(sp)
-; RV64-2048-NEXT:    srli a5, s6, 32
-; RV64-2048-NEXT:    sh a5, 988(sp)
-; RV64-2048-NEXT:    addi a5, zero, 92
-; RV64-2048-NEXT:    vslidedown.vx v20, v28, a5
-; RV64-2048-NEXT:    vmv.x.s s6, v0
-; RV64-2048-NEXT:    sh s7, 976(sp)
-; RV64-2048-NEXT:    srli a5, s7, 32
-; RV64-2048-NEXT:    sh a5, 980(sp)
-; RV64-2048-NEXT:    addi a5, zero, 91
-; RV64-2048-NEXT:    vslidedown.vx v0, v28, a5
-; RV64-2048-NEXT:    vmv.x.s s7, v4
-; RV64-2048-NEXT:    sh s8, 968(sp)
-; RV64-2048-NEXT:    srli a5, s8, 32
-; RV64-2048-NEXT:    sh a5, 972(sp)
-; RV64-2048-NEXT:    addi a5, zero, 90
-; RV64-2048-NEXT:    vslidedown.vx v4, v28, a5
-; RV64-2048-NEXT:    vmv.x.s s8, v24
-; RV64-2048-NEXT:    sh s9, 960(sp)
-; RV64-2048-NEXT:    srli a5, s9, 32
-; RV64-2048-NEXT:    sh a5, 964(sp)
-; RV64-2048-NEXT:    addi a5, zero, 89
-; RV64-2048-NEXT:    vslidedown.vx v24, v28, a5
-; RV64-2048-NEXT:    vmv.x.s s9, v8
-; RV64-2048-NEXT:    sh t4, 952(sp)
-; RV64-2048-NEXT:    srli a5, t4, 32
-; RV64-2048-NEXT:    sh a5, 956(sp)
-; RV64-2048-NEXT:    addi a5, zero, 88
-; RV64-2048-NEXT:    vslidedown.vx v8, v28, a5
-; RV64-2048-NEXT:    vmv.x.s a5, v12
-; RV64-2048-NEXT:    sd a5, 440(sp) # 8-byte Folded Spill
-; RV64-2048-NEXT:    sh t5, 944(sp)
-; RV64-2048-NEXT:    srli a5, t5, 32
-; RV64-2048-NEXT:    sh a5, 948(sp)
-; RV64-2048-NEXT:    addi a5, zero, 87
-; RV64-2048-NEXT:    vslidedown.vx v12, v28, a5
-; RV64-2048-NEXT:    vmv.x.s a5, v16
-; RV64-2048-NEXT:    sd a5, 472(sp) # 8-byte Folded Spill
-; RV64-2048-NEXT:    sh t6, 936(sp)
-; RV64-2048-NEXT:    srli a5, t6, 32
-; RV64-2048-NEXT:    sh a5, 940(sp)
-; RV64-2048-NEXT:    addi a5, zero, 86
-; RV64-2048-NEXT:    vslidedown.vx v16, v28, a5
-; RV64-2048-NEXT:    vmv.x.s a5, v20
-; RV64-2048-NEXT:    sd a5, 464(sp) # 8-byte Folded Spill
-; RV64-2048-NEXT:    sh s10, 928(sp)
-; RV64-2048-NEXT:    srli a5, s10, 32
-; RV64-2048-NEXT:    sh a5, 932(sp)
-; RV64-2048-NEXT:    addi a5, zero, 85
-; RV64-2048-NEXT:    vslidedown.vx v20, v28, a5
-; RV64-2048-NEXT:    vmv.x.s a5, v0
-; RV64-2048-NEXT:    sd a5, 456(sp) # 8-byte Folded Spill
-; RV64-2048-NEXT:    sh s11, 920(sp)
-; RV64-2048-NEXT:    srli a5, s11, 32
-; RV64-2048-NEXT:    sh a5, 924(sp)
-; RV64-2048-NEXT:    addi a5, zero, 84
-; RV64-2048-NEXT:    vslidedown.vx v0, v28, a5
-; RV64-2048-NEXT:    vmv.x.s a5, v4
-; RV64-2048-NEXT:    sd a5, 448(sp) # 8-byte Folded Spill
-; RV64-2048-NEXT:    sh t0, 912(sp)
-; RV64-2048-NEXT:    srli a5, t0, 32
-; RV64-2048-NEXT:    sh a5, 916(sp)
-; RV64-2048-NEXT:    addi a5, zero, 83
-; RV64-2048-NEXT:    vslidedown.vx v4, v28, a5
-; RV64-2048-NEXT:    vmv.x.s a5, v24
-; RV64-2048-NEXT:    sd a5, 432(sp) # 8-byte Folded Spill
-; RV64-2048-NEXT:    sh t1, 904(sp)
-; RV64-2048-NEXT:    srli a5, t1, 32
-; RV64-2048-NEXT:    sh a5, 908(sp)
-; RV64-2048-NEXT:    addi a5, zero, 82
-; RV64-2048-NEXT:    vslidedown.vx v24, v28, a5
-; RV64-2048-NEXT:    vmv.x.s a7, v8
-; RV64-2048-NEXT:    sh t2, 896(sp)
-; RV64-2048-NEXT:    srli a5, t2, 32
-; RV64-2048-NEXT:    sh a5, 900(sp)
-; RV64-2048-NEXT:    addi a5, zero, 81
-; RV64-2048-NEXT:    vslidedown.vx v8, v28, a5
-; RV64-2048-NEXT:    vmv.x.s t0, v12
-; RV64-2048-NEXT:    sh a3, 888(sp)
-; RV64-2048-NEXT:    srli a3, a3, 32
-; RV64-2048-NEXT:    sh a3, 892(sp)
-; RV64-2048-NEXT:    addi a3, zero, 80
-; RV64-2048-NEXT:    vslidedown.vx v12, v28, a3
-; RV64-2048-NEXT:    vmv.x.s t1, v16
-; RV64-2048-NEXT:    sh a1, 880(sp)
-; RV64-2048-NEXT:    srli a1, a1, 32
-; RV64-2048-NEXT:    sh a1, 884(sp)
-; RV64-2048-NEXT:    addi a1, zero, 79
-; RV64-2048-NEXT:    vslidedown.vx v16, v28, a1
-; RV64-2048-NEXT:    vmv.x.s t2, v20
-; RV64-2048-NEXT:    sh a2, 872(sp)
-; RV64-2048-NEXT:    srli a2, a2, 32
-; RV64-2048-NEXT:    sh a2, 876(sp)
-; RV64-2048-NEXT:    addi a2, zero, 78
-; RV64-2048-NEXT:    vslidedown.vx v20, v28, a2
-; RV64-2048-NEXT:    vmv.x.s t3, v0
-; RV64-2048-NEXT:    sh a4, 864(sp)
-; RV64-2048-NEXT:    srli a4, a4, 32
-; RV64-2048-NEXT:    sh a4, 868(sp)
-; RV64-2048-NEXT:    addi a4, zero, 77
-; RV64-2048-NEXT:    vslidedown.vx v0, v28, a4
-; RV64-2048-NEXT:    vmv.x.s t4, v4
-; RV64-2048-NEXT:    sh s2, 856(sp)
-; RV64-2048-NEXT:    srli a5, s2, 32
-; RV64-2048-NEXT:    sh a5, 860(sp)
-; RV64-2048-NEXT:    addi a5, zero, 76
-; RV64-2048-NEXT:    vslidedown.vx v4, v28, a5
-; RV64-2048-NEXT:    vmv.x.s t5, v24
-; RV64-2048-NEXT:    ld a1, 504(sp) # 8-byte Folded Reload
-; RV64-2048-NEXT:    sh a1, 848(sp)
-; RV64-2048-NEXT:    srli a5, a1, 32
-; RV64-2048-NEXT:    sh a5, 852(sp)
-; RV64-2048-NEXT:    addi a5, zero, 75
-; RV64-2048-NEXT:    vslidedown.vx v24, v28, a5
-; RV64-2048-NEXT:    vmv.x.s t6, v8
-; RV64-2048-NEXT:    ld a1, 496(sp) # 8-byte Folded Reload
-; RV64-2048-NEXT:    sh a1, 840(sp)
-; RV64-2048-NEXT:    srli a3, a1, 32
-; RV64-2048-NEXT:    sh a3, 844(sp)
-; RV64-2048-NEXT:    addi a3, zero, 74
-; RV64-2048-NEXT:    vslidedown.vx v8, v28, a3
-; RV64-2048-NEXT:    vmv.x.s s10, v12
-; RV64-2048-NEXT:    ld a1, 488(sp) # 8-byte Folded Reload
-; RV64-2048-NEXT:    sh a1, 832(sp)
-; RV64-2048-NEXT:    srli a1, a1, 32
-; RV64-2048-NEXT:    sh a1, 836(sp)
-; RV64-2048-NEXT:    addi a1, zero, 73
-; RV64-2048-NEXT:    vslidedown.vx v12, v28, a1
-; RV64-2048-NEXT:    vmv.x.s s11, v16
-; RV64-2048-NEXT:    ld a1, 480(sp) # 8-byte Folded Reload
-; RV64-2048-NEXT:    sh a1, 824(sp)
-; RV64-2048-NEXT:    srli a2, a1, 32
-; RV64-2048-NEXT:    sh a2, 828(sp)
-; RV64-2048-NEXT:    addi a2, zero, 72
-; RV64-2048-NEXT:    vslidedown.vx v16, v28, a2
-; RV64-2048-NEXT:    vmv.x.s s2, v20
-; RV64-2048-NEXT:    sh s1, 816(sp)
-; RV64-2048-NEXT:    srli a4, s1, 32
-; RV64-2048-NEXT:    sh a4, 820(sp)
-; RV64-2048-NEXT:    addi a4, zero, 71
-; RV64-2048-NEXT:    vslidedown.vx v20, v28, a4
-; RV64-2048-NEXT:    vmv.x.s s3, v0
-; RV64-2048-NEXT:    sh ra, 808(sp)
-; RV64-2048-NEXT:    srli s1, ra, 32
-; RV64-2048-NEXT:    sh s1, 812(sp)
-; RV64-2048-NEXT:    addi s1, zero, 70
-; RV64-2048-NEXT:    vslidedown.vx v0, v28, s1
-; RV64-2048-NEXT:    vmv.x.s s4, v4
-; RV64-2048-NEXT:    sh a6, 800(sp)
-; RV64-2048-NEXT:    srli a5, a6, 32
-; RV64-2048-NEXT:    sh a5, 804(sp)
-; RV64-2048-NEXT:    addi a5, zero, 69
-; RV64-2048-NEXT:    vslidedown.vx v4, v28, a5
-; RV64-2048-NEXT:    addi a1, sp, 1944
-; RV64-2048-NEXT:    vs4r.v v4, (a1) # Unknown-size Folded Spill
-; RV64-2048-NEXT:    vmv.x.s a5, v24
-; RV64-2048-NEXT:    sd a5, 504(sp) # 8-byte Folded Spill
-; RV64-2048-NEXT:    sh s5, 792(sp)
-; RV64-2048-NEXT:    srli a3, s5, 32
-; RV64-2048-NEXT:    sh a3, 796(sp)
-; RV64-2048-NEXT:    addi a3, zero, 68
-; RV64-2048-NEXT:    vslidedown.vx v4, v28, a3
-; RV64-2048-NEXT:    vmv.x.s s5, v8
-; RV64-2048-NEXT:    sh s6, 784(sp)
-; RV64-2048-NEXT:    srli a1, s6, 32
-; RV64-2048-NEXT:    sh a1, 788(sp)
-; RV64-2048-NEXT:    addi a1, zero, 67
-; RV64-2048-NEXT:    vslidedown.vx v8, v28, a1
-; RV64-2048-NEXT:    vmv.x.s s6, v12
-; RV64-2048-NEXT:    sh s7, 776(sp)
-; RV64-2048-NEXT:    srli a2, s7, 32
-; RV64-2048-NEXT:    sh a2, 780(sp)
-; RV64-2048-NEXT:    addi a2, zero, 66
-; RV64-2048-NEXT:    vslidedown.vx v12, v28, a2
-; RV64-2048-NEXT:    vmv.x.s s7, v16
-; RV64-2048-NEXT:    sh s8, 768(sp)
-; RV64-2048-NEXT:    srli a4, s8, 32
-; RV64-2048-NEXT:    sh a4, 772(sp)
-; RV64-2048-NEXT:    addi a4, zero, 65
-; RV64-2048-NEXT:    vslidedown.vx v24, v28, a4
-; RV64-2048-NEXT:    vmv.x.s s8, v20
-; RV64-2048-NEXT:    sh s9, 760(sp)
-; RV64-2048-NEXT:    srli s1, s9, 32
-; RV64-2048-NEXT:    sh s1, 764(sp)
-; RV64-2048-NEXT:    addi s1, zero, 64
-; RV64-2048-NEXT:    vslidedown.vx v16, v28, s1
-; RV64-2048-NEXT:    vmv.x.s s9, v0
-; RV64-2048-NEXT:    ld ra, 440(sp) # 8-byte Folded Reload
-; RV64-2048-NEXT:    sh ra, 752(sp)
-; RV64-2048-NEXT:    ld a1, 472(sp) # 8-byte Folded Reload
-; RV64-2048-NEXT:    sh a1, 744(sp)
-; RV64-2048-NEXT:    ld a2, 464(sp) # 8-byte Folded Reload
-; RV64-2048-NEXT:    sh a2, 736(sp)
-; RV64-2048-NEXT:    ld a4, 456(sp) # 8-byte Folded Reload
-; RV64-2048-NEXT:    sh a4, 728(sp)
-; RV64-2048-NEXT:    ld s1, 448(sp) # 8-byte Folded Reload
-; RV64-2048-NEXT:    sh s1, 720(sp)
-; RV64-2048-NEXT:    ld a6, 432(sp) # 8-byte Folded Reload
-; RV64-2048-NEXT:    sh a6, 712(sp)
-; RV64-2048-NEXT:    sh a7, 704(sp)
-; RV64-2048-NEXT:    sh t0, 696(sp)
-; RV64-2048-NEXT:    sh t1, 688(sp)
-; RV64-2048-NEXT:    sh t2, 680(sp)
-; RV64-2048-NEXT:    sh t3, 672(sp)
-; RV64-2048-NEXT:    sh t4, 664(sp)
-; RV64-2048-NEXT:    sh t5, 656(sp)
-; RV64-2048-NEXT:    sh t6, 648(sp)
-; RV64-2048-NEXT:    sh s10, 640(sp)
-; RV64-2048-NEXT:    sh s11, 632(sp)
-; RV64-2048-NEXT:    sh s2, 624(sp)
-; RV64-2048-NEXT:    sh s3, 616(sp)
-; RV64-2048-NEXT:    sh s4, 608(sp)
-; RV64-2048-NEXT:    sh a5, 600(sp)
-; RV64-2048-NEXT:    sh s5, 592(sp)
-; RV64-2048-NEXT:    sh s6, 584(sp)
-; RV64-2048-NEXT:    sh s7, 576(sp)
-; RV64-2048-NEXT:    sh s8, 568(sp)
-; RV64-2048-NEXT:    sh s9, 560(sp)
-; RV64-2048-NEXT:    srli a5, ra, 32
-; RV64-2048-NEXT:    sh a5, 756(sp)
-; RV64-2048-NEXT:    addi a3, sp, 1944
-; RV64-2048-NEXT:    vl4re8.v v20, (a3) # Unknown-size Folded Reload
-; RV64-2048-NEXT:    vmv.x.s ra, v20
-; RV64-2048-NEXT:    sh ra, 552(sp)
-; RV64-2048-NEXT:    srli a3, a1, 32
-; RV64-2048-NEXT:    sh a3, 748(sp)
-; RV64-2048-NEXT:    vmv.x.s a3, v4
-; RV64-2048-NEXT:    sh a3, 544(sp)
-; RV64-2048-NEXT:    srli a1, a2, 32
-; RV64-2048-NEXT:    sh a1, 740(sp)
-; RV64-2048-NEXT:    vmv.x.s a1, v8
-; RV64-2048-NEXT:    sh a1, 536(sp)
-; RV64-2048-NEXT:    srli a2, a4, 32
-; RV64-2048-NEXT:    sh a2, 732(sp)
-; RV64-2048-NEXT:    vmv.x.s a2, v12
-; RV64-2048-NEXT:    sh a2, 528(sp)
-; RV64-2048-NEXT:    srli a4, s1, 32
-; RV64-2048-NEXT:    sh a4, 724(sp)
-; RV64-2048-NEXT:    vmv.x.s a4, v24
-; RV64-2048-NEXT:    sh a4, 520(sp)
-; RV64-2048-NEXT:    srli s1, a6, 32
-; RV64-2048-NEXT:    sh s1, 716(sp)
-; RV64-2048-NEXT:    vmv.x.s s1, v16
-; RV64-2048-NEXT:    sh s1, 512(sp)
-; RV64-2048-NEXT:    srli a5, a7, 32
-; RV64-2048-NEXT:    sh a5, 708(sp)
-; RV64-2048-NEXT:    srli a5, t0, 32
-; RV64-2048-NEXT:    sh a5, 700(sp)
-; RV64-2048-NEXT:    srli a5, t1, 32
-; RV64-2048-NEXT:    sh a5, 692(sp)
-; RV64-2048-NEXT:    srli a5, t2, 32
-; RV64-2048-NEXT:    sh a5, 684(sp)
-; RV64-2048-NEXT:    srli a5, t3, 32
-; RV64-2048-NEXT:    sh a5, 676(sp)
-; RV64-2048-NEXT:    srli a5, t4, 32
-; RV64-2048-NEXT:    sh a5, 668(sp)
-; RV64-2048-NEXT:    srli a5, t5, 32
-; RV64-2048-NEXT:    sh a5, 660(sp)
-; RV64-2048-NEXT:    srli a5, t6, 32
-; RV64-2048-NEXT:    sh a5, 652(sp)
-; RV64-2048-NEXT:    srli a5, s10, 32
-; RV64-2048-NEXT:    sh a5, 644(sp)
-; RV64-2048-NEXT:    srli a5, s11, 32
-; RV64-2048-NEXT:    sh a5, 636(sp)
-; RV64-2048-NEXT:    srli a5, s2, 32
-; RV64-2048-NEXT:    sh a5, 628(sp)
-; RV64-2048-NEXT:    srli a5, s3, 32
-; RV64-2048-NEXT:    sh a5, 620(sp)
-; RV64-2048-NEXT:    srli a5, s4, 32
-; RV64-2048-NEXT:    sh a5, 612(sp)
-; RV64-2048-NEXT:    ld a5, 504(sp) # 8-byte Folded Reload
-; RV64-2048-NEXT:    srli a5, a5, 32
-; RV64-2048-NEXT:    sh a5, 604(sp)
-; RV64-2048-NEXT:    srli a5, s5, 32
-; RV64-2048-NEXT:    sh a5, 596(sp)
-; RV64-2048-NEXT:    srli a5, s6, 32
-; RV64-2048-NEXT:    sh a5, 588(sp)
-; RV64-2048-NEXT:    srli a5, s7, 32
-; RV64-2048-NEXT:    sh a5, 580(sp)
-; RV64-2048-NEXT:    srli a5, s8, 32
-; RV64-2048-NEXT:    sh a5, 572(sp)
-; RV64-2048-NEXT:    srli a5, s9, 32
-; RV64-2048-NEXT:    sh a5, 564(sp)
-; RV64-2048-NEXT:    srli a5, ra, 32
-; RV64-2048-NEXT:    sh a5, 556(sp)
-; RV64-2048-NEXT:    srli a3, a3, 32
-; RV64-2048-NEXT:    sh a3, 548(sp)
-; RV64-2048-NEXT:    srli a1, a1, 32
-; RV64-2048-NEXT:    sh a1, 540(sp)
-; RV64-2048-NEXT:    srli a1, a2, 32
-; RV64-2048-NEXT:    sh a1, 532(sp)
-; RV64-2048-NEXT:    srli a1, a4, 32
-; RV64-2048-NEXT:    sh a1, 524(sp)
-; RV64-2048-NEXT:    srli a1, s1, 32
-; RV64-2048-NEXT:    sh a1, 516(sp)
-; RV64-2048-NEXT:    addi a1, zero, 63
-; RV64-2048-NEXT:    vslidedown.vx v8, v28, a1
-; RV64-2048-NEXT:    vmv.x.s t1, v8
-; RV64-2048-NEXT:    addi a1, zero, 62
-; RV64-2048-NEXT:    vslidedown.vx v8, v28, a1
-; RV64-2048-NEXT:    vmv.x.s a4, v8
-; RV64-2048-NEXT:    addi a1, zero, 61
-; RV64-2048-NEXT:    vslidedown.vx v8, v28, a1
-; RV64-2048-NEXT:    vmv.x.s t0, v8
-; RV64-2048-NEXT:    addi a1, zero, 60
-; RV64-2048-NEXT:    vslidedown.vx v8, v28, a1
-; RV64-2048-NEXT:    vmv.x.s t2, v8
-; RV64-2048-NEXT:    addi a1, zero, 59
-; RV64-2048-NEXT:    vslidedown.vx v8, v28, a1
-; RV64-2048-NEXT:    vmv.x.s t3, v8
-; RV64-2048-NEXT:    addi a1, zero, 58
-; RV64-2048-NEXT:    vslidedown.vx v8, v28, a1
-; RV64-2048-NEXT:    vmv.x.s s3, v8
-; RV64-2048-NEXT:    addi a1, zero, 57
-; RV64-2048-NEXT:    vslidedown.vx v8, v28, a1
-; RV64-2048-NEXT:    vmv.x.s t4, v8
-; RV64-2048-NEXT:    addi a1, zero, 56
-; RV64-2048-NEXT:    vslidedown.vx v8, v28, a1
-; RV64-2048-NEXT:    vmv.x.s t5, v8
-; RV64-2048-NEXT:    addi a1, zero, 55
-; RV64-2048-NEXT:    vslidedown.vx v8, v28, a1
-; RV64-2048-NEXT:    vmv.x.s t6, v8
-; RV64-2048-NEXT:    addi a1, zero, 54
-; RV64-2048-NEXT:    vslidedown.vx v8, v28, a1
-; RV64-2048-NEXT:    vmv.x.s s2, v8
-; RV64-2048-NEXT:    addi a1, zero, 53
-; RV64-2048-NEXT:    vslidedown.vx v8, v28, a1
-; RV64-2048-NEXT:    vmv.x.s s4, v8
-; RV64-2048-NEXT:    addi a1, zero, 52
-; RV64-2048-NEXT:    vslidedown.vx v8, v28, a1
-; RV64-2048-NEXT:    vmv.x.s s5, v8
-; RV64-2048-NEXT:    addi a1, zero, 51
-; RV64-2048-NEXT:    vslidedown.vx v8, v28, a1
-; RV64-2048-NEXT:    vmv.x.s s6, v8
-; RV64-2048-NEXT:    addi a1, zero, 50
-; RV64-2048-NEXT:    vslidedown.vx v8, v28, a1
-; RV64-2048-NEXT:    vmv.x.s s7, v8
-; RV64-2048-NEXT:    addi a1, zero, 49
-; RV64-2048-NEXT:    vslidedown.vx v8, v28, a1
-; RV64-2048-NEXT:    vmv.x.s s8, v8
-; RV64-2048-NEXT:    addi a1, zero, 48
-; RV64-2048-NEXT:    vslidedown.vx v8, v28, a1
-; RV64-2048-NEXT:    vmv.x.s s9, v8
-; RV64-2048-NEXT:    addi a1, zero, 47
-; RV64-2048-NEXT:    vslidedown.vx v8, v28, a1
-; RV64-2048-NEXT:    vmv.x.s s10, v8
-; RV64-2048-NEXT:    addi a1, zero, 46
-; RV64-2048-NEXT:    vslidedown.vx v8, v28, a1
-; RV64-2048-NEXT:    vmv.x.s s11, v8
-; RV64-2048-NEXT:    addi a1, zero, 45
-; RV64-2048-NEXT:    vslidedown.vx v8, v28, a1
-; RV64-2048-NEXT:    vmv.x.s ra, v8
-; RV64-2048-NEXT:    addi a1, zero, 44
-; RV64-2048-NEXT:    vslidedown.vx v8, v28, a1
-; RV64-2048-NEXT:    vmv.x.s a6, v8
-; RV64-2048-NEXT:    addi a1, zero, 43
-; RV64-2048-NEXT:    vslidedown.vx v8, v28, a1
-; RV64-2048-NEXT:    vmv.x.s a7, v8
-; RV64-2048-NEXT:    addi a1, zero, 42
-; RV64-2048-NEXT:    vslidedown.vx v8, v28, a1
-; RV64-2048-NEXT:    vmv.x.s s1, v8
-; RV64-2048-NEXT:    addi a1, zero, 41
-; RV64-2048-NEXT:    vslidedown.vx v8, v28, a1
-; RV64-2048-NEXT:    vmv.x.s a5, v8
-; RV64-2048-NEXT:    addi a1, zero, 40
-; RV64-2048-NEXT:    vslidedown.vx v8, v28, a1
-; RV64-2048-NEXT:    vmv.x.s a1, v8
-; RV64-2048-NEXT:    addi a2, zero, 39
-; RV64-2048-NEXT:    vslidedown.vx v0, v28, a2
-; RV64-2048-NEXT:    addi a2, zero, 38
-; RV64-2048-NEXT:    vslidedown.vx v4, v28, a2
-; RV64-2048-NEXT:    addi a2, zero, 37
-; RV64-2048-NEXT:    vslidedown.vx v8, v28, a2
-; RV64-2048-NEXT:    addi a2, zero, 36
-; RV64-2048-NEXT:    vslidedown.vx v12, v28, a2
-; RV64-2048-NEXT:    addi a2, zero, 35
-; RV64-2048-NEXT:    vslidedown.vx v16, v28, a2
-; RV64-2048-NEXT:    addi a2, zero, 34
-; RV64-2048-NEXT:    vslidedown.vx v20, v28, a2
-; RV64-2048-NEXT:    vmv.x.s a2, v28
-; RV64-2048-NEXT:    sh a2, 1024(sp)
-; RV64-2048-NEXT:    srli a2, a2, 32
-; RV64-2048-NEXT:    sh a2, 1028(sp)
-; RV64-2048-NEXT:    addi a2, zero, 33
-; RV64-2048-NEXT:    vslidedown.vx v24, v28, a2
-; RV64-2048-NEXT:    vmv.x.s a2, v0
-; RV64-2048-NEXT:    sh t1, 1528(sp)
-; RV64-2048-NEXT:    srli a3, t1, 32
-; RV64-2048-NEXT:    sh a3, 1532(sp)
-; RV64-2048-NEXT:    addi a3, zero, 32
-; RV64-2048-NEXT:    vslidedown.vx v0, v28, a3
-; RV64-2048-NEXT:    vmv.x.s a3, v4
-; RV64-2048-NEXT:    sh a4, 1520(sp)
-; RV64-2048-NEXT:    srli a4, a4, 32
-; RV64-2048-NEXT:    sh a4, 1524(sp)
-; RV64-2048-NEXT:    sh t0, 1512(sp)
-; RV64-2048-NEXT:    srli a4, t0, 32
-; RV64-2048-NEXT:    sh a4, 1516(sp)
-; RV64-2048-NEXT:    sh t2, 1504(sp)
-; RV64-2048-NEXT:    srli a4, t2, 32
-; RV64-2048-NEXT:    sh a4, 1508(sp)
-; RV64-2048-NEXT:    sh t3, 1496(sp)
-; RV64-2048-NEXT:    srli a4, t3, 32
-; RV64-2048-NEXT:    sh a4, 1500(sp)
-; RV64-2048-NEXT:    sh s3, 1488(sp)
-; RV64-2048-NEXT:    srli a4, s3, 32
-; RV64-2048-NEXT:    sh a4, 1492(sp)
-; RV64-2048-NEXT:    sh t4, 1480(sp)
-; RV64-2048-NEXT:    srli a4, t4, 32
-; RV64-2048-NEXT:    sh a4, 1484(sp)
-; RV64-2048-NEXT:    sh t5, 1472(sp)
-; RV64-2048-NEXT:    srli a4, t5, 32
-; RV64-2048-NEXT:    sh a4, 1476(sp)
-; RV64-2048-NEXT:    sh t6, 1464(sp)
-; RV64-2048-NEXT:    srli a4, t6, 32
-; RV64-2048-NEXT:    sh a4, 1468(sp)
-; RV64-2048-NEXT:    sh s2, 1456(sp)
-; RV64-2048-NEXT:    srli a4, s2, 32
-; RV64-2048-NEXT:    sh a4, 1460(sp)
-; RV64-2048-NEXT:    sh s4, 1448(sp)
-; RV64-2048-NEXT:    srli a4, s4, 32
-; RV64-2048-NEXT:    sh a4, 1452(sp)
-; RV64-2048-NEXT:    sh s5, 1440(sp)
-; RV64-2048-NEXT:    srli a4, s5, 32
-; RV64-2048-NEXT:    sh a4, 1444(sp)
-; RV64-2048-NEXT:    sh s6, 1432(sp)
-; RV64-2048-NEXT:    srli a4, s6, 32
-; RV64-2048-NEXT:    sh a4, 1436(sp)
-; RV64-2048-NEXT:    sh s7, 1424(sp)
-; RV64-2048-NEXT:    srli a4, s7, 32
-; RV64-2048-NEXT:    sh a4, 1428(sp)
-; RV64-2048-NEXT:    sh s8, 1416(sp)
-; RV64-2048-NEXT:    srli a4, s8, 32
-; RV64-2048-NEXT:    sh a4, 1420(sp)
-; RV64-2048-NEXT:    sh s9, 1408(sp)
-; RV64-2048-NEXT:    srli a4, s9, 32
-; RV64-2048-NEXT:    sh a4, 1412(sp)
-; RV64-2048-NEXT:    sh s10, 1400(sp)
-; RV64-2048-NEXT:    srli a4, s10, 32
-; RV64-2048-NEXT:    sh a4, 1404(sp)
-; RV64-2048-NEXT:    sh s11, 1392(sp)
-; RV64-2048-NEXT:    srli a4, s11, 32
-; RV64-2048-NEXT:    sh a4, 1396(sp)
-; RV64-2048-NEXT:    sh ra, 1384(sp)
-; RV64-2048-NEXT:    srli a4, ra, 32
-; RV64-2048-NEXT:    sh a4, 1388(sp)
-; RV64-2048-NEXT:    sh a6, 1376(sp)
-; RV64-2048-NEXT:    srli a4, a6, 32
-; RV64-2048-NEXT:    sh a4, 1380(sp)
-; RV64-2048-NEXT:    sh a7, 1368(sp)
-; RV64-2048-NEXT:    srli a4, a7, 32
-; RV64-2048-NEXT:    sh a4, 1372(sp)
-; RV64-2048-NEXT:    sh s1, 1360(sp)
-; RV64-2048-NEXT:    srli a4, s1, 32
-; RV64-2048-NEXT:    sh a4, 1364(sp)
-; RV64-2048-NEXT:    sh a5, 1352(sp)
-; RV64-2048-NEXT:    srli a4, a5, 32
-; RV64-2048-NEXT:    sh a4, 1356(sp)
-; RV64-2048-NEXT:    sh a1, 1344(sp)
-; RV64-2048-NEXT:    srli a1, a1, 32
-; RV64-2048-NEXT:    sh a1, 1348(sp)
-; RV64-2048-NEXT:    sh a2, 1336(sp)
-; RV64-2048-NEXT:    srli a1, a2, 32
-; RV64-2048-NEXT:    sh a1, 1340(sp)
-; RV64-2048-NEXT:    sh a3, 1328(sp)
-; RV64-2048-NEXT:    srli a1, a3, 32
-; RV64-2048-NEXT:    sh a1, 1332(sp)
-; RV64-2048-NEXT:    vmv.x.s a1, v8
-; RV64-2048-NEXT:    sh a1, 1320(sp)
-; RV64-2048-NEXT:    srli a1, a1, 32
-; RV64-2048-NEXT:    sh a1, 1324(sp)
-; RV64-2048-NEXT:    vmv.x.s a1, v12
-; RV64-2048-NEXT:    sh a1, 1312(sp)
-; RV64-2048-NEXT:    srli a1, a1, 32
-; RV64-2048-NEXT:    sh a1, 1316(sp)
-; RV64-2048-NEXT:    vmv.x.s a1, v16
-; RV64-2048-NEXT:    sh a1, 1304(sp)
-; RV64-2048-NEXT:    srli a1, a1, 32
-; RV64-2048-NEXT:    sh a1, 1308(sp)
-; RV64-2048-NEXT:    vmv.x.s a1, v20
-; RV64-2048-NEXT:    sh a1, 1296(sp)
-; RV64-2048-NEXT:    srli a1, a1, 32
-; RV64-2048-NEXT:    sh a1, 1300(sp)
-; RV64-2048-NEXT:    vmv.x.s a1, v24
-; RV64-2048-NEXT:    sh a1, 1288(sp)
-; RV64-2048-NEXT:    srli a1, a1, 32
-; RV64-2048-NEXT:    sh a1, 1292(sp)
-; RV64-2048-NEXT:    vmv.x.s a1, v0
-; RV64-2048-NEXT:    sh a1, 1280(sp)
-; RV64-2048-NEXT:    vslidedown.vi v8, v28, 31
-; RV64-2048-NEXT:    srli a1, a1, 32
-; RV64-2048-NEXT:    sh a1, 1284(sp)
-; RV64-2048-NEXT:    vmv.x.s a1, v8
-; RV64-2048-NEXT:    sh a1, 1272(sp)
-; RV64-2048-NEXT:    vslidedown.vi v8, v28, 30
-; RV64-2048-NEXT:    srli a1, a1, 32
-; RV64-2048-NEXT:    sh a1, 1276(sp)
-; RV64-2048-NEXT:    vmv.x.s a1, v8
-; RV64-2048-NEXT:    sh a1, 1264(sp)
-; RV64-2048-NEXT:    vslidedown.vi v8, v28, 29
-; RV64-2048-NEXT:    srli a1, a1, 32
-; RV64-2048-NEXT:    sh a1, 1268(sp)
-; RV64-2048-NEXT:    vmv.x.s a1, v8
-; RV64-2048-NEXT:    sh a1, 1256(sp)
-; RV64-2048-NEXT:    vslidedown.vi v8, v28, 28
-; RV64-2048-NEXT:    srli a1, a1, 32
-; RV64-2048-NEXT:    sh a1, 1260(sp)
-; RV64-2048-NEXT:    vmv.x.s a1, v8
-; RV64-2048-NEXT:    sh a1, 1248(sp)
-; RV64-2048-NEXT:    vslidedown.vi v8, v28, 27
-; RV64-2048-NEXT:    srli a1, a1, 32
-; RV64-2048-NEXT:    sh a1, 1252(sp)
-; RV64-2048-NEXT:    vmv.x.s a1, v8
-; RV64-2048-NEXT:    sh a1, 1240(sp)
-; RV64-2048-NEXT:    vslidedown.vi v8, v28, 26
-; RV64-2048-NEXT:    srli a1, a1, 32
-; RV64-2048-NEXT:    sh a1, 1244(sp)
-; RV64-2048-NEXT:    vmv.x.s a1, v8
-; RV64-2048-NEXT:    sh a1, 1232(sp)
-; RV64-2048-NEXT:    vslidedown.vi v8, v28, 25
-; RV64-2048-NEXT:    srli a1, a1, 32
-; RV64-2048-NEXT:    sh a1, 1236(sp)
-; RV64-2048-NEXT:    vmv.x.s a1, v8
-; RV64-2048-NEXT:    sh a1, 1224(sp)
-; RV64-2048-NEXT:    vslidedown.vi v8, v28, 24
-; RV64-2048-NEXT:    srli a1, a1, 32
-; RV64-2048-NEXT:    sh a1, 1228(sp)
-; RV64-2048-NEXT:    vmv.x.s a1, v8
-; RV64-2048-NEXT:    sh a1, 1216(sp)
-; RV64-2048-NEXT:    vslidedown.vi v8, v28, 23
-; RV64-2048-NEXT:    srli a1, a1, 32
-; RV64-2048-NEXT:    sh a1, 1220(sp)
-; RV64-2048-NEXT:    vmv.x.s a1, v8
-; RV64-2048-NEXT:    sh a1, 1208(sp)
-; RV64-2048-NEXT:    vslidedown.vi v8, v28, 22
-; RV64-2048-NEXT:    srli a1, a1, 32
-; RV64-2048-NEXT:    sh a1, 1212(sp)
-; RV64-2048-NEXT:    vmv.x.s a1, v8
-; RV64-2048-NEXT:    sh a1, 1200(sp)
-; RV64-2048-NEXT:    vslidedown.vi v8, v28, 21
-; RV64-2048-NEXT:    srli a1, a1, 32
-; RV64-2048-NEXT:    sh a1, 1204(sp)
-; RV64-2048-NEXT:    vmv.x.s a1, v8
-; RV64-2048-NEXT:    sh a1, 1192(sp)
-; RV64-2048-NEXT:    vslidedown.vi v8, v28, 20
-; RV64-2048-NEXT:    srli a1, a1, 32
-; RV64-2048-NEXT:    sh a1, 1196(sp)
-; RV64-2048-NEXT:    vmv.x.s a1, v8
-; RV64-2048-NEXT:    sh a1, 1184(sp)
-; RV64-2048-NEXT:    vslidedown.vi v8, v28, 19
-; RV64-2048-NEXT:    srli a1, a1, 32
-; RV64-2048-NEXT:    sh a1, 1188(sp)
-; RV64-2048-NEXT:    vmv.x.s a1, v8
-; RV64-2048-NEXT:    sh a1, 1176(sp)
-; RV64-2048-NEXT:    vslidedown.vi v8, v28, 18
-; RV64-2048-NEXT:    srli a1, a1, 32
-; RV64-2048-NEXT:    sh a1, 1180(sp)
-; RV64-2048-NEXT:    vmv.x.s a1, v8
-; RV64-2048-NEXT:    sh a1, 1168(sp)
-; RV64-2048-NEXT:    vslidedown.vi v8, v28, 17
-; RV64-2048-NEXT:    srli a1, a1, 32
-; RV64-2048-NEXT:    sh a1, 1172(sp)
-; RV64-2048-NEXT:    vmv.x.s a1, v8
-; RV64-2048-NEXT:    sh a1, 1160(sp)
-; RV64-2048-NEXT:    vslidedown.vi v8, v28, 16
-; RV64-2048-NEXT:    srli a1, a1, 32
-; RV64-2048-NEXT:    sh a1, 1164(sp)
-; RV64-2048-NEXT:    vmv.x.s a1, v8
-; RV64-2048-NEXT:    sh a1, 1152(sp)
-; RV64-2048-NEXT:    vslidedown.vi v8, v28, 15
-; RV64-2048-NEXT:    srli a1, a1, 32
-; RV64-2048-NEXT:    sh a1, 1156(sp)
-; RV64-2048-NEXT:    vmv.x.s a1, v8
-; RV64-2048-NEXT:    sh a1, 1144(sp)
-; RV64-2048-NEXT:    vslidedown.vi v8, v28, 14
-; RV64-2048-NEXT:    srli a1, a1, 32
-; RV64-2048-NEXT:    sh a1, 1148(sp)
-; RV64-2048-NEXT:    vmv.x.s a1, v8
-; RV64-2048-NEXT:    sh a1, 1136(sp)
-; RV64-2048-NEXT:    vslidedown.vi v8, v28, 13
-; RV64-2048-NEXT:    srli a1, a1, 32
-; RV64-2048-NEXT:    sh a1, 1140(sp)
-; RV64-2048-NEXT:    vmv.x.s a1, v8
-; RV64-2048-NEXT:    sh a1, 1128(sp)
-; RV64-2048-NEXT:    vslidedown.vi v8, v28, 12
-; RV64-2048-NEXT:    srli a1, a1, 32
-; RV64-2048-NEXT:    sh a1, 1132(sp)
-; RV64-2048-NEXT:    vmv.x.s a1, v8
-; RV64-2048-NEXT:    sh a1, 1120(sp)
-; RV64-2048-NEXT:    vslidedown.vi v8, v28, 11
-; RV64-2048-NEXT:    srli a1, a1, 32
-; RV64-2048-NEXT:    sh a1, 1124(sp)
-; RV64-2048-NEXT:    vmv.x.s a1, v8
-; RV64-2048-NEXT:    sh a1, 1112(sp)
-; RV64-2048-NEXT:    vslidedown.vi v8, v28, 10
-; RV64-2048-NEXT:    srli a1, a1, 32
-; RV64-2048-NEXT:    sh a1, 1116(sp)
-; RV64-2048-NEXT:    vmv.x.s a1, v8
-; RV64-2048-NEXT:    sh a1, 1104(sp)
-; RV64-2048-NEXT:    vslidedown.vi v8, v28, 9
-; RV64-2048-NEXT:    srli a1, a1, 32
-; RV64-2048-NEXT:    sh a1, 1108(sp)
-; RV64-2048-NEXT:    vmv.x.s a1, v8
-; RV64-2048-NEXT:    sh a1, 1096(sp)
-; RV64-2048-NEXT:    vslidedown.vi v8, v28, 8
-; RV64-2048-NEXT:    srli a1, a1, 32
-; RV64-2048-NEXT:    sh a1, 1100(sp)
-; RV64-2048-NEXT:    vmv.x.s a1, v8
-; RV64-2048-NEXT:    sh a1, 1088(sp)
-; RV64-2048-NEXT:    vslidedown.vi v8, v28, 7
-; RV64-2048-NEXT:    srli a1, a1, 32
-; RV64-2048-NEXT:    sh a1, 1092(sp)
-; RV64-2048-NEXT:    vmv.x.s a1, v8
-; RV64-2048-NEXT:    sh a1, 1080(sp)
-; RV64-2048-NEXT:    vslidedown.vi v8, v28, 6
-; RV64-2048-NEXT:    srli a1, a1, 32
-; RV64-2048-NEXT:    sh a1, 1084(sp)
-; RV64-2048-NEXT:    vmv.x.s a1, v8
-; RV64-2048-NEXT:    sh a1, 1072(sp)
-; RV64-2048-NEXT:    vslidedown.vi v8, v28, 5
-; RV64-2048-NEXT:    srli a1, a1, 32
-; RV64-2048-NEXT:    sh a1, 1076(sp)
-; RV64-2048-NEXT:    vmv.x.s a1, v8
-; RV64-2048-NEXT:    sh a1, 1064(sp)
-; RV64-2048-NEXT:    vslidedown.vi v8, v28, 4
-; RV64-2048-NEXT:    srli a1, a1, 32
-; RV64-2048-NEXT:    sh a1, 1068(sp)
-; RV64-2048-NEXT:    vmv.x.s a1, v8
-; RV64-2048-NEXT:    sh a1, 1056(sp)
-; RV64-2048-NEXT:    vslidedown.vi v8, v28, 3
-; RV64-2048-NEXT:    srli a1, a1, 32
-; RV64-2048-NEXT:    sh a1, 1060(sp)
-; RV64-2048-NEXT:    vmv.x.s a1, v8
-; RV64-2048-NEXT:    sh a1, 1048(sp)
-; RV64-2048-NEXT:    vslidedown.vi v8, v28, 2
-; RV64-2048-NEXT:    srli a1, a1, 32
-; RV64-2048-NEXT:    sh a1, 1052(sp)
-; RV64-2048-NEXT:    vmv.x.s a1, v8
-; RV64-2048-NEXT:    sh a1, 1040(sp)
-; RV64-2048-NEXT:    vslidedown.vi v28, v28, 1
-; RV64-2048-NEXT:    srli a1, a1, 32
-; RV64-2048-NEXT:    sh a1, 1044(sp)
-; RV64-2048-NEXT:    vmv.x.s a1, v28
-; RV64-2048-NEXT:    sh a1, 1032(sp)
-; RV64-2048-NEXT:    srli a1, a1, 32
-; RV64-2048-NEXT:    sh a1, 1036(sp)
-; RV64-2048-NEXT:    addi a2, zero, 256
-; RV64-2048-NEXT:    vsetvli zero, a2, e16, m2, ta, mu
-; RV64-2048-NEXT:    addi a1, sp, 512
-; RV64-2048-NEXT:    vle16.v v26, (a1)
-; RV64-2048-NEXT:    addi a1, sp, 1024
-; RV64-2048-NEXT:    vle16.v v28, (a1)
-; RV64-2048-NEXT:    lui a1, 1026731
-; RV64-2048-NEXT:    addiw a1, a1, -1365
-; RV64-2048-NEXT:    slli a1, a1, 12
-; RV64-2048-NEXT:    addi a1, a1, -1365
-; RV64-2048-NEXT:    slli a1, a1, 12
-; RV64-2048-NEXT:    addi a1, a1, -1365
-; RV64-2048-NEXT:    slli a1, a1, 12
-; RV64-2048-NEXT:    addi a1, a1, -1366
-; RV64-2048-NEXT:    vsetivli zero, 4, e64, m1, ta, mu
-; RV64-2048-NEXT:    vmv.s.x v25, a1
+; RV64-2048-NEXT:    vsetvli zero, zero, e16, m4, ta, mu
+; RV64-2048-NEXT:    vle16.v v12, (a2)
+; RV64-2048-NEXT:    vrgather.vv v8, v24, v12
+; RV64-2048-NEXT:    lui a2, 1026731
+; RV64-2048-NEXT:    addiw a2, a2, -1365
+; RV64-2048-NEXT:    slli a2, a2, 12
+; RV64-2048-NEXT:    addi a2, a2, -1365
+; RV64-2048-NEXT:    slli a2, a2, 12
+; RV64-2048-NEXT:    addi a2, a2, -1365
+; RV64-2048-NEXT:    slli a2, a2, 12
+; RV64-2048-NEXT:    addi a2, a2, -1366
+; RV64-2048-NEXT:    vsetivli zero, 8, e64, m1, ta, mu
+; RV64-2048-NEXT:    vmv.s.x v25, a2
 ; RV64-2048-NEXT:    vsetivli zero, 2, e64, m1, tu, mu
 ; RV64-2048-NEXT:    vmv1r.v v0, v25
 ; RV64-2048-NEXT:    vslideup.vi v0, v25, 1
@@ -1873,48 +274,22 @@ define void @interleave512(<512 x i16>* %agg.result, <256 x i16>* %0, <256 x i16
 ; RV64-2048-NEXT:    vslideup.vi v0, v25, 2
 ; RV64-2048-NEXT:    vsetivli zero, 4, e64, m1, tu, mu
 ; RV64-2048-NEXT:    vslideup.vi v0, v25, 3
-; RV64-2048-NEXT:    vsetvli zero, a2, e16, m2, ta, mu
-; RV64-2048-NEXT:    lui a1, %hi(.LCPI1_2)
-; RV64-2048-NEXT:    addi a1, a1, %lo(.LCPI1_2)
-; RV64-2048-NEXT:    vle16.v v30, (a1)
-; RV64-2048-NEXT:    lui a1, %hi(.LCPI1_3)
-; RV64-2048-NEXT:    addi a1, a1, %lo(.LCPI1_3)
-; RV64-2048-NEXT:    vle16.v v8, (a1)
-; RV64-2048-NEXT:    vrgather.vv v10, v28, v30
-; RV64-2048-NEXT:    vsetvli zero, zero, e16, m2, tu, mu
-; RV64-2048-NEXT:    csrr a1, vlenb
-; RV64-2048-NEXT:    slli a1, a1, 2
-; RV64-2048-NEXT:    add a1, sp, a1
-; RV64-2048-NEXT:    addi a1, a1, 1944
-; RV64-2048-NEXT:    vl2re8.v v12, (a1) # Unknown-size Folded Reload
-; RV64-2048-NEXT:    vrgather.vv v10, v12, v8, v0.t
-; RV64-2048-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
-; RV64-2048-NEXT:    lui a1, %hi(.LCPI1_4)
-; RV64-2048-NEXT:    addi a1, a1, %lo(.LCPI1_4)
-; RV64-2048-NEXT:    vle16.v v28, (a1)
-; RV64-2048-NEXT:    vrgather.vv v8, v26, v30
-; RV64-2048-NEXT:    vsetvli zero, zero, e16, m2, tu, mu
-; RV64-2048-NEXT:    vrgather.vv v8, v12, v28, v0.t
-; RV64-2048-NEXT:    addi a1, a0, 512
-; RV64-2048-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
-; RV64-2048-NEXT:    vse16.v v8, (a1)
-; RV64-2048-NEXT:    vse16.v v10, (a0)
-; RV64-2048-NEXT:    addi sp, s0, -2048
-; RV64-2048-NEXT:    addi sp, sp, 16
-; RV64-2048-NEXT:    ld s11, 1928(sp) # 8-byte Folded Reload
-; RV64-2048-NEXT:    ld s10, 1936(sp) # 8-byte Folded Reload
-; RV64-2048-NEXT:    ld s9, 1944(sp) # 8-byte Folded Reload
-; RV64-2048-NEXT:    ld s8, 1952(sp) # 8-byte Folded Reload
-; RV64-2048-NEXT:    ld s7, 1960(sp) # 8-byte Folded Reload
-; RV64-2048-NEXT:    ld s6, 1968(sp) # 8-byte Folded Reload
-; RV64-2048-NEXT:    ld s5, 1976(sp) # 8-byte Folded Reload
-; RV64-2048-NEXT:    ld s4, 1984(sp) # 8-byte Folded Reload
-; RV64-2048-NEXT:    ld s3, 1992(sp) # 8-byte Folded Reload
-; RV64-2048-NEXT:    ld s2, 2000(sp) # 8-byte Folded Reload
-; RV64-2048-NEXT:    ld s1, 2008(sp) # 8-byte Folded Reload
-; RV64-2048-NEXT:    ld s0, 2016(sp) # 8-byte Folded Reload
-; RV64-2048-NEXT:    ld ra, 2024(sp) # 8-byte Folded Reload
-; RV64-2048-NEXT:    addi sp, sp, 2032
+; RV64-2048-NEXT:    vsetivli zero, 5, e64, m1, tu, mu
+; RV64-2048-NEXT:    vslideup.vi v0, v25, 4
+; RV64-2048-NEXT:    vsetivli zero, 6, e64, m1, tu, mu
+; RV64-2048-NEXT:    vslideup.vi v0, v25, 5
+; RV64-2048-NEXT:    vsetivli zero, 7, e64, m1, tu, mu
+; RV64-2048-NEXT:    vslideup.vi v0, v25, 6
+; RV64-2048-NEXT:    vsetivli zero, 8, e64, m1, tu, mu
+; RV64-2048-NEXT:    vslideup.vi v0, v25, 7
+; RV64-2048-NEXT:    lui a2, %hi(.LCPI1_2)
+; RV64-2048-NEXT:    addi a2, a2, %lo(.LCPI1_2)
+; RV64-2048-NEXT:    vsetvli zero, a1, e16, m4, ta, mu
+; RV64-2048-NEXT:    vle16.v v12, (a2)
+; RV64-2048-NEXT:    vsetvli zero, zero, e16, m4, tu, mu
+; RV64-2048-NEXT:    vrgather.vv v8, v28, v12, v0.t
+; RV64-2048-NEXT:    vsetvli zero, zero, e16, m4, ta, mu
+; RV64-2048-NEXT:    vse16.v v8, (a0)
 ; RV64-2048-NEXT:    ret
 entry:
   %ve = load <256 x i16>, <256 x i16>* %0, align 512

diff  --git a/llvm/utils/TableGen/CodeGenTarget.cpp b/llvm/utils/TableGen/CodeGenTarget.cpp
index da77af166b0ba..a1e67842b7807 100644
--- a/llvm/utils/TableGen/CodeGenTarget.cpp
+++ b/llvm/utils/TableGen/CodeGenTarget.cpp
@@ -99,6 +99,8 @@ StringRef llvm::getEnumName(MVT::SimpleValueType T) {
   case MVT::v64i8:    return "MVT::v64i8";
   case MVT::v128i8:   return "MVT::v128i8";
   case MVT::v256i8:   return "MVT::v256i8";
+  case MVT::v512i8:   return "MVT::v512i8";
+  case MVT::v1024i8:  return "MVT::v1024i8";
   case MVT::v1i16:    return "MVT::v1i16";
   case MVT::v2i16:    return "MVT::v2i16";
   case MVT::v3i16:    return "MVT::v3i16";
@@ -109,6 +111,7 @@ StringRef llvm::getEnumName(MVT::SimpleValueType T) {
   case MVT::v64i16:   return "MVT::v64i16";
   case MVT::v128i16:  return "MVT::v128i16";
   case MVT::v256i16:  return "MVT::v256i16";
+  case MVT::v512i16:  return "MVT::v512i16";
   case MVT::v1i32:    return "MVT::v1i32";
   case MVT::v2i32:    return "MVT::v2i32";
   case MVT::v3i32:    return "MVT::v3i32";
@@ -143,6 +146,7 @@ StringRef llvm::getEnumName(MVT::SimpleValueType T) {
   case MVT::v64f16:   return "MVT::v64f16";
   case MVT::v128f16:  return "MVT::v128f16";
   case MVT::v256f16:  return "MVT::v256f16";
+  case MVT::v512f16:  return "MVT::v512f16";
   case MVT::v2bf16:   return "MVT::v2bf16";
   case MVT::v3bf16:   return "MVT::v3bf16";
   case MVT::v4bf16:   return "MVT::v4bf16";
