[llvm] ccd1e08 - [RISCV] Add a test case showing inefficient vector codegen

Fraser Cormack via llvm-commits llvm-commits at lists.llvm.org
Tue Jun 8 03:15:31 PDT 2021


Author: Fraser Cormack
Date: 2021-06-08T11:07:12+01:00
New Revision: ccd1e087f3702d5ccdfcce24ac7f7d2877921165

URL: https://github.com/llvm/llvm-project/commit/ccd1e087f3702d5ccdfcce24ac7f7d2877921165
DIFF: https://github.com/llvm/llvm-project/commit/ccd1e087f3702d5ccdfcce24ac7f7d2877921165.diff

LOG: [RISCV] Add a test case showing inefficient vector codegen

Added: 
    llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitcast-large-vector.ll

Modified: 
    

Removed: 
    


################################################################################
diff  --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitcast-large-vector.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitcast-large-vector.ll
new file mode 100644
index 000000000000..58cd8d1c6af6
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitcast-large-vector.ll
@@ -0,0 +1,1206 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs -riscv-v-vector-bits-min=256 < %s | FileCheck %s --check-prefix=VLEN256
+; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs -riscv-v-vector-bits-min=512 < %s | FileCheck %s --check-prefix=VLEN512
+
+; FIXME: The larger VLEN (512) produces much worse code than VLEN=256.
+; FIXME: v256i16 is legal, so v512i8 should be too, since they are the same size.
+define <512 x i8> @bitcast_1024B(<256 x i16> %a, <512 x i8> %b) {
+; VLEN256-LABEL: bitcast_1024B:
+; VLEN256:       # %bb.0:
+; VLEN256-NEXT:    addi a1, a0, 256
+; VLEN256-NEXT:    addi a2, zero, 256
+; VLEN256-NEXT:    vsetvli zero, a2, e8,m8,ta,mu
+; VLEN256-NEXT:    vle8.v v24, (a0)
+; VLEN256-NEXT:    vle8.v v0, (a1)
+; VLEN256-NEXT:    vadd.vv v8, v24, v8
+; VLEN256-NEXT:    vadd.vv v16, v0, v16
+; VLEN256-NEXT:    ret
+;
+; VLEN512-LABEL: bitcast_1024B:
+; VLEN512:       # %bb.0:
+; VLEN512-NEXT:    addi sp, sp, -1024
+; VLEN512-NEXT:    .cfi_def_cfa_offset 1024
+; VLEN512-NEXT:    sd ra, 1016(sp) # 8-byte Folded Spill
+; VLEN512-NEXT:    sd s0, 1008(sp) # 8-byte Folded Spill
+; VLEN512-NEXT:    sd s1, 1000(sp) # 8-byte Folded Spill
+; VLEN512-NEXT:    sd s2, 992(sp) # 8-byte Folded Spill
+; VLEN512-NEXT:    sd s3, 984(sp) # 8-byte Folded Spill
+; VLEN512-NEXT:    sd s4, 976(sp) # 8-byte Folded Spill
+; VLEN512-NEXT:    sd s5, 968(sp) # 8-byte Folded Spill
+; VLEN512-NEXT:    sd s6, 960(sp) # 8-byte Folded Spill
+; VLEN512-NEXT:    sd s7, 952(sp) # 8-byte Folded Spill
+; VLEN512-NEXT:    sd s8, 944(sp) # 8-byte Folded Spill
+; VLEN512-NEXT:    sd s9, 936(sp) # 8-byte Folded Spill
+; VLEN512-NEXT:    sd s10, 928(sp) # 8-byte Folded Spill
+; VLEN512-NEXT:    sd s11, 920(sp) # 8-byte Folded Spill
+; VLEN512-NEXT:    .cfi_offset ra, -8
+; VLEN512-NEXT:    .cfi_offset s0, -16
+; VLEN512-NEXT:    .cfi_offset s1, -24
+; VLEN512-NEXT:    .cfi_offset s2, -32
+; VLEN512-NEXT:    .cfi_offset s3, -40
+; VLEN512-NEXT:    .cfi_offset s4, -48
+; VLEN512-NEXT:    .cfi_offset s5, -56
+; VLEN512-NEXT:    .cfi_offset s6, -64
+; VLEN512-NEXT:    .cfi_offset s7, -72
+; VLEN512-NEXT:    .cfi_offset s8, -80
+; VLEN512-NEXT:    .cfi_offset s9, -88
+; VLEN512-NEXT:    .cfi_offset s10, -96
+; VLEN512-NEXT:    .cfi_offset s11, -104
+; VLEN512-NEXT:    addi s0, sp, 1024
+; VLEN512-NEXT:    .cfi_def_cfa s0, 0
+; VLEN512-NEXT:    csrr a0, vlenb
+; VLEN512-NEXT:    slli a0, a0, 3
+; VLEN512-NEXT:    sub sp, sp, a0
+; VLEN512-NEXT:    andi sp, sp, -256
+; VLEN512-NEXT:    addi a0, zero, 32
+; VLEN512-NEXT:    vsetivli zero, 1, e64,m8,ta,mu
+; VLEN512-NEXT:    vslidedown.vx v24, v8, a0
+; VLEN512-NEXT:    vmv.x.s a6, v24
+; VLEN512-NEXT:    addi a0, zero, 33
+; VLEN512-NEXT:    vslidedown.vx v24, v8, a0
+; VLEN512-NEXT:    addi a0, sp, 920
+; VLEN512-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
+; VLEN512-NEXT:    addi a0, zero, 34
+; VLEN512-NEXT:    addi a1, zero, 35
+; VLEN512-NEXT:    vslidedown.vx v0, v8, a1
+; VLEN512-NEXT:    vmv.x.s a7, v0
+; VLEN512-NEXT:    addi a1, zero, 36
+; VLEN512-NEXT:    vslidedown.vx v0, v8, a1
+; VLEN512-NEXT:    vmv.x.s t0, v0
+; VLEN512-NEXT:    addi a1, zero, 37
+; VLEN512-NEXT:    vslidedown.vx v0, v8, a1
+; VLEN512-NEXT:    vmv.x.s t1, v0
+; VLEN512-NEXT:    addi a1, zero, 38
+; VLEN512-NEXT:    vslidedown.vx v0, v8, a1
+; VLEN512-NEXT:    vmv.x.s t2, v0
+; VLEN512-NEXT:    addi a1, zero, 39
+; VLEN512-NEXT:    vslidedown.vx v0, v8, a1
+; VLEN512-NEXT:    vmv.x.s t3, v0
+; VLEN512-NEXT:    addi a1, zero, 40
+; VLEN512-NEXT:    vslidedown.vx v0, v8, a1
+; VLEN512-NEXT:    vmv.x.s t4, v0
+; VLEN512-NEXT:    addi a1, zero, 41
+; VLEN512-NEXT:    vslidedown.vx v0, v8, a1
+; VLEN512-NEXT:    vmv.x.s t5, v0
+; VLEN512-NEXT:    addi a1, zero, 42
+; VLEN512-NEXT:    vslidedown.vx v0, v8, a1
+; VLEN512-NEXT:    vmv.x.s t6, v0
+; VLEN512-NEXT:    addi a1, zero, 43
+; VLEN512-NEXT:    vslidedown.vx v0, v8, a1
+; VLEN512-NEXT:    vmv.x.s s2, v0
+; VLEN512-NEXT:    addi a1, zero, 44
+; VLEN512-NEXT:    vslidedown.vx v0, v8, a1
+; VLEN512-NEXT:    vmv.x.s s3, v0
+; VLEN512-NEXT:    addi a1, zero, 45
+; VLEN512-NEXT:    vslidedown.vx v0, v8, a1
+; VLEN512-NEXT:    vmv.x.s s4, v0
+; VLEN512-NEXT:    addi a1, zero, 46
+; VLEN512-NEXT:    vslidedown.vx v0, v8, a1
+; VLEN512-NEXT:    vmv.x.s s5, v0
+; VLEN512-NEXT:    addi a1, zero, 47
+; VLEN512-NEXT:    vslidedown.vx v0, v8, a1
+; VLEN512-NEXT:    vmv.x.s s6, v0
+; VLEN512-NEXT:    addi a1, zero, 48
+; VLEN512-NEXT:    vslidedown.vx v0, v8, a1
+; VLEN512-NEXT:    vmv.x.s s7, v0
+; VLEN512-NEXT:    addi a1, zero, 49
+; VLEN512-NEXT:    vslidedown.vx v0, v8, a1
+; VLEN512-NEXT:    vmv.x.s s8, v0
+; VLEN512-NEXT:    addi a1, zero, 50
+; VLEN512-NEXT:    vslidedown.vx v0, v8, a1
+; VLEN512-NEXT:    vmv.x.s s9, v0
+; VLEN512-NEXT:    addi a1, zero, 51
+; VLEN512-NEXT:    vslidedown.vx v0, v8, a1
+; VLEN512-NEXT:    vmv.x.s s10, v0
+; VLEN512-NEXT:    addi a1, zero, 52
+; VLEN512-NEXT:    vslidedown.vx v0, v8, a1
+; VLEN512-NEXT:    vmv.x.s s11, v0
+; VLEN512-NEXT:    addi a1, zero, 53
+; VLEN512-NEXT:    vslidedown.vx v0, v8, a1
+; VLEN512-NEXT:    vmv.x.s ra, v0
+; VLEN512-NEXT:    addi a1, zero, 54
+; VLEN512-NEXT:    vslidedown.vx v0, v8, a1
+; VLEN512-NEXT:    vmv.x.s a1, v0
+; VLEN512-NEXT:    addi a2, zero, 55
+; VLEN512-NEXT:    vslidedown.vx v0, v8, a2
+; VLEN512-NEXT:    vmv.x.s a2, v0
+; VLEN512-NEXT:    addi a3, zero, 56
+; VLEN512-NEXT:    vslidedown.vx v0, v8, a3
+; VLEN512-NEXT:    vmv.x.s s1, v0
+; VLEN512-NEXT:    addi a3, zero, 57
+; VLEN512-NEXT:    vslidedown.vx v0, v8, a3
+; VLEN512-NEXT:    vmv.x.s a3, v0
+; VLEN512-NEXT:    addi a4, zero, 58
+; VLEN512-NEXT:    vslidedown.vx v0, v8, a4
+; VLEN512-NEXT:    vmv.x.s a4, v0
+; VLEN512-NEXT:    addi a5, zero, 63
+; VLEN512-NEXT:    vslidedown.vx v0, v8, a5
+; VLEN512-NEXT:    vmv.x.s a5, v0
+; VLEN512-NEXT:    vslidedown.vx v0, v8, a0
+; VLEN512-NEXT:    srli a0, a5, 56
+; VLEN512-NEXT:    sb a0, 511(sp)
+; VLEN512-NEXT:    srli a0, a5, 48
+; VLEN512-NEXT:    sb a0, 510(sp)
+; VLEN512-NEXT:    srli a0, a5, 40
+; VLEN512-NEXT:    sb a0, 509(sp)
+; VLEN512-NEXT:    srli a0, a5, 32
+; VLEN512-NEXT:    sb a0, 508(sp)
+; VLEN512-NEXT:    srli a0, a5, 24
+; VLEN512-NEXT:    sb a0, 507(sp)
+; VLEN512-NEXT:    srli a0, a5, 16
+; VLEN512-NEXT:    sb a0, 506(sp)
+; VLEN512-NEXT:    addi a0, zero, 62
+; VLEN512-NEXT:    vslidedown.vx v24, v8, a0
+; VLEN512-NEXT:    vmv.x.s a0, v24
+; VLEN512-NEXT:    sb a5, 504(sp)
+; VLEN512-NEXT:    srli a5, a5, 8
+; VLEN512-NEXT:    sb a5, 505(sp)
+; VLEN512-NEXT:    srli a5, a0, 56
+; VLEN512-NEXT:    sb a5, 503(sp)
+; VLEN512-NEXT:    srli a5, a0, 48
+; VLEN512-NEXT:    sb a5, 502(sp)
+; VLEN512-NEXT:    srli a5, a0, 40
+; VLEN512-NEXT:    sb a5, 501(sp)
+; VLEN512-NEXT:    srli a5, a0, 32
+; VLEN512-NEXT:    sb a5, 500(sp)
+; VLEN512-NEXT:    srli a5, a0, 24
+; VLEN512-NEXT:    sb a5, 499(sp)
+; VLEN512-NEXT:    srli a5, a0, 16
+; VLEN512-NEXT:    sb a5, 498(sp)
+; VLEN512-NEXT:    addi a5, zero, 61
+; VLEN512-NEXT:    vslidedown.vx v24, v8, a5
+; VLEN512-NEXT:    vmv.x.s a5, v24
+; VLEN512-NEXT:    sb a0, 496(sp)
+; VLEN512-NEXT:    srli a0, a0, 8
+; VLEN512-NEXT:    sb a0, 497(sp)
+; VLEN512-NEXT:    srli a0, a5, 56
+; VLEN512-NEXT:    sb a0, 495(sp)
+; VLEN512-NEXT:    srli a0, a5, 48
+; VLEN512-NEXT:    sb a0, 494(sp)
+; VLEN512-NEXT:    srli a0, a5, 40
+; VLEN512-NEXT:    sb a0, 493(sp)
+; VLEN512-NEXT:    srli a0, a5, 32
+; VLEN512-NEXT:    sb a0, 492(sp)
+; VLEN512-NEXT:    srli a0, a5, 24
+; VLEN512-NEXT:    sb a0, 491(sp)
+; VLEN512-NEXT:    srli a0, a5, 16
+; VLEN512-NEXT:    sb a0, 490(sp)
+; VLEN512-NEXT:    addi a0, zero, 60
+; VLEN512-NEXT:    vslidedown.vx v24, v8, a0
+; VLEN512-NEXT:    vmv.x.s a0, v24
+; VLEN512-NEXT:    sb a5, 488(sp)
+; VLEN512-NEXT:    srli a5, a5, 8
+; VLEN512-NEXT:    sb a5, 489(sp)
+; VLEN512-NEXT:    srli a5, a0, 56
+; VLEN512-NEXT:    sb a5, 487(sp)
+; VLEN512-NEXT:    srli a5, a0, 48
+; VLEN512-NEXT:    sb a5, 486(sp)
+; VLEN512-NEXT:    srli a5, a0, 40
+; VLEN512-NEXT:    sb a5, 485(sp)
+; VLEN512-NEXT:    srli a5, a0, 32
+; VLEN512-NEXT:    sb a5, 484(sp)
+; VLEN512-NEXT:    srli a5, a0, 24
+; VLEN512-NEXT:    sb a5, 483(sp)
+; VLEN512-NEXT:    srli a5, a0, 16
+; VLEN512-NEXT:    sb a5, 482(sp)
+; VLEN512-NEXT:    addi a5, zero, 59
+; VLEN512-NEXT:    vslidedown.vx v24, v8, a5
+; VLEN512-NEXT:    vmv.x.s a5, v24
+; VLEN512-NEXT:    sb a0, 480(sp)
+; VLEN512-NEXT:    srli a0, a0, 8
+; VLEN512-NEXT:    sb a0, 481(sp)
+; VLEN512-NEXT:    srli a0, a5, 56
+; VLEN512-NEXT:    sb a0, 479(sp)
+; VLEN512-NEXT:    srli a0, a5, 48
+; VLEN512-NEXT:    sb a0, 478(sp)
+; VLEN512-NEXT:    srli a0, a5, 40
+; VLEN512-NEXT:    sb a0, 477(sp)
+; VLEN512-NEXT:    srli a0, a5, 32
+; VLEN512-NEXT:    sb a0, 476(sp)
+; VLEN512-NEXT:    srli a0, a5, 24
+; VLEN512-NEXT:    sb a0, 475(sp)
+; VLEN512-NEXT:    srli a0, a5, 16
+; VLEN512-NEXT:    sb a0, 474(sp)
+; VLEN512-NEXT:    sb a5, 472(sp)
+; VLEN512-NEXT:    srli a0, a5, 8
+; VLEN512-NEXT:    sb a0, 473(sp)
+; VLEN512-NEXT:    srli a0, a4, 56
+; VLEN512-NEXT:    sb a0, 471(sp)
+; VLEN512-NEXT:    srli a0, a4, 48
+; VLEN512-NEXT:    sb a0, 470(sp)
+; VLEN512-NEXT:    srli a0, a4, 40
+; VLEN512-NEXT:    sb a0, 469(sp)
+; VLEN512-NEXT:    srli a0, a4, 32
+; VLEN512-NEXT:    sb a0, 468(sp)
+; VLEN512-NEXT:    srli a0, a4, 24
+; VLEN512-NEXT:    sb a0, 467(sp)
+; VLEN512-NEXT:    srli a0, a4, 16
+; VLEN512-NEXT:    sb a0, 466(sp)
+; VLEN512-NEXT:    sb a4, 464(sp)
+; VLEN512-NEXT:    srli a0, a4, 8
+; VLEN512-NEXT:    sb a0, 465(sp)
+; VLEN512-NEXT:    srli a0, a3, 56
+; VLEN512-NEXT:    sb a0, 463(sp)
+; VLEN512-NEXT:    srli a0, a3, 48
+; VLEN512-NEXT:    sb a0, 462(sp)
+; VLEN512-NEXT:    srli a0, a3, 40
+; VLEN512-NEXT:    sb a0, 461(sp)
+; VLEN512-NEXT:    srli a0, a3, 32
+; VLEN512-NEXT:    sb a0, 460(sp)
+; VLEN512-NEXT:    srli a0, a3, 24
+; VLEN512-NEXT:    sb a0, 459(sp)
+; VLEN512-NEXT:    srli a0, a3, 16
+; VLEN512-NEXT:    sb a0, 458(sp)
+; VLEN512-NEXT:    sb a3, 456(sp)
+; VLEN512-NEXT:    srli a0, a3, 8
+; VLEN512-NEXT:    sb a0, 457(sp)
+; VLEN512-NEXT:    srli a0, s1, 56
+; VLEN512-NEXT:    sb a0, 455(sp)
+; VLEN512-NEXT:    srli a0, s1, 48
+; VLEN512-NEXT:    sb a0, 454(sp)
+; VLEN512-NEXT:    srli a0, s1, 40
+; VLEN512-NEXT:    sb a0, 453(sp)
+; VLEN512-NEXT:    srli a0, s1, 32
+; VLEN512-NEXT:    sb a0, 452(sp)
+; VLEN512-NEXT:    srli a0, s1, 24
+; VLEN512-NEXT:    sb a0, 451(sp)
+; VLEN512-NEXT:    srli a0, s1, 16
+; VLEN512-NEXT:    sb a0, 450(sp)
+; VLEN512-NEXT:    sb s1, 448(sp)
+; VLEN512-NEXT:    srli a0, s1, 8
+; VLEN512-NEXT:    sb a0, 449(sp)
+; VLEN512-NEXT:    srli a0, a2, 56
+; VLEN512-NEXT:    sb a0, 447(sp)
+; VLEN512-NEXT:    srli a0, a2, 48
+; VLEN512-NEXT:    sb a0, 446(sp)
+; VLEN512-NEXT:    srli a0, a2, 40
+; VLEN512-NEXT:    sb a0, 445(sp)
+; VLEN512-NEXT:    srli a0, a2, 32
+; VLEN512-NEXT:    sb a0, 444(sp)
+; VLEN512-NEXT:    srli a0, a2, 24
+; VLEN512-NEXT:    sb a0, 443(sp)
+; VLEN512-NEXT:    srli a0, a2, 16
+; VLEN512-NEXT:    sb a0, 442(sp)
+; VLEN512-NEXT:    sb a2, 440(sp)
+; VLEN512-NEXT:    srli a0, a2, 8
+; VLEN512-NEXT:    sb a0, 441(sp)
+; VLEN512-NEXT:    srli a0, a1, 56
+; VLEN512-NEXT:    sb a0, 439(sp)
+; VLEN512-NEXT:    srli a0, a1, 48
+; VLEN512-NEXT:    sb a0, 438(sp)
+; VLEN512-NEXT:    srli a0, a1, 40
+; VLEN512-NEXT:    sb a0, 437(sp)
+; VLEN512-NEXT:    srli a0, a1, 32
+; VLEN512-NEXT:    sb a0, 436(sp)
+; VLEN512-NEXT:    srli a0, a1, 24
+; VLEN512-NEXT:    sb a0, 435(sp)
+; VLEN512-NEXT:    srli a0, a1, 16
+; VLEN512-NEXT:    sb a0, 434(sp)
+; VLEN512-NEXT:    sb a1, 432(sp)
+; VLEN512-NEXT:    srli a0, a1, 8
+; VLEN512-NEXT:    sb a0, 433(sp)
+; VLEN512-NEXT:    srli a0, ra, 56
+; VLEN512-NEXT:    sb a0, 431(sp)
+; VLEN512-NEXT:    srli a0, ra, 48
+; VLEN512-NEXT:    sb a0, 430(sp)
+; VLEN512-NEXT:    srli a0, ra, 40
+; VLEN512-NEXT:    sb a0, 429(sp)
+; VLEN512-NEXT:    srli a0, ra, 32
+; VLEN512-NEXT:    sb a0, 428(sp)
+; VLEN512-NEXT:    srli a0, ra, 24
+; VLEN512-NEXT:    sb a0, 427(sp)
+; VLEN512-NEXT:    srli a0, ra, 16
+; VLEN512-NEXT:    sb a0, 426(sp)
+; VLEN512-NEXT:    sb ra, 424(sp)
+; VLEN512-NEXT:    srli a0, ra, 8
+; VLEN512-NEXT:    sb a0, 425(sp)
+; VLEN512-NEXT:    srli a0, s11, 56
+; VLEN512-NEXT:    sb a0, 423(sp)
+; VLEN512-NEXT:    srli a0, s11, 48
+; VLEN512-NEXT:    sb a0, 422(sp)
+; VLEN512-NEXT:    srli a0, s11, 40
+; VLEN512-NEXT:    sb a0, 421(sp)
+; VLEN512-NEXT:    srli a0, s11, 32
+; VLEN512-NEXT:    sb a0, 420(sp)
+; VLEN512-NEXT:    srli a0, s11, 24
+; VLEN512-NEXT:    sb a0, 419(sp)
+; VLEN512-NEXT:    srli a0, s11, 16
+; VLEN512-NEXT:    sb a0, 418(sp)
+; VLEN512-NEXT:    sb s11, 416(sp)
+; VLEN512-NEXT:    srli a0, s11, 8
+; VLEN512-NEXT:    sb a0, 417(sp)
+; VLEN512-NEXT:    srli a0, s10, 56
+; VLEN512-NEXT:    sb a0, 415(sp)
+; VLEN512-NEXT:    srli a0, s10, 48
+; VLEN512-NEXT:    sb a0, 414(sp)
+; VLEN512-NEXT:    srli a0, s10, 40
+; VLEN512-NEXT:    sb a0, 413(sp)
+; VLEN512-NEXT:    srli a0, s10, 32
+; VLEN512-NEXT:    sb a0, 412(sp)
+; VLEN512-NEXT:    srli a0, s10, 24
+; VLEN512-NEXT:    sb a0, 411(sp)
+; VLEN512-NEXT:    srli a0, s10, 16
+; VLEN512-NEXT:    sb a0, 410(sp)
+; VLEN512-NEXT:    sb s10, 408(sp)
+; VLEN512-NEXT:    srli a0, s10, 8
+; VLEN512-NEXT:    sb a0, 409(sp)
+; VLEN512-NEXT:    srli a0, s9, 56
+; VLEN512-NEXT:    sb a0, 407(sp)
+; VLEN512-NEXT:    srli a0, s9, 48
+; VLEN512-NEXT:    sb a0, 406(sp)
+; VLEN512-NEXT:    srli a0, s9, 40
+; VLEN512-NEXT:    sb a0, 405(sp)
+; VLEN512-NEXT:    srli a0, s9, 32
+; VLEN512-NEXT:    sb a0, 404(sp)
+; VLEN512-NEXT:    srli a0, s9, 24
+; VLEN512-NEXT:    sb a0, 403(sp)
+; VLEN512-NEXT:    srli a0, s9, 16
+; VLEN512-NEXT:    sb a0, 402(sp)
+; VLEN512-NEXT:    sb s9, 400(sp)
+; VLEN512-NEXT:    srli a0, s9, 8
+; VLEN512-NEXT:    sb a0, 401(sp)
+; VLEN512-NEXT:    srli a0, s8, 56
+; VLEN512-NEXT:    sb a0, 399(sp)
+; VLEN512-NEXT:    srli a0, s8, 48
+; VLEN512-NEXT:    sb a0, 398(sp)
+; VLEN512-NEXT:    srli a0, s8, 40
+; VLEN512-NEXT:    sb a0, 397(sp)
+; VLEN512-NEXT:    srli a0, s8, 32
+; VLEN512-NEXT:    sb a0, 396(sp)
+; VLEN512-NEXT:    srli a0, s8, 24
+; VLEN512-NEXT:    sb a0, 395(sp)
+; VLEN512-NEXT:    srli a0, s8, 16
+; VLEN512-NEXT:    sb a0, 394(sp)
+; VLEN512-NEXT:    sb s8, 392(sp)
+; VLEN512-NEXT:    srli a0, s8, 8
+; VLEN512-NEXT:    sb a0, 393(sp)
+; VLEN512-NEXT:    srli a0, s7, 56
+; VLEN512-NEXT:    sb a0, 391(sp)
+; VLEN512-NEXT:    srli a0, s7, 48
+; VLEN512-NEXT:    sb a0, 390(sp)
+; VLEN512-NEXT:    srli a0, s7, 40
+; VLEN512-NEXT:    sb a0, 389(sp)
+; VLEN512-NEXT:    srli a0, s7, 32
+; VLEN512-NEXT:    sb a0, 388(sp)
+; VLEN512-NEXT:    srli a0, s7, 24
+; VLEN512-NEXT:    sb a0, 387(sp)
+; VLEN512-NEXT:    srli a0, s7, 16
+; VLEN512-NEXT:    sb a0, 386(sp)
+; VLEN512-NEXT:    sb s7, 384(sp)
+; VLEN512-NEXT:    srli a0, s7, 8
+; VLEN512-NEXT:    sb a0, 385(sp)
+; VLEN512-NEXT:    srli a0, s6, 56
+; VLEN512-NEXT:    sb a0, 383(sp)
+; VLEN512-NEXT:    srli a0, s6, 48
+; VLEN512-NEXT:    sb a0, 382(sp)
+; VLEN512-NEXT:    srli a0, s6, 40
+; VLEN512-NEXT:    sb a0, 381(sp)
+; VLEN512-NEXT:    srli a0, s6, 32
+; VLEN512-NEXT:    sb a0, 380(sp)
+; VLEN512-NEXT:    srli a0, s6, 24
+; VLEN512-NEXT:    sb a0, 379(sp)
+; VLEN512-NEXT:    srli a0, s6, 16
+; VLEN512-NEXT:    sb a0, 378(sp)
+; VLEN512-NEXT:    sb s6, 376(sp)
+; VLEN512-NEXT:    srli a0, s6, 8
+; VLEN512-NEXT:    sb a0, 377(sp)
+; VLEN512-NEXT:    srli a0, s5, 56
+; VLEN512-NEXT:    sb a0, 375(sp)
+; VLEN512-NEXT:    srli a0, s5, 48
+; VLEN512-NEXT:    sb a0, 374(sp)
+; VLEN512-NEXT:    srli a0, s5, 40
+; VLEN512-NEXT:    sb a0, 373(sp)
+; VLEN512-NEXT:    srli a0, s5, 32
+; VLEN512-NEXT:    sb a0, 372(sp)
+; VLEN512-NEXT:    srli a0, s5, 24
+; VLEN512-NEXT:    sb a0, 371(sp)
+; VLEN512-NEXT:    srli a0, s5, 16
+; VLEN512-NEXT:    sb a0, 370(sp)
+; VLEN512-NEXT:    sb s5, 368(sp)
+; VLEN512-NEXT:    srli a0, s5, 8
+; VLEN512-NEXT:    sb a0, 369(sp)
+; VLEN512-NEXT:    srli a0, s4, 56
+; VLEN512-NEXT:    sb a0, 367(sp)
+; VLEN512-NEXT:    srli a0, s4, 48
+; VLEN512-NEXT:    sb a0, 366(sp)
+; VLEN512-NEXT:    srli a0, s4, 40
+; VLEN512-NEXT:    sb a0, 365(sp)
+; VLEN512-NEXT:    srli a0, s4, 32
+; VLEN512-NEXT:    sb a0, 364(sp)
+; VLEN512-NEXT:    srli a0, s4, 24
+; VLEN512-NEXT:    sb a0, 363(sp)
+; VLEN512-NEXT:    srli a0, s4, 16
+; VLEN512-NEXT:    sb a0, 362(sp)
+; VLEN512-NEXT:    sb s4, 360(sp)
+; VLEN512-NEXT:    srli a0, s4, 8
+; VLEN512-NEXT:    sb a0, 361(sp)
+; VLEN512-NEXT:    srli a0, s3, 56
+; VLEN512-NEXT:    sb a0, 359(sp)
+; VLEN512-NEXT:    srli a0, s3, 48
+; VLEN512-NEXT:    sb a0, 358(sp)
+; VLEN512-NEXT:    srli a0, s3, 40
+; VLEN512-NEXT:    sb a0, 357(sp)
+; VLEN512-NEXT:    srli a0, s3, 32
+; VLEN512-NEXT:    sb a0, 356(sp)
+; VLEN512-NEXT:    srli a0, s3, 24
+; VLEN512-NEXT:    sb a0, 355(sp)
+; VLEN512-NEXT:    srli a0, s3, 16
+; VLEN512-NEXT:    sb a0, 354(sp)
+; VLEN512-NEXT:    sb s3, 352(sp)
+; VLEN512-NEXT:    srli a0, s3, 8
+; VLEN512-NEXT:    sb a0, 353(sp)
+; VLEN512-NEXT:    srli a0, s2, 56
+; VLEN512-NEXT:    sb a0, 351(sp)
+; VLEN512-NEXT:    srli a0, s2, 48
+; VLEN512-NEXT:    sb a0, 350(sp)
+; VLEN512-NEXT:    srli a0, s2, 40
+; VLEN512-NEXT:    sb a0, 349(sp)
+; VLEN512-NEXT:    srli a0, s2, 32
+; VLEN512-NEXT:    sb a0, 348(sp)
+; VLEN512-NEXT:    srli a0, s2, 24
+; VLEN512-NEXT:    sb a0, 347(sp)
+; VLEN512-NEXT:    srli a0, s2, 16
+; VLEN512-NEXT:    sb a0, 346(sp)
+; VLEN512-NEXT:    sb s2, 344(sp)
+; VLEN512-NEXT:    srli a0, s2, 8
+; VLEN512-NEXT:    sb a0, 345(sp)
+; VLEN512-NEXT:    srli a0, t6, 56
+; VLEN512-NEXT:    sb a0, 343(sp)
+; VLEN512-NEXT:    srli a0, t6, 48
+; VLEN512-NEXT:    sb a0, 342(sp)
+; VLEN512-NEXT:    srli a0, t6, 40
+; VLEN512-NEXT:    sb a0, 341(sp)
+; VLEN512-NEXT:    srli a0, t6, 32
+; VLEN512-NEXT:    sb a0, 340(sp)
+; VLEN512-NEXT:    srli a0, t6, 24
+; VLEN512-NEXT:    sb a0, 339(sp)
+; VLEN512-NEXT:    srli a0, t6, 16
+; VLEN512-NEXT:    sb a0, 338(sp)
+; VLEN512-NEXT:    sb t6, 336(sp)
+; VLEN512-NEXT:    srli a0, t6, 8
+; VLEN512-NEXT:    sb a0, 337(sp)
+; VLEN512-NEXT:    srli a0, t5, 56
+; VLEN512-NEXT:    sb a0, 335(sp)
+; VLEN512-NEXT:    srli a0, t5, 48
+; VLEN512-NEXT:    sb a0, 334(sp)
+; VLEN512-NEXT:    srli a0, t5, 40
+; VLEN512-NEXT:    sb a0, 333(sp)
+; VLEN512-NEXT:    srli a0, t5, 32
+; VLEN512-NEXT:    sb a0, 332(sp)
+; VLEN512-NEXT:    srli a0, t5, 24
+; VLEN512-NEXT:    sb a0, 331(sp)
+; VLEN512-NEXT:    srli a0, t5, 16
+; VLEN512-NEXT:    sb a0, 330(sp)
+; VLEN512-NEXT:    sb t5, 328(sp)
+; VLEN512-NEXT:    srli a0, t5, 8
+; VLEN512-NEXT:    sb a0, 329(sp)
+; VLEN512-NEXT:    srli a0, t4, 56
+; VLEN512-NEXT:    sb a0, 327(sp)
+; VLEN512-NEXT:    srli a0, t4, 48
+; VLEN512-NEXT:    sb a0, 326(sp)
+; VLEN512-NEXT:    srli a0, t4, 40
+; VLEN512-NEXT:    sb a0, 325(sp)
+; VLEN512-NEXT:    srli a0, t4, 32
+; VLEN512-NEXT:    sb a0, 324(sp)
+; VLEN512-NEXT:    srli a0, t4, 24
+; VLEN512-NEXT:    sb a0, 323(sp)
+; VLEN512-NEXT:    srli a0, t4, 16
+; VLEN512-NEXT:    sb a0, 322(sp)
+; VLEN512-NEXT:    sb t4, 320(sp)
+; VLEN512-NEXT:    srli a0, t4, 8
+; VLEN512-NEXT:    sb a0, 321(sp)
+; VLEN512-NEXT:    srli a0, t3, 56
+; VLEN512-NEXT:    sb a0, 319(sp)
+; VLEN512-NEXT:    srli a0, t3, 48
+; VLEN512-NEXT:    sb a0, 318(sp)
+; VLEN512-NEXT:    srli a0, t3, 40
+; VLEN512-NEXT:    sb a0, 317(sp)
+; VLEN512-NEXT:    srli a0, t3, 32
+; VLEN512-NEXT:    sb a0, 316(sp)
+; VLEN512-NEXT:    srli a0, t3, 24
+; VLEN512-NEXT:    sb a0, 315(sp)
+; VLEN512-NEXT:    srli a0, t3, 16
+; VLEN512-NEXT:    sb a0, 314(sp)
+; VLEN512-NEXT:    sb t3, 312(sp)
+; VLEN512-NEXT:    srli a0, t3, 8
+; VLEN512-NEXT:    sb a0, 313(sp)
+; VLEN512-NEXT:    srli a0, t2, 56
+; VLEN512-NEXT:    sb a0, 311(sp)
+; VLEN512-NEXT:    srli a0, t2, 48
+; VLEN512-NEXT:    sb a0, 310(sp)
+; VLEN512-NEXT:    srli a0, t2, 40
+; VLEN512-NEXT:    sb a0, 309(sp)
+; VLEN512-NEXT:    srli a0, t2, 32
+; VLEN512-NEXT:    sb a0, 308(sp)
+; VLEN512-NEXT:    srli a0, t2, 24
+; VLEN512-NEXT:    sb a0, 307(sp)
+; VLEN512-NEXT:    srli a0, t2, 16
+; VLEN512-NEXT:    sb a0, 306(sp)
+; VLEN512-NEXT:    sb t2, 304(sp)
+; VLEN512-NEXT:    srli a0, t2, 8
+; VLEN512-NEXT:    sb a0, 305(sp)
+; VLEN512-NEXT:    srli a0, t1, 56
+; VLEN512-NEXT:    sb a0, 303(sp)
+; VLEN512-NEXT:    srli a0, t1, 48
+; VLEN512-NEXT:    sb a0, 302(sp)
+; VLEN512-NEXT:    srli a0, t1, 40
+; VLEN512-NEXT:    sb a0, 301(sp)
+; VLEN512-NEXT:    srli a0, t1, 32
+; VLEN512-NEXT:    sb a0, 300(sp)
+; VLEN512-NEXT:    srli a0, t1, 24
+; VLEN512-NEXT:    sb a0, 299(sp)
+; VLEN512-NEXT:    srli a0, t1, 16
+; VLEN512-NEXT:    sb a0, 298(sp)
+; VLEN512-NEXT:    sb t1, 296(sp)
+; VLEN512-NEXT:    srli a0, t1, 8
+; VLEN512-NEXT:    sb a0, 297(sp)
+; VLEN512-NEXT:    srli a0, t0, 56
+; VLEN512-NEXT:    sb a0, 295(sp)
+; VLEN512-NEXT:    srli a0, t0, 48
+; VLEN512-NEXT:    sb a0, 294(sp)
+; VLEN512-NEXT:    srli a0, t0, 40
+; VLEN512-NEXT:    sb a0, 293(sp)
+; VLEN512-NEXT:    srli a0, t0, 32
+; VLEN512-NEXT:    sb a0, 292(sp)
+; VLEN512-NEXT:    srli a0, t0, 24
+; VLEN512-NEXT:    sb a0, 291(sp)
+; VLEN512-NEXT:    srli a0, t0, 16
+; VLEN512-NEXT:    sb a0, 290(sp)
+; VLEN512-NEXT:    sb t0, 288(sp)
+; VLEN512-NEXT:    srli a0, t0, 8
+; VLEN512-NEXT:    sb a0, 289(sp)
+; VLEN512-NEXT:    srli a0, a7, 56
+; VLEN512-NEXT:    sb a0, 287(sp)
+; VLEN512-NEXT:    srli a0, a7, 48
+; VLEN512-NEXT:    sb a0, 286(sp)
+; VLEN512-NEXT:    srli a0, a7, 40
+; VLEN512-NEXT:    sb a0, 285(sp)
+; VLEN512-NEXT:    srli a0, a7, 32
+; VLEN512-NEXT:    sb a0, 284(sp)
+; VLEN512-NEXT:    srli a0, a7, 24
+; VLEN512-NEXT:    sb a0, 283(sp)
+; VLEN512-NEXT:    srli a0, a7, 16
+; VLEN512-NEXT:    sb a0, 282(sp)
+; VLEN512-NEXT:    vmv.x.s a0, v0
+; VLEN512-NEXT:    sb a7, 280(sp)
+; VLEN512-NEXT:    srli a1, a7, 8
+; VLEN512-NEXT:    sb a1, 281(sp)
+; VLEN512-NEXT:    srli a1, a0, 56
+; VLEN512-NEXT:    sb a1, 279(sp)
+; VLEN512-NEXT:    srli a1, a0, 48
+; VLEN512-NEXT:    sb a1, 278(sp)
+; VLEN512-NEXT:    srli a1, a0, 40
+; VLEN512-NEXT:    sb a1, 277(sp)
+; VLEN512-NEXT:    srli a1, a0, 32
+; VLEN512-NEXT:    sb a1, 276(sp)
+; VLEN512-NEXT:    srli a1, a0, 24
+; VLEN512-NEXT:    sb a1, 275(sp)
+; VLEN512-NEXT:    srli a1, a0, 16
+; VLEN512-NEXT:    sb a1, 274(sp)
+; VLEN512-NEXT:    addi a1, sp, 920
+; VLEN512-NEXT:    vl8re8.v v24, (a1) # Unknown-size Folded Reload
+; VLEN512-NEXT:    vmv.x.s a1, v24
+; VLEN512-NEXT:    sb a0, 272(sp)
+; VLEN512-NEXT:    srli a0, a0, 8
+; VLEN512-NEXT:    sb a0, 273(sp)
+; VLEN512-NEXT:    srli a0, a1, 56
+; VLEN512-NEXT:    sb a0, 271(sp)
+; VLEN512-NEXT:    srli a0, a1, 48
+; VLEN512-NEXT:    sb a0, 270(sp)
+; VLEN512-NEXT:    srli a0, a1, 40
+; VLEN512-NEXT:    sb a0, 269(sp)
+; VLEN512-NEXT:    srli a0, a1, 32
+; VLEN512-NEXT:    sb a0, 268(sp)
+; VLEN512-NEXT:    srli a0, a1, 24
+; VLEN512-NEXT:    sb a0, 267(sp)
+; VLEN512-NEXT:    srli a0, a1, 16
+; VLEN512-NEXT:    sb a0, 266(sp)
+; VLEN512-NEXT:    srli a0, a6, 16
+; VLEN512-NEXT:    sb a1, 264(sp)
+; VLEN512-NEXT:    srli a1, a1, 8
+; VLEN512-NEXT:    sb a1, 265(sp)
+; VLEN512-NEXT:    srli a1, a6, 56
+; VLEN512-NEXT:    sb a1, 263(sp)
+; VLEN512-NEXT:    srli a1, a6, 48
+; VLEN512-NEXT:    sb a1, 262(sp)
+; VLEN512-NEXT:    srli a1, a6, 40
+; VLEN512-NEXT:    sb a1, 261(sp)
+; VLEN512-NEXT:    srli a1, a6, 32
+; VLEN512-NEXT:    sb a1, 260(sp)
+; VLEN512-NEXT:    srli a1, a6, 24
+; VLEN512-NEXT:    sb a1, 259(sp)
+; VLEN512-NEXT:    vmv.x.s a1, v8
+; VLEN512-NEXT:    sb a0, 258(sp)
+; VLEN512-NEXT:    srli a0, a1, 56
+; VLEN512-NEXT:    sb a6, 256(sp)
+; VLEN512-NEXT:    srli a2, a6, 8
+; VLEN512-NEXT:    sb a2, 257(sp)
+; VLEN512-NEXT:    vslidedown.vi v24, v8, 31
+; VLEN512-NEXT:    sb a0, 519(sp)
+; VLEN512-NEXT:    srli a0, a1, 48
+; VLEN512-NEXT:    sb a0, 518(sp)
+; VLEN512-NEXT:    srli a0, a1, 40
+; VLEN512-NEXT:    sb a0, 517(sp)
+; VLEN512-NEXT:    srli a0, a1, 32
+; VLEN512-NEXT:    sb a0, 516(sp)
+; VLEN512-NEXT:    srli a0, a1, 24
+; VLEN512-NEXT:    sb a0, 515(sp)
+; VLEN512-NEXT:    srli a0, a1, 16
+; VLEN512-NEXT:    sb a0, 514(sp)
+; VLEN512-NEXT:    vmv.x.s a0, v24
+; VLEN512-NEXT:    vslidedown.vi v24, v8, 30
+; VLEN512-NEXT:    sb a1, 512(sp)
+; VLEN512-NEXT:    srli a1, a1, 8
+; VLEN512-NEXT:    sb a1, 513(sp)
+; VLEN512-NEXT:    srli a1, a0, 56
+; VLEN512-NEXT:    sb a1, 767(sp)
+; VLEN512-NEXT:    srli a1, a0, 48
+; VLEN512-NEXT:    sb a1, 766(sp)
+; VLEN512-NEXT:    srli a1, a0, 40
+; VLEN512-NEXT:    sb a1, 765(sp)
+; VLEN512-NEXT:    srli a1, a0, 32
+; VLEN512-NEXT:    sb a1, 764(sp)
+; VLEN512-NEXT:    srli a1, a0, 24
+; VLEN512-NEXT:    sb a1, 763(sp)
+; VLEN512-NEXT:    srli a1, a0, 16
+; VLEN512-NEXT:    sb a1, 762(sp)
+; VLEN512-NEXT:    vmv.x.s a1, v24
+; VLEN512-NEXT:    vslidedown.vi v24, v8, 29
+; VLEN512-NEXT:    sb a0, 760(sp)
+; VLEN512-NEXT:    srli a0, a0, 8
+; VLEN512-NEXT:    sb a0, 761(sp)
+; VLEN512-NEXT:    srli a0, a1, 56
+; VLEN512-NEXT:    sb a0, 759(sp)
+; VLEN512-NEXT:    srli a0, a1, 48
+; VLEN512-NEXT:    sb a0, 758(sp)
+; VLEN512-NEXT:    srli a0, a1, 40
+; VLEN512-NEXT:    sb a0, 757(sp)
+; VLEN512-NEXT:    srli a0, a1, 32
+; VLEN512-NEXT:    sb a0, 756(sp)
+; VLEN512-NEXT:    srli a0, a1, 24
+; VLEN512-NEXT:    sb a0, 755(sp)
+; VLEN512-NEXT:    srli a0, a1, 16
+; VLEN512-NEXT:    sb a0, 754(sp)
+; VLEN512-NEXT:    vmv.x.s a0, v24
+; VLEN512-NEXT:    vslidedown.vi v24, v8, 28
+; VLEN512-NEXT:    sb a1, 752(sp)
+; VLEN512-NEXT:    srli a1, a1, 8
+; VLEN512-NEXT:    sb a1, 753(sp)
+; VLEN512-NEXT:    srli a1, a0, 56
+; VLEN512-NEXT:    sb a1, 751(sp)
+; VLEN512-NEXT:    srli a1, a0, 48
+; VLEN512-NEXT:    sb a1, 750(sp)
+; VLEN512-NEXT:    srli a1, a0, 40
+; VLEN512-NEXT:    sb a1, 749(sp)
+; VLEN512-NEXT:    srli a1, a0, 32
+; VLEN512-NEXT:    sb a1, 748(sp)
+; VLEN512-NEXT:    srli a1, a0, 24
+; VLEN512-NEXT:    sb a1, 747(sp)
+; VLEN512-NEXT:    srli a1, a0, 16
+; VLEN512-NEXT:    sb a1, 746(sp)
+; VLEN512-NEXT:    vmv.x.s a1, v24
+; VLEN512-NEXT:    vslidedown.vi v24, v8, 27
+; VLEN512-NEXT:    sb a0, 744(sp)
+; VLEN512-NEXT:    srli a0, a0, 8
+; VLEN512-NEXT:    sb a0, 745(sp)
+; VLEN512-NEXT:    srli a0, a1, 56
+; VLEN512-NEXT:    sb a0, 743(sp)
+; VLEN512-NEXT:    srli a0, a1, 48
+; VLEN512-NEXT:    sb a0, 742(sp)
+; VLEN512-NEXT:    srli a0, a1, 40
+; VLEN512-NEXT:    sb a0, 741(sp)
+; VLEN512-NEXT:    srli a0, a1, 32
+; VLEN512-NEXT:    sb a0, 740(sp)
+; VLEN512-NEXT:    srli a0, a1, 24
+; VLEN512-NEXT:    sb a0, 739(sp)
+; VLEN512-NEXT:    srli a0, a1, 16
+; VLEN512-NEXT:    sb a0, 738(sp)
+; VLEN512-NEXT:    vmv.x.s a0, v24
+; VLEN512-NEXT:    vslidedown.vi v24, v8, 26
+; VLEN512-NEXT:    sb a1, 736(sp)
+; VLEN512-NEXT:    srli a1, a1, 8
+; VLEN512-NEXT:    sb a1, 737(sp)
+; VLEN512-NEXT:    srli a1, a0, 56
+; VLEN512-NEXT:    sb a1, 735(sp)
+; VLEN512-NEXT:    srli a1, a0, 48
+; VLEN512-NEXT:    sb a1, 734(sp)
+; VLEN512-NEXT:    srli a1, a0, 40
+; VLEN512-NEXT:    sb a1, 733(sp)
+; VLEN512-NEXT:    srli a1, a0, 32
+; VLEN512-NEXT:    sb a1, 732(sp)
+; VLEN512-NEXT:    srli a1, a0, 24
+; VLEN512-NEXT:    sb a1, 731(sp)
+; VLEN512-NEXT:    srli a1, a0, 16
+; VLEN512-NEXT:    sb a1, 730(sp)
+; VLEN512-NEXT:    vmv.x.s a1, v24
+; VLEN512-NEXT:    vslidedown.vi v24, v8, 25
+; VLEN512-NEXT:    sb a0, 728(sp)
+; VLEN512-NEXT:    srli a0, a0, 8
+; VLEN512-NEXT:    sb a0, 729(sp)
+; VLEN512-NEXT:    srli a0, a1, 56
+; VLEN512-NEXT:    sb a0, 727(sp)
+; VLEN512-NEXT:    srli a0, a1, 48
+; VLEN512-NEXT:    sb a0, 726(sp)
+; VLEN512-NEXT:    srli a0, a1, 40
+; VLEN512-NEXT:    sb a0, 725(sp)
+; VLEN512-NEXT:    srli a0, a1, 32
+; VLEN512-NEXT:    sb a0, 724(sp)
+; VLEN512-NEXT:    srli a0, a1, 24
+; VLEN512-NEXT:    sb a0, 723(sp)
+; VLEN512-NEXT:    srli a0, a1, 16
+; VLEN512-NEXT:    sb a0, 722(sp)
+; VLEN512-NEXT:    vmv.x.s a0, v24
+; VLEN512-NEXT:    vslidedown.vi v24, v8, 24
+; VLEN512-NEXT:    sb a1, 720(sp)
+; VLEN512-NEXT:    srli a1, a1, 8
+; VLEN512-NEXT:    sb a1, 721(sp)
+; VLEN512-NEXT:    srli a1, a0, 56
+; VLEN512-NEXT:    sb a1, 719(sp)
+; VLEN512-NEXT:    srli a1, a0, 48
+; VLEN512-NEXT:    sb a1, 718(sp)
+; VLEN512-NEXT:    srli a1, a0, 40
+; VLEN512-NEXT:    sb a1, 717(sp)
+; VLEN512-NEXT:    srli a1, a0, 32
+; VLEN512-NEXT:    sb a1, 716(sp)
+; VLEN512-NEXT:    srli a1, a0, 24
+; VLEN512-NEXT:    sb a1, 715(sp)
+; VLEN512-NEXT:    srli a1, a0, 16
+; VLEN512-NEXT:    sb a1, 714(sp)
+; VLEN512-NEXT:    vmv.x.s a1, v24
+; VLEN512-NEXT:    vslidedown.vi v24, v8, 23
+; VLEN512-NEXT:    sb a0, 712(sp)
+; VLEN512-NEXT:    srli a0, a0, 8
+; VLEN512-NEXT:    sb a0, 713(sp)
+; VLEN512-NEXT:    srli a0, a1, 56
+; VLEN512-NEXT:    sb a0, 711(sp)
+; VLEN512-NEXT:    srli a0, a1, 48
+; VLEN512-NEXT:    sb a0, 710(sp)
+; VLEN512-NEXT:    srli a0, a1, 40
+; VLEN512-NEXT:    sb a0, 709(sp)
+; VLEN512-NEXT:    srli a0, a1, 32
+; VLEN512-NEXT:    sb a0, 708(sp)
+; VLEN512-NEXT:    srli a0, a1, 24
+; VLEN512-NEXT:    sb a0, 707(sp)
+; VLEN512-NEXT:    srli a0, a1, 16
+; VLEN512-NEXT:    sb a0, 706(sp)
+; VLEN512-NEXT:    vmv.x.s a0, v24
+; VLEN512-NEXT:    vslidedown.vi v24, v8, 22
+; VLEN512-NEXT:    sb a1, 704(sp)
+; VLEN512-NEXT:    srli a1, a1, 8
+; VLEN512-NEXT:    sb a1, 705(sp)
+; VLEN512-NEXT:    srli a1, a0, 56
+; VLEN512-NEXT:    sb a1, 703(sp)
+; VLEN512-NEXT:    srli a1, a0, 48
+; VLEN512-NEXT:    sb a1, 702(sp)
+; VLEN512-NEXT:    srli a1, a0, 40
+; VLEN512-NEXT:    sb a1, 701(sp)
+; VLEN512-NEXT:    srli a1, a0, 32
+; VLEN512-NEXT:    sb a1, 700(sp)
+; VLEN512-NEXT:    srli a1, a0, 24
+; VLEN512-NEXT:    sb a1, 699(sp)
+; VLEN512-NEXT:    srli a1, a0, 16
+; VLEN512-NEXT:    sb a1, 698(sp)
+; VLEN512-NEXT:    vmv.x.s a1, v24
+; VLEN512-NEXT:    vslidedown.vi v24, v8, 21
+; VLEN512-NEXT:    sb a0, 696(sp)
+; VLEN512-NEXT:    srli a0, a0, 8
+; VLEN512-NEXT:    sb a0, 697(sp)
+; VLEN512-NEXT:    srli a0, a1, 56
+; VLEN512-NEXT:    sb a0, 695(sp)
+; VLEN512-NEXT:    srli a0, a1, 48
+; VLEN512-NEXT:    sb a0, 694(sp)
+; VLEN512-NEXT:    srli a0, a1, 40
+; VLEN512-NEXT:    sb a0, 693(sp)
+; VLEN512-NEXT:    srli a0, a1, 32
+; VLEN512-NEXT:    sb a0, 692(sp)
+; VLEN512-NEXT:    srli a0, a1, 24
+; VLEN512-NEXT:    sb a0, 691(sp)
+; VLEN512-NEXT:    srli a0, a1, 16
+; VLEN512-NEXT:    sb a0, 690(sp)
+; VLEN512-NEXT:    vmv.x.s a0, v24
+; VLEN512-NEXT:    vslidedown.vi v24, v8, 20
+; VLEN512-NEXT:    sb a1, 688(sp)
+; VLEN512-NEXT:    srli a1, a1, 8
+; VLEN512-NEXT:    sb a1, 689(sp)
+; VLEN512-NEXT:    srli a1, a0, 56
+; VLEN512-NEXT:    sb a1, 687(sp)
+; VLEN512-NEXT:    srli a1, a0, 48
+; VLEN512-NEXT:    sb a1, 686(sp)
+; VLEN512-NEXT:    srli a1, a0, 40
+; VLEN512-NEXT:    sb a1, 685(sp)
+; VLEN512-NEXT:    srli a1, a0, 32
+; VLEN512-NEXT:    sb a1, 684(sp)
+; VLEN512-NEXT:    srli a1, a0, 24
+; VLEN512-NEXT:    sb a1, 683(sp)
+; VLEN512-NEXT:    srli a1, a0, 16
+; VLEN512-NEXT:    sb a1, 682(sp)
+; VLEN512-NEXT:    vmv.x.s a1, v24
+; VLEN512-NEXT:    vslidedown.vi v24, v8, 19
+; VLEN512-NEXT:    sb a0, 680(sp)
+; VLEN512-NEXT:    srli a0, a0, 8
+; VLEN512-NEXT:    sb a0, 681(sp)
+; VLEN512-NEXT:    srli a0, a1, 56
+; VLEN512-NEXT:    sb a0, 679(sp)
+; VLEN512-NEXT:    srli a0, a1, 48
+; VLEN512-NEXT:    sb a0, 678(sp)
+; VLEN512-NEXT:    srli a0, a1, 40
+; VLEN512-NEXT:    sb a0, 677(sp)
+; VLEN512-NEXT:    srli a0, a1, 32
+; VLEN512-NEXT:    sb a0, 676(sp)
+; VLEN512-NEXT:    srli a0, a1, 24
+; VLEN512-NEXT:    sb a0, 675(sp)
+; VLEN512-NEXT:    srli a0, a1, 16
+; VLEN512-NEXT:    sb a0, 674(sp)
+; VLEN512-NEXT:    vmv.x.s a0, v24
+; VLEN512-NEXT:    vslidedown.vi v24, v8, 18
+; VLEN512-NEXT:    sb a1, 672(sp)
+; VLEN512-NEXT:    srli a1, a1, 8
+; VLEN512-NEXT:    sb a1, 673(sp)
+; VLEN512-NEXT:    srli a1, a0, 56
+; VLEN512-NEXT:    sb a1, 671(sp)
+; VLEN512-NEXT:    srli a1, a0, 48
+; VLEN512-NEXT:    sb a1, 670(sp)
+; VLEN512-NEXT:    srli a1, a0, 40
+; VLEN512-NEXT:    sb a1, 669(sp)
+; VLEN512-NEXT:    srli a1, a0, 32
+; VLEN512-NEXT:    sb a1, 668(sp)
+; VLEN512-NEXT:    srli a1, a0, 24
+; VLEN512-NEXT:    sb a1, 667(sp)
+; VLEN512-NEXT:    srli a1, a0, 16
+; VLEN512-NEXT:    sb a1, 666(sp)
+; VLEN512-NEXT:    vmv.x.s a1, v24
+; VLEN512-NEXT:    vslidedown.vi v24, v8, 17
+; VLEN512-NEXT:    sb a0, 664(sp)
+; VLEN512-NEXT:    srli a0, a0, 8
+; VLEN512-NEXT:    sb a0, 665(sp)
+; VLEN512-NEXT:    srli a0, a1, 56
+; VLEN512-NEXT:    sb a0, 663(sp)
+; VLEN512-NEXT:    srli a0, a1, 48
+; VLEN512-NEXT:    sb a0, 662(sp)
+; VLEN512-NEXT:    srli a0, a1, 40
+; VLEN512-NEXT:    sb a0, 661(sp)
+; VLEN512-NEXT:    srli a0, a1, 32
+; VLEN512-NEXT:    sb a0, 660(sp)
+; VLEN512-NEXT:    srli a0, a1, 24
+; VLEN512-NEXT:    sb a0, 659(sp)
+; VLEN512-NEXT:    srli a0, a1, 16
+; VLEN512-NEXT:    sb a0, 658(sp)
+; VLEN512-NEXT:    vmv.x.s a0, v24
+; VLEN512-NEXT:    vslidedown.vi v24, v8, 16
+; VLEN512-NEXT:    sb a1, 656(sp)
+; VLEN512-NEXT:    srli a1, a1, 8
+; VLEN512-NEXT:    sb a1, 657(sp)
+; VLEN512-NEXT:    srli a1, a0, 56
+; VLEN512-NEXT:    sb a1, 655(sp)
+; VLEN512-NEXT:    srli a1, a0, 48
+; VLEN512-NEXT:    sb a1, 654(sp)
+; VLEN512-NEXT:    srli a1, a0, 40
+; VLEN512-NEXT:    sb a1, 653(sp)
+; VLEN512-NEXT:    srli a1, a0, 32
+; VLEN512-NEXT:    sb a1, 652(sp)
+; VLEN512-NEXT:    srli a1, a0, 24
+; VLEN512-NEXT:    sb a1, 651(sp)
+; VLEN512-NEXT:    srli a1, a0, 16
+; VLEN512-NEXT:    sb a1, 650(sp)
+; VLEN512-NEXT:    vmv.x.s a1, v24
+; VLEN512-NEXT:    vslidedown.vi v24, v8, 15
+; VLEN512-NEXT:    sb a0, 648(sp)
+; VLEN512-NEXT:    srli a0, a0, 8
+; VLEN512-NEXT:    sb a0, 649(sp)
+; VLEN512-NEXT:    srli a0, a1, 56
+; VLEN512-NEXT:    sb a0, 647(sp)
+; VLEN512-NEXT:    srli a0, a1, 48
+; VLEN512-NEXT:    sb a0, 646(sp)
+; VLEN512-NEXT:    srli a0, a1, 40
+; VLEN512-NEXT:    sb a0, 645(sp)
+; VLEN512-NEXT:    srli a0, a1, 32
+; VLEN512-NEXT:    sb a0, 644(sp)
+; VLEN512-NEXT:    srli a0, a1, 24
+; VLEN512-NEXT:    sb a0, 643(sp)
+; VLEN512-NEXT:    srli a0, a1, 16
+; VLEN512-NEXT:    sb a0, 642(sp)
+; VLEN512-NEXT:    vmv.x.s a0, v24
+; VLEN512-NEXT:    vslidedown.vi v24, v8, 14
+; VLEN512-NEXT:    sb a1, 640(sp)
+; VLEN512-NEXT:    srli a1, a1, 8
+; VLEN512-NEXT:    sb a1, 641(sp)
+; VLEN512-NEXT:    srli a1, a0, 56
+; VLEN512-NEXT:    sb a1, 639(sp)
+; VLEN512-NEXT:    srli a1, a0, 48
+; VLEN512-NEXT:    sb a1, 638(sp)
+; VLEN512-NEXT:    srli a1, a0, 40
+; VLEN512-NEXT:    sb a1, 637(sp)
+; VLEN512-NEXT:    srli a1, a0, 32
+; VLEN512-NEXT:    sb a1, 636(sp)
+; VLEN512-NEXT:    srli a1, a0, 24
+; VLEN512-NEXT:    sb a1, 635(sp)
+; VLEN512-NEXT:    srli a1, a0, 16
+; VLEN512-NEXT:    sb a1, 634(sp)
+; VLEN512-NEXT:    vmv.x.s a1, v24
+; VLEN512-NEXT:    vslidedown.vi v24, v8, 13
+; VLEN512-NEXT:    sb a0, 632(sp)
+; VLEN512-NEXT:    srli a0, a0, 8
+; VLEN512-NEXT:    sb a0, 633(sp)
+; VLEN512-NEXT:    srli a0, a1, 56
+; VLEN512-NEXT:    sb a0, 631(sp)
+; VLEN512-NEXT:    srli a0, a1, 48
+; VLEN512-NEXT:    sb a0, 630(sp)
+; VLEN512-NEXT:    srli a0, a1, 40
+; VLEN512-NEXT:    sb a0, 629(sp)
+; VLEN512-NEXT:    srli a0, a1, 32
+; VLEN512-NEXT:    sb a0, 628(sp)
+; VLEN512-NEXT:    srli a0, a1, 24
+; VLEN512-NEXT:    sb a0, 627(sp)
+; VLEN512-NEXT:    srli a0, a1, 16
+; VLEN512-NEXT:    sb a0, 626(sp)
+; VLEN512-NEXT:    vmv.x.s a0, v24
+; VLEN512-NEXT:    vslidedown.vi v24, v8, 12
+; VLEN512-NEXT:    sb a1, 624(sp)
+; VLEN512-NEXT:    srli a1, a1, 8
+; VLEN512-NEXT:    sb a1, 625(sp)
+; VLEN512-NEXT:    srli a1, a0, 56
+; VLEN512-NEXT:    sb a1, 623(sp)
+; VLEN512-NEXT:    srli a1, a0, 48
+; VLEN512-NEXT:    sb a1, 622(sp)
+; VLEN512-NEXT:    srli a1, a0, 40
+; VLEN512-NEXT:    sb a1, 621(sp)
+; VLEN512-NEXT:    srli a1, a0, 32
+; VLEN512-NEXT:    sb a1, 620(sp)
+; VLEN512-NEXT:    srli a1, a0, 24
+; VLEN512-NEXT:    sb a1, 619(sp)
+; VLEN512-NEXT:    srli a1, a0, 16
+; VLEN512-NEXT:    sb a1, 618(sp)
+; VLEN512-NEXT:    vmv.x.s a1, v24
+; VLEN512-NEXT:    vslidedown.vi v24, v8, 11
+; VLEN512-NEXT:    sb a0, 616(sp)
+; VLEN512-NEXT:    srli a0, a0, 8
+; VLEN512-NEXT:    sb a0, 617(sp)
+; VLEN512-NEXT:    srli a0, a1, 56
+; VLEN512-NEXT:    sb a0, 615(sp)
+; VLEN512-NEXT:    srli a0, a1, 48
+; VLEN512-NEXT:    sb a0, 614(sp)
+; VLEN512-NEXT:    srli a0, a1, 40
+; VLEN512-NEXT:    sb a0, 613(sp)
+; VLEN512-NEXT:    srli a0, a1, 32
+; VLEN512-NEXT:    sb a0, 612(sp)
+; VLEN512-NEXT:    srli a0, a1, 24
+; VLEN512-NEXT:    sb a0, 611(sp)
+; VLEN512-NEXT:    srli a0, a1, 16
+; VLEN512-NEXT:    sb a0, 610(sp)
+; VLEN512-NEXT:    vmv.x.s a0, v24
+; VLEN512-NEXT:    vslidedown.vi v24, v8, 10
+; VLEN512-NEXT:    sb a1, 608(sp)
+; VLEN512-NEXT:    srli a1, a1, 8
+; VLEN512-NEXT:    sb a1, 609(sp)
+; VLEN512-NEXT:    srli a1, a0, 56
+; VLEN512-NEXT:    sb a1, 607(sp)
+; VLEN512-NEXT:    srli a1, a0, 48
+; VLEN512-NEXT:    sb a1, 606(sp)
+; VLEN512-NEXT:    srli a1, a0, 40
+; VLEN512-NEXT:    sb a1, 605(sp)
+; VLEN512-NEXT:    srli a1, a0, 32
+; VLEN512-NEXT:    sb a1, 604(sp)
+; VLEN512-NEXT:    srli a1, a0, 24
+; VLEN512-NEXT:    sb a1, 603(sp)
+; VLEN512-NEXT:    srli a1, a0, 16
+; VLEN512-NEXT:    sb a1, 602(sp)
+; VLEN512-NEXT:    vmv.x.s a1, v24
+; VLEN512-NEXT:    vslidedown.vi v24, v8, 9
+; VLEN512-NEXT:    sb a0, 600(sp)
+; VLEN512-NEXT:    srli a0, a0, 8
+; VLEN512-NEXT:    sb a0, 601(sp)
+; VLEN512-NEXT:    srli a0, a1, 56
+; VLEN512-NEXT:    sb a0, 599(sp)
+; VLEN512-NEXT:    srli a0, a1, 48
+; VLEN512-NEXT:    sb a0, 598(sp)
+; VLEN512-NEXT:    srli a0, a1, 40
+; VLEN512-NEXT:    sb a0, 597(sp)
+; VLEN512-NEXT:    srli a0, a1, 32
+; VLEN512-NEXT:    sb a0, 596(sp)
+; VLEN512-NEXT:    srli a0, a1, 24
+; VLEN512-NEXT:    sb a0, 595(sp)
+; VLEN512-NEXT:    srli a0, a1, 16
+; VLEN512-NEXT:    sb a0, 594(sp)
+; VLEN512-NEXT:    vmv.x.s a0, v24
+; VLEN512-NEXT:    vslidedown.vi v24, v8, 8
+; VLEN512-NEXT:    sb a1, 592(sp)
+; VLEN512-NEXT:    srli a1, a1, 8
+; VLEN512-NEXT:    sb a1, 593(sp)
+; VLEN512-NEXT:    srli a1, a0, 56
+; VLEN512-NEXT:    sb a1, 591(sp)
+; VLEN512-NEXT:    srli a1, a0, 48
+; VLEN512-NEXT:    sb a1, 590(sp)
+; VLEN512-NEXT:    srli a1, a0, 40
+; VLEN512-NEXT:    sb a1, 589(sp)
+; VLEN512-NEXT:    srli a1, a0, 32
+; VLEN512-NEXT:    sb a1, 588(sp)
+; VLEN512-NEXT:    srli a1, a0, 24
+; VLEN512-NEXT:    sb a1, 587(sp)
+; VLEN512-NEXT:    srli a1, a0, 16
+; VLEN512-NEXT:    sb a1, 586(sp)
+; VLEN512-NEXT:    vmv.x.s a1, v24
+; VLEN512-NEXT:    vslidedown.vi v24, v8, 7
+; VLEN512-NEXT:    sb a0, 584(sp)
+; VLEN512-NEXT:    srli a0, a0, 8
+; VLEN512-NEXT:    sb a0, 585(sp)
+; VLEN512-NEXT:    srli a0, a1, 56
+; VLEN512-NEXT:    sb a0, 583(sp)
+; VLEN512-NEXT:    srli a0, a1, 48
+; VLEN512-NEXT:    sb a0, 582(sp)
+; VLEN512-NEXT:    srli a0, a1, 40
+; VLEN512-NEXT:    sb a0, 581(sp)
+; VLEN512-NEXT:    srli a0, a1, 32
+; VLEN512-NEXT:    sb a0, 580(sp)
+; VLEN512-NEXT:    srli a0, a1, 24
+; VLEN512-NEXT:    sb a0, 579(sp)
+; VLEN512-NEXT:    srli a0, a1, 16
+; VLEN512-NEXT:    sb a0, 578(sp)
+; VLEN512-NEXT:    vmv.x.s a0, v24
+; VLEN512-NEXT:    vslidedown.vi v24, v8, 6
+; VLEN512-NEXT:    sb a1, 576(sp)
+; VLEN512-NEXT:    srli a1, a1, 8
+; VLEN512-NEXT:    sb a1, 577(sp)
+; VLEN512-NEXT:    srli a1, a0, 56
+; VLEN512-NEXT:    sb a1, 575(sp)
+; VLEN512-NEXT:    srli a1, a0, 48
+; VLEN512-NEXT:    sb a1, 574(sp)
+; VLEN512-NEXT:    srli a1, a0, 40
+; VLEN512-NEXT:    sb a1, 573(sp)
+; VLEN512-NEXT:    srli a1, a0, 32
+; VLEN512-NEXT:    sb a1, 572(sp)
+; VLEN512-NEXT:    srli a1, a0, 24
+; VLEN512-NEXT:    sb a1, 571(sp)
+; VLEN512-NEXT:    srli a1, a0, 16
+; VLEN512-NEXT:    sb a1, 570(sp)
+; VLEN512-NEXT:    vmv.x.s a1, v24
+; VLEN512-NEXT:    vslidedown.vi v24, v8, 5
+; VLEN512-NEXT:    sb a0, 568(sp)
+; VLEN512-NEXT:    srli a0, a0, 8
+; VLEN512-NEXT:    sb a0, 569(sp)
+; VLEN512-NEXT:    srli a0, a1, 56
+; VLEN512-NEXT:    sb a0, 567(sp)
+; VLEN512-NEXT:    srli a0, a1, 48
+; VLEN512-NEXT:    sb a0, 566(sp)
+; VLEN512-NEXT:    srli a0, a1, 40
+; VLEN512-NEXT:    sb a0, 565(sp)
+; VLEN512-NEXT:    srli a0, a1, 32
+; VLEN512-NEXT:    sb a0, 564(sp)
+; VLEN512-NEXT:    srli a0, a1, 24
+; VLEN512-NEXT:    sb a0, 563(sp)
+; VLEN512-NEXT:    srli a0, a1, 16
+; VLEN512-NEXT:    sb a0, 562(sp)
+; VLEN512-NEXT:    vmv.x.s a0, v24
+; VLEN512-NEXT:    vslidedown.vi v24, v8, 4
+; VLEN512-NEXT:    sb a1, 560(sp)
+; VLEN512-NEXT:    srli a1, a1, 8
+; VLEN512-NEXT:    sb a1, 561(sp)
+; VLEN512-NEXT:    srli a1, a0, 56
+; VLEN512-NEXT:    sb a1, 559(sp)
+; VLEN512-NEXT:    srli a1, a0, 48
+; VLEN512-NEXT:    sb a1, 558(sp)
+; VLEN512-NEXT:    srli a1, a0, 40
+; VLEN512-NEXT:    sb a1, 557(sp)
+; VLEN512-NEXT:    srli a1, a0, 32
+; VLEN512-NEXT:    sb a1, 556(sp)
+; VLEN512-NEXT:    srli a1, a0, 24
+; VLEN512-NEXT:    sb a1, 555(sp)
+; VLEN512-NEXT:    srli a1, a0, 16
+; VLEN512-NEXT:    sb a1, 554(sp)
+; VLEN512-NEXT:    vmv.x.s a1, v24
+; VLEN512-NEXT:    vslidedown.vi v24, v8, 3
+; VLEN512-NEXT:    sb a0, 552(sp)
+; VLEN512-NEXT:    srli a0, a0, 8
+; VLEN512-NEXT:    sb a0, 553(sp)
+; VLEN512-NEXT:    srli a0, a1, 56
+; VLEN512-NEXT:    sb a0, 551(sp)
+; VLEN512-NEXT:    srli a0, a1, 48
+; VLEN512-NEXT:    sb a0, 550(sp)
+; VLEN512-NEXT:    srli a0, a1, 40
+; VLEN512-NEXT:    sb a0, 549(sp)
+; VLEN512-NEXT:    srli a0, a1, 32
+; VLEN512-NEXT:    sb a0, 548(sp)
+; VLEN512-NEXT:    srli a0, a1, 24
+; VLEN512-NEXT:    sb a0, 547(sp)
+; VLEN512-NEXT:    srli a0, a1, 16
+; VLEN512-NEXT:    sb a0, 546(sp)
+; VLEN512-NEXT:    vmv.x.s a0, v24
+; VLEN512-NEXT:    vslidedown.vi v24, v8, 1
+; VLEN512-NEXT:    vslidedown.vi v8, v8, 2
+; VLEN512-NEXT:    sb a1, 544(sp)
+; VLEN512-NEXT:    srli a1, a1, 8
+; VLEN512-NEXT:    sb a1, 545(sp)
+; VLEN512-NEXT:    srli a1, a0, 56
+; VLEN512-NEXT:    sb a1, 543(sp)
+; VLEN512-NEXT:    srli a1, a0, 48
+; VLEN512-NEXT:    sb a1, 542(sp)
+; VLEN512-NEXT:    srli a1, a0, 40
+; VLEN512-NEXT:    sb a1, 541(sp)
+; VLEN512-NEXT:    srli a1, a0, 32
+; VLEN512-NEXT:    sb a1, 540(sp)
+; VLEN512-NEXT:    srli a1, a0, 24
+; VLEN512-NEXT:    sb a1, 539(sp)
+; VLEN512-NEXT:    srli a1, a0, 16
+; VLEN512-NEXT:    sb a1, 538(sp)
+; VLEN512-NEXT:    vmv.x.s a1, v8
+; VLEN512-NEXT:    sb a0, 536(sp)
+; VLEN512-NEXT:    srli a0, a0, 8
+; VLEN512-NEXT:    sb a0, 537(sp)
+; VLEN512-NEXT:    srli a0, a1, 56
+; VLEN512-NEXT:    sb a0, 535(sp)
+; VLEN512-NEXT:    srli a0, a1, 48
+; VLEN512-NEXT:    sb a0, 534(sp)
+; VLEN512-NEXT:    srli a0, a1, 40
+; VLEN512-NEXT:    sb a0, 533(sp)
+; VLEN512-NEXT:    srli a0, a1, 32
+; VLEN512-NEXT:    sb a0, 532(sp)
+; VLEN512-NEXT:    srli a0, a1, 24
+; VLEN512-NEXT:    sb a0, 531(sp)
+; VLEN512-NEXT:    srli a0, a1, 16
+; VLEN512-NEXT:    sb a0, 530(sp)
+; VLEN512-NEXT:    vmv.x.s a0, v24
+; VLEN512-NEXT:    sb a1, 528(sp)
+; VLEN512-NEXT:    srli a1, a1, 8
+; VLEN512-NEXT:    sb a1, 529(sp)
+; VLEN512-NEXT:    srli a1, a0, 56
+; VLEN512-NEXT:    sb a1, 527(sp)
+; VLEN512-NEXT:    srli a1, a0, 48
+; VLEN512-NEXT:    sb a1, 526(sp)
+; VLEN512-NEXT:    srli a1, a0, 40
+; VLEN512-NEXT:    sb a1, 525(sp)
+; VLEN512-NEXT:    srli a1, a0, 32
+; VLEN512-NEXT:    sb a1, 524(sp)
+; VLEN512-NEXT:    srli a1, a0, 24
+; VLEN512-NEXT:    sb a1, 523(sp)
+; VLEN512-NEXT:    srli a1, a0, 16
+; VLEN512-NEXT:    sb a1, 522(sp)
+; VLEN512-NEXT:    sb a0, 520(sp)
+; VLEN512-NEXT:    srli a0, a0, 8
+; VLEN512-NEXT:    sb a0, 521(sp)
+; VLEN512-NEXT:    addi a0, zero, 256
+; VLEN512-NEXT:    vsetvli zero, a0, e8,m4,ta,mu
+; VLEN512-NEXT:    addi a0, sp, 512
+; VLEN512-NEXT:    vle8.v v28, (a0)
+; VLEN512-NEXT:    addi a0, sp, 256
+; VLEN512-NEXT:    vle8.v v12, (a0)
+; VLEN512-NEXT:    vadd.vv v8, v16, v28
+; VLEN512-NEXT:    vadd.vv v12, v20, v12
+; VLEN512-NEXT:    addi sp, s0, -1024
+; VLEN512-NEXT:    ld s11, 920(sp) # 8-byte Folded Reload
+; VLEN512-NEXT:    ld s10, 928(sp) # 8-byte Folded Reload
+; VLEN512-NEXT:    ld s9, 936(sp) # 8-byte Folded Reload
+; VLEN512-NEXT:    ld s8, 944(sp) # 8-byte Folded Reload
+; VLEN512-NEXT:    ld s7, 952(sp) # 8-byte Folded Reload
+; VLEN512-NEXT:    ld s6, 960(sp) # 8-byte Folded Reload
+; VLEN512-NEXT:    ld s5, 968(sp) # 8-byte Folded Reload
+; VLEN512-NEXT:    ld s4, 976(sp) # 8-byte Folded Reload
+; VLEN512-NEXT:    ld s3, 984(sp) # 8-byte Folded Reload
+; VLEN512-NEXT:    ld s2, 992(sp) # 8-byte Folded Reload
+; VLEN512-NEXT:    ld s1, 1000(sp) # 8-byte Folded Reload
+; VLEN512-NEXT:    ld s0, 1008(sp) # 8-byte Folded Reload
+; VLEN512-NEXT:    ld ra, 1016(sp) # 8-byte Folded Reload
+; VLEN512-NEXT:    addi sp, sp, 1024
+; VLEN512-NEXT:    ret
+  %c = bitcast <256 x i16> %a to <512 x i8>
+  %v = add <512 x i8> %b, %c
+  ret <512 x i8> %v
+}


        


More information about the llvm-commits mailing list