[llvm] a6cd1f6 - [RISCV] Adjust memcpy lowering test coverage w/V

Philip Reames via llvm-commits <llvm-commits at lists.llvm.org>
Mon Jul 24 12:32:23 PDT 2023


Author: Philip Reames
Date: 2023-07-24T12:32:17-07:00
New Revision: a6cd1f623dcf6fc443a1b1726d77b36c6b0caa83

URL: https://github.com/llvm/llvm-project/commit/a6cd1f623dcf6fc443a1b1726d77b36c6b0caa83
DIFF: https://github.com/llvm/llvm-project/commit/a6cd1f623dcf6fc443a1b1726d77b36c6b0caa83.diff

LOG: [RISCV] Adjust memcpy lowering test coverage w/V

This fixes a mistake in 4f4f49137: the FAST RUN lines were missing
+unaligned-vector-mem, so the unaligned memcpy tests were not actually
exercising the vector lowering paths.
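
For reference, a minimal instance of the pattern under test, lifted from
the cases updated below (the FAST configurations run llc with
-mattr=+v,+unaligned-scalar-mem,+unaligned-vector-mem):

  declare void @llvm.memcpy.inline.p0.p0.i64(ptr, ptr, i64, i1)

  ; With unaligned vector access enabled, this 16-byte copy lowers to a
  ; single vector load/store pair (e.g. vsetivli + vle64.v/vse64.v on
  ; rv64) instead of a scalar ld/sd sequence.
  define void @unaligned_memcpy16(ptr nocapture %dest, ptr %src) nounwind {
  entry:
    tail call void @llvm.memcpy.inline.p0.p0.i64(ptr %dest, ptr %src, i64 16, i1 false)
    ret void
  }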

Added: 
    

Modified: 
    llvm/test/CodeGen/RISCV/rvv/memcpy-inline.ll

Removed: 
    


################################################################################
diff --git a/llvm/test/CodeGen/RISCV/rvv/memcpy-inline.ll b/llvm/test/CodeGen/RISCV/rvv/memcpy-inline.ll
index 1aa8600b114031..3f8e4d5a04743a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/memcpy-inline.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/memcpy-inline.ll
@@ -3,9 +3,9 @@
 ; RUN:   | FileCheck %s --check-prefixes=RV32-BOTH,RV32
 ; RUN: llc < %s -mtriple=riscv64 -mattr=+v \
 ; RUN:   | FileCheck %s --check-prefixes=RV64-BOTH,RV64
-; RUN: llc < %s -mtriple=riscv32 -mattr=+v,+unaligned-scalar-mem \
+; RUN: llc < %s -mtriple=riscv32 -mattr=+v,+unaligned-scalar-mem,+unaligned-vector-mem \
 ; RUN:   | FileCheck %s --check-prefixes=RV32-BOTH,RV32-FAST
-; RUN: llc < %s -mtriple=riscv64 -mattr=+v,+unaligned-scalar-mem \
+; RUN: llc < %s -mtriple=riscv64 -mattr=+v,+unaligned-scalar-mem,+unaligned-vector-mem \
 ; RUN:   | FileCheck %s --check-prefixes=RV64-BOTH,RV64-FAST
 
 ; ----------------------------------------------------------------------
@@ -196,10 +196,9 @@ define void @unaligned_memcpy8(ptr nocapture %dest, ptr %src) nounwind {
 ;
 ; RV32-FAST-LABEL: unaligned_memcpy8:
 ; RV32-FAST:       # %bb.0: # %entry
-; RV32-FAST-NEXT:    lw a2, 4(a1)
-; RV32-FAST-NEXT:    sw a2, 4(a0)
-; RV32-FAST-NEXT:    lw a1, 0(a1)
-; RV32-FAST-NEXT:    sw a1, 0(a0)
+; RV32-FAST-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
+; RV32-FAST-NEXT:    vle32.v v8, (a1)
+; RV32-FAST-NEXT:    vse32.v v8, (a0)
 ; RV32-FAST-NEXT:    ret
 ;
 ; RV64-FAST-LABEL: unaligned_memcpy8:
@@ -257,10 +256,9 @@ define void @unaligned_memcpy15(ptr nocapture %dest, ptr %src) nounwind {
 ; RV32-FAST-NEXT:    sw a2, 11(a0)
 ; RV32-FAST-NEXT:    lw a2, 8(a1)
 ; RV32-FAST-NEXT:    sw a2, 8(a0)
-; RV32-FAST-NEXT:    lw a2, 4(a1)
-; RV32-FAST-NEXT:    sw a2, 4(a0)
-; RV32-FAST-NEXT:    lw a1, 0(a1)
-; RV32-FAST-NEXT:    sw a1, 0(a0)
+; RV32-FAST-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
+; RV32-FAST-NEXT:    vle32.v v8, (a1)
+; RV32-FAST-NEXT:    vse32.v v8, (a0)
 ; RV32-FAST-NEXT:    ret
 ;
 ; RV64-FAST-LABEL: unaligned_memcpy15:
@@ -292,22 +290,16 @@ define void @unaligned_memcpy16(ptr nocapture %dest, ptr %src) nounwind {
 ;
 ; RV32-FAST-LABEL: unaligned_memcpy16:
 ; RV32-FAST:       # %bb.0: # %entry
-; RV32-FAST-NEXT:    lw a2, 12(a1)
-; RV32-FAST-NEXT:    sw a2, 12(a0)
-; RV32-FAST-NEXT:    lw a2, 8(a1)
-; RV32-FAST-NEXT:    sw a2, 8(a0)
-; RV32-FAST-NEXT:    lw a2, 4(a1)
-; RV32-FAST-NEXT:    sw a2, 4(a0)
-; RV32-FAST-NEXT:    lw a1, 0(a1)
-; RV32-FAST-NEXT:    sw a1, 0(a0)
+; RV32-FAST-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; RV32-FAST-NEXT:    vle32.v v8, (a1)
+; RV32-FAST-NEXT:    vse32.v v8, (a0)
 ; RV32-FAST-NEXT:    ret
 ;
 ; RV64-FAST-LABEL: unaligned_memcpy16:
 ; RV64-FAST:       # %bb.0: # %entry
-; RV64-FAST-NEXT:    ld a2, 8(a1)
-; RV64-FAST-NEXT:    sd a2, 8(a0)
-; RV64-FAST-NEXT:    ld a1, 0(a1)
-; RV64-FAST-NEXT:    sd a1, 0(a0)
+; RV64-FAST-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; RV64-FAST-NEXT:    vle64.v v8, (a1)
+; RV64-FAST-NEXT:    vse64.v v8, (a0)
 ; RV64-FAST-NEXT:    ret
 entry:
   tail call void @llvm.memcpy.inline.p0.p0.i64(ptr %dest, ptr %src, i64 16, i1 false)
@@ -369,18 +361,14 @@ define void @unaligned_memcpy31(ptr nocapture %dest, ptr %src) nounwind {
 ; RV32-FAST-NEXT:    sw a2, 27(a0)
 ; RV32-FAST-NEXT:    lw a2, 24(a1)
 ; RV32-FAST-NEXT:    sw a2, 24(a0)
-; RV32-FAST-NEXT:    lw a2, 20(a1)
-; RV32-FAST-NEXT:    sw a2, 20(a0)
-; RV32-FAST-NEXT:    lw a2, 16(a1)
-; RV32-FAST-NEXT:    sw a2, 16(a0)
-; RV32-FAST-NEXT:    lw a2, 12(a1)
-; RV32-FAST-NEXT:    sw a2, 12(a0)
-; RV32-FAST-NEXT:    lw a2, 8(a1)
-; RV32-FAST-NEXT:    sw a2, 8(a0)
-; RV32-FAST-NEXT:    lw a2, 4(a1)
-; RV32-FAST-NEXT:    sw a2, 4(a0)
-; RV32-FAST-NEXT:    lw a1, 0(a1)
-; RV32-FAST-NEXT:    sw a1, 0(a0)
+; RV32-FAST-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; RV32-FAST-NEXT:    vle32.v v8, (a1)
+; RV32-FAST-NEXT:    vse32.v v8, (a0)
+; RV32-FAST-NEXT:    addi a1, a1, 16
+; RV32-FAST-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
+; RV32-FAST-NEXT:    vle32.v v8, (a1)
+; RV32-FAST-NEXT:    addi a0, a0, 16
+; RV32-FAST-NEXT:    vse32.v v8, (a0)
 ; RV32-FAST-NEXT:    ret
 ;
 ; RV64-FAST-LABEL: unaligned_memcpy31:
@@ -389,10 +377,9 @@ define void @unaligned_memcpy31(ptr nocapture %dest, ptr %src) nounwind {
 ; RV64-FAST-NEXT:    sd a2, 23(a0)
 ; RV64-FAST-NEXT:    ld a2, 16(a1)
 ; RV64-FAST-NEXT:    sd a2, 16(a0)
-; RV64-FAST-NEXT:    ld a2, 8(a1)
-; RV64-FAST-NEXT:    sd a2, 8(a0)
-; RV64-FAST-NEXT:    ld a1, 0(a1)
-; RV64-FAST-NEXT:    sd a1, 0(a0)
+; RV64-FAST-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; RV64-FAST-NEXT:    vle64.v v8, (a1)
+; RV64-FAST-NEXT:    vse64.v v8, (a0)
 ; RV64-FAST-NEXT:    ret
 entry:
   tail call void @llvm.memcpy.inline.p0.p0.i64(ptr %dest, ptr %src, i64 31, i1 false)
@@ -418,34 +405,16 @@ define void @unaligned_memcpy32(ptr nocapture %dest, ptr %src) nounwind {
 ;
 ; RV32-FAST-LABEL: unaligned_memcpy32:
 ; RV32-FAST:       # %bb.0: # %entry
-; RV32-FAST-NEXT:    lw a2, 28(a1)
-; RV32-FAST-NEXT:    sw a2, 28(a0)
-; RV32-FAST-NEXT:    lw a2, 24(a1)
-; RV32-FAST-NEXT:    sw a2, 24(a0)
-; RV32-FAST-NEXT:    lw a2, 20(a1)
-; RV32-FAST-NEXT:    sw a2, 20(a0)
-; RV32-FAST-NEXT:    lw a2, 16(a1)
-; RV32-FAST-NEXT:    sw a2, 16(a0)
-; RV32-FAST-NEXT:    lw a2, 12(a1)
-; RV32-FAST-NEXT:    sw a2, 12(a0)
-; RV32-FAST-NEXT:    lw a2, 8(a1)
-; RV32-FAST-NEXT:    sw a2, 8(a0)
-; RV32-FAST-NEXT:    lw a2, 4(a1)
-; RV32-FAST-NEXT:    sw a2, 4(a0)
-; RV32-FAST-NEXT:    lw a1, 0(a1)
-; RV32-FAST-NEXT:    sw a1, 0(a0)
+; RV32-FAST-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; RV32-FAST-NEXT:    vle32.v v8, (a1)
+; RV32-FAST-NEXT:    vse32.v v8, (a0)
 ; RV32-FAST-NEXT:    ret
 ;
 ; RV64-FAST-LABEL: unaligned_memcpy32:
 ; RV64-FAST:       # %bb.0: # %entry
-; RV64-FAST-NEXT:    ld a2, 24(a1)
-; RV64-FAST-NEXT:    sd a2, 24(a0)
-; RV64-FAST-NEXT:    ld a2, 16(a1)
-; RV64-FAST-NEXT:    sd a2, 16(a0)
-; RV64-FAST-NEXT:    ld a2, 8(a1)
-; RV64-FAST-NEXT:    sd a2, 8(a0)
-; RV64-FAST-NEXT:    ld a1, 0(a1)
-; RV64-FAST-NEXT:    sd a1, 0(a0)
+; RV64-FAST-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; RV64-FAST-NEXT:    vle64.v v8, (a1)
+; RV64-FAST-NEXT:    vse64.v v8, (a0)
 ; RV64-FAST-NEXT:    ret
 entry:
   tail call void @llvm.memcpy.inline.p0.p0.i64(ptr %dest, ptr %src, i64 32, i1 false)
@@ -471,58 +440,16 @@ define void @unaligned_memcpy64(ptr nocapture %dest, ptr %src) nounwind {
 ;
 ; RV32-FAST-LABEL: unaligned_memcpy64:
 ; RV32-FAST:       # %bb.0: # %entry
-; RV32-FAST-NEXT:    lw a2, 60(a1)
-; RV32-FAST-NEXT:    sw a2, 60(a0)
-; RV32-FAST-NEXT:    lw a2, 56(a1)
-; RV32-FAST-NEXT:    sw a2, 56(a0)
-; RV32-FAST-NEXT:    lw a2, 52(a1)
-; RV32-FAST-NEXT:    sw a2, 52(a0)
-; RV32-FAST-NEXT:    lw a2, 48(a1)
-; RV32-FAST-NEXT:    sw a2, 48(a0)
-; RV32-FAST-NEXT:    lw a2, 44(a1)
-; RV32-FAST-NEXT:    sw a2, 44(a0)
-; RV32-FAST-NEXT:    lw a2, 40(a1)
-; RV32-FAST-NEXT:    sw a2, 40(a0)
-; RV32-FAST-NEXT:    lw a2, 36(a1)
-; RV32-FAST-NEXT:    sw a2, 36(a0)
-; RV32-FAST-NEXT:    lw a2, 32(a1)
-; RV32-FAST-NEXT:    sw a2, 32(a0)
-; RV32-FAST-NEXT:    lw a2, 28(a1)
-; RV32-FAST-NEXT:    sw a2, 28(a0)
-; RV32-FAST-NEXT:    lw a2, 24(a1)
-; RV32-FAST-NEXT:    sw a2, 24(a0)
-; RV32-FAST-NEXT:    lw a2, 20(a1)
-; RV32-FAST-NEXT:    sw a2, 20(a0)
-; RV32-FAST-NEXT:    lw a2, 16(a1)
-; RV32-FAST-NEXT:    sw a2, 16(a0)
-; RV32-FAST-NEXT:    lw a2, 12(a1)
-; RV32-FAST-NEXT:    sw a2, 12(a0)
-; RV32-FAST-NEXT:    lw a2, 8(a1)
-; RV32-FAST-NEXT:    sw a2, 8(a0)
-; RV32-FAST-NEXT:    lw a2, 4(a1)
-; RV32-FAST-NEXT:    sw a2, 4(a0)
-; RV32-FAST-NEXT:    lw a1, 0(a1)
-; RV32-FAST-NEXT:    sw a1, 0(a0)
+; RV32-FAST-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
+; RV32-FAST-NEXT:    vle32.v v8, (a1)
+; RV32-FAST-NEXT:    vse32.v v8, (a0)
 ; RV32-FAST-NEXT:    ret
 ;
 ; RV64-FAST-LABEL: unaligned_memcpy64:
 ; RV64-FAST:       # %bb.0: # %entry
-; RV64-FAST-NEXT:    ld a2, 56(a1)
-; RV64-FAST-NEXT:    sd a2, 56(a0)
-; RV64-FAST-NEXT:    ld a2, 48(a1)
-; RV64-FAST-NEXT:    sd a2, 48(a0)
-; RV64-FAST-NEXT:    ld a2, 40(a1)
-; RV64-FAST-NEXT:    sd a2, 40(a0)
-; RV64-FAST-NEXT:    ld a2, 32(a1)
-; RV64-FAST-NEXT:    sd a2, 32(a0)
-; RV64-FAST-NEXT:    ld a2, 24(a1)
-; RV64-FAST-NEXT:    sd a2, 24(a0)
-; RV64-FAST-NEXT:    ld a2, 16(a1)
-; RV64-FAST-NEXT:    sd a2, 16(a0)
-; RV64-FAST-NEXT:    ld a2, 8(a1)
-; RV64-FAST-NEXT:    sd a2, 8(a0)
-; RV64-FAST-NEXT:    ld a1, 0(a1)
-; RV64-FAST-NEXT:    sd a1, 0(a0)
+; RV64-FAST-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
+; RV64-FAST-NEXT:    vle64.v v8, (a1)
+; RV64-FAST-NEXT:    vse64.v v8, (a0)
 ; RV64-FAST-NEXT:    ret
 entry:
   tail call void @llvm.memcpy.inline.p0.p0.i64(ptr %dest, ptr %src, i64 64, i1 false)
@@ -560,82 +487,26 @@ define void @unaligned_memcpy96(ptr nocapture %dest, ptr %src) nounwind {
 ;
 ; RV32-FAST-LABEL: unaligned_memcpy96:
 ; RV32-FAST:       # %bb.0: # %entry
-; RV32-FAST-NEXT:    lw a2, 92(a1)
-; RV32-FAST-NEXT:    sw a2, 92(a0)
-; RV32-FAST-NEXT:    lw a2, 88(a1)
-; RV32-FAST-NEXT:    sw a2, 88(a0)
-; RV32-FAST-NEXT:    lw a2, 84(a1)
-; RV32-FAST-NEXT:    sw a2, 84(a0)
-; RV32-FAST-NEXT:    lw a2, 80(a1)
-; RV32-FAST-NEXT:    sw a2, 80(a0)
-; RV32-FAST-NEXT:    lw a2, 76(a1)
-; RV32-FAST-NEXT:    sw a2, 76(a0)
-; RV32-FAST-NEXT:    lw a2, 72(a1)
-; RV32-FAST-NEXT:    sw a2, 72(a0)
-; RV32-FAST-NEXT:    lw a2, 68(a1)
-; RV32-FAST-NEXT:    sw a2, 68(a0)
-; RV32-FAST-NEXT:    lw a2, 64(a1)
-; RV32-FAST-NEXT:    sw a2, 64(a0)
-; RV32-FAST-NEXT:    lw a2, 60(a1)
-; RV32-FAST-NEXT:    sw a2, 60(a0)
-; RV32-FAST-NEXT:    lw a2, 56(a1)
-; RV32-FAST-NEXT:    sw a2, 56(a0)
-; RV32-FAST-NEXT:    lw a2, 52(a1)
-; RV32-FAST-NEXT:    sw a2, 52(a0)
-; RV32-FAST-NEXT:    lw a2, 48(a1)
-; RV32-FAST-NEXT:    sw a2, 48(a0)
-; RV32-FAST-NEXT:    lw a2, 44(a1)
-; RV32-FAST-NEXT:    sw a2, 44(a0)
-; RV32-FAST-NEXT:    lw a2, 40(a1)
-; RV32-FAST-NEXT:    sw a2, 40(a0)
-; RV32-FAST-NEXT:    lw a2, 36(a1)
-; RV32-FAST-NEXT:    sw a2, 36(a0)
-; RV32-FAST-NEXT:    lw a2, 32(a1)
-; RV32-FAST-NEXT:    sw a2, 32(a0)
-; RV32-FAST-NEXT:    lw a2, 28(a1)
-; RV32-FAST-NEXT:    sw a2, 28(a0)
-; RV32-FAST-NEXT:    lw a2, 24(a1)
-; RV32-FAST-NEXT:    sw a2, 24(a0)
-; RV32-FAST-NEXT:    lw a2, 20(a1)
-; RV32-FAST-NEXT:    sw a2, 20(a0)
-; RV32-FAST-NEXT:    lw a2, 16(a1)
-; RV32-FAST-NEXT:    sw a2, 16(a0)
-; RV32-FAST-NEXT:    lw a2, 12(a1)
-; RV32-FAST-NEXT:    sw a2, 12(a0)
-; RV32-FAST-NEXT:    lw a2, 8(a1)
-; RV32-FAST-NEXT:    sw a2, 8(a0)
-; RV32-FAST-NEXT:    lw a2, 4(a1)
-; RV32-FAST-NEXT:    sw a2, 4(a0)
-; RV32-FAST-NEXT:    lw a1, 0(a1)
-; RV32-FAST-NEXT:    sw a1, 0(a0)
+; RV32-FAST-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
+; RV32-FAST-NEXT:    vle32.v v8, (a1)
+; RV32-FAST-NEXT:    vse32.v v8, (a0)
+; RV32-FAST-NEXT:    addi a1, a1, 64
+; RV32-FAST-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; RV32-FAST-NEXT:    vle32.v v8, (a1)
+; RV32-FAST-NEXT:    addi a0, a0, 64
+; RV32-FAST-NEXT:    vse32.v v8, (a0)
 ; RV32-FAST-NEXT:    ret
 ;
 ; RV64-FAST-LABEL: unaligned_memcpy96:
 ; RV64-FAST:       # %bb.0: # %entry
-; RV64-FAST-NEXT:    ld a2, 88(a1)
-; RV64-FAST-NEXT:    sd a2, 88(a0)
-; RV64-FAST-NEXT:    ld a2, 80(a1)
-; RV64-FAST-NEXT:    sd a2, 80(a0)
-; RV64-FAST-NEXT:    ld a2, 72(a1)
-; RV64-FAST-NEXT:    sd a2, 72(a0)
-; RV64-FAST-NEXT:    ld a2, 64(a1)
-; RV64-FAST-NEXT:    sd a2, 64(a0)
-; RV64-FAST-NEXT:    ld a2, 56(a1)
-; RV64-FAST-NEXT:    sd a2, 56(a0)
-; RV64-FAST-NEXT:    ld a2, 48(a1)
-; RV64-FAST-NEXT:    sd a2, 48(a0)
-; RV64-FAST-NEXT:    ld a2, 40(a1)
-; RV64-FAST-NEXT:    sd a2, 40(a0)
-; RV64-FAST-NEXT:    ld a2, 32(a1)
-; RV64-FAST-NEXT:    sd a2, 32(a0)
-; RV64-FAST-NEXT:    ld a2, 24(a1)
-; RV64-FAST-NEXT:    sd a2, 24(a0)
-; RV64-FAST-NEXT:    ld a2, 16(a1)
-; RV64-FAST-NEXT:    sd a2, 16(a0)
-; RV64-FAST-NEXT:    ld a2, 8(a1)
-; RV64-FAST-NEXT:    sd a2, 8(a0)
-; RV64-FAST-NEXT:    ld a1, 0(a1)
-; RV64-FAST-NEXT:    sd a1, 0(a0)
+; RV64-FAST-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
+; RV64-FAST-NEXT:    vle64.v v8, (a1)
+; RV64-FAST-NEXT:    vse64.v v8, (a0)
+; RV64-FAST-NEXT:    addi a1, a1, 64
+; RV64-FAST-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; RV64-FAST-NEXT:    vle64.v v8, (a1)
+; RV64-FAST-NEXT:    addi a0, a0, 64
+; RV64-FAST-NEXT:    vse64.v v8, (a0)
 ; RV64-FAST-NEXT:    ret
 entry:
   tail call void @llvm.memcpy.inline.p0.p0.i64(ptr %dest, ptr %src, i64 96, i1 false)
@@ -661,106 +532,17 @@ define void @unaligned_memcpy128(ptr nocapture %dest, ptr %src) nounwind {
 ;
 ; RV32-FAST-LABEL: unaligned_memcpy128:
 ; RV32-FAST:       # %bb.0: # %entry
-; RV32-FAST-NEXT:    lw a2, 124(a1)
-; RV32-FAST-NEXT:    sw a2, 124(a0)
-; RV32-FAST-NEXT:    lw a2, 120(a1)
-; RV32-FAST-NEXT:    sw a2, 120(a0)
-; RV32-FAST-NEXT:    lw a2, 116(a1)
-; RV32-FAST-NEXT:    sw a2, 116(a0)
-; RV32-FAST-NEXT:    lw a2, 112(a1)
-; RV32-FAST-NEXT:    sw a2, 112(a0)
-; RV32-FAST-NEXT:    lw a2, 108(a1)
-; RV32-FAST-NEXT:    sw a2, 108(a0)
-; RV32-FAST-NEXT:    lw a2, 104(a1)
-; RV32-FAST-NEXT:    sw a2, 104(a0)
-; RV32-FAST-NEXT:    lw a2, 100(a1)
-; RV32-FAST-NEXT:    sw a2, 100(a0)
-; RV32-FAST-NEXT:    lw a2, 96(a1)
-; RV32-FAST-NEXT:    sw a2, 96(a0)
-; RV32-FAST-NEXT:    lw a2, 92(a1)
-; RV32-FAST-NEXT:    sw a2, 92(a0)
-; RV32-FAST-NEXT:    lw a2, 88(a1)
-; RV32-FAST-NEXT:    sw a2, 88(a0)
-; RV32-FAST-NEXT:    lw a2, 84(a1)
-; RV32-FAST-NEXT:    sw a2, 84(a0)
-; RV32-FAST-NEXT:    lw a2, 80(a1)
-; RV32-FAST-NEXT:    sw a2, 80(a0)
-; RV32-FAST-NEXT:    lw a2, 76(a1)
-; RV32-FAST-NEXT:    sw a2, 76(a0)
-; RV32-FAST-NEXT:    lw a2, 72(a1)
-; RV32-FAST-NEXT:    sw a2, 72(a0)
-; RV32-FAST-NEXT:    lw a2, 68(a1)
-; RV32-FAST-NEXT:    sw a2, 68(a0)
-; RV32-FAST-NEXT:    lw a2, 64(a1)
-; RV32-FAST-NEXT:    sw a2, 64(a0)
-; RV32-FAST-NEXT:    lw a2, 60(a1)
-; RV32-FAST-NEXT:    sw a2, 60(a0)
-; RV32-FAST-NEXT:    lw a2, 56(a1)
-; RV32-FAST-NEXT:    sw a2, 56(a0)
-; RV32-FAST-NEXT:    lw a2, 52(a1)
-; RV32-FAST-NEXT:    sw a2, 52(a0)
-; RV32-FAST-NEXT:    lw a2, 48(a1)
-; RV32-FAST-NEXT:    sw a2, 48(a0)
-; RV32-FAST-NEXT:    lw a2, 44(a1)
-; RV32-FAST-NEXT:    sw a2, 44(a0)
-; RV32-FAST-NEXT:    lw a2, 40(a1)
-; RV32-FAST-NEXT:    sw a2, 40(a0)
-; RV32-FAST-NEXT:    lw a2, 36(a1)
-; RV32-FAST-NEXT:    sw a2, 36(a0)
-; RV32-FAST-NEXT:    lw a2, 32(a1)
-; RV32-FAST-NEXT:    sw a2, 32(a0)
-; RV32-FAST-NEXT:    lw a2, 28(a1)
-; RV32-FAST-NEXT:    sw a2, 28(a0)
-; RV32-FAST-NEXT:    lw a2, 24(a1)
-; RV32-FAST-NEXT:    sw a2, 24(a0)
-; RV32-FAST-NEXT:    lw a2, 20(a1)
-; RV32-FAST-NEXT:    sw a2, 20(a0)
-; RV32-FAST-NEXT:    lw a2, 16(a1)
-; RV32-FAST-NEXT:    sw a2, 16(a0)
-; RV32-FAST-NEXT:    lw a2, 12(a1)
-; RV32-FAST-NEXT:    sw a2, 12(a0)
-; RV32-FAST-NEXT:    lw a2, 8(a1)
-; RV32-FAST-NEXT:    sw a2, 8(a0)
-; RV32-FAST-NEXT:    lw a2, 4(a1)
-; RV32-FAST-NEXT:    sw a2, 4(a0)
-; RV32-FAST-NEXT:    lw a1, 0(a1)
-; RV32-FAST-NEXT:    sw a1, 0(a0)
+; RV32-FAST-NEXT:    li a2, 32
+; RV32-FAST-NEXT:    vsetvli zero, a2, e32, m8, ta, ma
+; RV32-FAST-NEXT:    vle32.v v8, (a1)
+; RV32-FAST-NEXT:    vse32.v v8, (a0)
 ; RV32-FAST-NEXT:    ret
 ;
 ; RV64-FAST-LABEL: unaligned_memcpy128:
 ; RV64-FAST:       # %bb.0: # %entry
-; RV64-FAST-NEXT:    ld a2, 120(a1)
-; RV64-FAST-NEXT:    sd a2, 120(a0)
-; RV64-FAST-NEXT:    ld a2, 112(a1)
-; RV64-FAST-NEXT:    sd a2, 112(a0)
-; RV64-FAST-NEXT:    ld a2, 104(a1)
-; RV64-FAST-NEXT:    sd a2, 104(a0)
-; RV64-FAST-NEXT:    ld a2, 96(a1)
-; RV64-FAST-NEXT:    sd a2, 96(a0)
-; RV64-FAST-NEXT:    ld a2, 88(a1)
-; RV64-FAST-NEXT:    sd a2, 88(a0)
-; RV64-FAST-NEXT:    ld a2, 80(a1)
-; RV64-FAST-NEXT:    sd a2, 80(a0)
-; RV64-FAST-NEXT:    ld a2, 72(a1)
-; RV64-FAST-NEXT:    sd a2, 72(a0)
-; RV64-FAST-NEXT:    ld a2, 64(a1)
-; RV64-FAST-NEXT:    sd a2, 64(a0)
-; RV64-FAST-NEXT:    ld a2, 56(a1)
-; RV64-FAST-NEXT:    sd a2, 56(a0)
-; RV64-FAST-NEXT:    ld a2, 48(a1)
-; RV64-FAST-NEXT:    sd a2, 48(a0)
-; RV64-FAST-NEXT:    ld a2, 40(a1)
-; RV64-FAST-NEXT:    sd a2, 40(a0)
-; RV64-FAST-NEXT:    ld a2, 32(a1)
-; RV64-FAST-NEXT:    sd a2, 32(a0)
-; RV64-FAST-NEXT:    ld a2, 24(a1)
-; RV64-FAST-NEXT:    sd a2, 24(a0)
-; RV64-FAST-NEXT:    ld a2, 16(a1)
-; RV64-FAST-NEXT:    sd a2, 16(a0)
-; RV64-FAST-NEXT:    ld a2, 8(a1)
-; RV64-FAST-NEXT:    sd a2, 8(a0)
-; RV64-FAST-NEXT:    ld a1, 0(a1)
-; RV64-FAST-NEXT:    sd a1, 0(a0)
+; RV64-FAST-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
+; RV64-FAST-NEXT:    vle64.v v8, (a1)
+; RV64-FAST-NEXT:    vse64.v v8, (a0)
 ; RV64-FAST-NEXT:    ret
 entry:
   tail call void @llvm.memcpy.inline.p0.p0.i64(ptr %dest, ptr %src, i64 128, i1 false)
@@ -808,158 +590,31 @@ define void @unaligned_memcpy196(ptr nocapture %dest, ptr %src) nounwind {
 ;
 ; RV32-FAST-LABEL: unaligned_memcpy196:
 ; RV32-FAST:       # %bb.0: # %entry
+; RV32-FAST-NEXT:    li a2, 32
+; RV32-FAST-NEXT:    vsetvli zero, a2, e32, m8, ta, ma
+; RV32-FAST-NEXT:    vle32.v v8, (a1)
+; RV32-FAST-NEXT:    vse32.v v8, (a0)
 ; RV32-FAST-NEXT:    lw a2, 192(a1)
 ; RV32-FAST-NEXT:    sw a2, 192(a0)
-; RV32-FAST-NEXT:    lw a2, 188(a1)
-; RV32-FAST-NEXT:    sw a2, 188(a0)
-; RV32-FAST-NEXT:    lw a2, 184(a1)
-; RV32-FAST-NEXT:    sw a2, 184(a0)
-; RV32-FAST-NEXT:    lw a2, 180(a1)
-; RV32-FAST-NEXT:    sw a2, 180(a0)
-; RV32-FAST-NEXT:    lw a2, 176(a1)
-; RV32-FAST-NEXT:    sw a2, 176(a0)
-; RV32-FAST-NEXT:    lw a2, 172(a1)
-; RV32-FAST-NEXT:    sw a2, 172(a0)
-; RV32-FAST-NEXT:    lw a2, 168(a1)
-; RV32-FAST-NEXT:    sw a2, 168(a0)
-; RV32-FAST-NEXT:    lw a2, 164(a1)
-; RV32-FAST-NEXT:    sw a2, 164(a0)
-; RV32-FAST-NEXT:    lw a2, 160(a1)
-; RV32-FAST-NEXT:    sw a2, 160(a0)
-; RV32-FAST-NEXT:    lw a2, 156(a1)
-; RV32-FAST-NEXT:    sw a2, 156(a0)
-; RV32-FAST-NEXT:    lw a2, 152(a1)
-; RV32-FAST-NEXT:    sw a2, 152(a0)
-; RV32-FAST-NEXT:    lw a2, 148(a1)
-; RV32-FAST-NEXT:    sw a2, 148(a0)
-; RV32-FAST-NEXT:    lw a2, 144(a1)
-; RV32-FAST-NEXT:    sw a2, 144(a0)
-; RV32-FAST-NEXT:    lw a2, 140(a1)
-; RV32-FAST-NEXT:    sw a2, 140(a0)
-; RV32-FAST-NEXT:    lw a2, 136(a1)
-; RV32-FAST-NEXT:    sw a2, 136(a0)
-; RV32-FAST-NEXT:    lw a2, 132(a1)
-; RV32-FAST-NEXT:    sw a2, 132(a0)
-; RV32-FAST-NEXT:    lw a2, 128(a1)
-; RV32-FAST-NEXT:    sw a2, 128(a0)
-; RV32-FAST-NEXT:    lw a2, 124(a1)
-; RV32-FAST-NEXT:    sw a2, 124(a0)
-; RV32-FAST-NEXT:    lw a2, 120(a1)
-; RV32-FAST-NEXT:    sw a2, 120(a0)
-; RV32-FAST-NEXT:    lw a2, 116(a1)
-; RV32-FAST-NEXT:    sw a2, 116(a0)
-; RV32-FAST-NEXT:    lw a2, 112(a1)
-; RV32-FAST-NEXT:    sw a2, 112(a0)
-; RV32-FAST-NEXT:    lw a2, 108(a1)
-; RV32-FAST-NEXT:    sw a2, 108(a0)
-; RV32-FAST-NEXT:    lw a2, 104(a1)
-; RV32-FAST-NEXT:    sw a2, 104(a0)
-; RV32-FAST-NEXT:    lw a2, 100(a1)
-; RV32-FAST-NEXT:    sw a2, 100(a0)
-; RV32-FAST-NEXT:    lw a2, 96(a1)
-; RV32-FAST-NEXT:    sw a2, 96(a0)
-; RV32-FAST-NEXT:    lw a2, 92(a1)
-; RV32-FAST-NEXT:    sw a2, 92(a0)
-; RV32-FAST-NEXT:    lw a2, 88(a1)
-; RV32-FAST-NEXT:    sw a2, 88(a0)
-; RV32-FAST-NEXT:    lw a2, 84(a1)
-; RV32-FAST-NEXT:    sw a2, 84(a0)
-; RV32-FAST-NEXT:    lw a2, 80(a1)
-; RV32-FAST-NEXT:    sw a2, 80(a0)
-; RV32-FAST-NEXT:    lw a2, 76(a1)
-; RV32-FAST-NEXT:    sw a2, 76(a0)
-; RV32-FAST-NEXT:    lw a2, 72(a1)
-; RV32-FAST-NEXT:    sw a2, 72(a0)
-; RV32-FAST-NEXT:    lw a2, 68(a1)
-; RV32-FAST-NEXT:    sw a2, 68(a0)
-; RV32-FAST-NEXT:    lw a2, 64(a1)
-; RV32-FAST-NEXT:    sw a2, 64(a0)
-; RV32-FAST-NEXT:    lw a2, 60(a1)
-; RV32-FAST-NEXT:    sw a2, 60(a0)
-; RV32-FAST-NEXT:    lw a2, 56(a1)
-; RV32-FAST-NEXT:    sw a2, 56(a0)
-; RV32-FAST-NEXT:    lw a2, 52(a1)
-; RV32-FAST-NEXT:    sw a2, 52(a0)
-; RV32-FAST-NEXT:    lw a2, 48(a1)
-; RV32-FAST-NEXT:    sw a2, 48(a0)
-; RV32-FAST-NEXT:    lw a2, 44(a1)
-; RV32-FAST-NEXT:    sw a2, 44(a0)
-; RV32-FAST-NEXT:    lw a2, 40(a1)
-; RV32-FAST-NEXT:    sw a2, 40(a0)
-; RV32-FAST-NEXT:    lw a2, 36(a1)
-; RV32-FAST-NEXT:    sw a2, 36(a0)
-; RV32-FAST-NEXT:    lw a2, 32(a1)
-; RV32-FAST-NEXT:    sw a2, 32(a0)
-; RV32-FAST-NEXT:    lw a2, 28(a1)
-; RV32-FAST-NEXT:    sw a2, 28(a0)
-; RV32-FAST-NEXT:    lw a2, 24(a1)
-; RV32-FAST-NEXT:    sw a2, 24(a0)
-; RV32-FAST-NEXT:    lw a2, 20(a1)
-; RV32-FAST-NEXT:    sw a2, 20(a0)
-; RV32-FAST-NEXT:    lw a2, 16(a1)
-; RV32-FAST-NEXT:    sw a2, 16(a0)
-; RV32-FAST-NEXT:    lw a2, 12(a1)
-; RV32-FAST-NEXT:    sw a2, 12(a0)
-; RV32-FAST-NEXT:    lw a2, 8(a1)
-; RV32-FAST-NEXT:    sw a2, 8(a0)
-; RV32-FAST-NEXT:    lw a2, 4(a1)
-; RV32-FAST-NEXT:    sw a2, 4(a0)
-; RV32-FAST-NEXT:    lw a1, 0(a1)
-; RV32-FAST-NEXT:    sw a1, 0(a0)
+; RV32-FAST-NEXT:    addi a1, a1, 128
+; RV32-FAST-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
+; RV32-FAST-NEXT:    vle32.v v8, (a1)
+; RV32-FAST-NEXT:    addi a0, a0, 128
+; RV32-FAST-NEXT:    vse32.v v8, (a0)
 ; RV32-FAST-NEXT:    ret
 ;
 ; RV64-FAST-LABEL: unaligned_memcpy196:
 ; RV64-FAST:       # %bb.0: # %entry
 ; RV64-FAST-NEXT:    lw a2, 192(a1)
 ; RV64-FAST-NEXT:    sw a2, 192(a0)
-; RV64-FAST-NEXT:    ld a2, 184(a1)
-; RV64-FAST-NEXT:    sd a2, 184(a0)
-; RV64-FAST-NEXT:    ld a2, 176(a1)
-; RV64-FAST-NEXT:    sd a2, 176(a0)
-; RV64-FAST-NEXT:    ld a2, 168(a1)
-; RV64-FAST-NEXT:    sd a2, 168(a0)
-; RV64-FAST-NEXT:    ld a2, 160(a1)
-; RV64-FAST-NEXT:    sd a2, 160(a0)
-; RV64-FAST-NEXT:    ld a2, 152(a1)
-; RV64-FAST-NEXT:    sd a2, 152(a0)
-; RV64-FAST-NEXT:    ld a2, 144(a1)
-; RV64-FAST-NEXT:    sd a2, 144(a0)
-; RV64-FAST-NEXT:    ld a2, 136(a1)
-; RV64-FAST-NEXT:    sd a2, 136(a0)
-; RV64-FAST-NEXT:    ld a2, 128(a1)
-; RV64-FAST-NEXT:    sd a2, 128(a0)
-; RV64-FAST-NEXT:    ld a2, 120(a1)
-; RV64-FAST-NEXT:    sd a2, 120(a0)
-; RV64-FAST-NEXT:    ld a2, 112(a1)
-; RV64-FAST-NEXT:    sd a2, 112(a0)
-; RV64-FAST-NEXT:    ld a2, 104(a1)
-; RV64-FAST-NEXT:    sd a2, 104(a0)
-; RV64-FAST-NEXT:    ld a2, 96(a1)
-; RV64-FAST-NEXT:    sd a2, 96(a0)
-; RV64-FAST-NEXT:    ld a2, 88(a1)
-; RV64-FAST-NEXT:    sd a2, 88(a0)
-; RV64-FAST-NEXT:    ld a2, 80(a1)
-; RV64-FAST-NEXT:    sd a2, 80(a0)
-; RV64-FAST-NEXT:    ld a2, 72(a1)
-; RV64-FAST-NEXT:    sd a2, 72(a0)
-; RV64-FAST-NEXT:    ld a2, 64(a1)
-; RV64-FAST-NEXT:    sd a2, 64(a0)
-; RV64-FAST-NEXT:    ld a2, 56(a1)
-; RV64-FAST-NEXT:    sd a2, 56(a0)
-; RV64-FAST-NEXT:    ld a2, 48(a1)
-; RV64-FAST-NEXT:    sd a2, 48(a0)
-; RV64-FAST-NEXT:    ld a2, 40(a1)
-; RV64-FAST-NEXT:    sd a2, 40(a0)
-; RV64-FAST-NEXT:    ld a2, 32(a1)
-; RV64-FAST-NEXT:    sd a2, 32(a0)
-; RV64-FAST-NEXT:    ld a2, 24(a1)
-; RV64-FAST-NEXT:    sd a2, 24(a0)
-; RV64-FAST-NEXT:    ld a2, 16(a1)
-; RV64-FAST-NEXT:    sd a2, 16(a0)
-; RV64-FAST-NEXT:    ld a2, 8(a1)
-; RV64-FAST-NEXT:    sd a2, 8(a0)
-; RV64-FAST-NEXT:    ld a1, 0(a1)
-; RV64-FAST-NEXT:    sd a1, 0(a0)
+; RV64-FAST-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
+; RV64-FAST-NEXT:    vle64.v v8, (a1)
+; RV64-FAST-NEXT:    vse64.v v8, (a0)
+; RV64-FAST-NEXT:    addi a1, a1, 128
+; RV64-FAST-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
+; RV64-FAST-NEXT:    vle64.v v8, (a1)
+; RV64-FAST-NEXT:    addi a0, a0, 128
+; RV64-FAST-NEXT:    vse64.v v8, (a0)
 ; RV64-FAST-NEXT:    ret
 entry:
   tail call void @llvm.memcpy.inline.p0.p0.i64(ptr %dest, ptr %src, i64 196, i1 false)
@@ -993,202 +648,25 @@ define void @unaligned_memcpy256(ptr nocapture %dest, ptr %src) nounwind {
 ;
 ; RV32-FAST-LABEL: unaligned_memcpy256:
 ; RV32-FAST:       # %bb.0: # %entry
-; RV32-FAST-NEXT:    lw a2, 252(a1)
-; RV32-FAST-NEXT:    sw a2, 252(a0)
-; RV32-FAST-NEXT:    lw a2, 248(a1)
-; RV32-FAST-NEXT:    sw a2, 248(a0)
-; RV32-FAST-NEXT:    lw a2, 244(a1)
-; RV32-FAST-NEXT:    sw a2, 244(a0)
-; RV32-FAST-NEXT:    lw a2, 240(a1)
-; RV32-FAST-NEXT:    sw a2, 240(a0)
-; RV32-FAST-NEXT:    lw a2, 236(a1)
-; RV32-FAST-NEXT:    sw a2, 236(a0)
-; RV32-FAST-NEXT:    lw a2, 232(a1)
-; RV32-FAST-NEXT:    sw a2, 232(a0)
-; RV32-FAST-NEXT:    lw a2, 228(a1)
-; RV32-FAST-NEXT:    sw a2, 228(a0)
-; RV32-FAST-NEXT:    lw a2, 224(a1)
-; RV32-FAST-NEXT:    sw a2, 224(a0)
-; RV32-FAST-NEXT:    lw a2, 220(a1)
-; RV32-FAST-NEXT:    sw a2, 220(a0)
-; RV32-FAST-NEXT:    lw a2, 216(a1)
-; RV32-FAST-NEXT:    sw a2, 216(a0)
-; RV32-FAST-NEXT:    lw a2, 212(a1)
-; RV32-FAST-NEXT:    sw a2, 212(a0)
-; RV32-FAST-NEXT:    lw a2, 208(a1)
-; RV32-FAST-NEXT:    sw a2, 208(a0)
-; RV32-FAST-NEXT:    lw a2, 204(a1)
-; RV32-FAST-NEXT:    sw a2, 204(a0)
-; RV32-FAST-NEXT:    lw a2, 200(a1)
-; RV32-FAST-NEXT:    sw a2, 200(a0)
-; RV32-FAST-NEXT:    lw a2, 196(a1)
-; RV32-FAST-NEXT:    sw a2, 196(a0)
-; RV32-FAST-NEXT:    lw a2, 192(a1)
-; RV32-FAST-NEXT:    sw a2, 192(a0)
-; RV32-FAST-NEXT:    lw a2, 188(a1)
-; RV32-FAST-NEXT:    sw a2, 188(a0)
-; RV32-FAST-NEXT:    lw a2, 184(a1)
-; RV32-FAST-NEXT:    sw a2, 184(a0)
-; RV32-FAST-NEXT:    lw a2, 180(a1)
-; RV32-FAST-NEXT:    sw a2, 180(a0)
-; RV32-FAST-NEXT:    lw a2, 176(a1)
-; RV32-FAST-NEXT:    sw a2, 176(a0)
-; RV32-FAST-NEXT:    lw a2, 172(a1)
-; RV32-FAST-NEXT:    sw a2, 172(a0)
-; RV32-FAST-NEXT:    lw a2, 168(a1)
-; RV32-FAST-NEXT:    sw a2, 168(a0)
-; RV32-FAST-NEXT:    lw a2, 164(a1)
-; RV32-FAST-NEXT:    sw a2, 164(a0)
-; RV32-FAST-NEXT:    lw a2, 160(a1)
-; RV32-FAST-NEXT:    sw a2, 160(a0)
-; RV32-FAST-NEXT:    lw a2, 156(a1)
-; RV32-FAST-NEXT:    sw a2, 156(a0)
-; RV32-FAST-NEXT:    lw a2, 152(a1)
-; RV32-FAST-NEXT:    sw a2, 152(a0)
-; RV32-FAST-NEXT:    lw a2, 148(a1)
-; RV32-FAST-NEXT:    sw a2, 148(a0)
-; RV32-FAST-NEXT:    lw a2, 144(a1)
-; RV32-FAST-NEXT:    sw a2, 144(a0)
-; RV32-FAST-NEXT:    lw a2, 140(a1)
-; RV32-FAST-NEXT:    sw a2, 140(a0)
-; RV32-FAST-NEXT:    lw a2, 136(a1)
-; RV32-FAST-NEXT:    sw a2, 136(a0)
-; RV32-FAST-NEXT:    lw a2, 132(a1)
-; RV32-FAST-NEXT:    sw a2, 132(a0)
-; RV32-FAST-NEXT:    lw a2, 128(a1)
-; RV32-FAST-NEXT:    sw a2, 128(a0)
-; RV32-FAST-NEXT:    lw a2, 124(a1)
-; RV32-FAST-NEXT:    sw a2, 124(a0)
-; RV32-FAST-NEXT:    lw a2, 120(a1)
-; RV32-FAST-NEXT:    sw a2, 120(a0)
-; RV32-FAST-NEXT:    lw a2, 116(a1)
-; RV32-FAST-NEXT:    sw a2, 116(a0)
-; RV32-FAST-NEXT:    lw a2, 112(a1)
-; RV32-FAST-NEXT:    sw a2, 112(a0)
-; RV32-FAST-NEXT:    lw a2, 108(a1)
-; RV32-FAST-NEXT:    sw a2, 108(a0)
-; RV32-FAST-NEXT:    lw a2, 104(a1)
-; RV32-FAST-NEXT:    sw a2, 104(a0)
-; RV32-FAST-NEXT:    lw a2, 100(a1)
-; RV32-FAST-NEXT:    sw a2, 100(a0)
-; RV32-FAST-NEXT:    lw a2, 96(a1)
-; RV32-FAST-NEXT:    sw a2, 96(a0)
-; RV32-FAST-NEXT:    lw a2, 92(a1)
-; RV32-FAST-NEXT:    sw a2, 92(a0)
-; RV32-FAST-NEXT:    lw a2, 88(a1)
-; RV32-FAST-NEXT:    sw a2, 88(a0)
-; RV32-FAST-NEXT:    lw a2, 84(a1)
-; RV32-FAST-NEXT:    sw a2, 84(a0)
-; RV32-FAST-NEXT:    lw a2, 80(a1)
-; RV32-FAST-NEXT:    sw a2, 80(a0)
-; RV32-FAST-NEXT:    lw a2, 76(a1)
-; RV32-FAST-NEXT:    sw a2, 76(a0)
-; RV32-FAST-NEXT:    lw a2, 72(a1)
-; RV32-FAST-NEXT:    sw a2, 72(a0)
-; RV32-FAST-NEXT:    lw a2, 68(a1)
-; RV32-FAST-NEXT:    sw a2, 68(a0)
-; RV32-FAST-NEXT:    lw a2, 64(a1)
-; RV32-FAST-NEXT:    sw a2, 64(a0)
-; RV32-FAST-NEXT:    lw a2, 60(a1)
-; RV32-FAST-NEXT:    sw a2, 60(a0)
-; RV32-FAST-NEXT:    lw a2, 56(a1)
-; RV32-FAST-NEXT:    sw a2, 56(a0)
-; RV32-FAST-NEXT:    lw a2, 52(a1)
-; RV32-FAST-NEXT:    sw a2, 52(a0)
-; RV32-FAST-NEXT:    lw a2, 48(a1)
-; RV32-FAST-NEXT:    sw a2, 48(a0)
-; RV32-FAST-NEXT:    lw a2, 44(a1)
-; RV32-FAST-NEXT:    sw a2, 44(a0)
-; RV32-FAST-NEXT:    lw a2, 40(a1)
-; RV32-FAST-NEXT:    sw a2, 40(a0)
-; RV32-FAST-NEXT:    lw a2, 36(a1)
-; RV32-FAST-NEXT:    sw a2, 36(a0)
-; RV32-FAST-NEXT:    lw a2, 32(a1)
-; RV32-FAST-NEXT:    sw a2, 32(a0)
-; RV32-FAST-NEXT:    lw a2, 28(a1)
-; RV32-FAST-NEXT:    sw a2, 28(a0)
-; RV32-FAST-NEXT:    lw a2, 24(a1)
-; RV32-FAST-NEXT:    sw a2, 24(a0)
-; RV32-FAST-NEXT:    lw a2, 20(a1)
-; RV32-FAST-NEXT:    sw a2, 20(a0)
-; RV32-FAST-NEXT:    lw a2, 16(a1)
-; RV32-FAST-NEXT:    sw a2, 16(a0)
-; RV32-FAST-NEXT:    lw a2, 12(a1)
-; RV32-FAST-NEXT:    sw a2, 12(a0)
-; RV32-FAST-NEXT:    lw a2, 8(a1)
-; RV32-FAST-NEXT:    sw a2, 8(a0)
-; RV32-FAST-NEXT:    lw a2, 4(a1)
-; RV32-FAST-NEXT:    sw a2, 4(a0)
-; RV32-FAST-NEXT:    lw a1, 0(a1)
-; RV32-FAST-NEXT:    sw a1, 0(a0)
+; RV32-FAST-NEXT:    li a2, 32
+; RV32-FAST-NEXT:    vsetvli zero, a2, e32, m8, ta, ma
+; RV32-FAST-NEXT:    vle32.v v8, (a1)
+; RV32-FAST-NEXT:    vse32.v v8, (a0)
+; RV32-FAST-NEXT:    addi a1, a1, 128
+; RV32-FAST-NEXT:    vle32.v v8, (a1)
+; RV32-FAST-NEXT:    addi a0, a0, 128
+; RV32-FAST-NEXT:    vse32.v v8, (a0)
 ; RV32-FAST-NEXT:    ret
 ;
 ; RV64-FAST-LABEL: unaligned_memcpy256:
 ; RV64-FAST:       # %bb.0: # %entry
-; RV64-FAST-NEXT:    ld a2, 248(a1)
-; RV64-FAST-NEXT:    sd a2, 248(a0)
-; RV64-FAST-NEXT:    ld a2, 240(a1)
-; RV64-FAST-NEXT:    sd a2, 240(a0)
-; RV64-FAST-NEXT:    ld a2, 232(a1)
-; RV64-FAST-NEXT:    sd a2, 232(a0)
-; RV64-FAST-NEXT:    ld a2, 224(a1)
-; RV64-FAST-NEXT:    sd a2, 224(a0)
-; RV64-FAST-NEXT:    ld a2, 216(a1)
-; RV64-FAST-NEXT:    sd a2, 216(a0)
-; RV64-FAST-NEXT:    ld a2, 208(a1)
-; RV64-FAST-NEXT:    sd a2, 208(a0)
-; RV64-FAST-NEXT:    ld a2, 200(a1)
-; RV64-FAST-NEXT:    sd a2, 200(a0)
-; RV64-FAST-NEXT:    ld a2, 192(a1)
-; RV64-FAST-NEXT:    sd a2, 192(a0)
-; RV64-FAST-NEXT:    ld a2, 184(a1)
-; RV64-FAST-NEXT:    sd a2, 184(a0)
-; RV64-FAST-NEXT:    ld a2, 176(a1)
-; RV64-FAST-NEXT:    sd a2, 176(a0)
-; RV64-FAST-NEXT:    ld a2, 168(a1)
-; RV64-FAST-NEXT:    sd a2, 168(a0)
-; RV64-FAST-NEXT:    ld a2, 160(a1)
-; RV64-FAST-NEXT:    sd a2, 160(a0)
-; RV64-FAST-NEXT:    ld a2, 152(a1)
-; RV64-FAST-NEXT:    sd a2, 152(a0)
-; RV64-FAST-NEXT:    ld a2, 144(a1)
-; RV64-FAST-NEXT:    sd a2, 144(a0)
-; RV64-FAST-NEXT:    ld a2, 136(a1)
-; RV64-FAST-NEXT:    sd a2, 136(a0)
-; RV64-FAST-NEXT:    ld a2, 128(a1)
-; RV64-FAST-NEXT:    sd a2, 128(a0)
-; RV64-FAST-NEXT:    ld a2, 120(a1)
-; RV64-FAST-NEXT:    sd a2, 120(a0)
-; RV64-FAST-NEXT:    ld a2, 112(a1)
-; RV64-FAST-NEXT:    sd a2, 112(a0)
-; RV64-FAST-NEXT:    ld a2, 104(a1)
-; RV64-FAST-NEXT:    sd a2, 104(a0)
-; RV64-FAST-NEXT:    ld a2, 96(a1)
-; RV64-FAST-NEXT:    sd a2, 96(a0)
-; RV64-FAST-NEXT:    ld a2, 88(a1)
-; RV64-FAST-NEXT:    sd a2, 88(a0)
-; RV64-FAST-NEXT:    ld a2, 80(a1)
-; RV64-FAST-NEXT:    sd a2, 80(a0)
-; RV64-FAST-NEXT:    ld a2, 72(a1)
-; RV64-FAST-NEXT:    sd a2, 72(a0)
-; RV64-FAST-NEXT:    ld a2, 64(a1)
-; RV64-FAST-NEXT:    sd a2, 64(a0)
-; RV64-FAST-NEXT:    ld a2, 56(a1)
-; RV64-FAST-NEXT:    sd a2, 56(a0)
-; RV64-FAST-NEXT:    ld a2, 48(a1)
-; RV64-FAST-NEXT:    sd a2, 48(a0)
-; RV64-FAST-NEXT:    ld a2, 40(a1)
-; RV64-FAST-NEXT:    sd a2, 40(a0)
-; RV64-FAST-NEXT:    ld a2, 32(a1)
-; RV64-FAST-NEXT:    sd a2, 32(a0)
-; RV64-FAST-NEXT:    ld a2, 24(a1)
-; RV64-FAST-NEXT:    sd a2, 24(a0)
-; RV64-FAST-NEXT:    ld a2, 16(a1)
-; RV64-FAST-NEXT:    sd a2, 16(a0)
-; RV64-FAST-NEXT:    ld a2, 8(a1)
-; RV64-FAST-NEXT:    sd a2, 8(a0)
-; RV64-FAST-NEXT:    ld a1, 0(a1)
-; RV64-FAST-NEXT:    sd a1, 0(a0)
+; RV64-FAST-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
+; RV64-FAST-NEXT:    vle64.v v8, (a1)
+; RV64-FAST-NEXT:    vse64.v v8, (a0)
+; RV64-FAST-NEXT:    addi a1, a1, 128
+; RV64-FAST-NEXT:    vle64.v v8, (a1)
+; RV64-FAST-NEXT:    addi a0, a0, 128
+; RV64-FAST-NEXT:    vse64.v v8, (a0)
 ; RV64-FAST-NEXT:    ret
 entry:
   tail call void @llvm.memcpy.inline.p0.p0.i64(ptr %dest, ptr %src, i64 256, i1 false)
@@ -1616,10 +1094,9 @@ define void @memcpy16_align4(ptr nocapture %dest, ptr nocapture %src) nounwind {
 ;
 ; RV64-FAST-LABEL: memcpy16_align4:
 ; RV64-FAST:       # %bb.0: # %entry
-; RV64-FAST-NEXT:    ld a2, 8(a1)
-; RV64-FAST-NEXT:    sd a2, 8(a0)
-; RV64-FAST-NEXT:    ld a1, 0(a1)
-; RV64-FAST-NEXT:    sd a1, 0(a0)
+; RV64-FAST-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; RV64-FAST-NEXT:    vle64.v v8, (a1)
+; RV64-FAST-NEXT:    vse64.v v8, (a0)
 ; RV64-FAST-NEXT:    ret
 entry:
   tail call void @llvm.memcpy.inline.p0.p0.i32(ptr align 4 %dest, ptr align 4 %src, i32 16, i1 false)