[llvm] eb3f2fe - [RISCV] Revise check names for unaligned memory op tests [nfc]
Philip Reames via llvm-commits
llvm-commits@lists.llvm.org
Thu Jul 20 13:48:13 PDT 2023
Author: Philip Reames
Date: 2023-07-20T13:36:53-07:00
New Revision: eb3f2fe467f10b29c407e642e8a505ee6800c1d8
URL: https://github.com/llvm/llvm-project/commit/eb3f2fe467f10b29c407e642e8a505ee6800c1d8
DIFF: https://github.com/llvm/llvm-project/commit/eb3f2fe467f10b29c407e642e8a505ee6800c1d8.diff
LOG: [RISCV] Revise check names for unaligned memory op tests [nfc]
This has come up a few times in review; the current check prefixes seem to be universally confusing. Even I, as the original author of most of these, get confused. Switch to the SLOW/FAST naming used by x86; hopefully that's a bit clearer.
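To illustrate the new convention (the RUN lines below are copied from the updated memcpy-inline.ll in the diff): the shared *-BOTH prefix checks output common to both configurations, the bare arch prefix checks the default configuration without unaligned access, and *-FAST checks the +unaligned-scalar-mem configuration. The vector tests instead use explicit SLOW/FAST prefixes.
; RUN: llc < %s -mtriple=riscv64 \
; RUN:   | FileCheck %s --check-prefixes=RV64-BOTH,RV64
; RUN: llc < %s -mtriple=riscv64 -mattr=+unaligned-scalar-mem \
; RUN:   | FileCheck %s --check-prefixes=RV64-BOTH,RV64-FAST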
Added:
Modified:
llvm/test/CodeGen/RISCV/memcpy-inline.ll
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-unaligned.ll
llvm/test/CodeGen/RISCV/rvv/unaligned-loads-stores.ll
llvm/test/CodeGen/RISCV/unaligned-load-store.ll
Removed:
################################################################################
diff --git a/llvm/test/CodeGen/RISCV/memcpy-inline.ll b/llvm/test/CodeGen/RISCV/memcpy-inline.ll
index 4b1b01ce0151c3..5af375e9412831 100644
--- a/llvm/test/CodeGen/RISCV/memcpy-inline.ll
+++ b/llvm/test/CodeGen/RISCV/memcpy-inline.ll
@@ -1,12 +1,12 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=riscv32 \
-; RUN: | FileCheck %s --check-prefixes=RV32,RV32ALIGNED
+; RUN: | FileCheck %s --check-prefixes=RV32-BOTH,RV32
; RUN: llc < %s -mtriple=riscv64 \
-; RUN: | FileCheck %s --check-prefixes=RV64,RV64ALIGNED
+; RUN: | FileCheck %s --check-prefixes=RV64-BOTH,RV64
; RUN: llc < %s -mtriple=riscv32 -mattr=+unaligned-scalar-mem \
-; RUN: | FileCheck %s --check-prefixes=RV32,RV32UNALIGNED
+; RUN: | FileCheck %s --check-prefixes=RV32-BOTH,RV32-FAST
; RUN: llc < %s -mtriple=riscv64 -mattr=+unaligned-scalar-mem \
-; RUN: | FileCheck %s --check-prefixes=RV64,RV64UNALIGNED
+; RUN: | FileCheck %s --check-prefixes=RV64-BOTH,RV64-FAST
%struct.x = type { i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8 }
@src = external dso_local global %struct.x
@@ -21,336 +21,336 @@
@spool.splbuf = internal global [512 x i8] zeroinitializer, align 16
define i32 @t0() {
-; RV32-LABEL: t0:
-; RV32: # %bb.0: # %entry
-; RV32-NEXT: lui a0, %hi(src)
-; RV32-NEXT: lw a1, %lo(src)(a0)
-; RV32-NEXT: lui a2, %hi(dst)
-; RV32-NEXT: sw a1, %lo(dst)(a2)
-; RV32-NEXT: addi a0, a0, %lo(src)
-; RV32-NEXT: lbu a1, 10(a0)
-; RV32-NEXT: lh a3, 8(a0)
-; RV32-NEXT: lw a0, 4(a0)
-; RV32-NEXT: addi a2, a2, %lo(dst)
-; RV32-NEXT: sb a1, 10(a2)
-; RV32-NEXT: sh a3, 8(a2)
-; RV32-NEXT: sw a0, 4(a2)
-; RV32-NEXT: li a0, 0
-; RV32-NEXT: ret
+; RV32-BOTH-LABEL: t0:
+; RV32-BOTH: # %bb.0: # %entry
+; RV32-BOTH-NEXT: lui a0, %hi(src)
+; RV32-BOTH-NEXT: lw a1, %lo(src)(a0)
+; RV32-BOTH-NEXT: lui a2, %hi(dst)
+; RV32-BOTH-NEXT: sw a1, %lo(dst)(a2)
+; RV32-BOTH-NEXT: addi a0, a0, %lo(src)
+; RV32-BOTH-NEXT: lbu a1, 10(a0)
+; RV32-BOTH-NEXT: lh a3, 8(a0)
+; RV32-BOTH-NEXT: lw a0, 4(a0)
+; RV32-BOTH-NEXT: addi a2, a2, %lo(dst)
+; RV32-BOTH-NEXT: sb a1, 10(a2)
+; RV32-BOTH-NEXT: sh a3, 8(a2)
+; RV32-BOTH-NEXT: sw a0, 4(a2)
+; RV32-BOTH-NEXT: li a0, 0
+; RV32-BOTH-NEXT: ret
;
-; RV64-LABEL: t0:
-; RV64: # %bb.0: # %entry
-; RV64-NEXT: lui a0, %hi(src)
-; RV64-NEXT: ld a1, %lo(src)(a0)
-; RV64-NEXT: lui a2, %hi(dst)
-; RV64-NEXT: addi a0, a0, %lo(src)
-; RV64-NEXT: lbu a3, 10(a0)
-; RV64-NEXT: lh a0, 8(a0)
-; RV64-NEXT: sd a1, %lo(dst)(a2)
-; RV64-NEXT: addi a1, a2, %lo(dst)
-; RV64-NEXT: sb a3, 10(a1)
-; RV64-NEXT: sh a0, 8(a1)
-; RV64-NEXT: li a0, 0
-; RV64-NEXT: ret
+; RV64-BOTH-LABEL: t0:
+; RV64-BOTH: # %bb.0: # %entry
+; RV64-BOTH-NEXT: lui a0, %hi(src)
+; RV64-BOTH-NEXT: ld a1, %lo(src)(a0)
+; RV64-BOTH-NEXT: lui a2, %hi(dst)
+; RV64-BOTH-NEXT: addi a0, a0, %lo(src)
+; RV64-BOTH-NEXT: lbu a3, 10(a0)
+; RV64-BOTH-NEXT: lh a0, 8(a0)
+; RV64-BOTH-NEXT: sd a1, %lo(dst)(a2)
+; RV64-BOTH-NEXT: addi a1, a2, %lo(dst)
+; RV64-BOTH-NEXT: sb a3, 10(a1)
+; RV64-BOTH-NEXT: sh a0, 8(a1)
+; RV64-BOTH-NEXT: li a0, 0
+; RV64-BOTH-NEXT: ret
entry:
call void @llvm.memcpy.p0.p0.i32(ptr align 8 @dst, ptr align 8 @src, i32 11, i1 false)
ret i32 0
}
define void @t1(ptr nocapture %C) nounwind {
-; RV32-LABEL: t1:
-; RV32: # %bb.0: # %entry
-; RV32-NEXT: lui a1, %hi(.L.str1)
-; RV32-NEXT: addi a1, a1, %lo(.L.str1)
-; RV32-NEXT: li a2, 31
-; RV32-NEXT: tail memcpy@plt
+; RV32-BOTH-LABEL: t1:
+; RV32-BOTH: # %bb.0: # %entry
+; RV32-BOTH-NEXT: lui a1, %hi(.L.str1)
+; RV32-BOTH-NEXT: addi a1, a1, %lo(.L.str1)
+; RV32-BOTH-NEXT: li a2, 31
+; RV32-BOTH-NEXT: tail memcpy@plt
;
-; RV64ALIGNED-LABEL: t1:
-; RV64ALIGNED: # %bb.0: # %entry
-; RV64ALIGNED-NEXT: lui a1, %hi(.L.str1)
-; RV64ALIGNED-NEXT: addi a1, a1, %lo(.L.str1)
-; RV64ALIGNED-NEXT: li a2, 31
-; RV64ALIGNED-NEXT: tail memcpy@plt
+; RV64-LABEL: t1:
+; RV64: # %bb.0: # %entry
+; RV64-NEXT: lui a1, %hi(.L.str1)
+; RV64-NEXT: addi a1, a1, %lo(.L.str1)
+; RV64-NEXT: li a2, 31
+; RV64-NEXT: tail memcpy@plt
;
-; RV64UNALIGNED-LABEL: t1:
-; RV64UNALIGNED: # %bb.0: # %entry
-; RV64UNALIGNED-NEXT: lui a1, %hi(.L.str1)
-; RV64UNALIGNED-NEXT: ld a2, %lo(.L.str1)(a1)
-; RV64UNALIGNED-NEXT: sd a2, 0(a0)
-; RV64UNALIGNED-NEXT: lui a2, 4
-; RV64UNALIGNED-NEXT: addiw a2, a2, 1870
-; RV64UNALIGNED-NEXT: sh a2, 28(a0)
-; RV64UNALIGNED-NEXT: lui a2, 300325
-; RV64UNALIGNED-NEXT: addiw a2, a2, 1107
-; RV64UNALIGNED-NEXT: addi a1, a1, %lo(.L.str1)
-; RV64UNALIGNED-NEXT: ld a3, 16(a1)
-; RV64UNALIGNED-NEXT: ld a1, 8(a1)
-; RV64UNALIGNED-NEXT: sw a2, 24(a0)
-; RV64UNALIGNED-NEXT: sb zero, 30(a0)
-; RV64UNALIGNED-NEXT: sd a3, 16(a0)
-; RV64UNALIGNED-NEXT: sd a1, 8(a0)
-; RV64UNALIGNED-NEXT: ret
+; RV64-FAST-LABEL: t1:
+; RV64-FAST: # %bb.0: # %entry
+; RV64-FAST-NEXT: lui a1, %hi(.L.str1)
+; RV64-FAST-NEXT: ld a2, %lo(.L.str1)(a1)
+; RV64-FAST-NEXT: sd a2, 0(a0)
+; RV64-FAST-NEXT: lui a2, 4
+; RV64-FAST-NEXT: addiw a2, a2, 1870
+; RV64-FAST-NEXT: sh a2, 28(a0)
+; RV64-FAST-NEXT: lui a2, 300325
+; RV64-FAST-NEXT: addiw a2, a2, 1107
+; RV64-FAST-NEXT: addi a1, a1, %lo(.L.str1)
+; RV64-FAST-NEXT: ld a3, 16(a1)
+; RV64-FAST-NEXT: ld a1, 8(a1)
+; RV64-FAST-NEXT: sw a2, 24(a0)
+; RV64-FAST-NEXT: sb zero, 30(a0)
+; RV64-FAST-NEXT: sd a3, 16(a0)
+; RV64-FAST-NEXT: sd a1, 8(a0)
+; RV64-FAST-NEXT: ret
entry:
tail call void @llvm.memcpy.p0.p0.i64(ptr %C, ptr @.str1, i64 31, i1 false)
ret void
}
define void @t2(ptr nocapture %C) nounwind {
-; RV32-LABEL: t2:
-; RV32: # %bb.0: # %entry
-; RV32-NEXT: lui a1, %hi(.L.str2)
-; RV32-NEXT: addi a1, a1, %lo(.L.str2)
-; RV32-NEXT: li a2, 36
-; RV32-NEXT: tail memcpy@plt
+; RV32-BOTH-LABEL: t2:
+; RV32-BOTH: # %bb.0: # %entry
+; RV32-BOTH-NEXT: lui a1, %hi(.L.str2)
+; RV32-BOTH-NEXT: addi a1, a1, %lo(.L.str2)
+; RV32-BOTH-NEXT: li a2, 36
+; RV32-BOTH-NEXT: tail memcpy@plt
;
-; RV64ALIGNED-LABEL: t2:
-; RV64ALIGNED: # %bb.0: # %entry
-; RV64ALIGNED-NEXT: lui a1, %hi(.L.str2)
-; RV64ALIGNED-NEXT: addi a1, a1, %lo(.L.str2)
-; RV64ALIGNED-NEXT: li a2, 36
-; RV64ALIGNED-NEXT: tail memcpy@plt
+; RV64-LABEL: t2:
+; RV64: # %bb.0: # %entry
+; RV64-NEXT: lui a1, %hi(.L.str2)
+; RV64-NEXT: addi a1, a1, %lo(.L.str2)
+; RV64-NEXT: li a2, 36
+; RV64-NEXT: tail memcpy@plt
;
-; RV64UNALIGNED-LABEL: t2:
-; RV64UNALIGNED: # %bb.0: # %entry
-; RV64UNALIGNED-NEXT: lui a1, %hi(.L.str2)
-; RV64UNALIGNED-NEXT: ld a2, %lo(.L.str2)(a1)
-; RV64UNALIGNED-NEXT: sd a2, 0(a0)
-; RV64UNALIGNED-NEXT: lui a2, 1156
-; RV64UNALIGNED-NEXT: addiw a2, a2, 332
-; RV64UNALIGNED-NEXT: addi a1, a1, %lo(.L.str2)
-; RV64UNALIGNED-NEXT: ld a3, 24(a1)
-; RV64UNALIGNED-NEXT: ld a4, 16(a1)
-; RV64UNALIGNED-NEXT: ld a1, 8(a1)
-; RV64UNALIGNED-NEXT: sw a2, 32(a0)
-; RV64UNALIGNED-NEXT: sd a3, 24(a0)
-; RV64UNALIGNED-NEXT: sd a4, 16(a0)
-; RV64UNALIGNED-NEXT: sd a1, 8(a0)
-; RV64UNALIGNED-NEXT: ret
+; RV64-FAST-LABEL: t2:
+; RV64-FAST: # %bb.0: # %entry
+; RV64-FAST-NEXT: lui a1, %hi(.L.str2)
+; RV64-FAST-NEXT: ld a2, %lo(.L.str2)(a1)
+; RV64-FAST-NEXT: sd a2, 0(a0)
+; RV64-FAST-NEXT: lui a2, 1156
+; RV64-FAST-NEXT: addiw a2, a2, 332
+; RV64-FAST-NEXT: addi a1, a1, %lo(.L.str2)
+; RV64-FAST-NEXT: ld a3, 24(a1)
+; RV64-FAST-NEXT: ld a4, 16(a1)
+; RV64-FAST-NEXT: ld a1, 8(a1)
+; RV64-FAST-NEXT: sw a2, 32(a0)
+; RV64-FAST-NEXT: sd a3, 24(a0)
+; RV64-FAST-NEXT: sd a4, 16(a0)
+; RV64-FAST-NEXT: sd a1, 8(a0)
+; RV64-FAST-NEXT: ret
entry:
tail call void @llvm.memcpy.p0.p0.i64(ptr %C, ptr @.str2, i64 36, i1 false)
ret void
}
define void @t3(ptr nocapture %C) nounwind {
-; RV32ALIGNED-LABEL: t3:
-; RV32ALIGNED: # %bb.0: # %entry
-; RV32ALIGNED-NEXT: lui a1, %hi(.L.str3)
-; RV32ALIGNED-NEXT: addi a1, a1, %lo(.L.str3)
-; RV32ALIGNED-NEXT: li a2, 24
-; RV32ALIGNED-NEXT: tail memcpy@plt
+; RV32-LABEL: t3:
+; RV32: # %bb.0: # %entry
+; RV32-NEXT: lui a1, %hi(.L.str3)
+; RV32-NEXT: addi a1, a1, %lo(.L.str3)
+; RV32-NEXT: li a2, 24
+; RV32-NEXT: tail memcpy@plt
;
-; RV64ALIGNED-LABEL: t3:
-; RV64ALIGNED: # %bb.0: # %entry
-; RV64ALIGNED-NEXT: lui a1, %hi(.L.str3)
-; RV64ALIGNED-NEXT: addi a1, a1, %lo(.L.str3)
-; RV64ALIGNED-NEXT: li a2, 24
-; RV64ALIGNED-NEXT: tail memcpy@plt
+; RV64-LABEL: t3:
+; RV64: # %bb.0: # %entry
+; RV64-NEXT: lui a1, %hi(.L.str3)
+; RV64-NEXT: addi a1, a1, %lo(.L.str3)
+; RV64-NEXT: li a2, 24
+; RV64-NEXT: tail memcpy@plt
;
-; RV32UNALIGNED-LABEL: t3:
-; RV32UNALIGNED: # %bb.0: # %entry
-; RV32UNALIGNED-NEXT: lui a1, 1109
-; RV32UNALIGNED-NEXT: addi a1, a1, -689
-; RV32UNALIGNED-NEXT: sw a1, 20(a0)
-; RV32UNALIGNED-NEXT: lui a1, 340483
-; RV32UNALIGNED-NEXT: addi a1, a1, -947
-; RV32UNALIGNED-NEXT: sw a1, 16(a0)
-; RV32UNALIGNED-NEXT: lui a1, 267556
-; RV32UNALIGNED-NEXT: addi a1, a1, 1871
-; RV32UNALIGNED-NEXT: sw a1, 12(a0)
-; RV32UNALIGNED-NEXT: lui a1, 337154
-; RV32UNALIGNED-NEXT: addi a1, a1, 69
-; RV32UNALIGNED-NEXT: sw a1, 8(a0)
-; RV32UNALIGNED-NEXT: lui a1, 320757
-; RV32UNALIGNED-NEXT: addi a1, a1, 1107
-; RV32UNALIGNED-NEXT: sw a1, 4(a0)
-; RV32UNALIGNED-NEXT: lui a1, 365861
-; RV32UNALIGNED-NEXT: addi a1, a1, -1980
-; RV32UNALIGNED-NEXT: sw a1, 0(a0)
-; RV32UNALIGNED-NEXT: ret
+; RV32-FAST-LABEL: t3:
+; RV32-FAST: # %bb.0: # %entry
+; RV32-FAST-NEXT: lui a1, 1109
+; RV32-FAST-NEXT: addi a1, a1, -689
+; RV32-FAST-NEXT: sw a1, 20(a0)
+; RV32-FAST-NEXT: lui a1, 340483
+; RV32-FAST-NEXT: addi a1, a1, -947
+; RV32-FAST-NEXT: sw a1, 16(a0)
+; RV32-FAST-NEXT: lui a1, 267556
+; RV32-FAST-NEXT: addi a1, a1, 1871
+; RV32-FAST-NEXT: sw a1, 12(a0)
+; RV32-FAST-NEXT: lui a1, 337154
+; RV32-FAST-NEXT: addi a1, a1, 69
+; RV32-FAST-NEXT: sw a1, 8(a0)
+; RV32-FAST-NEXT: lui a1, 320757
+; RV32-FAST-NEXT: addi a1, a1, 1107
+; RV32-FAST-NEXT: sw a1, 4(a0)
+; RV32-FAST-NEXT: lui a1, 365861
+; RV32-FAST-NEXT: addi a1, a1, -1980
+; RV32-FAST-NEXT: sw a1, 0(a0)
+; RV32-FAST-NEXT: ret
;
-; RV64UNALIGNED-LABEL: t3:
-; RV64UNALIGNED: # %bb.0: # %entry
-; RV64UNALIGNED-NEXT: lui a1, %hi(.L.str3)
-; RV64UNALIGNED-NEXT: ld a2, %lo(.L.str3)(a1)
-; RV64UNALIGNED-NEXT: addi a1, a1, %lo(.L.str3)
-; RV64UNALIGNED-NEXT: ld a3, 16(a1)
-; RV64UNALIGNED-NEXT: ld a1, 8(a1)
-; RV64UNALIGNED-NEXT: sd a2, 0(a0)
-; RV64UNALIGNED-NEXT: sd a3, 16(a0)
-; RV64UNALIGNED-NEXT: sd a1, 8(a0)
-; RV64UNALIGNED-NEXT: ret
+; RV64-FAST-LABEL: t3:
+; RV64-FAST: # %bb.0: # %entry
+; RV64-FAST-NEXT: lui a1, %hi(.L.str3)
+; RV64-FAST-NEXT: ld a2, %lo(.L.str3)(a1)
+; RV64-FAST-NEXT: addi a1, a1, %lo(.L.str3)
+; RV64-FAST-NEXT: ld a3, 16(a1)
+; RV64-FAST-NEXT: ld a1, 8(a1)
+; RV64-FAST-NEXT: sd a2, 0(a0)
+; RV64-FAST-NEXT: sd a3, 16(a0)
+; RV64-FAST-NEXT: sd a1, 8(a0)
+; RV64-FAST-NEXT: ret
entry:
tail call void @llvm.memcpy.p0.p0.i64(ptr %C, ptr @.str3, i64 24, i1 false)
ret void
}
define void @t4(ptr nocapture %C) nounwind {
-; RV32ALIGNED-LABEL: t4:
-; RV32ALIGNED: # %bb.0: # %entry
-; RV32ALIGNED-NEXT: lui a1, %hi(.L.str4)
-; RV32ALIGNED-NEXT: addi a1, a1, %lo(.L.str4)
-; RV32ALIGNED-NEXT: li a2, 18
-; RV32ALIGNED-NEXT: tail memcpy@plt
+; RV32-LABEL: t4:
+; RV32: # %bb.0: # %entry
+; RV32-NEXT: lui a1, %hi(.L.str4)
+; RV32-NEXT: addi a1, a1, %lo(.L.str4)
+; RV32-NEXT: li a2, 18
+; RV32-NEXT: tail memcpy@plt
;
-; RV64ALIGNED-LABEL: t4:
-; RV64ALIGNED: # %bb.0: # %entry
-; RV64ALIGNED-NEXT: lui a1, %hi(.L.str4)
-; RV64ALIGNED-NEXT: addi a1, a1, %lo(.L.str4)
-; RV64ALIGNED-NEXT: li a2, 18
-; RV64ALIGNED-NEXT: tail memcpy@plt
+; RV64-LABEL: t4:
+; RV64: # %bb.0: # %entry
+; RV64-NEXT: lui a1, %hi(.L.str4)
+; RV64-NEXT: addi a1, a1, %lo(.L.str4)
+; RV64-NEXT: li a2, 18
+; RV64-NEXT: tail memcpy@plt
;
-; RV32UNALIGNED-LABEL: t4:
-; RV32UNALIGNED: # %bb.0: # %entry
-; RV32UNALIGNED-NEXT: li a1, 32
-; RV32UNALIGNED-NEXT: sh a1, 16(a0)
-; RV32UNALIGNED-NEXT: lui a1, 132388
-; RV32UNALIGNED-NEXT: addi a1, a1, 1871
-; RV32UNALIGNED-NEXT: sw a1, 12(a0)
-; RV32UNALIGNED-NEXT: lui a1, 337154
-; RV32UNALIGNED-NEXT: addi a1, a1, 69
-; RV32UNALIGNED-NEXT: sw a1, 8(a0)
-; RV32UNALIGNED-NEXT: lui a1, 320757
-; RV32UNALIGNED-NEXT: addi a1, a1, 1107
-; RV32UNALIGNED-NEXT: sw a1, 4(a0)
-; RV32UNALIGNED-NEXT: lui a1, 365861
-; RV32UNALIGNED-NEXT: addi a1, a1, -1980
-; RV32UNALIGNED-NEXT: sw a1, 0(a0)
-; RV32UNALIGNED-NEXT: ret
+; RV32-FAST-LABEL: t4:
+; RV32-FAST: # %bb.0: # %entry
+; RV32-FAST-NEXT: li a1, 32
+; RV32-FAST-NEXT: sh a1, 16(a0)
+; RV32-FAST-NEXT: lui a1, 132388
+; RV32-FAST-NEXT: addi a1, a1, 1871
+; RV32-FAST-NEXT: sw a1, 12(a0)
+; RV32-FAST-NEXT: lui a1, 337154
+; RV32-FAST-NEXT: addi a1, a1, 69
+; RV32-FAST-NEXT: sw a1, 8(a0)
+; RV32-FAST-NEXT: lui a1, 320757
+; RV32-FAST-NEXT: addi a1, a1, 1107
+; RV32-FAST-NEXT: sw a1, 4(a0)
+; RV32-FAST-NEXT: lui a1, 365861
+; RV32-FAST-NEXT: addi a1, a1, -1980
+; RV32-FAST-NEXT: sw a1, 0(a0)
+; RV32-FAST-NEXT: ret
;
-; RV64UNALIGNED-LABEL: t4:
-; RV64UNALIGNED: # %bb.0: # %entry
-; RV64UNALIGNED-NEXT: lui a1, %hi(.L.str4)
-; RV64UNALIGNED-NEXT: ld a2, %lo(.L.str4)(a1)
-; RV64UNALIGNED-NEXT: addi a1, a1, %lo(.L.str4)
-; RV64UNALIGNED-NEXT: ld a1, 8(a1)
-; RV64UNALIGNED-NEXT: li a3, 32
-; RV64UNALIGNED-NEXT: sh a3, 16(a0)
-; RV64UNALIGNED-NEXT: sd a2, 0(a0)
-; RV64UNALIGNED-NEXT: sd a1, 8(a0)
-; RV64UNALIGNED-NEXT: ret
+; RV64-FAST-LABEL: t4:
+; RV64-FAST: # %bb.0: # %entry
+; RV64-FAST-NEXT: lui a1, %hi(.L.str4)
+; RV64-FAST-NEXT: ld a2, %lo(.L.str4)(a1)
+; RV64-FAST-NEXT: addi a1, a1, %lo(.L.str4)
+; RV64-FAST-NEXT: ld a1, 8(a1)
+; RV64-FAST-NEXT: li a3, 32
+; RV64-FAST-NEXT: sh a3, 16(a0)
+; RV64-FAST-NEXT: sd a2, 0(a0)
+; RV64-FAST-NEXT: sd a1, 8(a0)
+; RV64-FAST-NEXT: ret
entry:
tail call void @llvm.memcpy.p0.p0.i64(ptr %C, ptr @.str4, i64 18, i1 false)
ret void
}
define void @t5(ptr nocapture %C) nounwind {
-; RV32ALIGNED-LABEL: t5:
-; RV32ALIGNED: # %bb.0: # %entry
-; RV32ALIGNED-NEXT: sb zero, 6(a0)
-; RV32ALIGNED-NEXT: li a1, 84
-; RV32ALIGNED-NEXT: sb a1, 5(a0)
-; RV32ALIGNED-NEXT: li a1, 83
-; RV32ALIGNED-NEXT: sb a1, 4(a0)
-; RV32ALIGNED-NEXT: li a1, 89
-; RV32ALIGNED-NEXT: sb a1, 3(a0)
-; RV32ALIGNED-NEXT: li a1, 82
-; RV32ALIGNED-NEXT: sb a1, 2(a0)
-; RV32ALIGNED-NEXT: li a1, 72
-; RV32ALIGNED-NEXT: sb a1, 1(a0)
-; RV32ALIGNED-NEXT: li a1, 68
-; RV32ALIGNED-NEXT: sb a1, 0(a0)
-; RV32ALIGNED-NEXT: ret
+; RV32-LABEL: t5:
+; RV32: # %bb.0: # %entry
+; RV32-NEXT: sb zero, 6(a0)
+; RV32-NEXT: li a1, 84
+; RV32-NEXT: sb a1, 5(a0)
+; RV32-NEXT: li a1, 83
+; RV32-NEXT: sb a1, 4(a0)
+; RV32-NEXT: li a1, 89
+; RV32-NEXT: sb a1, 3(a0)
+; RV32-NEXT: li a1, 82
+; RV32-NEXT: sb a1, 2(a0)
+; RV32-NEXT: li a1, 72
+; RV32-NEXT: sb a1, 1(a0)
+; RV32-NEXT: li a1, 68
+; RV32-NEXT: sb a1, 0(a0)
+; RV32-NEXT: ret
;
-; RV64ALIGNED-LABEL: t5:
-; RV64ALIGNED: # %bb.0: # %entry
-; RV64ALIGNED-NEXT: sb zero, 6(a0)
-; RV64ALIGNED-NEXT: li a1, 84
-; RV64ALIGNED-NEXT: sb a1, 5(a0)
-; RV64ALIGNED-NEXT: li a1, 83
-; RV64ALIGNED-NEXT: sb a1, 4(a0)
-; RV64ALIGNED-NEXT: li a1, 89
-; RV64ALIGNED-NEXT: sb a1, 3(a0)
-; RV64ALIGNED-NEXT: li a1, 82
-; RV64ALIGNED-NEXT: sb a1, 2(a0)
-; RV64ALIGNED-NEXT: li a1, 72
-; RV64ALIGNED-NEXT: sb a1, 1(a0)
-; RV64ALIGNED-NEXT: li a1, 68
-; RV64ALIGNED-NEXT: sb a1, 0(a0)
-; RV64ALIGNED-NEXT: ret
+; RV64-LABEL: t5:
+; RV64: # %bb.0: # %entry
+; RV64-NEXT: sb zero, 6(a0)
+; RV64-NEXT: li a1, 84
+; RV64-NEXT: sb a1, 5(a0)
+; RV64-NEXT: li a1, 83
+; RV64-NEXT: sb a1, 4(a0)
+; RV64-NEXT: li a1, 89
+; RV64-NEXT: sb a1, 3(a0)
+; RV64-NEXT: li a1, 82
+; RV64-NEXT: sb a1, 2(a0)
+; RV64-NEXT: li a1, 72
+; RV64-NEXT: sb a1, 1(a0)
+; RV64-NEXT: li a1, 68
+; RV64-NEXT: sb a1, 0(a0)
+; RV64-NEXT: ret
;
-; RV32UNALIGNED-LABEL: t5:
-; RV32UNALIGNED: # %bb.0: # %entry
-; RV32UNALIGNED-NEXT: sb zero, 6(a0)
-; RV32UNALIGNED-NEXT: lui a1, 5
-; RV32UNALIGNED-NEXT: addi a1, a1, 1107
-; RV32UNALIGNED-NEXT: sh a1, 4(a0)
-; RV32UNALIGNED-NEXT: lui a1, 365861
-; RV32UNALIGNED-NEXT: addi a1, a1, -1980
-; RV32UNALIGNED-NEXT: sw a1, 0(a0)
-; RV32UNALIGNED-NEXT: ret
+; RV32-FAST-LABEL: t5:
+; RV32-FAST: # %bb.0: # %entry
+; RV32-FAST-NEXT: sb zero, 6(a0)
+; RV32-FAST-NEXT: lui a1, 5
+; RV32-FAST-NEXT: addi a1, a1, 1107
+; RV32-FAST-NEXT: sh a1, 4(a0)
+; RV32-FAST-NEXT: lui a1, 365861
+; RV32-FAST-NEXT: addi a1, a1, -1980
+; RV32-FAST-NEXT: sw a1, 0(a0)
+; RV32-FAST-NEXT: ret
;
-; RV64UNALIGNED-LABEL: t5:
-; RV64UNALIGNED: # %bb.0: # %entry
-; RV64UNALIGNED-NEXT: sb zero, 6(a0)
-; RV64UNALIGNED-NEXT: lui a1, 5
-; RV64UNALIGNED-NEXT: addiw a1, a1, 1107
-; RV64UNALIGNED-NEXT: sh a1, 4(a0)
-; RV64UNALIGNED-NEXT: lui a1, 365861
-; RV64UNALIGNED-NEXT: addiw a1, a1, -1980
-; RV64UNALIGNED-NEXT: sw a1, 0(a0)
-; RV64UNALIGNED-NEXT: ret
+; RV64-FAST-LABEL: t5:
+; RV64-FAST: # %bb.0: # %entry
+; RV64-FAST-NEXT: sb zero, 6(a0)
+; RV64-FAST-NEXT: lui a1, 5
+; RV64-FAST-NEXT: addiw a1, a1, 1107
+; RV64-FAST-NEXT: sh a1, 4(a0)
+; RV64-FAST-NEXT: lui a1, 365861
+; RV64-FAST-NEXT: addiw a1, a1, -1980
+; RV64-FAST-NEXT: sw a1, 0(a0)
+; RV64-FAST-NEXT: ret
entry:
tail call void @llvm.memcpy.p0.p0.i64(ptr %C, ptr @.str5, i64 7, i1 false)
ret void
}
define void @t6() nounwind {
-; RV32ALIGNED-LABEL: t6:
-; RV32ALIGNED: # %bb.0: # %entry
-; RV32ALIGNED-NEXT: addi sp, sp, -16
-; RV32ALIGNED-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32ALIGNED-NEXT: lui a0, %hi(spool.splbuf)
-; RV32ALIGNED-NEXT: addi a0, a0, %lo(spool.splbuf)
-; RV32ALIGNED-NEXT: lui a1, %hi(.L.str6)
-; RV32ALIGNED-NEXT: addi a1, a1, %lo(.L.str6)
-; RV32ALIGNED-NEXT: li a2, 14
-; RV32ALIGNED-NEXT: call memcpy@plt
-; RV32ALIGNED-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32ALIGNED-NEXT: addi sp, sp, 16
-; RV32ALIGNED-NEXT: ret
+; RV32-LABEL: t6:
+; RV32: # %bb.0: # %entry
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-NEXT: lui a0, %hi(spool.splbuf)
+; RV32-NEXT: addi a0, a0, %lo(spool.splbuf)
+; RV32-NEXT: lui a1, %hi(.L.str6)
+; RV32-NEXT: addi a1, a1, %lo(.L.str6)
+; RV32-NEXT: li a2, 14
+; RV32-NEXT: call memcpy@plt
+; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
;
-; RV64ALIGNED-LABEL: t6:
-; RV64ALIGNED: # %bb.0: # %entry
-; RV64ALIGNED-NEXT: addi sp, sp, -16
-; RV64ALIGNED-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; RV64ALIGNED-NEXT: lui a0, %hi(spool.splbuf)
-; RV64ALIGNED-NEXT: addi a0, a0, %lo(spool.splbuf)
-; RV64ALIGNED-NEXT: lui a1, %hi(.L.str6)
-; RV64ALIGNED-NEXT: addi a1, a1, %lo(.L.str6)
-; RV64ALIGNED-NEXT: li a2, 14
-; RV64ALIGNED-NEXT: call memcpy@plt
-; RV64ALIGNED-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64ALIGNED-NEXT: addi sp, sp, 16
-; RV64ALIGNED-NEXT: ret
+; RV64-LABEL: t6:
+; RV64: # %bb.0: # %entry
+; RV64-NEXT: addi sp, sp, -16
+; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-NEXT: lui a0, %hi(spool.splbuf)
+; RV64-NEXT: addi a0, a0, %lo(spool.splbuf)
+; RV64-NEXT: lui a1, %hi(.L.str6)
+; RV64-NEXT: addi a1, a1, %lo(.L.str6)
+; RV64-NEXT: li a2, 14
+; RV64-NEXT: call memcpy@plt
+; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-NEXT: addi sp, sp, 16
+; RV64-NEXT: ret
;
-; RV32UNALIGNED-LABEL: t6:
-; RV32UNALIGNED: # %bb.0: # %entry
-; RV32UNALIGNED-NEXT: lui a0, %hi(spool.splbuf)
-; RV32UNALIGNED-NEXT: li a1, 88
-; RV32UNALIGNED-NEXT: sh a1, %lo(spool.splbuf+12)(a0)
-; RV32UNALIGNED-NEXT: lui a1, 361862
-; RV32UNALIGNED-NEXT: addi a1, a1, -1960
-; RV32UNALIGNED-NEXT: sw a1, %lo(spool.splbuf+8)(a0)
-; RV32UNALIGNED-NEXT: lui a1, 362199
-; RV32UNALIGNED-NEXT: addi a1, a1, 559
-; RV32UNALIGNED-NEXT: sw a1, %lo(spool.splbuf+4)(a0)
-; RV32UNALIGNED-NEXT: lui a1, 460503
-; RV32UNALIGNED-NEXT: addi a1, a1, 1071
-; RV32UNALIGNED-NEXT: sw a1, %lo(spool.splbuf)(a0)
-; RV32UNALIGNED-NEXT: ret
+; RV32-FAST-LABEL: t6:
+; RV32-FAST: # %bb.0: # %entry
+; RV32-FAST-NEXT: lui a0, %hi(spool.splbuf)
+; RV32-FAST-NEXT: li a1, 88
+; RV32-FAST-NEXT: sh a1, %lo(spool.splbuf+12)(a0)
+; RV32-FAST-NEXT: lui a1, 361862
+; RV32-FAST-NEXT: addi a1, a1, -1960
+; RV32-FAST-NEXT: sw a1, %lo(spool.splbuf+8)(a0)
+; RV32-FAST-NEXT: lui a1, 362199
+; RV32-FAST-NEXT: addi a1, a1, 559
+; RV32-FAST-NEXT: sw a1, %lo(spool.splbuf+4)(a0)
+; RV32-FAST-NEXT: lui a1, 460503
+; RV32-FAST-NEXT: addi a1, a1, 1071
+; RV32-FAST-NEXT: sw a1, %lo(spool.splbuf)(a0)
+; RV32-FAST-NEXT: ret
;
-; RV64UNALIGNED-LABEL: t6:
-; RV64UNALIGNED: # %bb.0: # %entry
-; RV64UNALIGNED-NEXT: lui a0, %hi(.L.str6)
-; RV64UNALIGNED-NEXT: ld a0, %lo(.L.str6)(a0)
-; RV64UNALIGNED-NEXT: lui a1, %hi(spool.splbuf)
-; RV64UNALIGNED-NEXT: li a2, 88
-; RV64UNALIGNED-NEXT: sh a2, %lo(spool.splbuf+12)(a1)
-; RV64UNALIGNED-NEXT: sd a0, %lo(spool.splbuf)(a1)
-; RV64UNALIGNED-NEXT: lui a0, 361862
-; RV64UNALIGNED-NEXT: addiw a0, a0, -1960
-; RV64UNALIGNED-NEXT: sw a0, %lo(spool.splbuf+8)(a1)
-; RV64UNALIGNED-NEXT: ret
+; RV64-FAST-LABEL: t6:
+; RV64-FAST: # %bb.0: # %entry
+; RV64-FAST-NEXT: lui a0, %hi(.L.str6)
+; RV64-FAST-NEXT: ld a0, %lo(.L.str6)(a0)
+; RV64-FAST-NEXT: lui a1, %hi(spool.splbuf)
+; RV64-FAST-NEXT: li a2, 88
+; RV64-FAST-NEXT: sh a2, %lo(spool.splbuf+12)(a1)
+; RV64-FAST-NEXT: sd a0, %lo(spool.splbuf)(a1)
+; RV64-FAST-NEXT: lui a0, 361862
+; RV64-FAST-NEXT: addiw a0, a0, -1960
+; RV64-FAST-NEXT: sw a0, %lo(spool.splbuf+8)(a1)
+; RV64-FAST-NEXT: ret
entry:
call void @llvm.memcpy.p0.p0.i64(ptr @spool.splbuf, ptr @.str6, i64 14, i1 false)
ret void
@@ -359,37 +359,37 @@ entry:
%struct.Foo = type { i32, i32, i32, i32 }
define void @t7(ptr nocapture %a, ptr nocapture %b) nounwind {
-; RV32-LABEL: t7:
-; RV32: # %bb.0: # %entry
-; RV32-NEXT: lw a2, 12(a1)
-; RV32-NEXT: sw a2, 12(a0)
-; RV32-NEXT: lw a2, 8(a1)
-; RV32-NEXT: sw a2, 8(a0)
-; RV32-NEXT: lw a2, 4(a1)
-; RV32-NEXT: sw a2, 4(a0)
-; RV32-NEXT: lw a1, 0(a1)
-; RV32-NEXT: sw a1, 0(a0)
-; RV32-NEXT: ret
+; RV32-BOTH-LABEL: t7:
+; RV32-BOTH: # %bb.0: # %entry
+; RV32-BOTH-NEXT: lw a2, 12(a1)
+; RV32-BOTH-NEXT: sw a2, 12(a0)
+; RV32-BOTH-NEXT: lw a2, 8(a1)
+; RV32-BOTH-NEXT: sw a2, 8(a0)
+; RV32-BOTH-NEXT: lw a2, 4(a1)
+; RV32-BOTH-NEXT: sw a2, 4(a0)
+; RV32-BOTH-NEXT: lw a1, 0(a1)
+; RV32-BOTH-NEXT: sw a1, 0(a0)
+; RV32-BOTH-NEXT: ret
;
-; RV64ALIGNED-LABEL: t7:
-; RV64ALIGNED: # %bb.0: # %entry
-; RV64ALIGNED-NEXT: lw a2, 12(a1)
-; RV64ALIGNED-NEXT: sw a2, 12(a0)
-; RV64ALIGNED-NEXT: lw a2, 8(a1)
-; RV64ALIGNED-NEXT: sw a2, 8(a0)
-; RV64ALIGNED-NEXT: lw a2, 4(a1)
-; RV64ALIGNED-NEXT: sw a2, 4(a0)
-; RV64ALIGNED-NEXT: lw a1, 0(a1)
-; RV64ALIGNED-NEXT: sw a1, 0(a0)
-; RV64ALIGNED-NEXT: ret
+; RV64-LABEL: t7:
+; RV64: # %bb.0: # %entry
+; RV64-NEXT: lw a2, 12(a1)
+; RV64-NEXT: sw a2, 12(a0)
+; RV64-NEXT: lw a2, 8(a1)
+; RV64-NEXT: sw a2, 8(a0)
+; RV64-NEXT: lw a2, 4(a1)
+; RV64-NEXT: sw a2, 4(a0)
+; RV64-NEXT: lw a1, 0(a1)
+; RV64-NEXT: sw a1, 0(a0)
+; RV64-NEXT: ret
;
-; RV64UNALIGNED-LABEL: t7:
-; RV64UNALIGNED: # %bb.0: # %entry
-; RV64UNALIGNED-NEXT: ld a2, 8(a1)
-; RV64UNALIGNED-NEXT: sd a2, 8(a0)
-; RV64UNALIGNED-NEXT: ld a1, 0(a1)
-; RV64UNALIGNED-NEXT: sd a1, 0(a0)
-; RV64UNALIGNED-NEXT: ret
+; RV64-FAST-LABEL: t7:
+; RV64-FAST: # %bb.0: # %entry
+; RV64-FAST-NEXT: ld a2, 8(a1)
+; RV64-FAST-NEXT: sd a2, 8(a0)
+; RV64-FAST-NEXT: ld a1, 0(a1)
+; RV64-FAST-NEXT: sd a1, 0(a0)
+; RV64-FAST-NEXT: ret
entry:
tail call void @llvm.memcpy.p0.p0.i32(ptr align 4 %a, ptr align 4 %b, i32 16, i1 false)
ret void
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-unaligned.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-unaligned.ll
index fcc7cf7825ab88..e78d14cf55ca4e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-unaligned.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-unaligned.ll
@@ -1,45 +1,73 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+m,+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \
-; RUN: | FileCheck %s --check-prefixes=CHECK,RV32
+; RUN: | FileCheck %s --check-prefixes=SLOW,RV32-SLOW
; RUN: llc -mtriple=riscv64 -mattr=+m,+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \
-; RUN: | FileCheck %s --check-prefixes=CHECK,RV64
+; RUN: | FileCheck %s --check-prefixes=SLOW,RV64-SLOW
+; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+unaligned-vector-mem -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \
+; RUN: | FileCheck %s --check-prefixes=FAST,RV32-FAST
+; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+unaligned-vector-mem -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \
+; RUN: | FileCheck %s --check-prefixes=FAST,RV64-FAST
define <4 x i32> @load_v4i32_align1(ptr %ptr) {
-; CHECK-LABEL: load_v4i32_align1:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
-; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: ret
+; SLOW-LABEL: load_v4i32_align1:
+; SLOW: # %bb.0:
+; SLOW-NEXT: vsetivli zero, 16, e8, m1, ta, ma
+; SLOW-NEXT: vle8.v v8, (a0)
+; SLOW-NEXT: ret
+;
+; FAST-LABEL: load_v4i32_align1:
+; FAST: # %bb.0:
+; FAST-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; FAST-NEXT: vle32.v v8, (a0)
+; FAST-NEXT: ret
%z = load <4 x i32>, ptr %ptr, align 1
ret <4 x i32> %z
}
define <4 x i32> @load_v4i32_align2(ptr %ptr) {
-; CHECK-LABEL: load_v4i32_align2:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
-; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: ret
+; SLOW-LABEL: load_v4i32_align2:
+; SLOW: # %bb.0:
+; SLOW-NEXT: vsetivli zero, 16, e8, m1, ta, ma
+; SLOW-NEXT: vle8.v v8, (a0)
+; SLOW-NEXT: ret
+;
+; FAST-LABEL: load_v4i32_align2:
+; FAST: # %bb.0:
+; FAST-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; FAST-NEXT: vle32.v v8, (a0)
+; FAST-NEXT: ret
%z = load <4 x i32>, ptr %ptr, align 2
ret <4 x i32> %z
}
define void @store_v4i32_align1(<4 x i32> %x, ptr %ptr) {
-; CHECK-LABEL: store_v4i32_align1:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
-; CHECK-NEXT: vse8.v v8, (a0)
-; CHECK-NEXT: ret
+; SLOW-LABEL: store_v4i32_align1:
+; SLOW: # %bb.0:
+; SLOW-NEXT: vsetivli zero, 16, e8, m1, ta, ma
+; SLOW-NEXT: vse8.v v8, (a0)
+; SLOW-NEXT: ret
+;
+; FAST-LABEL: store_v4i32_align1:
+; FAST: # %bb.0:
+; FAST-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; FAST-NEXT: vse32.v v8, (a0)
+; FAST-NEXT: ret
store <4 x i32> %x, ptr %ptr, align 1
ret void
}
define void @store_v4i32_align2(<4 x i32> %x, ptr %ptr) {
-; CHECK-LABEL: store_v4i32_align2:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
-; CHECK-NEXT: vse8.v v8, (a0)
-; CHECK-NEXT: ret
+; SLOW-LABEL: store_v4i32_align2:
+; SLOW: # %bb.0:
+; SLOW-NEXT: vsetivli zero, 16, e8, m1, ta, ma
+; SLOW-NEXT: vse8.v v8, (a0)
+; SLOW-NEXT: ret
+;
+; FAST-LABEL: store_v4i32_align2:
+; FAST: # %bb.0:
+; FAST-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; FAST-NEXT: vse32.v v8, (a0)
+; FAST-NEXT: ret
store <4 x i32> %x, ptr %ptr, align 2
ret void
}
@@ -47,79 +75,93 @@ define void @store_v4i32_align2(<4 x i32> %x, ptr %ptr) {
declare <2 x i16> @llvm.masked.gather.v2i16.v2p0(<2 x ptr>, i32, <2 x i1>, <2 x i16>)
define <2 x i16> @mgather_v2i16_align1(<2 x ptr> %ptrs, <2 x i1> %m, <2 x i16> %passthru) {
-; RV32-LABEL: mgather_v2i16_align1:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 0, e8, mf8, ta, ma
-; RV32-NEXT: vmv.x.s a0, v0
-; RV32-NEXT: andi a1, a0, 1
-; RV32-NEXT: bnez a1, .LBB4_3
-; RV32-NEXT: # %bb.1: # %else
-; RV32-NEXT: andi a0, a0, 2
-; RV32-NEXT: bnez a0, .LBB4_4
-; RV32-NEXT: .LBB4_2: # %else2
-; RV32-NEXT: vmv1r.v v8, v9
-; RV32-NEXT: ret
-; RV32-NEXT: .LBB4_3: # %cond.load
-; RV32-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; RV32-NEXT: vmv.x.s a1, v8
-; RV32-NEXT: lbu a2, 1(a1)
-; RV32-NEXT: lbu a1, 0(a1)
-; RV32-NEXT: slli a2, a2, 8
-; RV32-NEXT: or a1, a2, a1
-; RV32-NEXT: vsetivli zero, 2, e16, mf4, tu, ma
-; RV32-NEXT: vmv.s.x v9, a1
-; RV32-NEXT: andi a0, a0, 2
-; RV32-NEXT: beqz a0, .LBB4_2
-; RV32-NEXT: .LBB4_4: # %cond.load1
-; RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; RV32-NEXT: vslidedown.vi v8, v8, 1
-; RV32-NEXT: vmv.x.s a0, v8
-; RV32-NEXT: lbu a1, 1(a0)
-; RV32-NEXT: lbu a0, 0(a0)
-; RV32-NEXT: slli a1, a1, 8
-; RV32-NEXT: or a0, a1, a0
-; RV32-NEXT: vmv.s.x v8, a0
-; RV32-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
-; RV32-NEXT: vslideup.vi v9, v8, 1
-; RV32-NEXT: vmv1r.v v8, v9
-; RV32-NEXT: ret
+; RV32-SLOW-LABEL: mgather_v2i16_align1:
+; RV32-SLOW: # %bb.0:
+; RV32-SLOW-NEXT: vsetivli zero, 0, e8, mf8, ta, ma
+; RV32-SLOW-NEXT: vmv.x.s a0, v0
+; RV32-SLOW-NEXT: andi a1, a0, 1
+; RV32-SLOW-NEXT: bnez a1, .LBB4_3
+; RV32-SLOW-NEXT: # %bb.1: # %else
+; RV32-SLOW-NEXT: andi a0, a0, 2
+; RV32-SLOW-NEXT: bnez a0, .LBB4_4
+; RV32-SLOW-NEXT: .LBB4_2: # %else2
+; RV32-SLOW-NEXT: vmv1r.v v8, v9
+; RV32-SLOW-NEXT: ret
+; RV32-SLOW-NEXT: .LBB4_3: # %cond.load
+; RV32-SLOW-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; RV32-SLOW-NEXT: vmv.x.s a1, v8
+; RV32-SLOW-NEXT: lbu a2, 1(a1)
+; RV32-SLOW-NEXT: lbu a1, 0(a1)
+; RV32-SLOW-NEXT: slli a2, a2, 8
+; RV32-SLOW-NEXT: or a1, a2, a1
+; RV32-SLOW-NEXT: vsetivli zero, 2, e16, mf4, tu, ma
+; RV32-SLOW-NEXT: vmv.s.x v9, a1
+; RV32-SLOW-NEXT: andi a0, a0, 2
+; RV32-SLOW-NEXT: beqz a0, .LBB4_2
+; RV32-SLOW-NEXT: .LBB4_4: # %cond.load1
+; RV32-SLOW-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; RV32-SLOW-NEXT: vslidedown.vi v8, v8, 1
+; RV32-SLOW-NEXT: vmv.x.s a0, v8
+; RV32-SLOW-NEXT: lbu a1, 1(a0)
+; RV32-SLOW-NEXT: lbu a0, 0(a0)
+; RV32-SLOW-NEXT: slli a1, a1, 8
+; RV32-SLOW-NEXT: or a0, a1, a0
+; RV32-SLOW-NEXT: vmv.s.x v8, a0
+; RV32-SLOW-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; RV32-SLOW-NEXT: vslideup.vi v9, v8, 1
+; RV32-SLOW-NEXT: vmv1r.v v8, v9
+; RV32-SLOW-NEXT: ret
;
-; RV64-LABEL: mgather_v2i16_align1:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 0, e8, mf8, ta, ma
-; RV64-NEXT: vmv.x.s a0, v0
-; RV64-NEXT: andi a1, a0, 1
-; RV64-NEXT: bnez a1, .LBB4_3
-; RV64-NEXT: # %bb.1: # %else
-; RV64-NEXT: andi a0, a0, 2
-; RV64-NEXT: bnez a0, .LBB4_4
-; RV64-NEXT: .LBB4_2: # %else2
-; RV64-NEXT: vmv1r.v v8, v9
-; RV64-NEXT: ret
-; RV64-NEXT: .LBB4_3: # %cond.load
-; RV64-NEXT: vsetvli zero, zero, e64, m1, ta, ma
-; RV64-NEXT: vmv.x.s a1, v8
-; RV64-NEXT: lbu a2, 1(a1)
-; RV64-NEXT: lbu a1, 0(a1)
-; RV64-NEXT: slli a2, a2, 8
-; RV64-NEXT: or a1, a2, a1
-; RV64-NEXT: vsetivli zero, 2, e16, mf4, tu, ma
-; RV64-NEXT: vmv.s.x v9, a1
-; RV64-NEXT: andi a0, a0, 2
-; RV64-NEXT: beqz a0, .LBB4_2
-; RV64-NEXT: .LBB4_4: # %cond.load1
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT: vslidedown.vi v8, v8, 1
-; RV64-NEXT: vmv.x.s a0, v8
-; RV64-NEXT: lbu a1, 1(a0)
-; RV64-NEXT: lbu a0, 0(a0)
-; RV64-NEXT: slli a1, a1, 8
-; RV64-NEXT: or a0, a1, a0
-; RV64-NEXT: vmv.s.x v8, a0
-; RV64-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
-; RV64-NEXT: vslideup.vi v9, v8, 1
-; RV64-NEXT: vmv1r.v v8, v9
-; RV64-NEXT: ret
+; RV64-SLOW-LABEL: mgather_v2i16_align1:
+; RV64-SLOW: # %bb.0:
+; RV64-SLOW-NEXT: vsetivli zero, 0, e8, mf8, ta, ma
+; RV64-SLOW-NEXT: vmv.x.s a0, v0
+; RV64-SLOW-NEXT: andi a1, a0, 1
+; RV64-SLOW-NEXT: bnez a1, .LBB4_3
+; RV64-SLOW-NEXT: # %bb.1: # %else
+; RV64-SLOW-NEXT: andi a0, a0, 2
+; RV64-SLOW-NEXT: bnez a0, .LBB4_4
+; RV64-SLOW-NEXT: .LBB4_2: # %else2
+; RV64-SLOW-NEXT: vmv1r.v v8, v9
+; RV64-SLOW-NEXT: ret
+; RV64-SLOW-NEXT: .LBB4_3: # %cond.load
+; RV64-SLOW-NEXT: vsetvli zero, zero, e64, m1, ta, ma
+; RV64-SLOW-NEXT: vmv.x.s a1, v8
+; RV64-SLOW-NEXT: lbu a2, 1(a1)
+; RV64-SLOW-NEXT: lbu a1, 0(a1)
+; RV64-SLOW-NEXT: slli a2, a2, 8
+; RV64-SLOW-NEXT: or a1, a2, a1
+; RV64-SLOW-NEXT: vsetivli zero, 2, e16, mf4, tu, ma
+; RV64-SLOW-NEXT: vmv.s.x v9, a1
+; RV64-SLOW-NEXT: andi a0, a0, 2
+; RV64-SLOW-NEXT: beqz a0, .LBB4_2
+; RV64-SLOW-NEXT: .LBB4_4: # %cond.load1
+; RV64-SLOW-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-SLOW-NEXT: vslidedown.vi v8, v8, 1
+; RV64-SLOW-NEXT: vmv.x.s a0, v8
+; RV64-SLOW-NEXT: lbu a1, 1(a0)
+; RV64-SLOW-NEXT: lbu a0, 0(a0)
+; RV64-SLOW-NEXT: slli a1, a1, 8
+; RV64-SLOW-NEXT: or a0, a1, a0
+; RV64-SLOW-NEXT: vmv.s.x v8, a0
+; RV64-SLOW-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; RV64-SLOW-NEXT: vslideup.vi v9, v8, 1
+; RV64-SLOW-NEXT: vmv1r.v v8, v9
+; RV64-SLOW-NEXT: ret
+;
+; RV32-FAST-LABEL: mgather_v2i16_align1:
+; RV32-FAST: # %bb.0:
+; RV32-FAST-NEXT: vsetivli zero, 2, e16, mf4, ta, mu
+; RV32-FAST-NEXT: vluxei32.v v9, (zero), v8, v0.t
+; RV32-FAST-NEXT: vmv1r.v v8, v9
+; RV32-FAST-NEXT: ret
+;
+; RV64-FAST-LABEL: mgather_v2i16_align1:
+; RV64-FAST: # %bb.0:
+; RV64-FAST-NEXT: vsetivli zero, 2, e16, mf4, ta, mu
+; RV64-FAST-NEXT: vluxei64.v v9, (zero), v8, v0.t
+; RV64-FAST-NEXT: vmv1r.v v8, v9
+; RV64-FAST-NEXT: ret
%v = call <2 x i16> @llvm.masked.gather.v2i16.v2p0(<2 x ptr> %ptrs, i32 1, <2 x i1> %m, <2 x i16> %passthru)
ret <2 x i16> %v
}
@@ -127,78 +169,92 @@ define <2 x i16> @mgather_v2i16_align1(<2 x ptr> %ptrs, <2 x i1> %m, <2 x i16> %
declare <2 x i64> @llvm.masked.gather.v2i64.v2p0(<2 x ptr>, i32, <2 x i1>, <2 x i64>)
define <2 x i64> @mgather_v2i64_align4(<2 x ptr> %ptrs, <2 x i1> %m, <2 x i64> %passthru) {
-; RV32-LABEL: mgather_v2i64_align4:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 0, e8, mf8, ta, ma
-; RV32-NEXT: vmv.x.s a0, v0
-; RV32-NEXT: andi a1, a0, 1
-; RV32-NEXT: bnez a1, .LBB5_3
-; RV32-NEXT: # %bb.1: # %else
-; RV32-NEXT: andi a0, a0, 2
-; RV32-NEXT: bnez a0, .LBB5_4
-; RV32-NEXT: .LBB5_2: # %else2
-; RV32-NEXT: vmv1r.v v8, v9
-; RV32-NEXT: ret
-; RV32-NEXT: .LBB5_3: # %cond.load
-; RV32-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; RV32-NEXT: vmv.x.s a1, v8
-; RV32-NEXT: lw a2, 0(a1)
-; RV32-NEXT: lw a1, 4(a1)
-; RV32-NEXT: vsetivli zero, 2, e32, m1, tu, ma
-; RV32-NEXT: vslide1down.vx v9, v9, a2
-; RV32-NEXT: vslide1down.vx v9, v9, a1
-; RV32-NEXT: andi a0, a0, 2
-; RV32-NEXT: beqz a0, .LBB5_2
-; RV32-NEXT: .LBB5_4: # %cond.load1
-; RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; RV32-NEXT: vslidedown.vi v8, v8, 1
-; RV32-NEXT: vmv.x.s a0, v8
-; RV32-NEXT: lw a1, 0(a0)
-; RV32-NEXT: lw a0, 4(a0)
-; RV32-NEXT: vsetivli zero, 2, e32, m1, ta, ma
-; RV32-NEXT: vslide1down.vx v8, v8, a1
-; RV32-NEXT: vslide1down.vx v8, v8, a0
-; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; RV32-NEXT: vslideup.vi v9, v8, 1
-; RV32-NEXT: vmv1r.v v8, v9
-; RV32-NEXT: ret
+; RV32-SLOW-LABEL: mgather_v2i64_align4:
+; RV32-SLOW: # %bb.0:
+; RV32-SLOW-NEXT: vsetivli zero, 0, e8, mf8, ta, ma
+; RV32-SLOW-NEXT: vmv.x.s a0, v0
+; RV32-SLOW-NEXT: andi a1, a0, 1
+; RV32-SLOW-NEXT: bnez a1, .LBB5_3
+; RV32-SLOW-NEXT: # %bb.1: # %else
+; RV32-SLOW-NEXT: andi a0, a0, 2
+; RV32-SLOW-NEXT: bnez a0, .LBB5_4
+; RV32-SLOW-NEXT: .LBB5_2: # %else2
+; RV32-SLOW-NEXT: vmv1r.v v8, v9
+; RV32-SLOW-NEXT: ret
+; RV32-SLOW-NEXT: .LBB5_3: # %cond.load
+; RV32-SLOW-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; RV32-SLOW-NEXT: vmv.x.s a1, v8
+; RV32-SLOW-NEXT: lw a2, 0(a1)
+; RV32-SLOW-NEXT: lw a1, 4(a1)
+; RV32-SLOW-NEXT: vsetivli zero, 2, e32, m1, tu, ma
+; RV32-SLOW-NEXT: vslide1down.vx v9, v9, a2
+; RV32-SLOW-NEXT: vslide1down.vx v9, v9, a1
+; RV32-SLOW-NEXT: andi a0, a0, 2
+; RV32-SLOW-NEXT: beqz a0, .LBB5_2
+; RV32-SLOW-NEXT: .LBB5_4: # %cond.load1
+; RV32-SLOW-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; RV32-SLOW-NEXT: vslidedown.vi v8, v8, 1
+; RV32-SLOW-NEXT: vmv.x.s a0, v8
+; RV32-SLOW-NEXT: lw a1, 0(a0)
+; RV32-SLOW-NEXT: lw a0, 4(a0)
+; RV32-SLOW-NEXT: vsetivli zero, 2, e32, m1, ta, ma
+; RV32-SLOW-NEXT: vslide1down.vx v8, v8, a1
+; RV32-SLOW-NEXT: vslide1down.vx v8, v8, a0
+; RV32-SLOW-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; RV32-SLOW-NEXT: vslideup.vi v9, v8, 1
+; RV32-SLOW-NEXT: vmv1r.v v8, v9
+; RV32-SLOW-NEXT: ret
+;
+; RV64-SLOW-LABEL: mgather_v2i64_align4:
+; RV64-SLOW: # %bb.0:
+; RV64-SLOW-NEXT: vsetivli zero, 0, e8, mf8, ta, ma
+; RV64-SLOW-NEXT: vmv.x.s a0, v0
+; RV64-SLOW-NEXT: andi a1, a0, 1
+; RV64-SLOW-NEXT: bnez a1, .LBB5_3
+; RV64-SLOW-NEXT: # %bb.1: # %else
+; RV64-SLOW-NEXT: andi a0, a0, 2
+; RV64-SLOW-NEXT: bnez a0, .LBB5_4
+; RV64-SLOW-NEXT: .LBB5_2: # %else2
+; RV64-SLOW-NEXT: vmv1r.v v8, v9
+; RV64-SLOW-NEXT: ret
+; RV64-SLOW-NEXT: .LBB5_3: # %cond.load
+; RV64-SLOW-NEXT: vsetvli zero, zero, e64, m1, ta, ma
+; RV64-SLOW-NEXT: vmv.x.s a1, v8
+; RV64-SLOW-NEXT: lwu a2, 4(a1)
+; RV64-SLOW-NEXT: lwu a1, 0(a1)
+; RV64-SLOW-NEXT: slli a2, a2, 32
+; RV64-SLOW-NEXT: or a1, a2, a1
+; RV64-SLOW-NEXT: vsetivli zero, 2, e64, m1, tu, ma
+; RV64-SLOW-NEXT: vmv.s.x v9, a1
+; RV64-SLOW-NEXT: andi a0, a0, 2
+; RV64-SLOW-NEXT: beqz a0, .LBB5_2
+; RV64-SLOW-NEXT: .LBB5_4: # %cond.load1
+; RV64-SLOW-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-SLOW-NEXT: vslidedown.vi v8, v8, 1
+; RV64-SLOW-NEXT: vmv.x.s a0, v8
+; RV64-SLOW-NEXT: lwu a1, 4(a0)
+; RV64-SLOW-NEXT: lwu a0, 0(a0)
+; RV64-SLOW-NEXT: slli a1, a1, 32
+; RV64-SLOW-NEXT: or a0, a1, a0
+; RV64-SLOW-NEXT: vmv.s.x v8, a0
+; RV64-SLOW-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; RV64-SLOW-NEXT: vslideup.vi v9, v8, 1
+; RV64-SLOW-NEXT: vmv1r.v v8, v9
+; RV64-SLOW-NEXT: ret
;
-; RV64-LABEL: mgather_v2i64_align4:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 0, e8, mf8, ta, ma
-; RV64-NEXT: vmv.x.s a0, v0
-; RV64-NEXT: andi a1, a0, 1
-; RV64-NEXT: bnez a1, .LBB5_3
-; RV64-NEXT: # %bb.1: # %else
-; RV64-NEXT: andi a0, a0, 2
-; RV64-NEXT: bnez a0, .LBB5_4
-; RV64-NEXT: .LBB5_2: # %else2
-; RV64-NEXT: vmv1r.v v8, v9
-; RV64-NEXT: ret
-; RV64-NEXT: .LBB5_3: # %cond.load
-; RV64-NEXT: vsetvli zero, zero, e64, m1, ta, ma
-; RV64-NEXT: vmv.x.s a1, v8
-; RV64-NEXT: lwu a2, 4(a1)
-; RV64-NEXT: lwu a1, 0(a1)
-; RV64-NEXT: slli a2, a2, 32
-; RV64-NEXT: or a1, a2, a1
-; RV64-NEXT: vsetivli zero, 2, e64, m1, tu, ma
-; RV64-NEXT: vmv.s.x v9, a1
-; RV64-NEXT: andi a0, a0, 2
-; RV64-NEXT: beqz a0, .LBB5_2
-; RV64-NEXT: .LBB5_4: # %cond.load1
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT: vslidedown.vi v8, v8, 1
-; RV64-NEXT: vmv.x.s a0, v8
-; RV64-NEXT: lwu a1, 4(a0)
-; RV64-NEXT: lwu a0, 0(a0)
-; RV64-NEXT: slli a1, a1, 32
-; RV64-NEXT: or a0, a1, a0
-; RV64-NEXT: vmv.s.x v8, a0
-; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; RV64-NEXT: vslideup.vi v9, v8, 1
-; RV64-NEXT: vmv1r.v v8, v9
-; RV64-NEXT: ret
+; RV32-FAST-LABEL: mgather_v2i64_align4:
+; RV32-FAST: # %bb.0:
+; RV32-FAST-NEXT: vsetivli zero, 2, e64, m1, ta, mu
+; RV32-FAST-NEXT: vluxei32.v v9, (zero), v8, v0.t
+; RV32-FAST-NEXT: vmv.v.v v8, v9
+; RV32-FAST-NEXT: ret
+;
+; RV64-FAST-LABEL: mgather_v2i64_align4:
+; RV64-FAST: # %bb.0:
+; RV64-FAST-NEXT: vsetivli zero, 2, e64, m1, ta, mu
+; RV64-FAST-NEXT: vluxei64.v v9, (zero), v8, v0.t
+; RV64-FAST-NEXT: vmv.v.v v8, v9
+; RV64-FAST-NEXT: ret
%v = call <2 x i64> @llvm.masked.gather.v2i64.v2p0(<2 x ptr> %ptrs, i32 4, <2 x i1> %m, <2 x i64> %passthru)
ret <2 x i64> %v
}
@@ -206,131 +262,143 @@ define <2 x i64> @mgather_v2i64_align4(<2 x ptr> %ptrs, <2 x i1> %m, <2 x i64> %
declare void @llvm.masked.scatter.v4i16.v4p0(<4 x i16>, <4 x ptr>, i32, <4 x i1>)
define void @mscatter_v4i16_align1(<4 x i16> %val, <4 x ptr> %ptrs, <4 x i1> %m) {
-; RV32-LABEL: mscatter_v4i16_align1:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 0, e8, mf8, ta, ma
-; RV32-NEXT: vmv.x.s a0, v0
-; RV32-NEXT: andi a1, a0, 1
-; RV32-NEXT: bnez a1, .LBB6_5
-; RV32-NEXT: # %bb.1: # %else
-; RV32-NEXT: andi a1, a0, 2
-; RV32-NEXT: bnez a1, .LBB6_6
-; RV32-NEXT: .LBB6_2: # %else2
-; RV32-NEXT: andi a1, a0, 4
-; RV32-NEXT: bnez a1, .LBB6_7
-; RV32-NEXT: .LBB6_3: # %else4
-; RV32-NEXT: andi a0, a0, 8
-; RV32-NEXT: bnez a0, .LBB6_8
-; RV32-NEXT: .LBB6_4: # %else6
-; RV32-NEXT: ret
-; RV32-NEXT: .LBB6_5: # %cond.store
-; RV32-NEXT: vsetivli zero, 0, e16, mf2, ta, ma
-; RV32-NEXT: vmv.x.s a1, v8
-; RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV32-NEXT: vmv.x.s a2, v9
-; RV32-NEXT: sb a1, 0(a2)
-; RV32-NEXT: srli a1, a1, 8
-; RV32-NEXT: sb a1, 1(a2)
-; RV32-NEXT: andi a1, a0, 2
-; RV32-NEXT: beqz a1, .LBB6_2
-; RV32-NEXT: .LBB6_6: # %cond.store1
-; RV32-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
-; RV32-NEXT: vslidedown.vi v10, v8, 1
-; RV32-NEXT: vmv.x.s a1, v10
-; RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v10, v9, 1
-; RV32-NEXT: vmv.x.s a2, v10
-; RV32-NEXT: sb a1, 0(a2)
-; RV32-NEXT: srli a1, a1, 8
-; RV32-NEXT: sb a1, 1(a2)
-; RV32-NEXT: andi a1, a0, 4
-; RV32-NEXT: beqz a1, .LBB6_3
-; RV32-NEXT: .LBB6_7: # %cond.store3
-; RV32-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
-; RV32-NEXT: vslidedown.vi v10, v8, 2
-; RV32-NEXT: vmv.x.s a1, v10
-; RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v10, v9, 2
-; RV32-NEXT: vmv.x.s a2, v10
-; RV32-NEXT: sb a1, 0(a2)
-; RV32-NEXT: srli a1, a1, 8
-; RV32-NEXT: sb a1, 1(a2)
-; RV32-NEXT: andi a0, a0, 8
-; RV32-NEXT: beqz a0, .LBB6_4
-; RV32-NEXT: .LBB6_8: # %cond.store5
-; RV32-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
-; RV32-NEXT: vslidedown.vi v8, v8, 3
-; RV32-NEXT: vmv.x.s a0, v8
-; RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v8, v9, 3
-; RV32-NEXT: vmv.x.s a1, v8
-; RV32-NEXT: sb a0, 0(a1)
-; RV32-NEXT: srli a0, a0, 8
-; RV32-NEXT: sb a0, 1(a1)
-; RV32-NEXT: ret
+; RV32-SLOW-LABEL: mscatter_v4i16_align1:
+; RV32-SLOW: # %bb.0:
+; RV32-SLOW-NEXT: vsetivli zero, 0, e8, mf8, ta, ma
+; RV32-SLOW-NEXT: vmv.x.s a0, v0
+; RV32-SLOW-NEXT: andi a1, a0, 1
+; RV32-SLOW-NEXT: bnez a1, .LBB6_5
+; RV32-SLOW-NEXT: # %bb.1: # %else
+; RV32-SLOW-NEXT: andi a1, a0, 2
+; RV32-SLOW-NEXT: bnez a1, .LBB6_6
+; RV32-SLOW-NEXT: .LBB6_2: # %else2
+; RV32-SLOW-NEXT: andi a1, a0, 4
+; RV32-SLOW-NEXT: bnez a1, .LBB6_7
+; RV32-SLOW-NEXT: .LBB6_3: # %else4
+; RV32-SLOW-NEXT: andi a0, a0, 8
+; RV32-SLOW-NEXT: bnez a0, .LBB6_8
+; RV32-SLOW-NEXT: .LBB6_4: # %else6
+; RV32-SLOW-NEXT: ret
+; RV32-SLOW-NEXT: .LBB6_5: # %cond.store
+; RV32-SLOW-NEXT: vsetivli zero, 0, e16, mf2, ta, ma
+; RV32-SLOW-NEXT: vmv.x.s a1, v8
+; RV32-SLOW-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; RV32-SLOW-NEXT: vmv.x.s a2, v9
+; RV32-SLOW-NEXT: sb a1, 0(a2)
+; RV32-SLOW-NEXT: srli a1, a1, 8
+; RV32-SLOW-NEXT: sb a1, 1(a2)
+; RV32-SLOW-NEXT: andi a1, a0, 2
+; RV32-SLOW-NEXT: beqz a1, .LBB6_2
+; RV32-SLOW-NEXT: .LBB6_6: # %cond.store1
+; RV32-SLOW-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
+; RV32-SLOW-NEXT: vslidedown.vi v10, v8, 1
+; RV32-SLOW-NEXT: vmv.x.s a1, v10
+; RV32-SLOW-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; RV32-SLOW-NEXT: vslidedown.vi v10, v9, 1
+; RV32-SLOW-NEXT: vmv.x.s a2, v10
+; RV32-SLOW-NEXT: sb a1, 0(a2)
+; RV32-SLOW-NEXT: srli a1, a1, 8
+; RV32-SLOW-NEXT: sb a1, 1(a2)
+; RV32-SLOW-NEXT: andi a1, a0, 4
+; RV32-SLOW-NEXT: beqz a1, .LBB6_3
+; RV32-SLOW-NEXT: .LBB6_7: # %cond.store3
+; RV32-SLOW-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
+; RV32-SLOW-NEXT: vslidedown.vi v10, v8, 2
+; RV32-SLOW-NEXT: vmv.x.s a1, v10
+; RV32-SLOW-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; RV32-SLOW-NEXT: vslidedown.vi v10, v9, 2
+; RV32-SLOW-NEXT: vmv.x.s a2, v10
+; RV32-SLOW-NEXT: sb a1, 0(a2)
+; RV32-SLOW-NEXT: srli a1, a1, 8
+; RV32-SLOW-NEXT: sb a1, 1(a2)
+; RV32-SLOW-NEXT: andi a0, a0, 8
+; RV32-SLOW-NEXT: beqz a0, .LBB6_4
+; RV32-SLOW-NEXT: .LBB6_8: # %cond.store5
+; RV32-SLOW-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
+; RV32-SLOW-NEXT: vslidedown.vi v8, v8, 3
+; RV32-SLOW-NEXT: vmv.x.s a0, v8
+; RV32-SLOW-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; RV32-SLOW-NEXT: vslidedown.vi v8, v9, 3
+; RV32-SLOW-NEXT: vmv.x.s a1, v8
+; RV32-SLOW-NEXT: sb a0, 0(a1)
+; RV32-SLOW-NEXT: srli a0, a0, 8
+; RV32-SLOW-NEXT: sb a0, 1(a1)
+; RV32-SLOW-NEXT: ret
+;
+; RV64-SLOW-LABEL: mscatter_v4i16_align1:
+; RV64-SLOW: # %bb.0:
+; RV64-SLOW-NEXT: vsetivli zero, 0, e8, mf8, ta, ma
+; RV64-SLOW-NEXT: vmv.x.s a0, v0
+; RV64-SLOW-NEXT: andi a1, a0, 1
+; RV64-SLOW-NEXT: bnez a1, .LBB6_5
+; RV64-SLOW-NEXT: # %bb.1: # %else
+; RV64-SLOW-NEXT: andi a1, a0, 2
+; RV64-SLOW-NEXT: bnez a1, .LBB6_6
+; RV64-SLOW-NEXT: .LBB6_2: # %else2
+; RV64-SLOW-NEXT: andi a1, a0, 4
+; RV64-SLOW-NEXT: bnez a1, .LBB6_7
+; RV64-SLOW-NEXT: .LBB6_3: # %else4
+; RV64-SLOW-NEXT: andi a0, a0, 8
+; RV64-SLOW-NEXT: bnez a0, .LBB6_8
+; RV64-SLOW-NEXT: .LBB6_4: # %else6
+; RV64-SLOW-NEXT: ret
+; RV64-SLOW-NEXT: .LBB6_5: # %cond.store
+; RV64-SLOW-NEXT: vsetivli zero, 0, e16, mf2, ta, ma
+; RV64-SLOW-NEXT: vmv.x.s a1, v8
+; RV64-SLOW-NEXT: vsetvli zero, zero, e64, m2, ta, ma
+; RV64-SLOW-NEXT: vmv.x.s a2, v10
+; RV64-SLOW-NEXT: sb a1, 0(a2)
+; RV64-SLOW-NEXT: srli a1, a1, 8
+; RV64-SLOW-NEXT: sb a1, 1(a2)
+; RV64-SLOW-NEXT: andi a1, a0, 2
+; RV64-SLOW-NEXT: beqz a1, .LBB6_2
+; RV64-SLOW-NEXT: .LBB6_6: # %cond.store1
+; RV64-SLOW-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
+; RV64-SLOW-NEXT: vslidedown.vi v9, v8, 1
+; RV64-SLOW-NEXT: vmv.x.s a1, v9
+; RV64-SLOW-NEXT: vsetvli zero, zero, e64, m2, ta, ma
+; RV64-SLOW-NEXT: vslidedown.vi v12, v10, 1
+; RV64-SLOW-NEXT: vmv.x.s a2, v12
+; RV64-SLOW-NEXT: sb a1, 0(a2)
+; RV64-SLOW-NEXT: srli a1, a1, 8
+; RV64-SLOW-NEXT: sb a1, 1(a2)
+; RV64-SLOW-NEXT: andi a1, a0, 4
+; RV64-SLOW-NEXT: beqz a1, .LBB6_3
+; RV64-SLOW-NEXT: .LBB6_7: # %cond.store3
+; RV64-SLOW-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
+; RV64-SLOW-NEXT: vslidedown.vi v9, v8, 2
+; RV64-SLOW-NEXT: vmv.x.s a1, v9
+; RV64-SLOW-NEXT: vsetvli zero, zero, e64, m2, ta, ma
+; RV64-SLOW-NEXT: vslidedown.vi v12, v10, 2
+; RV64-SLOW-NEXT: vmv.x.s a2, v12
+; RV64-SLOW-NEXT: sb a1, 0(a2)
+; RV64-SLOW-NEXT: srli a1, a1, 8
+; RV64-SLOW-NEXT: sb a1, 1(a2)
+; RV64-SLOW-NEXT: andi a0, a0, 8
+; RV64-SLOW-NEXT: beqz a0, .LBB6_4
+; RV64-SLOW-NEXT: .LBB6_8: # %cond.store5
+; RV64-SLOW-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
+; RV64-SLOW-NEXT: vslidedown.vi v8, v8, 3
+; RV64-SLOW-NEXT: vmv.x.s a0, v8
+; RV64-SLOW-NEXT: vsetvli zero, zero, e64, m2, ta, ma
+; RV64-SLOW-NEXT: vslidedown.vi v8, v10, 3
+; RV64-SLOW-NEXT: vmv.x.s a1, v8
+; RV64-SLOW-NEXT: sb a0, 0(a1)
+; RV64-SLOW-NEXT: srli a0, a0, 8
+; RV64-SLOW-NEXT: sb a0, 1(a1)
+; RV64-SLOW-NEXT: ret
;
-; RV64-LABEL: mscatter_v4i16_align1:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 0, e8, mf8, ta, ma
-; RV64-NEXT: vmv.x.s a0, v0
-; RV64-NEXT: andi a1, a0, 1
-; RV64-NEXT: bnez a1, .LBB6_5
-; RV64-NEXT: # %bb.1: # %else
-; RV64-NEXT: andi a1, a0, 2
-; RV64-NEXT: bnez a1, .LBB6_6
-; RV64-NEXT: .LBB6_2: # %else2
-; RV64-NEXT: andi a1, a0, 4
-; RV64-NEXT: bnez a1, .LBB6_7
-; RV64-NEXT: .LBB6_3: # %else4
-; RV64-NEXT: andi a0, a0, 8
-; RV64-NEXT: bnez a0, .LBB6_8
-; RV64-NEXT: .LBB6_4: # %else6
-; RV64-NEXT: ret
-; RV64-NEXT: .LBB6_5: # %cond.store
-; RV64-NEXT: vsetivli zero, 0, e16, mf2, ta, ma
-; RV64-NEXT: vmv.x.s a1, v8
-; RV64-NEXT: vsetvli zero, zero, e64, m2, ta, ma
-; RV64-NEXT: vmv.x.s a2, v10
-; RV64-NEXT: sb a1, 0(a2)
-; RV64-NEXT: srli a1, a1, 8
-; RV64-NEXT: sb a1, 1(a2)
-; RV64-NEXT: andi a1, a0, 2
-; RV64-NEXT: beqz a1, .LBB6_2
-; RV64-NEXT: .LBB6_6: # %cond.store1
-; RV64-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
-; RV64-NEXT: vslidedown.vi v9, v8, 1
-; RV64-NEXT: vmv.x.s a1, v9
-; RV64-NEXT: vsetvli zero, zero, e64, m2, ta, ma
-; RV64-NEXT: vslidedown.vi v12, v10, 1
-; RV64-NEXT: vmv.x.s a2, v12
-; RV64-NEXT: sb a1, 0(a2)
-; RV64-NEXT: srli a1, a1, 8
-; RV64-NEXT: sb a1, 1(a2)
-; RV64-NEXT: andi a1, a0, 4
-; RV64-NEXT: beqz a1, .LBB6_3
-; RV64-NEXT: .LBB6_7: # %cond.store3
-; RV64-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
-; RV64-NEXT: vslidedown.vi v9, v8, 2
-; RV64-NEXT: vmv.x.s a1, v9
-; RV64-NEXT: vsetvli zero, zero, e64, m2, ta, ma
-; RV64-NEXT: vslidedown.vi v12, v10, 2
-; RV64-NEXT: vmv.x.s a2, v12
-; RV64-NEXT: sb a1, 0(a2)
-; RV64-NEXT: srli a1, a1, 8
-; RV64-NEXT: sb a1, 1(a2)
-; RV64-NEXT: andi a0, a0, 8
-; RV64-NEXT: beqz a0, .LBB6_4
-; RV64-NEXT: .LBB6_8: # %cond.store5
-; RV64-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
-; RV64-NEXT: vslidedown.vi v8, v8, 3
-; RV64-NEXT: vmv.x.s a0, v8
-; RV64-NEXT: vsetvli zero, zero, e64, m2, ta, ma
-; RV64-NEXT: vslidedown.vi v8, v10, 3
-; RV64-NEXT: vmv.x.s a1, v8
-; RV64-NEXT: sb a0, 0(a1)
-; RV64-NEXT: srli a0, a0, 8
-; RV64-NEXT: sb a0, 1(a1)
-; RV64-NEXT: ret
+; RV32-FAST-LABEL: mscatter_v4i16_align1:
+; RV32-FAST: # %bb.0:
+; RV32-FAST-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; RV32-FAST-NEXT: vsoxei32.v v8, (zero), v9, v0.t
+; RV32-FAST-NEXT: ret
+;
+; RV64-FAST-LABEL: mscatter_v4i16_align1:
+; RV64-FAST: # %bb.0:
+; RV64-FAST-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; RV64-FAST-NEXT: vsoxei64.v v8, (zero), v10, v0.t
+; RV64-FAST-NEXT: ret
call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> %val, <4 x ptr> %ptrs, i32 1, <4 x i1> %m)
ret void
}
@@ -338,69 +406,81 @@ define void @mscatter_v4i16_align1(<4 x i16> %val, <4 x ptr> %ptrs, <4 x i1> %m)
declare void @llvm.masked.scatter.v2i32.v2p0(<2 x i32>, <2 x ptr>, i32, <2 x i1>)
define void @mscatter_v2i32_align2(<2 x i32> %val, <2 x ptr> %ptrs, <2 x i1> %m) {
-; RV32-LABEL: mscatter_v2i32_align2:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 0, e8, mf8, ta, ma
-; RV32-NEXT: vmv.x.s a0, v0
-; RV32-NEXT: andi a1, a0, 1
-; RV32-NEXT: bnez a1, .LBB7_3
-; RV32-NEXT: # %bb.1: # %else
-; RV32-NEXT: andi a0, a0, 2
-; RV32-NEXT: bnez a0, .LBB7_4
-; RV32-NEXT: .LBB7_2: # %else2
-; RV32-NEXT: ret
-; RV32-NEXT: .LBB7_3: # %cond.store
-; RV32-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; RV32-NEXT: vmv.x.s a1, v8
-; RV32-NEXT: vmv.x.s a2, v9
-; RV32-NEXT: sh a1, 0(a2)
-; RV32-NEXT: srli a1, a1, 16
-; RV32-NEXT: sh a1, 2(a2)
-; RV32-NEXT: andi a0, a0, 2
-; RV32-NEXT: beqz a0, .LBB7_2
-; RV32-NEXT: .LBB7_4: # %cond.store1
-; RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; RV32-NEXT: vslidedown.vi v8, v8, 1
-; RV32-NEXT: vmv.x.s a0, v8
-; RV32-NEXT: vslidedown.vi v8, v9, 1
-; RV32-NEXT: vmv.x.s a1, v8
-; RV32-NEXT: sh a0, 0(a1)
-; RV32-NEXT: srli a0, a0, 16
-; RV32-NEXT: sh a0, 2(a1)
-; RV32-NEXT: ret
+; RV32-SLOW-LABEL: mscatter_v2i32_align2:
+; RV32-SLOW: # %bb.0:
+; RV32-SLOW-NEXT: vsetivli zero, 0, e8, mf8, ta, ma
+; RV32-SLOW-NEXT: vmv.x.s a0, v0
+; RV32-SLOW-NEXT: andi a1, a0, 1
+; RV32-SLOW-NEXT: bnez a1, .LBB7_3
+; RV32-SLOW-NEXT: # %bb.1: # %else
+; RV32-SLOW-NEXT: andi a0, a0, 2
+; RV32-SLOW-NEXT: bnez a0, .LBB7_4
+; RV32-SLOW-NEXT: .LBB7_2: # %else2
+; RV32-SLOW-NEXT: ret
+; RV32-SLOW-NEXT: .LBB7_3: # %cond.store
+; RV32-SLOW-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; RV32-SLOW-NEXT: vmv.x.s a1, v8
+; RV32-SLOW-NEXT: vmv.x.s a2, v9
+; RV32-SLOW-NEXT: sh a1, 0(a2)
+; RV32-SLOW-NEXT: srli a1, a1, 16
+; RV32-SLOW-NEXT: sh a1, 2(a2)
+; RV32-SLOW-NEXT: andi a0, a0, 2
+; RV32-SLOW-NEXT: beqz a0, .LBB7_2
+; RV32-SLOW-NEXT: .LBB7_4: # %cond.store1
+; RV32-SLOW-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; RV32-SLOW-NEXT: vslidedown.vi v8, v8, 1
+; RV32-SLOW-NEXT: vmv.x.s a0, v8
+; RV32-SLOW-NEXT: vslidedown.vi v8, v9, 1
+; RV32-SLOW-NEXT: vmv.x.s a1, v8
+; RV32-SLOW-NEXT: sh a0, 0(a1)
+; RV32-SLOW-NEXT: srli a0, a0, 16
+; RV32-SLOW-NEXT: sh a0, 2(a1)
+; RV32-SLOW-NEXT: ret
+;
+; RV64-SLOW-LABEL: mscatter_v2i32_align2:
+; RV64-SLOW: # %bb.0:
+; RV64-SLOW-NEXT: vsetivli zero, 0, e8, mf8, ta, ma
+; RV64-SLOW-NEXT: vmv.x.s a0, v0
+; RV64-SLOW-NEXT: andi a1, a0, 1
+; RV64-SLOW-NEXT: bnez a1, .LBB7_3
+; RV64-SLOW-NEXT: # %bb.1: # %else
+; RV64-SLOW-NEXT: andi a0, a0, 2
+; RV64-SLOW-NEXT: bnez a0, .LBB7_4
+; RV64-SLOW-NEXT: .LBB7_2: # %else2
+; RV64-SLOW-NEXT: ret
+; RV64-SLOW-NEXT: .LBB7_3: # %cond.store
+; RV64-SLOW-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; RV64-SLOW-NEXT: vmv.x.s a1, v8
+; RV64-SLOW-NEXT: vsetvli zero, zero, e64, m1, ta, ma
+; RV64-SLOW-NEXT: vmv.x.s a2, v9
+; RV64-SLOW-NEXT: sh a1, 0(a2)
+; RV64-SLOW-NEXT: srli a1, a1, 16
+; RV64-SLOW-NEXT: sh a1, 2(a2)
+; RV64-SLOW-NEXT: andi a0, a0, 2
+; RV64-SLOW-NEXT: beqz a0, .LBB7_2
+; RV64-SLOW-NEXT: .LBB7_4: # %cond.store1
+; RV64-SLOW-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; RV64-SLOW-NEXT: vslidedown.vi v8, v8, 1
+; RV64-SLOW-NEXT: vmv.x.s a0, v8
+; RV64-SLOW-NEXT: vsetvli zero, zero, e64, m1, ta, ma
+; RV64-SLOW-NEXT: vslidedown.vi v8, v9, 1
+; RV64-SLOW-NEXT: vmv.x.s a1, v8
+; RV64-SLOW-NEXT: sh a0, 0(a1)
+; RV64-SLOW-NEXT: srli a0, a0, 16
+; RV64-SLOW-NEXT: sh a0, 2(a1)
+; RV64-SLOW-NEXT: ret
;
-; RV64-LABEL: mscatter_v2i32_align2:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 0, e8, mf8, ta, ma
-; RV64-NEXT: vmv.x.s a0, v0
-; RV64-NEXT: andi a1, a0, 1
-; RV64-NEXT: bnez a1, .LBB7_3
-; RV64-NEXT: # %bb.1: # %else
-; RV64-NEXT: andi a0, a0, 2
-; RV64-NEXT: bnez a0, .LBB7_4
-; RV64-NEXT: .LBB7_2: # %else2
-; RV64-NEXT: ret
-; RV64-NEXT: .LBB7_3: # %cond.store
-; RV64-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; RV64-NEXT: vmv.x.s a1, v8
-; RV64-NEXT: vsetvli zero, zero, e64, m1, ta, ma
-; RV64-NEXT: vmv.x.s a2, v9
-; RV64-NEXT: sh a1, 0(a2)
-; RV64-NEXT: srli a1, a1, 16
-; RV64-NEXT: sh a1, 2(a2)
-; RV64-NEXT: andi a0, a0, 2
-; RV64-NEXT: beqz a0, .LBB7_2
-; RV64-NEXT: .LBB7_4: # %cond.store1
-; RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; RV64-NEXT: vslidedown.vi v8, v8, 1
-; RV64-NEXT: vmv.x.s a0, v8
-; RV64-NEXT: vsetvli zero, zero, e64, m1, ta, ma
-; RV64-NEXT: vslidedown.vi v8, v9, 1
-; RV64-NEXT: vmv.x.s a1, v8
-; RV64-NEXT: sh a0, 0(a1)
-; RV64-NEXT: srli a0, a0, 16
-; RV64-NEXT: sh a0, 2(a1)
-; RV64-NEXT: ret
+; RV32-FAST-LABEL: mscatter_v2i32_align2:
+; RV32-FAST: # %bb.0:
+; RV32-FAST-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
+; RV32-FAST-NEXT: vsoxei32.v v8, (zero), v9, v0.t
+; RV32-FAST-NEXT: ret
+;
+; RV64-FAST-LABEL: mscatter_v2i32_align2:
+; RV64-FAST: # %bb.0:
+; RV64-FAST-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
+; RV64-FAST-NEXT: vsoxei64.v v8, (zero), v9, v0.t
+; RV64-FAST-NEXT: ret
call void @llvm.masked.scatter.v2i32.v2p0(<2 x i32> %val, <2 x ptr> %ptrs, i32 2, <2 x i1> %m)
ret void
}
@@ -408,93 +488,101 @@ define void @mscatter_v2i32_align2(<2 x i32> %val, <2 x ptr> %ptrs, <2 x i1> %m)
declare <2 x i32> @llvm.masked.load.v2i32(ptr, i32, <2 x i1>, <2 x i32>)
define void @masked_load_v2i32_align1(ptr %a, <2 x i32> %m, ptr %res_ptr) nounwind {
-; RV32-LABEL: masked_load_v2i32_align1:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
-; RV32-NEXT: vmseq.vi v8, v8, 0
-; RV32-NEXT: vsetvli zero, zero, e8, mf8, ta, ma
-; RV32-NEXT: vmv.x.s a2, v8
-; RV32-NEXT: andi a3, a2, 1
-; RV32-NEXT: # implicit-def: $v8
-; RV32-NEXT: beqz a3, .LBB8_2
-; RV32-NEXT: # %bb.1: # %cond.load
-; RV32-NEXT: lbu a3, 1(a0)
-; RV32-NEXT: lbu a4, 0(a0)
-; RV32-NEXT: lbu a5, 2(a0)
-; RV32-NEXT: lbu a6, 3(a0)
-; RV32-NEXT: slli a3, a3, 8
-; RV32-NEXT: or a3, a3, a4
-; RV32-NEXT: slli a5, a5, 16
-; RV32-NEXT: slli a6, a6, 24
-; RV32-NEXT: or a4, a6, a5
-; RV32-NEXT: or a3, a4, a3
-; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
-; RV32-NEXT: vmv.v.x v8, a3
-; RV32-NEXT: .LBB8_2: # %else
-; RV32-NEXT: andi a2, a2, 2
-; RV32-NEXT: beqz a2, .LBB8_4
-; RV32-NEXT: # %bb.3: # %cond.load1
-; RV32-NEXT: lbu a2, 5(a0)
-; RV32-NEXT: lbu a3, 4(a0)
-; RV32-NEXT: lbu a4, 6(a0)
-; RV32-NEXT: lbu a0, 7(a0)
-; RV32-NEXT: slli a2, a2, 8
-; RV32-NEXT: or a2, a2, a3
-; RV32-NEXT: slli a4, a4, 16
-; RV32-NEXT: slli a0, a0, 24
-; RV32-NEXT: or a0, a0, a4
-; RV32-NEXT: or a0, a0, a2
-; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
-; RV32-NEXT: vmv.s.x v9, a0
-; RV32-NEXT: vslideup.vi v8, v9, 1
-; RV32-NEXT: .LBB8_4: # %else2
-; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
-; RV32-NEXT: vse32.v v8, (a1)
-; RV32-NEXT: ret
+; RV32-SLOW-LABEL: masked_load_v2i32_align1:
+; RV32-SLOW: # %bb.0:
+; RV32-SLOW-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
+; RV32-SLOW-NEXT: vmseq.vi v8, v8, 0
+; RV32-SLOW-NEXT: vsetvli zero, zero, e8, mf8, ta, ma
+; RV32-SLOW-NEXT: vmv.x.s a2, v8
+; RV32-SLOW-NEXT: andi a3, a2, 1
+; RV32-SLOW-NEXT: # implicit-def: $v8
+; RV32-SLOW-NEXT: beqz a3, .LBB8_2
+; RV32-SLOW-NEXT: # %bb.1: # %cond.load
+; RV32-SLOW-NEXT: lbu a3, 1(a0)
+; RV32-SLOW-NEXT: lbu a4, 0(a0)
+; RV32-SLOW-NEXT: lbu a5, 2(a0)
+; RV32-SLOW-NEXT: lbu a6, 3(a0)
+; RV32-SLOW-NEXT: slli a3, a3, 8
+; RV32-SLOW-NEXT: or a3, a3, a4
+; RV32-SLOW-NEXT: slli a5, a5, 16
+; RV32-SLOW-NEXT: slli a6, a6, 24
+; RV32-SLOW-NEXT: or a4, a6, a5
+; RV32-SLOW-NEXT: or a3, a4, a3
+; RV32-SLOW-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
+; RV32-SLOW-NEXT: vmv.v.x v8, a3
+; RV32-SLOW-NEXT: .LBB8_2: # %else
+; RV32-SLOW-NEXT: andi a2, a2, 2
+; RV32-SLOW-NEXT: beqz a2, .LBB8_4
+; RV32-SLOW-NEXT: # %bb.3: # %cond.load1
+; RV32-SLOW-NEXT: lbu a2, 5(a0)
+; RV32-SLOW-NEXT: lbu a3, 4(a0)
+; RV32-SLOW-NEXT: lbu a4, 6(a0)
+; RV32-SLOW-NEXT: lbu a0, 7(a0)
+; RV32-SLOW-NEXT: slli a2, a2, 8
+; RV32-SLOW-NEXT: or a2, a2, a3
+; RV32-SLOW-NEXT: slli a4, a4, 16
+; RV32-SLOW-NEXT: slli a0, a0, 24
+; RV32-SLOW-NEXT: or a0, a0, a4
+; RV32-SLOW-NEXT: or a0, a0, a2
+; RV32-SLOW-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
+; RV32-SLOW-NEXT: vmv.s.x v9, a0
+; RV32-SLOW-NEXT: vslideup.vi v8, v9, 1
+; RV32-SLOW-NEXT: .LBB8_4: # %else2
+; RV32-SLOW-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
+; RV32-SLOW-NEXT: vse32.v v8, (a1)
+; RV32-SLOW-NEXT: ret
+;
+; RV64-SLOW-LABEL: masked_load_v2i32_align1:
+; RV64-SLOW: # %bb.0:
+; RV64-SLOW-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
+; RV64-SLOW-NEXT: vmseq.vi v8, v8, 0
+; RV64-SLOW-NEXT: vsetvli zero, zero, e8, mf8, ta, ma
+; RV64-SLOW-NEXT: vmv.x.s a2, v8
+; RV64-SLOW-NEXT: andi a3, a2, 1
+; RV64-SLOW-NEXT: # implicit-def: $v8
+; RV64-SLOW-NEXT: beqz a3, .LBB8_2
+; RV64-SLOW-NEXT: # %bb.1: # %cond.load
+; RV64-SLOW-NEXT: lbu a3, 1(a0)
+; RV64-SLOW-NEXT: lbu a4, 0(a0)
+; RV64-SLOW-NEXT: lbu a5, 2(a0)
+; RV64-SLOW-NEXT: lb a6, 3(a0)
+; RV64-SLOW-NEXT: slli a3, a3, 8
+; RV64-SLOW-NEXT: or a3, a3, a4
+; RV64-SLOW-NEXT: slli a5, a5, 16
+; RV64-SLOW-NEXT: slli a6, a6, 24
+; RV64-SLOW-NEXT: or a4, a6, a5
+; RV64-SLOW-NEXT: or a3, a4, a3
+; RV64-SLOW-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
+; RV64-SLOW-NEXT: vmv.v.x v8, a3
+; RV64-SLOW-NEXT: .LBB8_2: # %else
+; RV64-SLOW-NEXT: andi a2, a2, 2
+; RV64-SLOW-NEXT: beqz a2, .LBB8_4
+; RV64-SLOW-NEXT: # %bb.3: # %cond.load1
+; RV64-SLOW-NEXT: lbu a2, 5(a0)
+; RV64-SLOW-NEXT: lbu a3, 4(a0)
+; RV64-SLOW-NEXT: lbu a4, 6(a0)
+; RV64-SLOW-NEXT: lb a0, 7(a0)
+; RV64-SLOW-NEXT: slli a2, a2, 8
+; RV64-SLOW-NEXT: or a2, a2, a3
+; RV64-SLOW-NEXT: slli a4, a4, 16
+; RV64-SLOW-NEXT: slli a0, a0, 24
+; RV64-SLOW-NEXT: or a0, a0, a4
+; RV64-SLOW-NEXT: or a0, a0, a2
+; RV64-SLOW-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
+; RV64-SLOW-NEXT: vmv.s.x v9, a0
+; RV64-SLOW-NEXT: vslideup.vi v8, v9, 1
+; RV64-SLOW-NEXT: .LBB8_4: # %else2
+; RV64-SLOW-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
+; RV64-SLOW-NEXT: vse32.v v8, (a1)
+; RV64-SLOW-NEXT: ret
;
-; RV64-LABEL: masked_load_v2i32_align1:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
-; RV64-NEXT: vmseq.vi v8, v8, 0
-; RV64-NEXT: vsetvli zero, zero, e8, mf8, ta, ma
-; RV64-NEXT: vmv.x.s a2, v8
-; RV64-NEXT: andi a3, a2, 1
-; RV64-NEXT: # implicit-def: $v8
-; RV64-NEXT: beqz a3, .LBB8_2
-; RV64-NEXT: # %bb.1: # %cond.load
-; RV64-NEXT: lbu a3, 1(a0)
-; RV64-NEXT: lbu a4, 0(a0)
-; RV64-NEXT: lbu a5, 2(a0)
-; RV64-NEXT: lb a6, 3(a0)
-; RV64-NEXT: slli a3, a3, 8
-; RV64-NEXT: or a3, a3, a4
-; RV64-NEXT: slli a5, a5, 16
-; RV64-NEXT: slli a6, a6, 24
-; RV64-NEXT: or a4, a6, a5
-; RV64-NEXT: or a3, a4, a3
-; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
-; RV64-NEXT: vmv.v.x v8, a3
-; RV64-NEXT: .LBB8_2: # %else
-; RV64-NEXT: andi a2, a2, 2
-; RV64-NEXT: beqz a2, .LBB8_4
-; RV64-NEXT: # %bb.3: # %cond.load1
-; RV64-NEXT: lbu a2, 5(a0)
-; RV64-NEXT: lbu a3, 4(a0)
-; RV64-NEXT: lbu a4, 6(a0)
-; RV64-NEXT: lb a0, 7(a0)
-; RV64-NEXT: slli a2, a2, 8
-; RV64-NEXT: or a2, a2, a3
-; RV64-NEXT: slli a4, a4, 16
-; RV64-NEXT: slli a0, a0, 24
-; RV64-NEXT: or a0, a0, a4
-; RV64-NEXT: or a0, a0, a2
-; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
-; RV64-NEXT: vmv.s.x v9, a0
-; RV64-NEXT: vslideup.vi v8, v9, 1
-; RV64-NEXT: .LBB8_4: # %else2
-; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
-; RV64-NEXT: vse32.v v8, (a1)
-; RV64-NEXT: ret
+; FAST-LABEL: masked_load_v2i32_align1:
+; FAST: # %bb.0:
+; FAST-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
+; FAST-NEXT: vmseq.vi v0, v8, 0
+; FAST-NEXT: vle32.v v8, (a0), v0.t
+; FAST-NEXT: vse32.v v8, (a1)
+; FAST-NEXT: ret
%mask = icmp eq <2 x i32> %m, zeroinitializer
%load = call <2 x i32> @llvm.masked.load.v2i32(ptr %a, i32 1, <2 x i1> %mask, <2 x i32> undef)
store <2 x i32> %load, ptr %res_ptr
@@ -504,35 +592,42 @@ define void @masked_load_v2i32_align1(ptr %a, <2 x i32> %m, ptr %res_ptr) nounwi
declare void @llvm.masked.store.v2i32.p0(<2 x i32>, ptr, i32, <2 x i1>)
define void @masked_store_v2i32_align2(<2 x i32> %val, ptr %a, <2 x i32> %m) nounwind {
-; CHECK-LABEL: masked_store_v2i32_align2:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
-; CHECK-NEXT: vmseq.vi v9, v9, 0
-; CHECK-NEXT: vsetvli zero, zero, e8, mf8, ta, ma
-; CHECK-NEXT: vmv.x.s a1, v9
-; CHECK-NEXT: andi a2, a1, 1
-; CHECK-NEXT: bnez a2, .LBB9_3
-; CHECK-NEXT: # %bb.1: # %else
-; CHECK-NEXT: andi a1, a1, 2
-; CHECK-NEXT: bnez a1, .LBB9_4
-; CHECK-NEXT: .LBB9_2: # %else2
-; CHECK-NEXT: ret
-; CHECK-NEXT: .LBB9_3: # %cond.store
-; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; CHECK-NEXT: vmv.x.s a2, v8
-; CHECK-NEXT: sh a2, 0(a0)
-; CHECK-NEXT: srli a2, a2, 16
-; CHECK-NEXT: sh a2, 2(a0)
-; CHECK-NEXT: andi a1, a1, 2
-; CHECK-NEXT: beqz a1, .LBB9_2
-; CHECK-NEXT: .LBB9_4: # %cond.store1
-; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; CHECK-NEXT: vslidedown.vi v8, v8, 1
-; CHECK-NEXT: vmv.x.s a1, v8
-; CHECK-NEXT: sh a1, 4(a0)
-; CHECK-NEXT: srli a1, a1, 16
-; CHECK-NEXT: sh a1, 6(a0)
-; CHECK-NEXT: ret
+; SLOW-LABEL: masked_store_v2i32_align2:
+; SLOW: # %bb.0:
+; SLOW-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
+; SLOW-NEXT: vmseq.vi v9, v9, 0
+; SLOW-NEXT: vsetvli zero, zero, e8, mf8, ta, ma
+; SLOW-NEXT: vmv.x.s a1, v9
+; SLOW-NEXT: andi a2, a1, 1
+; SLOW-NEXT: bnez a2, .LBB9_3
+; SLOW-NEXT: # %bb.1: # %else
+; SLOW-NEXT: andi a1, a1, 2
+; SLOW-NEXT: bnez a1, .LBB9_4
+; SLOW-NEXT: .LBB9_2: # %else2
+; SLOW-NEXT: ret
+; SLOW-NEXT: .LBB9_3: # %cond.store
+; SLOW-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; SLOW-NEXT: vmv.x.s a2, v8
+; SLOW-NEXT: sh a2, 0(a0)
+; SLOW-NEXT: srli a2, a2, 16
+; SLOW-NEXT: sh a2, 2(a0)
+; SLOW-NEXT: andi a1, a1, 2
+; SLOW-NEXT: beqz a1, .LBB9_2
+; SLOW-NEXT: .LBB9_4: # %cond.store1
+; SLOW-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; SLOW-NEXT: vslidedown.vi v8, v8, 1
+; SLOW-NEXT: vmv.x.s a1, v8
+; SLOW-NEXT: sh a1, 4(a0)
+; SLOW-NEXT: srli a1, a1, 16
+; SLOW-NEXT: sh a1, 6(a0)
+; SLOW-NEXT: ret
+;
+; FAST-LABEL: masked_store_v2i32_align2:
+; FAST: # %bb.0:
+; FAST-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
+; FAST-NEXT: vmseq.vi v0, v9, 0
+; FAST-NEXT: vse32.v v8, (a0), v0.t
+; FAST-NEXT: ret
%mask = icmp eq <2 x i32> %m, zeroinitializer
call void @llvm.masked.store.v2i32.p0(<2 x i32> %val, ptr %a, i32 2, <2 x i1> %mask)
ret void
diff --git a/llvm/test/CodeGen/RISCV/rvv/unaligned-loads-stores.ll b/llvm/test/CodeGen/RISCV/rvv/unaligned-loads-stores.ll
index 855bf4d2b73a68..f76381bcdc6dec 100644
--- a/llvm/test/CodeGen/RISCV/rvv/unaligned-loads-stores.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/unaligned-loads-stores.ll
@@ -4,9 +4,9 @@
; RUN: llc -mtriple riscv64 -mattr=+d,+zfh,+zvfh,+v < %s \
; RUN: -verify-machineinstrs | FileCheck %s
; RUN: llc -mtriple riscv32 -mattr=+d,+zfh,+zvfh,+v,+unaligned-vector-mem < %s \
-; RUN: -verify-machineinstrs | FileCheck --check-prefix=UNALIGNED %s
+; RUN: -verify-machineinstrs | FileCheck --check-prefix=FAST %s
; RUN: llc -mtriple riscv64 -mattr=+d,+zfh,+zvfh,+v,+unaligned-vector-mem < %s \
-; RUN: -verify-machineinstrs | FileCheck --check-prefix=UNALIGNED %s
+; RUN: -verify-machineinstrs | FileCheck --check-prefix=FAST %s
define <vscale x 1 x i32> @unaligned_load_nxv1i32_a1(<vscale x 1 x i32>* %ptr) {
@@ -16,11 +16,11 @@ define <vscale x 1 x i32> @unaligned_load_nxv1i32_a1(<vscale x 1 x i32>* %ptr) {
; CHECK-NEXT: vle8.v v8, (a0)
; CHECK-NEXT: ret
;
-; UNALIGNED-LABEL: unaligned_load_nxv1i32_a1:
-; UNALIGNED: # %bb.0:
-; UNALIGNED-NEXT: vsetvli a1, zero, e32, mf2, ta, ma
-; UNALIGNED-NEXT: vle32.v v8, (a0)
-; UNALIGNED-NEXT: ret
+; FAST-LABEL: unaligned_load_nxv1i32_a1:
+; FAST: # %bb.0:
+; FAST-NEXT: vsetvli a1, zero, e32, mf2, ta, ma
+; FAST-NEXT: vle32.v v8, (a0)
+; FAST-NEXT: ret
%v = load <vscale x 1 x i32>, <vscale x 1 x i32>* %ptr, align 1
ret <vscale x 1 x i32> %v
}
@@ -32,11 +32,11 @@ define <vscale x 1 x i32> @unaligned_load_nxv1i32_a2(<vscale x 1 x i32>* %ptr) {
; CHECK-NEXT: vle8.v v8, (a0)
; CHECK-NEXT: ret
;
-; UNALIGNED-LABEL: unaligned_load_nxv1i32_a2:
-; UNALIGNED: # %bb.0:
-; UNALIGNED-NEXT: vsetvli a1, zero, e32, mf2, ta, ma
-; UNALIGNED-NEXT: vle32.v v8, (a0)
-; UNALIGNED-NEXT: ret
+; FAST-LABEL: unaligned_load_nxv1i32_a2:
+; FAST: # %bb.0:
+; FAST-NEXT: vsetvli a1, zero, e32, mf2, ta, ma
+; FAST-NEXT: vle32.v v8, (a0)
+; FAST-NEXT: ret
%v = load <vscale x 1 x i32>, <vscale x 1 x i32>* %ptr, align 2
ret <vscale x 1 x i32> %v
}
@@ -48,11 +48,11 @@ define <vscale x 1 x i32> @aligned_load_nxv1i32_a4(<vscale x 1 x i32>* %ptr) {
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: ret
;
-; UNALIGNED-LABEL: aligned_load_nxv1i32_a4:
-; UNALIGNED: # %bb.0:
-; UNALIGNED-NEXT: vsetvli a1, zero, e32, mf2, ta, ma
-; UNALIGNED-NEXT: vle32.v v8, (a0)
-; UNALIGNED-NEXT: ret
+; FAST-LABEL: aligned_load_nxv1i32_a4:
+; FAST: # %bb.0:
+; FAST-NEXT: vsetvli a1, zero, e32, mf2, ta, ma
+; FAST-NEXT: vle32.v v8, (a0)
+; FAST-NEXT: ret
%v = load <vscale x 1 x i32>, <vscale x 1 x i32>* %ptr, align 4
ret <vscale x 1 x i32> %v
}
@@ -63,10 +63,10 @@ define <vscale x 1 x i64> @unaligned_load_nxv1i64_a1(<vscale x 1 x i64>* %ptr) {
; CHECK-NEXT: vl1r.v v8, (a0)
; CHECK-NEXT: ret
;
-; UNALIGNED-LABEL: unaligned_load_nxv1i64_a1:
-; UNALIGNED: # %bb.0:
-; UNALIGNED-NEXT: vl1re64.v v8, (a0)
-; UNALIGNED-NEXT: ret
+; FAST-LABEL: unaligned_load_nxv1i64_a1:
+; FAST: # %bb.0:
+; FAST-NEXT: vl1re64.v v8, (a0)
+; FAST-NEXT: ret
%v = load <vscale x 1 x i64>, <vscale x 1 x i64>* %ptr, align 1
ret <vscale x 1 x i64> %v
}
@@ -77,10 +77,10 @@ define <vscale x 1 x i64> @unaligned_load_nxv1i64_a4(<vscale x 1 x i64>* %ptr) {
; CHECK-NEXT: vl1r.v v8, (a0)
; CHECK-NEXT: ret
;
-; UNALIGNED-LABEL: unaligned_load_nxv1i64_a4:
-; UNALIGNED: # %bb.0:
-; UNALIGNED-NEXT: vl1re64.v v8, (a0)
-; UNALIGNED-NEXT: ret
+; FAST-LABEL: unaligned_load_nxv1i64_a4:
+; FAST: # %bb.0:
+; FAST-NEXT: vl1re64.v v8, (a0)
+; FAST-NEXT: ret
%v = load <vscale x 1 x i64>, <vscale x 1 x i64>* %ptr, align 4
ret <vscale x 1 x i64> %v
}
@@ -91,10 +91,10 @@ define <vscale x 1 x i64> @aligned_load_nxv1i64_a8(<vscale x 1 x i64>* %ptr) {
; CHECK-NEXT: vl1re64.v v8, (a0)
; CHECK-NEXT: ret
;
-; UNALIGNED-LABEL: aligned_load_nxv1i64_a8:
-; UNALIGNED: # %bb.0:
-; UNALIGNED-NEXT: vl1re64.v v8, (a0)
-; UNALIGNED-NEXT: ret
+; FAST-LABEL: aligned_load_nxv1i64_a8:
+; FAST: # %bb.0:
+; FAST-NEXT: vl1re64.v v8, (a0)
+; FAST-NEXT: ret
%v = load <vscale x 1 x i64>, <vscale x 1 x i64>* %ptr, align 8
ret <vscale x 1 x i64> %v
}
@@ -105,10 +105,10 @@ define <vscale x 2 x i64> @unaligned_load_nxv2i64_a1(<vscale x 2 x i64>* %ptr) {
; CHECK-NEXT: vl2r.v v8, (a0)
; CHECK-NEXT: ret
;
-; UNALIGNED-LABEL: unaligned_load_nxv2i64_a1:
-; UNALIGNED: # %bb.0:
-; UNALIGNED-NEXT: vl2re64.v v8, (a0)
-; UNALIGNED-NEXT: ret
+; FAST-LABEL: unaligned_load_nxv2i64_a1:
+; FAST: # %bb.0:
+; FAST-NEXT: vl2re64.v v8, (a0)
+; FAST-NEXT: ret
%v = load <vscale x 2 x i64>, <vscale x 2 x i64>* %ptr, align 1
ret <vscale x 2 x i64> %v
}
@@ -119,10 +119,10 @@ define <vscale x 2 x i64> @unaligned_load_nxv2i64_a4(<vscale x 2 x i64>* %ptr) {
; CHECK-NEXT: vl2r.v v8, (a0)
; CHECK-NEXT: ret
;
-; UNALIGNED-LABEL: unaligned_load_nxv2i64_a4:
-; UNALIGNED: # %bb.0:
-; UNALIGNED-NEXT: vl2re64.v v8, (a0)
-; UNALIGNED-NEXT: ret
+; FAST-LABEL: unaligned_load_nxv2i64_a4:
+; FAST: # %bb.0:
+; FAST-NEXT: vl2re64.v v8, (a0)
+; FAST-NEXT: ret
%v = load <vscale x 2 x i64>, <vscale x 2 x i64>* %ptr, align 4
ret <vscale x 2 x i64> %v
}
@@ -133,10 +133,10 @@ define <vscale x 2 x i64> @aligned_load_nxv2i64_a8(<vscale x 2 x i64>* %ptr) {
; CHECK-NEXT: vl2re64.v v8, (a0)
; CHECK-NEXT: ret
;
-; UNALIGNED-LABEL: aligned_load_nxv2i64_a8:
-; UNALIGNED: # %bb.0:
-; UNALIGNED-NEXT: vl2re64.v v8, (a0)
-; UNALIGNED-NEXT: ret
+; FAST-LABEL: aligned_load_nxv2i64_a8:
+; FAST: # %bb.0:
+; FAST-NEXT: vl2re64.v v8, (a0)
+; FAST-NEXT: ret
%v = load <vscale x 2 x i64>, <vscale x 2 x i64>* %ptr, align 8
ret <vscale x 2 x i64> %v
}
@@ -149,11 +149,11 @@ define <vscale x 1 x i1> @unaligned_load_nxv1i1_a1(<vscale x 1 x i1>* %ptr) {
; CHECK-NEXT: vlm.v v0, (a0)
; CHECK-NEXT: ret
;
-; UNALIGNED-LABEL: unaligned_load_nxv1i1_a1:
-; UNALIGNED: # %bb.0:
-; UNALIGNED-NEXT: vsetvli a1, zero, e8, mf8, ta, ma
-; UNALIGNED-NEXT: vlm.v v0, (a0)
-; UNALIGNED-NEXT: ret
+; FAST-LABEL: unaligned_load_nxv1i1_a1:
+; FAST: # %bb.0:
+; FAST-NEXT: vsetvli a1, zero, e8, mf8, ta, ma
+; FAST-NEXT: vlm.v v0, (a0)
+; FAST-NEXT: ret
%v = load <vscale x 1 x i1>, <vscale x 1 x i1>* %ptr, align 1
ret <vscale x 1 x i1> %v
}
@@ -164,10 +164,10 @@ define <vscale x 4 x float> @unaligned_load_nxv4f32_a1(<vscale x 4 x float>* %pt
; CHECK-NEXT: vl2r.v v8, (a0)
; CHECK-NEXT: ret
;
-; UNALIGNED-LABEL: unaligned_load_nxv4f32_a1:
-; UNALIGNED: # %bb.0:
-; UNALIGNED-NEXT: vl2re32.v v8, (a0)
-; UNALIGNED-NEXT: ret
+; FAST-LABEL: unaligned_load_nxv4f32_a1:
+; FAST: # %bb.0:
+; FAST-NEXT: vl2re32.v v8, (a0)
+; FAST-NEXT: ret
%v = load <vscale x 4 x float>, <vscale x 4 x float>* %ptr, align 1
ret <vscale x 4 x float> %v
}
@@ -178,10 +178,10 @@ define <vscale x 4 x float> @unaligned_load_nxv4f32_a2(<vscale x 4 x float>* %pt
; CHECK-NEXT: vl2r.v v8, (a0)
; CHECK-NEXT: ret
;
-; UNALIGNED-LABEL: unaligned_load_nxv4f32_a2:
-; UNALIGNED: # %bb.0:
-; UNALIGNED-NEXT: vl2re32.v v8, (a0)
-; UNALIGNED-NEXT: ret
+; FAST-LABEL: unaligned_load_nxv4f32_a2:
+; FAST: # %bb.0:
+; FAST-NEXT: vl2re32.v v8, (a0)
+; FAST-NEXT: ret
%v = load <vscale x 4 x float>, <vscale x 4 x float>* %ptr, align 2
ret <vscale x 4 x float> %v
}
@@ -192,10 +192,10 @@ define <vscale x 4 x float> @aligned_load_nxv4f32_a4(<vscale x 4 x float>* %ptr)
; CHECK-NEXT: vl2re32.v v8, (a0)
; CHECK-NEXT: ret
;
-; UNALIGNED-LABEL: aligned_load_nxv4f32_a4:
-; UNALIGNED: # %bb.0:
-; UNALIGNED-NEXT: vl2re32.v v8, (a0)
-; UNALIGNED-NEXT: ret
+; FAST-LABEL: aligned_load_nxv4f32_a4:
+; FAST: # %bb.0:
+; FAST-NEXT: vl2re32.v v8, (a0)
+; FAST-NEXT: ret
%v = load <vscale x 4 x float>, <vscale x 4 x float>* %ptr, align 4
ret <vscale x 4 x float> %v
}
@@ -206,10 +206,10 @@ define <vscale x 8 x half> @unaligned_load_nxv8f16_a1(<vscale x 8 x half>* %ptr)
; CHECK-NEXT: vl2r.v v8, (a0)
; CHECK-NEXT: ret
;
-; UNALIGNED-LABEL: unaligned_load_nxv8f16_a1:
-; UNALIGNED: # %bb.0:
-; UNALIGNED-NEXT: vl2re16.v v8, (a0)
-; UNALIGNED-NEXT: ret
+; FAST-LABEL: unaligned_load_nxv8f16_a1:
+; FAST: # %bb.0:
+; FAST-NEXT: vl2re16.v v8, (a0)
+; FAST-NEXT: ret
%v = load <vscale x 8 x half>, <vscale x 8 x half>* %ptr, align 1
ret <vscale x 8 x half> %v
}
@@ -220,10 +220,10 @@ define <vscale x 8 x half> @aligned_load_nxv8f16_a2(<vscale x 8 x half>* %ptr) {
; CHECK-NEXT: vl2re16.v v8, (a0)
; CHECK-NEXT: ret
;
-; UNALIGNED-LABEL: aligned_load_nxv8f16_a2:
-; UNALIGNED: # %bb.0:
-; UNALIGNED-NEXT: vl2re16.v v8, (a0)
-; UNALIGNED-NEXT: ret
+; FAST-LABEL: aligned_load_nxv8f16_a2:
+; FAST: # %bb.0:
+; FAST-NEXT: vl2re16.v v8, (a0)
+; FAST-NEXT: ret
%v = load <vscale x 8 x half>, <vscale x 8 x half>* %ptr, align 2
ret <vscale x 8 x half> %v
}
@@ -234,10 +234,10 @@ define void @unaligned_store_nxv4i32_a1(<vscale x 4 x i32> %x, <vscale x 4 x i32
; CHECK-NEXT: vs2r.v v8, (a0)
; CHECK-NEXT: ret
;
-; UNALIGNED-LABEL: unaligned_store_nxv4i32_a1:
-; UNALIGNED: # %bb.0:
-; UNALIGNED-NEXT: vs2r.v v8, (a0)
-; UNALIGNED-NEXT: ret
+; FAST-LABEL: unaligned_store_nxv4i32_a1:
+; FAST: # %bb.0:
+; FAST-NEXT: vs2r.v v8, (a0)
+; FAST-NEXT: ret
store <vscale x 4 x i32> %x, <vscale x 4 x i32>* %ptr, align 1
ret void
}
@@ -248,10 +248,10 @@ define void @unaligned_store_nxv4i32_a2(<vscale x 4 x i32> %x, <vscale x 4 x i32
; CHECK-NEXT: vs2r.v v8, (a0)
; CHECK-NEXT: ret
;
-; UNALIGNED-LABEL: unaligned_store_nxv4i32_a2:
-; UNALIGNED: # %bb.0:
-; UNALIGNED-NEXT: vs2r.v v8, (a0)
-; UNALIGNED-NEXT: ret
+; FAST-LABEL: unaligned_store_nxv4i32_a2:
+; FAST: # %bb.0:
+; FAST-NEXT: vs2r.v v8, (a0)
+; FAST-NEXT: ret
store <vscale x 4 x i32> %x, <vscale x 4 x i32>* %ptr, align 2
ret void
}
@@ -262,10 +262,10 @@ define void @aligned_store_nxv4i32_a4(<vscale x 4 x i32> %x, <vscale x 4 x i32>*
; CHECK-NEXT: vs2r.v v8, (a0)
; CHECK-NEXT: ret
;
-; UNALIGNED-LABEL: aligned_store_nxv4i32_a4:
-; UNALIGNED: # %bb.0:
-; UNALIGNED-NEXT: vs2r.v v8, (a0)
-; UNALIGNED-NEXT: ret
+; FAST-LABEL: aligned_store_nxv4i32_a4:
+; FAST: # %bb.0:
+; FAST-NEXT: vs2r.v v8, (a0)
+; FAST-NEXT: ret
store <vscale x 4 x i32> %x, <vscale x 4 x i32>* %ptr, align 4
ret void
}
@@ -277,11 +277,11 @@ define void @unaligned_store_nxv1i16_a1(<vscale x 1 x i16> %x, <vscale x 1 x i16
; CHECK-NEXT: vse8.v v8, (a0)
; CHECK-NEXT: ret
;
-; UNALIGNED-LABEL: unaligned_store_nxv1i16_a1:
-; UNALIGNED: # %bb.0:
-; UNALIGNED-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
-; UNALIGNED-NEXT: vse16.v v8, (a0)
-; UNALIGNED-NEXT: ret
+; FAST-LABEL: unaligned_store_nxv1i16_a1:
+; FAST: # %bb.0:
+; FAST-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
+; FAST-NEXT: vse16.v v8, (a0)
+; FAST-NEXT: ret
store <vscale x 1 x i16> %x, <vscale x 1 x i16>* %ptr, align 1
ret void
}
@@ -293,11 +293,11 @@ define void @aligned_store_nxv1i16_a2(<vscale x 1 x i16> %x, <vscale x 1 x i16>*
; CHECK-NEXT: vse16.v v8, (a0)
; CHECK-NEXT: ret
;
-; UNALIGNED-LABEL: aligned_store_nxv1i16_a2:
-; UNALIGNED: # %bb.0:
-; UNALIGNED-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
-; UNALIGNED-NEXT: vse16.v v8, (a0)
-; UNALIGNED-NEXT: ret
+; FAST-LABEL: aligned_store_nxv1i16_a2:
+; FAST: # %bb.0:
+; FAST-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
+; FAST-NEXT: vse16.v v8, (a0)
+; FAST-NEXT: ret
store <vscale x 1 x i16> %x, <vscale x 1 x i16>* %ptr, align 2
ret void
}
diff --git a/llvm/test/CodeGen/RISCV/unaligned-load-store.ll b/llvm/test/CodeGen/RISCV/unaligned-load-store.ll
index 4fddcfd4182ded..190f63e5e3a2bd 100644
--- a/llvm/test/CodeGen/RISCV/unaligned-load-store.ll
+++ b/llvm/test/CodeGen/RISCV/unaligned-load-store.ll
@@ -1,12 +1,12 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
-; RUN: | FileCheck -check-prefixes=ALL,NOMISALIGN,RV32I %s
+; RUN: | FileCheck -check-prefixes=ALL,SLOW,RV32I %s
; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
-; RUN: | FileCheck -check-prefixes=ALL,NOMISALIGN,RV64I %s
+; RUN: | FileCheck -check-prefixes=ALL,SLOW,RV64I %s
; RUN: llc -mtriple=riscv32 -mattr=+unaligned-scalar-mem -verify-machineinstrs < %s \
-; RUN: | FileCheck -check-prefixes=ALL,MISALIGN,MISALIGN-RV32I %s
+; RUN: | FileCheck -check-prefixes=ALL,FAST,RV32I-FAST %s
; RUN: llc -mtriple=riscv64 -mattr=+unaligned-scalar-mem -verify-machineinstrs < %s \
-; RUN: | FileCheck -check-prefixes=ALL,MISALIGN,MISALIGN-RV64I %s
+; RUN: | FileCheck -check-prefixes=ALL,FAST,RV64I-FAST %s
; A collection of cases showing codegen for unaligned loads and stores
@@ -20,64 +20,64 @@ define i8 @load_i8(ptr %p) {
}
define i16 @load_i16(ptr %p) {
-; NOMISALIGN-LABEL: load_i16:
-; NOMISALIGN: # %bb.0:
-; NOMISALIGN-NEXT: lbu a1, 1(a0)
-; NOMISALIGN-NEXT: lbu a0, 0(a0)
-; NOMISALIGN-NEXT: slli a1, a1, 8
-; NOMISALIGN-NEXT: or a0, a1, a0
-; NOMISALIGN-NEXT: ret
+; SLOW-LABEL: load_i16:
+; SLOW: # %bb.0:
+; SLOW-NEXT: lbu a1, 1(a0)
+; SLOW-NEXT: lbu a0, 0(a0)
+; SLOW-NEXT: slli a1, a1, 8
+; SLOW-NEXT: or a0, a1, a0
+; SLOW-NEXT: ret
;
-; MISALIGN-LABEL: load_i16:
-; MISALIGN: # %bb.0:
-; MISALIGN-NEXT: lh a0, 0(a0)
-; MISALIGN-NEXT: ret
+; FAST-LABEL: load_i16:
+; FAST: # %bb.0:
+; FAST-NEXT: lh a0, 0(a0)
+; FAST-NEXT: ret
%res = load i16, ptr %p, align 1
ret i16 %res
}
define i24 @load_i24(ptr %p) {
-; NOMISALIGN-LABEL: load_i24:
-; NOMISALIGN: # %bb.0:
-; NOMISALIGN-NEXT: lbu a1, 1(a0)
-; NOMISALIGN-NEXT: lbu a2, 0(a0)
-; NOMISALIGN-NEXT: lbu a0, 2(a0)
-; NOMISALIGN-NEXT: slli a1, a1, 8
-; NOMISALIGN-NEXT: or a1, a1, a2
-; NOMISALIGN-NEXT: slli a0, a0, 16
-; NOMISALIGN-NEXT: or a0, a1, a0
-; NOMISALIGN-NEXT: ret
+; SLOW-LABEL: load_i24:
+; SLOW: # %bb.0:
+; SLOW-NEXT: lbu a1, 1(a0)
+; SLOW-NEXT: lbu a2, 0(a0)
+; SLOW-NEXT: lbu a0, 2(a0)
+; SLOW-NEXT: slli a1, a1, 8
+; SLOW-NEXT: or a1, a1, a2
+; SLOW-NEXT: slli a0, a0, 16
+; SLOW-NEXT: or a0, a1, a0
+; SLOW-NEXT: ret
;
-; MISALIGN-LABEL: load_i24:
-; MISALIGN: # %bb.0:
-; MISALIGN-NEXT: lbu a1, 2(a0)
-; MISALIGN-NEXT: lhu a0, 0(a0)
-; MISALIGN-NEXT: slli a1, a1, 16
-; MISALIGN-NEXT: or a0, a0, a1
-; MISALIGN-NEXT: ret
+; FAST-LABEL: load_i24:
+; FAST: # %bb.0:
+; FAST-NEXT: lbu a1, 2(a0)
+; FAST-NEXT: lhu a0, 0(a0)
+; FAST-NEXT: slli a1, a1, 16
+; FAST-NEXT: or a0, a0, a1
+; FAST-NEXT: ret
%res = load i24, ptr %p, align 1
ret i24 %res
}
define i32 @load_i32(ptr %p) {
-; NOMISALIGN-LABEL: load_i32:
-; NOMISALIGN: # %bb.0:
-; NOMISALIGN-NEXT: lbu a1, 1(a0)
-; NOMISALIGN-NEXT: lbu a2, 0(a0)
-; NOMISALIGN-NEXT: lbu a3, 2(a0)
-; NOMISALIGN-NEXT: lbu a0, 3(a0)
-; NOMISALIGN-NEXT: slli a1, a1, 8
-; NOMISALIGN-NEXT: or a1, a1, a2
-; NOMISALIGN-NEXT: slli a3, a3, 16
-; NOMISALIGN-NEXT: slli a0, a0, 24
-; NOMISALIGN-NEXT: or a0, a0, a3
-; NOMISALIGN-NEXT: or a0, a0, a1
-; NOMISALIGN-NEXT: ret
+; SLOW-LABEL: load_i32:
+; SLOW: # %bb.0:
+; SLOW-NEXT: lbu a1, 1(a0)
+; SLOW-NEXT: lbu a2, 0(a0)
+; SLOW-NEXT: lbu a3, 2(a0)
+; SLOW-NEXT: lbu a0, 3(a0)
+; SLOW-NEXT: slli a1, a1, 8
+; SLOW-NEXT: or a1, a1, a2
+; SLOW-NEXT: slli a3, a3, 16
+; SLOW-NEXT: slli a0, a0, 24
+; SLOW-NEXT: or a0, a0, a3
+; SLOW-NEXT: or a0, a0, a1
+; SLOW-NEXT: ret
;
-; MISALIGN-LABEL: load_i32:
-; MISALIGN: # %bb.0:
-; MISALIGN-NEXT: lw a0, 0(a0)
-; MISALIGN-NEXT: ret
+; FAST-LABEL: load_i32:
+; FAST: # %bb.0:
+; FAST-NEXT: lw a0, 0(a0)
+; FAST-NEXT: ret
%res = load i32, ptr %p, align 1
ret i32 %res
}
@@ -134,17 +134,17 @@ define i64 @load_i64(ptr %p) {
; RV64I-NEXT: or a0, a0, a1
; RV64I-NEXT: ret
;
-; MISALIGN-RV32I-LABEL: load_i64:
-; MISALIGN-RV32I: # %bb.0:
-; MISALIGN-RV32I-NEXT: lw a2, 0(a0)
-; MISALIGN-RV32I-NEXT: lw a1, 4(a0)
-; MISALIGN-RV32I-NEXT: mv a0, a2
-; MISALIGN-RV32I-NEXT: ret
+; RV32I-FAST-LABEL: load_i64:
+; RV32I-FAST: # %bb.0:
+; RV32I-FAST-NEXT: lw a2, 0(a0)
+; RV32I-FAST-NEXT: lw a1, 4(a0)
+; RV32I-FAST-NEXT: mv a0, a2
+; RV32I-FAST-NEXT: ret
;
-; MISALIGN-RV64I-LABEL: load_i64:
-; MISALIGN-RV64I: # %bb.0:
-; MISALIGN-RV64I-NEXT: ld a0, 0(a0)
-; MISALIGN-RV64I-NEXT: ret
+; RV64I-FAST-LABEL: load_i64:
+; RV64I-FAST: # %bb.0:
+; RV64I-FAST-NEXT: ld a0, 0(a0)
+; RV64I-FAST-NEXT: ret
%res = load i64, ptr %p, align 1
ret i64 %res
}
@@ -159,57 +159,57 @@ define void @store_i8(ptr %p, i8 %v) {
}
define void @store_i16(ptr %p, i16 %v) {
-; NOMISALIGN-LABEL: store_i16:
-; NOMISALIGN: # %bb.0:
-; NOMISALIGN-NEXT: sb a1, 0(a0)
-; NOMISALIGN-NEXT: srli a1, a1, 8
-; NOMISALIGN-NEXT: sb a1, 1(a0)
-; NOMISALIGN-NEXT: ret
+; SLOW-LABEL: store_i16:
+; SLOW: # %bb.0:
+; SLOW-NEXT: sb a1, 0(a0)
+; SLOW-NEXT: srli a1, a1, 8
+; SLOW-NEXT: sb a1, 1(a0)
+; SLOW-NEXT: ret
;
-; MISALIGN-LABEL: store_i16:
-; MISALIGN: # %bb.0:
-; MISALIGN-NEXT: sh a1, 0(a0)
-; MISALIGN-NEXT: ret
+; FAST-LABEL: store_i16:
+; FAST: # %bb.0:
+; FAST-NEXT: sh a1, 0(a0)
+; FAST-NEXT: ret
store i16 %v, ptr %p, align 1
ret void
}
define void @store_i24(ptr %p, i24 %v) {
-; NOMISALIGN-LABEL: store_i24:
-; NOMISALIGN: # %bb.0:
-; NOMISALIGN-NEXT: sb a1, 0(a0)
-; NOMISALIGN-NEXT: srli a2, a1, 8
-; NOMISALIGN-NEXT: sb a2, 1(a0)
-; NOMISALIGN-NEXT: srli a1, a1, 16
-; NOMISALIGN-NEXT: sb a1, 2(a0)
-; NOMISALIGN-NEXT: ret
+; SLOW-LABEL: store_i24:
+; SLOW: # %bb.0:
+; SLOW-NEXT: sb a1, 0(a0)
+; SLOW-NEXT: srli a2, a1, 8
+; SLOW-NEXT: sb a2, 1(a0)
+; SLOW-NEXT: srli a1, a1, 16
+; SLOW-NEXT: sb a1, 2(a0)
+; SLOW-NEXT: ret
;
-; MISALIGN-LABEL: store_i24:
-; MISALIGN: # %bb.0:
-; MISALIGN-NEXT: sh a1, 0(a0)
-; MISALIGN-NEXT: srli a1, a1, 16
-; MISALIGN-NEXT: sb a1, 2(a0)
-; MISALIGN-NEXT: ret
+; FAST-LABEL: store_i24:
+; FAST: # %bb.0:
+; FAST-NEXT: sh a1, 0(a0)
+; FAST-NEXT: srli a1, a1, 16
+; FAST-NEXT: sb a1, 2(a0)
+; FAST-NEXT: ret
store i24 %v, ptr %p, align 1
ret void
}
define void @store_i32(ptr %p, i32 %v) {
-; NOMISALIGN-LABEL: store_i32:
-; NOMISALIGN: # %bb.0:
-; NOMISALIGN-NEXT: sb a1, 0(a0)
-; NOMISALIGN-NEXT: srli a2, a1, 24
-; NOMISALIGN-NEXT: sb a2, 3(a0)
-; NOMISALIGN-NEXT: srli a2, a1, 16
-; NOMISALIGN-NEXT: sb a2, 2(a0)
-; NOMISALIGN-NEXT: srli a1, a1, 8
-; NOMISALIGN-NEXT: sb a1, 1(a0)
-; NOMISALIGN-NEXT: ret
+; SLOW-LABEL: store_i32:
+; SLOW: # %bb.0:
+; SLOW-NEXT: sb a1, 0(a0)
+; SLOW-NEXT: srli a2, a1, 24
+; SLOW-NEXT: sb a2, 3(a0)
+; SLOW-NEXT: srli a2, a1, 16
+; SLOW-NEXT: sb a2, 2(a0)
+; SLOW-NEXT: srli a1, a1, 8
+; SLOW-NEXT: sb a1, 1(a0)
+; SLOW-NEXT: ret
;
-; MISALIGN-LABEL: store_i32:
-; MISALIGN: # %bb.0:
-; MISALIGN-NEXT: sw a1, 0(a0)
-; MISALIGN-NEXT: ret
+; FAST-LABEL: store_i32:
+; FAST: # %bb.0:
+; FAST-NEXT: sw a1, 0(a0)
+; FAST-NEXT: ret
store i32 %v, ptr %p, align 1
ret void
}
@@ -252,16 +252,16 @@ define void @store_i64(ptr %p, i64 %v) {
; RV64I-NEXT: sb a1, 1(a0)
; RV64I-NEXT: ret
;
-; MISALIGN-RV32I-LABEL: store_i64:
-; MISALIGN-RV32I: # %bb.0:
-; MISALIGN-RV32I-NEXT: sw a2, 4(a0)
-; MISALIGN-RV32I-NEXT: sw a1, 0(a0)
-; MISALIGN-RV32I-NEXT: ret
+; RV32I-FAST-LABEL: store_i64:
+; RV32I-FAST: # %bb.0:
+; RV32I-FAST-NEXT: sw a2, 4(a0)
+; RV32I-FAST-NEXT: sw a1, 0(a0)
+; RV32I-FAST-NEXT: ret
;
-; MISALIGN-RV64I-LABEL: store_i64:
-; MISALIGN-RV64I: # %bb.0:
-; MISALIGN-RV64I-NEXT: sd a1, 0(a0)
-; MISALIGN-RV64I-NEXT: ret
+; RV64I-FAST-LABEL: store_i64:
+; RV64I-FAST: # %bb.0:
+; RV64I-FAST-NEXT: sd a1, 0(a0)
+; RV64I-FAST-NEXT: ret
store i64 %v, ptr %p, align 1
ret void
}