[llvm] a0e3cee - [AArch64][SVE] Change pointer type of struct load/store intrinsics.
Sander de Smalen via llvm-commits
llvm-commits at lists.llvm.org
Wed Jun 10 06:03:13 PDT 2020
Author: Sander de Smalen
Date: 2020-06-10T14:02:35+01:00
New Revision: a0e3ceea6ce909067717bb703f4aaf84d88a3bbb
URL: https://github.com/llvm/llvm-project/commit/a0e3ceea6ce909067717bb703f4aaf84d88a3bbb
DIFF: https://github.com/llvm/llvm-project/commit/a0e3ceea6ce909067717bb703f4aaf84d88a3bbb.diff
LOG: [AArch64][SVE] Change pointer type of struct load/store intrinsics.
Instead of loading from e.g. `<vscale x 16 x i8>*`, load from the element
pointer `i8*`. This brings these intrinsics more in line with the other
SVE load/store intrinsics.
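For illustration, here is the effect on the IR for the ld2 case, drawn from the
updated declarations and call sites in sve-intrinsics-loads.ll below (no new
names are introduced here; everything is taken from the patch itself):

  ; Before: the address operand was typed as a pointer to the scalable vector
  declare <vscale x 32 x i8> @llvm.aarch64.sve.ld2.nxv32i8.nxv16i1.p0nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>*)

  ; After: the address operand is the element pointer
  declare <vscale x 32 x i8> @llvm.aarch64.sve.ld2.nxv32i8.nxv16i1.p0i8(<vscale x 16 x i1>, i8*)

  ; A call site then passes the element pointer directly:
  %res = call <vscale x 32 x i8> @llvm.aarch64.sve.ld2.nxv32i8.nxv16i1.p0i8(<vscale x 16 x i1> %pred, i8* %addr)

The st2/st3/st4 intrinsics change in the same way, which is why the store tests
below drop the bitcasts from the element pointer to the vector pointer type.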
Reviewers: fpetrogalli, c-rhodes, rengolin, efriedma
Reviewed By: efriedma
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D81458
Added:
Modified:
llvm/include/llvm/IR/IntrinsicsAArch64.td
llvm/test/CodeGen/AArch64/sve-intrinsics-loads.ll
llvm/test/CodeGen/AArch64/sve-intrinsics-stN-reg-imm-addr-mode.ll
llvm/test/CodeGen/AArch64/sve-intrinsics-stN-reg-reg-addr-mode.ll
llvm/test/CodeGen/AArch64/sve-intrinsics-stores.ll
Removed:
################################################################################
diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td
index d6755a2331c3..7feded99bbd7 100644
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -815,7 +815,7 @@ let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.".
[IntrReadMem, IntrArgMemOnly, ImmArg<ArgIndex<1>>]>;
class AdvSIMD_ManyVec_PredLoad_Intrinsic
- : Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty, llvm_anyptr_ty],
+ : Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty, LLVMPointerToElt<0>],
[IntrReadMem, IntrArgMemOnly]>;
class AdvSIMD_1Vec_PredLoad_Intrinsic
@@ -834,20 +834,20 @@ let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.".
class AdvSIMD_2Vec_PredStore_Intrinsic
: Intrinsic<[],
[llvm_anyvector_ty, LLVMMatchType<0>,
- LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, LLVMPointerTo<0>],
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, LLVMPointerToElt<0>],
[IntrArgMemOnly, NoCapture<ArgIndex<3>>]>;
class AdvSIMD_3Vec_PredStore_Intrinsic
: Intrinsic<[],
[llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>,
- LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, LLVMPointerTo<0>],
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, LLVMPointerToElt<0>],
[IntrArgMemOnly, NoCapture<ArgIndex<4>>]>;
class AdvSIMD_4Vec_PredStore_Intrinsic
: Intrinsic<[],
[llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>,
LLVMMatchType<0>,
- LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, LLVMPointerTo<0>],
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, LLVMPointerToElt<0>],
[IntrArgMemOnly, NoCapture<ArgIndex<5>>]>;
class AdvSIMD_SVE_Index_Intrinsic
diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-loads.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-loads.ll
index 1244782bd56b..838e93d34657 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-loads.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-loads.ll
@@ -256,12 +256,11 @@ define <vscale x 2 x double> @ldnt1d_f64(<vscale x 2 x i1> %pred, double* %addr)
; LD2B
;
-define <vscale x 32 x i8> @ld2b_i8(<vscale x 16 x i1> %pred, <vscale x 16 x i8>* %addr) {
+define <vscale x 32 x i8> @ld2b_i8(<vscale x 16 x i1> %pred, i8* %addr) {
; CHECK-LABEL: ld2b_i8:
; CHECK: ld2b { z0.b, z1.b }, p0/z, [x0]
; CHECK-NEXT: ret
- %res = call <vscale x 32 x i8> @llvm.aarch64.sve.ld2.nxv32i8.nxv16i1.p0nxv16i8(<vscale x 16 x i1> %pred,
- <vscale x 16 x i8>* %addr)
+ %res = call <vscale x 32 x i8> @llvm.aarch64.sve.ld2.nxv32i8.nxv16i1.p0i8(<vscale x 16 x i1> %pred, i8* %addr)
ret <vscale x 32 x i8> %res
}
@@ -269,21 +268,19 @@ define <vscale x 32 x i8> @ld2b_i8(<vscale x 16 x i1> %pred, <vscale x 16 x i8>*
; LD2H
;
-define <vscale x 16 x i16> @ld2h_i16(<vscale x 8 x i1> %pred, <vscale x 8 x i16>* %addr) {
+define <vscale x 16 x i16> @ld2h_i16(<vscale x 8 x i1> %pred, i16* %addr) {
; CHECK-LABEL: ld2h_i16:
; CHECK: ld2h { z0.h, z1.h }, p0/z, [x0]
; CHECK-NEXT: ret
- %res = call <vscale x 16 x i16> @llvm.aarch64.sve.ld2.nxv16i16.nxv8i1.p0nxv8i16(<vscale x 8 x i1> %pred,
- <vscale x 8 x i16>* %addr)
+ %res = call <vscale x 16 x i16> @llvm.aarch64.sve.ld2.nxv16i16.nxv8i1.p0i16(<vscale x 8 x i1> %pred, i16* %addr)
ret <vscale x 16 x i16> %res
}
-define <vscale x 16 x half> @ld2h_f16(<vscale x 8 x i1> %pred, <vscale x 8 x half>* %addr) {
+define <vscale x 16 x half> @ld2h_f16(<vscale x 8 x i1> %pred, half* %addr) {
; CHECK-LABEL: ld2h_f16:
; CHECK: ld2h { z0.h, z1.h }, p0/z, [x0]
; CHECK-NEXT: ret
- %res = call <vscale x 16 x half> @llvm.aarch64.sve.ld2.nxv16f16.nxv8i1.p0nxv8f16(<vscale x 8 x i1> %pred,
- <vscale x 8 x half>* %addr)
+ %res = call <vscale x 16 x half> @llvm.aarch64.sve.ld2.nxv16f16.nxv8i1.p0f16(<vscale x 8 x i1> %pred, half* %addr)
ret <vscale x 16 x half> %res
}
@@ -291,21 +288,19 @@ define <vscale x 16 x half> @ld2h_f16(<vscale x 8 x i1> %pred, <vscale x 8 x hal
; LD2W
;
-define <vscale x 8 x i32> @ld2w_i32(<vscale x 4 x i1> %pred, <vscale x 4 x i32>* %addr) {
+define <vscale x 8 x i32> @ld2w_i32(<vscale x 4 x i1> %pred, i32* %addr) {
; CHECK-LABEL: ld2w_i32:
; CHECK: ld2w { z0.s, z1.s }, p0/z, [x0]
; CHECK-NEXT: ret
- %res = call <vscale x 8 x i32> @llvm.aarch64.sve.ld2.nxv8i32.nxv4i1.p0nxv4i32(<vscale x 4 x i1> %pred,
- <vscale x 4 x i32>* %addr)
+ %res = call <vscale x 8 x i32> @llvm.aarch64.sve.ld2.nxv8i32.nxv4i1.p0i32(<vscale x 4 x i1> %pred, i32* %addr)
ret <vscale x 8 x i32> %res
}
-define <vscale x 8 x float> @ld2w_f32(<vscale x 4 x i1> %pred, <vscale x 4 x float>* %addr) {
+define <vscale x 8 x float> @ld2w_f32(<vscale x 4 x i1> %pred, float* %addr) {
; CHECK-LABEL: ld2w_f32:
; CHECK: ld2w { z0.s, z1.s }, p0/z, [x0]
; CHECK-NEXT: ret
- %res = call <vscale x 8 x float> @llvm.aarch64.sve.ld2.nxv8f32.nxv4i1.p0nxv4f32(<vscale x 4 x i1> %pred,
- <vscale x 4 x float>* %addr)
+ %res = call <vscale x 8 x float> @llvm.aarch64.sve.ld2.nxv8f32.nxv4i1.p0f32(<vscale x 4 x i1> %pred, float* %addr)
ret <vscale x 8 x float> %res
}
@@ -313,21 +308,19 @@ define <vscale x 8 x float> @ld2w_f32(<vscale x 4 x i1> %pred, <vscale x 4 x flo
; LD2D
;
-define <vscale x 4 x i64> @ld2d_i64(<vscale x 2 x i1> %pred, <vscale x 2 x i64>* %addr) {
+define <vscale x 4 x i64> @ld2d_i64(<vscale x 2 x i1> %pred, i64* %addr) {
; CHECK-LABEL: ld2d_i64:
; CHECK: ld2d { z0.d, z1.d }, p0/z, [x0]
; CHECK-NEXT: ret
- %res = call <vscale x 4 x i64> @llvm.aarch64.sve.ld2.nxv4i64.nxv2i1.p0nxv2i64(<vscale x 2 x i1> %pred,
- <vscale x 2 x i64>* %addr)
+ %res = call <vscale x 4 x i64> @llvm.aarch64.sve.ld2.nxv4i64.nxv2i1.p0i64(<vscale x 2 x i1> %pred, i64* %addr)
ret <vscale x 4 x i64> %res
}
-define <vscale x 4 x double> @ld2d_f64(<vscale x 2 x i1> %pred, <vscale x 2 x double>* %addr) {
+define <vscale x 4 x double> @ld2d_f64(<vscale x 2 x i1> %pred, double* %addr) {
; CHECK-LABEL: ld2d_f64:
; CHECK: ld2d { z0.d, z1.d }, p0/z, [x0]
; CHECK-NEXT: ret
- %res = call <vscale x 4 x double> @llvm.aarch64.sve.ld2.nxv4f64.nxv2i1.p0nxv2f64(<vscale x 2 x i1> %pred,
- <vscale x 2 x double>* %addr)
+ %res = call <vscale x 4 x double> @llvm.aarch64.sve.ld2.nxv4f64.nxv2i1.p0f64(<vscale x 2 x i1> %pred, double* %addr)
ret <vscale x 4 x double> %res
}
@@ -335,12 +328,11 @@ define <vscale x 4 x double> @ld2d_f64(<vscale x 2 x i1> %pred, <vscale x 2 x do
; LD3B
;
-define <vscale x 48 x i8> @ld3b_i8(<vscale x 16 x i1> %pred, <vscale x 16 x i8>* %addr) {
+define <vscale x 48 x i8> @ld3b_i8(<vscale x 16 x i1> %pred, i8* %addr) {
; CHECK-LABEL: ld3b_i8:
; CHECK: ld3b { z0.b, z1.b, z2.b }, p0/z, [x0]
; CHECK-NEXT: ret
- %res = call <vscale x 48 x i8> @llvm.aarch64.sve.ld3.nxv48i8.nxv16i1.p0nxv16i8(<vscale x 16 x i1> %pred,
- <vscale x 16 x i8>* %addr)
+ %res = call <vscale x 48 x i8> @llvm.aarch64.sve.ld3.nxv48i8.nxv16i1.p0i8(<vscale x 16 x i1> %pred, i8* %addr)
ret <vscale x 48 x i8> %res
}
@@ -348,21 +340,19 @@ define <vscale x 48 x i8> @ld3b_i8(<vscale x 16 x i1> %pred, <vscale x 16 x i8>*
; LD3H
;
-define <vscale x 24 x i16> @ld3h_i16(<vscale x 8 x i1> %pred, <vscale x 8 x i16>* %addr) {
+define <vscale x 24 x i16> @ld3h_i16(<vscale x 8 x i1> %pred, i16* %addr) {
; CHECK-LABEL: ld3h_i16:
; CHECK: ld3h { z0.h, z1.h, z2.h }, p0/z, [x0]
; CHECK-NEXT: ret
- %res = call <vscale x 24 x i16> @llvm.aarch64.sve.ld3.nxv24i16.nxv8i1.p0nxv8i16(<vscale x 8 x i1> %pred,
- <vscale x 8 x i16>* %addr)
+ %res = call <vscale x 24 x i16> @llvm.aarch64.sve.ld3.nxv24i16.nxv8i1.p0i16(<vscale x 8 x i1> %pred, i16* %addr)
ret <vscale x 24 x i16> %res
}
-define <vscale x 24 x half> @ld3h_f16(<vscale x 8 x i1> %pred, <vscale x 8 x half>* %addr) {
+define <vscale x 24 x half> @ld3h_f16(<vscale x 8 x i1> %pred, half* %addr) {
; CHECK-LABEL: ld3h_f16:
; CHECK: ld3h { z0.h, z1.h, z2.h }, p0/z, [x0]
; CHECK-NEXT: ret
- %res = call <vscale x 24 x half> @llvm.aarch64.sve.ld3.nxv24f16.nxv8i1.p0nxv8f16(<vscale x 8 x i1> %pred,
- <vscale x 8 x half>* %addr)
+ %res = call <vscale x 24 x half> @llvm.aarch64.sve.ld3.nxv24f16.nxv8i1.p0f16(<vscale x 8 x i1> %pred, half* %addr)
ret <vscale x 24 x half> %res
}
@@ -370,21 +360,19 @@ define <vscale x 24 x half> @ld3h_f16(<vscale x 8 x i1> %pred, <vscale x 8 x hal
; LD3W
;
-define <vscale x 12 x i32> @ld3w_i32(<vscale x 4 x i1> %pred, <vscale x 4 x i32>* %addr) {
+define <vscale x 12 x i32> @ld3w_i32(<vscale x 4 x i1> %pred, i32* %addr) {
; CHECK-LABEL: ld3w_i32:
; CHECK: ld3w { z0.s, z1.s, z2.s }, p0/z, [x0]
; CHECK-NEXT: ret
- %res = call <vscale x 12 x i32> @llvm.aarch64.sve.ld3.nxv12i32.nxv4i1.p0nxv4i32(<vscale x 4 x i1> %pred,
- <vscale x 4 x i32>* %addr)
+ %res = call <vscale x 12 x i32> @llvm.aarch64.sve.ld3.nxv12i32.nxv4i1.p0i32(<vscale x 4 x i1> %pred, i32* %addr)
ret <vscale x 12 x i32> %res
}
-define <vscale x 12 x float> @ld3w_f32(<vscale x 4 x i1> %pred, <vscale x 4 x float>* %addr) {
+define <vscale x 12 x float> @ld3w_f32(<vscale x 4 x i1> %pred, float* %addr) {
; CHECK-LABEL: ld3w_f32:
; CHECK: ld3w { z0.s, z1.s, z2.s }, p0/z, [x0]
; CHECK-NEXT: ret
- %res = call <vscale x 12 x float> @llvm.aarch64.sve.ld3.nxv12f32.nxv4i1.p0nxv4f32(<vscale x 4 x i1> %pred,
- <vscale x 4 x float>* %addr)
+ %res = call <vscale x 12 x float> @llvm.aarch64.sve.ld3.nxv12f32.nxv4i1.p0f32(<vscale x 4 x i1> %pred, float* %addr)
ret <vscale x 12 x float> %res
}
@@ -392,21 +380,19 @@ define <vscale x 12 x float> @ld3w_f32(<vscale x 4 x i1> %pred, <vscale x 4 x fl
; LD3D
;
-define <vscale x 6 x i64> @ld3d_i64(<vscale x 2 x i1> %pred, <vscale x 2 x i64>* %addr) {
+define <vscale x 6 x i64> @ld3d_i64(<vscale x 2 x i1> %pred, i64* %addr) {
; CHECK-LABEL: ld3d_i64:
; CHECK: ld3d { z0.d, z1.d, z2.d }, p0/z, [x0]
; CHECK-NEXT: ret
- %res = call <vscale x 6 x i64> @llvm.aarch64.sve.ld3.nxv6i64.nxv2i1.p0nxv2i64(<vscale x 2 x i1> %pred,
- <vscale x 2 x i64>* %addr)
+ %res = call <vscale x 6 x i64> @llvm.aarch64.sve.ld3.nxv6i64.nxv2i1.p0i64(<vscale x 2 x i1> %pred, i64* %addr)
ret <vscale x 6 x i64> %res
}
-define <vscale x 6 x double> @ld3d_f64(<vscale x 2 x i1> %pred, <vscale x 2 x double>* %addr) {
+define <vscale x 6 x double> @ld3d_f64(<vscale x 2 x i1> %pred, double* %addr) {
; CHECK-LABEL: ld3d_f64:
; CHECK: ld3d { z0.d, z1.d, z2.d }, p0/z, [x0]
; CHECK-NEXT: ret
- %res = call <vscale x 6 x double> @llvm.aarch64.sve.ld3.nxv6f64.nxv2i1.p0nxv2f64(<vscale x 2 x i1> %pred,
- <vscale x 2 x double>* %addr)
+ %res = call <vscale x 6 x double> @llvm.aarch64.sve.ld3.nxv6f64.nxv2i1.p0f64(<vscale x 2 x i1> %pred, double* %addr)
ret <vscale x 6 x double> %res
}
@@ -414,12 +400,11 @@ define <vscale x 6 x double> @ld3d_f64(<vscale x 2 x i1> %pred, <vscale x 2 x do
; LD4B
;
-define <vscale x 64 x i8> @ld4b_i8(<vscale x 16 x i1> %pred, <vscale x 16 x i8>* %addr) {
+define <vscale x 64 x i8> @ld4b_i8(<vscale x 16 x i1> %pred, i8* %addr) {
; CHECK-LABEL: ld4b_i8:
; CHECK: ld4b { z0.b, z1.b, z2.b, z3.b }, p0/z, [x0]
; CHECK-NEXT: ret
- %res = call <vscale x 64 x i8> @llvm.aarch64.sve.ld4.nxv64i8.nxv16i1.p0nxv16i8(<vscale x 16 x i1> %pred,
- <vscale x 16 x i8>* %addr)
+ %res = call <vscale x 64 x i8> @llvm.aarch64.sve.ld4.nxv64i8.nxv16i1.p0i8(<vscale x 16 x i1> %pred, i8* %addr)
ret <vscale x 64 x i8> %res
}
@@ -427,21 +412,19 @@ define <vscale x 64 x i8> @ld4b_i8(<vscale x 16 x i1> %pred, <vscale x 16 x i8>*
; LD4H
;
-define <vscale x 32 x i16> @ld4h_i16(<vscale x 8 x i1> %pred, <vscale x 8 x i16>* %addr) {
+define <vscale x 32 x i16> @ld4h_i16(<vscale x 8 x i1> %pred, i16* %addr) {
; CHECK-LABEL: ld4h_i16:
; CHECK: ld4h { z0.h, z1.h, z2.h, z3.h }, p0/z, [x0]
; CHECK-NEXT: ret
- %res = call <vscale x 32 x i16> @llvm.aarch64.sve.ld4.nxv32i16.nxv8i1.p0nxv8i16(<vscale x 8 x i1> %pred,
- <vscale x 8 x i16>* %addr)
+ %res = call <vscale x 32 x i16> @llvm.aarch64.sve.ld4.nxv32i16.nxv8i1.p0i16(<vscale x 8 x i1> %pred, i16* %addr)
ret <vscale x 32 x i16> %res
}
-define <vscale x 32 x half> @ld4h_f16(<vscale x 8 x i1> %pred, <vscale x 8 x half>* %addr) {
+define <vscale x 32 x half> @ld4h_f16(<vscale x 8 x i1> %pred, half* %addr) {
; CHECK-LABEL: ld4h_f16:
; CHECK: ld4h { z0.h, z1.h, z2.h, z3.h }, p0/z, [x0]
; CHECK-NEXT: ret
- %res = call <vscale x 32 x half> @llvm.aarch64.sve.ld4.nxv32f16.nxv8i1.p0nxv8f16(<vscale x 8 x i1> %pred,
- <vscale x 8 x half>* %addr)
+ %res = call <vscale x 32 x half> @llvm.aarch64.sve.ld4.nxv32f16.nxv8i1.p0f16(<vscale x 8 x i1> %pred, half* %addr)
ret <vscale x 32 x half> %res
}
@@ -449,21 +432,19 @@ define <vscale x 32 x half> @ld4h_f16(<vscale x 8 x i1> %pred, <vscale x 8 x hal
; LD4W
;
-define <vscale x 16 x i32> @ld4w_i32(<vscale x 4 x i1> %pred, <vscale x 4 x i32>* %addr) {
+define <vscale x 16 x i32> @ld4w_i32(<vscale x 4 x i1> %pred, i32* %addr) {
; CHECK-LABEL: ld4w_i32:
; CHECK: ld4w { z0.s, z1.s, z2.s, z3.s }, p0/z, [x0]
; CHECK-NEXT: ret
- %res = call <vscale x 16 x i32> @llvm.aarch64.sve.ld4.nxv16i32.nxv4i1.p0nxv4i32(<vscale x 4 x i1> %pred,
- <vscale x 4 x i32>* %addr)
+ %res = call <vscale x 16 x i32> @llvm.aarch64.sve.ld4.nxv16i32.nxv4i1.p0i32(<vscale x 4 x i1> %pred, i32* %addr)
ret <vscale x 16 x i32> %res
}
-define <vscale x 16 x float> @ld4w_f32(<vscale x 4 x i1> %pred, <vscale x 4 x float>* %addr) {
+define <vscale x 16 x float> @ld4w_f32(<vscale x 4 x i1> %pred, float* %addr) {
; CHECK-LABEL: ld4w_f32:
; CHECK: ld4w { z0.s, z1.s, z2.s, z3.s }, p0/z, [x0]
; CHECK-NEXT: ret
- %res = call <vscale x 16 x float> @llvm.aarch64.sve.ld4.nxv16f32.nxv4i1.p0nxv4f32(<vscale x 4 x i1> %pred,
- <vscale x 4 x float>* %addr)
+ %res = call <vscale x 16 x float> @llvm.aarch64.sve.ld4.nxv16f32.nxv4i1.p0f32(<vscale x 4 x i1> %pred, float* %addr)
ret <vscale x 16 x float> %res
}
@@ -471,21 +452,19 @@ define <vscale x 16 x float> @ld4w_f32(<vscale x 4 x i1> %pred, <vscale x 4 x fl
; LD4D
;
-define <vscale x 8 x i64> @ld4d_i64(<vscale x 2 x i1> %pred, <vscale x 2 x i64>* %addr) {
+define <vscale x 8 x i64> @ld4d_i64(<vscale x 2 x i1> %pred, i64* %addr) {
; CHECK-LABEL: ld4d_i64:
; CHECK: ld4d { z0.d, z1.d, z2.d, z3.d }, p0/z, [x0]
; CHECK-NEXT: ret
- %res = call <vscale x 8 x i64> @llvm.aarch64.sve.ld4.nxv8i64.nxv2i1.p0nxv2i64(<vscale x 2 x i1> %pred,
- <vscale x 2 x i64>* %addr)
+ %res = call <vscale x 8 x i64> @llvm.aarch64.sve.ld4.nxv8i64.nxv2i1.p0i64(<vscale x 2 x i1> %pred, i64* %addr)
ret <vscale x 8 x i64> %res
}
-define <vscale x 8 x double> @ld4d_f64(<vscale x 2 x i1> %pred, <vscale x 2 x double>* %addr) {
+define <vscale x 8 x double> @ld4d_f64(<vscale x 2 x i1> %pred, double* %addr) {
; CHECK-LABEL: ld4d_f64:
; CHECK: ld4d { z0.d, z1.d, z2.d, z3.d }, p0/z, [x0]
; CHECK-NEXT: ret
- %res = call <vscale x 8 x double> @llvm.aarch64.sve.ld4.nxv8f64.nxv2i1.p0nxv2f64(<vscale x 2 x i1> %pred,
- <vscale x 2 x double>* %addr)
+ %res = call <vscale x 8 x double> @llvm.aarch64.sve.ld4.nxv8f64.nxv2i1.p0f64(<vscale x 2 x i1> %pred, double* %addr)
ret <vscale x 8 x double> %res
}
@@ -506,26 +485,26 @@ declare <vscale x 8 x half> @llvm.aarch64.sve.ldnt1.nxv8f16(<vscale x 8 x i1>, h
declare <vscale x 4 x float> @llvm.aarch64.sve.ldnt1.nxv4f32(<vscale x 4 x i1>, float*)
declare <vscale x 2 x double> @llvm.aarch64.sve.ldnt1.nxv2f64(<vscale x 2 x i1>, double*)
-declare <vscale x 32 x i8> @llvm.aarch64.sve.ld2.nxv32i8.nxv16i1.p0nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>*)
-declare <vscale x 16 x i16> @llvm.aarch64.sve.ld2.nxv16i16.nxv8i1.p0nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>*)
-declare <vscale x 8 x i32> @llvm.aarch64.sve.ld2.nxv8i32.nxv4i1.p0nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>*)
-declare <vscale x 4 x i64> @llvm.aarch64.sve.ld2.nxv4i64.nxv2i1.p0nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>*)
-declare <vscale x 16 x half> @llvm.aarch64.sve.ld2.nxv16f16.nxv8i1.p0nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>*)
-declare <vscale x 8 x float> @llvm.aarch64.sve.ld2.nxv8f32.nxv4i1.p0nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>*)
-declare <vscale x 4 x double> @llvm.aarch64.sve.ld2.nxv4f64.nxv2i1.p0nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>*)
-
-declare <vscale x 48 x i8> @llvm.aarch64.sve.ld3.nxv48i8.nxv16i1.p0nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>*)
-declare <vscale x 24 x i16> @llvm.aarch64.sve.ld3.nxv24i16.nxv8i1.p0nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>*)
-declare <vscale x 12 x i32> @llvm.aarch64.sve.ld3.nxv12i32.nxv4i1.p0nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>*)
-declare <vscale x 6 x i64> @llvm.aarch64.sve.ld3.nxv6i64.nxv2i1.p0nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>*)
-declare <vscale x 24 x half> @llvm.aarch64.sve.ld3.nxv24f16.nxv8i1.p0nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>*)
-declare <vscale x 12 x float> @llvm.aarch64.sve.ld3.nxv12f32.nxv4i1.p0nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>*)
-declare <vscale x 6 x double> @llvm.aarch64.sve.ld3.nxv6f64.nxv2i1.p0nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>*)
-
-declare <vscale x 64 x i8> @llvm.aarch64.sve.ld4.nxv64i8.nxv16i1.p0nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>*)
-declare <vscale x 32 x i16> @llvm.aarch64.sve.ld4.nxv32i16.nxv8i1.p0nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>*)
-declare <vscale x 16 x i32> @llvm.aarch64.sve.ld4.nxv16i32.nxv4i1.p0nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>*)
-declare <vscale x 8 x i64> @llvm.aarch64.sve.ld4.nxv8i64.nxv2i1.p0nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>*)
-declare <vscale x 32 x half> @llvm.aarch64.sve.ld4.nxv32f16.nxv8i1.p0nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>*)
-declare <vscale x 16 x float> @llvm.aarch64.sve.ld4.nxv16f32.nxv4i1.p0nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>*)
-declare <vscale x 8 x double> @llvm.aarch64.sve.ld4.nxv8f64.nxv2i1.p0nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>*)
+declare <vscale x 32 x i8> @llvm.aarch64.sve.ld2.nxv32i8.nxv16i1.p0i8(<vscale x 16 x i1>, i8*)
+declare <vscale x 16 x i16> @llvm.aarch64.sve.ld2.nxv16i16.nxv8i1.p0i16(<vscale x 8 x i1>, i16*)
+declare <vscale x 8 x i32> @llvm.aarch64.sve.ld2.nxv8i32.nxv4i1.p0i32(<vscale x 4 x i1>, i32*)
+declare <vscale x 4 x i64> @llvm.aarch64.sve.ld2.nxv4i64.nxv2i1.p0i64(<vscale x 2 x i1>, i64*)
+declare <vscale x 16 x half> @llvm.aarch64.sve.ld2.nxv16f16.nxv8i1.p0f16(<vscale x 8 x i1>, half*)
+declare <vscale x 8 x float> @llvm.aarch64.sve.ld2.nxv8f32.nxv4i1.p0f32(<vscale x 4 x i1>, float*)
+declare <vscale x 4 x double> @llvm.aarch64.sve.ld2.nxv4f64.nxv2i1.p0f64(<vscale x 2 x i1>, double*)
+
+declare <vscale x 48 x i8> @llvm.aarch64.sve.ld3.nxv48i8.nxv16i1.p0i8(<vscale x 16 x i1>, i8*)
+declare <vscale x 24 x i16> @llvm.aarch64.sve.ld3.nxv24i16.nxv8i1.p0i16(<vscale x 8 x i1>, i16*)
+declare <vscale x 12 x i32> @llvm.aarch64.sve.ld3.nxv12i32.nxv4i1.p0i32(<vscale x 4 x i1>, i32*)
+declare <vscale x 6 x i64> @llvm.aarch64.sve.ld3.nxv6i64.nxv2i1.p0i64(<vscale x 2 x i1>, i64*)
+declare <vscale x 24 x half> @llvm.aarch64.sve.ld3.nxv24f16.nxv8i1.p0f16(<vscale x 8 x i1>, half*)
+declare <vscale x 12 x float> @llvm.aarch64.sve.ld3.nxv12f32.nxv4i1.p0f32(<vscale x 4 x i1>, float*)
+declare <vscale x 6 x double> @llvm.aarch64.sve.ld3.nxv6f64.nxv2i1.p0f64(<vscale x 2 x i1>, double*)
+
+declare <vscale x 64 x i8> @llvm.aarch64.sve.ld4.nxv64i8.nxv16i1.p0i8(<vscale x 16 x i1>, i8*)
+declare <vscale x 32 x i16> @llvm.aarch64.sve.ld4.nxv32i16.nxv8i1.p0i16(<vscale x 8 x i1>, i16*)
+declare <vscale x 16 x i32> @llvm.aarch64.sve.ld4.nxv16i32.nxv4i1.p0i32(<vscale x 4 x i1>, i32*)
+declare <vscale x 8 x i64> @llvm.aarch64.sve.ld4.nxv8i64.nxv2i1.p0i64(<vscale x 2 x i1>, i64*)
+declare <vscale x 32 x half> @llvm.aarch64.sve.ld4.nxv32f16.nxv8i1.p0f16(<vscale x 8 x i1>, half*)
+declare <vscale x 16 x float> @llvm.aarch64.sve.ld4.nxv16f32.nxv4i1.p0f32(<vscale x 4 x i1>, float*)
+declare <vscale x 8 x double> @llvm.aarch64.sve.ld4.nxv8f64.nxv2i1.p0f64(<vscale x 2 x i1>, double*)
diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-stN-reg-imm-addr-mode.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-stN-reg-imm-addr-mode.ll
index 8ef27dc7ed70..a5e278c65109 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-stN-reg-imm-addr-mode.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-stN-reg-imm-addr-mode.ll
@@ -14,11 +14,11 @@ define void @st2b_i8_valid_imm(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <
; CHECK-LABEL: st2b_i8_valid_imm:
; CHECK: st2b { z0.b, z1.b }, p0, [x0, #2, mul vl]
; CHECK-NEXT: ret
- %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 2
+ %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 2, i64 0
call void @llvm.aarch64.sve.st2.nxv16i8(<vscale x 16 x i8> %v0,
<vscale x 16 x i8> %v1,
<vscale x 16 x i1> %pred,
- <vscale x 16 x i8>* %base)
+ i8* %base)
ret void
}
@@ -27,11 +27,11 @@ define void @st2b_i8_invalid_imm_not_multiple_of_2(<vscale x 16 x i8> %v0, <vsca
; CHECK: rdvl x[[N:[0-9]+]], #3
; CHECK-NEXT: st2b { z0.b, z1.b }, p0, [x0, x[[N]]]
; CHECK-NEXT: ret
- %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 3
+ %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 3, i64 0
call void @llvm.aarch64.sve.st2.nxv16i8(<vscale x 16 x i8> %v0,
<vscale x 16 x i8> %v1,
<vscale x 16 x i1> %pred,
- <vscale x 16 x i8>* %base)
+ i8* %base)
ret void
}
@@ -40,11 +40,11 @@ define void @st2b_i8_invalid_imm_out_of_lower_bound(<vscale x 16 x i8> %v0, <vsc
; CHECK: rdvl x[[N:[0-9]+]], #-18
; CHECK-NEXT: st2b { z0.b, z1.b }, p0, [x0, x[[N]]]
; CHECK-NEXT: ret
- %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 -18
+ %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 -18, i64 0
call void @llvm.aarch64.sve.st2.nxv16i8(<vscale x 16 x i8> %v0,
<vscale x 16 x i8> %v1,
<vscale x 16 x i1> %pred,
- <vscale x 16 x i8>* %base)
+ i8* %base)
ret void
}
@@ -53,11 +53,11 @@ define void @st2b_i8_invalid_imm_out_of_upper_bound(<vscale x 16 x i8> %v0, <vsc
; CHECK: rdvl x[[N:[0-9]+]], #16
; CHECK-NEXT: st2b { z0.b, z1.b }, p0, [x0, x[[N]]]
; CHECK-NEXT: ret
- %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 16
+ %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 16, i64 0
call void @llvm.aarch64.sve.st2.nxv16i8(<vscale x 16 x i8> %v0,
<vscale x 16 x i8> %v1,
<vscale x 16 x i1> %pred,
- <vscale x 16 x i8>* %base)
+ i8* %base)
ret void
}
@@ -65,11 +65,11 @@ define void @st2b_i8_valid_imm_lower_bound(<vscale x 16 x i8> %v0, <vscale x 16
; CHECK-LABEL: st2b_i8_valid_imm_lower_bound:
; CHECK: st2b { z0.b, z1.b }, p0, [x0, #-16, mul vl]
; CHECK-NEXT: ret
- %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 -16
+ %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 -16, i64 0
call void @llvm.aarch64.sve.st2.nxv16i8(<vscale x 16 x i8> %v0,
<vscale x 16 x i8> %v1,
<vscale x 16 x i1> %pred,
- <vscale x 16 x i8>* %base)
+ i8* %base)
ret void
}
@@ -77,11 +77,11 @@ define void @st2b_i8_valid_imm_upper_bound(<vscale x 16 x i8> %v0, <vscale x 16
; CHECK-LABEL: st2b_i8_valid_imm_upper_bound:
; CHECK: st2b { z0.b, z1.b }, p0, [x0, #14, mul vl]
; CHECK-NEXT: ret
- %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 14
+ %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 14, i64 0
call void @llvm.aarch64.sve.st2.nxv16i8(<vscale x 16 x i8> %v0,
<vscale x 16 x i8> %v1,
<vscale x 16 x i1> %pred,
- <vscale x 16 x i8>* %base)
+ i8* %base)
ret void
}
@@ -93,11 +93,11 @@ define void @st2h_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x
; CHECK-LABEL: st2h_i16:
; CHECK: st2h { z0.h, z1.h }, p0, [x0, #2, mul vl]
; CHECK-NEXT: ret
- %base = getelementptr <vscale x 8 x i16>, <vscale x 8 x i16>* %addr, i64 2
+ %base = getelementptr <vscale x 8 x i16>, <vscale x 8 x i16>* %addr, i64 2, i64 0
call void @llvm.aarch64.sve.st2.nxv8i16(<vscale x 8 x i16> %v0,
<vscale x 8 x i16> %v1,
<vscale x 8 x i1> %pred,
- <vscale x 8 x i16>* %base)
+ i16* %base)
ret void
}
@@ -105,11 +105,11 @@ define void @st2h_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale
; CHECK-LABEL: st2h_f16:
; CHECK: st2h { z0.h, z1.h }, p0, [x0, #2, mul vl]
; CHECK-NEXT: ret
- %base = getelementptr <vscale x 8 x half>, <vscale x 8 x half>* %addr, i64 2
+ %base = getelementptr <vscale x 8 x half>, <vscale x 8 x half>* %addr, i64 2, i64 0
call void @llvm.aarch64.sve.st2.nxv8f16(<vscale x 8 x half> %v0,
<vscale x 8 x half> %v1,
<vscale x 8 x i1> %pred,
- <vscale x 8 x half>* %base)
+ half* %base)
ret void
}
@@ -121,11 +121,11 @@ define void @st2w_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x
; CHECK-LABEL: st2w_i32:
; CHECK: st2w { z0.s, z1.s }, p0, [x0, #4, mul vl]
; CHECK-NEXT: ret
- %base = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* %addr, i64 4
+ %base = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* %addr, i64 4, i64 0
call void @llvm.aarch64.sve.st2.nxv4i32(<vscale x 4 x i32> %v0,
<vscale x 4 x i32> %v1,
<vscale x 4 x i1> %pred,
- <vscale x 4 x i32>* %base)
+ i32* %base)
ret void
}
@@ -133,11 +133,11 @@ define void @st2w_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscal
; CHECK-LABEL: st2w_f32:
; CHECK: st2w { z0.s, z1.s }, p0, [x0, #6, mul vl]
; CHECK-NEXT: ret
- %base = getelementptr <vscale x 4 x float>, <vscale x 4 x float>* %addr, i64 6
+ %base = getelementptr <vscale x 4 x float>, <vscale x 4 x float>* %addr, i64 6, i64 0
call void @llvm.aarch64.sve.st2.nxv4f32(<vscale x 4 x float> %v0,
<vscale x 4 x float> %v1,
<vscale x 4 x i1> %pred,
- <vscale x 4 x float>* %base)
+ float* %base)
ret void
}
@@ -149,11 +149,11 @@ define void @st2d_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x
; CHECK-LABEL: st2d_i64:
; CHECK: st2d { z0.d, z1.d }, p0, [x0, #8, mul vl]
; CHECK-NEXT: ret
- %base = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* %addr, i64 8
+ %base = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* %addr, i64 8, i64 0
call void @llvm.aarch64.sve.st2.nxv2i64(<vscale x 2 x i64> %v0,
<vscale x 2 x i64> %v1,
<vscale x 2 x i1> %pred,
- <vscale x 2 x i64>* %base)
+ i64* %base)
ret void
}
@@ -161,11 +161,11 @@ define void @st2d_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vsc
; CHECK-LABEL: st2d_f64:
; CHECK: st2d { z0.d, z1.d }, p0, [x0, #10, mul vl]
; CHECK-NEXT: ret
- %base = getelementptr <vscale x 2 x double>, <vscale x 2 x double>* %addr, i64 10
+ %base = getelementptr <vscale x 2 x double>, <vscale x 2 x double>* %addr, i64 10, i64 0
call void @llvm.aarch64.sve.st2.nxv2f64(<vscale x 2 x double> %v0,
<vscale x 2 x double> %v1,
<vscale x 2 x i1> %pred,
- <vscale x 2 x double>* %base)
+ double* %base)
ret void
}
@@ -177,12 +177,12 @@ define void @st3b_i8_valid_imm(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <
; CHECK-LABEL: st3b_i8_valid_imm:
; CHECK: st3b { z0.b, z1.b, z2.b }, p0, [x0, #3, mul vl]
; CHECK-NEXT: ret
- %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 3
+ %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 3, i64 0
call void @llvm.aarch64.sve.st3.nxv16i8(<vscale x 16 x i8> %v0,
<vscale x 16 x i8> %v1,
<vscale x 16 x i8> %v2,
<vscale x 16 x i1> %pred,
- <vscale x 16 x i8>* %base)
+ i8* %base)
ret void
}
@@ -191,12 +191,12 @@ define void @st3b_i8_invalid_imm_not_multiple_of_3_01(<vscale x 16 x i8> %v0, <v
; CHECK: rdvl x[[N:[0-9]+]], #4
; CHECK-NEXT: st3b { z0.b, z1.b, z2.b }, p0, [x0, x[[N]]]
; CHECK-NEXT: ret
- %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 4
+ %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 4, i64 0
call void @llvm.aarch64.sve.st3.nxv16i8(<vscale x 16 x i8> %v0,
<vscale x 16 x i8> %v1,
<vscale x 16 x i8> %v2,
<vscale x 16 x i1> %pred,
- <vscale x 16 x i8>* %base)
+ i8* %base)
ret void
}
@@ -205,12 +205,12 @@ define void @st3b_i8_invalid_imm_not_multiple_of_3_02(<vscale x 16 x i8> %v0, <v
; CHECK: rdvl x[[N:[0-9]+]], #5
; CHECK-NEXT: st3b { z0.b, z1.b, z2.b }, p0, [x0, x[[N]]]
; CHECK-NEXT: ret
- %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 5
+ %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 5, i64 0
call void @llvm.aarch64.sve.st3.nxv16i8(<vscale x 16 x i8> %v0,
<vscale x 16 x i8> %v1,
<vscale x 16 x i8> %v2,
<vscale x 16 x i1> %pred,
- <vscale x 16 x i8>* %base)
+ i8* %base)
ret void
}
@@ -219,12 +219,12 @@ define void @st3b_i8_invalid_imm_out_of_lower_bound(<vscale x 16 x i8> %v0, <vsc
; CHECK: rdvl x[[N:[0-9]+]], #-27
; CHECK-NEXT: st3b { z0.b, z1.b, z2.b }, p0, [x0, x[[N]]]
; CHECK-NEXT: ret
- %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 -27
+ %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 -27, i64 0
call void @llvm.aarch64.sve.st3.nxv16i8(<vscale x 16 x i8> %v0,
<vscale x 16 x i8> %v1,
<vscale x 16 x i8> %v2,
<vscale x 16 x i1> %pred,
- <vscale x 16 x i8>* %base)
+ i8* %base)
ret void
}
@@ -233,12 +233,12 @@ define void @st3b_i8_invalid_imm_out_of_upper_bound(<vscale x 16 x i8> %v0, <vsc
; CHECK: rdvl x[[N:[0-9]+]], #24
; CHECK-NEXT: st3b { z0.b, z1.b, z2.b }, p0, [x0, x[[N]]]
; CHECK-NEXT: ret
- %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 24
+ %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 24, i64 0
call void @llvm.aarch64.sve.st3.nxv16i8(<vscale x 16 x i8> %v0,
<vscale x 16 x i8> %v1,
<vscale x 16 x i8> %v2,
<vscale x 16 x i1> %pred,
- <vscale x 16 x i8>* %base)
+ i8* %base)
ret void
}
@@ -246,12 +246,12 @@ define void @st3b_i8_valid_imm_lower_bound(<vscale x 16 x i8> %v0, <vscale x 16
; CHECK-LABEL: st3b_i8_valid_imm_lower_bound:
; CHECK: st3b { z0.b, z1.b, z2.b }, p0, [x0, #-24, mul vl]
; CHECK-NEXT: ret
- %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 -24
+ %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 -24, i64 0
call void @llvm.aarch64.sve.st3.nxv16i8(<vscale x 16 x i8> %v0,
<vscale x 16 x i8> %v1,
<vscale x 16 x i8> %v2,
<vscale x 16 x i1> %pred,
- <vscale x 16 x i8>* %base)
+ i8* %base)
ret void
}
@@ -259,12 +259,12 @@ define void @st3b_i8_valid_imm_upper_bound(<vscale x 16 x i8> %v0, <vscale x 16
; CHECK-LABEL: st3b_i8_valid_imm_upper_bound:
; CHECK: st3b { z0.b, z1.b, z2.b }, p0, [x0, #21, mul vl]
; CHECK-NEXT: ret
- %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 21
+ %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 21, i64 0
call void @llvm.aarch64.sve.st3.nxv16i8(<vscale x 16 x i8> %v0,
<vscale x 16 x i8> %v1,
<vscale x 16 x i8> %v2,
<vscale x 16 x i1> %pred,
- <vscale x 16 x i8>* %base)
+ i8* %base)
ret void
}
@@ -276,12 +276,12 @@ define void @st3h_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x
; CHECK-LABEL: st3h_i16:
; CHECK: st3h { z0.h, z1.h, z2.h }, p0, [x0, #6, mul vl]
; CHECK-NEXT: ret
- %base = getelementptr <vscale x 8 x i16>, <vscale x 8 x i16>* %addr, i64 6
+ %base = getelementptr <vscale x 8 x i16>, <vscale x 8 x i16>* %addr, i64 6, i64 0
call void @llvm.aarch64.sve.st3.nxv8i16(<vscale x 8 x i16> %v0,
<vscale x 8 x i16> %v1,
<vscale x 8 x i16> %v2,
<vscale x 8 x i1> %pred,
- <vscale x 8 x i16>* %base)
+ i16* %base)
ret void
}
@@ -289,12 +289,12 @@ define void @st3h_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale
; CHECK-LABEL: st3h_f16:
; CHECK: st3h { z0.h, z1.h, z2.h }, p0, [x0, #9, mul vl]
; CHECK-NEXT: ret
- %base = getelementptr <vscale x 8 x half>, <vscale x 8 x half>* %addr, i64 9
+ %base = getelementptr <vscale x 8 x half>, <vscale x 8 x half>* %addr, i64 9, i64 0
call void @llvm.aarch64.sve.st3.nxv8f16(<vscale x 8 x half> %v0,
<vscale x 8 x half> %v1,
<vscale x 8 x half> %v2,
<vscale x 8 x i1> %pred,
- <vscale x 8 x half>* %base)
+ half* %base)
ret void
}
@@ -306,12 +306,12 @@ define void @st3w_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x
; CHECK-LABEL: st3w_i32:
; CHECK: st3w { z0.s, z1.s, z2.s }, p0, [x0, #12, mul vl]
; CHECK-NEXT: ret
- %base = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* %addr, i64 12
+ %base = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* %addr, i64 12, i64 0
call void @llvm.aarch64.sve.st3.nxv4i32(<vscale x 4 x i32> %v0,
<vscale x 4 x i32> %v1,
<vscale x 4 x i32> %v2,
<vscale x 4 x i1> %pred,
- <vscale x 4 x i32>* %base)
+ i32* %base)
ret void
}
@@ -319,12 +319,12 @@ define void @st3w_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscal
; CHECK-LABEL: st3w_f32:
; CHECK: st3w { z0.s, z1.s, z2.s }, p0, [x0, #15, mul vl]
; CHECK-NEXT: ret
- %base = getelementptr <vscale x 4 x float>, <vscale x 4 x float>* %addr, i64 15
+ %base = getelementptr <vscale x 4 x float>, <vscale x 4 x float>* %addr, i64 15, i64 0
call void @llvm.aarch64.sve.st3.nxv4f32(<vscale x 4 x float> %v0,
<vscale x 4 x float> %v1,
<vscale x 4 x float> %v2,
<vscale x 4 x i1> %pred,
- <vscale x 4 x float>* %base)
+ float* %base)
ret void
}
@@ -336,12 +336,12 @@ define void @st3d_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x
; CHECK-LABEL: st3d_i64:
; CHECK: st3d { z0.d, z1.d, z2.d }, p0, [x0, #18, mul vl]
; CHECK-NEXT: ret
- %base = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* %addr, i64 18
+ %base = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* %addr, i64 18, i64 0
call void @llvm.aarch64.sve.st3.nxv2i64(<vscale x 2 x i64> %v0,
<vscale x 2 x i64> %v1,
<vscale x 2 x i64> %v2,
<vscale x 2 x i1> %pred,
- <vscale x 2 x i64>* %base)
+ i64* %base)
ret void
}
@@ -349,12 +349,12 @@ define void @st3d_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vsc
; CHECK-LABEL: st3d_f64:
; CHECK: st3d { z0.d, z1.d, z2.d }, p0, [x0, #-3, mul vl]
; CHECK-NEXT: ret
- %base = getelementptr <vscale x 2 x double>, <vscale x 2 x double>* %addr, i64 -3
+ %base = getelementptr <vscale x 2 x double>, <vscale x 2 x double>* %addr, i64 -3, i64 0
call void @llvm.aarch64.sve.st3.nxv2f64(<vscale x 2 x double> %v0,
<vscale x 2 x double> %v1,
<vscale x 2 x double> %v2,
<vscale x 2 x i1> %pred,
- <vscale x 2 x double>* %base)
+ double* %base)
ret void
}
@@ -366,13 +366,13 @@ define void @st4b_i8_valid_imm(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <
; CHECK-LABEL: st4b_i8_valid_imm:
; CHECK: st4b { z0.b, z1.b, z2.b, z3.b }, p0, [x0, #4, mul vl]
; CHECK-NEXT: ret
- %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 4
+ %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 4, i64 0
call void @llvm.aarch64.sve.st4.nxv16i8(<vscale x 16 x i8> %v0,
<vscale x 16 x i8> %v1,
<vscale x 16 x i8> %v2,
<vscale x 16 x i8> %v3,
<vscale x 16 x i1> %pred,
- <vscale x 16 x i8>* %base)
+ i8* %base)
ret void
}
@@ -381,13 +381,13 @@ define void @st4b_i8_invalid_imm_not_multiple_of_4_01(<vscale x 16 x i8> %v0, <v
; CHECK: rdvl x[[N:[0-9]+]], #5
; CHECK-NEXT: st4b { z0.b, z1.b, z2.b, z3.b }, p0, [x0, x[[N]]]
; CHECK-NEXT: ret
- %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 5
+ %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 5, i64 0
call void @llvm.aarch64.sve.st4.nxv16i8(<vscale x 16 x i8> %v0,
<vscale x 16 x i8> %v1,
<vscale x 16 x i8> %v2,
<vscale x 16 x i8> %v3,
<vscale x 16 x i1> %pred,
- <vscale x 16 x i8>* %base)
+ i8* %base)
ret void
}
@@ -396,13 +396,13 @@ define void @st4b_i8_invalid_imm_not_multiple_of_4_02(<vscale x 16 x i8> %v0, <v
; CHECK: rdvl x[[N:[0-9]+]], #6
; CHECK-NEXT: st4b { z0.b, z1.b, z2.b, z3.b }, p0, [x0, x[[N]]]
; CHECK-NEXT: ret
- %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 6
+ %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 6, i64 0
call void @llvm.aarch64.sve.st4.nxv16i8(<vscale x 16 x i8> %v0,
<vscale x 16 x i8> %v1,
<vscale x 16 x i8> %v2,
<vscale x 16 x i8> %v3,
<vscale x 16 x i1> %pred,
- <vscale x 16 x i8>* %base)
+ i8* %base)
ret void
}
@@ -411,13 +411,13 @@ define void @st4b_i8_invalid_imm_not_multiple_of_4_03(<vscale x 16 x i8> %v0, <v
; CHECK: rdvl x[[N:[0-9]+]], #7
; CHECK-NEXT: st4b { z0.b, z1.b, z2.b, z3.b }, p0, [x0, x[[N]]]
; CHECK-NEXT: ret
- %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 7
+ %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 7, i64 0
call void @llvm.aarch64.sve.st4.nxv16i8(<vscale x 16 x i8> %v0,
<vscale x 16 x i8> %v1,
<vscale x 16 x i8> %v2,
<vscale x 16 x i8> %v3,
<vscale x 16 x i1> %pred,
- <vscale x 16 x i8>* %base)
+ i8* %base)
ret void
}
@@ -433,13 +433,13 @@ define void @st4b_i8_invalid_imm_out_of_lower_bound(<vscale x 16 x i8> %v0, <vsc
; CHECK-DAG: mul x[[OFFSET:[0-9]+]], x[[P]], x[[M]]
; CHECK-NEXT: st4b { z0.b, z1.b, z2.b, z3.b }, p0, [x0, x[[OFFSET]]]
; CHECK-NEXT: ret
- %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 -36
+ %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 -36, i64 0
call void @llvm.aarch64.sve.st4.nxv16i8(<vscale x 16 x i8> %v0,
<vscale x 16 x i8> %v1,
<vscale x 16 x i8> %v2,
<vscale x 16 x i8> %v3,
<vscale x 16 x i1> %pred,
- <vscale x 16 x i8>* %base)
+ i8* %base)
ret void
}
@@ -455,13 +455,13 @@ define void @st4b_i8_invalid_imm_out_of_upper_bound(<vscale x 16 x i8> %v0, <vsc
; CHECK-DAG: mul x[[OFFSET:[0-9]+]], x[[P]], x[[M]]
; CHECK-NEXT: st4b { z0.b, z1.b, z2.b, z3.b }, p0, [x0, x[[OFFSET]]]
; CHECK-NEXT: ret
- %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 32
+ %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 32, i64 0
call void @llvm.aarch64.sve.st4.nxv16i8(<vscale x 16 x i8> %v0,
<vscale x 16 x i8> %v1,
<vscale x 16 x i8> %v2,
<vscale x 16 x i8> %v3,
<vscale x 16 x i1> %pred,
- <vscale x 16 x i8>* %base)
+ i8* %base)
ret void
}
@@ -469,13 +469,13 @@ define void @st4b_i8_valid_imm_lower_bound(<vscale x 16 x i8> %v0, <vscale x 16
; CHECK-LABEL: st4b_i8_valid_imm_lower_bound:
; CHECK: st4b { z0.b, z1.b, z2.b, z3.b }, p0, [x0, #-32, mul vl]
; CHECK-NEXT: ret
- %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 -32
+ %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 -32, i64 0
call void @llvm.aarch64.sve.st4.nxv16i8(<vscale x 16 x i8> %v0,
<vscale x 16 x i8> %v1,
<vscale x 16 x i8> %v2,
<vscale x 16 x i8> %v3,
<vscale x 16 x i1> %pred,
- <vscale x 16 x i8>* %base)
+ i8* %base)
ret void
}
@@ -483,13 +483,13 @@ define void @st4b_i8_valid_imm_upper_bound(<vscale x 16 x i8> %v0, <vscale x 16
; CHECK-LABEL: st4b_i8_valid_imm_upper_bound:
; CHECK: st4b { z0.b, z1.b, z2.b, z3.b }, p0, [x0, #28, mul vl]
; CHECK-NEXT: ret
- %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 28
+ %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 28, i64 0
call void @llvm.aarch64.sve.st4.nxv16i8(<vscale x 16 x i8> %v0,
<vscale x 16 x i8> %v1,
<vscale x 16 x i8> %v2,
<vscale x 16 x i8> %v3,
<vscale x 16 x i1> %pred,
- <vscale x 16 x i8>* %base)
+ i8* %base)
ret void
}
@@ -501,13 +501,13 @@ define void @st4h_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x
; CHECK-LABEL: st4h_i16:
; CHECK: st4h { z0.h, z1.h, z2.h, z3.h }, p0, [x0, #8, mul vl]
; CHECK-NEXT: ret
- %base = getelementptr <vscale x 8 x i16>, <vscale x 8 x i16>* %addr, i64 8
+ %base = getelementptr <vscale x 8 x i16>, <vscale x 8 x i16>* %addr, i64 8, i64 0
call void @llvm.aarch64.sve.st4.nxv8i16(<vscale x 8 x i16> %v0,
<vscale x 8 x i16> %v1,
<vscale x 8 x i16> %v2,
<vscale x 8 x i16> %v3,
<vscale x 8 x i1> %pred,
- <vscale x 8 x i16>* %base)
+ i16* %base)
ret void
}
@@ -515,13 +515,13 @@ define void @st4h_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale
; CHECK-LABEL: st4h_f16:
; CHECK: st4h { z0.h, z1.h, z2.h, z3.h }, p0, [x0, #12, mul vl]
; CHECK-NEXT: ret
- %base = getelementptr <vscale x 8 x half>, <vscale x 8 x half>* %addr, i64 12
+ %base = getelementptr <vscale x 8 x half>, <vscale x 8 x half>* %addr, i64 12, i64 0
call void @llvm.aarch64.sve.st4.nxv8f16(<vscale x 8 x half> %v0,
<vscale x 8 x half> %v1,
<vscale x 8 x half> %v2,
<vscale x 8 x half> %v3,
<vscale x 8 x i1> %pred,
- <vscale x 8 x half>* %base)
+ half* %base)
ret void
}
@@ -533,13 +533,13 @@ define void @st4w_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x
; CHECK-LABEL: st4w_i32:
; CHECK: st4w { z0.s, z1.s, z2.s, z3.s }, p0, [x0, #16, mul vl]
; CHECK-NEXT: ret
- %base = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* %addr, i64 16
+ %base = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* %addr, i64 16, i64 0
call void @llvm.aarch64.sve.st4.nxv4i32(<vscale x 4 x i32> %v0,
<vscale x 4 x i32> %v1,
<vscale x 4 x i32> %v2,
<vscale x 4 x i32> %v3,
<vscale x 4 x i1> %pred,
- <vscale x 4 x i32>* %base)
+ i32* %base)
ret void
}
@@ -547,13 +547,13 @@ define void @st4w_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscal
; CHECK-LABEL: st4w_f32:
; CHECK: st4w { z0.s, z1.s, z2.s, z3.s }, p0, [x0, #20, mul vl]
; CHECK-NEXT: ret
- %base = getelementptr <vscale x 4 x float>, <vscale x 4 x float>* %addr, i64 20
+ %base = getelementptr <vscale x 4 x float>, <vscale x 4 x float>* %addr, i64 20, i64 0
call void @llvm.aarch64.sve.st4.nxv4f32(<vscale x 4 x float> %v0,
<vscale x 4 x float> %v1,
<vscale x 4 x float> %v2,
<vscale x 4 x float> %v3,
<vscale x 4 x i1> %pred,
- <vscale x 4 x float>* %base)
+ float* %base)
ret void
}
@@ -565,13 +565,13 @@ define void @st4d_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x
; CHECK-LABEL: st4d_i64:
; CHECK: st4d { z0.d, z1.d, z2.d, z3.d }, p0, [x0, #24, mul vl]
; CHECK-NEXT: ret
- %base = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* %addr, i64 24
+ %base = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* %addr, i64 24, i64 0
call void @llvm.aarch64.sve.st4.nxv2i64(<vscale x 2 x i64> %v0,
<vscale x 2 x i64> %v1,
<vscale x 2 x i64> %v2,
<vscale x 2 x i64> %v3,
<vscale x 2 x i1> %pred,
- <vscale x 2 x i64>* %base)
+ i64* %base)
ret void
}
@@ -579,36 +579,36 @@ define void @st4d_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vsc
; CHECK-LABEL: st4d_f64:
; CHECK: st4d { z0.d, z1.d, z2.d, z3.d }, p0, [x0, #28, mul vl]
; CHECK-NEXT: ret
- %base = getelementptr <vscale x 2 x double>, <vscale x 2 x double>* %addr, i64 28
+ %base = getelementptr <vscale x 2 x double>, <vscale x 2 x double>* %addr, i64 28, i64 0
call void @llvm.aarch64.sve.st4.nxv2f64(<vscale x 2 x double> %v0,
<vscale x 2 x double> %v1,
<vscale x 2 x double> %v2,
<vscale x 2 x double> %v3,
<vscale x 2 x i1> %pred,
- <vscale x 2 x double>* %base)
- ret void
-}
-
-declare void @llvm.aarch64.sve.st2.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i1>, <vscale x 16 x i8>*)
-declare void @llvm.aarch64.sve.st2.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i1>, <vscale x 8 x i16>*)
-declare void @llvm.aarch64.sve.st2.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i1>, <vscale x 4 x i32>*)
-declare void @llvm.aarch64.sve.st2.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i1>, <vscale x 2 x i64>*)
-declare void @llvm.aarch64.sve.st2.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x i1>, <vscale x 8 x half>*)
-declare void @llvm.aarch64.sve.st2.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x i1>, <vscale x 4 x float>*)
-declare void @llvm.aarch64.sve.st2.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x i1>, <vscale x 2 x double>*)
-
-declare void @llvm.aarch64.sve.st3.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i1>, <vscale x 16 x i8>*)
-declare void @llvm.aarch64.sve.st3.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i1>, <vscale x 8 x i16>*)
-declare void @llvm.aarch64.sve.st3.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i1>, <vscale x 4 x i32>*)
-declare void @llvm.aarch64.sve.st3.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i1>, <vscale x 2 x i64>*)
-declare void @llvm.aarch64.sve.st3.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x i1>, <vscale x 8 x half>*)
-declare void @llvm.aarch64.sve.st3.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x i1>, <vscale x 4 x float>*)
-declare void @llvm.aarch64.sve.st3.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x i1>, <vscale x 2 x double>*)
-
-declare void @llvm.aarch64.sve.st4.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i1>, <vscale x 16 x i8>*)
-declare void @llvm.aarch64.sve.st4.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i1>, <vscale x 8 x i16>*)
-declare void @llvm.aarch64.sve.st4.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i1>, <vscale x 4 x i32>*)
-declare void @llvm.aarch64.sve.st4.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i1>, <vscale x 2 x i64>*)
-declare void @llvm.aarch64.sve.st4.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x i1>, <vscale x 8 x half>*)
-declare void @llvm.aarch64.sve.st4.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x i1>, <vscale x 4 x float>*)
-declare void @llvm.aarch64.sve.st4.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x i1>, <vscale x 2 x double>*)
+ double* %base)
+ ret void
+}
+
+declare void @llvm.aarch64.sve.st2.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i1>, i8*)
+declare void @llvm.aarch64.sve.st2.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i1>, i16*)
+declare void @llvm.aarch64.sve.st2.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i1>, i32*)
+declare void @llvm.aarch64.sve.st2.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i1>, i64*)
+declare void @llvm.aarch64.sve.st2.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x i1>, half*)
+declare void @llvm.aarch64.sve.st2.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x i1>, float*)
+declare void @llvm.aarch64.sve.st2.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x i1>, double*)
+
+declare void @llvm.aarch64.sve.st3.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i1>, i8*)
+declare void @llvm.aarch64.sve.st3.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i1>, i16*)
+declare void @llvm.aarch64.sve.st3.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i1>, i32*)
+declare void @llvm.aarch64.sve.st3.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i1>, i64*)
+declare void @llvm.aarch64.sve.st3.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x i1>, half*)
+declare void @llvm.aarch64.sve.st3.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x i1>, float*)
+declare void @llvm.aarch64.sve.st3.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x i1>, double*)
+
+declare void @llvm.aarch64.sve.st4.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i1>, i8*)
+declare void @llvm.aarch64.sve.st4.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i1>, i16*)
+declare void @llvm.aarch64.sve.st4.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i1>, i32*)
+declare void @llvm.aarch64.sve.st4.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i1>, i64*)
+declare void @llvm.aarch64.sve.st4.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x i1>, half*)
+declare void @llvm.aarch64.sve.st4.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x i1>, float*)
+declare void @llvm.aarch64.sve.st4.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x i1>, double*)
diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-stN-reg-reg-addr-mode.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-stN-reg-reg-addr-mode.ll
index 4945fdca9498..59562905f891 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-stN-reg-reg-addr-mode.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-stN-reg-reg-addr-mode.ll
@@ -9,11 +9,10 @@ define void @st2b_i8(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 1
; CHECK: st2b { z0.b, z1.b }, p0, [x0, x1]
; CHECK-NEXT: ret
%1 = getelementptr i8, i8* %addr, i64 %offset
- %base = bitcast i8* %1 to <vscale x 16 x i8>*
call void @llvm.aarch64.sve.st2.nxv16i8(<vscale x 16 x i8> %v0,
<vscale x 16 x i8> %v1,
<vscale x 16 x i1> %pred,
- <vscale x 16 x i8>* %base)
+ i8* %1)
ret void
}
@@ -26,11 +25,10 @@ define void @st2h_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x
; CHECK: st2h { z0.h, z1.h }, p0, [x0, x1, lsl #1]
; CHECK-NEXT: ret
%1 = getelementptr i16, i16* %addr, i64 %offset
- %base = bitcast i16* %1 to <vscale x 8 x i16>*
call void @llvm.aarch64.sve.st2.nxv8i16(<vscale x 8 x i16> %v0,
<vscale x 8 x i16> %v1,
<vscale x 8 x i1> %pred,
- <vscale x 8 x i16>* %base)
+ i16* %1)
ret void
}
@@ -39,11 +37,10 @@ define void @st2h_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale
; CHECK: st2h { z0.h, z1.h }, p0, [x0, x1, lsl #1]
; CHECK-NEXT: ret
%1 = getelementptr half, half* %addr, i64 %offset
- %base = bitcast half* %1 to <vscale x 8 x half>*
call void @llvm.aarch64.sve.st2.nxv8f16(<vscale x 8 x half> %v0,
<vscale x 8 x half> %v1,
<vscale x 8 x i1> %pred,
- <vscale x 8 x half>* %base)
+ half* %1)
ret void
}
@@ -56,11 +53,10 @@ define void @st2w_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x
; CHECK: st2w { z0.s, z1.s }, p0, [x0, x1, lsl #2]
; CHECK-NEXT: ret
%1 = getelementptr i32, i32* %addr, i64 %offset
- %base = bitcast i32* %1 to <vscale x 4 x i32>*
call void @llvm.aarch64.sve.st2.nxv4i32(<vscale x 4 x i32> %v0,
<vscale x 4 x i32> %v1,
<vscale x 4 x i1> %pred,
- <vscale x 4 x i32>* %base)
+ i32* %1)
ret void
}
@@ -69,11 +65,10 @@ define void @st2w_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscal
; CHECK: st2w { z0.s, z1.s }, p0, [x0, x1, lsl #2]
; CHECK-NEXT: ret
%1 = getelementptr float, float* %addr, i64 %offset
- %base = bitcast float* %1 to <vscale x 4 x float>*
call void @llvm.aarch64.sve.st2.nxv4f32(<vscale x 4 x float> %v0,
<vscale x 4 x float> %v1,
<vscale x 4 x i1> %pred,
- <vscale x 4 x float>* %base)
+ float* %1)
ret void
}
@@ -86,11 +81,10 @@ define void @st2d_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x
; CHECK: st2d { z0.d, z1.d }, p0, [x0, x1, lsl #3]
; CHECK-NEXT: ret
%1 = getelementptr i64, i64* %addr, i64 %offset
- %base = bitcast i64* %1 to <vscale x 2 x i64>*
call void @llvm.aarch64.sve.st2.nxv2i64(<vscale x 2 x i64> %v0,
<vscale x 2 x i64> %v1,
<vscale x 2 x i1> %pred,
- <vscale x 2 x i64>* %base)
+ i64* %1)
ret void
}
@@ -99,11 +93,10 @@ define void @st2d_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vsc
; CHECK: st2d { z0.d, z1.d }, p0, [x0, x1, lsl #3]
; CHECK-NEXT: ret
%1 = getelementptr double, double* %addr, i64 %offset
- %base = bitcast double* %1 to <vscale x 2 x double>*
call void @llvm.aarch64.sve.st2.nxv2f64(<vscale x 2 x double> %v0,
<vscale x 2 x double> %v1,
<vscale x 2 x i1> %pred,
- <vscale x 2 x double>* %base)
+ double* %1)
ret void
}
@@ -116,12 +109,11 @@ define void @st3b_i8(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 1
; CHECK: st3b { z0.b, z1.b, z2.b }, p0, [x0, x1]
; CHECK-NEXT: ret
%1 = getelementptr i8, i8* %addr, i64 %offset
- %base = bitcast i8* %1 to <vscale x 16 x i8>*
call void @llvm.aarch64.sve.st3.nxv16i8(<vscale x 16 x i8> %v0,
<vscale x 16 x i8> %v1,
<vscale x 16 x i8> %v2,
<vscale x 16 x i1> %pred,
- <vscale x 16 x i8>* %base)
+ i8* %1)
ret void
}
@@ -134,12 +126,11 @@ define void @st3h_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x
; CHECK: st3h { z0.h, z1.h, z2.h }, p0, [x0, x1, lsl #1]
; CHECK-NEXT: ret
%1 = getelementptr i16, i16* %addr, i64 %offset
- %base = bitcast i16* %1 to <vscale x 8 x i16>*
call void @llvm.aarch64.sve.st3.nxv8i16(<vscale x 8 x i16> %v0,
<vscale x 8 x i16> %v1,
<vscale x 8 x i16> %v2,
<vscale x 8 x i1> %pred,
- <vscale x 8 x i16>* %base)
+ i16* %1)
ret void
}
@@ -148,12 +139,11 @@ define void @st3h_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale
; CHECK: st3h { z0.h, z1.h, z2.h }, p0, [x0, x1, lsl #1]
; CHECK-NEXT: ret
%1 = getelementptr half, half* %addr, i64 %offset
- %base = bitcast half* %1 to <vscale x 8 x half>*
call void @llvm.aarch64.sve.st3.nxv8f16(<vscale x 8 x half> %v0,
<vscale x 8 x half> %v1,
<vscale x 8 x half> %v2,
<vscale x 8 x i1> %pred,
- <vscale x 8 x half>* %base)
+ half* %1)
ret void
}
@@ -166,12 +156,11 @@ define void @st3w_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x
; CHECK: st3w { z0.s, z1.s, z2.s }, p0, [x0, x1, lsl #2]
; CHECK-NEXT: ret
%1 = getelementptr i32, i32* %addr, i64 %offset
- %base = bitcast i32* %1 to <vscale x 4 x i32>*
call void @llvm.aarch64.sve.st3.nxv4i32(<vscale x 4 x i32> %v0,
<vscale x 4 x i32> %v1,
<vscale x 4 x i32> %v2,
<vscale x 4 x i1> %pred,
- <vscale x 4 x i32>* %base)
+ i32* %1)
ret void
}
@@ -180,12 +169,11 @@ define void @st3w_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscal
; CHECK: st3w { z0.s, z1.s, z2.s }, p0, [x0, x1, lsl #2]
; CHECK-NEXT: ret
%1 = getelementptr float, float* %addr, i64 %offset
- %base = bitcast float* %1 to <vscale x 4 x float>*
call void @llvm.aarch64.sve.st3.nxv4f32(<vscale x 4 x float> %v0,
<vscale x 4 x float> %v1,
<vscale x 4 x float> %v2,
<vscale x 4 x i1> %pred,
- <vscale x 4 x float>* %base)
+ float* %1)
ret void
}
@@ -198,12 +186,11 @@ define void @st3d_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x
; CHECK: st3d { z0.d, z1.d, z2.d }, p0, [x0, x1, lsl #3]
; CHECK-NEXT: ret
%1 = getelementptr i64, i64* %addr, i64 %offset
- %base = bitcast i64* %1 to <vscale x 2 x i64>*
call void @llvm.aarch64.sve.st3.nxv2i64(<vscale x 2 x i64> %v0,
<vscale x 2 x i64> %v1,
<vscale x 2 x i64> %v2,
<vscale x 2 x i1> %pred,
- <vscale x 2 x i64>* %base)
+ i64* %1)
ret void
}
@@ -212,12 +199,11 @@ define void @st3d_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vsc
; CHECK: st3d { z0.d, z1.d, z2.d }, p0, [x0, x1, lsl #3]
; CHECK-NEXT: ret
%1 = getelementptr double, double* %addr, i64 %offset
- %base = bitcast double* %1 to <vscale x 2 x double>*
call void @llvm.aarch64.sve.st3.nxv2f64(<vscale x 2 x double> %v0,
<vscale x 2 x double> %v1,
<vscale x 2 x double> %v2,
<vscale x 2 x i1> %pred,
- <vscale x 2 x double>* %base)
+ double* %1)
ret void
}
@@ -230,13 +216,12 @@ define void @st4b_i8(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 1
; CHECK: st4b { z0.b, z1.b, z2.b, z3.b }, p0, [x0, x1]
; CHECK-NEXT: ret
%1 = getelementptr i8, i8* %addr, i64 %offset
- %base = bitcast i8* %1 to <vscale x 16 x i8>*
call void @llvm.aarch64.sve.st4.nxv16i8(<vscale x 16 x i8> %v0,
<vscale x 16 x i8> %v1,
<vscale x 16 x i8> %v2,
<vscale x 16 x i8> %v3,
<vscale x 16 x i1> %pred,
- <vscale x 16 x i8>* %base)
+ i8* %1)
ret void
}
@@ -249,13 +234,12 @@ define void @st4h_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x
; CHECK: st4h { z0.h, z1.h, z2.h, z3.h }, p0, [x0, x1, lsl #1]
; CHECK-NEXT: ret
%1 = getelementptr i16, i16* %addr, i64 %offset
- %base = bitcast i16* %1 to <vscale x 8 x i16>*
call void @llvm.aarch64.sve.st4.nxv8i16(<vscale x 8 x i16> %v0,
<vscale x 8 x i16> %v1,
<vscale x 8 x i16> %v2,
<vscale x 8 x i16> %v3,
<vscale x 8 x i1> %pred,
- <vscale x 8 x i16>* %base)
+ i16* %1)
ret void
}
@@ -264,13 +248,12 @@ define void @st4h_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale
; CHECK: st4h { z0.h, z1.h, z2.h, z3.h }, p0, [x0, x1, lsl #1]
; CHECK-NEXT: ret
%1 = getelementptr half, half* %addr, i64 %offset
- %base = bitcast half* %1 to <vscale x 8 x half>*
call void @llvm.aarch64.sve.st4.nxv8f16(<vscale x 8 x half> %v0,
<vscale x 8 x half> %v1,
<vscale x 8 x half> %v2,
<vscale x 8 x half> %v3,
<vscale x 8 x i1> %pred,
- <vscale x 8 x half>* %base)
+ half* %1)
ret void
}
@@ -283,13 +266,12 @@ define void @st4w_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x
; CHECK: st4w { z0.s, z1.s, z2.s, z3.s }, p0, [x0, x1, lsl #2]
; CHECK-NEXT: ret
%1 = getelementptr i32, i32* %addr, i64 %offset
- %base = bitcast i32* %1 to <vscale x 4 x i32>*
call void @llvm.aarch64.sve.st4.nxv4i32(<vscale x 4 x i32> %v0,
<vscale x 4 x i32> %v1,
<vscale x 4 x i32> %v2,
<vscale x 4 x i32> %v3,
<vscale x 4 x i1> %pred,
- <vscale x 4 x i32>* %base)
+ i32* %1)
ret void
}
@@ -298,13 +280,12 @@ define void @st4w_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscal
; CHECK: st4w { z0.s, z1.s, z2.s, z3.s }, p0, [x0, x1, lsl #2]
; CHECK-NEXT: ret
%1 = getelementptr float, float* %addr, i64 %offset
- %base = bitcast float* %1 to <vscale x 4 x float>*
call void @llvm.aarch64.sve.st4.nxv4f32(<vscale x 4 x float> %v0,
<vscale x 4 x float> %v1,
<vscale x 4 x float> %v2,
<vscale x 4 x float> %v3,
<vscale x 4 x i1> %pred,
- <vscale x 4 x float>* %base)
+ float* %1)
ret void
}
@@ -317,13 +298,12 @@ define void @st4d_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x
; CHECK: st4d { z0.d, z1.d, z2.d, z3.d }, p0, [x0, x1, lsl #3]
; CHECK-NEXT: ret
%1 = getelementptr i64, i64* %addr, i64 %offset
- %base = bitcast i64* %1 to <vscale x 2 x i64>*
call void @llvm.aarch64.sve.st4.nxv2i64(<vscale x 2 x i64> %v0,
<vscale x 2 x i64> %v1,
<vscale x 2 x i64> %v2,
<vscale x 2 x i64> %v3,
<vscale x 2 x i1> %pred,
- <vscale x 2 x i64>* %base)
+ i64* %1)
ret void
}
@@ -332,36 +312,35 @@ define void @st4d_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vsc
; CHECK: st4d { z0.d, z1.d, z2.d, z3.d }, p0, [x0, x1, lsl #3]
; CHECK-NEXT: ret
%1 = getelementptr double, double* %addr, i64 %offset
- %base = bitcast double* %1 to <vscale x 2 x double>*
call void @llvm.aarch64.sve.st4.nxv2f64(<vscale x 2 x double> %v0,
<vscale x 2 x double> %v1,
<vscale x 2 x double> %v2,
<vscale x 2 x double> %v3,
<vscale x 2 x i1> %pred,
- <vscale x 2 x double>* %base)
+ double* %1)
ret void
}
-declare void @llvm.aarch64.sve.st2.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i1>, <vscale x 16 x i8>*)
-declare void @llvm.aarch64.sve.st2.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i1>, <vscale x 8 x i16>*)
-declare void @llvm.aarch64.sve.st2.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i1>, <vscale x 4 x i32>*)
-declare void @llvm.aarch64.sve.st2.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i1>, <vscale x 2 x i64>*)
-declare void @llvm.aarch64.sve.st2.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x i1>, <vscale x 8 x half>*)
-declare void @llvm.aarch64.sve.st2.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x i1>, <vscale x 4 x float>*)
-declare void @llvm.aarch64.sve.st2.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x i1>, <vscale x 2 x double>*)
-
-declare void @llvm.aarch64.sve.st3.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i1>, <vscale x 16 x i8>*)
-declare void @llvm.aarch64.sve.st3.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i1>, <vscale x 8 x i16>*)
-declare void @llvm.aarch64.sve.st3.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i1>, <vscale x 4 x i32>*)
-declare void @llvm.aarch64.sve.st3.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i1>, <vscale x 2 x i64>*)
-declare void @llvm.aarch64.sve.st3.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x i1>, <vscale x 8 x half>*)
-declare void @llvm.aarch64.sve.st3.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x i1>, <vscale x 4 x float>*)
-declare void @llvm.aarch64.sve.st3.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x i1>, <vscale x 2 x double>*)
-
-declare void @llvm.aarch64.sve.st4.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i1>, <vscale x 16 x i8>*)
-declare void @llvm.aarch64.sve.st4.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i1>, <vscale x 8 x i16>*)
-declare void @llvm.aarch64.sve.st4.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i1>, <vscale x 4 x i32>*)
-declare void @llvm.aarch64.sve.st4.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i1>, <vscale x 2 x i64>*)
-declare void @llvm.aarch64.sve.st4.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x i1>, <vscale x 8 x half>*)
-declare void @llvm.aarch64.sve.st4.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x i1>, <vscale x 4 x float>*)
-declare void @llvm.aarch64.sve.st4.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x i1>, <vscale x 2 x double>*)
+declare void @llvm.aarch64.sve.st2.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i1>, i8*)
+declare void @llvm.aarch64.sve.st2.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i1>, i16*)
+declare void @llvm.aarch64.sve.st2.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i1>, i32*)
+declare void @llvm.aarch64.sve.st2.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i1>, i64*)
+declare void @llvm.aarch64.sve.st2.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x i1>, half*)
+declare void @llvm.aarch64.sve.st2.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x i1>, float*)
+declare void @llvm.aarch64.sve.st2.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x i1>, double*)
+
+declare void @llvm.aarch64.sve.st3.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i1>, i8*)
+declare void @llvm.aarch64.sve.st3.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i1>, i16*)
+declare void @llvm.aarch64.sve.st3.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i1>, i32*)
+declare void @llvm.aarch64.sve.st3.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i1>, i64*)
+declare void @llvm.aarch64.sve.st3.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x i1>, half*)
+declare void @llvm.aarch64.sve.st3.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x i1>, float*)
+declare void @llvm.aarch64.sve.st3.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x i1>, double*)
+
+declare void @llvm.aarch64.sve.st4.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i1>, i8*)
+declare void @llvm.aarch64.sve.st4.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i1>, i16*)
+declare void @llvm.aarch64.sve.st4.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i1>, i32*)
+declare void @llvm.aarch64.sve.st4.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i1>, i64*)
+declare void @llvm.aarch64.sve.st4.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x i1>, half*)
+declare void @llvm.aarch64.sve.st4.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x i1>, float*)
+declare void @llvm.aarch64.sve.st4.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x i1>, double*)
diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-stores.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-stores.ll
index ac2b9a32b14e..6416376c7e1d 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-stores.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-stores.ll
@@ -4,14 +4,14 @@
; ST2B
;
-define void @st2b_i8(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i1> %pred, <vscale x 16 x i8>* %addr) {
+define void @st2b_i8(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i1> %pred, i8* %addr) {
; CHECK-LABEL: st2b_i8:
; CHECK: st2b { z0.b, z1.b }, p0, [x0]
; CHECK-NEXT: ret
call void @llvm.aarch64.sve.st2.nxv16i8(<vscale x 16 x i8> %v0,
<vscale x 16 x i8> %v1,
<vscale x 16 x i1> %pred,
- <vscale x 16 x i8>* %addr)
+ i8* %addr)
ret void
}
@@ -19,25 +19,25 @@ define void @st2b_i8(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 1
; ST2H
;
-define void @st2h_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x 8 x i1> %pred, <vscale x 8 x i16>* %addr) {
+define void @st2h_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x 8 x i1> %pred, i16* %addr) {
; CHECK-LABEL: st2h_i16:
; CHECK: st2h { z0.h, z1.h }, p0, [x0]
; CHECK-NEXT: ret
call void @llvm.aarch64.sve.st2.nxv8i16(<vscale x 8 x i16> %v0,
<vscale x 8 x i16> %v1,
<vscale x 8 x i1> %pred,
- <vscale x 8 x i16>* %addr)
+ i16* %addr)
ret void
}
-define void @st2h_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale x 8 x i1> %pred, <vscale x 8 x half>* %addr) {
+define void @st2h_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale x 8 x i1> %pred, half* %addr) {
; CHECK-LABEL: st2h_f16:
; CHECK: st2h { z0.h, z1.h }, p0, [x0]
; CHECK-NEXT: ret
call void @llvm.aarch64.sve.st2.nxv8f16(<vscale x 8 x half> %v0,
<vscale x 8 x half> %v1,
<vscale x 8 x i1> %pred,
- <vscale x 8 x half>* %addr)
+ half* %addr)
ret void
}
@@ -45,25 +45,25 @@ define void @st2h_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale
; ST2W
;
-define void @st2w_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x 4 x i1> %pred, <vscale x 4 x i32>* %addr) {
+define void @st2w_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x 4 x i1> %pred, i32* %addr) {
; CHECK-LABEL: st2w_i32:
; CHECK: st2w { z0.s, z1.s }, p0, [x0]
; CHECK-NEXT: ret
call void @llvm.aarch64.sve.st2.nxv4i32(<vscale x 4 x i32> %v0,
<vscale x 4 x i32> %v1,
<vscale x 4 x i1> %pred,
- <vscale x 4 x i32>* %addr)
+ i32* %addr)
ret void
}
-define void @st2w_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscale x 4 x i1> %pred, <vscale x 4 x float>* %addr) {
+define void @st2w_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscale x 4 x i1> %pred, float* %addr) {
; CHECK-LABEL: st2w_f32:
; CHECK: st2w { z0.s, z1.s }, p0, [x0]
; CHECK-NEXT: ret
call void @llvm.aarch64.sve.st2.nxv4f32(<vscale x 4 x float> %v0,
<vscale x 4 x float> %v1,
<vscale x 4 x i1> %pred,
- <vscale x 4 x float>* %addr)
+ float* %addr)
ret void
}
@@ -71,25 +71,25 @@ define void @st2w_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscal
; ST2D
;
-define void @st2d_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x 2 x i1> %pred, <vscale x 2 x i64>* %addr) {
+define void @st2d_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x 2 x i1> %pred, i64* %addr) {
; CHECK-LABEL: st2d_i64:
; CHECK: st2d { z0.d, z1.d }, p0, [x0]
; CHECK-NEXT: ret
call void @llvm.aarch64.sve.st2.nxv2i64(<vscale x 2 x i64> %v0,
<vscale x 2 x i64> %v1,
<vscale x 2 x i1> %pred,
- <vscale x 2 x i64>* %addr)
+ i64* %addr)
ret void
}
-define void @st2d_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vscale x 2 x i1> %pred, <vscale x 2 x double>* %addr) {
+define void @st2d_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vscale x 2 x i1> %pred, double* %addr) {
; CHECK-LABEL: st2d_f64:
; CHECK: st2d { z0.d, z1.d }, p0, [x0]
; CHECK-NEXT: ret
call void @llvm.aarch64.sve.st2.nxv2f64(<vscale x 2 x double> %v0,
<vscale x 2 x double> %v1,
<vscale x 2 x i1> %pred,
- <vscale x 2 x double>* %addr)
+ double* %addr)
ret void
}
@@ -97,7 +97,7 @@ define void @st2d_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vsc
; ST3B
;
-define void @st3b_i8(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i1> %pred, <vscale x 16 x i8>* %addr) {
+define void @st3b_i8(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i1> %pred, i8* %addr) {
; CHECK-LABEL: st3b_i8:
; CHECK: st3b { z0.b, z1.b, z2.b }, p0, [x0]
; CHECK-NEXT: ret
@@ -105,7 +105,7 @@ define void @st3b_i8(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 1
<vscale x 16 x i8> %v1,
<vscale x 16 x i8> %v2,
<vscale x 16 x i1> %pred,
- <vscale x 16 x i8>* %addr)
+ i8* %addr)
ret void
}
@@ -113,7 +113,7 @@ define void @st3b_i8(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 1
; ST3H
;
-define void @st3h_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x 8 x i16> %v2, <vscale x 8 x i1> %pred, <vscale x 8 x i16>* %addr) {
+define void @st3h_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x 8 x i16> %v2, <vscale x 8 x i1> %pred, i16* %addr) {
; CHECK-LABEL: st3h_i16:
; CHECK: st3h { z0.h, z1.h, z2.h }, p0, [x0]
; CHECK-NEXT: ret
@@ -121,11 +121,11 @@ define void @st3h_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x
<vscale x 8 x i16> %v1,
<vscale x 8 x i16> %v2,
<vscale x 8 x i1> %pred,
- <vscale x 8 x i16>* %addr)
+ i16* %addr)
ret void
}
-define void @st3h_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale x 8 x half> %v2, <vscale x 8 x i1> %pred, <vscale x 8 x half>* %addr) {
+define void @st3h_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale x 8 x half> %v2, <vscale x 8 x i1> %pred, half* %addr) {
; CHECK-LABEL: st3h_f16:
; CHECK: st3h { z0.h, z1.h, z2.h }, p0, [x0]
; CHECK-NEXT: ret
@@ -133,7 +133,7 @@ define void @st3h_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale
<vscale x 8 x half> %v1,
<vscale x 8 x half> %v2,
<vscale x 8 x i1> %pred,
- <vscale x 8 x half>* %addr)
+ half* %addr)
ret void
}
@@ -141,7 +141,7 @@ define void @st3h_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale
; ST3W
;
-define void @st3w_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x 4 x i32> %v2, <vscale x 4 x i1> %pred, <vscale x 4 x i32>* %addr) {
+define void @st3w_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x 4 x i32> %v2, <vscale x 4 x i1> %pred, i32* %addr) {
; CHECK-LABEL: st3w_i32:
; CHECK: st3w { z0.s, z1.s, z2.s }, p0, [x0]
; CHECK-NEXT: ret
@@ -149,11 +149,11 @@ define void @st3w_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x
<vscale x 4 x i32> %v1,
<vscale x 4 x i32> %v2,
<vscale x 4 x i1> %pred,
- <vscale x 4 x i32>* %addr)
+ i32* %addr)
ret void
}
-define void @st3w_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscale x 4 x float> %v2, <vscale x 4 x i1> %pred, <vscale x 4 x float>* %addr) {
+define void @st3w_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscale x 4 x float> %v2, <vscale x 4 x i1> %pred, float* %addr) {
; CHECK-LABEL: st3w_f32:
; CHECK: st3w { z0.s, z1.s, z2.s }, p0, [x0]
; CHECK-NEXT: ret
@@ -161,7 +161,7 @@ define void @st3w_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscal
<vscale x 4 x float> %v1,
<vscale x 4 x float> %v2,
<vscale x 4 x i1> %pred,
- <vscale x 4 x float>* %addr)
+ float* %addr)
ret void
}
@@ -169,7 +169,7 @@ define void @st3w_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscal
; ST3D
;
-define void @st3d_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x 2 x i64> %v2, <vscale x 2 x i1> %pred, <vscale x 2 x i64>* %addr) {
+define void @st3d_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x 2 x i64> %v2, <vscale x 2 x i1> %pred, i64* %addr) {
; CHECK-LABEL: st3d_i64:
; CHECK: st3d { z0.d, z1.d, z2.d }, p0, [x0]
; CHECK-NEXT: ret
@@ -177,11 +177,11 @@ define void @st3d_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x
<vscale x 2 x i64> %v1,
<vscale x 2 x i64> %v2,
<vscale x 2 x i1> %pred,
- <vscale x 2 x i64>* %addr)
+ i64* %addr)
ret void
}
-define void @st3d_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vscale x 2 x double> %v2, <vscale x 2 x i1> %pred, <vscale x 2 x double>* %addr) {
+define void @st3d_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vscale x 2 x double> %v2, <vscale x 2 x i1> %pred, double* %addr) {
; CHECK-LABEL: st3d_f64:
; CHECK: st3d { z0.d, z1.d, z2.d }, p0, [x0]
; CHECK-NEXT: ret
@@ -189,7 +189,7 @@ define void @st3d_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vsc
<vscale x 2 x double> %v1,
<vscale x 2 x double> %v2,
<vscale x 2 x i1> %pred,
- <vscale x 2 x double>* %addr)
+ double* %addr)
ret void
}
@@ -197,7 +197,7 @@ define void @st3d_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vsc
; ST4B
;
-define void @st4b_i8(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i8> %v3, <vscale x 16 x i1> %pred, <vscale x 16 x i8>* %addr) {
+define void @st4b_i8(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i8> %v3, <vscale x 16 x i1> %pred, i8* %addr) {
; CHECK-LABEL: st4b_i8:
; CHECK: st4b { z0.b, z1.b, z2.b, z3.b }, p0, [x0]
; CHECK-NEXT: ret
@@ -206,7 +206,7 @@ define void @st4b_i8(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 1
<vscale x 16 x i8> %v2,
<vscale x 16 x i8> %v3,
<vscale x 16 x i1> %pred,
- <vscale x 16 x i8>* %addr)
+ i8* %addr)
ret void
}
@@ -214,7 +214,7 @@ define void @st4b_i8(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 1
; ST4H
;
-define void @st4h_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x 8 x i16> %v2, <vscale x 8 x i16> %v3, <vscale x 8 x i1> %pred, <vscale x 8 x i16>* %addr) {
+define void @st4h_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x 8 x i16> %v2, <vscale x 8 x i16> %v3, <vscale x 8 x i1> %pred, i16* %addr) {
; CHECK-LABEL: st4h_i16:
; CHECK: st4h { z0.h, z1.h, z2.h, z3.h }, p0, [x0]
; CHECK-NEXT: ret
@@ -223,11 +223,11 @@ define void @st4h_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x
<vscale x 8 x i16> %v2,
<vscale x 8 x i16> %v3,
<vscale x 8 x i1> %pred,
- <vscale x 8 x i16>* %addr)
+ i16* %addr)
ret void
}
-define void @st4h_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale x 8 x half> %v2, <vscale x 8 x half> %v3, <vscale x 8 x i1> %pred, <vscale x 8 x half>* %addr) {
+define void @st4h_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale x 8 x half> %v2, <vscale x 8 x half> %v3, <vscale x 8 x i1> %pred, half* %addr) {
; CHECK-LABEL: st4h_f16:
; CHECK: st4h { z0.h, z1.h, z2.h, z3.h }, p0, [x0]
; CHECK-NEXT: ret
@@ -236,7 +236,7 @@ define void @st4h_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale
<vscale x 8 x half> %v2,
<vscale x 8 x half> %v3,
<vscale x 8 x i1> %pred,
- <vscale x 8 x half>* %addr)
+ half* %addr)
ret void
}
@@ -244,7 +244,7 @@ define void @st4h_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale
; ST4W
;
-define void @st4w_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x 4 x i32> %v2, <vscale x 4 x i32> %v3, <vscale x 4 x i1> %pred, <vscale x 4 x i32>* %addr) {
+define void @st4w_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x 4 x i32> %v2, <vscale x 4 x i32> %v3, <vscale x 4 x i1> %pred, i32* %addr) {
; CHECK-LABEL: st4w_i32:
; CHECK: st4w { z0.s, z1.s, z2.s, z3.s }, p0, [x0]
; CHECK-NEXT: ret
@@ -253,11 +253,11 @@ define void @st4w_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x
<vscale x 4 x i32> %v2,
<vscale x 4 x i32> %v3,
<vscale x 4 x i1> %pred,
- <vscale x 4 x i32>* %addr)
+ i32* %addr)
ret void
}
-define void @st4w_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscale x 4 x float> %v2, <vscale x 4 x float> %v3, <vscale x 4 x i1> %pred, <vscale x 4 x float>* %addr) {
+define void @st4w_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscale x 4 x float> %v2, <vscale x 4 x float> %v3, <vscale x 4 x i1> %pred, float* %addr) {
; CHECK-LABEL: st4w_f32:
; CHECK: st4w { z0.s, z1.s, z2.s, z3.s }, p0, [x0]
; CHECK-NEXT: ret
@@ -266,7 +266,7 @@ define void @st4w_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscal
<vscale x 4 x float> %v2,
<vscale x 4 x float> %v3,
<vscale x 4 x i1> %pred,
- <vscale x 4 x float>* %addr)
+ float* %addr)
ret void
}
@@ -274,7 +274,7 @@ define void @st4w_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscal
; ST4D
;
-define void @st4d_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x 2 x i64> %v2, <vscale x 2 x i64> %v3, <vscale x 2 x i1> %pred, <vscale x 2 x i64>* %addr) {
+define void @st4d_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x 2 x i64> %v2, <vscale x 2 x i64> %v3, <vscale x 2 x i1> %pred, i64* %addr) {
; CHECK-LABEL: st4d_i64:
; CHECK: st4d { z0.d, z1.d, z2.d, z3.d }, p0, [x0]
; CHECK-NEXT: ret
@@ -283,11 +283,11 @@ define void @st4d_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x
<vscale x 2 x i64> %v2,
<vscale x 2 x i64> %v3,
<vscale x 2 x i1> %pred,
- <vscale x 2 x i64>* %addr)
+ i64* %addr)
ret void
}
-define void @st4d_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vscale x 2 x double> %v2, <vscale x 2 x double> %v3, <vscale x 2 x i1> %pred, <vscale x 2 x double>* %addr) {
+define void @st4d_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vscale x 2 x double> %v2, <vscale x 2 x double> %v3, <vscale x 2 x i1> %pred, double* %addr) {
; CHECK-LABEL: st4d_f64:
; CHECK: st4d { z0.d, z1.d, z2.d, z3.d }, p0, [x0]
; CHECK-NEXT: ret
@@ -296,7 +296,7 @@ define void @st4d_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vsc
<vscale x 2 x double> %v2,
<vscale x 2 x double> %v3,
<vscale x 2 x i1> %pred,
- <vscale x 2 x double>* %addr)
+ double* %addr)
ret void
}
@@ -387,29 +387,29 @@ define void @stnt1d_f64(<vscale x 2 x double> %data, <vscale x 2 x i1> %pred, do
}
-declare void @llvm.aarch64.sve.st2.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i1>, <vscale x 16 x i8>*)
-declare void @llvm.aarch64.sve.st2.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i1>, <vscale x 8 x i16>*)
-declare void @llvm.aarch64.sve.st2.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i1>, <vscale x 4 x i32>*)
-declare void @llvm.aarch64.sve.st2.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i1>, <vscale x 2 x i64>*)
-declare void @llvm.aarch64.sve.st2.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x i1>, <vscale x 8 x half>*)
-declare void @llvm.aarch64.sve.st2.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x i1>, <vscale x 4 x float>*)
-declare void @llvm.aarch64.sve.st2.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x i1>, <vscale x 2 x double>*)
-
-declare void @llvm.aarch64.sve.st3.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i1>, <vscale x 16 x i8>*)
-declare void @llvm.aarch64.sve.st3.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i1>, <vscale x 8 x i16>*)
-declare void @llvm.aarch64.sve.st3.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i1>, <vscale x 4 x i32>*)
-declare void @llvm.aarch64.sve.st3.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i1>, <vscale x 2 x i64>*)
-declare void @llvm.aarch64.sve.st3.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x i1>, <vscale x 8 x half>*)
-declare void @llvm.aarch64.sve.st3.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x i1>, <vscale x 4 x float>*)
-declare void @llvm.aarch64.sve.st3.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x i1>, <vscale x 2 x double>*)
-
-declare void @llvm.aarch64.sve.st4.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i1>, <vscale x 16 x i8>*)
-declare void @llvm.aarch64.sve.st4.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i1>, <vscale x 8 x i16>*)
-declare void @llvm.aarch64.sve.st4.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i1>, <vscale x 4 x i32>*)
-declare void @llvm.aarch64.sve.st4.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i1>, <vscale x 2 x i64>*)
-declare void @llvm.aarch64.sve.st4.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x i1>, <vscale x 8 x half>*)
-declare void @llvm.aarch64.sve.st4.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x i1>, <vscale x 4 x float>*)
-declare void @llvm.aarch64.sve.st4.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x i1>, <vscale x 2 x double>*)
+declare void @llvm.aarch64.sve.st2.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i1>, i8*)
+declare void @llvm.aarch64.sve.st2.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i1>, i16*)
+declare void @llvm.aarch64.sve.st2.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i1>, i32*)
+declare void @llvm.aarch64.sve.st2.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i1>, i64*)
+declare void @llvm.aarch64.sve.st2.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x i1>, half*)
+declare void @llvm.aarch64.sve.st2.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x i1>, float*)
+declare void @llvm.aarch64.sve.st2.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x i1>, double*)
+
+declare void @llvm.aarch64.sve.st3.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i1>, i8*)
+declare void @llvm.aarch64.sve.st3.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i1>, i16*)
+declare void @llvm.aarch64.sve.st3.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i1>, i32*)
+declare void @llvm.aarch64.sve.st3.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i1>, i64*)
+declare void @llvm.aarch64.sve.st3.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x i1>, half*)
+declare void @llvm.aarch64.sve.st3.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x i1>, float*)
+declare void @llvm.aarch64.sve.st3.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x i1>, double*)
+
+declare void @llvm.aarch64.sve.st4.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i1>, i8*)
+declare void @llvm.aarch64.sve.st4.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i1>, i16*)
+declare void @llvm.aarch64.sve.st4.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i1>, i32*)
+declare void @llvm.aarch64.sve.st4.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i1>, i64*)
+declare void @llvm.aarch64.sve.st4.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x i1>, half*)
+declare void @llvm.aarch64.sve.st4.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x i1>, float*)
+declare void @llvm.aarch64.sve.st4.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x i1>, double*)
declare void @llvm.aarch64.sve.stnt1.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i1>, i8*)
declare void @llvm.aarch64.sve.stnt1.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i1>, i16*)
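For reference only, a minimal sketch (mirroring the updated tests above, not part of the committed diff; the function name @st2b_i8_example is hypothetical) of how a caller now hands the element pointer straight to the struct store intrinsic, with no bitcast to a scalable-vector pointer type:

  define void @st2b_i8_example(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1,
                               <vscale x 16 x i1> %pred, i8* %addr, i64 %offset) {
    ; Index by element; the resulting i8* feeds the intrinsic as-is.
    %base = getelementptr i8, i8* %addr, i64 %offset
    call void @llvm.aarch64.sve.st2.nxv16i8(<vscale x 16 x i8> %v0,
                                            <vscale x 16 x i8> %v1,
                                            <vscale x 16 x i1> %pred,
                                            i8* %base)
    ret void
  }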