[llvm] 180cc74 - [AArch64] Update SME load/store intrinsics to work on opaque pointers.
Sander de Smalen via llvm-commits
llvm-commits at lists.llvm.org
Tue Jun 28 01:50:31 PDT 2022
Author: Sander de Smalen
Date: 2022-06-28T09:50:11+01:00
New Revision: 180cc74de9cb083730a4f0591535ad5ec629ca55
URL: https://github.com/llvm/llvm-project/commit/180cc74de9cb083730a4f0591535ad5ec629ca55
DIFF: https://github.com/llvm/llvm-project/commit/180cc74de9cb083730a4f0591535ad5ec629ca55.diff
LOG: [AArch64] Update SME load/store intrinsics to work on opaque pointers.
These intrinsics should be able to use opaque pointers, because the
load/store type is already encoded in their names and in their return/operand types.
Reviewed By: c-rhodes
Differential Revision: https://reviews.llvm.org/D128505
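For illustration, here is how a call to one of these intrinsics changes under
opaque pointers (this mirrors the ld1h test updates in the diff below); the
element size now comes only from the intrinsic name rather than the pointer
operand's pointee type:

    ; Typed pointers (before):
    call void @llvm.aarch64.sme.ld1h.horiz(<vscale x 16 x i1> %pg, i16* %ptr, i64 0, i32 0)
    ; Opaque pointers (after):
    call void @llvm.aarch64.sme.ld1h.horiz(<vscale x 16 x i1> %pg, ptr %ptr, i64 0, i32 0)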
Added:
Modified:
llvm/include/llvm/IR/IntrinsicsAArch64.td
llvm/test/CodeGen/AArch64/sme-intrinsics-loads.ll
llvm/test/CodeGen/AArch64/sme-intrinsics-stores.ll
Removed:
################################################################################
diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td
index 2355ff6083bb..91c2385a3120 100644
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -2586,45 +2586,33 @@ def int_aarch64_sve_whilewr_d : SVE2_CONFLICT_DETECT_Intrinsic;
// Scalable Matrix Extension (SME) Intrinsics
let TargetPrefix = "aarch64" in {
- class SME_Load_Store_B_Intrinsic
+ class SME_Load_Store_Intrinsic<LLVMType pred_ty>
: DefaultAttrsIntrinsic<[],
- [llvm_nxv16i1_ty, llvm_ptr_ty, llvm_i64_ty, llvm_i32_ty], []>;
- class SME_Load_Store_H_Intrinsic
- : DefaultAttrsIntrinsic<[],
- [llvm_nxv16i1_ty, LLVMPointerType<llvm_i16_ty>, llvm_i64_ty, llvm_i32_ty], []>;
- class SME_Load_Store_S_Intrinsic
- : DefaultAttrsIntrinsic<[],
- [llvm_nxv16i1_ty, LLVMPointerType<llvm_i32_ty>, llvm_i64_ty, llvm_i32_ty], []>;
- class SME_Load_Store_D_Intrinsic
- : DefaultAttrsIntrinsic<[],
- [llvm_nxv16i1_ty, LLVMPointerType<llvm_i64_ty>, llvm_i64_ty, llvm_i32_ty], []>;
- class SME_Load_Store_Q_Intrinsic
- : DefaultAttrsIntrinsic<[],
- [llvm_nxv16i1_ty, LLVMPointerType<llvm_i128_ty>, llvm_i64_ty, llvm_i32_ty], []>;
+ [pred_ty, llvm_ptr_ty, llvm_i64_ty, llvm_i32_ty], []>;
// Loads
- def int_aarch64_sme_ld1b_horiz : SME_Load_Store_B_Intrinsic;
- def int_aarch64_sme_ld1h_horiz : SME_Load_Store_H_Intrinsic;
- def int_aarch64_sme_ld1w_horiz : SME_Load_Store_S_Intrinsic;
- def int_aarch64_sme_ld1d_horiz : SME_Load_Store_D_Intrinsic;
- def int_aarch64_sme_ld1q_horiz : SME_Load_Store_Q_Intrinsic;
- def int_aarch64_sme_ld1b_vert : SME_Load_Store_B_Intrinsic;
- def int_aarch64_sme_ld1h_vert : SME_Load_Store_H_Intrinsic;
- def int_aarch64_sme_ld1w_vert : SME_Load_Store_S_Intrinsic;
- def int_aarch64_sme_ld1d_vert : SME_Load_Store_D_Intrinsic;
- def int_aarch64_sme_ld1q_vert : SME_Load_Store_Q_Intrinsic;
+ def int_aarch64_sme_ld1b_horiz : SME_Load_Store_Intrinsic<llvm_nxv16i1_ty>;
+ def int_aarch64_sme_ld1h_horiz : SME_Load_Store_Intrinsic<llvm_nxv16i1_ty>;
+ def int_aarch64_sme_ld1w_horiz : SME_Load_Store_Intrinsic<llvm_nxv16i1_ty>;
+ def int_aarch64_sme_ld1d_horiz : SME_Load_Store_Intrinsic<llvm_nxv16i1_ty>;
+ def int_aarch64_sme_ld1q_horiz : SME_Load_Store_Intrinsic<llvm_nxv16i1_ty>;
+ def int_aarch64_sme_ld1b_vert : SME_Load_Store_Intrinsic<llvm_nxv16i1_ty>;
+ def int_aarch64_sme_ld1h_vert : SME_Load_Store_Intrinsic<llvm_nxv16i1_ty>;
+ def int_aarch64_sme_ld1w_vert : SME_Load_Store_Intrinsic<llvm_nxv16i1_ty>;
+ def int_aarch64_sme_ld1d_vert : SME_Load_Store_Intrinsic<llvm_nxv16i1_ty>;
+ def int_aarch64_sme_ld1q_vert : SME_Load_Store_Intrinsic<llvm_nxv16i1_ty>;
// Stores
- def int_aarch64_sme_st1b_horiz : SME_Load_Store_B_Intrinsic;
- def int_aarch64_sme_st1h_horiz : SME_Load_Store_H_Intrinsic;
- def int_aarch64_sme_st1w_horiz : SME_Load_Store_S_Intrinsic;
- def int_aarch64_sme_st1d_horiz : SME_Load_Store_D_Intrinsic;
- def int_aarch64_sme_st1q_horiz : SME_Load_Store_Q_Intrinsic;
- def int_aarch64_sme_st1b_vert : SME_Load_Store_B_Intrinsic;
- def int_aarch64_sme_st1h_vert : SME_Load_Store_H_Intrinsic;
- def int_aarch64_sme_st1w_vert : SME_Load_Store_S_Intrinsic;
- def int_aarch64_sme_st1d_vert : SME_Load_Store_D_Intrinsic;
- def int_aarch64_sme_st1q_vert : SME_Load_Store_Q_Intrinsic;
+ def int_aarch64_sme_st1b_horiz : SME_Load_Store_Intrinsic<llvm_nxv16i1_ty>;
+ def int_aarch64_sme_st1h_horiz : SME_Load_Store_Intrinsic<llvm_nxv16i1_ty>;
+ def int_aarch64_sme_st1w_horiz : SME_Load_Store_Intrinsic<llvm_nxv16i1_ty>;
+ def int_aarch64_sme_st1d_horiz : SME_Load_Store_Intrinsic<llvm_nxv16i1_ty>;
+ def int_aarch64_sme_st1q_horiz : SME_Load_Store_Intrinsic<llvm_nxv16i1_ty>;
+ def int_aarch64_sme_st1b_vert : SME_Load_Store_Intrinsic<llvm_nxv16i1_ty>;
+ def int_aarch64_sme_st1h_vert : SME_Load_Store_Intrinsic<llvm_nxv16i1_ty>;
+ def int_aarch64_sme_st1w_vert : SME_Load_Store_Intrinsic<llvm_nxv16i1_ty>;
+ def int_aarch64_sme_st1d_vert : SME_Load_Store_Intrinsic<llvm_nxv16i1_ty>;
+ def int_aarch64_sme_st1q_vert : SME_Load_Store_Intrinsic<llvm_nxv16i1_ty>;
// Spill + fill
def int_aarch64_sme_ldr : DefaultAttrsIntrinsic<
diff --git a/llvm/test/CodeGen/AArch64/sme-intrinsics-loads.ll b/llvm/test/CodeGen/AArch64/sme-intrinsics-loads.ll
index 9bfe6280e652..167e501ff3c1 100644
--- a/llvm/test/CodeGen/AArch64/sme-intrinsics-loads.ll
+++ b/llvm/test/CodeGen/AArch64/sme-intrinsics-loads.ll
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme -verify-machineinstrs < %s | FileCheck %s
-define void @ld1b(<vscale x 16 x i1> %pg, i8* %ptr, i32 %sliceidx) {
+define void @ld1b(<vscale x 16 x i1> %pg, ptr %ptr, i32 %sliceidx) {
; CHECK-LABEL: ld1b:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w12, w1
@@ -10,12 +10,12 @@ define void @ld1b(<vscale x 16 x i1> %pg, i8* %ptr, i32 %sliceidx) {
; CHECK-NEXT: ld1b {za0v.b[w13, 0]}, p0/z, [x0]
; CHECK-NEXT: ret
%tileslice = add i32 %sliceidx, 15
- call void @llvm.aarch64.sme.ld1b.horiz(<vscale x 16 x i1> %pg, i8* %ptr, i64 0, i32 %tileslice)
- call void @llvm.aarch64.sme.ld1b.vert(<vscale x 16 x i1> %pg, i8* %ptr, i64 0, i32 0)
+ call void @llvm.aarch64.sme.ld1b.horiz(<vscale x 16 x i1> %pg, ptr %ptr, i64 0, i32 %tileslice)
+ call void @llvm.aarch64.sme.ld1b.vert(<vscale x 16 x i1> %pg, ptr %ptr, i64 0, i32 0)
ret void;
}
-define void @ld1b_with_addr_offset(<vscale x 16 x i1> %pg, i8* %ptr, i64 %index, i32 %sliceidx) {
+define void @ld1b_with_addr_offset(<vscale x 16 x i1> %pg, ptr %ptr, i64 %index, i32 %sliceidx) {
; CHECK-LABEL: ld1b_with_addr_offset:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w12, wzr
@@ -23,14 +23,14 @@ define void @ld1b_with_addr_offset(<vscale x 16 x i1> %pg, i8* %ptr, i64 %index,
; CHECK-NEXT: ld1b {za0h.b[w12, 0]}, p0/z, [x0, x1]
; CHECK-NEXT: ld1b {za0v.b[w13, 15]}, p0/z, [x0, x1]
; CHECK-NEXT: ret
- %base = getelementptr i8, i8* %ptr, i64 %index
+ %base = getelementptr i8, ptr %ptr, i64 %index
%tileslice = add i32 %sliceidx, 15
- call void @llvm.aarch64.sme.ld1b.horiz(<vscale x 16 x i1> %pg, i8* %base, i64 0, i32 0)
- call void @llvm.aarch64.sme.ld1b.vert(<vscale x 16 x i1> %pg, i8* %base, i64 0, i32 %tileslice)
+ call void @llvm.aarch64.sme.ld1b.horiz(<vscale x 16 x i1> %pg, ptr %base, i64 0, i32 0)
+ call void @llvm.aarch64.sme.ld1b.vert(<vscale x 16 x i1> %pg, ptr %base, i64 0, i32 %tileslice)
ret void;
}
-define void @ld1h(<vscale x 16 x i1> %pg, i16* %ptr, i32 %sliceidx) {
+define void @ld1h(<vscale x 16 x i1> %pg, ptr %ptr, i32 %sliceidx) {
; CHECK-LABEL: ld1h:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w12, w1
@@ -41,14 +41,14 @@ define void @ld1h(<vscale x 16 x i1> %pg, i16* %ptr, i32 %sliceidx) {
; CHECK-NEXT: ld1h {za1v.h[w12, 7]}, p0/z, [x0]
; CHECK-NEXT: ret
%tileslice = add i32 %sliceidx, 7
- call void @llvm.aarch64.sme.ld1h.horiz(<vscale x 16 x i1> %pg, i16* %ptr, i64 0, i32 %tileslice)
- call void @llvm.aarch64.sme.ld1h.horiz(<vscale x 16 x i1> %pg, i16* %ptr, i64 1, i32 0)
- call void @llvm.aarch64.sme.ld1h.vert(<vscale x 16 x i1> %pg, i16* %ptr, i64 0, i32 0)
- call void @llvm.aarch64.sme.ld1h.vert(<vscale x 16 x i1> %pg, i16* %ptr, i64 1, i32 %tileslice)
+ call void @llvm.aarch64.sme.ld1h.horiz(<vscale x 16 x i1> %pg, ptr %ptr, i64 0, i32 %tileslice)
+ call void @llvm.aarch64.sme.ld1h.horiz(<vscale x 16 x i1> %pg, ptr %ptr, i64 1, i32 0)
+ call void @llvm.aarch64.sme.ld1h.vert(<vscale x 16 x i1> %pg, ptr %ptr, i64 0, i32 0)
+ call void @llvm.aarch64.sme.ld1h.vert(<vscale x 16 x i1> %pg, ptr %ptr, i64 1, i32 %tileslice)
ret void;
}
-define void @ld1h_with_addr_offset(<vscale x 16 x i1> %pg, i16* %ptr, i64 %index, i32 %sliceidx) {
+define void @ld1h_with_addr_offset(<vscale x 16 x i1> %pg, ptr %ptr, i64 %index, i32 %sliceidx) {
; CHECK-LABEL: ld1h_with_addr_offset:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w12, w2
@@ -56,14 +56,14 @@ define void @ld1h_with_addr_offset(<vscale x 16 x i1> %pg, i16* %ptr, i64 %index
; CHECK-NEXT: ld1h {za0h.h[w12, 7]}, p0/z, [x0, x1, lsl #1]
; CHECK-NEXT: ld1h {za1v.h[w13, 0]}, p0/z, [x0, x1, lsl #1]
; CHECK-NEXT: ret
- %base = getelementptr i16, i16* %ptr, i64 %index
+ %base = getelementptr i16, ptr %ptr, i64 %index
%tileslice = add i32 %sliceidx, 7
- call void @llvm.aarch64.sme.ld1h.horiz(<vscale x 16 x i1> %pg, i16* %base, i64 0, i32 %tileslice)
- call void @llvm.aarch64.sme.ld1h.vert(<vscale x 16 x i1> %pg, i16* %base, i64 1, i32 0)
+ call void @llvm.aarch64.sme.ld1h.horiz(<vscale x 16 x i1> %pg, ptr %base, i64 0, i32 %tileslice)
+ call void @llvm.aarch64.sme.ld1h.vert(<vscale x 16 x i1> %pg, ptr %base, i64 1, i32 0)
ret void;
}
-define void @ld1w(<vscale x 16 x i1> %pg, i32* %ptr, i32 %sliceidx) {
+define void @ld1w(<vscale x 16 x i1> %pg, ptr %ptr, i32 %sliceidx) {
; CHECK-LABEL: ld1w:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w12, wzr
@@ -78,18 +78,18 @@ define void @ld1w(<vscale x 16 x i1> %pg, i32* %ptr, i32 %sliceidx) {
; CHECK-NEXT: ld1w {za3v.s[w12, 0]}, p0/z, [x0]
; CHECK-NEXT: ret
%tileslice = add i32 %sliceidx, 3
- call void @llvm.aarch64.sme.ld1w.horiz(<vscale x 16 x i1> %pg, i32* %ptr, i64 0, i32 0)
- call void @llvm.aarch64.sme.ld1w.horiz(<vscale x 16 x i1> %pg, i32* %ptr, i64 1, i32 0)
- call void @llvm.aarch64.sme.ld1w.horiz(<vscale x 16 x i1> %pg, i32* %ptr, i64 2, i32 0)
- call void @llvm.aarch64.sme.ld1w.horiz(<vscale x 16 x i1> %pg, i32* %ptr, i64 3, i32 %tileslice)
- call void @llvm.aarch64.sme.ld1w.vert(<vscale x 16 x i1> %pg, i32* %ptr, i64 0, i32 0)
- call void @llvm.aarch64.sme.ld1w.vert(<vscale x 16 x i1> %pg, i32* %ptr, i64 1, i32 0)
- call void @llvm.aarch64.sme.ld1w.vert(<vscale x 16 x i1> %pg, i32* %ptr, i64 2, i32 %tileslice)
- call void @llvm.aarch64.sme.ld1w.vert(<vscale x 16 x i1> %pg, i32* %ptr, i64 3, i32 0)
+ call void @llvm.aarch64.sme.ld1w.horiz(<vscale x 16 x i1> %pg, ptr %ptr, i64 0, i32 0)
+ call void @llvm.aarch64.sme.ld1w.horiz(<vscale x 16 x i1> %pg, ptr %ptr, i64 1, i32 0)
+ call void @llvm.aarch64.sme.ld1w.horiz(<vscale x 16 x i1> %pg, ptr %ptr, i64 2, i32 0)
+ call void @llvm.aarch64.sme.ld1w.horiz(<vscale x 16 x i1> %pg, ptr %ptr, i64 3, i32 %tileslice)
+ call void @llvm.aarch64.sme.ld1w.vert(<vscale x 16 x i1> %pg, ptr %ptr, i64 0, i32 0)
+ call void @llvm.aarch64.sme.ld1w.vert(<vscale x 16 x i1> %pg, ptr %ptr, i64 1, i32 0)
+ call void @llvm.aarch64.sme.ld1w.vert(<vscale x 16 x i1> %pg, ptr %ptr, i64 2, i32 %tileslice)
+ call void @llvm.aarch64.sme.ld1w.vert(<vscale x 16 x i1> %pg, ptr %ptr, i64 3, i32 0)
ret void;
}
-define void @ld1w_with_addr_offset(<vscale x 16 x i1> %pg, i32* %ptr, i64 %index, i32 %sliceidx) {
+define void @ld1w_with_addr_offset(<vscale x 16 x i1> %pg, ptr %ptr, i64 %index, i32 %sliceidx) {
; CHECK-LABEL: ld1w_with_addr_offset:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w12, w2
@@ -97,14 +97,14 @@ define void @ld1w_with_addr_offset(<vscale x 16 x i1> %pg, i32* %ptr, i64 %index
; CHECK-NEXT: ld1w {za0h.s[w13, 0]}, p0/z, [x0, x1, lsl #2]
; CHECK-NEXT: ld1w {za3v.s[w12, 3]}, p0/z, [x0, x1, lsl #2]
; CHECK-NEXT: ret
- %base = getelementptr i32, i32* %ptr, i64 %index
+ %base = getelementptr i32, ptr %ptr, i64 %index
%tileslice = add i32 %sliceidx, 3
- call void @llvm.aarch64.sme.ld1w.horiz(<vscale x 16 x i1> %pg, i32* %base, i64 0, i32 0)
- call void @llvm.aarch64.sme.ld1w.vert(<vscale x 16 x i1> %pg, i32* %base, i64 3, i32 %tileslice)
+ call void @llvm.aarch64.sme.ld1w.horiz(<vscale x 16 x i1> %pg, ptr %base, i64 0, i32 0)
+ call void @llvm.aarch64.sme.ld1w.vert(<vscale x 16 x i1> %pg, ptr %base, i64 3, i32 %tileslice)
ret void;
}
-define void @ld1d(<vscale x 16 x i1> %pg, i64* %ptr, i32 %sliceidx) {
+define void @ld1d(<vscale x 16 x i1> %pg, ptr %ptr, i32 %sliceidx) {
; CHECK-LABEL: ld1d:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w13, wzr
@@ -127,26 +127,26 @@ define void @ld1d(<vscale x 16 x i1> %pg, i64* %ptr, i32 %sliceidx) {
; CHECK-NEXT: ld1d {za7v.d[w12, 1]}, p0/z, [x0]
; CHECK-NEXT: ret
%tileslice = add i32 %sliceidx, 1
- call void @llvm.aarch64.sme.ld1d.horiz(<vscale x 16 x i1> %pg, i64* %ptr, i64 0, i32 0)
- call void @llvm.aarch64.sme.ld1d.horiz(<vscale x 16 x i1> %pg, i64* %ptr, i64 1, i32 0)
- call void @llvm.aarch64.sme.ld1d.horiz(<vscale x 16 x i1> %pg, i64* %ptr, i64 2, i32 0)
- call void @llvm.aarch64.sme.ld1d.horiz(<vscale x 16 x i1> %pg, i64* %ptr, i64 3, i32 0)
- call void @llvm.aarch64.sme.ld1d.horiz(<vscale x 16 x i1> %pg, i64* %ptr, i64 4, i32 %tileslice)
- call void @llvm.aarch64.sme.ld1d.horiz(<vscale x 16 x i1> %pg, i64* %ptr, i64 5, i32 0)
- call void @llvm.aarch64.sme.ld1d.horiz(<vscale x 16 x i1> %pg, i64* %ptr, i64 6, i32 0)
- call void @llvm.aarch64.sme.ld1d.horiz(<vscale x 16 x i1> %pg, i64* %ptr, i64 7, i32 0)
- call void @llvm.aarch64.sme.ld1d.vert(<vscale x 16 x i1> %pg, i64* %ptr, i64 0, i32 0)
- call void @llvm.aarch64.sme.ld1d.vert(<vscale x 16 x i1> %pg, i64* %ptr, i64 1, i32 0)
- call void @llvm.aarch64.sme.ld1d.vert(<vscale x 16 x i1> %pg, i64* %ptr, i64 2, i32 0)
- call void @llvm.aarch64.sme.ld1d.vert(<vscale x 16 x i1> %pg, i64* %ptr, i64 3, i32 0)
- call void @llvm.aarch64.sme.ld1d.vert(<vscale x 16 x i1> %pg, i64* %ptr, i64 4, i32 0)
- call void @llvm.aarch64.sme.ld1d.vert(<vscale x 16 x i1> %pg, i64* %ptr, i64 5, i32 0)
- call void @llvm.aarch64.sme.ld1d.vert(<vscale x 16 x i1> %pg, i64* %ptr, i64 6, i32 0)
- call void @llvm.aarch64.sme.ld1d.vert(<vscale x 16 x i1> %pg, i64* %ptr, i64 7, i32 %tileslice)
+ call void @llvm.aarch64.sme.ld1d.horiz(<vscale x 16 x i1> %pg, ptr %ptr, i64 0, i32 0)
+ call void @llvm.aarch64.sme.ld1d.horiz(<vscale x 16 x i1> %pg, ptr %ptr, i64 1, i32 0)
+ call void @llvm.aarch64.sme.ld1d.horiz(<vscale x 16 x i1> %pg, ptr %ptr, i64 2, i32 0)
+ call void @llvm.aarch64.sme.ld1d.horiz(<vscale x 16 x i1> %pg, ptr %ptr, i64 3, i32 0)
+ call void @llvm.aarch64.sme.ld1d.horiz(<vscale x 16 x i1> %pg, ptr %ptr, i64 4, i32 %tileslice)
+ call void @llvm.aarch64.sme.ld1d.horiz(<vscale x 16 x i1> %pg, ptr %ptr, i64 5, i32 0)
+ call void @llvm.aarch64.sme.ld1d.horiz(<vscale x 16 x i1> %pg, ptr %ptr, i64 6, i32 0)
+ call void @llvm.aarch64.sme.ld1d.horiz(<vscale x 16 x i1> %pg, ptr %ptr, i64 7, i32 0)
+ call void @llvm.aarch64.sme.ld1d.vert(<vscale x 16 x i1> %pg, ptr %ptr, i64 0, i32 0)
+ call void @llvm.aarch64.sme.ld1d.vert(<vscale x 16 x i1> %pg, ptr %ptr, i64 1, i32 0)
+ call void @llvm.aarch64.sme.ld1d.vert(<vscale x 16 x i1> %pg, ptr %ptr, i64 2, i32 0)
+ call void @llvm.aarch64.sme.ld1d.vert(<vscale x 16 x i1> %pg, ptr %ptr, i64 3, i32 0)
+ call void @llvm.aarch64.sme.ld1d.vert(<vscale x 16 x i1> %pg, ptr %ptr, i64 4, i32 0)
+ call void @llvm.aarch64.sme.ld1d.vert(<vscale x 16 x i1> %pg, ptr %ptr, i64 5, i32 0)
+ call void @llvm.aarch64.sme.ld1d.vert(<vscale x 16 x i1> %pg, ptr %ptr, i64 6, i32 0)
+ call void @llvm.aarch64.sme.ld1d.vert(<vscale x 16 x i1> %pg, ptr %ptr, i64 7, i32 %tileslice)
ret void;
}
-define void @ld1d_with_addr_offset(<vscale x 16 x i1> %pg, i64* %ptr, i64 %index, i32 %sliceidx) {
+define void @ld1d_with_addr_offset(<vscale x 16 x i1> %pg, ptr %ptr, i64 %index, i32 %sliceidx) {
; CHECK-LABEL: ld1d_with_addr_offset:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w12, w2
@@ -154,14 +154,14 @@ define void @ld1d_with_addr_offset(<vscale x 16 x i1> %pg, i64* %ptr, i64 %index
; CHECK-NEXT: ld1d {za0h.d[w12, 1]}, p0/z, [x0, x1, lsl #3]
; CHECK-NEXT: ld1d {za7v.d[w13, 0]}, p0/z, [x0, x1, lsl #3]
; CHECK-NEXT: ret
- %base = getelementptr i64, i64* %ptr, i64 %index
+ %base = getelementptr i64, ptr %ptr, i64 %index
%tileslice = add i32 %sliceidx, 1
- call void @llvm.aarch64.sme.ld1d.horiz(<vscale x 16 x i1> %pg, i64* %base, i64 0, i32 %tileslice)
- call void @llvm.aarch64.sme.ld1d.vert(<vscale x 16 x i1> %pg, i64* %base, i64 7, i32 0)
+ call void @llvm.aarch64.sme.ld1d.horiz(<vscale x 16 x i1> %pg, ptr %base, i64 0, i32 %tileslice)
+ call void @llvm.aarch64.sme.ld1d.vert(<vscale x 16 x i1> %pg, ptr %base, i64 7, i32 0)
ret void;
}
-define void @ld1q(<vscale x 16 x i1> %pg, i128* %ptr) {
+define void @ld1q(<vscale x 16 x i1> %pg, ptr %ptr) {
; CHECK-LABEL: ld1q:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w12, wzr
@@ -198,77 +198,77 @@ define void @ld1q(<vscale x 16 x i1> %pg, i128* %ptr) {
; CHECK-NEXT: ld1q {za14v.q[w12, 0]}, p0/z, [x0]
; CHECK-NEXT: ld1q {za15v.q[w12, 0]}, p0/z, [x0]
; CHECK-NEXT: ret
- call void @llvm.aarch64.sme.ld1q.horiz(<vscale x 16 x i1> %pg, i128* %ptr, i64 0, i32 0)
- call void @llvm.aarch64.sme.ld1q.horiz(<vscale x 16 x i1> %pg, i128* %ptr, i64 1, i32 0)
- call void @llvm.aarch64.sme.ld1q.horiz(<vscale x 16 x i1> %pg, i128* %ptr, i64 2, i32 0)
- call void @llvm.aarch64.sme.ld1q.horiz(<vscale x 16 x i1> %pg, i128* %ptr, i64 3, i32 0)
- call void @llvm.aarch64.sme.ld1q.horiz(<vscale x 16 x i1> %pg, i128* %ptr, i64 4, i32 0)
- call void @llvm.aarch64.sme.ld1q.horiz(<vscale x 16 x i1> %pg, i128* %ptr, i64 5, i32 0)
- call void @llvm.aarch64.sme.ld1q.horiz(<vscale x 16 x i1> %pg, i128* %ptr, i64 6, i32 0)
- call void @llvm.aarch64.sme.ld1q.horiz(<vscale x 16 x i1> %pg, i128* %ptr, i64 7, i32 0)
- call void @llvm.aarch64.sme.ld1q.horiz(<vscale x 16 x i1> %pg, i128* %ptr, i64 8, i32 0)
- call void @llvm.aarch64.sme.ld1q.horiz(<vscale x 16 x i1> %pg, i128* %ptr, i64 9, i32 0)
- call void @llvm.aarch64.sme.ld1q.horiz(<vscale x 16 x i1> %pg, i128* %ptr, i64 10, i32 0)
- call void @llvm.aarch64.sme.ld1q.horiz(<vscale x 16 x i1> %pg, i128* %ptr, i64 11, i32 0)
- call void @llvm.aarch64.sme.ld1q.horiz(<vscale x 16 x i1> %pg, i128* %ptr, i64 12, i32 0)
- call void @llvm.aarch64.sme.ld1q.horiz(<vscale x 16 x i1> %pg, i128* %ptr, i64 13, i32 0)
- call void @llvm.aarch64.sme.ld1q.horiz(<vscale x 16 x i1> %pg, i128* %ptr, i64 14, i32 0)
- call void @llvm.aarch64.sme.ld1q.horiz(<vscale x 16 x i1> %pg, i128* %ptr, i64 15, i32 0)
- call void @llvm.aarch64.sme.ld1q.vert(<vscale x 16 x i1> %pg, i128* %ptr, i64 0, i32 0)
- call void @llvm.aarch64.sme.ld1q.vert(<vscale x 16 x i1> %pg, i128* %ptr, i64 1, i32 0)
- call void @llvm.aarch64.sme.ld1q.vert(<vscale x 16 x i1> %pg, i128* %ptr, i64 2, i32 0)
- call void @llvm.aarch64.sme.ld1q.vert(<vscale x 16 x i1> %pg, i128* %ptr, i64 3, i32 0)
- call void @llvm.aarch64.sme.ld1q.vert(<vscale x 16 x i1> %pg, i128* %ptr, i64 4, i32 0)
- call void @llvm.aarch64.sme.ld1q.vert(<vscale x 16 x i1> %pg, i128* %ptr, i64 5, i32 0)
- call void @llvm.aarch64.sme.ld1q.vert(<vscale x 16 x i1> %pg, i128* %ptr, i64 6, i32 0)
- call void @llvm.aarch64.sme.ld1q.vert(<vscale x 16 x i1> %pg, i128* %ptr, i64 7, i32 0)
- call void @llvm.aarch64.sme.ld1q.vert(<vscale x 16 x i1> %pg, i128* %ptr, i64 8, i32 0)
- call void @llvm.aarch64.sme.ld1q.vert(<vscale x 16 x i1> %pg, i128* %ptr, i64 9, i32 0)
- call void @llvm.aarch64.sme.ld1q.vert(<vscale x 16 x i1> %pg, i128* %ptr, i64 10, i32 0)
- call void @llvm.aarch64.sme.ld1q.vert(<vscale x 16 x i1> %pg, i128* %ptr, i64 11, i32 0)
- call void @llvm.aarch64.sme.ld1q.vert(<vscale x 16 x i1> %pg, i128* %ptr, i64 12, i32 0)
- call void @llvm.aarch64.sme.ld1q.vert(<vscale x 16 x i1> %pg, i128* %ptr, i64 13, i32 0)
- call void @llvm.aarch64.sme.ld1q.vert(<vscale x 16 x i1> %pg, i128* %ptr, i64 14, i32 0)
- call void @llvm.aarch64.sme.ld1q.vert(<vscale x 16 x i1> %pg, i128* %ptr, i64 15, i32 0)
+ call void @llvm.aarch64.sme.ld1q.horiz(<vscale x 16 x i1> %pg, ptr %ptr, i64 0, i32 0)
+ call void @llvm.aarch64.sme.ld1q.horiz(<vscale x 16 x i1> %pg, ptr %ptr, i64 1, i32 0)
+ call void @llvm.aarch64.sme.ld1q.horiz(<vscale x 16 x i1> %pg, ptr %ptr, i64 2, i32 0)
+ call void @llvm.aarch64.sme.ld1q.horiz(<vscale x 16 x i1> %pg, ptr %ptr, i64 3, i32 0)
+ call void @llvm.aarch64.sme.ld1q.horiz(<vscale x 16 x i1> %pg, ptr %ptr, i64 4, i32 0)
+ call void @llvm.aarch64.sme.ld1q.horiz(<vscale x 16 x i1> %pg, ptr %ptr, i64 5, i32 0)
+ call void @llvm.aarch64.sme.ld1q.horiz(<vscale x 16 x i1> %pg, ptr %ptr, i64 6, i32 0)
+ call void @llvm.aarch64.sme.ld1q.horiz(<vscale x 16 x i1> %pg, ptr %ptr, i64 7, i32 0)
+ call void @llvm.aarch64.sme.ld1q.horiz(<vscale x 16 x i1> %pg, ptr %ptr, i64 8, i32 0)
+ call void @llvm.aarch64.sme.ld1q.horiz(<vscale x 16 x i1> %pg, ptr %ptr, i64 9, i32 0)
+ call void @llvm.aarch64.sme.ld1q.horiz(<vscale x 16 x i1> %pg, ptr %ptr, i64 10, i32 0)
+ call void @llvm.aarch64.sme.ld1q.horiz(<vscale x 16 x i1> %pg, ptr %ptr, i64 11, i32 0)
+ call void @llvm.aarch64.sme.ld1q.horiz(<vscale x 16 x i1> %pg, ptr %ptr, i64 12, i32 0)
+ call void @llvm.aarch64.sme.ld1q.horiz(<vscale x 16 x i1> %pg, ptr %ptr, i64 13, i32 0)
+ call void @llvm.aarch64.sme.ld1q.horiz(<vscale x 16 x i1> %pg, ptr %ptr, i64 14, i32 0)
+ call void @llvm.aarch64.sme.ld1q.horiz(<vscale x 16 x i1> %pg, ptr %ptr, i64 15, i32 0)
+ call void @llvm.aarch64.sme.ld1q.vert(<vscale x 16 x i1> %pg, ptr %ptr, i64 0, i32 0)
+ call void @llvm.aarch64.sme.ld1q.vert(<vscale x 16 x i1> %pg, ptr %ptr, i64 1, i32 0)
+ call void @llvm.aarch64.sme.ld1q.vert(<vscale x 16 x i1> %pg, ptr %ptr, i64 2, i32 0)
+ call void @llvm.aarch64.sme.ld1q.vert(<vscale x 16 x i1> %pg, ptr %ptr, i64 3, i32 0)
+ call void @llvm.aarch64.sme.ld1q.vert(<vscale x 16 x i1> %pg, ptr %ptr, i64 4, i32 0)
+ call void @llvm.aarch64.sme.ld1q.vert(<vscale x 16 x i1> %pg, ptr %ptr, i64 5, i32 0)
+ call void @llvm.aarch64.sme.ld1q.vert(<vscale x 16 x i1> %pg, ptr %ptr, i64 6, i32 0)
+ call void @llvm.aarch64.sme.ld1q.vert(<vscale x 16 x i1> %pg, ptr %ptr, i64 7, i32 0)
+ call void @llvm.aarch64.sme.ld1q.vert(<vscale x 16 x i1> %pg, ptr %ptr, i64 8, i32 0)
+ call void @llvm.aarch64.sme.ld1q.vert(<vscale x 16 x i1> %pg, ptr %ptr, i64 9, i32 0)
+ call void @llvm.aarch64.sme.ld1q.vert(<vscale x 16 x i1> %pg, ptr %ptr, i64 10, i32 0)
+ call void @llvm.aarch64.sme.ld1q.vert(<vscale x 16 x i1> %pg, ptr %ptr, i64 11, i32 0)
+ call void @llvm.aarch64.sme.ld1q.vert(<vscale x 16 x i1> %pg, ptr %ptr, i64 12, i32 0)
+ call void @llvm.aarch64.sme.ld1q.vert(<vscale x 16 x i1> %pg, ptr %ptr, i64 13, i32 0)
+ call void @llvm.aarch64.sme.ld1q.vert(<vscale x 16 x i1> %pg, ptr %ptr, i64 14, i32 0)
+ call void @llvm.aarch64.sme.ld1q.vert(<vscale x 16 x i1> %pg, ptr %ptr, i64 15, i32 0)
ret void;
}
-define void @ld1q_with_addr_offset(<vscale x 16 x i1> %pg, i128* %ptr, i64 %index) {
+define void @ld1q_with_addr_offset(<vscale x 16 x i1> %pg, ptr %ptr, i64 %index) {
; CHECK-LABEL: ld1q_with_addr_offset:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w12, wzr
; CHECK-NEXT: ld1q {za0h.q[w12, 0]}, p0/z, [x0, x1, lsl #4]
; CHECK-NEXT: ld1q {za15v.q[w12, 0]}, p0/z, [x0, x1, lsl #4]
; CHECK-NEXT: ret
- %base = getelementptr i128, i128* %ptr, i64 %index
- call void @llvm.aarch64.sme.ld1q.horiz(<vscale x 16 x i1> %pg, i128* %base, i64 0, i32 0)
- call void @llvm.aarch64.sme.ld1q.vert(<vscale x 16 x i1> %pg, i128* %base, i64 15, i32 0)
+ %base = getelementptr i128, ptr %ptr, i64 %index
+ call void @llvm.aarch64.sme.ld1q.horiz(<vscale x 16 x i1> %pg, ptr %base, i64 0, i32 0)
+ call void @llvm.aarch64.sme.ld1q.vert(<vscale x 16 x i1> %pg, ptr %base, i64 15, i32 0)
ret void;
}
-define void @ldr(i8* %ptr) {
+define void @ldr(ptr %ptr) {
; CHECK-LABEL: ldr:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w12, wzr
; CHECK-NEXT: ldr za[w12, 0], [x0]
; CHECK-NEXT: ret
- call void @llvm.aarch64.sme.ldr(i32 0, i8* %ptr)
+ call void @llvm.aarch64.sme.ldr(i32 0, ptr %ptr)
ret void;
}
-define void @ldr_with_off_15(i8* %ptr) {
+define void @ldr_with_off_15(ptr %ptr) {
; CHECK-LABEL: ldr_with_off_15:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w12, wzr
; CHECK-NEXT: add x8, x0, #15
; CHECK-NEXT: ldr za[w12, 0], [x8]
; CHECK-NEXT: ret
- %base = getelementptr i8, i8* %ptr, i64 15
- call void @llvm.aarch64.sme.ldr(i32 0, i8* %base)
+ %base = getelementptr i8, ptr %ptr, i64 15
+ call void @llvm.aarch64.sme.ldr(i32 0, ptr %base)
ret void;
}
-define void @ldr_with_off_15mulvl(i8* %ptr) {
+define void @ldr_with_off_15mulvl(ptr %ptr) {
; CHECK-LABEL: ldr_with_off_15mulvl:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w12, wzr
@@ -276,12 +276,12 @@ define void @ldr_with_off_15mulvl(i8* %ptr) {
; CHECK-NEXT: ret
%vscale = call i64 @llvm.vscale.i64()
%mulvl = mul i64 %vscale, 240
- %base = getelementptr i8, i8* %ptr, i64 %mulvl
- call void @llvm.aarch64.sme.ldr(i32 0, i8* %base)
+ %base = getelementptr i8, ptr %ptr, i64 %mulvl
+ call void @llvm.aarch64.sme.ldr(i32 0, ptr %base)
ret void;
}
-define void @ldr_with_off_16mulvl(i8* %ptr) {
+define void @ldr_with_off_16mulvl(ptr %ptr) {
; CHECK-LABEL: ldr_with_off_16mulvl:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w12, wzr
@@ -290,21 +290,21 @@ define void @ldr_with_off_16mulvl(i8* %ptr) {
; CHECK-NEXT: ret
%vscale = call i64 @llvm.vscale.i64()
%mulvl = mul i64 %vscale, 256
- %base = getelementptr i8, i8* %ptr, i64 %mulvl
- call void @llvm.aarch64.sme.ldr(i32 0, i8* %base)
+ %base = getelementptr i8, ptr %ptr, i64 %mulvl
+ call void @llvm.aarch64.sme.ldr(i32 0, ptr %base)
ret void;
}
-declare void @llvm.aarch64.sme.ld1b.horiz(<vscale x 16 x i1>, i8*, i64, i32)
-declare void @llvm.aarch64.sme.ld1h.horiz(<vscale x 16 x i1>, i16*, i64, i32)
-declare void @llvm.aarch64.sme.ld1w.horiz(<vscale x 16 x i1>, i32*, i64, i32)
-declare void @llvm.aarch64.sme.ld1d.horiz(<vscale x 16 x i1>, i64*, i64, i32)
-declare void @llvm.aarch64.sme.ld1q.horiz(<vscale x 16 x i1>, i128*, i64, i32)
-declare void @llvm.aarch64.sme.ld1b.vert(<vscale x 16 x i1>, i8*, i64, i32)
-declare void @llvm.aarch64.sme.ld1h.vert(<vscale x 16 x i1>, i16*, i64, i32)
-declare void @llvm.aarch64.sme.ld1w.vert(<vscale x 16 x i1>, i32*, i64, i32)
-declare void @llvm.aarch64.sme.ld1d.vert(<vscale x 16 x i1>, i64*, i64, i32)
-declare void @llvm.aarch64.sme.ld1q.vert(<vscale x 16 x i1>, i128*, i64, i32)
+declare void @llvm.aarch64.sme.ld1b.horiz(<vscale x 16 x i1>, ptr, i64, i32)
+declare void @llvm.aarch64.sme.ld1h.horiz(<vscale x 16 x i1>, ptr, i64, i32)
+declare void @llvm.aarch64.sme.ld1w.horiz(<vscale x 16 x i1>, ptr, i64, i32)
+declare void @llvm.aarch64.sme.ld1d.horiz(<vscale x 16 x i1>, ptr, i64, i32)
+declare void @llvm.aarch64.sme.ld1q.horiz(<vscale x 16 x i1>, ptr, i64, i32)
+declare void @llvm.aarch64.sme.ld1b.vert(<vscale x 16 x i1>, ptr, i64, i32)
+declare void @llvm.aarch64.sme.ld1h.vert(<vscale x 16 x i1>, ptr, i64, i32)
+declare void @llvm.aarch64.sme.ld1w.vert(<vscale x 16 x i1>, ptr, i64, i32)
+declare void @llvm.aarch64.sme.ld1d.vert(<vscale x 16 x i1>, ptr, i64, i32)
+declare void @llvm.aarch64.sme.ld1q.vert(<vscale x 16 x i1>, ptr, i64, i32)
-declare void @llvm.aarch64.sme.ldr(i32, i8*)
+declare void @llvm.aarch64.sme.ldr(i32, ptr)
declare i64 @llvm.vscale.i64()
diff --git a/llvm/test/CodeGen/AArch64/sme-intrinsics-stores.ll b/llvm/test/CodeGen/AArch64/sme-intrinsics-stores.ll
index fa2b7cae5162..00d078305d06 100644
--- a/llvm/test/CodeGen/AArch64/sme-intrinsics-stores.ll
+++ b/llvm/test/CodeGen/AArch64/sme-intrinsics-stores.ll
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme -verify-machineinstrs < %s | FileCheck %s
-define void @st1b(<vscale x 16 x i1> %pg, i8* %ptr, i32 %sliceidx) {
+define void @st1b(<vscale x 16 x i1> %pg, ptr %ptr, i32 %sliceidx) {
; CHECK-LABEL: st1b:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w12, w1
@@ -10,12 +10,12 @@ define void @st1b(<vscale x 16 x i1> %pg, i8* %ptr, i32 %sliceidx) {
; CHECK-NEXT: st1b {za0v.b[w13, 0]}, p0, [x0]
; CHECK-NEXT: ret
%tileslice = add i32 %sliceidx, 15
- call void @llvm.aarch64.sme.st1b.horiz(<vscale x 16 x i1> %pg, i8* %ptr, i64 0, i32 %tileslice)
- call void @llvm.aarch64.sme.st1b.vert(<vscale x 16 x i1> %pg, i8* %ptr, i64 0, i32 0)
+ call void @llvm.aarch64.sme.st1b.horiz(<vscale x 16 x i1> %pg, ptr %ptr, i64 0, i32 %tileslice)
+ call void @llvm.aarch64.sme.st1b.vert(<vscale x 16 x i1> %pg, ptr %ptr, i64 0, i32 0)
ret void;
}
-define void @st1b_with_addr_offset(<vscale x 16 x i1> %pg, i8* %ptr, i64 %index, i32 %sliceidx) {
+define void @st1b_with_addr_offset(<vscale x 16 x i1> %pg, ptr %ptr, i64 %index, i32 %sliceidx) {
; CHECK-LABEL: st1b_with_addr_offset:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w12, wzr
@@ -23,14 +23,14 @@ define void @st1b_with_addr_offset(<vscale x 16 x i1> %pg, i8* %ptr, i64 %index,
; CHECK-NEXT: st1b {za0h.b[w12, 0]}, p0, [x0, x1]
; CHECK-NEXT: st1b {za0v.b[w13, 15]}, p0, [x0, x1]
; CHECK-NEXT: ret
- %base = getelementptr i8, i8* %ptr, i64 %index
+ %base = getelementptr i8, ptr %ptr, i64 %index
%tileslice = add i32 %sliceidx, 15
- call void @llvm.aarch64.sme.st1b.horiz(<vscale x 16 x i1> %pg, i8* %base, i64 0, i32 0)
- call void @llvm.aarch64.sme.st1b.vert(<vscale x 16 x i1> %pg, i8* %base, i64 0, i32 %tileslice)
+ call void @llvm.aarch64.sme.st1b.horiz(<vscale x 16 x i1> %pg, ptr %base, i64 0, i32 0)
+ call void @llvm.aarch64.sme.st1b.vert(<vscale x 16 x i1> %pg, ptr %base, i64 0, i32 %tileslice)
ret void;
}
-define void @st1h(<vscale x 16 x i1> %pg, i16* %ptr, i32 %sliceidx) {
+define void @st1h(<vscale x 16 x i1> %pg, ptr %ptr, i32 %sliceidx) {
; CHECK-LABEL: st1h:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w12, w1
@@ -41,14 +41,14 @@ define void @st1h(<vscale x 16 x i1> %pg, i16* %ptr, i32 %sliceidx) {
; CHECK-NEXT: st1h {za1v.h[w12, 7]}, p0, [x0]
; CHECK-NEXT: ret
%tileslice = add i32 %sliceidx, 7
- call void @llvm.aarch64.sme.st1h.horiz(<vscale x 16 x i1> %pg, i16* %ptr, i64 0, i32 %tileslice)
- call void @llvm.aarch64.sme.st1h.horiz(<vscale x 16 x i1> %pg, i16* %ptr, i64 1, i32 0)
- call void @llvm.aarch64.sme.st1h.vert(<vscale x 16 x i1> %pg, i16* %ptr, i64 0, i32 0)
- call void @llvm.aarch64.sme.st1h.vert(<vscale x 16 x i1> %pg, i16* %ptr, i64 1, i32 %tileslice)
+ call void @llvm.aarch64.sme.st1h.horiz(<vscale x 16 x i1> %pg, ptr %ptr, i64 0, i32 %tileslice)
+ call void @llvm.aarch64.sme.st1h.horiz(<vscale x 16 x i1> %pg, ptr %ptr, i64 1, i32 0)
+ call void @llvm.aarch64.sme.st1h.vert(<vscale x 16 x i1> %pg, ptr %ptr, i64 0, i32 0)
+ call void @llvm.aarch64.sme.st1h.vert(<vscale x 16 x i1> %pg, ptr %ptr, i64 1, i32 %tileslice)
ret void;
}
-define void @st1h_with_addr_offset(<vscale x 16 x i1> %pg, i16* %ptr, i64 %index, i32 %sliceidx) {
+define void @st1h_with_addr_offset(<vscale x 16 x i1> %pg, ptr %ptr, i64 %index, i32 %sliceidx) {
; CHECK-LABEL: st1h_with_addr_offset:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w12, w2
@@ -56,14 +56,14 @@ define void @st1h_with_addr_offset(<vscale x 16 x i1> %pg, i16* %ptr, i64 %index
; CHECK-NEXT: st1h {za0h.h[w12, 7]}, p0, [x0, x1, lsl #1]
; CHECK-NEXT: st1h {za1v.h[w13, 0]}, p0, [x0, x1, lsl #1]
; CHECK-NEXT: ret
- %base = getelementptr i16, i16* %ptr, i64 %index
+ %base = getelementptr i16, ptr %ptr, i64 %index
%tileslice = add i32 %sliceidx, 7
- call void @llvm.aarch64.sme.st1h.horiz(<vscale x 16 x i1> %pg, i16* %base, i64 0, i32 %tileslice)
- call void @llvm.aarch64.sme.st1h.vert(<vscale x 16 x i1> %pg, i16* %base, i64 1, i32 0)
+ call void @llvm.aarch64.sme.st1h.horiz(<vscale x 16 x i1> %pg, ptr %base, i64 0, i32 %tileslice)
+ call void @llvm.aarch64.sme.st1h.vert(<vscale x 16 x i1> %pg, ptr %base, i64 1, i32 0)
ret void;
}
-define void @st1w(<vscale x 16 x i1> %pg, i32* %ptr, i32 %sliceidx) {
+define void @st1w(<vscale x 16 x i1> %pg, ptr %ptr, i32 %sliceidx) {
; CHECK-LABEL: st1w:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w13, wzr
@@ -78,18 +78,18 @@ define void @st1w(<vscale x 16 x i1> %pg, i32* %ptr, i32 %sliceidx) {
; CHECK-NEXT: st1w {za3v.s[w13, 0]}, p0, [x0]
; CHECK-NEXT: ret
%tileslice = add i32 %sliceidx, 3
- call void @llvm.aarch64.sme.st1w.horiz(<vscale x 16 x i1> %pg, i32* %ptr, i64 0, i32 0)
- call void @llvm.aarch64.sme.st1w.horiz(<vscale x 16 x i1> %pg, i32* %ptr, i64 1, i32 0)
- call void @llvm.aarch64.sme.st1w.horiz(<vscale x 16 x i1> %pg, i32* %ptr, i64 2, i32 0)
- call void @llvm.aarch64.sme.st1w.horiz(<vscale x 16 x i1> %pg, i32* %ptr, i64 3, i32 %tileslice)
- call void @llvm.aarch64.sme.st1w.vert(<vscale x 16 x i1> %pg, i32* %ptr, i64 0, i32 0)
- call void @llvm.aarch64.sme.st1w.vert(<vscale x 16 x i1> %pg, i32* %ptr, i64 1, i32 0)
- call void @llvm.aarch64.sme.st1w.vert(<vscale x 16 x i1> %pg, i32* %ptr, i64 2, i32 %tileslice)
- call void @llvm.aarch64.sme.st1w.vert(<vscale x 16 x i1> %pg, i32* %ptr, i64 3, i32 0)
+ call void @llvm.aarch64.sme.st1w.horiz(<vscale x 16 x i1> %pg, ptr %ptr, i64 0, i32 0)
+ call void @llvm.aarch64.sme.st1w.horiz(<vscale x 16 x i1> %pg, ptr %ptr, i64 1, i32 0)
+ call void @llvm.aarch64.sme.st1w.horiz(<vscale x 16 x i1> %pg, ptr %ptr, i64 2, i32 0)
+ call void @llvm.aarch64.sme.st1w.horiz(<vscale x 16 x i1> %pg, ptr %ptr, i64 3, i32 %tileslice)
+ call void @llvm.aarch64.sme.st1w.vert(<vscale x 16 x i1> %pg, ptr %ptr, i64 0, i32 0)
+ call void @llvm.aarch64.sme.st1w.vert(<vscale x 16 x i1> %pg, ptr %ptr, i64 1, i32 0)
+ call void @llvm.aarch64.sme.st1w.vert(<vscale x 16 x i1> %pg, ptr %ptr, i64 2, i32 %tileslice)
+ call void @llvm.aarch64.sme.st1w.vert(<vscale x 16 x i1> %pg, ptr %ptr, i64 3, i32 0)
ret void;
}
-define void @st1w_with_addr_offset(<vscale x 16 x i1> %pg, i32* %ptr, i64 %index, i32 %sliceidx) {
+define void @st1w_with_addr_offset(<vscale x 16 x i1> %pg, ptr %ptr, i64 %index, i32 %sliceidx) {
; CHECK-LABEL: st1w_with_addr_offset:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w12, wzr
@@ -97,14 +97,14 @@ define void @st1w_with_addr_offset(<vscale x 16 x i1> %pg, i32* %ptr, i64 %index
; CHECK-NEXT: st1w {za0h.s[w12, 0]}, p0, [x0, x1, lsl #2]
; CHECK-NEXT: st1w {za3v.s[w13, 3]}, p0, [x0, x1, lsl #2]
; CHECK-NEXT: ret
- %base = getelementptr i32, i32* %ptr, i64 %index
+ %base = getelementptr i32, ptr %ptr, i64 %index
%tileslice = add i32 %sliceidx, 3
- call void @llvm.aarch64.sme.st1w.horiz(<vscale x 16 x i1> %pg, i32* %base, i64 0, i32 0)
- call void @llvm.aarch64.sme.st1w.vert(<vscale x 16 x i1> %pg, i32* %base, i64 3, i32 %tileslice)
+ call void @llvm.aarch64.sme.st1w.horiz(<vscale x 16 x i1> %pg, ptr %base, i64 0, i32 0)
+ call void @llvm.aarch64.sme.st1w.vert(<vscale x 16 x i1> %pg, ptr %base, i64 3, i32 %tileslice)
ret void;
}
-define void @st1d(<vscale x 16 x i1> %pg, i64* %ptr, i32 %sliceidx) {
+define void @st1d(<vscale x 16 x i1> %pg, ptr %ptr, i32 %sliceidx) {
; CHECK-LABEL: st1d:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w13, wzr
@@ -127,26 +127,26 @@ define void @st1d(<vscale x 16 x i1> %pg, i64* %ptr, i32 %sliceidx) {
; CHECK-NEXT: st1d {za7v.d[w12, 1]}, p0, [x0]
; CHECK-NEXT: ret
%tileslice = add i32 %sliceidx, 1
- call void @llvm.aarch64.sme.st1d.horiz(<vscale x 16 x i1> %pg, i64* %ptr, i64 0, i32 0)
- call void @llvm.aarch64.sme.st1d.horiz(<vscale x 16 x i1> %pg, i64* %ptr, i64 1, i32 0)
- call void @llvm.aarch64.sme.st1d.horiz(<vscale x 16 x i1> %pg, i64* %ptr, i64 2, i32 0)
- call void @llvm.aarch64.sme.st1d.horiz(<vscale x 16 x i1> %pg, i64* %ptr, i64 3, i32 0)
- call void @llvm.aarch64.sme.st1d.horiz(<vscale x 16 x i1> %pg, i64* %ptr, i64 4, i32 %tileslice)
- call void @llvm.aarch64.sme.st1d.horiz(<vscale x 16 x i1> %pg, i64* %ptr, i64 5, i32 0)
- call void @llvm.aarch64.sme.st1d.horiz(<vscale x 16 x i1> %pg, i64* %ptr, i64 6, i32 0)
- call void @llvm.aarch64.sme.st1d.horiz(<vscale x 16 x i1> %pg, i64* %ptr, i64 7, i32 0)
- call void @llvm.aarch64.sme.st1d.vert(<vscale x 16 x i1> %pg, i64* %ptr, i64 0, i32 0)
- call void @llvm.aarch64.sme.st1d.vert(<vscale x 16 x i1> %pg, i64* %ptr, i64 1, i32 0)
- call void @llvm.aarch64.sme.st1d.vert(<vscale x 16 x i1> %pg, i64* %ptr, i64 2, i32 0)
- call void @llvm.aarch64.sme.st1d.vert(<vscale x 16 x i1> %pg, i64* %ptr, i64 3, i32 0)
- call void @llvm.aarch64.sme.st1d.vert(<vscale x 16 x i1> %pg, i64* %ptr, i64 4, i32 0)
- call void @llvm.aarch64.sme.st1d.vert(<vscale x 16 x i1> %pg, i64* %ptr, i64 5, i32 0)
- call void @llvm.aarch64.sme.st1d.vert(<vscale x 16 x i1> %pg, i64* %ptr, i64 6, i32 0)
- call void @llvm.aarch64.sme.st1d.vert(<vscale x 16 x i1> %pg, i64* %ptr, i64 7, i32 %tileslice)
+ call void @llvm.aarch64.sme.st1d.horiz(<vscale x 16 x i1> %pg, ptr %ptr, i64 0, i32 0)
+ call void @llvm.aarch64.sme.st1d.horiz(<vscale x 16 x i1> %pg, ptr %ptr, i64 1, i32 0)
+ call void @llvm.aarch64.sme.st1d.horiz(<vscale x 16 x i1> %pg, ptr %ptr, i64 2, i32 0)
+ call void @llvm.aarch64.sme.st1d.horiz(<vscale x 16 x i1> %pg, ptr %ptr, i64 3, i32 0)
+ call void @llvm.aarch64.sme.st1d.horiz(<vscale x 16 x i1> %pg, ptr %ptr, i64 4, i32 %tileslice)
+ call void @llvm.aarch64.sme.st1d.horiz(<vscale x 16 x i1> %pg, ptr %ptr, i64 5, i32 0)
+ call void @llvm.aarch64.sme.st1d.horiz(<vscale x 16 x i1> %pg, ptr %ptr, i64 6, i32 0)
+ call void @llvm.aarch64.sme.st1d.horiz(<vscale x 16 x i1> %pg, ptr %ptr, i64 7, i32 0)
+ call void @llvm.aarch64.sme.st1d.vert(<vscale x 16 x i1> %pg, ptr %ptr, i64 0, i32 0)
+ call void @llvm.aarch64.sme.st1d.vert(<vscale x 16 x i1> %pg, ptr %ptr, i64 1, i32 0)
+ call void @llvm.aarch64.sme.st1d.vert(<vscale x 16 x i1> %pg, ptr %ptr, i64 2, i32 0)
+ call void @llvm.aarch64.sme.st1d.vert(<vscale x 16 x i1> %pg, ptr %ptr, i64 3, i32 0)
+ call void @llvm.aarch64.sme.st1d.vert(<vscale x 16 x i1> %pg, ptr %ptr, i64 4, i32 0)
+ call void @llvm.aarch64.sme.st1d.vert(<vscale x 16 x i1> %pg, ptr %ptr, i64 5, i32 0)
+ call void @llvm.aarch64.sme.st1d.vert(<vscale x 16 x i1> %pg, ptr %ptr, i64 6, i32 0)
+ call void @llvm.aarch64.sme.st1d.vert(<vscale x 16 x i1> %pg, ptr %ptr, i64 7, i32 %tileslice)
ret void;
}
-define void @st1d_with_addr_offset(<vscale x 16 x i1> %pg, i64* %ptr, i64 %index, i32 %sliceidx) {
+define void @st1d_with_addr_offset(<vscale x 16 x i1> %pg, ptr %ptr, i64 %index, i32 %sliceidx) {
; CHECK-LABEL: st1d_with_addr_offset:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w12, w2
@@ -154,14 +154,14 @@ define void @st1d_with_addr_offset(<vscale x 16 x i1> %pg, i64* %ptr, i64 %index
; CHECK-NEXT: st1d {za0h.d[w12, 1]}, p0, [x0, x1, lsl #3]
; CHECK-NEXT: st1d {za7v.d[w13, 0]}, p0, [x0, x1, lsl #3]
; CHECK-NEXT: ret
- %base = getelementptr i64, i64* %ptr, i64 %index
+ %base = getelementptr i64, ptr %ptr, i64 %index
%tileslice = add i32 %sliceidx, 1
- call void @llvm.aarch64.sme.st1d.horiz(<vscale x 16 x i1> %pg, i64* %base, i64 0, i32 %tileslice)
- call void @llvm.aarch64.sme.st1d.vert(<vscale x 16 x i1> %pg, i64* %base, i64 7, i32 0)
+ call void @llvm.aarch64.sme.st1d.horiz(<vscale x 16 x i1> %pg, ptr %base, i64 0, i32 %tileslice)
+ call void @llvm.aarch64.sme.st1d.vert(<vscale x 16 x i1> %pg, ptr %base, i64 7, i32 0)
ret void;
}
-define void @st1q(<vscale x 16 x i1> %pg, i128* %ptr) {
+define void @st1q(<vscale x 16 x i1> %pg, ptr %ptr) {
; CHECK-LABEL: st1q:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w12, wzr
@@ -198,77 +198,77 @@ define void @st1q(<vscale x 16 x i1> %pg, i128* %ptr) {
; CHECK-NEXT: st1q {za14v.q[w12, 0]}, p0, [x0]
; CHECK-NEXT: st1q {za15v.q[w12, 0]}, p0, [x0]
; CHECK-NEXT: ret
- call void @llvm.aarch64.sme.st1q.horiz(<vscale x 16 x i1> %pg, i128* %ptr, i64 0, i32 0)
- call void @llvm.aarch64.sme.st1q.horiz(<vscale x 16 x i1> %pg, i128* %ptr, i64 1, i32 0)
- call void @llvm.aarch64.sme.st1q.horiz(<vscale x 16 x i1> %pg, i128* %ptr, i64 2, i32 0)
- call void @llvm.aarch64.sme.st1q.horiz(<vscale x 16 x i1> %pg, i128* %ptr, i64 3, i32 0)
- call void @llvm.aarch64.sme.st1q.horiz(<vscale x 16 x i1> %pg, i128* %ptr, i64 4, i32 0)
- call void @llvm.aarch64.sme.st1q.horiz(<vscale x 16 x i1> %pg, i128* %ptr, i64 5, i32 0)
- call void @llvm.aarch64.sme.st1q.horiz(<vscale x 16 x i1> %pg, i128* %ptr, i64 6, i32 0)
- call void @llvm.aarch64.sme.st1q.horiz(<vscale x 16 x i1> %pg, i128* %ptr, i64 7, i32 0)
- call void @llvm.aarch64.sme.st1q.horiz(<vscale x 16 x i1> %pg, i128* %ptr, i64 8, i32 0)
- call void @llvm.aarch64.sme.st1q.horiz(<vscale x 16 x i1> %pg, i128* %ptr, i64 9, i32 0)
- call void @llvm.aarch64.sme.st1q.horiz(<vscale x 16 x i1> %pg, i128* %ptr, i64 10, i32 0)
- call void @llvm.aarch64.sme.st1q.horiz(<vscale x 16 x i1> %pg, i128* %ptr, i64 11, i32 0)
- call void @llvm.aarch64.sme.st1q.horiz(<vscale x 16 x i1> %pg, i128* %ptr, i64 12, i32 0)
- call void @llvm.aarch64.sme.st1q.horiz(<vscale x 16 x i1> %pg, i128* %ptr, i64 13, i32 0)
- call void @llvm.aarch64.sme.st1q.horiz(<vscale x 16 x i1> %pg, i128* %ptr, i64 14, i32 0)
- call void @llvm.aarch64.sme.st1q.horiz(<vscale x 16 x i1> %pg, i128* %ptr, i64 15, i32 0)
- call void @llvm.aarch64.sme.st1q.vert(<vscale x 16 x i1> %pg, i128* %ptr, i64 0, i32 0)
- call void @llvm.aarch64.sme.st1q.vert(<vscale x 16 x i1> %pg, i128* %ptr, i64 1, i32 0)
- call void @llvm.aarch64.sme.st1q.vert(<vscale x 16 x i1> %pg, i128* %ptr, i64 2, i32 0)
- call void @llvm.aarch64.sme.st1q.vert(<vscale x 16 x i1> %pg, i128* %ptr, i64 3, i32 0)
- call void @llvm.aarch64.sme.st1q.vert(<vscale x 16 x i1> %pg, i128* %ptr, i64 4, i32 0)
- call void @llvm.aarch64.sme.st1q.vert(<vscale x 16 x i1> %pg, i128* %ptr, i64 5, i32 0)
- call void @llvm.aarch64.sme.st1q.vert(<vscale x 16 x i1> %pg, i128* %ptr, i64 6, i32 0)
- call void @llvm.aarch64.sme.st1q.vert(<vscale x 16 x i1> %pg, i128* %ptr, i64 7, i32 0)
- call void @llvm.aarch64.sme.st1q.vert(<vscale x 16 x i1> %pg, i128* %ptr, i64 8, i32 0)
- call void @llvm.aarch64.sme.st1q.vert(<vscale x 16 x i1> %pg, i128* %ptr, i64 9, i32 0)
- call void @llvm.aarch64.sme.st1q.vert(<vscale x 16 x i1> %pg, i128* %ptr, i64 10, i32 0)
- call void @llvm.aarch64.sme.st1q.vert(<vscale x 16 x i1> %pg, i128* %ptr, i64 11, i32 0)
- call void @llvm.aarch64.sme.st1q.vert(<vscale x 16 x i1> %pg, i128* %ptr, i64 12, i32 0)
- call void @llvm.aarch64.sme.st1q.vert(<vscale x 16 x i1> %pg, i128* %ptr, i64 13, i32 0)
- call void @llvm.aarch64.sme.st1q.vert(<vscale x 16 x i1> %pg, i128* %ptr, i64 14, i32 0)
- call void @llvm.aarch64.sme.st1q.vert(<vscale x 16 x i1> %pg, i128* %ptr, i64 15, i32 0)
+ call void @llvm.aarch64.sme.st1q.horiz(<vscale x 16 x i1> %pg, ptr %ptr, i64 0, i32 0)
+ call void @llvm.aarch64.sme.st1q.horiz(<vscale x 16 x i1> %pg, ptr %ptr, i64 1, i32 0)
+ call void @llvm.aarch64.sme.st1q.horiz(<vscale x 16 x i1> %pg, ptr %ptr, i64 2, i32 0)
+ call void @llvm.aarch64.sme.st1q.horiz(<vscale x 16 x i1> %pg, ptr %ptr, i64 3, i32 0)
+ call void @llvm.aarch64.sme.st1q.horiz(<vscale x 16 x i1> %pg, ptr %ptr, i64 4, i32 0)
+ call void @llvm.aarch64.sme.st1q.horiz(<vscale x 16 x i1> %pg, ptr %ptr, i64 5, i32 0)
+ call void @llvm.aarch64.sme.st1q.horiz(<vscale x 16 x i1> %pg, ptr %ptr, i64 6, i32 0)
+ call void @llvm.aarch64.sme.st1q.horiz(<vscale x 16 x i1> %pg, ptr %ptr, i64 7, i32 0)
+ call void @llvm.aarch64.sme.st1q.horiz(<vscale x 16 x i1> %pg, ptr %ptr, i64 8, i32 0)
+ call void @llvm.aarch64.sme.st1q.horiz(<vscale x 16 x i1> %pg, ptr %ptr, i64 9, i32 0)
+ call void @llvm.aarch64.sme.st1q.horiz(<vscale x 16 x i1> %pg, ptr %ptr, i64 10, i32 0)
+ call void @llvm.aarch64.sme.st1q.horiz(<vscale x 16 x i1> %pg, ptr %ptr, i64 11, i32 0)
+ call void @llvm.aarch64.sme.st1q.horiz(<vscale x 16 x i1> %pg, ptr %ptr, i64 12, i32 0)
+ call void @llvm.aarch64.sme.st1q.horiz(<vscale x 16 x i1> %pg, ptr %ptr, i64 13, i32 0)
+ call void @llvm.aarch64.sme.st1q.horiz(<vscale x 16 x i1> %pg, ptr %ptr, i64 14, i32 0)
+ call void @llvm.aarch64.sme.st1q.horiz(<vscale x 16 x i1> %pg, ptr %ptr, i64 15, i32 0)
+ call void @llvm.aarch64.sme.st1q.vert(<vscale x 16 x i1> %pg, ptr %ptr, i64 0, i32 0)
+ call void @llvm.aarch64.sme.st1q.vert(<vscale x 16 x i1> %pg, ptr %ptr, i64 1, i32 0)
+ call void @llvm.aarch64.sme.st1q.vert(<vscale x 16 x i1> %pg, ptr %ptr, i64 2, i32 0)
+ call void @llvm.aarch64.sme.st1q.vert(<vscale x 16 x i1> %pg, ptr %ptr, i64 3, i32 0)
+ call void @llvm.aarch64.sme.st1q.vert(<vscale x 16 x i1> %pg, ptr %ptr, i64 4, i32 0)
+ call void @llvm.aarch64.sme.st1q.vert(<vscale x 16 x i1> %pg, ptr %ptr, i64 5, i32 0)
+ call void @llvm.aarch64.sme.st1q.vert(<vscale x 16 x i1> %pg, ptr %ptr, i64 6, i32 0)
+ call void @llvm.aarch64.sme.st1q.vert(<vscale x 16 x i1> %pg, ptr %ptr, i64 7, i32 0)
+ call void @llvm.aarch64.sme.st1q.vert(<vscale x 16 x i1> %pg, ptr %ptr, i64 8, i32 0)
+ call void @llvm.aarch64.sme.st1q.vert(<vscale x 16 x i1> %pg, ptr %ptr, i64 9, i32 0)
+ call void @llvm.aarch64.sme.st1q.vert(<vscale x 16 x i1> %pg, ptr %ptr, i64 10, i32 0)
+ call void @llvm.aarch64.sme.st1q.vert(<vscale x 16 x i1> %pg, ptr %ptr, i64 11, i32 0)
+ call void @llvm.aarch64.sme.st1q.vert(<vscale x 16 x i1> %pg, ptr %ptr, i64 12, i32 0)
+ call void @llvm.aarch64.sme.st1q.vert(<vscale x 16 x i1> %pg, ptr %ptr, i64 13, i32 0)
+ call void @llvm.aarch64.sme.st1q.vert(<vscale x 16 x i1> %pg, ptr %ptr, i64 14, i32 0)
+ call void @llvm.aarch64.sme.st1q.vert(<vscale x 16 x i1> %pg, ptr %ptr, i64 15, i32 0)
ret void;
}
-define void @st1q_with_addr_offset(<vscale x 16 x i1> %pg, i128* %ptr, i64 %index) {
+define void @st1q_with_addr_offset(<vscale x 16 x i1> %pg, ptr %ptr, i64 %index) {
; CHECK-LABEL: st1q_with_addr_offset:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w12, wzr
; CHECK-NEXT: st1q {za0h.q[w12, 0]}, p0, [x0, x1, lsl #4]
; CHECK-NEXT: st1q {za15v.q[w12, 0]}, p0, [x0, x1, lsl #4]
; CHECK-NEXT: ret
- %base = getelementptr i128, i128* %ptr, i64 %index
- call void @llvm.aarch64.sme.st1q.horiz(<vscale x 16 x i1> %pg, i128* %base, i64 0, i32 0)
- call void @llvm.aarch64.sme.st1q.vert(<vscale x 16 x i1> %pg, i128* %base, i64 15, i32 0)
+ %base = getelementptr i128, ptr %ptr, i64 %index
+ call void @llvm.aarch64.sme.st1q.horiz(<vscale x 16 x i1> %pg, ptr %base, i64 0, i32 0)
+ call void @llvm.aarch64.sme.st1q.vert(<vscale x 16 x i1> %pg, ptr %base, i64 15, i32 0)
ret void;
}
-define void @str(i8* %ptr) {
+define void @str(ptr %ptr) {
; CHECK-LABEL: str:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w12, wzr
; CHECK-NEXT: str za[w12, 0], [x0]
; CHECK-NEXT: ret
- call void @llvm.aarch64.sme.str(i32 0, i8* %ptr)
+ call void @llvm.aarch64.sme.str(i32 0, ptr %ptr)
ret void;
}
-define void @str_with_off_15(i8* %ptr) {
+define void @str_with_off_15(ptr %ptr) {
; CHECK-LABEL: str_with_off_15:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w12, wzr
; CHECK-NEXT: add x8, x0, #15
; CHECK-NEXT: str za[w12, 0], [x8]
; CHECK-NEXT: ret
- %base = getelementptr i8, i8* %ptr, i64 15
- call void @llvm.aarch64.sme.str(i32 0, i8* %base)
+ %base = getelementptr i8, ptr %ptr, i64 15
+ call void @llvm.aarch64.sme.str(i32 0, ptr %base)
ret void;
}
-define void @str_with_off_15mulvl(i8* %ptr) {
+define void @str_with_off_15mulvl(ptr %ptr) {
; CHECK-LABEL: str_with_off_15mulvl:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w12, wzr
@@ -276,12 +276,12 @@ define void @str_with_off_15mulvl(i8* %ptr) {
; CHECK-NEXT: ret
%vscale = call i64 @llvm.vscale.i64()
%mulvl = mul i64 %vscale, 240
- %base = getelementptr i8, i8* %ptr, i64 %mulvl
- call void @llvm.aarch64.sme.str(i32 0, i8* %base)
+ %base = getelementptr i8, ptr %ptr, i64 %mulvl
+ call void @llvm.aarch64.sme.str(i32 0, ptr %base)
ret void;
}
-define void @str_with_off_16mulvl(i8* %ptr) {
+define void @str_with_off_16mulvl(ptr %ptr) {
; CHECK-LABEL: str_with_off_16mulvl:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w12, wzr
@@ -290,21 +290,21 @@ define void @str_with_off_16mulvl(i8* %ptr) {
; CHECK-NEXT: ret
%vscale = call i64 @llvm.vscale.i64()
%mulvl = mul i64 %vscale, 256
- %base = getelementptr i8, i8* %ptr, i64 %mulvl
- call void @llvm.aarch64.sme.str(i32 0, i8* %base)
+ %base = getelementptr i8, ptr %ptr, i64 %mulvl
+ call void @llvm.aarch64.sme.str(i32 0, ptr %base)
ret void;
}
-declare void @llvm.aarch64.sme.st1b.horiz(<vscale x 16 x i1>, i8*, i64, i32)
-declare void @llvm.aarch64.sme.st1h.horiz(<vscale x 16 x i1>, i16*, i64, i32)
-declare void @llvm.aarch64.sme.st1w.horiz(<vscale x 16 x i1>, i32*, i64, i32)
-declare void @llvm.aarch64.sme.st1d.horiz(<vscale x 16 x i1>, i64*, i64, i32)
-declare void @llvm.aarch64.sme.st1q.horiz(<vscale x 16 x i1>, i128*, i64, i32)
-declare void @llvm.aarch64.sme.st1b.vert(<vscale x 16 x i1>, i8*, i64, i32)
-declare void @llvm.aarch64.sme.st1h.vert(<vscale x 16 x i1>, i16*, i64, i32)
-declare void @llvm.aarch64.sme.st1w.vert(<vscale x 16 x i1>, i32*, i64, i32)
-declare void @llvm.aarch64.sme.st1d.vert(<vscale x 16 x i1>, i64*, i64, i32)
-declare void @llvm.aarch64.sme.st1q.vert(<vscale x 16 x i1>, i128*, i64, i32)
+declare void @llvm.aarch64.sme.st1b.horiz(<vscale x 16 x i1>, ptr, i64, i32)
+declare void @llvm.aarch64.sme.st1h.horiz(<vscale x 16 x i1>, ptr, i64, i32)
+declare void @llvm.aarch64.sme.st1w.horiz(<vscale x 16 x i1>, ptr, i64, i32)
+declare void @llvm.aarch64.sme.st1d.horiz(<vscale x 16 x i1>, ptr, i64, i32)
+declare void @llvm.aarch64.sme.st1q.horiz(<vscale x 16 x i1>, ptr, i64, i32)
+declare void @llvm.aarch64.sme.st1b.vert(<vscale x 16 x i1>, ptr, i64, i32)
+declare void @llvm.aarch64.sme.st1h.vert(<vscale x 16 x i1>, ptr, i64, i32)
+declare void @llvm.aarch64.sme.st1w.vert(<vscale x 16 x i1>, ptr, i64, i32)
+declare void @llvm.aarch64.sme.st1d.vert(<vscale x 16 x i1>, ptr, i64, i32)
+declare void @llvm.aarch64.sme.st1q.vert(<vscale x 16 x i1>, ptr, i64, i32)
-declare void @llvm.aarch64.sme.str(i32, i8*)
+declare void @llvm.aarch64.sme.str(i32, ptr)
declare i64 @llvm.vscale.i64()