[Mlir-commits] [clang] [llvm] [mlir] [AArch64][llvm] Improve codegen for svldr_vnum_za/svstr_vnum_za (PR #175785)
Jonathan Thackray
llvmlistbot at llvm.org
Tue Apr 21 09:08:33 PDT 2026
https://github.com/jthackray updated https://github.com/llvm/llvm-project/pull/175785
>From 67e60bc2fabe362929a0a6b721403b454bbd0740 Mon Sep 17 00:00:00 2001
From: Jonathan Thackray <jonathan.thackray at arm.com>
Date: Tue, 13 Jan 2026 14:37:20 +0000
Subject: [PATCH] [AArch64][llvm] Improve codegen for
svldr_vnum_za/svstr_vnum_za
When compiling `svldr_vnum_za` or `svstr_vnum_za`, the generated
assembly contains a superfluous `SXTW` instruction (GCC does not
emit one); it should be removed. See https://godbolt.org/z/sz4s79rf8
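In clang the `vnum` argument is an `int64_t`, but the LLVM intrinsic
declares it as `i32`. The extra `SXTW` is due to a call to
`DAG.getNode(ISD::SIGN_EXTEND...)` in `LowerSMELdrStr`, which widens
the value back to 64 bits. Make them both 64-bit so that the extra
`SXTW` goes away.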
---
clang/lib/CodeGen/TargetBuiltins/ARM.cpp | 6 +-
.../AArch64/sme-intrinsics/acle_sme_ldr.c | 22 ++--
.../AArch64/sme-intrinsics/acle_sme_str.c | 22 ++--
llvm/include/llvm/IR/IntrinsicsAArch64.td | 2 +-
.../Target/AArch64/AArch64ISelLowering.cpp | 16 +--
.../CodeGen/AArch64/sme-intrinsics-loads.ll | 117 +++++++++---------
.../CodeGen/AArch64/sme-intrinsics-stores.ll | 117 +++++++++---------
.../Dialect/ArmSME/IR/ArmSMEIntrinsicOps.td | 2 +-
mlir/test/Target/LLVMIR/arm-sme.mlir | 3 +-
9 files changed, 147 insertions(+), 160 deletions(-)
diff --git a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
index c7dc15b8cadc8..832067e68541b 100644
--- a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
@@ -3839,10 +3839,10 @@ Value *CodeGenFunction::EmitSMELdrStr(const SVETypeFlags &TypeFlags,
SmallVectorImpl<Value *> &Ops,
unsigned IntID) {
if (Ops.size() == 2)
- Ops.push_back(Builder.getInt32(0));
+ Ops.push_back(Builder.getInt64(0));
else
- Ops[2] = Builder.CreateIntCast(Ops[2], Int32Ty, true);
- Function *F = CGM.getIntrinsic(IntID, Ops[1]->getType());
+ Ops[2] = Builder.CreateIntCast(Ops[2], Int64Ty, true);
+ Function *F = CGM.getIntrinsic(IntID, {});
return Builder.CreateCall(F, Ops);
}
diff --git a/clang/test/CodeGen/AArch64/sme-intrinsics/acle_sme_ldr.c b/clang/test/CodeGen/AArch64/sme-intrinsics/acle_sme_ldr.c
index e1a65542dc912..3d6c3d13931c3 100644
--- a/clang/test/CodeGen/AArch64/sme-intrinsics/acle_sme_ldr.c
+++ b/clang/test/CodeGen/AArch64/sme-intrinsics/acle_sme_ldr.c
@@ -9,13 +9,13 @@
// CHECK-C-LABEL: define dso_local void @test_svldr_vnum_za(
// CHECK-C-SAME: i32 noundef [[SLICE_BASE:%.*]], ptr noundef [[PTR:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
// CHECK-C-NEXT: entry:
-// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.ldr.p0(i32 [[SLICE_BASE]], ptr [[PTR]], i32 0)
+// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.ldr(i32 [[SLICE_BASE]], ptr [[PTR]], i64 0)
// CHECK-C-NEXT: ret void
//
// CHECK-CXX-LABEL: define dso_local void @_Z18test_svldr_vnum_zajPKv(
// CHECK-CXX-SAME: i32 noundef [[SLICE_BASE:%.*]], ptr noundef [[PTR:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
// CHECK-CXX-NEXT: entry:
-// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ldr.p0(i32 [[SLICE_BASE]], ptr [[PTR]], i32 0)
+// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ldr(i32 [[SLICE_BASE]], ptr [[PTR]], i64 0)
// CHECK-CXX-NEXT: ret void
//
void test_svldr_vnum_za(uint32_t slice_base, const void *ptr) __arm_out("za") {
@@ -25,13 +25,13 @@ void test_svldr_vnum_za(uint32_t slice_base, const void *ptr) __arm_out("za") {
// CHECK-C-LABEL: define dso_local void @test_svldr_vnum_za_1(
// CHECK-C-SAME: i32 noundef [[SLICE_BASE:%.*]], ptr noundef [[PTR:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-C-NEXT: entry:
-// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.ldr.p0(i32 [[SLICE_BASE]], ptr [[PTR]], i32 15)
+// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.ldr(i32 [[SLICE_BASE]], ptr [[PTR]], i64 15)
// CHECK-C-NEXT: ret void
//
// CHECK-CXX-LABEL: define dso_local void @_Z20test_svldr_vnum_za_1jPKv(
// CHECK-CXX-SAME: i32 noundef [[SLICE_BASE:%.*]], ptr noundef [[PTR:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-CXX-NEXT: entry:
-// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ldr.p0(i32 [[SLICE_BASE]], ptr [[PTR]], i32 15)
+// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ldr(i32 [[SLICE_BASE]], ptr [[PTR]], i64 15)
// CHECK-CXX-NEXT: ret void
//
void test_svldr_vnum_za_1(uint32_t slice_base, const void *ptr) __arm_out("za") {
@@ -41,13 +41,13 @@ void test_svldr_vnum_za_1(uint32_t slice_base, const void *ptr) __arm_out("za")
// CHECK-C-LABEL: define dso_local void @test_svldr_za(
// CHECK-C-SAME: i32 noundef [[SLICE_BASE:%.*]], ptr noundef [[PTR:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-C-NEXT: entry:
-// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.ldr.p0(i32 [[SLICE_BASE]], ptr [[PTR]], i32 0)
+// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.ldr(i32 [[SLICE_BASE]], ptr [[PTR]], i64 0)
// CHECK-C-NEXT: ret void
//
// CHECK-CXX-LABEL: define dso_local void @_Z13test_svldr_zajPKv(
// CHECK-CXX-SAME: i32 noundef [[SLICE_BASE:%.*]], ptr noundef [[PTR:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-CXX-NEXT: entry:
-// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ldr.p0(i32 [[SLICE_BASE]], ptr [[PTR]], i32 0)
+// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ldr(i32 [[SLICE_BASE]], ptr [[PTR]], i64 0)
// CHECK-CXX-NEXT: ret void
//
void test_svldr_za(uint32_t slice_base, const void *ptr) __arm_out("za") {
@@ -57,15 +57,13 @@ void test_svldr_za(uint32_t slice_base, const void *ptr) __arm_out("za") {
// CHECK-C-LABEL: define dso_local void @test_svldr_vnum_za_var(
// CHECK-C-SAME: i32 noundef [[SLICE_BASE:%.*]], ptr noundef [[PTR:%.*]], i64 noundef [[VNUM:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-C-NEXT: entry:
-// CHECK-C-NEXT: [[TMP0:%.*]] = trunc i64 [[VNUM]] to i32
-// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.ldr.p0(i32 [[SLICE_BASE]], ptr [[PTR]], i32 [[TMP0]])
+// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.ldr(i32 [[SLICE_BASE]], ptr [[PTR]], i64 [[VNUM]])
// CHECK-C-NEXT: ret void
//
// CHECK-CXX-LABEL: define dso_local void @_Z22test_svldr_vnum_za_varjPKvl(
// CHECK-CXX-SAME: i32 noundef [[SLICE_BASE:%.*]], ptr noundef [[PTR:%.*]], i64 noundef [[VNUM:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-CXX-NEXT: entry:
-// CHECK-CXX-NEXT: [[TMP0:%.*]] = trunc i64 [[VNUM]] to i32
-// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ldr.p0(i32 [[SLICE_BASE]], ptr [[PTR]], i32 [[TMP0]])
+// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ldr(i32 [[SLICE_BASE]], ptr [[PTR]], i64 [[VNUM]])
// CHECK-CXX-NEXT: ret void
//
void test_svldr_vnum_za_var(uint32_t slice_base, const void *ptr, int64_t vnum) __arm_out("za") {
@@ -75,13 +73,13 @@ void test_svldr_vnum_za_var(uint32_t slice_base, const void *ptr, int64_t vnum)
// CHECK-C-LABEL: define dso_local void @test_svldr_vnum_za_2(
// CHECK-C-SAME: i32 noundef [[SLICE_BASE:%.*]], ptr noundef [[PTR:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-C-NEXT: entry:
-// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.ldr.p0(i32 [[SLICE_BASE]], ptr [[PTR]], i32 16)
+// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.ldr(i32 [[SLICE_BASE]], ptr [[PTR]], i64 16)
// CHECK-C-NEXT: ret void
//
// CHECK-CXX-LABEL: define dso_local void @_Z20test_svldr_vnum_za_2jPKv(
// CHECK-CXX-SAME: i32 noundef [[SLICE_BASE:%.*]], ptr noundef [[PTR:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-CXX-NEXT: entry:
-// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ldr.p0(i32 [[SLICE_BASE]], ptr [[PTR]], i32 16)
+// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ldr(i32 [[SLICE_BASE]], ptr [[PTR]], i64 16)
// CHECK-CXX-NEXT: ret void
//
void test_svldr_vnum_za_2(uint32_t slice_base, const void *ptr) __arm_out("za") {
diff --git a/clang/test/CodeGen/AArch64/sme-intrinsics/acle_sme_str.c b/clang/test/CodeGen/AArch64/sme-intrinsics/acle_sme_str.c
index 64203ebe71d8e..e6c96ef4281f7 100644
--- a/clang/test/CodeGen/AArch64/sme-intrinsics/acle_sme_str.c
+++ b/clang/test/CodeGen/AArch64/sme-intrinsics/acle_sme_str.c
@@ -9,13 +9,13 @@
// CHECK-C-LABEL: define dso_local void @test_svstr_vnum_za(
// CHECK-C-SAME: i32 noundef [[SLICE_BASE:%.*]], ptr noundef [[PTR:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
// CHECK-C-NEXT: entry:
-// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.str.p0(i32 [[SLICE_BASE]], ptr [[PTR]], i32 0)
+// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.str(i32 [[SLICE_BASE]], ptr [[PTR]], i64 0)
// CHECK-C-NEXT: ret void
//
// CHECK-CXX-LABEL: define dso_local void @_Z18test_svstr_vnum_zajPv(
// CHECK-CXX-SAME: i32 noundef [[SLICE_BASE:%.*]], ptr noundef [[PTR:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
// CHECK-CXX-NEXT: entry:
-// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.str.p0(i32 [[SLICE_BASE]], ptr [[PTR]], i32 0)
+// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.str(i32 [[SLICE_BASE]], ptr [[PTR]], i64 0)
// CHECK-CXX-NEXT: ret void
//
void test_svstr_vnum_za(uint32_t slice_base, void *ptr) __arm_in("za") {
@@ -25,13 +25,13 @@ void test_svstr_vnum_za(uint32_t slice_base, void *ptr) __arm_in("za") {
// CHECK-C-LABEL: define dso_local void @test_svstr_vnum_za_1(
// CHECK-C-SAME: i32 noundef [[SLICE_BASE:%.*]], ptr noundef [[PTR:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-C-NEXT: entry:
-// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.str.p0(i32 [[SLICE_BASE]], ptr [[PTR]], i32 15)
+// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.str(i32 [[SLICE_BASE]], ptr [[PTR]], i64 15)
// CHECK-C-NEXT: ret void
//
// CHECK-CXX-LABEL: define dso_local void @_Z20test_svstr_vnum_za_1jPv(
// CHECK-CXX-SAME: i32 noundef [[SLICE_BASE:%.*]], ptr noundef [[PTR:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-CXX-NEXT: entry:
-// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.str.p0(i32 [[SLICE_BASE]], ptr [[PTR]], i32 15)
+// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.str(i32 [[SLICE_BASE]], ptr [[PTR]], i64 15)
// CHECK-CXX-NEXT: ret void
//
void test_svstr_vnum_za_1(uint32_t slice_base, void *ptr) __arm_in("za") {
@@ -41,13 +41,13 @@ void test_svstr_vnum_za_1(uint32_t slice_base, void *ptr) __arm_in("za") {
// CHECK-C-LABEL: define dso_local void @test_svstr_za(
// CHECK-C-SAME: i32 noundef [[SLICE_BASE:%.*]], ptr noundef [[PTR:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-C-NEXT: entry:
-// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.str.p0(i32 [[SLICE_BASE]], ptr [[PTR]], i32 0)
+// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.str(i32 [[SLICE_BASE]], ptr [[PTR]], i64 0)
// CHECK-C-NEXT: ret void
//
// CHECK-CXX-LABEL: define dso_local void @_Z13test_svstr_zajPv(
// CHECK-CXX-SAME: i32 noundef [[SLICE_BASE:%.*]], ptr noundef [[PTR:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-CXX-NEXT: entry:
-// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.str.p0(i32 [[SLICE_BASE]], ptr [[PTR]], i32 0)
+// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.str(i32 [[SLICE_BASE]], ptr [[PTR]], i64 0)
// CHECK-CXX-NEXT: ret void
//
void test_svstr_za(uint32_t slice_base, void *ptr) __arm_in("za") {
@@ -57,15 +57,13 @@ void test_svstr_za(uint32_t slice_base, void *ptr) __arm_in("za") {
// CHECK-C-LABEL: define dso_local void @test_svstr_vnum_za_var(
// CHECK-C-SAME: i32 noundef [[SLICE_BASE:%.*]], ptr noundef [[PTR:%.*]], i64 noundef [[VNUM:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-C-NEXT: entry:
-// CHECK-C-NEXT: [[TMP0:%.*]] = trunc i64 [[VNUM]] to i32
-// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.str.p0(i32 [[SLICE_BASE]], ptr [[PTR]], i32 [[TMP0]])
+// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.str(i32 [[SLICE_BASE]], ptr [[PTR]], i64 [[VNUM]])
// CHECK-C-NEXT: ret void
//
// CHECK-CXX-LABEL: define dso_local void @_Z22test_svstr_vnum_za_varjPvl(
// CHECK-CXX-SAME: i32 noundef [[SLICE_BASE:%.*]], ptr noundef [[PTR:%.*]], i64 noundef [[VNUM:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-CXX-NEXT: entry:
-// CHECK-CXX-NEXT: [[TMP0:%.*]] = trunc i64 [[VNUM]] to i32
-// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.str.p0(i32 [[SLICE_BASE]], ptr [[PTR]], i32 [[TMP0]])
+// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.str(i32 [[SLICE_BASE]], ptr [[PTR]], i64 [[VNUM]])
// CHECK-CXX-NEXT: ret void
//
void test_svstr_vnum_za_var(uint32_t slice_base, void *ptr, int64_t vnum) __arm_in("za") {
@@ -75,13 +73,13 @@ void test_svstr_vnum_za_var(uint32_t slice_base, void *ptr, int64_t vnum) __arm_
// CHECK-C-LABEL: define dso_local void @test_svstr_vnum_za_2(
// CHECK-C-SAME: i32 noundef [[SLICE_BASE:%.*]], ptr noundef [[PTR:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-C-NEXT: entry:
-// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.str.p0(i32 [[SLICE_BASE]], ptr [[PTR]], i32 16)
+// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.str(i32 [[SLICE_BASE]], ptr [[PTR]], i64 16)
// CHECK-C-NEXT: ret void
//
// CHECK-CXX-LABEL: define dso_local void @_Z20test_svstr_vnum_za_2jPv(
// CHECK-CXX-SAME: i32 noundef [[SLICE_BASE:%.*]], ptr noundef [[PTR:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-CXX-NEXT: entry:
-// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.str.p0(i32 [[SLICE_BASE]], ptr [[PTR]], i32 16)
+// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.str(i32 [[SLICE_BASE]], ptr [[PTR]], i64 16)
// CHECK-CXX-NEXT: ret void
//
void test_svstr_vnum_za_2(uint32_t slice_base, void *ptr) __arm_in("za") {
diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td
index 578f54561910b..1f39b1de81bbc 100644
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -2966,7 +2966,7 @@ let TargetPrefix = "aarch64" in {
// Spill + fill
class SME_LDR_STR_ZA_Intrinsic
- : DefaultAttrsIntrinsic<[], [llvm_i32_ty, llvm_anyptr_ty, llvm_i32_ty], [IntrInaccessibleMemOrArgMemOnly]>;
+ : DefaultAttrsIntrinsic<[], [llvm_i32_ty, llvm_ptr_ty, llvm_i64_ty], [IntrInaccessibleMemOrArgMemOnly]>;
def int_aarch64_sme_ldr : SME_LDR_STR_ZA_Intrinsic;
def int_aarch64_sme_str : SME_LDR_STR_ZA_Intrinsic;
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 2b08a89566de9..1222973e94294 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -6196,7 +6196,7 @@ SDValue LowerSMELdrStr(SDValue N, SelectionDAG &DAG, bool IsLoad) {
SDValue TileSlice = N->getOperand(2);
SDValue Base = N->getOperand(3);
SDValue VecNum = N->getOperand(4);
- int32_t ConstAddend = 0;
+ int64_t ConstAddend = 0;
SDValue VarAddend = VecNum;
// If the vnum is an add of an immediate, we can fold it into the instruction
@@ -6210,10 +6210,10 @@ SDValue LowerSMELdrStr(SDValue N, SelectionDAG &DAG, bool IsLoad) {
}
int32_t ImmAddend = ConstAddend % 16;
- if (int32_t C = (ConstAddend - ImmAddend)) {
- SDValue CVal = DAG.getTargetConstant(C, DL, MVT::i32);
+ if (int64_t C = (ConstAddend - ImmAddend)) {
+ SDValue CVal = DAG.getTargetConstant(C, DL, MVT::i64);
VarAddend = VarAddend
- ? DAG.getNode(ISD::ADD, DL, MVT::i32, {VarAddend, CVal})
+ ? DAG.getNode(ISD::ADD, DL, MVT::i64, {VarAddend, CVal})
: CVal;
}
@@ -6223,12 +6223,12 @@ SDValue LowerSMELdrStr(SDValue N, SelectionDAG &DAG, bool IsLoad) {
DAG.getConstant(1, DL, MVT::i32));
// Multiply SVL and vnum then add it to the base
- SDValue Mul = DAG.getNode(
- ISD::MUL, DL, MVT::i64,
- {SVL, DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, VarAddend)});
+ SDValue Mul = DAG.getNode(ISD::MUL, DL, MVT::i64, {SVL, VarAddend});
Base = DAG.getNode(ISD::ADD, DL, MVT::i64, {Base, Mul});
+
// Just add vnum to the tileslice
- TileSlice = DAG.getNode(ISD::ADD, DL, MVT::i32, {TileSlice, VarAddend});
+ SDValue VarAddend32 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, VarAddend);
+ TileSlice = DAG.getNode(ISD::ADD, DL, MVT::i32, {TileSlice, VarAddend32});
}
return DAG.getNode(IsLoad ? AArch64ISD::SME_ZA_LDR : AArch64ISD::SME_ZA_STR,
diff --git a/llvm/test/CodeGen/AArch64/sme-intrinsics-loads.ll b/llvm/test/CodeGen/AArch64/sme-intrinsics-loads.ll
index d40618f2678b6..22b27831f4b3d 100644
--- a/llvm/test/CodeGen/AArch64/sme-intrinsics-loads.ll
+++ b/llvm/test/CodeGen/AArch64/sme-intrinsics-loads.ll
@@ -252,7 +252,7 @@ define void @ldr(ptr %ptr) {
; CHECK-NEXT: mov w12, wzr
; CHECK-NEXT: ldr za[w12, 0], [x0]
; CHECK-NEXT: ret
- call void @llvm.aarch64.sme.ldr(i32 0, ptr %ptr, i32 0)
+ call void @llvm.aarch64.sme.ldr(i32 0, ptr %ptr, i64 0)
ret void;
}
@@ -264,7 +264,7 @@ define void @ldr_with_off_15(ptr %ptr) {
; CHECK-NEXT: ldr za[w12, 0], [x8]
; CHECK-NEXT: ret
%base = getelementptr i8, ptr %ptr, i64 15
- call void @llvm.aarch64.sme.ldr(i32 15, ptr %base, i32 0)
+ call void @llvm.aarch64.sme.ldr(i32 15, ptr %base, i64 0)
ret void;
}
@@ -278,7 +278,7 @@ define void @ldr_with_off_15mulvl(ptr %ptr) {
%vscale = call i64 @llvm.vscale.i64()
%mulvl = mul i64 %vscale, 240
%base = getelementptr i8, ptr %ptr, i64 %mulvl
- call void @llvm.aarch64.sme.ldr(i32 15, ptr %base, i32 0)
+ call void @llvm.aarch64.sme.ldr(i32 15, ptr %base, i64 0)
ret void;
}
@@ -292,20 +292,19 @@ define void @ldr_with_off_16mulvl(ptr %ptr) {
%vscale = call i64 @llvm.vscale.i64()
%mulvl = mul i64 %vscale, 256
%base = getelementptr i8, ptr %ptr, i64 %mulvl
- call void @llvm.aarch64.sme.ldr(i32 16, ptr %base, i32 0)
+ call void @llvm.aarch64.sme.ldr(i32 16, ptr %base, i64 0)
ret void;
}
-define void @ldr_with_off_var(ptr %base, i32 %off) {
+define void @ldr_with_off_var(ptr %base, i64 %off) {
; CHECK-LABEL: ldr_with_off_var:
; CHECK: // %bb.0:
-; CHECK-NEXT: sxtw x8, w1
-; CHECK-NEXT: rdsvl x9, #1
+; CHECK-NEXT: rdsvl x8, #1
; CHECK-NEXT: add w12, w1, #16
-; CHECK-NEXT: madd x8, x9, x8, x0
+; CHECK-NEXT: madd x8, x8, x1, x0
; CHECK-NEXT: ldr za[w12, 0], [x8]
; CHECK-NEXT: ret
- call void @llvm.aarch64.sme.ldr(i32 16, ptr %base, i32 %off)
+ call void @llvm.aarch64.sme.ldr(i32 16, ptr %base, i64 %off)
ret void;
}
@@ -315,7 +314,7 @@ define void @ldr_with_off_15imm(ptr %base) {
; CHECK-NEXT: mov w12, #16 // =0x10
; CHECK-NEXT: ldr za[w12, 15], [x0, #15, mul vl]
; CHECK-NEXT: ret
- call void @llvm.aarch64.sme.ldr(i32 16, ptr %base, i32 15)
+ call void @llvm.aarch64.sme.ldr(i32 16, ptr %base, i64 15)
ret void;
}
@@ -327,7 +326,7 @@ define void @ldr_with_off_16imm(ptr %base) {
; CHECK-NEXT: add x8, x0, x8, lsl #4
; CHECK-NEXT: ldr za[w12, 0], [x8]
; CHECK-NEXT: ret
- call void @llvm.aarch64.sme.ldr(i32 16, ptr %base, i32 16)
+ call void @llvm.aarch64.sme.ldr(i32 16, ptr %base, i64 16)
ret void;
}
@@ -341,10 +340,10 @@ define void @ldr_with_off_many_imm(i32 %tile_slice, ptr %ptr) {
; CHECK-NEXT: ldr za[w12, 4], [x1, #4, mul vl]
; CHECK-NEXT: ret
entry:
- tail call void @llvm.aarch64.sme.ldr(i32 %tile_slice, ptr %ptr, i32 1)
- tail call void @llvm.aarch64.sme.ldr(i32 %tile_slice, ptr %ptr, i32 2)
- tail call void @llvm.aarch64.sme.ldr(i32 %tile_slice, ptr %ptr, i32 3)
- tail call void @llvm.aarch64.sme.ldr(i32 %tile_slice, ptr %ptr, i32 4)
+ tail call void @llvm.aarch64.sme.ldr(i32 %tile_slice, ptr %ptr, i64 1)
+ tail call void @llvm.aarch64.sme.ldr(i32 %tile_slice, ptr %ptr, i64 2)
+ tail call void @llvm.aarch64.sme.ldr(i32 %tile_slice, ptr %ptr, i64 3)
+ tail call void @llvm.aarch64.sme.ldr(i32 %tile_slice, ptr %ptr, i64 4)
ret void
}
@@ -361,10 +360,10 @@ define void @ldr_with_off_many_imm_15_18(i32 %tile_slice, ptr %ptr) {
; CHECK-NEXT: ldr za[w12, 2], [x8, #2, mul vl]
; CHECK-NEXT: ret
entry:
- tail call void @llvm.aarch64.sme.ldr(i32 %tile_slice, ptr %ptr, i32 15)
- tail call void @llvm.aarch64.sme.ldr(i32 %tile_slice, ptr %ptr, i32 16)
- tail call void @llvm.aarch64.sme.ldr(i32 %tile_slice, ptr %ptr, i32 17)
- tail call void @llvm.aarch64.sme.ldr(i32 %tile_slice, ptr %ptr, i32 18)
+ tail call void @llvm.aarch64.sme.ldr(i32 %tile_slice, ptr %ptr, i64 15)
+ tail call void @llvm.aarch64.sme.ldr(i32 %tile_slice, ptr %ptr, i64 16)
+ tail call void @llvm.aarch64.sme.ldr(i32 %tile_slice, ptr %ptr, i64 17)
+ tail call void @llvm.aarch64.sme.ldr(i32 %tile_slice, ptr %ptr, i64 18)
ret void
}
@@ -380,10 +379,10 @@ define void @ldr_with_off_many_imm_16_19(i32 %tile_slice, ptr %ptr) {
; CHECK-NEXT: ldr za[w12, 3], [x8, #3, mul vl]
; CHECK-NEXT: ret
entry:
- tail call void @llvm.aarch64.sme.ldr(i32 %tile_slice, ptr %ptr, i32 16)
- tail call void @llvm.aarch64.sme.ldr(i32 %tile_slice, ptr %ptr, i32 17)
- tail call void @llvm.aarch64.sme.ldr(i32 %tile_slice, ptr %ptr, i32 18)
- tail call void @llvm.aarch64.sme.ldr(i32 %tile_slice, ptr %ptr, i32 19)
+ tail call void @llvm.aarch64.sme.ldr(i32 %tile_slice, ptr %ptr, i64 16)
+ tail call void @llvm.aarch64.sme.ldr(i32 %tile_slice, ptr %ptr, i64 17)
+ tail call void @llvm.aarch64.sme.ldr(i32 %tile_slice, ptr %ptr, i64 18)
+ tail call void @llvm.aarch64.sme.ldr(i32 %tile_slice, ptr %ptr, i64 19)
ret void
}
@@ -401,10 +400,10 @@ define void @ldr_with_off_many_imm_31_34(i32 %tile_slice, ptr %ptr) {
; CHECK-NEXT: ldr za[w12, 2], [x8, #2, mul vl]
; CHECK-NEXT: ret
entry:
- tail call void @llvm.aarch64.sme.ldr(i32 %tile_slice, ptr %ptr, i32 31)
- tail call void @llvm.aarch64.sme.ldr(i32 %tile_slice, ptr %ptr, i32 32)
- tail call void @llvm.aarch64.sme.ldr(i32 %tile_slice, ptr %ptr, i32 33)
- tail call void @llvm.aarch64.sme.ldr(i32 %tile_slice, ptr %ptr, i32 34)
+ tail call void @llvm.aarch64.sme.ldr(i32 %tile_slice, ptr %ptr, i64 31)
+ tail call void @llvm.aarch64.sme.ldr(i32 %tile_slice, ptr %ptr, i64 32)
+ tail call void @llvm.aarch64.sme.ldr(i32 %tile_slice, ptr %ptr, i64 33)
+ tail call void @llvm.aarch64.sme.ldr(i32 %tile_slice, ptr %ptr, i64 34)
ret void
}
@@ -420,60 +419,56 @@ define void @ldr_with_off_many_imm_32_35(i32 %tile_slice, ptr %ptr, i64 %vnum) {
; CHECK-NEXT: ldr za[w12, 3], [x8, #3, mul vl]
; CHECK-NEXT: ret
entry:
- tail call void @llvm.aarch64.sme.ldr(i32 %tile_slice, ptr %ptr, i32 32)
- tail call void @llvm.aarch64.sme.ldr(i32 %tile_slice, ptr %ptr, i32 33)
- tail call void @llvm.aarch64.sme.ldr(i32 %tile_slice, ptr %ptr, i32 34)
- tail call void @llvm.aarch64.sme.ldr(i32 %tile_slice, ptr %ptr, i32 35)
+ tail call void @llvm.aarch64.sme.ldr(i32 %tile_slice, ptr %ptr, i64 32)
+ tail call void @llvm.aarch64.sme.ldr(i32 %tile_slice, ptr %ptr, i64 33)
+ tail call void @llvm.aarch64.sme.ldr(i32 %tile_slice, ptr %ptr, i64 34)
+ tail call void @llvm.aarch64.sme.ldr(i32 %tile_slice, ptr %ptr, i64 35)
ret void
}
define void @ldr_with_off_many_var(i32 %tile_slice, ptr %ptr, i64 %vnum) {
; CHECK-LABEL: ldr_with_off_many_var:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: sxtw x8, w2
-; CHECK-NEXT: rdsvl x9, #1
+; CHECK-NEXT: rdsvl x8, #1
; CHECK-NEXT: add w12, w0, w2
-; CHECK-NEXT: madd x8, x9, x8, x1
+; CHECK-NEXT: madd x8, x8, x2, x1
; CHECK-NEXT: ldr za[w12, 0], [x8]
; CHECK-NEXT: ldr za[w12, 1], [x8, #1, mul vl]
; CHECK-NEXT: ldr za[w12, 2], [x8, #2, mul vl]
; CHECK-NEXT: ldr za[w12, 3], [x8, #3, mul vl]
; CHECK-NEXT: ret
entry:
- %0 = trunc i64 %vnum to i32
- tail call void @llvm.aarch64.sme.ldr(i32 %tile_slice, ptr %ptr, i32 %0)
- %1 = add i32 %0, 1
- tail call void @llvm.aarch64.sme.ldr(i32 %tile_slice, ptr %ptr, i32 %1)
- %2 = add i32 %0, 2
- tail call void @llvm.aarch64.sme.ldr(i32 %tile_slice, ptr %ptr, i32 %2)
- %3 = add i32 %0, 3
- tail call void @llvm.aarch64.sme.ldr(i32 %tile_slice, ptr %ptr, i32 %3)
+ tail call void @llvm.aarch64.sme.ldr(i32 %tile_slice, ptr %ptr, i64 %vnum)
+ %1 = add i64 %vnum, 1
+ tail call void @llvm.aarch64.sme.ldr(i32 %tile_slice, ptr %ptr, i64 %1)
+ %2 = add i64 %vnum, 2
+ tail call void @llvm.aarch64.sme.ldr(i32 %tile_slice, ptr %ptr, i64 %2)
+ %3 = add i64 %vnum, 3
+ tail call void @llvm.aarch64.sme.ldr(i32 %tile_slice, ptr %ptr, i64 %3)
ret void
}
define void @ldr_with_off_many_var_high(i32 %tile_slice, ptr %ptr, i64 %vnum) {
; CHECK-LABEL: ldr_with_off_many_var_high:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: add w8, w2, #32
-; CHECK-NEXT: rdsvl x10, #1
-; CHECK-NEXT: sxtw x9, w8
-; CHECK-NEXT: add w12, w0, w8
-; CHECK-NEXT: madd x9, x10, x9, x1
-; CHECK-NEXT: ldr za[w12, 1], [x9, #1, mul vl]
-; CHECK-NEXT: ldr za[w12, 2], [x9, #2, mul vl]
-; CHECK-NEXT: ldr za[w12, 3], [x9, #3, mul vl]
-; CHECK-NEXT: ldr za[w12, 4], [x9, #4, mul vl]
+; CHECK-NEXT: rdsvl x8, #1
+; CHECK-NEXT: add x9, x2, #32
+; CHECK-NEXT: madd x8, x8, x9, x1
+; CHECK-NEXT: add w12, w0, w9
+; CHECK-NEXT: ldr za[w12, 1], [x8, #1, mul vl]
+; CHECK-NEXT: ldr za[w12, 2], [x8, #2, mul vl]
+; CHECK-NEXT: ldr za[w12, 3], [x8, #3, mul vl]
+; CHECK-NEXT: ldr za[w12, 4], [x8, #4, mul vl]
; CHECK-NEXT: ret
entry:
- %0 = trunc i64 %vnum to i32
- %1 = add i32 %0, 33
- tail call void @llvm.aarch64.sme.ldr(i32 %tile_slice, ptr %ptr, i32 %1)
- %2 = add i32 %0, 34
- tail call void @llvm.aarch64.sme.ldr(i32 %tile_slice, ptr %ptr, i32 %2)
- %3 = add i32 %0, 35
- tail call void @llvm.aarch64.sme.ldr(i32 %tile_slice, ptr %ptr, i32 %3)
- %4 = add i32 %0, 36
- tail call void @llvm.aarch64.sme.ldr(i32 %tile_slice, ptr %ptr, i32 %4)
+ %1 = add i64 %vnum, 33
+ tail call void @llvm.aarch64.sme.ldr(i32 %tile_slice, ptr %ptr, i64 %1)
+ %2 = add i64 %vnum, 34
+ tail call void @llvm.aarch64.sme.ldr(i32 %tile_slice, ptr %ptr, i64 %2)
+ %3 = add i64 %vnum, 35
+ tail call void @llvm.aarch64.sme.ldr(i32 %tile_slice, ptr %ptr, i64 %3)
+ %4 = add i64 %vnum, 36
+ tail call void @llvm.aarch64.sme.ldr(i32 %tile_slice, ptr %ptr, i64 %4)
ret void
}
@@ -522,5 +517,5 @@ declare void @llvm.aarch64.sme.ld1w.vert(<vscale x 4 x i1>, ptr, i32, i32)
declare void @llvm.aarch64.sme.ld1d.vert(<vscale x 2 x i1>, ptr, i32, i32)
declare void @llvm.aarch64.sme.ld1q.vert(<vscale x 1 x i1>, ptr, i32, i32)
-declare void @llvm.aarch64.sme.ldr(i32, ptr, i32)
+declare void @llvm.aarch64.sme.ldr(i32, ptr, i64)
declare i64 @llvm.vscale.i64()
diff --git a/llvm/test/CodeGen/AArch64/sme-intrinsics-stores.ll b/llvm/test/CodeGen/AArch64/sme-intrinsics-stores.ll
index 03c1f28fbaa18..7f0361254625c 100644
--- a/llvm/test/CodeGen/AArch64/sme-intrinsics-stores.ll
+++ b/llvm/test/CodeGen/AArch64/sme-intrinsics-stores.ll
@@ -252,7 +252,7 @@ define void @str(ptr %ptr) {
; CHECK-NEXT: mov w12, wzr
; CHECK-NEXT: str za[w12, 0], [x0]
; CHECK-NEXT: ret
- call void @llvm.aarch64.sme.str(i32 0, ptr %ptr, i32 0)
+ call void @llvm.aarch64.sme.str(i32 0, ptr %ptr, i64 0)
ret void;
}
@@ -264,7 +264,7 @@ define void @str_with_off_15(ptr %ptr) {
; CHECK-NEXT: str za[w12, 0], [x8]
; CHECK-NEXT: ret
%base = getelementptr i8, ptr %ptr, i64 15
- call void @llvm.aarch64.sme.str(i32 15, ptr %base, i32 0)
+ call void @llvm.aarch64.sme.str(i32 15, ptr %base, i64 0)
ret void;
}
@@ -278,7 +278,7 @@ define void @str_with_off_15mulvl(ptr %ptr) {
%vscale = call i64 @llvm.vscale.i64()
%mulvl = mul i64 %vscale, 240
%base = getelementptr i8, ptr %ptr, i64 %mulvl
- call void @llvm.aarch64.sme.str(i32 15, ptr %base, i32 0)
+ call void @llvm.aarch64.sme.str(i32 15, ptr %base, i64 0)
ret void;
}
@@ -292,20 +292,19 @@ define void @str_with_off_16mulvl(ptr %ptr) {
%vscale = call i64 @llvm.vscale.i64()
%mulvl = mul i64 %vscale, 256
%base = getelementptr i8, ptr %ptr, i64 %mulvl
- call void @llvm.aarch64.sme.str(i32 16, ptr %base, i32 0)
+ call void @llvm.aarch64.sme.str(i32 16, ptr %base, i64 0)
ret void;
}
-define void @str_with_off_var(ptr %base, i32 %off) {
+define void @str_with_off_var(ptr %base, i64 %off) {
; CHECK-LABEL: str_with_off_var:
; CHECK: // %bb.0:
-; CHECK-NEXT: sxtw x8, w1
-; CHECK-NEXT: rdsvl x9, #1
+; CHECK-NEXT: rdsvl x8, #1
; CHECK-NEXT: add w12, w1, #16
-; CHECK-NEXT: madd x8, x9, x8, x0
+; CHECK-NEXT: madd x8, x8, x1, x0
; CHECK-NEXT: str za[w12, 0], [x8]
; CHECK-NEXT: ret
- call void @llvm.aarch64.sme.str(i32 16, ptr %base, i32 %off)
+ call void @llvm.aarch64.sme.str(i32 16, ptr %base, i64 %off)
ret void;
}
@@ -317,7 +316,7 @@ define void @str_with_off_15imm(ptr %ptr) {
; CHECK-NEXT: str za[w12, 15], [x8, #15, mul vl]
; CHECK-NEXT: ret
%base = getelementptr i8, ptr %ptr, i64 15
- call void @llvm.aarch64.sme.str(i32 15, ptr %base, i32 15)
+ call void @llvm.aarch64.sme.str(i32 15, ptr %base, i64 15)
ret void;
}
@@ -331,7 +330,7 @@ define void @str_with_off_16imm(ptr %ptr) {
; CHECK-NEXT: str za[w12, 0], [x8]
; CHECK-NEXT: ret
%base = getelementptr i8, ptr %ptr, i64 15
- call void @llvm.aarch64.sme.str(i32 15, ptr %base, i32 16)
+ call void @llvm.aarch64.sme.str(i32 15, ptr %base, i64 16)
ret void;
}
@@ -345,10 +344,10 @@ define void @str_with_off_many_imm(i32 %tile_slice, ptr %ptr) {
; CHECK-NEXT: str za[w12, 4], [x1, #4, mul vl]
; CHECK-NEXT: ret
entry:
- tail call void @llvm.aarch64.sme.str(i32 %tile_slice, ptr %ptr, i32 1)
- tail call void @llvm.aarch64.sme.str(i32 %tile_slice, ptr %ptr, i32 2)
- tail call void @llvm.aarch64.sme.str(i32 %tile_slice, ptr %ptr, i32 3)
- tail call void @llvm.aarch64.sme.str(i32 %tile_slice, ptr %ptr, i32 4)
+ tail call void @llvm.aarch64.sme.str(i32 %tile_slice, ptr %ptr, i64 1)
+ tail call void @llvm.aarch64.sme.str(i32 %tile_slice, ptr %ptr, i64 2)
+ tail call void @llvm.aarch64.sme.str(i32 %tile_slice, ptr %ptr, i64 3)
+ tail call void @llvm.aarch64.sme.str(i32 %tile_slice, ptr %ptr, i64 4)
ret void
}
@@ -365,10 +364,10 @@ define void @str_with_off_many_imm_15_18(i32 %tile_slice, ptr %ptr) {
; CHECK-NEXT: str za[w12, 2], [x8, #2, mul vl]
; CHECK-NEXT: ret
entry:
- tail call void @llvm.aarch64.sme.str(i32 %tile_slice, ptr %ptr, i32 15)
- tail call void @llvm.aarch64.sme.str(i32 %tile_slice, ptr %ptr, i32 16)
- tail call void @llvm.aarch64.sme.str(i32 %tile_slice, ptr %ptr, i32 17)
- tail call void @llvm.aarch64.sme.str(i32 %tile_slice, ptr %ptr, i32 18)
+ tail call void @llvm.aarch64.sme.str(i32 %tile_slice, ptr %ptr, i64 15)
+ tail call void @llvm.aarch64.sme.str(i32 %tile_slice, ptr %ptr, i64 16)
+ tail call void @llvm.aarch64.sme.str(i32 %tile_slice, ptr %ptr, i64 17)
+ tail call void @llvm.aarch64.sme.str(i32 %tile_slice, ptr %ptr, i64 18)
ret void
}
@@ -384,10 +383,10 @@ define void @str_with_off_many_imm_16_19(i32 %tile_slice, ptr %ptr) {
; CHECK-NEXT: str za[w12, 3], [x8, #3, mul vl]
; CHECK-NEXT: ret
entry:
- tail call void @llvm.aarch64.sme.str(i32 %tile_slice, ptr %ptr, i32 16)
- tail call void @llvm.aarch64.sme.str(i32 %tile_slice, ptr %ptr, i32 17)
- tail call void @llvm.aarch64.sme.str(i32 %tile_slice, ptr %ptr, i32 18)
- tail call void @llvm.aarch64.sme.str(i32 %tile_slice, ptr %ptr, i32 19)
+ tail call void @llvm.aarch64.sme.str(i32 %tile_slice, ptr %ptr, i64 16)
+ tail call void @llvm.aarch64.sme.str(i32 %tile_slice, ptr %ptr, i64 17)
+ tail call void @llvm.aarch64.sme.str(i32 %tile_slice, ptr %ptr, i64 18)
+ tail call void @llvm.aarch64.sme.str(i32 %tile_slice, ptr %ptr, i64 19)
ret void
}
@@ -405,10 +404,10 @@ define void @str_with_off_many_imm_31_34(i32 %tile_slice, ptr %ptr) {
; CHECK-NEXT: str za[w13, 2], [x8, #2, mul vl]
; CHECK-NEXT: ret
entry:
- tail call void @llvm.aarch64.sme.str(i32 %tile_slice, ptr %ptr, i32 31)
- tail call void @llvm.aarch64.sme.str(i32 %tile_slice, ptr %ptr, i32 32)
- tail call void @llvm.aarch64.sme.str(i32 %tile_slice, ptr %ptr, i32 33)
- tail call void @llvm.aarch64.sme.str(i32 %tile_slice, ptr %ptr, i32 34)
+ tail call void @llvm.aarch64.sme.str(i32 %tile_slice, ptr %ptr, i64 31)
+ tail call void @llvm.aarch64.sme.str(i32 %tile_slice, ptr %ptr, i64 32)
+ tail call void @llvm.aarch64.sme.str(i32 %tile_slice, ptr %ptr, i64 33)
+ tail call void @llvm.aarch64.sme.str(i32 %tile_slice, ptr %ptr, i64 34)
ret void
}
@@ -424,60 +423,56 @@ define void @str_with_off_many_imm_32_35(i32 %tile_slice, ptr %ptr) {
; CHECK-NEXT: str za[w12, 3], [x8, #3, mul vl]
; CHECK-NEXT: ret
entry:
- tail call void @llvm.aarch64.sme.str(i32 %tile_slice, ptr %ptr, i32 32)
- tail call void @llvm.aarch64.sme.str(i32 %tile_slice, ptr %ptr, i32 33)
- tail call void @llvm.aarch64.sme.str(i32 %tile_slice, ptr %ptr, i32 34)
- tail call void @llvm.aarch64.sme.str(i32 %tile_slice, ptr %ptr, i32 35)
+ tail call void @llvm.aarch64.sme.str(i32 %tile_slice, ptr %ptr, i64 32)
+ tail call void @llvm.aarch64.sme.str(i32 %tile_slice, ptr %ptr, i64 33)
+ tail call void @llvm.aarch64.sme.str(i32 %tile_slice, ptr %ptr, i64 34)
+ tail call void @llvm.aarch64.sme.str(i32 %tile_slice, ptr %ptr, i64 35)
ret void
}
define void @str_with_off_many_var(i32 %tile_slice, ptr %ptr, i64 %vnum) {
; CHECK-LABEL: str_with_off_many_var:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: sxtw x8, w2
-; CHECK-NEXT: rdsvl x9, #1
+; CHECK-NEXT: rdsvl x8, #1
; CHECK-NEXT: add w12, w0, w2
-; CHECK-NEXT: madd x8, x9, x8, x1
+; CHECK-NEXT: madd x8, x8, x2, x1
; CHECK-NEXT: str za[w12, 0], [x8]
; CHECK-NEXT: str za[w12, 1], [x8, #1, mul vl]
; CHECK-NEXT: str za[w12, 2], [x8, #2, mul vl]
; CHECK-NEXT: str za[w12, 3], [x8, #3, mul vl]
; CHECK-NEXT: ret
entry:
- %0 = trunc i64 %vnum to i32
- tail call void @llvm.aarch64.sme.str(i32 %tile_slice, ptr %ptr, i32 %0)
- %1 = add i32 %0, 1
- tail call void @llvm.aarch64.sme.str(i32 %tile_slice, ptr %ptr, i32 %1)
- %2 = add i32 %0, 2
- tail call void @llvm.aarch64.sme.str(i32 %tile_slice, ptr %ptr, i32 %2)
- %3 = add i32 %0, 3
- tail call void @llvm.aarch64.sme.str(i32 %tile_slice, ptr %ptr, i32 %3)
+ tail call void @llvm.aarch64.sme.str(i32 %tile_slice, ptr %ptr, i64 %vnum)
+ %1 = add i64 %vnum, 1
+ tail call void @llvm.aarch64.sme.str(i32 %tile_slice, ptr %ptr, i64 %1)
+ %2 = add i64 %vnum, 2
+ tail call void @llvm.aarch64.sme.str(i32 %tile_slice, ptr %ptr, i64 %2)
+ %3 = add i64 %vnum, 3
+ tail call void @llvm.aarch64.sme.str(i32 %tile_slice, ptr %ptr, i64 %3)
ret void
}
define void @str_with_off_many_var_high(i32 %tile_slice, ptr %ptr, i64 %vnum) {
; CHECK-LABEL: str_with_off_many_var_high:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: add w8, w2, #32
-; CHECK-NEXT: rdsvl x10, #1
-; CHECK-NEXT: sxtw x9, w8
-; CHECK-NEXT: add w12, w0, w8
-; CHECK-NEXT: madd x9, x10, x9, x1
-; CHECK-NEXT: str za[w12, 1], [x9, #1, mul vl]
-; CHECK-NEXT: str za[w12, 2], [x9, #2, mul vl]
-; CHECK-NEXT: str za[w12, 3], [x9, #3, mul vl]
-; CHECK-NEXT: str za[w12, 4], [x9, #4, mul vl]
+; CHECK-NEXT: rdsvl x8, #1
+; CHECK-NEXT: add x9, x2, #32
+; CHECK-NEXT: madd x8, x8, x9, x1
+; CHECK-NEXT: add w12, w0, w9
+; CHECK-NEXT: str za[w12, 1], [x8, #1, mul vl]
+; CHECK-NEXT: str za[w12, 2], [x8, #2, mul vl]
+; CHECK-NEXT: str za[w12, 3], [x8, #3, mul vl]
+; CHECK-NEXT: str za[w12, 4], [x8, #4, mul vl]
; CHECK-NEXT: ret
entry:
- %0 = trunc i64 %vnum to i32
- %1 = add i32 %0, 33
- tail call void @llvm.aarch64.sme.str(i32 %tile_slice, ptr %ptr, i32 %1)
- %2 = add i32 %0, 34
- tail call void @llvm.aarch64.sme.str(i32 %tile_slice, ptr %ptr, i32 %2)
- %3 = add i32 %0, 35
- tail call void @llvm.aarch64.sme.str(i32 %tile_slice, ptr %ptr, i32 %3)
- %4 = add i32 %0, 36
- tail call void @llvm.aarch64.sme.str(i32 %tile_slice, ptr %ptr, i32 %4)
+ %1 = add i64 %vnum, 33
+ tail call void @llvm.aarch64.sme.str(i32 %tile_slice, ptr %ptr, i64 %1)
+ %2 = add i64 %vnum, 34
+ tail call void @llvm.aarch64.sme.str(i32 %tile_slice, ptr %ptr, i64 %2)
+ %3 = add i64 %vnum, 35
+ tail call void @llvm.aarch64.sme.str(i32 %tile_slice, ptr %ptr, i64 %3)
+ %4 = add i64 %vnum, 36
+ tail call void @llvm.aarch64.sme.str(i32 %tile_slice, ptr %ptr, i64 %4)
ret void
}
@@ -526,5 +521,5 @@ declare void @llvm.aarch64.sme.st1w.vert(<vscale x 4 x i1>, ptr, i32, i32)
declare void @llvm.aarch64.sme.st1d.vert(<vscale x 2 x i1>, ptr, i32, i32)
declare void @llvm.aarch64.sme.st1q.vert(<vscale x 1 x i1>, ptr, i32, i32)
-declare void @llvm.aarch64.sme.str(i32, ptr, i32)
+declare void @llvm.aarch64.sme.str(i32, ptr, i64)
declare i64 @llvm.vscale.i64()
diff --git a/mlir/include/mlir/Dialect/ArmSME/IR/ArmSMEIntrinsicOps.td b/mlir/include/mlir/Dialect/ArmSME/IR/ArmSMEIntrinsicOps.td
index 0e3bb9e81f30b..40385b142b48f 100644
--- a/mlir/include/mlir/Dialect/ArmSME/IR/ArmSMEIntrinsicOps.td
+++ b/mlir/include/mlir/Dialect/ArmSME/IR/ArmSMEIntrinsicOps.td
@@ -163,7 +163,7 @@ def LLVM_aarch64_sme_str
/*overloadedOperands=*/[1]>,
Arguments<(ins Arg<I32, "Index">:$index,
Arg<LLVM_AnyPointer, "Store address", [MemWrite]>:$store_address,
- Arg<I32, "Offset">:$offset)>;
+ Arg<I64, "Offset">:$offset)>;
// Vector to tile slice
class LLVM_aarch64_sme_write<string direction>
diff --git a/mlir/test/Target/LLVMIR/arm-sme.mlir b/mlir/test/Target/LLVMIR/arm-sme.mlir
index 0a13a75618a23..ef37bfdffeed9 100644
--- a/mlir/test/Target/LLVMIR/arm-sme.mlir
+++ b/mlir/test/Target/LLVMIR/arm-sme.mlir
@@ -190,6 +190,7 @@ llvm.func @arm_sme_store(%nxv1i1 : vector<[1]xi1>,
%nxv16i1 : vector<[16]xi1>,
%ptr : !llvm.ptr) {
%c0 = llvm.mlir.constant(0 : index) : i32
+ %c0_i64 = llvm.mlir.constant(0 : i64) : i64
// CHECK: call void @llvm.aarch64.sme.st1q.horiz
"arm_sme.intr.st1q.horiz"(%nxv1i1, %ptr, %c0) <{tile_id = 0 : i32}> :
(vector<[1]xi1>, !llvm.ptr, i32) -> ()
@@ -221,7 +222,7 @@ llvm.func @arm_sme_store(%nxv1i1 : vector<[1]xi1>,
"arm_sme.intr.st1b.vert"(%nxv16i1, %ptr, %c0) <{tile_id = 0 : i32}> :
(vector<[16]xi1>, !llvm.ptr, i32) -> ()
// CHECK: call void @llvm.aarch64.sme.str
- "arm_sme.intr.str"(%c0, %ptr, %c0) : (i32, !llvm.ptr, i32) -> ()
+ "arm_sme.intr.str"(%c0, %ptr, %c0_i64) : (i32, !llvm.ptr, i64) -> ()
llvm.return
}
More information about the Mlir-commits mailing list