[clang] [AArch64][SME] Remove immediate argument restriction for svldr and svstr (PR #68908)

Sam Tebbs via cfe-commits cfe-commits at lists.llvm.org
Tue Oct 17 01:56:30 PDT 2023


https://github.com/SamTebbs33 updated https://github.com/llvm/llvm-project/pull/68908

>From 000c99324a0bd63e92e0ac056c3ff46d2b92c53e Mon Sep 17 00:00:00 2001
From: Samuel Tebbs <samuel.tebbs at arm.com>
Date: Fri, 6 Oct 2023 17:09:36 +0100
Subject: [PATCH 1/3] [AArch64][SME] Remove immediate argument restriction for
 svldr and svstr

The svldr_vnum_za and svstr_vnum_za builtins/intrinsics currently
require that the vnum argument be an immediate, but since vnum is used
to modify the base register via a mul and add, that restriction is not
necessary. This patch removes that restriction.
---
 clang/include/clang/Basic/arm_sme.td             | 10 ++++------
 clang/lib/CodeGen/CGBuiltin.cpp                  | 13 ++++++++-----
 .../aarch64-sme-intrinsics/acle_sme_ldr.c        | 16 ++++++++++++++++
 .../aarch64-sme-intrinsics/acle_sme_str.c        | 15 +++++++++++++++
 .../Sema/aarch64-sme-intrinsics/acle_sme_imm.cpp |  8 --------
 5 files changed, 43 insertions(+), 19 deletions(-)

diff --git a/clang/include/clang/Basic/arm_sme.td b/clang/include/clang/Basic/arm_sme.td
index d014900d719c338..8d85327a86b1aaf 100644
--- a/clang/include/clang/Basic/arm_sme.td
+++ b/clang/include/clang/Basic/arm_sme.td
@@ -44,10 +44,9 @@ defm SVLD1_ZA32 : ZALoad<"za32", "i", "aarch64_sme_ld1w", [ImmCheck<0, ImmCheck0
 defm SVLD1_ZA64 : ZALoad<"za64", "l", "aarch64_sme_ld1d", [ImmCheck<0, ImmCheck0_7>]>;
 defm SVLD1_ZA128 : ZALoad<"za128", "q", "aarch64_sme_ld1q", [ImmCheck<0, ImmCheck0_15>]>;
 
-def SVLDR_VNUM_ZA : MInst<"svldr_vnum_za", "vmQi", "",
+def SVLDR_VNUM_ZA : MInst<"svldr_vnum_za", "vmQl", "",
                           [IsOverloadNone, IsStreamingCompatible, IsSharedZA],
-                          MemEltTyDefault, "aarch64_sme_ldr",
-                          [ImmCheck<2, ImmCheck0_15>]>;
+                          MemEltTyDefault, "aarch64_sme_ldr">;
 
 def SVLDR_ZA : MInst<"svldr_za", "vmQ", "",
                           [IsOverloadNone, IsStreamingCompatible, IsSharedZA],
@@ -82,10 +81,9 @@ defm SVST1_ZA32 : ZAStore<"za32", "i", "aarch64_sme_st1w", [ImmCheck<0, ImmCheck
 defm SVST1_ZA64 : ZAStore<"za64", "l", "aarch64_sme_st1d", [ImmCheck<0, ImmCheck0_7>]>;
 defm SVST1_ZA128 : ZAStore<"za128", "q", "aarch64_sme_st1q", [ImmCheck<0, ImmCheck0_15>]>;
 
-def SVSTR_VNUM_ZA : MInst<"svstr_vnum_za", "vm%i", "",
+def SVSTR_VNUM_ZA : MInst<"svstr_vnum_za", "vm%l", "",
                           [IsOverloadNone, IsStreamingCompatible, IsSharedZA, IsPreservesZA],
-                          MemEltTyDefault, "aarch64_sme_str",
-                          [ImmCheck<2, ImmCheck0_15>]>;
+                          MemEltTyDefault, "aarch64_sme_str">;
 
 def SVSTR_ZA : MInst<"svstr_za", "vm%", "",
                       [IsOverloadNone, IsStreamingCompatible, IsSharedZA, IsPreservesZA],
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index bf984861bccb5cc..a8632e0254acaa6 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -9716,13 +9716,16 @@ Value *CodeGenFunction::EmitSMELdrStr(const SVETypeFlags &TypeFlags,
   if (Ops.size() == 3) {
     Function *Cntsb = CGM.getIntrinsic(Intrinsic::aarch64_sme_cntsb);
     llvm::Value *CntsbCall = Builder.CreateCall(Cntsb, {}, "svlb");
-    llvm::Value *MulVL = Builder.CreateMul(
-        CntsbCall,
-        Builder.getInt64(cast<llvm::ConstantInt>(Ops[2])->getZExtValue()),
-        "mulvl");
+
+    llvm::Value *VecNum = Ops[2];
+    if (auto *C = dyn_cast<ConstantInt>(VecNum))
+      VecNum = Builder.getInt64(C->getZExtValue());
+
+    llvm::Value *MulVL = Builder.CreateMul(CntsbCall, VecNum, "mulvl");
 
     Ops[1] = Builder.CreateGEP(Int8Ty, Ops[1], MulVL);
-    Ops[0] = EmitTileslice(Ops[0], Ops[2]);
+    Ops[0] =
+        EmitTileslice(Ops[0], Builder.CreateIntCast(VecNum, Int32Ty, true));
     Ops.erase(&Ops[2]);
   }
   Function *F = CGM.getIntrinsic(IntID, {});
diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ldr.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ldr.c
index acddc2ef50a3ddf..96b9b99b2892f9e 100644
--- a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ldr.c
+++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ldr.c
@@ -34,6 +34,22 @@ void test_svldr_vnum_za_1(uint32_t slice_base, const void *ptr) {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    tail call void @llvm.aarch64.sme.ldr(i32 [[SLICE_BASE:%.*]], ptr [[PTR:%.*]])
 // CHECK-NEXT:    ret void
+//
 void test_svldr_za(uint32_t slice_base, const void *ptr) {
   svldr_za(slice_base, ptr);
 }
+
+// CHECK-C-LABEL: @test_svldr_vnum_za_var(
+// CHECK-CXX-LABEL: @_Z22test_svldr_vnum_za_varjPKvm(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[SVLB:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb()
+// CHECK-NEXT:    [[MULVL:%.*]] = mul i64 [[SVLB]], [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP0:%.*]] = getelementptr i8, ptr [[PTR:%.*]], i64 [[MULVL]]
+// CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[VNUM:%.*]] to i32
+// CHECK-NEXT:    [[TILESLICE:%.*]] = add i32 [[TMP1]], [[SLICE_BASE:%.*]]
+// CHECK-NEXT:    tail call void @llvm.aarch64.sme.ldr(i32 [[TILESLICE]], ptr [[TMP0]])
+// CHECK-NEXT:    ret void
+//
+void test_svldr_vnum_za_var(uint32_t slice_base, const void *ptr, uint64_t vnum) {
+  svldr_vnum_za(slice_base, ptr, vnum);
+}
diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_str.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_str.c
index 2728f9ac0cd12d3..12da0030297d15b 100644
--- a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_str.c
+++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_str.c
@@ -38,3 +38,18 @@ void test_svstr_vnum_za_1(uint32_t slice_base, void *ptr) {
 void test_svstr_za(uint32_t slice_base, void *ptr) {
   svstr_za(slice_base, ptr);
 }
+
+// CHECK-C-LABEL: @test_svstr_vnum_za_var(
+// CHECK-CXX-LABEL: @_Z22test_svstr_vnum_za_varjPvm(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[SVLB:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb()
+// CHECK-NEXT:    [[MULVL:%.*]] = mul i64 [[SVLB]], [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP0:%.*]] = getelementptr i8, ptr [[PTR:%.*]], i64 [[MULVL]]
+// CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[VNUM:%.*]] to i32
+// CHECK-NEXT:    [[TILESLICE:%.*]] = add i32 [[TMP1]], [[SLICE_BASE:%.*]]
+// CHECK-NEXT:    tail call void @llvm.aarch64.sme.str(i32 [[TILESLICE]], ptr [[TMP0]])
+// CHECK-NEXT:    ret void
+//
+void test_svstr_vnum_za_var(uint32_t slice_base, void *ptr, uint64_t vnum) {
+  svstr_vnum_za(slice_base, ptr, vnum);
+}
diff --git a/clang/test/Sema/aarch64-sme-intrinsics/acle_sme_imm.cpp b/clang/test/Sema/aarch64-sme-intrinsics/acle_sme_imm.cpp
index 7475fd53b80ba2b..1faa5638c801c2d 100644
--- a/clang/test/Sema/aarch64-sme-intrinsics/acle_sme_imm.cpp
+++ b/clang/test/Sema/aarch64-sme-intrinsics/acle_sme_imm.cpp
@@ -143,11 +143,6 @@ void test_range_0_15(uint32_t slice, svbool_t pg, void *ptr) {
   // expected-error@+1 {{argument value 16 is outside the valid range [0, 15]}}
   SVE_ACLE_FUNC(svst1_ver_vnum_za128,,,)(16, slice, pg, ptr, 1);
 
-  // expected-error@+1 {{argument value 16 is outside the valid range [0, 15]}}
-  SVE_ACLE_FUNC(svldr_vnum_za,,,)(-1, ptr, 16);
-  // expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 15]}}
-  SVE_ACLE_FUNC(svstr_vnum_za,,,)(-1, ptr, -1);
-
   // expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 15]}}
   SVE_ACLE_FUNC(svread_hor_za128, _s8, _m,)(svundef_s8(), pg, -1, slice);
   // expected-error@+1 {{argument value 16 is outside the valid range [0, 15]}}
@@ -171,9 +166,6 @@ void test_constant(uint64_t u64, svbool_t pg, void *ptr) {
   SVE_ACLE_FUNC(svld1_hor_vnum_za8,,,)(u64, 0, pg, ptr, u64);  // expected-error {{argument to 'svld1_hor_vnum_za8' must be a constant integer}}
   SVE_ACLE_FUNC(svst1_hor_vnum_za32,,,)(u64, 0, pg, ptr, u64); // expected-error {{argument to 'svst1_hor_vnum_za32' must be a constant integer}}
 
-  SVE_ACLE_FUNC(svldr_vnum_za,,,)(u64, ptr, u64); // expected-error {{argument to 'svldr_vnum_za' must be a constant integer}}
-  SVE_ACLE_FUNC(svstr_vnum_za,,,)(u64, ptr, u64); // expected-error {{argument to 'svstr_vnum_za' must be a constant integer}}
-
   SVE_ACLE_FUNC(svread_ver_za16, _s16, _m,)(svundef_s16(), pg, u64, 0);  // expected-error-re {{argument to 'svread_ver_za16{{.*}}_m' must be a constant integer}}
   SVE_ACLE_FUNC(svwrite_ver_za64, _s64, _m,)(u64, 0, pg, svundef_s64()); // expected-error-re {{argument to 'svwrite_ver_za64{{.*}}_m' must be a constant integer}}
 }

>From cbbb4e2ab3598c05fd817f5bd2665b862afc7170 Mon Sep 17 00:00:00 2001
From: Samuel Tebbs <samuel.tebbs at arm.com>
Date: Tue, 17 Oct 2023 09:55:41 +0100
Subject: [PATCH 2/3] fixup! inline EmitTileslice

---
 clang/lib/CodeGen/CGBuiltin.cpp | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index a8632e0254acaa6..e036cc8db421bc0 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -9653,11 +9653,6 @@ Value *CodeGenFunction::EmitSVEMaskedStore(const CallExpr *E,
   return Store;
 }
 
-Value *CodeGenFunction::EmitTileslice(Value *Offset, Value *Base) {
-  llvm::Value *CastOffset = Builder.CreateIntCast(Offset, Int32Ty, false);
-  return Builder.CreateAdd(Base, CastOffset, "tileslice");
-}
-
 Value *CodeGenFunction::EmitSMELd1St1(const SVETypeFlags &TypeFlags,
                                       SmallVectorImpl<Value *> &Ops,
                                       unsigned IntID) {
@@ -9724,8 +9719,7 @@ Value *CodeGenFunction::EmitSMELdrStr(const SVETypeFlags &TypeFlags,
     llvm::Value *MulVL = Builder.CreateMul(CntsbCall, VecNum, "mulvl");
 
     Ops[1] = Builder.CreateGEP(Int8Ty, Ops[1], MulVL);
-    Ops[0] =
-        EmitTileslice(Ops[0], Builder.CreateIntCast(VecNum, Int32Ty, true));
+    Ops[0] = Builder.CreateAdd(Builder.CreateIntCast(VecNum, Int32Ty, true), Builder.CreateIntCast(Ops[0], Int32Ty, false), "tileslice");
     Ops.erase(&Ops[2]);
   }
   Function *F = CGM.getIntrinsic(IntID, {});

>From 50e226a2cfe940ae4a5d3fd82f65ac0090e85f2b Mon Sep 17 00:00:00 2001
From: Samuel Tebbs <samuel.tebbs at arm.com>
Date: Tue, 17 Oct 2023 09:56:20 +0100
Subject: [PATCH 3/3] fixup! format

---
 clang/lib/CodeGen/CGBuiltin.cpp | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index e036cc8db421bc0..d507ba7765941c8 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -9719,7 +9719,9 @@ Value *CodeGenFunction::EmitSMELdrStr(const SVETypeFlags &TypeFlags,
     llvm::Value *MulVL = Builder.CreateMul(CntsbCall, VecNum, "mulvl");
 
     Ops[1] = Builder.CreateGEP(Int8Ty, Ops[1], MulVL);
-    Ops[0] = Builder.CreateAdd(Builder.CreateIntCast(VecNum, Int32Ty, true), Builder.CreateIntCast(Ops[0], Int32Ty, false), "tileslice");
+    Ops[0] = Builder.CreateAdd(Builder.CreateIntCast(VecNum, Int32Ty, true),
+                               Builder.CreateIntCast(Ops[0], Int32Ty, false),
+                               "tileslice");
     Ops.erase(&Ops[2]);
   }
   Function *F = CGM.getIntrinsic(IntID, {});



More information about the cfe-commits mailing list