[clang] a8cbd27 - [Clang][AArch64] svldr_vnum/svstr_vnum should use cntsb instead of vscale for the offset

Sander de Smalen via cfe-commits cfe-commits at lists.llvm.org
Mon Jul 24 07:30:31 PDT 2023


Author: Sander de Smalen
Date: 2023-07-24T14:29:45Z
New Revision: a8cbd27d1f238e104a5d5ca345d93bc1f4d4ab1f

URL: https://github.com/llvm/llvm-project/commit/a8cbd27d1f238e104a5d5ca345d93bc1f4d4ab1f
DIFF: https://github.com/llvm/llvm-project/commit/a8cbd27d1f238e104a5d5ca345d93bc1f4d4ab1f.diff

LOG: [Clang][AArch64] svldr_vnum/svstr_vnum should use cntsb instead of vscale for the offset

The specification for LDR/STR says that:

  The ZA array vector is selected by the sum of the vector select register
  and immediate offset, modulo the number of bytes in a Streaming SVE
  vector. [..] This instruction does not require the PE to be in Streaming
  SVE mode

When the instruction is used outside of streaming mode, 'vscale' will result
in the wrong value being used for the offset because LLVM's code-generator
will emit the non-streaming 'RDVL/ADDVL' instead of the 'RDSVL/ADDSVL'
instructions which are used to get the Streaming-SVE vector length.

Reviewed By: bryanpkc

Differential Revision: https://reviews.llvm.org/D156121

Added: 
    

Modified: 
    clang/lib/CodeGen/CGBuiltin.cpp
    clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ldr.c
    clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_str.c

Removed: 
    


################################################################################
diff  --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index b7fbafda0e5336..d5ad35c0c93602 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -9508,11 +9508,11 @@ Value *CodeGenFunction::EmitSMEZero(SVETypeFlags TypeFlags,
 Value *CodeGenFunction::EmitSMELdrStr(SVETypeFlags TypeFlags,
                                       SmallVectorImpl<Value *> &Ops,
                                       unsigned IntID) {
-  Function *Vscale = CGM.getIntrinsic(Intrinsic::vscale, Int64Ty);
-  llvm::Value *VscaleCall = Builder.CreateCall(Vscale, {}, "vscale");
+  Function *Cntsb = CGM.getIntrinsic(Intrinsic::aarch64_sme_cntsb);
+  llvm::Value *CntsbCall = Builder.CreateCall(Cntsb, {}, "svlb");
   llvm::Value *MulVL = Builder.CreateMul(
-      VscaleCall,
-      Builder.getInt64(16 * cast<llvm::ConstantInt>(Ops[1])->getZExtValue()),
+      CntsbCall,
+      Builder.getInt64(cast<llvm::ConstantInt>(Ops[1])->getZExtValue()),
       "mulvl");
   Ops[2] = Builder.CreateGEP(Int8Ty, Ops[2], MulVL);
   Ops[0] = EmitTileslice(Ops[1], Ops[0]);

diff  --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ldr.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ldr.c
index c96df003da6efc..7efa8b1556857c 100644
--- a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ldr.c
+++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ldr.c
@@ -18,8 +18,8 @@ void test_svldr_vnum_za(uint32_t slice_base, const void *ptr) {
 // CHECK-C-LABEL: @test_svldr_vnum_za_1(
 // CHECK-CXX-LABEL: @_Z20test_svldr_vnum_za_1jPKv(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[VSCALE:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT:    [[MULVL:%.*]] = mul nuw nsw i64 [[VSCALE]], 240
+// CHECK-NEXT:    [[SVLB:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb()
+// CHECK-NEXT:    [[MULVL:%.*]] = mul i64 [[SVLB]], 15
 // CHECK-NEXT:    [[TMP0:%.*]] = getelementptr i8, ptr [[PTR:%.*]], i64 [[MULVL]]
 // CHECK-NEXT:    [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 15
 // CHECK-NEXT:    tail call void @llvm.aarch64.sme.ldr(i32 [[TILESLICE]], ptr [[TMP0]])

diff  --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_str.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_str.c
index d4b1b9fa908acb..12aa298858a18e 100644
--- a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_str.c
+++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_str.c
@@ -18,8 +18,8 @@ void test_svstr_vnum_za(uint32_t slice_base, void *ptr) {
 // CHECK-C-LABEL: @test_svstr_vnum_za_1(
 // CHECK-CXX-LABEL: @_Z20test_svstr_vnum_za_1jPv(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[VSCALE:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT:    [[MULVL:%.*]] = mul nuw nsw i64 [[VSCALE]], 240
+// CHECK-NEXT:    [[SVLB:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb()
+// CHECK-NEXT:    [[MULVL:%.*]] = mul i64 [[SVLB]], 15
 // CHECK-NEXT:    [[TMP0:%.*]] = getelementptr i8, ptr [[PTR:%.*]], i64 [[MULVL]]
 // CHECK-NEXT:    [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 15
 // CHECK-NEXT:    tail call void @llvm.aarch64.sme.str(i32 [[TILESLICE]], ptr [[TMP0]])


        


More information about the cfe-commits mailing list