[clang] a8cbd27 - [Clang][AArch64] svldr_vnum/svstr_vnum should use cntsb iso vscale for the offset
Sander de Smalen via cfe-commits
cfe-commits at lists.llvm.org
Mon Jul 24 07:30:31 PDT 2023
Author: Sander de Smalen
Date: 2023-07-24T14:29:45Z
New Revision: a8cbd27d1f238e104a5d5ca345d93bc1f4d4ab1f
URL: https://github.com/llvm/llvm-project/commit/a8cbd27d1f238e104a5d5ca345d93bc1f4d4ab1f
DIFF: https://github.com/llvm/llvm-project/commit/a8cbd27d1f238e104a5d5ca345d93bc1f4d4ab1f.diff
LOG: [Clang][AArch64] svldr_vnum/svstr_vnum should use cntsb instead of vscale for the offset
The specification for LDR/STR says:
  "The ZA array vector is selected by the sum of the vector select register
  and immediate offset, modulo the number of bytes in a Streaming SVE
  vector. [...] This instruction does not require the PE to be in Streaming
  SVE mode."
When the instruction is used outside of streaming mode, using 'vscale' results
in the wrong value for the offset, because LLVM's code generator will emit the
non-streaming 'RDVL/ADDVL' instructions instead of the 'RDSVL/ADDSVL'
instructions, which are the ones that return the Streaming SVE vector length.
Reviewed By: bryanpkc
Differential Revision: https://reviews.llvm.org/D156121
Added:
Modified:
clang/lib/CodeGen/CGBuiltin.cpp
clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ldr.c
clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_str.c
Removed:
################################################################################
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index b7fbafda0e5336..d5ad35c0c93602 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -9508,11 +9508,11 @@ Value *CodeGenFunction::EmitSMEZero(SVETypeFlags TypeFlags,
Value *CodeGenFunction::EmitSMELdrStr(SVETypeFlags TypeFlags,
SmallVectorImpl<Value *> &Ops,
unsigned IntID) {
- Function *Vscale = CGM.getIntrinsic(Intrinsic::vscale, Int64Ty);
- llvm::Value *VscaleCall = Builder.CreateCall(Vscale, {}, "vscale");
+ Function *Cntsb = CGM.getIntrinsic(Intrinsic::aarch64_sme_cntsb);
+ llvm::Value *CntsbCall = Builder.CreateCall(Cntsb, {}, "svlb");
llvm::Value *MulVL = Builder.CreateMul(
- VscaleCall,
- Builder.getInt64(16 * cast<llvm::ConstantInt>(Ops[1])->getZExtValue()),
+ CntsbCall,
+ Builder.getInt64(cast<llvm::ConstantInt>(Ops[1])->getZExtValue()),
"mulvl");
Ops[2] = Builder.CreateGEP(Int8Ty, Ops[2], MulVL);
Ops[0] = EmitTileslice(Ops[1], Ops[0]);
diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ldr.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ldr.c
index c96df003da6efc..7efa8b1556857c 100644
--- a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ldr.c
+++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ldr.c
@@ -18,8 +18,8 @@ void test_svldr_vnum_za(uint32_t slice_base, const void *ptr) {
// CHECK-C-LABEL: @test_svldr_vnum_za_1(
// CHECK-CXX-LABEL: @_Z20test_svldr_vnum_za_1jPKv(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[VSCALE:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT: [[MULVL:%.*]] = mul nuw nsw i64 [[VSCALE]], 240
+// CHECK-NEXT: [[SVLB:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb()
+// CHECK-NEXT: [[MULVL:%.*]] = mul i64 [[SVLB]], 15
// CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[PTR:%.*]], i64 [[MULVL]]
// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 15
// CHECK-NEXT: tail call void @llvm.aarch64.sme.ldr(i32 [[TILESLICE]], ptr [[TMP0]])
diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_str.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_str.c
index d4b1b9fa908acb..12aa298858a18e 100644
--- a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_str.c
+++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_str.c
@@ -18,8 +18,8 @@ void test_svstr_vnum_za(uint32_t slice_base, void *ptr) {
// CHECK-C-LABEL: @test_svstr_vnum_za_1(
// CHECK-CXX-LABEL: @_Z20test_svstr_vnum_za_1jPv(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[VSCALE:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT: [[MULVL:%.*]] = mul nuw nsw i64 [[VSCALE]], 240
+// CHECK-NEXT: [[SVLB:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb()
+// CHECK-NEXT: [[MULVL:%.*]] = mul i64 [[SVLB]], 15
// CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[PTR:%.*]], i64 [[MULVL]]
// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 15
// CHECK-NEXT: tail call void @llvm.aarch64.sme.str(i32 [[TILESLICE]], ptr [[TMP0]])
More information about the cfe-commits mailing list