[llvm] [AArch64] Don't tail call memset if it would convert to a bzero. (PR #98969)
Amara Emerson via llvm-commits
llvm-commits at lists.llvm.org
Mon Jul 15 15:29:52 PDT 2024
https://github.com/aemerson updated https://github.com/llvm/llvm-project/pull/98969
>From f9b342324834480eb3403892af205f202300f1b1 Mon Sep 17 00:00:00 2001
From: Amara Emerson <amara at apple.com>
Date: Mon, 15 Jul 2024 14:48:08 -0700
Subject: [PATCH 1/2] [AArch64] Don't tail call memset if it would convert to a
bzero.
Well, not quite that simple. We can tail call memset since it returns its first
argument, but bzero returns nothing, so tail calling a memset that gets lowered
to bzero can end up miscompiling.
rdar://131419786
---
llvm/lib/CodeGen/Analysis.cpp | 7 ++++++-
.../AArch64/no-tail-call-bzero-from-memset.ll | 20 +++++++++++++++++++
2 files changed, 26 insertions(+), 1 deletion(-)
create mode 100644 llvm/test/CodeGen/AArch64/no-tail-call-bzero-from-memset.ll
diff --git a/llvm/lib/CodeGen/Analysis.cpp b/llvm/lib/CodeGen/Analysis.cpp
index 7fc18639e5852..2a3015866da5f 100644
--- a/llvm/lib/CodeGen/Analysis.cpp
+++ b/llvm/lib/CodeGen/Analysis.cpp
@@ -677,6 +677,8 @@ bool llvm::returnTypeIsEligibleForTailCall(const Function *F,
// will be expanded as memcpy in libc, which returns the first
// argument. On other platforms like arm-none-eabi, memcpy may be
// expanded as library call without return value, like __aeabi_memcpy.
+ // Similarly, llvm.memset can be expanded to bzero, which doesn't have a
+ // return value either.
const CallInst *Call = cast<CallInst>(I);
if (Function *F = Call->getCalledFunction()) {
Intrinsic::ID IID = F->getIntrinsicID();
@@ -685,7 +687,10 @@ bool llvm::returnTypeIsEligibleForTailCall(const Function *F,
(IID == Intrinsic::memmove &&
TLI.getLibcallName(RTLIB::MEMMOVE) == StringRef("memmove")) ||
(IID == Intrinsic::memset &&
- TLI.getLibcallName(RTLIB::MEMSET) == StringRef("memset"))) &&
+ TLI.getLibcallName(RTLIB::MEMSET) == StringRef("memset") &&
+ (!isa<ConstantInt>(Call->getOperand(1)) ||
+ !cast<ConstantInt>(Call->getOperand(1))->isZero() ||
+ !TLI.getLibcallName(RTLIB::BZERO)))) &&
(RetVal == Call->getArgOperand(0) ||
isPointerBitcastEqualTo(RetVal, Call->getArgOperand(0))))
return true;
diff --git a/llvm/test/CodeGen/AArch64/no-tail-call-bzero-from-memset.ll b/llvm/test/CodeGen/AArch64/no-tail-call-bzero-from-memset.ll
new file mode 100644
index 0000000000000..90e641cd4fe3d
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/no-tail-call-bzero-from-memset.ll
@@ -0,0 +1,20 @@
+; RUN: llc -o - %s | FileCheck %s
+; RUN: llc -global-isel -o - %s | FileCheck %s
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+target triple = "arm64-apple-macosx15.0.0"
+
+define ptr @test() {
+; CHECK-LABEL: test:
+; CHECK-NOT: b _bzero
+ %1 = tail call ptr @fn(i32 noundef 1) #3
+ tail call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(1000) %1, i8 noundef 0, i64 noundef 1000, i1 noundef false) #3
+ ret ptr %1
+}
+
+declare ptr @fn(i32 noundef)
+
+; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: write)
+declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg) #2
+
+attributes #2 = { nocallback nofree nounwind willreturn memory(argmem: write) }
+attributes #3 = { nounwind optsize }
>From a316dcf44c9738f39c49c556195df1e231296b0f Mon Sep 17 00:00:00 2001
From: Amara Emerson <amara at apple.com>
Date: Mon, 15 Jul 2024 15:29:29 -0700
Subject: [PATCH 2/2] Change to positive check in test.
---
llvm/test/CodeGen/AArch64/no-tail-call-bzero-from-memset.ll | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/test/CodeGen/AArch64/no-tail-call-bzero-from-memset.ll b/llvm/test/CodeGen/AArch64/no-tail-call-bzero-from-memset.ll
index 90e641cd4fe3d..34c6c63cc1798 100644
--- a/llvm/test/CodeGen/AArch64/no-tail-call-bzero-from-memset.ll
+++ b/llvm/test/CodeGen/AArch64/no-tail-call-bzero-from-memset.ll
@@ -5,7 +5,7 @@ target triple = "arm64-apple-macosx15.0.0"
define ptr @test() {
; CHECK-LABEL: test:
-; CHECK-NOT: b _bzero
+; CHECK: bl _bzero
%1 = tail call ptr @fn(i32 noundef 1) #3
tail call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(1000) %1, i8 noundef 0, i64 noundef 1000, i1 noundef false) #3
ret ptr %1
More information about the llvm-commits mailing list