[llvm] [MemCpyOpt] Require writable object during call slot optimization (PR #71542)

Nikita Popov via llvm-commits llvm-commits at lists.llvm.org
Tue Nov 7 06:30:35 PST 2023


https://github.com/nikic created https://github.com/llvm/llvm-project/pull/71542

Call slot optimization may introduce writes to the destination object that occur earlier than in the original function. We currently already check that that the destination is dereferenceable and aligned, but we do not make sure that it is writable. As such, we might introduce a write to read-only memory, or introduce a data race.

Fix this by checking that the object is writable. For arguments, this is indicated by the new writable attribute. Tests using sret/dereferenceable are updated to use it.

>From c423bd6e92236827bc13d9c39087dea384d9080d Mon Sep 17 00:00:00 2001
From: Nikita Popov <npopov at redhat.com>
Date: Tue, 7 Nov 2023 15:07:10 +0100
Subject: [PATCH] [MemCpyOpt] Require writable object during call slot
 optimization

Call slot optimization may introduce writes to the destination
object that occur earlier than in the original function. We
currently already check that that the destination is dereferenceable
and aligned, but we do not make sure that it is writable. As such,
we might introduce a write to read-only memory, or introduce a
data race.

Fix this by checking that the object is writable. For arguments,
this is indicated by the new writable attribute. Tests using
sret/dereferenceable are updated to use it.
---
 llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp  | 10 ++++++----
 llvm/test/Transforms/MemCpyOpt/callslot.ll      | 17 +++++++++++++++--
 .../test/Transforms/MemCpyOpt/callslot_deref.ll |  4 ++--
 .../Transforms/MemCpyOpt/callslot_noalias.ll    |  4 ++--
 .../test/Transforms/MemCpyOpt/loadstore-sret.ll |  2 +-
 llvm/test/Transforms/MemCpyOpt/memcpy.ll        |  2 +-
 llvm/test/Transforms/MemCpyOpt/sret.ll          |  2 +-
 7 files changed, 28 insertions(+), 13 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index 39ad3d87779b526..7789458364caeed 100644
--- a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -925,10 +925,12 @@ bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpyLoad,
       return false;
   }
 
-  // Check that accessing the first srcSize bytes of dest will not cause a
-  // trap.  Otherwise the transform is invalid since it might cause a trap
-  // to occur earlier than it otherwise would.
-  if (!isDereferenceableAndAlignedPointer(cpyDest, Align(1), APInt(64, cpySize),
+  // Check that storing to the first srcSize bytes of dest will not cause a
+  // trap or data race.
+  bool ExplicitlyDereferenceableOnly;
+  if (!isWritableObject(getUnderlyingObject(cpyDest),
+                        ExplicitlyDereferenceableOnly) ||
+      !isDereferenceableAndAlignedPointer(cpyDest, Align(1), APInt(64, cpySize),
                                           DL, C, AC, DT)) {
     LLVM_DEBUG(dbgs() << "Call Slot: Dest pointer not dereferenceable\n");
     return false;
diff --git a/llvm/test/Transforms/MemCpyOpt/callslot.ll b/llvm/test/Transforms/MemCpyOpt/callslot.ll
index 8c769319236d654..877b73316f9018e 100644
--- a/llvm/test/Transforms/MemCpyOpt/callslot.ll
+++ b/llvm/test/Transforms/MemCpyOpt/callslot.ll
@@ -69,7 +69,7 @@ define void @write_src_between_call_and_memcpy() {
   ret void
 }
 
-define void @throw_between_call_and_mempy(ptr dereferenceable(16) %dest.i8) {
+define void @throw_between_call_and_mempy(ptr writable dereferenceable(16) %dest.i8) {
 ; CHECK-LABEL: @throw_between_call_and_mempy(
 ; CHECK-NEXT:    [[SRC:%.*]] = alloca [16 x i8], align 1
 ; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr [[SRC]], i8 0, i64 16, i1 false)
@@ -209,7 +209,7 @@ nocaptures:
   ret void
 }
 
-define void @source_alignment(ptr noalias dereferenceable(128) %dst) {
+define void @source_alignment(ptr noalias writable dereferenceable(128) %dst) {
 ; CHECK-LABEL: @source_alignment(
 ; CHECK-NEXT:    [[SRC:%.*]] = alloca [128 x i8], align 4
 ; CHECK-NEXT:    call void @accept_ptr(ptr nocapture [[DST:%.*]]) #[[ATTR3]]
@@ -221,6 +221,19 @@ define void @source_alignment(ptr noalias dereferenceable(128) %dst) {
   ret void
 }
 
+define void @dest_not_writable(ptr noalias dereferenceable(128) %dst) {
+; CHECK-LABEL: @dest_not_writable(
+; CHECK-NEXT:    [[SRC:%.*]] = alloca [128 x i8], align 4
+; CHECK-NEXT:    call void @accept_ptr(ptr nocapture [[SRC]]) #[[ATTR3]]
+; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[DST:%.*]], ptr [[SRC]], i64 128, i1 false)
+; CHECK-NEXT:    ret void
+;
+  %src = alloca [128 x i8], align 4
+  call void @accept_ptr(ptr nocapture %src) nounwind
+  call void @llvm.memcpy.p0.p0.i64(ptr align 4 %dst, ptr %src, i64 128, i1 false)
+  ret void
+}
+
 declare void @may_throw()
 declare void @accept_ptr(ptr)
 declare void @llvm.memcpy.p0.p0.i64(ptr, ptr, i64, i1)
diff --git a/llvm/test/Transforms/MemCpyOpt/callslot_deref.ll b/llvm/test/Transforms/MemCpyOpt/callslot_deref.ll
index e66f702a54326a5..dcbede105a3b13c 100644
--- a/llvm/test/Transforms/MemCpyOpt/callslot_deref.ll
+++ b/llvm/test/Transforms/MemCpyOpt/callslot_deref.ll
@@ -6,7 +6,7 @@ declare void @llvm.memcpy.p0.p0.i64(ptr nocapture, ptr nocapture readonly, i64,
 declare void @llvm.memset.p0.i64(ptr nocapture, i8, i64, i1) nounwind
 
 ; all bytes of %dst that are touch by the memset are dereferenceable
-define void @must_remove_memcpy(ptr noalias nocapture dereferenceable(4096) %dst) nofree nosync {
+define void @must_remove_memcpy(ptr noalias nocapture writable dereferenceable(4096) %dst) nofree nosync {
 ; CHECK-LABEL: @must_remove_memcpy(
 ; CHECK-NEXT:    [[SRC:%.*]] = alloca [4096 x i8], align 1
 ; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr [[DST:%.*]], i8 0, i64 4096, i1 false)
@@ -20,7 +20,7 @@ define void @must_remove_memcpy(ptr noalias nocapture dereferenceable(4096) %dst
 
 ; memset touch more bytes than those guaranteed to be dereferenceable
 ; We can't remove the memcpy, but we can turn it into an independent memset.
-define void @must_not_remove_memcpy(ptr noalias nocapture dereferenceable(1024) %dst) nofree nosync {
+define void @must_not_remove_memcpy(ptr noalias nocapture writable dereferenceable(1024) %dst) nofree nosync {
 ; CHECK-LABEL: @must_not_remove_memcpy(
 ; CHECK-NEXT:    [[SRC:%.*]] = alloca [4096 x i8], align 1
 ; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr [[SRC]], i8 0, i64 4096, i1 false)
diff --git a/llvm/test/Transforms/MemCpyOpt/callslot_noalias.ll b/llvm/test/Transforms/MemCpyOpt/callslot_noalias.ll
index 483ce2f6b6cf8af..9b51b6cec3b45fb 100644
--- a/llvm/test/Transforms/MemCpyOpt/callslot_noalias.ll
+++ b/llvm/test/Transforms/MemCpyOpt/callslot_noalias.ll
@@ -5,10 +5,10 @@ declare void @func(ptr %dst)
 
 ; The noalias metadata from the call, the load and the store should be merged,
 ; so that no metadata is left on the call.
-define i8 @test(ptr dereferenceable(1) noalias %dst) {
+define i8 @test(ptr writable dereferenceable(1) noalias %dst) {
 ; CHECK-LABEL: @test(
 ; CHECK-NEXT:    [[TMP:%.*]] = alloca i8, align 1
-; CHECK-NEXT:    call void @func(ptr nocapture [[DST:%.*]]) #[[ATTR0:[0-9]+]]{{$}}
+; CHECK-NEXT:    call void @func(ptr nocapture [[DST:%.*]]) #[[ATTR0:[0-9]+]]
 ; CHECK-NEXT:    [[V2:%.*]] = load i8, ptr [[DST]], align 1, !alias.scope !0
 ; CHECK-NEXT:    ret i8 [[V2]]
 ;
diff --git a/llvm/test/Transforms/MemCpyOpt/loadstore-sret.ll b/llvm/test/Transforms/MemCpyOpt/loadstore-sret.ll
index 461205e7bbcf479..20f570f2cb5311b 100644
--- a/llvm/test/Transforms/MemCpyOpt/loadstore-sret.ll
+++ b/llvm/test/Transforms/MemCpyOpt/loadstore-sret.ll
@@ -7,7 +7,7 @@ target triple = "x86_64-apple-darwin10.0.0"
 
 %"class.std::auto_ptr" = type { ptr }
 
-define void @_Z3foov(ptr noalias nocapture sret(%"class.std::auto_ptr") %agg.result) ssp {
+define void @_Z3foov(ptr noalias nocapture writable sret(%"class.std::auto_ptr") %agg.result) ssp {
 ; CHECK-LABEL: @_Z3foov(
 ; CHECK-NEXT:  _ZNSt8auto_ptrIiED1Ev.exit:
 ; CHECK-NEXT:    [[TEMP_LVALUE:%.*]] = alloca %"class.std::auto_ptr", align 8
diff --git a/llvm/test/Transforms/MemCpyOpt/memcpy.ll b/llvm/test/Transforms/MemCpyOpt/memcpy.ll
index 413d72a8e611558..cad2170b271e29d 100644
--- a/llvm/test/Transforms/MemCpyOpt/memcpy.ll
+++ b/llvm/test/Transforms/MemCpyOpt/memcpy.ll
@@ -142,7 +142,7 @@ define void @test6_memcpy(ptr %src, ptr %dest) nounwind {
 
 @x = external global %0
 
-define void @test3(ptr noalias sret(%0) %agg.result) nounwind  {
+define void @test3(ptr noalias writable sret(%0) %agg.result) nounwind  {
 ; CHECK-LABEL: @test3(
 ; CHECK-NEXT:    [[X_0:%.*]] = alloca [[TMP0:%.*]], align 16
 ; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 16 [[AGG_RESULT:%.*]], ptr align 16 @x, i32 32, i1 false)
diff --git a/llvm/test/Transforms/MemCpyOpt/sret.ll b/llvm/test/Transforms/MemCpyOpt/sret.ll
index efb591242c2f135..1d0f0934ec2da2d 100644
--- a/llvm/test/Transforms/MemCpyOpt/sret.ll
+++ b/llvm/test/Transforms/MemCpyOpt/sret.ll
@@ -6,7 +6,7 @@ target triple = "i686-apple-darwin9"
 
 %0 = type { x86_fp80, x86_fp80 }
 
-define void @ccosl(ptr noalias sret(%0) %agg.result, ptr byval(%0) align 8 %z) nounwind {
+define void @ccosl(ptr noalias writable sret(%0) %agg.result, ptr byval(%0) align 8 %z) nounwind {
 ; CHECK-LABEL: @ccosl(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[IZ:%.*]] = alloca [[TMP0:%.*]], align 16



More information about the llvm-commits mailing list