[llvm-branch-commits] [llvm] release/20.x: [LLVM][MemCpyOpt] Unify alias tags if we optimize allocas (#129537) (PR #135615)

Nikita Popov via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Mon Apr 14 03:50:53 PDT 2025


https://github.com/nikic created https://github.com/llvm/llvm-project/pull/135615

Backport of 716b02d8c575afde7af1af13df145019659abca2, with conflicts in the test resolved.

From e385f5c5b9bd32f89754e8088c29f42a761f2880 Mon Sep 17 00:00:00 2001
From: Dominik Adamski <dominik.adamski at amd.com>
Date: Thu, 10 Apr 2025 12:23:53 +0200
Subject: [PATCH] [LLVM][MemCpyOpt] Unify alias tags if we optimize allocas
 (#129537)

Optimizing alloca instructions by merging two allocas into one may leave
invalid alias tags on the instructions that use them. Incorrect alias
tags can result in incorrect optimization outcomes for Fortran source
code compiled by Flang with the flags:
`-O3 -mmlir -local-alloc-tbaa -flto`.

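This commit removes the alias metadata (`!alias.scope`, `!noalias`,
`!tbaa` and `!tbaa_struct`) from any uses of either alloca when the
memcpy optimization replaces two arrays with one array, thus ensuring
correct compilation of Fortran source code using the flags:
`-O3 -mmlir -local-alloc-tbaa -flto`.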

This commit is also a proposal to fix the reported issue:
https://github.com/llvm/llvm-project/issues/133984

---------

Co-authored-by: Shilei Tian <i at tianshilei.me>
(cherry picked from commit 716b02d8c575afde7af1af13df145019659abca2)
---
 .../lib/Transforms/Scalar/MemCpyOptimizer.cpp | 19 +++--
 llvm/test/Transforms/MemCpyOpt/memcpy-tbaa.ll | 77 +++++++++++++++++++
 llvm/test/Transforms/MemCpyOpt/stack-move.ll  | 10 +--
 3 files changed, 94 insertions(+), 12 deletions(-)
 create mode 100644 llvm/test/Transforms/MemCpyOpt/memcpy-tbaa.ll

diff --git a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index 971d6012f6129..9202c341da92e 100644
--- a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -1518,7 +1518,7 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
   // to remove them.
 
   SmallVector<Instruction *, 4> LifetimeMarkers;
-  SmallSet<Instruction *, 4> NoAliasInstrs;
+  SmallSet<Instruction *, 4> AAMetadataInstrs;
   bool SrcNotDom = false;
 
   // Recursively track the user and check whether modified alias exist.
@@ -1573,8 +1573,8 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
               continue;
             }
           }
-          if (UI->hasMetadata(LLVMContext::MD_noalias))
-            NoAliasInstrs.insert(UI);
+          AAMetadataInstrs.insert(UI);
+
           if (!ModRefCallback(UI))
             return false;
         }
@@ -1679,11 +1679,16 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
   }
 
   // As this transformation can cause memory accesses that didn't previously
-  // alias to begin to alias one another, we remove !noalias metadata from any
-  // uses of either alloca. This is conservative, but more precision doesn't
-  // seem worthwhile right now.
-  for (Instruction *I : NoAliasInstrs)
+  // alias to begin to alias one another, we remove !alias.scope, !noalias,
+  // !tbaa and !tbaa_struct metadata from any uses of either alloca.
+  // This is conservative, but more precision doesn't seem worthwhile
+  // right now.
+  for (Instruction *I : AAMetadataInstrs) {
+    I->setMetadata(LLVMContext::MD_alias_scope, nullptr);
     I->setMetadata(LLVMContext::MD_noalias, nullptr);
+    I->setMetadata(LLVMContext::MD_tbaa, nullptr);
+    I->setMetadata(LLVMContext::MD_tbaa_struct, nullptr);
+  }
 
   LLVM_DEBUG(dbgs() << "Stack Move: Performed staack-move optimization\n");
   NumStackMove++;
diff --git a/llvm/test/Transforms/MemCpyOpt/memcpy-tbaa.ll b/llvm/test/Transforms/MemCpyOpt/memcpy-tbaa.ll
new file mode 100644
index 0000000000000..6e446e5ff267c
--- /dev/null
+++ b/llvm/test/Transforms/MemCpyOpt/memcpy-tbaa.ll
@@ -0,0 +1,77 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -passes=memcpyopt,dse -S -verify-memoryssa | FileCheck %s
+
+define void @test() local_unnamed_addr {
+; CHECK-LABEL: define void @test() local_unnamed_addr {
+; CHECK-NEXT:    [[TEST_ARRAY_B:%.*]] = alloca [31 x float], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr float, ptr [[TEST_ARRAY_B]], i64 1
+; CHECK-NEXT:    store float 0x3E6AA51880000000, ptr [[TMP1]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr float, ptr [[TEST_ARRAY_B]], i64 1
+; CHECK-NEXT:    [[TMP3:%.*]] = load float, ptr [[TMP2]], align 4
+; CHECK-NEXT:    ret void
+;
+  %test_array_a = alloca [31 x float], align 4
+  %test_array_b = alloca [31 x float], align 4
+  %1 = getelementptr float, ptr %test_array_b, i64 1
+  store float 0x3E6AA51880000000, ptr %1, align 4, !tbaa !4
+  call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 4 dereferenceable(124) %test_array_a, ptr noundef nonnull align 4 dereferenceable(124) %test_array_b, i64 124, i1 false)
+  %2 = getelementptr float, ptr %test_array_a, i64 1
+  %3 = load float, ptr %2, align 4, !tbaa !7
+  ret void
+}
+
+%struct.Outer = type { float, double, %struct.Inner }
+%struct.Inner = type { i32, float }
+
+; Function Attrs: nounwind uwtable
+define dso_local float @f() {
+; CHECK-LABEL: define dso_local float @f() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TEST1:%.*]] = alloca [[STRUCT_OUTER:%.*]], align 8
+; CHECK-NEXT:    [[F:%.*]] = getelementptr inbounds nuw [[STRUCT_OUTER]], ptr [[TEST1]], i32 0, i32 0
+; CHECK-NEXT:    store float 0.000000e+00, ptr [[F]], align 8
+; CHECK-NEXT:    [[F1:%.*]] = getelementptr inbounds nuw [[STRUCT_OUTER]], ptr [[TEST1]], i32 0, i32 0
+; CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[F1]], align 8
+; CHECK-NEXT:    [[ADD:%.*]] = fadd float [[TMP0]], 2.000000e+00
+; CHECK-NEXT:    store float [[ADD]], ptr [[F1]], align 8
+; CHECK-NEXT:    [[F2:%.*]] = getelementptr inbounds nuw [[STRUCT_OUTER]], ptr [[TEST1]], i32 0, i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[F2]], align 8
+; CHECK-NEXT:    ret float [[TMP1]]
+;
+entry:
+  %test = alloca %struct.Outer, align 8
+  %test1 = alloca %struct.Outer, align 8
+  %f = getelementptr inbounds nuw %struct.Outer, ptr %test1, i32 0, i32 0
+  store float 0.000000e+00, ptr %f, align 8, !tbaa !9
+  %inner_a = getelementptr inbounds nuw %struct.Outer, ptr %test1, i32 0, i32 2
+  %i = getelementptr inbounds nuw %struct.Inner, ptr %inner_a, i32 0, i32 0
+  store i32 0, ptr %i, align 8, !tbaa !17
+  call void @llvm.memcpy.p0.p0.i64(ptr align 8 %test, ptr align 8 %test1, i64 24, i1 false)
+  %f1 = getelementptr inbounds nuw %struct.Outer, ptr %test, i32 0, i32 0
+  %0 = load float, ptr %f1, align 8, !tbaa !9
+  %add = fadd float %0, 2.000000e+00
+  store float %add, ptr %f1, align 8, !tbaa !9
+  %f2 = getelementptr inbounds nuw %struct.Outer, ptr %test, i32 0, i32 0
+  %1 = load float, ptr %f2, align 8, !tbaa !9
+  ret float %1
+}
+
+!1 = !{!"any data access", !2, i64 0}
+!2 = !{!"any access", !3, i64 0}
+!3 = !{!"Flang function root test"}
+!4 = !{!5, !5, i64 0}
+!5 = !{!"allocated data/test_array_a", !6, i64 0}
+!6 = !{!"allocated data", !1, i64 0}
+!7 = !{!8, !8, i64 0}
+!8 = !{!"allocated data/test_array_b", !6, i64 0}
+!9 = !{!10, !11, i64 0}
+!10 = !{!"Outer", !11, i64 0, !14, i64 8, !15, i64 16}
+!11 = !{!"float", !12, i64 0}
+!12 = !{!"omnipotent char", !13, i64 0}
+!13 = !{!"Simple C/C++ TBAA"}
+!14 = !{!"double", !12, i64 0}
+!15 = !{!"Inner", !16, i64 0, !11, i64 4}
+!16 = !{!"int", !12, i64 0}
+!17 = !{!10, !16, i64 16}
+
+
diff --git a/llvm/test/Transforms/MemCpyOpt/stack-move.ll b/llvm/test/Transforms/MemCpyOpt/stack-move.ll
index 6089c0a4d7cf5..5ff6f01021208 100644
--- a/llvm/test/Transforms/MemCpyOpt/stack-move.ll
+++ b/llvm/test/Transforms/MemCpyOpt/stack-move.ll
@@ -259,7 +259,7 @@ define void @remove_scoped_noalias() {
 ; CHECK-LABEL: define void @remove_scoped_noalias() {
 ; CHECK-NEXT:    [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4
 ; CHECK-NEXT:    store [[STRUCT_FOO]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4
-; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]]), !alias.scope !0
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]])
 ; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]])
 ; CHECK-NEXT:    ret void
 ;
@@ -283,7 +283,7 @@ define void @remove_alloca_metadata() {
 ; CHECK-LABEL: define void @remove_alloca_metadata() {
 ; CHECK-NEXT:    [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4
 ; CHECK-NEXT:    store [[STRUCT_FOO]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4
-; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]]), !alias.scope !0
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]])
 ; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]])
 ; CHECK-NEXT:    ret void
 ;
@@ -308,7 +308,7 @@ define void @noalias_on_lifetime() {
 ; CHECK-LABEL: define void @noalias_on_lifetime() {
 ; CHECK-NEXT:    [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4
 ; CHECK-NEXT:    store [[STRUCT_FOO]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4
-; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]]), !alias.scope !0
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]])
 ; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]])
 ; CHECK-NEXT:    ret void
 ;
@@ -399,10 +399,10 @@ define void @terminator_lastuse() personality i32 0 {
 ; CHECK-NEXT:    store [[STRUCT_FOO]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4
 ; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]])
 ; CHECK-NEXT:    [[RV:%.*]] = invoke i32 @use_nocapture(ptr [[SRC]])
-; CHECK-NEXT:    to label [[SUC:%.*]] unwind label [[UNW:%.*]]
+; CHECK-NEXT:            to label [[SUC:%.*]] unwind label [[UNW:%.*]]
 ; CHECK:       unw:
 ; CHECK-NEXT:    [[LP:%.*]] = landingpad i32
-; CHECK-NEXT:    cleanup
+; CHECK-NEXT:            cleanup
 ; CHECK-NEXT:    resume i32 0
 ; CHECK:       suc:
 ; CHECK-NEXT:    ret void



More information about the llvm-branch-commits mailing list