[llvm-branch-commits] [llvm] release/20.x: [LLVM][MemCpyOpt] Unify alias tags if we optimize allocas (#129537) (PR #135615)
Nikita Popov via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Mon Apr 14 03:50:53 PDT 2025
https://github.com/nikic created https://github.com/llvm/llvm-project/pull/135615
Backport of 716b02d8c575afde7af1af13df145019659abca2, with conflicts in the test resolved.
From e385f5c5b9bd32f89754e8088c29f42a761f2880 Mon Sep 17 00:00:00 2001
From: Dominik Adamski <dominik.adamski at amd.com>
Date: Thu, 10 Apr 2025 12:23:53 +0200
Subject: [PATCH] [LLVM][MemCpyOpt] Unify alias tags if we optimize allocas
(#129537)
The stack-move optimization in MemCpyOpt merges two allocas into one, which
may leave the alias tags on their uses invalid. Invalid alias tags can result
in incorrect optimization outcomes for Fortran source code compiled by Flang
with the flags `-O3 -mmlir -local-alloc-tbaa -flto`.
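This commit removes the alias tags (`!alias.scope`, `!noalias`, `!tbaa` and
`!tbaa_struct` metadata) from the uses of both allocas when the memcpy
optimization replaces two arrays with one array, thus ensuring correct
compilation of Fortran source code using the flags
`-O3 -mmlir -local-alloc-tbaa -flto`.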
This commit is also a proposal to fix the reported issue:
https://github.com/llvm/llvm-project/issues/133984
---------
Co-authored-by: Shilei Tian <i at tianshilei.me>
(cherry picked from commit 716b02d8c575afde7af1af13df145019659abca2)
---
.../lib/Transforms/Scalar/MemCpyOptimizer.cpp | 19 +++--
llvm/test/Transforms/MemCpyOpt/memcpy-tbaa.ll | 77 +++++++++++++++++++
llvm/test/Transforms/MemCpyOpt/stack-move.ll | 10 +--
3 files changed, 94 insertions(+), 12 deletions(-)
create mode 100644 llvm/test/Transforms/MemCpyOpt/memcpy-tbaa.ll
diff --git a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index 971d6012f6129..9202c341da92e 100644
--- a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -1518,7 +1518,7 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
// to remove them.
SmallVector<Instruction *, 4> LifetimeMarkers;
- SmallSet<Instruction *, 4> NoAliasInstrs;
+ SmallSet<Instruction *, 4> AAMetadataInstrs;
bool SrcNotDom = false;
// Recursively track the user and check whether modified alias exist.
@@ -1573,8 +1573,8 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
continue;
}
}
- if (UI->hasMetadata(LLVMContext::MD_noalias))
- NoAliasInstrs.insert(UI);
+ AAMetadataInstrs.insert(UI);
+
if (!ModRefCallback(UI))
return false;
}
@@ -1679,11 +1679,16 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
}
// As this transformation can cause memory accesses that didn't previously
- // alias to begin to alias one another, we remove !noalias metadata from any
- // uses of either alloca. This is conservative, but more precision doesn't
- // seem worthwhile right now.
- for (Instruction *I : NoAliasInstrs)
+ // alias to begin to alias one another, we remove !alias.scope, !noalias,
+ // !tbaa and !tbaa_struct metadata from any uses of either alloca.
+ // This is conservative, but more precision doesn't seem worthwhile
+ // right now.
+ for (Instruction *I : AAMetadataInstrs) {
+ I->setMetadata(LLVMContext::MD_alias_scope, nullptr);
I->setMetadata(LLVMContext::MD_noalias, nullptr);
+ I->setMetadata(LLVMContext::MD_tbaa, nullptr);
+ I->setMetadata(LLVMContext::MD_tbaa_struct, nullptr);
+ }
LLVM_DEBUG(dbgs() << "Stack Move: Performed staack-move optimization\n");
NumStackMove++;
diff --git a/llvm/test/Transforms/MemCpyOpt/memcpy-tbaa.ll b/llvm/test/Transforms/MemCpyOpt/memcpy-tbaa.ll
new file mode 100644
index 0000000000000..6e446e5ff267c
--- /dev/null
+++ b/llvm/test/Transforms/MemCpyOpt/memcpy-tbaa.ll
@@ -0,0 +1,77 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -passes=memcpyopt,dse -S -verify-memoryssa | FileCheck %s
+
+define void @test() local_unnamed_addr {
+; CHECK-LABEL: define void @test() local_unnamed_addr {
+; CHECK-NEXT: [[TEST_ARRAY_B:%.*]] = alloca [31 x float], align 4
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr float, ptr [[TEST_ARRAY_B]], i64 1
+; CHECK-NEXT: store float 0x3E6AA51880000000, ptr [[TMP1]], align 4
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr float, ptr [[TEST_ARRAY_B]], i64 1
+; CHECK-NEXT: [[TMP3:%.*]] = load float, ptr [[TMP2]], align 4
+; CHECK-NEXT: ret void
+;
+ %test_array_a = alloca [31 x float], align 4
+ %test_array_b = alloca [31 x float], align 4
+ %1 = getelementptr float, ptr %test_array_b, i64 1
+ store float 0x3E6AA51880000000, ptr %1, align 4, !tbaa !4
+ call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 4 dereferenceable(124) %test_array_a, ptr noundef nonnull align 4 dereferenceable(124) %test_array_b, i64 124, i1 false)
+ %2 = getelementptr float, ptr %test_array_a, i64 1
+ %3 = load float, ptr %2, align 4, !tbaa !7
+ ret void
+}
+
+%struct.Outer = type { float, double, %struct.Inner }
+%struct.Inner = type { i32, float }
+
+; Function Attrs: nounwind uwtable
+define dso_local float @f() {
+; CHECK-LABEL: define dso_local float @f() {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[TEST1:%.*]] = alloca [[STRUCT_OUTER:%.*]], align 8
+; CHECK-NEXT: [[F:%.*]] = getelementptr inbounds nuw [[STRUCT_OUTER]], ptr [[TEST1]], i32 0, i32 0
+; CHECK-NEXT: store float 0.000000e+00, ptr [[F]], align 8
+; CHECK-NEXT: [[F1:%.*]] = getelementptr inbounds nuw [[STRUCT_OUTER]], ptr [[TEST1]], i32 0, i32 0
+; CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F1]], align 8
+; CHECK-NEXT: [[ADD:%.*]] = fadd float [[TMP0]], 2.000000e+00
+; CHECK-NEXT: store float [[ADD]], ptr [[F1]], align 8
+; CHECK-NEXT: [[F2:%.*]] = getelementptr inbounds nuw [[STRUCT_OUTER]], ptr [[TEST1]], i32 0, i32 0
+; CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F2]], align 8
+; CHECK-NEXT: ret float [[TMP1]]
+;
+entry:
+ %test = alloca %struct.Outer, align 8
+ %test1 = alloca %struct.Outer, align 8
+ %f = getelementptr inbounds nuw %struct.Outer, ptr %test1, i32 0, i32 0
+ store float 0.000000e+00, ptr %f, align 8, !tbaa !9
+ %inner_a = getelementptr inbounds nuw %struct.Outer, ptr %test1, i32 0, i32 2
+ %i = getelementptr inbounds nuw %struct.Inner, ptr %inner_a, i32 0, i32 0
+ store i32 0, ptr %i, align 8, !tbaa !17
+ call void @llvm.memcpy.p0.p0.i64(ptr align 8 %test, ptr align 8 %test1, i64 24, i1 false)
+ %f1 = getelementptr inbounds nuw %struct.Outer, ptr %test, i32 0, i32 0
+ %0 = load float, ptr %f1, align 8, !tbaa !9
+ %add = fadd float %0, 2.000000e+00
+ store float %add, ptr %f1, align 8, !tbaa !9
+ %f2 = getelementptr inbounds nuw %struct.Outer, ptr %test, i32 0, i32 0
+ %1 = load float, ptr %f2, align 8, !tbaa !9
+ ret float %1
+}
+
+!1 = !{!"any data access", !2, i64 0}
+!2 = !{!"any access", !3, i64 0}
+!3 = !{!"Flang function root test"}
+!4 = !{!5, !5, i64 0}
+!5 = !{!"allocated data/test_array_a", !6, i64 0}
+!6 = !{!"allocated data", !1, i64 0}
+!7 = !{!8, !8, i64 0}
+!8 = !{!"allocated data/test_array_b", !6, i64 0}
+!9 = !{!10, !11, i64 0}
+!10 = !{!"Outer", !11, i64 0, !14, i64 8, !15, i64 16}
+!11 = !{!"float", !12, i64 0}
+!12 = !{!"omnipotent char", !13, i64 0}
+!13 = !{!"Simple C/C++ TBAA"}
+!14 = !{!"double", !12, i64 0}
+!15 = !{!"Inner", !16, i64 0, !11, i64 4}
+!16 = !{!"int", !12, i64 0}
+!17 = !{!10, !16, i64 16}
+
+
diff --git a/llvm/test/Transforms/MemCpyOpt/stack-move.ll b/llvm/test/Transforms/MemCpyOpt/stack-move.ll
index 6089c0a4d7cf5..5ff6f01021208 100644
--- a/llvm/test/Transforms/MemCpyOpt/stack-move.ll
+++ b/llvm/test/Transforms/MemCpyOpt/stack-move.ll
@@ -259,7 +259,7 @@ define void @remove_scoped_noalias() {
; CHECK-LABEL: define void @remove_scoped_noalias() {
; CHECK-NEXT: [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4
; CHECK-NEXT: store [[STRUCT_FOO]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4
-; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]]), !alias.scope !0
+; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]])
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]])
; CHECK-NEXT: ret void
;
@@ -283,7 +283,7 @@ define void @remove_alloca_metadata() {
; CHECK-LABEL: define void @remove_alloca_metadata() {
; CHECK-NEXT: [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4
; CHECK-NEXT: store [[STRUCT_FOO]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4
-; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]]), !alias.scope !0
+; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]])
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]])
; CHECK-NEXT: ret void
;
@@ -308,7 +308,7 @@ define void @noalias_on_lifetime() {
; CHECK-LABEL: define void @noalias_on_lifetime() {
; CHECK-NEXT: [[SRC:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4
; CHECK-NEXT: store [[STRUCT_FOO]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4
-; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]]), !alias.scope !0
+; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]])
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]])
; CHECK-NEXT: ret void
;
@@ -399,10 +399,10 @@ define void @terminator_lastuse() personality i32 0 {
; CHECK-NEXT: store [[STRUCT_FOO]] { i32 10, i32 20, i32 30 }, ptr [[SRC]], align 4
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @use_nocapture(ptr nocapture [[SRC]])
; CHECK-NEXT: [[RV:%.*]] = invoke i32 @use_nocapture(ptr [[SRC]])
-; CHECK-NEXT: to label [[SUC:%.*]] unwind label [[UNW:%.*]]
+; CHECK-NEXT: to label [[SUC:%.*]] unwind label [[UNW:%.*]]
; CHECK: unw:
; CHECK-NEXT: [[LP:%.*]] = landingpad i32
-; CHECK-NEXT: cleanup
+; CHECK-NEXT: cleanup
; CHECK-NEXT: resume i32 0
; CHECK: suc:
; CHECK-NEXT: ret void
More information about the llvm-branch-commits
mailing list