[llvm] [LLVM][MemCpyOpt] Unify alias tags if we optimize allocas (PR #129537)
Dominik Adamski via llvm-commits
llvm-commits at lists.llvm.org
Fri Mar 21 05:06:20 PDT 2025
https://github.com/DominikAdamski updated https://github.com/llvm/llvm-project/pull/129537
>From 4c4951252f4f5115f80f09aaacba45991e92ddcf Mon Sep 17 00:00:00 2001
From: Dominik Adamski <dominik.adamski at amd.com>
Date: Mon, 24 Feb 2025 06:15:43 -0600
Subject: [PATCH 1/6] [LLVM][MemCpyOpt] Unify alias tags if we optimize allocas
Optimization of alloca instructions may lead to invalid alias tags.
Incorrect alias tags can lead to wrong optimization results.
This commit unifies alias tags when the memcpy optimization
replaces two arrays with one array.
---
.../lib/Transforms/Scalar/MemCpyOptimizer.cpp | 24 ++++-
llvm/test/Transforms/MemCpyOpt/memcpy-tbaa.ll | 99 +++++++++++++++++++
2 files changed, 119 insertions(+), 4 deletions(-)
create mode 100644 llvm/test/Transforms/MemCpyOpt/memcpy-tbaa.ll
diff --git a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index 43496d1c80df5..10342a6b32725 100644
--- a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -1516,6 +1516,8 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
SmallVector<Instruction *, 4> LifetimeMarkers;
SmallSet<Instruction *, 4> NoAliasInstrs;
bool SrcNotDom = false;
+ SmallSet<Instruction *, 4> SrcAllocaInstUsers;
+ SmallSet<Instruction *, 4> DestAllocaInstUsers;
// Recursively track the user and check whether modified alias exist.
auto IsDereferenceableOrNull = [](Value *V, const DataLayout &DL) -> bool {
@@ -1524,8 +1526,8 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
};
auto CaptureTrackingWithModRef =
- [&](Instruction *AI,
- function_ref<bool(Instruction *)> ModRefCallback) -> bool {
+ [&](Instruction *AI, function_ref<bool(Instruction *)> ModRefCallback,
+ SmallSet<Instruction *, 4> &AllocaInstUsersWithTBAA) -> bool {
SmallVector<Instruction *, 8> Worklist;
Worklist.push_back(AI);
unsigned MaxUsesToExplore = getDefaultMaxUsesToExploreForCaptureTracking();
@@ -1569,6 +1571,9 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
continue;
}
}
+ if (UI != Store && UI->hasMetadata(LLVMContext::MD_tbaa)) {
+ AllocaInstUsersWithTBAA.insert(UI);
+ }
if (UI->hasMetadata(LLVMContext::MD_noalias))
NoAliasInstrs.insert(UI);
if (!ModRefCallback(UI))
@@ -1621,7 +1626,8 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
return true;
};
- if (!CaptureTrackingWithModRef(DestAlloca, DestModRefCallback))
+ if (!CaptureTrackingWithModRef(DestAlloca, DestModRefCallback,
+ DestAllocaInstUsers))
return false;
// Bailout if Dest may have any ModRef before Store.
if (!ReachabilityWorklist.empty() &&
@@ -1647,7 +1653,8 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
return true;
};
- if (!CaptureTrackingWithModRef(SrcAlloca, SrcModRefCallback))
+ if (!CaptureTrackingWithModRef(SrcAlloca, SrcModRefCallback,
+ SrcAllocaInstUsers))
return false;
// We can do the transformation. First, move the SrcAlloca to the start of the
@@ -1681,6 +1688,15 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
for (Instruction *I : NoAliasInstrs)
I->setMetadata(LLVMContext::MD_noalias, nullptr);
+ // If we merge two allocas we need to uniform alias tags as well
+ if (!SrcAllocaInstUsers.empty()) {
+ MDNode *mergeTBAA =
+ (*SrcAllocaInstUsers.begin())->getMetadata(LLVMContext::MD_tbaa);
+ for (Instruction *it : DestAllocaInstUsers) {
+ it->setMetadata(LLVMContext::MD_tbaa, mergeTBAA);
+ }
+ }
+
LLVM_DEBUG(dbgs() << "Stack Move: Performed staack-move optimization\n");
NumStackMove++;
return true;
diff --git a/llvm/test/Transforms/MemCpyOpt/memcpy-tbaa.ll b/llvm/test/Transforms/MemCpyOpt/memcpy-tbaa.ll
new file mode 100644
index 0000000000000..4362892f0e8c2
--- /dev/null
+++ b/llvm/test/Transforms/MemCpyOpt/memcpy-tbaa.ll
@@ -0,0 +1,99 @@
+; RUN: opt < %s -passes=memcpyopt,dse -S -verify-memoryssa | FileCheck %s
+; The aim of this test is to check if MemCpyOpt pass merges alias tags
+; after memcpy optimization
+
+; ModuleID = 'FIRModule'
+source_filename = "FIRModule"
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+ at data_arr = internal unnamed_addr constant [31 x float] [float 0x3E68DA0CA0000000, float 0x3E692863A0000000, float 0x3E6AEF5000000000, float 0x3E6E2272C0000000, float 0x3E7271B720000000, float 0x3E777DA440000000, float 0x3E7E8C46C0000000, float 0x3E8458EFC0000000, float 0x3E8D0123C0000000, float 0x3E95E78260000000, float 0x3EA0AB7AC0000000, float 0x3EA89F4B40000000, float 0x3EB10FFB60000000, float 0x3EB5F1D140000000, float 0x3EBB435260000000, float 0x3EC0DE9700000000, float 0x3EC51B11A0000000, float 0x3ECA419FC0000000, float 0x3ED01B2B20000000, float 0x3ED3B9CEC0000000, float 0x3ED7028C40000000, float 0x3EDA60C320000000, float 0x3EDD54AD40000000, float 0x3EDF6E9F00000000, float 0x3EE130BB20000000, float 0x3EE4332400000000, float 0x3EE7575F80000000, float 0x3EE8088A60000000, float 0x3EE3B0AE60000000, float 0x3ED9BB6800000000, float 0x3ED9BB6800000000]
+
+; CHECK-LABEL: @test(
+; CHECK: [[ARR_UNDER_TEST:%.*]] = alloca [31 x float], align 4
+; CHECK: store float 0x3E6AA51880000000, ptr [[ARR_UNDER_TEST]], align 4, !tbaa [[ARR_TAG:!.[0-9]+]]
+; CHECK-LABEL: init_loop:
+; CHECK: store float [[TMP0:%.*]], ptr [[TMP1:%.*]], align 4, !tbaa [[ARR_TAG]]
+; CHECK-LABEL: loop:
+; CHECK: [[TMP2:%.*]] = getelementptr float, ptr [[ARR_UNDER_TEST]], i64 [[TMP3:%.*]]
+; CHECK: [[TMP4:%.*]] = load float, ptr [[TMP2]], align 4, !tbaa [[ARR_TAG]]
+define void @test(ptr captures(none) %0, ptr readonly captures(none) %1, ptr readonly captures(none) %2, ptr readonly captures(none) %3) local_unnamed_addr #0 {
+ %5 = alloca [32 x float], align 4
+ %6 = alloca [31 x float], align 4
+ %7 = alloca [31 x float], align 4
+ %8 = load i32, ptr %2, align 4, !tbaa !4
+ %9 = sext i32 %8 to i64
+ %10 = load i32, ptr %3, align 4, !tbaa !10
+ %11 = add i32 %10, 1
+ %12 = sext i32 %11 to i64
+ %13 = sub nsw i64 %12, %9
+ %14 = tail call i64 @llvm.smax.i64(i64 %13, i64 -1)
+ %15 = add nsw i64 %14, 1
+ %16 = alloca float, i64 %15, align 4
+ store float 0x3E6AA51880000000, ptr %7, align 4, !tbaa !12
+ br label %init_loop
+
+init_loop:
+ %19 = phi float [ 0x3E68DA0CA0000000, %4 ], [ %22, %init_loop ]
+ %indvars.iv = phi i64 [ 2, %4 ], [ %indvars.iv.next, %init_loop ]
+ %20 = add nsw i64 %indvars.iv, -1
+ %21 = getelementptr float, ptr @data_arr, i64 %20
+ %22 = load float, ptr %21, align 4, !tbaa !15
+ %23 = fsub contract float %22, %19
+ %33 = getelementptr float, ptr %7, i64 %20
+ store float %23, ptr %33, align 4, !tbaa !12
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond.not = icmp eq i64 %indvars.iv.next, 32
+ br i1 %exitcond.not, label %.preheader55.preheader, label %init_loop
+
+.preheader55.preheader:
+ call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 4 dereferenceable(124) %6, ptr noundef nonnull align 4 dereferenceable(124) %7, i64 124, i1 false), !tbaa !22
+ %154 = icmp sgt i64 %13, -1
+ br i1 %154, label %loop, label %._crit_edge56
+
+loop: ; preds = %.preheader, %211
+ %indvars.iv73 = phi i64 [ 0, %.preheader55.preheader ], [ %indvars.iv.next74, %loop ]
+ %indvars.iv.next74 = add nuw nsw i64 %indvars.iv73, 1
+ %223 = getelementptr float, ptr %6, i64 %indvars.iv73
+ %225 = load float, ptr %223, align 4, !tbaa !31
+ %exitcond76.not = icmp eq i64 %indvars.iv.next74, 32
+ br i1 %exitcond76.not, label %loop, label %._crit_edge56
+
+._crit_edge56: ; preds = %loop, %._crit_edge
+ ret void
+}
+
+; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare i64 @llvm.smax.i64(i64, i64) #1
+
+; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: readwrite)
+declare void @llvm.memcpy.p0.p0.i64(ptr noalias writeonly captures(none), ptr noalias readonly captures(none), i64, i1 immarg) #2
+
+attributes #0 = { "target-cpu"="x86-64" }
+attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+attributes #2 = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }
+
+!llvm.module.flags = !{!0, !1, !2}
+!llvm.ident = !{!3}
+
+!0 = !{i32 2, !"Debug Info Version", i32 3}
+!1 = !{i32 8, !"PIC Level", i32 2}
+!2 = !{i32 7, !"PIE Level", i32 2}
+!3 = !{!"flang version 21.0.0 (https://github.com/llvm/llvm-project.git 4d79f420ce5b5100f72f720eab2d3881f97abd0d)"}
+!4 = !{!5, !5, i64 0}
+!5 = !{!"dummy arg data/param_1", !6, i64 0}
+!6 = !{!"dummy arg data", !7, i64 0}
+!7 = !{!"any data access", !8, i64 0}
+!8 = !{!"any access", !9, i64 0}
+!9 = !{!"Flang function root test"}
+!10 = !{!11, !11, i64 0}
+!11 = !{!"dummy arg data/param_2", !6, i64 0}
+!12 = !{!13, !13, i64 0}
+!13 = !{!"allocated data/test_array_a", !14, i64 0}
+!14 = !{!"allocated data", !7, i64 0}
+!15 = !{!16, !16, i64 0}
+!16 = !{!"global data/data_arr", !17, i64 0}
+!17 = !{!"global data", !7, i64 0}
+!22 = !{!14, !14, i64 0}
+!31 = !{!32, !32, i64 0}
+!32 = !{!"allocated data/test_array_b", !14, i64 0}
>From 5e04c39eb96a47dc7eb473ec9a737c41386e58cb Mon Sep 17 00:00:00 2001
From: Dominik Adamski <dominik.adamski at amd.com>
Date: Mon, 3 Mar 2025 14:35:09 -0600
Subject: [PATCH 2/6] Simplified test
---
llvm/test/Transforms/MemCpyOpt/memcpy-tbaa.ll | 73 ++++++++-----------
1 file changed, 31 insertions(+), 42 deletions(-)
diff --git a/llvm/test/Transforms/MemCpyOpt/memcpy-tbaa.ll b/llvm/test/Transforms/MemCpyOpt/memcpy-tbaa.ll
index 4362892f0e8c2..b5c33f1dfc750 100644
--- a/llvm/test/Transforms/MemCpyOpt/memcpy-tbaa.ll
+++ b/llvm/test/Transforms/MemCpyOpt/memcpy-tbaa.ll
@@ -2,60 +2,58 @@
; The aim of this test is to check if MemCpyOpt pass merges alias tags
; after memcpy optimization
+; High level overview of this test
+; Input:
+; function test() {
+; //declaration of local arrays a and b
+; //initialization of array b in init_loop
+; //initialization of array a -> copy of array b
+; //use array a in loop
+; }
+;
+; Expected output after optimization:
+; function test() {
+; //declaration of local array b
+; //initialization of array b in init_loop
+; //use array b in loop
+; }
+
; ModuleID = 'FIRModule'
source_filename = "FIRModule"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
- at data_arr = internal unnamed_addr constant [31 x float] [float 0x3E68DA0CA0000000, float 0x3E692863A0000000, float 0x3E6AEF5000000000, float 0x3E6E2272C0000000, float 0x3E7271B720000000, float 0x3E777DA440000000, float 0x3E7E8C46C0000000, float 0x3E8458EFC0000000, float 0x3E8D0123C0000000, float 0x3E95E78260000000, float 0x3EA0AB7AC0000000, float 0x3EA89F4B40000000, float 0x3EB10FFB60000000, float 0x3EB5F1D140000000, float 0x3EBB435260000000, float 0x3EC0DE9700000000, float 0x3EC51B11A0000000, float 0x3ECA419FC0000000, float 0x3ED01B2B20000000, float 0x3ED3B9CEC0000000, float 0x3ED7028C40000000, float 0x3EDA60C320000000, float 0x3EDD54AD40000000, float 0x3EDF6E9F00000000, float 0x3EE130BB20000000, float 0x3EE4332400000000, float 0x3EE7575F80000000, float 0x3EE8088A60000000, float 0x3EE3B0AE60000000, float 0x3ED9BB6800000000, float 0x3ED9BB6800000000]
-
; CHECK-LABEL: @test(
; CHECK: [[ARR_UNDER_TEST:%.*]] = alloca [31 x float], align 4
-; CHECK: store float 0x3E6AA51880000000, ptr [[ARR_UNDER_TEST]], align 4, !tbaa [[ARR_TAG:!.[0-9]+]]
; CHECK-LABEL: init_loop:
-; CHECK: store float [[TMP0:%.*]], ptr [[TMP1:%.*]], align 4, !tbaa [[ARR_TAG]]
+; CHECK: [[TMP0:%.*]] = getelementptr float, ptr [[ARR_UNDER_TEST]],
+; CHECK: store float 0x3E6AA51880000000, ptr [[TMP0]], align 4, !tbaa [[ARR_TAG:![0-9]+]]
; CHECK-LABEL: loop:
-; CHECK: [[TMP2:%.*]] = getelementptr float, ptr [[ARR_UNDER_TEST]], i64 [[TMP3:%.*]]
-; CHECK: [[TMP4:%.*]] = load float, ptr [[TMP2]], align 4, !tbaa [[ARR_TAG]]
-define void @test(ptr captures(none) %0, ptr readonly captures(none) %1, ptr readonly captures(none) %2, ptr readonly captures(none) %3) local_unnamed_addr #0 {
- %5 = alloca [32 x float], align 4
- %6 = alloca [31 x float], align 4
- %7 = alloca [31 x float], align 4
- %8 = load i32, ptr %2, align 4, !tbaa !4
- %9 = sext i32 %8 to i64
- %10 = load i32, ptr %3, align 4, !tbaa !10
- %11 = add i32 %10, 1
- %12 = sext i32 %11 to i64
- %13 = sub nsw i64 %12, %9
- %14 = tail call i64 @llvm.smax.i64(i64 %13, i64 -1)
- %15 = add nsw i64 %14, 1
- %16 = alloca float, i64 %15, align 4
- store float 0x3E6AA51880000000, ptr %7, align 4, !tbaa !12
+; CHECK: [[TMP2:%.*]] = getelementptr float, ptr [[ARR_UNDER_TEST]], i64 [[TMP3:%.*]]
+; CHECK: [[TMP4:%.*]] = load float, ptr [[TMP2]], align 4, !tbaa [[ARR_TAG]]
+
+define void @test() local_unnamed_addr #0 {
+ %test_array_a = alloca [31 x float], align 4
+ %test_array_b = alloca [31 x float], align 4
br label %init_loop
init_loop:
- %19 = phi float [ 0x3E68DA0CA0000000, %4 ], [ %22, %init_loop ]
- %indvars.iv = phi i64 [ 2, %4 ], [ %indvars.iv.next, %init_loop ]
- %20 = add nsw i64 %indvars.iv, -1
- %21 = getelementptr float, ptr @data_arr, i64 %20
- %22 = load float, ptr %21, align 4, !tbaa !15
- %23 = fsub contract float %22, %19
- %33 = getelementptr float, ptr %7, i64 %20
- store float %23, ptr %33, align 4, !tbaa !12
+ %indvars.iv = phi i64 [ 0, %0 ], [ %indvars.iv.next, %init_loop ]
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %1 = getelementptr float, ptr %test_array_b, i64 %indvars.iv
+ store float 0x3E6AA51880000000, ptr %1, align 4, !tbaa !12
%exitcond.not = icmp eq i64 %indvars.iv.next, 32
br i1 %exitcond.not, label %.preheader55.preheader, label %init_loop
.preheader55.preheader:
- call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 4 dereferenceable(124) %6, ptr noundef nonnull align 4 dereferenceable(124) %7, i64 124, i1 false), !tbaa !22
- %154 = icmp sgt i64 %13, -1
- br i1 %154, label %loop, label %._crit_edge56
+ call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 4 dereferenceable(124) %test_array_a, ptr noundef nonnull align 4 dereferenceable(124) %test_array_b, i64 124, i1 false)
+ br label %loop
loop: ; preds = %.preheader, %211
%indvars.iv73 = phi i64 [ 0, %.preheader55.preheader ], [ %indvars.iv.next74, %loop ]
%indvars.iv.next74 = add nuw nsw i64 %indvars.iv73, 1
- %223 = getelementptr float, ptr %6, i64 %indvars.iv73
- %225 = load float, ptr %223, align 4, !tbaa !31
+ %2 = getelementptr float, ptr %test_array_a, i64 %indvars.iv73
+ %3 = load float, ptr %2, align 4, !tbaa !31
%exitcond76.not = icmp eq i64 %indvars.iv.next74, 32
br i1 %exitcond76.not, label %loop, label %._crit_edge56
@@ -80,20 +78,11 @@ attributes #2 = { nocallback nofree nounwind willreturn memory(argmem: readwrite
!1 = !{i32 8, !"PIC Level", i32 2}
!2 = !{i32 7, !"PIE Level", i32 2}
!3 = !{!"flang version 21.0.0 (https://github.com/llvm/llvm-project.git 4d79f420ce5b5100f72f720eab2d3881f97abd0d)"}
-!4 = !{!5, !5, i64 0}
-!5 = !{!"dummy arg data/param_1", !6, i64 0}
-!6 = !{!"dummy arg data", !7, i64 0}
!7 = !{!"any data access", !8, i64 0}
!8 = !{!"any access", !9, i64 0}
!9 = !{!"Flang function root test"}
-!10 = !{!11, !11, i64 0}
-!11 = !{!"dummy arg data/param_2", !6, i64 0}
!12 = !{!13, !13, i64 0}
!13 = !{!"allocated data/test_array_a", !14, i64 0}
!14 = !{!"allocated data", !7, i64 0}
-!15 = !{!16, !16, i64 0}
-!16 = !{!"global data/data_arr", !17, i64 0}
-!17 = !{!"global data", !7, i64 0}
-!22 = !{!14, !14, i64 0}
!31 = !{!32, !32, i64 0}
!32 = !{!"allocated data/test_array_b", !14, i64 0}
>From dddcc8a347fd2b5ef0ba29fe03cfd6495e99ac7f Mon Sep 17 00:00:00 2001
From: Dominik Adamski <dominik.adamski at amd.com>
Date: Tue, 18 Mar 2025 11:42:11 +0100
Subject: [PATCH 3/6] Fix style
Co-authored-by: Shilei Tian <i at tianshilei.me>
---
llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp | 5 ++---
1 file changed, 2 insertions(+), 3 deletions(-)
diff --git a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index 10342a6b32725..522da906afba9 100644
--- a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -1692,9 +1692,8 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
if (!SrcAllocaInstUsers.empty()) {
MDNode *mergeTBAA =
(*SrcAllocaInstUsers.begin())->getMetadata(LLVMContext::MD_tbaa);
- for (Instruction *it : DestAllocaInstUsers) {
- it->setMetadata(LLVMContext::MD_tbaa, mergeTBAA);
- }
+ for (Instruction *It : DestAllocaInstUsers)
+ It->setMetadata(LLVMContext::MD_tbaa, mergeTBAA);
}
LLVM_DEBUG(dbgs() << "Stack Move: Performed staack-move optimization\n");
>From ae4139ac82f03a535e8a70bc45e788da8a444df1 Mon Sep 17 00:00:00 2001
From: Dominik Adamski <dominik.adamski at amd.com>
Date: Tue, 18 Mar 2025 11:46:57 +0100
Subject: [PATCH 4/6] Fix style
Co-authored-by: Shilei Tian <i at tianshilei.me>
---
llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index 522da906afba9..bf19a921126f1 100644
--- a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -1571,9 +1571,8 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
continue;
}
}
- if (UI != Store && UI->hasMetadata(LLVMContext::MD_tbaa)) {
+ if (UI != Store && UI->hasMetadata(LLVMContext::MD_tbaa))
AllocaInstUsersWithTBAA.insert(UI);
- }
if (UI->hasMetadata(LLVMContext::MD_noalias))
NoAliasInstrs.insert(UI);
if (!ModRefCallback(UI))
>From 3ab8a710882123b59bbf8c934e051aabaf4529e0 Mon Sep 17 00:00:00 2001
From: Dominik Adamski <dominik.adamski at amd.com>
Date: Tue, 18 Mar 2025 06:33:00 -0500
Subject: [PATCH 5/6] Applied remarks
---
llvm/test/Transforms/MemCpyOpt/memcpy-tbaa.ll | 79 ++++++++-----------
1 file changed, 32 insertions(+), 47 deletions(-)
diff --git a/llvm/test/Transforms/MemCpyOpt/memcpy-tbaa.ll b/llvm/test/Transforms/MemCpyOpt/memcpy-tbaa.ll
index b5c33f1dfc750..a2bb656bf196c 100644
--- a/llvm/test/Transforms/MemCpyOpt/memcpy-tbaa.ll
+++ b/llvm/test/Transforms/MemCpyOpt/memcpy-tbaa.ll
@@ -1,38 +1,29 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt < %s -passes=memcpyopt,dse -S -verify-memoryssa | FileCheck %s
-; The aim of this test is to check if MemCpyOpt pass merges alias tags
-; after memcpy optimization
-; High level overview of this test
-; Input:
-; function test() {
-; //declaration of local arrays a and b
-; //initialization of array b in init_loop
-; //initialization of array a -> copy of array b
-; //use array a in loop
-; }
+define void @test() local_unnamed_addr {
+; CHECK-LABEL: define void @test() local_unnamed_addr {
+; CHECK-NEXT: [[TEST_ARRAY_B:%.*]] = alloca [31 x float], align 4
+; CHECK-NEXT: br label %[[INIT_LOOP:.*]]
+; CHECK: [[INIT_LOOP]]:
+; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[INIT_LOOP]] ]
+; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr float, ptr [[TEST_ARRAY_B]], i64 [[INDVARS_IV]]
+; CHECK-NEXT: store float 0x3E6AA51880000000, ptr [[TMP1]], align 4, !tbaa [[TBAA0:![0-9]+]]
+; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 32
+; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[DOTPREHEADER55_PREHEADER:.*]], label %[[INIT_LOOP]]
+; CHECK: [[_PREHEADER55_PREHEADER:.*:]]
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[INDVARS_IV73:%.*]] = phi i64 [ 0, %[[DOTPREHEADER55_PREHEADER]] ], [ [[INDVARS_IV_NEXT74:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[INDVARS_IV_NEXT74]] = add nuw nsw i64 [[INDVARS_IV73]], 1
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr float, ptr [[TEST_ARRAY_B]], i64 [[INDVARS_IV73]]
+; CHECK-NEXT: [[TMP3:%.*]] = load float, ptr [[TMP2]], align 4, !tbaa [[TBAA0]]
+; CHECK-NEXT: [[EXITCOND76_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT74]], 32
+; CHECK-NEXT: br i1 [[EXITCOND76_NOT]], label %[[LOOP]], [[DOT_CRIT_EDGE56:label %.*]]
+; CHECK: [[__CRIT_EDGE56:.*:]]
+; CHECK-NEXT: ret void
;
-; Expected output after optimization:
-; function test() {
-; //declaration of local array b
-; //initialization of array b in init_loop
-; //use array b in loop
-; }
-
-; ModuleID = 'FIRModule'
-source_filename = "FIRModule"
-target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
-target triple = "x86_64-unknown-linux-gnu"
-
-; CHECK-LABEL: @test(
-; CHECK: [[ARR_UNDER_TEST:%.*]] = alloca [31 x float], align 4
-; CHECK-LABEL: init_loop:
-; CHECK: [[TMP0:%.*]] = getelementptr float, ptr [[ARR_UNDER_TEST]],
-; CHECK: store float 0x3E6AA51880000000, ptr [[TMP0]], align 4, !tbaa [[ARR_TAG:![0-9]+]]
-; CHECK-LABEL: loop:
-; CHECK: [[TMP2:%.*]] = getelementptr float, ptr [[ARR_UNDER_TEST]], i64 [[TMP3:%.*]]
-; CHECK: [[TMP4:%.*]] = load float, ptr [[TMP2]], align 4, !tbaa [[ARR_TAG]]
-
-define void @test() local_unnamed_addr #0 {
%test_array_a = alloca [31 x float], align 4
%test_array_b = alloca [31 x float], align 4
br label %init_loop
@@ -61,23 +52,9 @@ loop: ; preds = %.preheader, %211
ret void
}
-; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
-declare i64 @llvm.smax.i64(i64, i64) #1
-
; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: readwrite)
-declare void @llvm.memcpy.p0.p0.i64(ptr noalias writeonly captures(none), ptr noalias readonly captures(none), i64, i1 immarg) #2
-
-attributes #0 = { "target-cpu"="x86-64" }
-attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
-attributes #2 = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }
-
-!llvm.module.flags = !{!0, !1, !2}
-!llvm.ident = !{!3}
+declare void @llvm.memcpy.p0.p0.i64(ptr noalias writeonly captures(none), ptr noalias readonly captures(none), i64, i1 immarg)
-!0 = !{i32 2, !"Debug Info Version", i32 3}
-!1 = !{i32 8, !"PIC Level", i32 2}
-!2 = !{i32 7, !"PIE Level", i32 2}
-!3 = !{!"flang version 21.0.0 (https://github.com/llvm/llvm-project.git 4d79f420ce5b5100f72f720eab2d3881f97abd0d)"}
!7 = !{!"any data access", !8, i64 0}
!8 = !{!"any access", !9, i64 0}
!9 = !{!"Flang function root test"}
@@ -86,3 +63,11 @@ attributes #2 = { nocallback nofree nounwind willreturn memory(argmem: readwrite
!14 = !{!"allocated data", !7, i64 0}
!31 = !{!32, !32, i64 0}
!32 = !{!"allocated data/test_array_b", !14, i64 0}
+;.
+; CHECK: [[TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0}
+; CHECK: [[META1]] = !{!"allocated data/test_array_a", [[META2:![0-9]+]], i64 0}
+; CHECK: [[META2]] = !{!"allocated data", [[META3:![0-9]+]], i64 0}
+; CHECK: [[META3]] = !{!"any data access", [[META4:![0-9]+]], i64 0}
+; CHECK: [[META4]] = !{!"any access", [[META5:![0-9]+]], i64 0}
+; CHECK: [[META5]] = !{!"Flang function root test"}
+;.
>From 0824f68e0a20314f3d0e0e1ede38924bcf6e3373 Mon Sep 17 00:00:00 2001
From: Dominik Adamski <dominik.adamski at amd.com>
Date: Fri, 21 Mar 2025 06:36:34 -0500
Subject: [PATCH 6/6] Applied remarks
---
.../lib/Transforms/Scalar/MemCpyOptimizer.cpp | 20 ++-
llvm/test/Transforms/MemCpyOpt/memcpy-tbaa.ll | 118 +++++++++---------
2 files changed, 70 insertions(+), 68 deletions(-)
diff --git a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index bf19a921126f1..48504f6c6a4b2 100644
--- a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -1516,8 +1516,7 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
SmallVector<Instruction *, 4> LifetimeMarkers;
SmallSet<Instruction *, 4> NoAliasInstrs;
bool SrcNotDom = false;
- SmallSet<Instruction *, 4> SrcAllocaInstUsers;
- SmallSet<Instruction *, 4> DestAllocaInstUsers;
+ SmallSet<Instruction *, 4> OptimizedAllocaInstUsers;
// Recursively track the user and check whether modified alias exist.
auto IsDereferenceableOrNull = [](Value *V, const DataLayout &DL) -> bool {
@@ -1571,7 +1570,8 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
continue;
}
}
- if (UI != Store && UI->hasMetadata(LLVMContext::MD_tbaa))
+ if (UI != Store && (UI->hasMetadata(LLVMContext::MD_tbaa) ||
+ UI->hasMetadata(LLVMContext::MD_tbaa_struct)))
AllocaInstUsersWithTBAA.insert(UI);
if (UI->hasMetadata(LLVMContext::MD_noalias))
NoAliasInstrs.insert(UI);
@@ -1626,7 +1626,7 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
};
if (!CaptureTrackingWithModRef(DestAlloca, DestModRefCallback,
- DestAllocaInstUsers))
+ OptimizedAllocaInstUsers))
return false;
// Bailout if Dest may have any ModRef before Store.
if (!ReachabilityWorklist.empty() &&
@@ -1653,7 +1653,7 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
};
if (!CaptureTrackingWithModRef(SrcAlloca, SrcModRefCallback,
- SrcAllocaInstUsers))
+ OptimizedAllocaInstUsers))
return false;
// We can do the transformation. First, move the SrcAlloca to the start of the
@@ -1687,12 +1687,10 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
for (Instruction *I : NoAliasInstrs)
I->setMetadata(LLVMContext::MD_noalias, nullptr);
- // If we merge two allocas we need to uniform alias tags as well
- if (!SrcAllocaInstUsers.empty()) {
- MDNode *mergeTBAA =
- (*SrcAllocaInstUsers.begin())->getMetadata(LLVMContext::MD_tbaa);
- for (Instruction *It : DestAllocaInstUsers)
- It->setMetadata(LLVMContext::MD_tbaa, mergeTBAA);
+ // Remove !tbaa and !tbaa_struct from the metadata, since they are invalid.
+ for (Instruction *I : OptimizedAllocaInstUsers) {
+ I->setMetadata(LLVMContext::MD_tbaa, nullptr);
+ I->setMetadata(LLVMContext::MD_tbaa_struct, nullptr);
}
LLVM_DEBUG(dbgs() << "Stack Move: Performed staack-move optimization\n");
diff --git a/llvm/test/Transforms/MemCpyOpt/memcpy-tbaa.ll b/llvm/test/Transforms/MemCpyOpt/memcpy-tbaa.ll
index a2bb656bf196c..6e446e5ff267c 100644
--- a/llvm/test/Transforms/MemCpyOpt/memcpy-tbaa.ll
+++ b/llvm/test/Transforms/MemCpyOpt/memcpy-tbaa.ll
@@ -4,70 +4,74 @@
define void @test() local_unnamed_addr {
; CHECK-LABEL: define void @test() local_unnamed_addr {
; CHECK-NEXT: [[TEST_ARRAY_B:%.*]] = alloca [31 x float], align 4
-; CHECK-NEXT: br label %[[INIT_LOOP:.*]]
-; CHECK: [[INIT_LOOP]]:
-; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[INIT_LOOP]] ]
-; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
-; CHECK-NEXT: [[TMP1:%.*]] = getelementptr float, ptr [[TEST_ARRAY_B]], i64 [[INDVARS_IV]]
-; CHECK-NEXT: store float 0x3E6AA51880000000, ptr [[TMP1]], align 4, !tbaa [[TBAA0:![0-9]+]]
-; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 32
-; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[DOTPREHEADER55_PREHEADER:.*]], label %[[INIT_LOOP]]
-; CHECK: [[_PREHEADER55_PREHEADER:.*:]]
-; CHECK-NEXT: br label %[[LOOP:.*]]
-; CHECK: [[LOOP]]:
-; CHECK-NEXT: [[INDVARS_IV73:%.*]] = phi i64 [ 0, %[[DOTPREHEADER55_PREHEADER]] ], [ [[INDVARS_IV_NEXT74:%.*]], %[[LOOP]] ]
-; CHECK-NEXT: [[INDVARS_IV_NEXT74]] = add nuw nsw i64 [[INDVARS_IV73]], 1
-; CHECK-NEXT: [[TMP2:%.*]] = getelementptr float, ptr [[TEST_ARRAY_B]], i64 [[INDVARS_IV73]]
-; CHECK-NEXT: [[TMP3:%.*]] = load float, ptr [[TMP2]], align 4, !tbaa [[TBAA0]]
-; CHECK-NEXT: [[EXITCOND76_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT74]], 32
-; CHECK-NEXT: br i1 [[EXITCOND76_NOT]], label %[[LOOP]], [[DOT_CRIT_EDGE56:label %.*]]
-; CHECK: [[__CRIT_EDGE56:.*:]]
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr float, ptr [[TEST_ARRAY_B]], i64 1
+; CHECK-NEXT: store float 0x3E6AA51880000000, ptr [[TMP1]], align 4
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr float, ptr [[TEST_ARRAY_B]], i64 1
+; CHECK-NEXT: [[TMP3:%.*]] = load float, ptr [[TMP2]], align 4
; CHECK-NEXT: ret void
;
%test_array_a = alloca [31 x float], align 4
%test_array_b = alloca [31 x float], align 4
- br label %init_loop
-
-init_loop:
- %indvars.iv = phi i64 [ 0, %0 ], [ %indvars.iv.next, %init_loop ]
- %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
- %1 = getelementptr float, ptr %test_array_b, i64 %indvars.iv
- store float 0x3E6AA51880000000, ptr %1, align 4, !tbaa !12
- %exitcond.not = icmp eq i64 %indvars.iv.next, 32
- br i1 %exitcond.not, label %.preheader55.preheader, label %init_loop
-
-.preheader55.preheader:
+ %1 = getelementptr float, ptr %test_array_b, i64 1
+ store float 0x3E6AA51880000000, ptr %1, align 4, !tbaa !4
call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 4 dereferenceable(124) %test_array_a, ptr noundef nonnull align 4 dereferenceable(124) %test_array_b, i64 124, i1 false)
- br label %loop
+ %2 = getelementptr float, ptr %test_array_a, i64 1
+ %3 = load float, ptr %2, align 4, !tbaa !7
+ ret void
+}
-loop: ; preds = %.preheader, %211
- %indvars.iv73 = phi i64 [ 0, %.preheader55.preheader ], [ %indvars.iv.next74, %loop ]
- %indvars.iv.next74 = add nuw nsw i64 %indvars.iv73, 1
- %2 = getelementptr float, ptr %test_array_a, i64 %indvars.iv73
- %3 = load float, ptr %2, align 4, !tbaa !31
- %exitcond76.not = icmp eq i64 %indvars.iv.next74, 32
- br i1 %exitcond76.not, label %loop, label %._crit_edge56
+%struct.Outer = type { float, double, %struct.Inner }
+%struct.Inner = type { i32, float }
-._crit_edge56: ; preds = %loop, %._crit_edge
- ret void
+; Function Attrs: nounwind uwtable
+define dso_local float @f() {
+; CHECK-LABEL: define dso_local float @f() {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[TEST1:%.*]] = alloca [[STRUCT_OUTER:%.*]], align 8
+; CHECK-NEXT: [[F:%.*]] = getelementptr inbounds nuw [[STRUCT_OUTER]], ptr [[TEST1]], i32 0, i32 0
+; CHECK-NEXT: store float 0.000000e+00, ptr [[F]], align 8
+; CHECK-NEXT: [[F1:%.*]] = getelementptr inbounds nuw [[STRUCT_OUTER]], ptr [[TEST1]], i32 0, i32 0
+; CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F1]], align 8
+; CHECK-NEXT: [[ADD:%.*]] = fadd float [[TMP0]], 2.000000e+00
+; CHECK-NEXT: store float [[ADD]], ptr [[F1]], align 8
+; CHECK-NEXT: [[F2:%.*]] = getelementptr inbounds nuw [[STRUCT_OUTER]], ptr [[TEST1]], i32 0, i32 0
+; CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F2]], align 8
+; CHECK-NEXT: ret float [[TMP1]]
+;
+entry:
+ %test = alloca %struct.Outer, align 8
+ %test1 = alloca %struct.Outer, align 8
+ %f = getelementptr inbounds nuw %struct.Outer, ptr %test1, i32 0, i32 0
+ store float 0.000000e+00, ptr %f, align 8, !tbaa !9
+ %inner_a = getelementptr inbounds nuw %struct.Outer, ptr %test1, i32 0, i32 2
+ %i = getelementptr inbounds nuw %struct.Inner, ptr %inner_a, i32 0, i32 0
+ store i32 0, ptr %i, align 8, !tbaa !17
+ call void @llvm.memcpy.p0.p0.i64(ptr align 8 %test, ptr align 8 %test1, i64 24, i1 false)
+ %f1 = getelementptr inbounds nuw %struct.Outer, ptr %test, i32 0, i32 0
+ %0 = load float, ptr %f1, align 8, !tbaa !9
+ %add = fadd float %0, 2.000000e+00
+ store float %add, ptr %f1, align 8, !tbaa !9
+ %f2 = getelementptr inbounds nuw %struct.Outer, ptr %test, i32 0, i32 0
+ %1 = load float, ptr %f2, align 8, !tbaa !9
+ ret float %1
}
-; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: readwrite)
-declare void @llvm.memcpy.p0.p0.i64(ptr noalias writeonly captures(none), ptr noalias readonly captures(none), i64, i1 immarg)
+!1 = !{!"any data access", !2, i64 0}
+!2 = !{!"any access", !3, i64 0}
+!3 = !{!"Flang function root test"}
+!4 = !{!5, !5, i64 0}
+!5 = !{!"allocated data/test_array_a", !6, i64 0}
+!6 = !{!"allocated data", !1, i64 0}
+!7 = !{!8, !8, i64 0}
+!8 = !{!"allocated data/test_array_b", !6, i64 0}
+!9 = !{!10, !11, i64 0}
+!10 = !{!"Outer", !11, i64 0, !14, i64 8, !15, i64 16}
+!11 = !{!"float", !12, i64 0}
+!12 = !{!"omnipotent char", !13, i64 0}
+!13 = !{!"Simple C/C++ TBAA"}
+!14 = !{!"double", !12, i64 0}
+!15 = !{!"Inner", !16, i64 0, !11, i64 4}
+!16 = !{!"int", !12, i64 0}
+!17 = !{!10, !16, i64 16}
+
-!7 = !{!"any data access", !8, i64 0}
-!8 = !{!"any access", !9, i64 0}
-!9 = !{!"Flang function root test"}
-!12 = !{!13, !13, i64 0}
-!13 = !{!"allocated data/test_array_a", !14, i64 0}
-!14 = !{!"allocated data", !7, i64 0}
-!31 = !{!32, !32, i64 0}
-!32 = !{!"allocated data/test_array_b", !14, i64 0}
-;.
-; CHECK: [[TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0}
-; CHECK: [[META1]] = !{!"allocated data/test_array_a", [[META2:![0-9]+]], i64 0}
-; CHECK: [[META2]] = !{!"allocated data", [[META3:![0-9]+]], i64 0}
-; CHECK: [[META3]] = !{!"any data access", [[META4:![0-9]+]], i64 0}
-; CHECK: [[META4]] = !{!"any access", [[META5:![0-9]+]], i64 0}
-; CHECK: [[META5]] = !{!"Flang function root test"}
-;.
More information about the llvm-commits
mailing list