[llvm-branch-commits] [llvm] [SROA] Use !tbaa instead of !tbaa.struct if op matches field. (PR #81289)
Florian Hahn via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Fri Feb 9 10:29:30 PST 2024
https://github.com/fhahn created https://github.com/llvm/llvm-project/pull/81289
If a split memory access introduced by SROA accesses precisely a single field of the original operation's !tbaa.struct, use the !tbaa tag for the accessed field directly instead of the full !tbaa.struct.
InstCombine already had a similar logic.
Motivation for this and follow-on patches is to improve codegen for libc++, where using memcpy limits optimizations, like vectorization for code iteration over std::vector<std::complex<float>>: https://godbolt.org/z/f3vqYos3c
Depends on https://github.com/llvm/llvm-project/pull/81285.
>From 90639e9131670863ebb4c199a9861b2b0094d601 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Fri, 9 Feb 2024 15:17:09 +0000
Subject: [PATCH] [SROA] Use !tbaa instead of !tbaa.struct if op matches field.
If a split memory access introduced by SROA accesses precisely a single
field of the original operation's !tbaa.struct, use the !tbaa tag for
the accessed field directly instead of the full !tbaa.struct.
InstCombine already had a similar logic.
Motivation for this and follow-on patches is to improve codegen for
libc++, where using memcpy limits optimizations, like vectorization for
code iteration over std::vector<std::complex<float>>:
https://godbolt.org/z/f3vqYos3c
Depends on https://github.com/llvm/llvm-project/pull/81285.
---
llvm/include/llvm/IR/Metadata.h | 2 +
llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp | 13 ++++++
llvm/lib/Transforms/Scalar/SROA.cpp | 48 ++++++++++++++------
llvm/test/Transforms/SROA/tbaa-struct2.ll | 21 ++++-----
llvm/test/Transforms/SROA/tbaa-struct3.ll | 16 +++----
5 files changed, 67 insertions(+), 33 deletions(-)
diff --git a/llvm/include/llvm/IR/Metadata.h b/llvm/include/llvm/IR/Metadata.h
index 6f23ac44dee968..33363a271d4823 100644
--- a/llvm/include/llvm/IR/Metadata.h
+++ b/llvm/include/llvm/IR/Metadata.h
@@ -849,6 +849,8 @@ struct AAMDNodes {
/// If his AAMDNode has !tbaa.struct and \p AccessSize matches the size of the
/// field at offset 0, get the TBAA tag describing the accessed field.
AAMDNodes adjustForAccess(unsigned AccessSize);
+ AAMDNodes adjustForAccess(size_t Offset, Type *AccessTy,
+ const DataLayout &DL);
};
// Specialize DenseMapInfo for AAMDNodes.
diff --git a/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp b/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp
index bfd70414c0340c..b2dc451d581939 100644
--- a/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp
+++ b/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp
@@ -833,3 +833,16 @@ AAMDNodes AAMDNodes::adjustForAccess(unsigned AccessSize) {
}
return New;
}
+
+AAMDNodes AAMDNodes::adjustForAccess(size_t Offset, Type *AccessTy,
+ const DataLayout &DL) {
+
+ AAMDNodes New = shift(Offset);
+ if (!DL.typeSizeEqualsStoreSize(AccessTy))
+ return New;
+ TypeSize Size = DL.getTypeStoreSize(AccessTy);
+ if (Size.isScalable())
+ return New;
+
+ return New.adjustForAccess(Size.getKnownMinValue());
+}
diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp
index 138dc38b5c14ce..f24cbbc1fe0591 100644
--- a/llvm/lib/Transforms/Scalar/SROA.cpp
+++ b/llvm/lib/Transforms/Scalar/SROA.cpp
@@ -2914,7 +2914,8 @@ class AllocaSliceRewriter : public InstVisitor<AllocaSliceRewriter, bool> {
// Do this after copyMetadataForLoad() to preserve the TBAA shift.
if (AATags)
- NewLI->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
+ NewLI->setAAMetadata(AATags.adjustForAccess(
+ NewBeginOffset - BeginOffset, NewLI->getType(), DL));
// Try to preserve nonnull metadata
V = NewLI;
@@ -2936,7 +2937,9 @@ class AllocaSliceRewriter : public InstVisitor<AllocaSliceRewriter, bool> {
IRB.CreateAlignedLoad(TargetTy, getNewAllocaSlicePtr(IRB, LTy),
getSliceAlign(), LI.isVolatile(), LI.getName());
if (AATags)
- NewLI->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
+ NewLI->setAAMetadata(AATags.adjustForAccess(
+ NewBeginOffset - BeginOffset, NewLI->getType(), DL));
+
if (LI.isVolatile())
NewLI->setAtomic(LI.getOrdering(), LI.getSyncScopeID());
NewLI->copyMetadata(LI, {LLVMContext::MD_mem_parallel_loop_access,
@@ -3011,7 +3014,8 @@ class AllocaSliceRewriter : public InstVisitor<AllocaSliceRewriter, bool> {
Store->copyMetadata(SI, {LLVMContext::MD_mem_parallel_loop_access,
LLVMContext::MD_access_group});
if (AATags)
- Store->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
+ Store->setAAMetadata(AATags.adjustForAccess(NewBeginOffset - BeginOffset,
+ V->getType(), DL));
Pass.DeadInsts.push_back(&SI);
// NOTE: Careful to use OrigV rather than V.
@@ -3038,7 +3042,8 @@ class AllocaSliceRewriter : public InstVisitor<AllocaSliceRewriter, bool> {
Store->copyMetadata(SI, {LLVMContext::MD_mem_parallel_loop_access,
LLVMContext::MD_access_group});
if (AATags)
- Store->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
+ Store->setAAMetadata(AATags.adjustForAccess(NewBeginOffset - BeginOffset,
+ V->getType(), DL));
migrateDebugInfo(&OldAI, IsSplit, NewBeginOffset * 8, SliceSize * 8, &SI,
Store, Store->getPointerOperand(),
@@ -3097,8 +3102,10 @@ class AllocaSliceRewriter : public InstVisitor<AllocaSliceRewriter, bool> {
}
NewSI->copyMetadata(SI, {LLVMContext::MD_mem_parallel_loop_access,
LLVMContext::MD_access_group});
- if (AATags)
- NewSI->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
+ if (AATags) {
+ NewSI->setAAMetadata(AATags.adjustForAccess(NewBeginOffset - BeginOffset,
+ V->getType(), DL));
+ }
if (SI.isVolatile())
NewSI->setAtomic(SI.getOrdering(), SI.getSyncScopeID());
if (NewSI->isAtomic())
@@ -3280,8 +3287,10 @@ class AllocaSliceRewriter : public InstVisitor<AllocaSliceRewriter, bool> {
IRB.CreateAlignedStore(V, NewPtr, NewAI.getAlign(), II.isVolatile());
New->copyMetadata(II, {LLVMContext::MD_mem_parallel_loop_access,
LLVMContext::MD_access_group});
- if (AATags)
- New->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
+ if (AATags) {
+ New->setAAMetadata(AATags.adjustForAccess(NewBeginOffset - BeginOffset,
+ V->getType(), DL));
+ }
migrateDebugInfo(&OldAI, IsSplit, NewBeginOffset * 8, SliceSize * 8, &II,
New, New->getPointerOperand(), V, DL);
@@ -3486,7 +3495,8 @@ class AllocaSliceRewriter : public InstVisitor<AllocaSliceRewriter, bool> {
Load->copyMetadata(II, {LLVMContext::MD_mem_parallel_loop_access,
LLVMContext::MD_access_group});
if (AATags)
- Load->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
+ Load->setAAMetadata(AATags.adjustForAccess(NewBeginOffset - BeginOffset,
+ Load->getType(), DL));
Src = Load;
}
@@ -3507,8 +3517,10 @@ class AllocaSliceRewriter : public InstVisitor<AllocaSliceRewriter, bool> {
IRB.CreateAlignedStore(Src, DstPtr, DstAlign, II.isVolatile()));
Store->copyMetadata(II, {LLVMContext::MD_mem_parallel_loop_access,
LLVMContext::MD_access_group});
- if (AATags)
- Store->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
+ if (AATags) {
+ Store->setAAMetadata(AATags.adjustForAccess(NewBeginOffset - BeginOffset,
+ Src->getType(), DL));
+ }
APInt Offset(DL.getIndexTypeSizeInBits(DstPtr->getType()), 0);
if (IsDest) {
@@ -3836,7 +3848,8 @@ class AggLoadStoreRewriter : public InstVisitor<AggLoadStoreRewriter, bool> {
DL.getIndexSizeInBits(Ptr->getType()->getPointerAddressSpace()), 0);
if (AATags &&
GEPOperator::accumulateConstantOffset(BaseTy, GEPIndices, DL, Offset))
- Load->setAAMetadata(AATags.shift(Offset.getZExtValue()));
+ Load->setAAMetadata(
+ AATags.adjustForAccess(Offset.getZExtValue(), Load->getType(), DL));
Agg = IRB.CreateInsertValue(Agg, Load, Indices, Name + ".insert");
LLVM_DEBUG(dbgs() << " to: " << *Load << "\n");
@@ -3887,8 +3900,10 @@ class AggLoadStoreRewriter : public InstVisitor<AggLoadStoreRewriter, bool> {
APInt Offset(
DL.getIndexSizeInBits(Ptr->getType()->getPointerAddressSpace()), 0);
GEPOperator::accumulateConstantOffset(BaseTy, GEPIndices, DL, Offset);
- if (AATags)
- Store->setAAMetadata(AATags.shift(Offset.getZExtValue()));
+ if (AATags) {
+ Store->setAAMetadata(AATags.adjustForAccess(
+ Offset.getZExtValue(), ExtractValue->getType(), DL));
+ }
// migrateDebugInfo requires the base Alloca. Walk to it from this gep.
// If we cannot (because there's an intervening non-const or unbounded
@@ -4542,6 +4557,7 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
Value *StoreBasePtr = SI->getPointerOperand();
IRB.SetInsertPoint(SI);
+ AAMDNodes AATags = SI->getAAMetadata();
LLVM_DEBUG(dbgs() << " Splitting store of load: " << *SI << "\n");
@@ -4561,6 +4577,10 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
PStore->copyMetadata(*SI, {LLVMContext::MD_mem_parallel_loop_access,
LLVMContext::MD_access_group,
LLVMContext::MD_DIAssignID});
+
+ if (AATags)
+ PStore->setAAMetadata(
+ AATags.adjustForAccess(PartOffset, PLoad->getType(), DL));
LLVM_DEBUG(dbgs() << " +" << PartOffset << ":" << *PStore << "\n");
}
diff --git a/llvm/test/Transforms/SROA/tbaa-struct2.ll b/llvm/test/Transforms/SROA/tbaa-struct2.ll
index 1fd37e82d67775..02c99a2b329457 100644
--- a/llvm/test/Transforms/SROA/tbaa-struct2.ll
+++ b/llvm/test/Transforms/SROA/tbaa-struct2.ll
@@ -13,9 +13,9 @@ define double @bar(ptr %wishart) {
; CHECK-NEXT: [[TMP_SROA_3:%.*]] = alloca [4 x i8], align 4
; CHECK-NEXT: [[TMP_SROA_0_0_COPYLOAD:%.*]] = load double, ptr [[WISHART:%.*]], align 8, !tbaa.struct [[TBAA_STRUCT0:![0-9]+]]
; CHECK-NEXT: [[TMP_SROA_2_0_WISHART_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[WISHART]], i64 8
-; CHECK-NEXT: [[TMP_SROA_2_0_COPYLOAD:%.*]] = load i32, ptr [[TMP_SROA_2_0_WISHART_SROA_IDX]], align 8, !tbaa.struct [[TBAA_STRUCT7:![0-9]+]]
+; CHECK-NEXT: [[TMP_SROA_2_0_COPYLOAD:%.*]] = load i32, ptr [[TMP_SROA_2_0_WISHART_SROA_IDX]], align 8, !tbaa [[TBAA5:![0-9]+]]
; CHECK-NEXT: [[TMP_SROA_3_0_WISHART_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[WISHART]], i64 12
-; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP_SROA_3]], ptr align 4 [[TMP_SROA_3_0_WISHART_SROA_IDX]], i64 4, i1 false), !tbaa.struct [[TBAA_STRUCT8:![0-9]+]]
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP_SROA_3]], ptr align 4 [[TMP_SROA_3_0_WISHART_SROA_IDX]], i64 4, i1 false), !tbaa.struct [[TBAA_STRUCT7:![0-9]+]]
; CHECK-NEXT: [[CALL:%.*]] = call double @subcall(double [[TMP_SROA_0_0_COPYLOAD]], i32 [[TMP_SROA_2_0_COPYLOAD]])
; CHECK-NEXT: ret double [[CALL]]
;
@@ -38,15 +38,14 @@ define double @bar(ptr %wishart) {
;.
; CHECK: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }
;.
-; CHECK: [[TBAA_STRUCT0]] = !{i64 0, i64 8, !1, i64 8, i64 4, !5}
-; CHECK: [[META1:![0-9]+]] = !{!2, !2, i64 0}
-; CHECK: [[META2:![0-9]+]] = !{!"double", !3, i64 0}
-; CHECK: [[META3:![0-9]+]] = !{!"omnipotent char", !4, i64 0}
-; CHECK: [[META4:![0-9]+]] = !{!"Simple C++ TBAA"}
-; CHECK: [[META5:![0-9]+]] = !{!6, !6, i64 0}
-; CHECK: [[META6:![0-9]+]] = !{!"int", !3, i64 0}
-; CHECK: [[TBAA_STRUCT7]] = !{i64 0, i64 4, !5}
-; CHECK: [[TBAA_STRUCT8]] = !{}
+; CHECK: [[TBAA_STRUCT0]] = !{i64 0, i64 8, [[META1:![0-9]+]], i64 8, i64 4, [[TBAA5]]}
+; CHECK: [[META1]] = !{[[META2:![0-9]+]], [[META2]], i64 0}
+; CHECK: [[META2]] = !{!"double", [[META3:![0-9]+]], i64 0}
+; CHECK: [[META3]] = !{!"omnipotent char", [[META4:![0-9]+]], i64 0}
+; CHECK: [[META4]] = !{!"Simple C++ TBAA"}
+; CHECK: [[TBAA5]] = !{[[META6:![0-9]+]], [[META6]], i64 0}
+; CHECK: [[META6]] = !{!"int", [[META3]], i64 0}
+; CHECK: [[TBAA_STRUCT7]] = !{}
;.
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; CHECK-MODIFY-CFG: {{.*}}
diff --git a/llvm/test/Transforms/SROA/tbaa-struct3.ll b/llvm/test/Transforms/SROA/tbaa-struct3.ll
index 4910e0e07ae380..603e7d708647fc 100644
--- a/llvm/test/Transforms/SROA/tbaa-struct3.ll
+++ b/llvm/test/Transforms/SROA/tbaa-struct3.ll
@@ -7,9 +7,9 @@ define void @load_store_transfer_split_struct_tbaa_2_float(ptr dereferenceable(2
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = bitcast float [[A]] to i32
; CHECK-NEXT: [[TMP1:%.*]] = bitcast float [[B]] to i32
-; CHECK-NEXT: store i32 [[TMP0]], ptr [[RES]], align 4
+; CHECK-NEXT: store i32 [[TMP0]], ptr [[RES]], align 4, !tbaa.struct [[TBAA_STRUCT0:![0-9]+]]
; CHECK-NEXT: [[RES_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[RES]], i64 4
-; CHECK-NEXT: store i32 [[TMP1]], ptr [[RES_SROA_IDX]], align 4
+; CHECK-NEXT: store i32 [[TMP1]], ptr [[RES_SROA_IDX]], align 4, !tbaa [[TBAA1:![0-9]+]]
; CHECK-NEXT: [[P:%.*]] = load ptr, ptr [[RES]], align 8
; CHECK-NEXT: ret void
;
@@ -29,9 +29,9 @@ define void @memcpy_transfer(ptr dereferenceable(24) %res, float %a, float %b) {
; CHECK-SAME: ptr dereferenceable(24) [[RES:%.*]], float [[A:%.*]], float [[B:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[L_PTR:%.*]] = load ptr, ptr [[RES]], align 8
-; CHECK-NEXT: store float [[A]], ptr [[L_PTR]], align 1, !tbaa.struct [[TBAA_STRUCT0:![0-9]+]]
+; CHECK-NEXT: store float [[A]], ptr [[L_PTR]], align 1, !tbaa.struct [[TBAA_STRUCT0]]
; CHECK-NEXT: [[TMP_SROA_2_0_L_PTR_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[L_PTR]], i64 4
-; CHECK-NEXT: store float [[B]], ptr [[TMP_SROA_2_0_L_PTR_SROA_IDX]], align 1, !tbaa.struct [[TBAA_STRUCT5:![0-9]+]]
+; CHECK-NEXT: store float [[B]], ptr [[TMP_SROA_2_0_L_PTR_SROA_IDX]], align 1, !tbaa [[TBAA1]]
; CHECK-NEXT: ret void
;
entry:
@@ -53,7 +53,7 @@ define void @memcpy_transfer_tbaa_field_and_size_do_not_align(ptr dereferenceabl
; CHECK-NEXT: [[TMP_SROA_2_0_L_PTR_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[L_PTR]], i64 4
; CHECK-NEXT: [[TMP0:%.*]] = bitcast float [[B]] to i32
; CHECK-NEXT: [[TMP_SROA_2_0_EXTRACT_TRUNC:%.*]] = trunc i32 [[TMP0]] to i16
-; CHECK-NEXT: store i16 [[TMP_SROA_2_0_EXTRACT_TRUNC]], ptr [[TMP_SROA_2_0_L_PTR_SROA_IDX]], align 1, !tbaa.struct [[TBAA_STRUCT5]]
+; CHECK-NEXT: store i16 [[TMP_SROA_2_0_EXTRACT_TRUNC]], ptr [[TMP_SROA_2_0_L_PTR_SROA_IDX]], align 1, !tbaa.struct [[TBAA_STRUCT5:![0-9]+]]
; CHECK-NEXT: ret void
;
entry:
@@ -98,10 +98,10 @@ declare void @llvm.memcpy.p0.p0.i64(ptr noalias nocapture writeonly, ptr noalias
!3 = !{!"omnipotent char", !4, i64 0}
!4 = !{!"Simple C++ TBAA"}
;.
-; CHECK: [[TBAA_STRUCT0]] = !{i64 0, i64 4, [[META1:![0-9]+]], i64 4, i64 4, [[META1]]}
-; CHECK: [[META1]] = !{[[META2:![0-9]+]], [[META2]], i64 0}
+; CHECK: [[TBAA_STRUCT0]] = !{i64 0, i64 4, [[TBAA1]], i64 4, i64 4, [[TBAA1]]}
+; CHECK: [[TBAA1]] = !{[[META2:![0-9]+]], [[META2]], i64 0}
; CHECK: [[META2]] = !{!"float", [[META3:![0-9]+]], i64 0}
; CHECK: [[META3]] = !{!"omnipotent char", [[META4:![0-9]+]], i64 0}
; CHECK: [[META4]] = !{!"Simple C++ TBAA"}
-; CHECK: [[TBAA_STRUCT5]] = !{i64 0, i64 4, [[META1]]}
+; CHECK: [[TBAA_STRUCT5]] = !{i64 0, i64 4, [[TBAA1]]}
;.
More information about the llvm-branch-commits
mailing list