[clang] [llvm] [SROA] Use !tbaa instead of !tbaa.struct if op matches field. (PR #81289)

Florian Hahn via cfe-commits cfe-commits at lists.llvm.org
Fri Feb 16 05:41:48 PST 2024


https://github.com/fhahn updated https://github.com/llvm/llvm-project/pull/81289

>From e6de9f21b162c57dd09cb4de3147b7ab09ef8681 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Fri, 9 Feb 2024 13:29:19 +0000
Subject: [PATCH 1/5] [TBAA] Extract logic to use TBAA tag for field of
 !tbaa.struct (NFC).

---
 llvm/include/llvm/IR/Metadata.h                   |  5 +++++
 llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp      | 14 ++++++++++++++
 .../Transforms/InstCombine/InstCombineCalls.cpp   | 15 +--------------
 3 files changed, 20 insertions(+), 14 deletions(-)

diff --git a/llvm/include/llvm/IR/Metadata.h b/llvm/include/llvm/IR/Metadata.h
index db1f44fea3b459..6f23ac44dee968 100644
--- a/llvm/include/llvm/IR/Metadata.h
+++ b/llvm/include/llvm/IR/Metadata.h
@@ -844,6 +844,11 @@ struct AAMDNodes {
   /// together. Different from `merge`, where different locations should
   /// overlap each other, `concat` puts non-overlapping locations together.
   AAMDNodes concat(const AAMDNodes &Other) const;
+
+  /// Create a new AAMDNode for accessing \p AccessSize bytes of this AAMDNode.
+  /// If his AAMDNode has !tbaa.struct and \p AccessSize matches the size of the
+  /// field at offset 0, get the TBAA tag describing the accessed field.
+  AAMDNodes adjustForAccess(unsigned AccessSize);
 };
 
 // Specialize DenseMapInfo for AAMDNodes.
diff --git a/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp b/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp
index e4dc1a867f6f0c..edc08cde686f1f 100644
--- a/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp
+++ b/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp
@@ -817,3 +817,17 @@ MDNode *AAMDNodes::extendToTBAA(MDNode *MD, ssize_t Len) {
       ConstantAsMetadata::get(ConstantInt::get(PreviousSize->getType(), Len));
   return MDNode::get(MD->getContext(), NextNodes);
 }
+
+AAMDNodes AAMDNodes::adjustForAccess(unsigned AccessSize) {
+  AAMDNodes New = *this;
+  MDNode *M = New.TBAAStruct;
+  New.TBAAStruct = nullptr;
+  if (M && M->getNumOperands() == 3 && M->getOperand(0) &&
+      mdconst::hasa<ConstantInt>(M->getOperand(0)) &&
+      mdconst::extract<ConstantInt>(M->getOperand(0))->isZero() &&
+      M->getOperand(1) && mdconst::hasa<ConstantInt>(M->getOperand(1)) &&
+      mdconst::extract<ConstantInt>(M->getOperand(1))->getValue() == AccessSize &&
+      M->getOperand(2) && isa<MDNode>(M->getOperand(2)))
+    New.TBAA = cast<MDNode>(M->getOperand(2));
+  return New;
+}
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index ed5d44757fbeb6..56d1259e955196 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -172,20 +172,7 @@ Instruction *InstCombinerImpl::SimplifyAnyMemTransfer(AnyMemTransferInst *MI) {
 
   // If the memcpy has metadata describing the members, see if we can get the
   // TBAA tag describing our copy.
-  AAMDNodes AACopyMD = MI->getAAMetadata();
-
-  if (MDNode *M = AACopyMD.TBAAStruct) {
-    AACopyMD.TBAAStruct = nullptr;
-    if (M->getNumOperands() == 3 && M->getOperand(0) &&
-        mdconst::hasa<ConstantInt>(M->getOperand(0)) &&
-        mdconst::extract<ConstantInt>(M->getOperand(0))->isZero() &&
-        M->getOperand(1) &&
-        mdconst::hasa<ConstantInt>(M->getOperand(1)) &&
-        mdconst::extract<ConstantInt>(M->getOperand(1))->getValue() ==
-        Size &&
-        M->getOperand(2) && isa<MDNode>(M->getOperand(2)))
-      AACopyMD.TBAA = cast<MDNode>(M->getOperand(2));
-  }
+  AAMDNodes AACopyMD = MI->getAAMetadata().adjustForAccess(Size);
 
   Value *Src = MI->getArgOperand(1);
   Value *Dest = MI->getArgOperand(0);

>From 99cf032dfabb21b820559bae61d2354e56336fdd Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Fri, 9 Feb 2024 16:25:32 +0000
Subject: [PATCH 2/5] [TBAA] Only clear TBAAStruct if field can be extracted.

Retain TBAAStruct if we fail to match the access to a single field. All
users at the moment use this when using the full size of the original
access. SROA also retains the original TBAAStruct when accessing parts
at offset 0.

Motivation for this and follow-on patches is to improve codegen for
libc++, where using memcpy limits optimizations, like vectorization for
code iteration over std::vector<std::complex<float>>:
https://godbolt.org/z/f3vqYos3c

Depends on https://github.com/llvm/llvm-project/pull/81284
---
 llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp           | 8 +++++---
 llvm/test/Transforms/InstCombine/struct-assign-tbaa.ll | 5 +++--
 2 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp b/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp
index edc08cde686f1f..bfd70414c0340c 100644
--- a/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp
+++ b/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp
@@ -821,13 +821,15 @@ MDNode *AAMDNodes::extendToTBAA(MDNode *MD, ssize_t Len) {
 AAMDNodes AAMDNodes::adjustForAccess(unsigned AccessSize) {
   AAMDNodes New = *this;
   MDNode *M = New.TBAAStruct;
-  New.TBAAStruct = nullptr;
   if (M && M->getNumOperands() == 3 && M->getOperand(0) &&
       mdconst::hasa<ConstantInt>(M->getOperand(0)) &&
       mdconst::extract<ConstantInt>(M->getOperand(0))->isZero() &&
       M->getOperand(1) && mdconst::hasa<ConstantInt>(M->getOperand(1)) &&
-      mdconst::extract<ConstantInt>(M->getOperand(1))->getValue() == AccessSize &&
-      M->getOperand(2) && isa<MDNode>(M->getOperand(2)))
+      mdconst::extract<ConstantInt>(M->getOperand(1))->getValue() ==
+          AccessSize &&
+      M->getOperand(2) && isa<MDNode>(M->getOperand(2))) {
+    New.TBAAStruct = nullptr;
     New.TBAA = cast<MDNode>(M->getOperand(2));
+  }
   return New;
 }
diff --git a/llvm/test/Transforms/InstCombine/struct-assign-tbaa.ll b/llvm/test/Transforms/InstCombine/struct-assign-tbaa.ll
index 1042c413fbb7bb..996d2c0e67e165 100644
--- a/llvm/test/Transforms/InstCombine/struct-assign-tbaa.ll
+++ b/llvm/test/Transforms/InstCombine/struct-assign-tbaa.ll
@@ -38,8 +38,8 @@ define ptr @test2() {
 define void @test3_multiple_fields(ptr nocapture %a, ptr nocapture %b) {
 ; CHECK-LABEL: @test3_multiple_fields(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr [[B:%.*]], align 4
-; CHECK-NEXT:    store i64 [[TMP0]], ptr [[A:%.*]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr [[B:%.*]], align 4, !tbaa.struct [[TBAA_STRUCT3:![0-9]+]]
+; CHECK-NEXT:    store i64 [[TMP0]], ptr [[A:%.*]], align 4, !tbaa.struct [[TBAA_STRUCT3]]
 ; CHECK-NEXT:    ret void
 ;
 entry:
@@ -86,4 +86,5 @@ entry:
 ; CHECK: [[TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0}
 ; CHECK: [[META1]] = !{!"float", [[META2:![0-9]+]]}
 ; CHECK: [[META2]] = !{!"Simple C/C++ TBAA"}
+; CHECK: [[TBAA_STRUCT3]] = !{i64 0, i64 4, [[TBAA0]], i64 4, i64 4, [[TBAA0]]}
 ;.

>From 90639e9131670863ebb4c199a9861b2b0094d601 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Fri, 9 Feb 2024 15:17:09 +0000
Subject: [PATCH 3/5] [SROA] Use !tbaa instead of !tbaa.struct if op matches
 field.

If a split memory access introduced by SROA accesses precisely a single
field of the original operation's !tbaa.struct, use the !tbaa tag for
the accessed field directly instead of the full !tbaa.struct.

InstCombine already had a similar logic.

Motivation for this and follow-on patches is to improve codegen for
libc++, where using memcpy limits optimizations, like vectorization for
code iteration over std::vector<std::complex<float>>:
https://godbolt.org/z/f3vqYos3c

Depends on https://github.com/llvm/llvm-project/pull/81285.
---
 llvm/include/llvm/IR/Metadata.h              |  2 +
 llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp | 13 ++++++
 llvm/lib/Transforms/Scalar/SROA.cpp          | 48 ++++++++++++++------
 llvm/test/Transforms/SROA/tbaa-struct2.ll    | 21 ++++-----
 llvm/test/Transforms/SROA/tbaa-struct3.ll    | 16 +++----
 5 files changed, 67 insertions(+), 33 deletions(-)

diff --git a/llvm/include/llvm/IR/Metadata.h b/llvm/include/llvm/IR/Metadata.h
index 6f23ac44dee968..33363a271d4823 100644
--- a/llvm/include/llvm/IR/Metadata.h
+++ b/llvm/include/llvm/IR/Metadata.h
@@ -849,6 +849,8 @@ struct AAMDNodes {
   /// If his AAMDNode has !tbaa.struct and \p AccessSize matches the size of the
   /// field at offset 0, get the TBAA tag describing the accessed field.
   AAMDNodes adjustForAccess(unsigned AccessSize);
+  AAMDNodes adjustForAccess(size_t Offset, Type *AccessTy,
+                            const DataLayout &DL);
 };
 
 // Specialize DenseMapInfo for AAMDNodes.
diff --git a/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp b/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp
index bfd70414c0340c..b2dc451d581939 100644
--- a/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp
+++ b/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp
@@ -833,3 +833,16 @@ AAMDNodes AAMDNodes::adjustForAccess(unsigned AccessSize) {
   }
   return New;
 }
+
+AAMDNodes AAMDNodes::adjustForAccess(size_t Offset, Type *AccessTy,
+                                     const DataLayout &DL) {
+
+  AAMDNodes New = shift(Offset);
+  if (!DL.typeSizeEqualsStoreSize(AccessTy))
+    return New;
+  TypeSize Size = DL.getTypeStoreSize(AccessTy);
+  if (Size.isScalable())
+    return New;
+
+  return New.adjustForAccess(Size.getKnownMinValue());
+}
diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp
index 138dc38b5c14ce..f24cbbc1fe0591 100644
--- a/llvm/lib/Transforms/Scalar/SROA.cpp
+++ b/llvm/lib/Transforms/Scalar/SROA.cpp
@@ -2914,7 +2914,8 @@ class AllocaSliceRewriter : public InstVisitor<AllocaSliceRewriter, bool> {
 
       // Do this after copyMetadataForLoad() to preserve the TBAA shift.
       if (AATags)
-        NewLI->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
+        NewLI->setAAMetadata(AATags.adjustForAccess(
+            NewBeginOffset - BeginOffset, NewLI->getType(), DL));
 
       // Try to preserve nonnull metadata
       V = NewLI;
@@ -2936,7 +2937,9 @@ class AllocaSliceRewriter : public InstVisitor<AllocaSliceRewriter, bool> {
           IRB.CreateAlignedLoad(TargetTy, getNewAllocaSlicePtr(IRB, LTy),
                                 getSliceAlign(), LI.isVolatile(), LI.getName());
       if (AATags)
-        NewLI->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
+        NewLI->setAAMetadata(AATags.adjustForAccess(
+            NewBeginOffset - BeginOffset, NewLI->getType(), DL));
+
       if (LI.isVolatile())
         NewLI->setAtomic(LI.getOrdering(), LI.getSyncScopeID());
       NewLI->copyMetadata(LI, {LLVMContext::MD_mem_parallel_loop_access,
@@ -3011,7 +3014,8 @@ class AllocaSliceRewriter : public InstVisitor<AllocaSliceRewriter, bool> {
     Store->copyMetadata(SI, {LLVMContext::MD_mem_parallel_loop_access,
                              LLVMContext::MD_access_group});
     if (AATags)
-      Store->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
+      Store->setAAMetadata(AATags.adjustForAccess(NewBeginOffset - BeginOffset,
+                                                  V->getType(), DL));
     Pass.DeadInsts.push_back(&SI);
 
     // NOTE: Careful to use OrigV rather than V.
@@ -3038,7 +3042,8 @@ class AllocaSliceRewriter : public InstVisitor<AllocaSliceRewriter, bool> {
     Store->copyMetadata(SI, {LLVMContext::MD_mem_parallel_loop_access,
                              LLVMContext::MD_access_group});
     if (AATags)
-      Store->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
+      Store->setAAMetadata(AATags.adjustForAccess(NewBeginOffset - BeginOffset,
+                                                  V->getType(), DL));
 
     migrateDebugInfo(&OldAI, IsSplit, NewBeginOffset * 8, SliceSize * 8, &SI,
                      Store, Store->getPointerOperand(),
@@ -3097,8 +3102,10 @@ class AllocaSliceRewriter : public InstVisitor<AllocaSliceRewriter, bool> {
     }
     NewSI->copyMetadata(SI, {LLVMContext::MD_mem_parallel_loop_access,
                              LLVMContext::MD_access_group});
-    if (AATags)
-      NewSI->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
+    if (AATags) {
+      NewSI->setAAMetadata(AATags.adjustForAccess(NewBeginOffset - BeginOffset,
+                                                  V->getType(), DL));
+    }
     if (SI.isVolatile())
       NewSI->setAtomic(SI.getOrdering(), SI.getSyncScopeID());
     if (NewSI->isAtomic())
@@ -3280,8 +3287,10 @@ class AllocaSliceRewriter : public InstVisitor<AllocaSliceRewriter, bool> {
         IRB.CreateAlignedStore(V, NewPtr, NewAI.getAlign(), II.isVolatile());
     New->copyMetadata(II, {LLVMContext::MD_mem_parallel_loop_access,
                            LLVMContext::MD_access_group});
-    if (AATags)
-      New->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
+    if (AATags) {
+      New->setAAMetadata(AATags.adjustForAccess(NewBeginOffset - BeginOffset,
+                                                V->getType(), DL));
+    }
 
     migrateDebugInfo(&OldAI, IsSplit, NewBeginOffset * 8, SliceSize * 8, &II,
                      New, New->getPointerOperand(), V, DL);
@@ -3486,7 +3495,8 @@ class AllocaSliceRewriter : public InstVisitor<AllocaSliceRewriter, bool> {
       Load->copyMetadata(II, {LLVMContext::MD_mem_parallel_loop_access,
                               LLVMContext::MD_access_group});
       if (AATags)
-        Load->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
+        Load->setAAMetadata(AATags.adjustForAccess(NewBeginOffset - BeginOffset,
+                                                   Load->getType(), DL));
       Src = Load;
     }
 
@@ -3507,8 +3517,10 @@ class AllocaSliceRewriter : public InstVisitor<AllocaSliceRewriter, bool> {
         IRB.CreateAlignedStore(Src, DstPtr, DstAlign, II.isVolatile()));
     Store->copyMetadata(II, {LLVMContext::MD_mem_parallel_loop_access,
                              LLVMContext::MD_access_group});
-    if (AATags)
-      Store->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
+    if (AATags) {
+      Store->setAAMetadata(AATags.adjustForAccess(NewBeginOffset - BeginOffset,
+                                                  Src->getType(), DL));
+    }
 
     APInt Offset(DL.getIndexTypeSizeInBits(DstPtr->getType()), 0);
     if (IsDest) {
@@ -3836,7 +3848,8 @@ class AggLoadStoreRewriter : public InstVisitor<AggLoadStoreRewriter, bool> {
           DL.getIndexSizeInBits(Ptr->getType()->getPointerAddressSpace()), 0);
       if (AATags &&
           GEPOperator::accumulateConstantOffset(BaseTy, GEPIndices, DL, Offset))
-        Load->setAAMetadata(AATags.shift(Offset.getZExtValue()));
+        Load->setAAMetadata(
+            AATags.adjustForAccess(Offset.getZExtValue(), Load->getType(), DL));
 
       Agg = IRB.CreateInsertValue(Agg, Load, Indices, Name + ".insert");
       LLVM_DEBUG(dbgs() << "          to: " << *Load << "\n");
@@ -3887,8 +3900,10 @@ class AggLoadStoreRewriter : public InstVisitor<AggLoadStoreRewriter, bool> {
       APInt Offset(
           DL.getIndexSizeInBits(Ptr->getType()->getPointerAddressSpace()), 0);
       GEPOperator::accumulateConstantOffset(BaseTy, GEPIndices, DL, Offset);
-      if (AATags)
-        Store->setAAMetadata(AATags.shift(Offset.getZExtValue()));
+      if (AATags) {
+        Store->setAAMetadata(AATags.adjustForAccess(
+            Offset.getZExtValue(), ExtractValue->getType(), DL));
+      }
 
       // migrateDebugInfo requires the base Alloca. Walk to it from this gep.
       // If we cannot (because there's an intervening non-const or unbounded
@@ -4542,6 +4557,7 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
 
       Value *StoreBasePtr = SI->getPointerOperand();
       IRB.SetInsertPoint(SI);
+      AAMDNodes AATags = SI->getAAMetadata();
 
       LLVM_DEBUG(dbgs() << "    Splitting store of load: " << *SI << "\n");
 
@@ -4561,6 +4577,10 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
         PStore->copyMetadata(*SI, {LLVMContext::MD_mem_parallel_loop_access,
                                    LLVMContext::MD_access_group,
                                    LLVMContext::MD_DIAssignID});
+
+        if (AATags)
+          PStore->setAAMetadata(
+              AATags.adjustForAccess(PartOffset, PLoad->getType(), DL));
         LLVM_DEBUG(dbgs() << "      +" << PartOffset << ":" << *PStore << "\n");
       }
 
diff --git a/llvm/test/Transforms/SROA/tbaa-struct2.ll b/llvm/test/Transforms/SROA/tbaa-struct2.ll
index 1fd37e82d67775..02c99a2b329457 100644
--- a/llvm/test/Transforms/SROA/tbaa-struct2.ll
+++ b/llvm/test/Transforms/SROA/tbaa-struct2.ll
@@ -13,9 +13,9 @@ define double @bar(ptr %wishart) {
 ; CHECK-NEXT:    [[TMP_SROA_3:%.*]] = alloca [4 x i8], align 4
 ; CHECK-NEXT:    [[TMP_SROA_0_0_COPYLOAD:%.*]] = load double, ptr [[WISHART:%.*]], align 8, !tbaa.struct [[TBAA_STRUCT0:![0-9]+]]
 ; CHECK-NEXT:    [[TMP_SROA_2_0_WISHART_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[WISHART]], i64 8
-; CHECK-NEXT:    [[TMP_SROA_2_0_COPYLOAD:%.*]] = load i32, ptr [[TMP_SROA_2_0_WISHART_SROA_IDX]], align 8, !tbaa.struct [[TBAA_STRUCT7:![0-9]+]]
+; CHECK-NEXT:    [[TMP_SROA_2_0_COPYLOAD:%.*]] = load i32, ptr [[TMP_SROA_2_0_WISHART_SROA_IDX]], align 8, !tbaa [[TBAA5:![0-9]+]]
 ; CHECK-NEXT:    [[TMP_SROA_3_0_WISHART_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[WISHART]], i64 12
-; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP_SROA_3]], ptr align 4 [[TMP_SROA_3_0_WISHART_SROA_IDX]], i64 4, i1 false), !tbaa.struct [[TBAA_STRUCT8:![0-9]+]]
+; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP_SROA_3]], ptr align 4 [[TMP_SROA_3_0_WISHART_SROA_IDX]], i64 4, i1 false), !tbaa.struct [[TBAA_STRUCT7:![0-9]+]]
 ; CHECK-NEXT:    [[CALL:%.*]] = call double @subcall(double [[TMP_SROA_0_0_COPYLOAD]], i32 [[TMP_SROA_2_0_COPYLOAD]])
 ; CHECK-NEXT:    ret double [[CALL]]
 ;
@@ -38,15 +38,14 @@ define double @bar(ptr %wishart) {
 ;.
 ; CHECK: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }
 ;.
-; CHECK: [[TBAA_STRUCT0]] = !{i64 0, i64 8, !1, i64 8, i64 4, !5}
-; CHECK: [[META1:![0-9]+]] = !{!2, !2, i64 0}
-; CHECK: [[META2:![0-9]+]] = !{!"double", !3, i64 0}
-; CHECK: [[META3:![0-9]+]] = !{!"omnipotent char", !4, i64 0}
-; CHECK: [[META4:![0-9]+]] = !{!"Simple C++ TBAA"}
-; CHECK: [[META5:![0-9]+]] = !{!6, !6, i64 0}
-; CHECK: [[META6:![0-9]+]] = !{!"int", !3, i64 0}
-; CHECK: [[TBAA_STRUCT7]] = !{i64 0, i64 4, !5}
-; CHECK: [[TBAA_STRUCT8]] = !{}
+; CHECK: [[TBAA_STRUCT0]] = !{i64 0, i64 8, [[META1:![0-9]+]], i64 8, i64 4, [[TBAA5]]}
+; CHECK: [[META1]] = !{[[META2:![0-9]+]], [[META2]], i64 0}
+; CHECK: [[META2]] = !{!"double", [[META3:![0-9]+]], i64 0}
+; CHECK: [[META3]] = !{!"omnipotent char", [[META4:![0-9]+]], i64 0}
+; CHECK: [[META4]] = !{!"Simple C++ TBAA"}
+; CHECK: [[TBAA5]] = !{[[META6:![0-9]+]], [[META6]], i64 0}
+; CHECK: [[META6]] = !{!"int", [[META3]], i64 0}
+; CHECK: [[TBAA_STRUCT7]] = !{}
 ;.
 ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
 ; CHECK-MODIFY-CFG: {{.*}}
diff --git a/llvm/test/Transforms/SROA/tbaa-struct3.ll b/llvm/test/Transforms/SROA/tbaa-struct3.ll
index 4910e0e07ae380..603e7d708647fc 100644
--- a/llvm/test/Transforms/SROA/tbaa-struct3.ll
+++ b/llvm/test/Transforms/SROA/tbaa-struct3.ll
@@ -7,9 +7,9 @@ define void @load_store_transfer_split_struct_tbaa_2_float(ptr dereferenceable(2
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = bitcast float [[A]] to i32
 ; CHECK-NEXT:    [[TMP1:%.*]] = bitcast float [[B]] to i32
-; CHECK-NEXT:    store i32 [[TMP0]], ptr [[RES]], align 4
+; CHECK-NEXT:    store i32 [[TMP0]], ptr [[RES]], align 4, !tbaa.struct [[TBAA_STRUCT0:![0-9]+]]
 ; CHECK-NEXT:    [[RES_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[RES]], i64 4
-; CHECK-NEXT:    store i32 [[TMP1]], ptr [[RES_SROA_IDX]], align 4
+; CHECK-NEXT:    store i32 [[TMP1]], ptr [[RES_SROA_IDX]], align 4, !tbaa [[TBAA1:![0-9]+]]
 ; CHECK-NEXT:    [[P:%.*]] = load ptr, ptr [[RES]], align 8
 ; CHECK-NEXT:    ret void
 ;
@@ -29,9 +29,9 @@ define void @memcpy_transfer(ptr dereferenceable(24) %res, float %a, float %b) {
 ; CHECK-SAME: ptr dereferenceable(24) [[RES:%.*]], float [[A:%.*]], float [[B:%.*]]) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[L_PTR:%.*]] = load ptr, ptr [[RES]], align 8
-; CHECK-NEXT:    store float [[A]], ptr [[L_PTR]], align 1, !tbaa.struct [[TBAA_STRUCT0:![0-9]+]]
+; CHECK-NEXT:    store float [[A]], ptr [[L_PTR]], align 1, !tbaa.struct [[TBAA_STRUCT0]]
 ; CHECK-NEXT:    [[TMP_SROA_2_0_L_PTR_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[L_PTR]], i64 4
-; CHECK-NEXT:    store float [[B]], ptr [[TMP_SROA_2_0_L_PTR_SROA_IDX]], align 1, !tbaa.struct [[TBAA_STRUCT5:![0-9]+]]
+; CHECK-NEXT:    store float [[B]], ptr [[TMP_SROA_2_0_L_PTR_SROA_IDX]], align 1, !tbaa [[TBAA1]]
 ; CHECK-NEXT:    ret void
 ;
 entry:
@@ -53,7 +53,7 @@ define void @memcpy_transfer_tbaa_field_and_size_do_not_align(ptr dereferenceabl
 ; CHECK-NEXT:    [[TMP_SROA_2_0_L_PTR_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[L_PTR]], i64 4
 ; CHECK-NEXT:    [[TMP0:%.*]] = bitcast float [[B]] to i32
 ; CHECK-NEXT:    [[TMP_SROA_2_0_EXTRACT_TRUNC:%.*]] = trunc i32 [[TMP0]] to i16
-; CHECK-NEXT:    store i16 [[TMP_SROA_2_0_EXTRACT_TRUNC]], ptr [[TMP_SROA_2_0_L_PTR_SROA_IDX]], align 1, !tbaa.struct [[TBAA_STRUCT5]]
+; CHECK-NEXT:    store i16 [[TMP_SROA_2_0_EXTRACT_TRUNC]], ptr [[TMP_SROA_2_0_L_PTR_SROA_IDX]], align 1, !tbaa.struct [[TBAA_STRUCT5:![0-9]+]]
 ; CHECK-NEXT:    ret void
 ;
 entry:
@@ -98,10 +98,10 @@ declare void @llvm.memcpy.p0.p0.i64(ptr noalias nocapture writeonly, ptr noalias
 !3 = !{!"omnipotent char", !4, i64 0}
 !4 = !{!"Simple C++ TBAA"}
 ;.
-; CHECK: [[TBAA_STRUCT0]] = !{i64 0, i64 4, [[META1:![0-9]+]], i64 4, i64 4, [[META1]]}
-; CHECK: [[META1]] = !{[[META2:![0-9]+]], [[META2]], i64 0}
+; CHECK: [[TBAA_STRUCT0]] = !{i64 0, i64 4, [[TBAA1]], i64 4, i64 4, [[TBAA1]]}
+; CHECK: [[TBAA1]] = !{[[META2:![0-9]+]], [[META2]], i64 0}
 ; CHECK: [[META2]] = !{!"float", [[META3:![0-9]+]], i64 0}
 ; CHECK: [[META3]] = !{!"omnipotent char", [[META4:![0-9]+]], i64 0}
 ; CHECK: [[META4]] = !{!"Simple C++ TBAA"}
-; CHECK: [[TBAA_STRUCT5]] = !{i64 0, i64 4, [[META1]]}
+; CHECK: [[TBAA_STRUCT5]] = !{i64 0, i64 4, [[TBAA1]]}
 ;.

>From 137fe547415c6196498405f23e11f2fc4831675a Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Thu, 15 Feb 2024 21:30:21 +0000
Subject: [PATCH 4/5] !fixup update new test cases, add handle missing cases.

---
 llvm/include/llvm/IR/Metadata.h              |  1 +
 llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp |  6 ++-
 llvm/lib/Transforms/Scalar/SROA.cpp          | 16 +++---
 llvm/test/Transforms/SROA/tbaa-struct3.ll    | 51 ++++++++++++--------
 4 files changed, 46 insertions(+), 28 deletions(-)

diff --git a/llvm/include/llvm/IR/Metadata.h b/llvm/include/llvm/IR/Metadata.h
index 94065b52740a5d..da6744fdd09166 100644
--- a/llvm/include/llvm/IR/Metadata.h
+++ b/llvm/include/llvm/IR/Metadata.h
@@ -851,6 +851,7 @@ struct AAMDNodes {
   AAMDNodes adjustForAccess(unsigned AccessSize);
   AAMDNodes adjustForAccess(size_t Offset, Type *AccessTy,
                             const DataLayout &DL);
+  AAMDNodes adjustForAccess(size_t Offset, unsigned AccessSize);
 };
 
 // Specialize DenseMapInfo for AAMDNodes.
diff --git a/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp b/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp
index b2dc451d581939..a8c881fce5fd4d 100644
--- a/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp
+++ b/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp
@@ -836,7 +836,6 @@ AAMDNodes AAMDNodes::adjustForAccess(unsigned AccessSize) {
 
 AAMDNodes AAMDNodes::adjustForAccess(size_t Offset, Type *AccessTy,
                                      const DataLayout &DL) {
-
   AAMDNodes New = shift(Offset);
   if (!DL.typeSizeEqualsStoreSize(AccessTy))
     return New;
@@ -846,3 +845,8 @@ AAMDNodes AAMDNodes::adjustForAccess(size_t Offset, Type *AccessTy,
 
   return New.adjustForAccess(Size.getKnownMinValue());
 }
+
+AAMDNodes AAMDNodes::adjustForAccess(size_t Offset, unsigned AccessSize) {
+  AAMDNodes New = shift(Offset);
+  return New.adjustForAccess(AccessSize);
+}
diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp
index f24cbbc1fe0591..6c8785d52c4eab 100644
--- a/llvm/lib/Transforms/Scalar/SROA.cpp
+++ b/llvm/lib/Transforms/Scalar/SROA.cpp
@@ -2936,6 +2936,7 @@ class AllocaSliceRewriter : public InstVisitor<AllocaSliceRewriter, bool> {
       LoadInst *NewLI =
           IRB.CreateAlignedLoad(TargetTy, getNewAllocaSlicePtr(IRB, LTy),
                                 getSliceAlign(), LI.isVolatile(), LI.getName());
+
       if (AATags)
         NewLI->setAAMetadata(AATags.adjustForAccess(
             NewBeginOffset - BeginOffset, NewLI->getType(), DL));
@@ -3102,10 +3103,9 @@ class AllocaSliceRewriter : public InstVisitor<AllocaSliceRewriter, bool> {
     }
     NewSI->copyMetadata(SI, {LLVMContext::MD_mem_parallel_loop_access,
                              LLVMContext::MD_access_group});
-    if (AATags) {
+    if (AATags)
       NewSI->setAAMetadata(AATags.adjustForAccess(NewBeginOffset - BeginOffset,
                                                   V->getType(), DL));
-    }
     if (SI.isVolatile())
       NewSI->setAtomic(SI.getOrdering(), SI.getSyncScopeID());
     if (NewSI->isAtomic())
@@ -3207,12 +3207,14 @@ class AllocaSliceRewriter : public InstVisitor<AllocaSliceRewriter, bool> {
     // a single value type, just emit a memset.
     if (!CanContinue) {
       Type *SizeTy = II.getLength()->getType();
-      Constant *Size = ConstantInt::get(SizeTy, NewEndOffset - NewBeginOffset);
+      unsigned Sz = NewEndOffset - NewBeginOffset;
+      Constant *Size = ConstantInt::get(SizeTy, Sz);
       MemIntrinsic *New = cast<MemIntrinsic>(IRB.CreateMemSet(
           getNewAllocaSlicePtr(IRB, OldPtr->getType()), II.getValue(), Size,
           MaybeAlign(getSliceAlign()), II.isVolatile()));
       if (AATags)
-        New->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
+        New->setAAMetadata(
+            AATags.adjustForAccess(NewBeginOffset - BeginOffset, Sz));
 
       migrateDebugInfo(&OldAI, IsSplit, NewBeginOffset * 8, SliceSize * 8, &II,
                        New, New->getRawDest(), nullptr, DL);
@@ -3287,10 +3289,9 @@ class AllocaSliceRewriter : public InstVisitor<AllocaSliceRewriter, bool> {
         IRB.CreateAlignedStore(V, NewPtr, NewAI.getAlign(), II.isVolatile());
     New->copyMetadata(II, {LLVMContext::MD_mem_parallel_loop_access,
                            LLVMContext::MD_access_group});
-    if (AATags) {
+    if (AATags)
       New->setAAMetadata(AATags.adjustForAccess(NewBeginOffset - BeginOffset,
                                                 V->getType(), DL));
-    }
 
     migrateDebugInfo(&OldAI, IsSplit, NewBeginOffset * 8, SliceSize * 8, &II,
                      New, New->getPointerOperand(), V, DL);
@@ -3517,10 +3518,9 @@ class AllocaSliceRewriter : public InstVisitor<AllocaSliceRewriter, bool> {
         IRB.CreateAlignedStore(Src, DstPtr, DstAlign, II.isVolatile()));
     Store->copyMetadata(II, {LLVMContext::MD_mem_parallel_loop_access,
                              LLVMContext::MD_access_group});
-    if (AATags) {
+    if (AATags)
       Store->setAAMetadata(AATags.adjustForAccess(NewBeginOffset - BeginOffset,
                                                   Src->getType(), DL));
-    }
 
     APInt Offset(DL.getIndexTypeSizeInBits(DstPtr->getType()), 0);
     if (IsDest) {
diff --git a/llvm/test/Transforms/SROA/tbaa-struct3.ll b/llvm/test/Transforms/SROA/tbaa-struct3.ll
index a3414e8ace538d..2a151c01adfcc0 100644
--- a/llvm/test/Transforms/SROA/tbaa-struct3.ll
+++ b/llvm/test/Transforms/SROA/tbaa-struct3.ll
@@ -100,7 +100,7 @@ define void @store_vector_part_first(ptr %y2, float %f) {
 ; CHECK-NEXT:    [[V_1:%.*]] = call <2 x float> @foo(ptr [[Y2]])
 ; CHECK-NEXT:    store <2 x float> [[V_1]], ptr [[Y2]], align 8, !tbaa.struct [[TBAA_STRUCT6:![0-9]+]]
 ; CHECK-NEXT:    [[X7_SROA_2_0_Y2_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[Y2]], i64 8
-; CHECK-NEXT:    store float [[F]], ptr [[X7_SROA_2_0_Y2_SROA_IDX]], align 8, !tbaa.struct [[TBAA_STRUCT5]]
+; CHECK-NEXT:    store float [[F]], ptr [[X7_SROA_2_0_Y2_SROA_IDX]], align 8, !tbaa [[TBAA1]]
 ; CHECK-NEXT:    ret void
 ;
   %x7 = alloca { float, float, float, float }
@@ -118,7 +118,7 @@ define void @store_vector_part_second(ptr %y2, float %f) {
 ; CHECK-NEXT:    [[V_1:%.*]] = call <2 x float> @foo(ptr [[Y2]])
 ; CHECK-NEXT:    store float [[F]], ptr [[Y2]], align 8, !tbaa.struct [[TBAA_STRUCT9:![0-9]+]]
 ; CHECK-NEXT:    [[X7_SROA_2_0_Y2_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[Y2]], i64 4
-; CHECK-NEXT:    store <2 x float> [[V_1]], ptr [[X7_SROA_2_0_Y2_SROA_IDX]], align 4, !tbaa.struct [[TBAA_STRUCT10:![0-9]+]]
+; CHECK-NEXT:    store <2 x float> [[V_1]], ptr [[X7_SROA_2_0_Y2_SROA_IDX]], align 4, !tbaa [[TBAA7:![0-9]+]]
 ; CHECK-NEXT:    ret void
 ;
   %x7 = alloca { float, float, float, float }
@@ -134,7 +134,7 @@ define void @store_vector_single(ptr %y2, float %f) {
 ; CHECK-LABEL: define void @store_vector_single(
 ; CHECK-SAME: ptr [[Y2:%.*]], float [[F:%.*]]) {
 ; CHECK-NEXT:    [[V_1:%.*]] = call <2 x float> @foo(ptr [[Y2]])
-; CHECK-NEXT:    store <2 x float> [[V_1]], ptr [[Y2]], align 4, !tbaa.struct [[TBAA_STRUCT11:![0-9]+]]
+; CHECK-NEXT:    store <2 x float> [[V_1]], ptr [[Y2]], align 4, !tbaa.struct [[TBAA_STRUCT10:![0-9]+]]
 ; CHECK-NEXT:    ret void
 ;
   %x7 = alloca { float, float }
@@ -161,8 +161,8 @@ define void @memset(ptr %dst, ptr align 8 %src) {
 ; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[A_SROA_4]], ptr align 1 [[A_SROA_4_0_SRC_SROA_IDX]], i32 10, i1 false)
 ; CHECK-NEXT:    store i16 1, ptr [[A_SROA_3]], align 2
 ; CHECK-NEXT:    [[A_SROA_0_1_A_1_SROA_IDX2:%.*]] = getelementptr inbounds i8, ptr [[A_SROA_0]], i64 1
-; CHECK-NEXT:    call void @llvm.memset.p0.i32(ptr align 1 [[A_SROA_0_1_A_1_SROA_IDX2]], i8 42, i32 6, i1 false), !tbaa.struct [[TBAA_STRUCT12:![0-9]+]]
-; CHECK-NEXT:    store i16 10794, ptr [[A_SROA_3]], align 2, !tbaa.struct [[TBAA_STRUCT13:![0-9]+]]
+; CHECK-NEXT:    call void @llvm.memset.p0.i32(ptr align 1 [[A_SROA_0_1_A_1_SROA_IDX2]], i8 42, i32 6, i1 false), !tbaa.struct [[TBAA_STRUCT11:![0-9]+]]
+; CHECK-NEXT:    store i16 10794, ptr [[A_SROA_3]], align 2, !tbaa [[TBAA1]]
 ; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[DST]], ptr align 1 [[A_SROA_0]], i32 7, i1 true)
 ; CHECK-NEXT:    [[A_SROA_3_0_DST_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 7
 ; CHECK-NEXT:    [[A_SROA_3_0_A_SROA_3_0_COPYLOAD1:%.*]] = load volatile i16, ptr [[A_SROA_3]], align 2
@@ -199,8 +199,8 @@ define void @memset2(ptr %dst, ptr align 8 %src) {
 ; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[A_SROA_4]], ptr align 2 [[A_SROA_4_0_SRC_SROA_IDX]], i32 90, i1 false)
 ; CHECK-NEXT:    store i8 1, ptr [[A_SROA_3]], align 1
 ; CHECK-NEXT:    [[A_SROA_0_202_A_202_SROA_IDX2:%.*]] = getelementptr inbounds i8, ptr [[A_SROA_0]], i64 202
-; CHECK-NEXT:    call void @llvm.memset.p0.i32(ptr align 1 [[A_SROA_0_202_A_202_SROA_IDX2]], i8 42, i32 7, i1 false), !tbaa.struct [[TBAA_STRUCT14:![0-9]+]]
-; CHECK-NEXT:    store i8 42, ptr [[A_SROA_3]], align 1, !tbaa.struct [[TBAA_STRUCT15:![0-9]+]]
+; CHECK-NEXT:    call void @llvm.memset.p0.i32(ptr align 1 [[A_SROA_0_202_A_202_SROA_IDX2]], i8 42, i32 7, i1 false), !tbaa.struct [[TBAA_STRUCT12:![0-9]+]]
+; CHECK-NEXT:    store i8 42, ptr [[A_SROA_3]], align 1, !tbaa [[TBAA7]]
 ; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[DST]], ptr align 1 [[A_SROA_0]], i32 209, i1 true)
 ; CHECK-NEXT:    [[A_SROA_3_0_DST_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 209
 ; CHECK-NEXT:    [[A_SROA_3_0_A_SROA_3_0_COPYLOAD1:%.*]] = load volatile i8, ptr [[A_SROA_3]], align 1
@@ -240,7 +240,7 @@ define void @slice_store_v2i8_1(ptr %dst, ptr %dst.2, ptr %src) {
 ; CHECK-NEXT:    [[A_SROA_2_0_SRC_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 6
 ; CHECK-NEXT:    [[A_SROA_2_SROA_0_0_COPYLOAD:%.*]] = load <2 x i8>, ptr [[A_SROA_2_0_SRC_SROA_IDX]], align 2
 ; CHECK-NEXT:    store <2 x i8> [[A_SROA_2_SROA_0_0_COPYLOAD]], ptr [[A_SROA_2_SROA_0]], align 4
-; CHECK-NEXT:    store <2 x i8> bitcast (<1 x i16> <i16 123> to <2 x i8>), ptr [[A_SROA_2_SROA_0]], align 4, !tbaa.struct [[TBAA_STRUCT16:![0-9]+]]
+; CHECK-NEXT:    store <2 x i8> bitcast (<1 x i16> <i16 123> to <2 x i8>), ptr [[A_SROA_2_SROA_0]], align 4, !tbaa.struct [[TBAA_STRUCT13:![0-9]+]]
 ; CHECK-NEXT:    [[A_SROA_2_SROA_0_0_A_SROA_2_SROA_0_0_A_SROA_2_6_V_4:%.*]] = load <2 x i8>, ptr [[A_SROA_2_SROA_0]], align 4
 ; CHECK-NEXT:    store <2 x i8> [[A_SROA_2_SROA_0_0_A_SROA_2_SROA_0_0_A_SROA_2_6_V_4]], ptr [[DST_2]], align 2
 ; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[DST]], ptr align 1 [[A_SROA_0]], i32 6, i1 true)
@@ -279,8 +279,8 @@ define void @slice_store_v2i8_2(ptr %dst, ptr %dst.2, ptr %src) {
 ; CHECK-NEXT:    store i8 [[A_SROA_0_SROA_4_1_COPYLOAD]], ptr [[A_SROA_0_SROA_4]], align 1
 ; CHECK-NEXT:    [[A_SROA_4_1_SRC_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 3
 ; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[A_SROA_4]], ptr align 1 [[A_SROA_4_1_SRC_SROA_IDX]], i32 5, i1 false)
-; CHECK-NEXT:    store <2 x i8> zeroinitializer, ptr [[A_SROA_0_SROA_1]], align 2, !tbaa.struct [[TBAA_STRUCT17:![0-9]+]]
-; CHECK-NEXT:    store i8 0, ptr [[A_SROA_0_SROA_4]], align 1, !tbaa.struct [[TBAA_STRUCT18:![0-9]+]]
+; CHECK-NEXT:    store <2 x i8> zeroinitializer, ptr [[A_SROA_0_SROA_1]], align 2, !tbaa.struct [[TBAA_STRUCT14:![0-9]+]]
+; CHECK-NEXT:    store i8 0, ptr [[A_SROA_0_SROA_4]], align 1, !tbaa [[TBAA1]]
 ; CHECK-NEXT:    [[A_SROA_0_SROA_1_0_A_SROA_0_SROA_1_1_A_SROA_0_1_V_4:%.*]] = load <2 x i8>, ptr [[A_SROA_0_SROA_1]], align 2
 ; CHECK-NEXT:    store <2 x i8> [[A_SROA_0_SROA_1_0_A_SROA_0_SROA_1_1_A_SROA_0_1_V_4]], ptr [[DST_2]], align 2
 ; CHECK-NEXT:    [[A_SROA_0_SROA_1_0_A_SROA_0_SROA_1_1_COPYLOAD3:%.*]] = load volatile <2 x i8>, ptr [[A_SROA_0_SROA_1]], align 2
@@ -317,7 +317,7 @@ define double @tbaa_struct_load(ptr %src, ptr %dst) {
 ; CHECK-NEXT:    [[TMP_SROA_3_0_SRC_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 8
 ; CHECK-NEXT:    [[TMP_SROA_3_0_COPYLOAD:%.*]] = load i64, ptr [[TMP_SROA_3_0_SRC_SROA_IDX]], align 8
 ; CHECK-NEXT:    store i64 [[TMP_SROA_3_0_COPYLOAD]], ptr [[TMP_SROA_3]], align 8
-; CHECK-NEXT:    [[TMP_SROA_0_0_TMP_SROA_0_0_LG:%.*]] = load double, ptr [[TMP_SROA_0]], align 8, !tbaa.struct [[TBAA_STRUCT10]]
+; CHECK-NEXT:    [[TMP_SROA_0_0_TMP_SROA_0_0_LG:%.*]] = load double, ptr [[TMP_SROA_0]], align 8, !tbaa [[TBAA7]]
 ; CHECK-NEXT:    [[TMP_SROA_0_0_TMP_SROA_0_0_COPYLOAD1:%.*]] = load volatile double, ptr [[TMP_SROA_0]], align 8
 ; CHECK-NEXT:    store volatile double [[TMP_SROA_0_0_TMP_SROA_0_0_COPYLOAD1]], ptr [[DST]], align 8
 ; CHECK-NEXT:    [[TMP_SROA_3_0_DST_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 8
@@ -356,7 +356,7 @@ define i32 @shorten_integer_store_multiple_fields(ptr %dst, ptr %dst.2, ptr %src
 ; CHECK-SAME: ptr [[DST:%.*]], ptr [[DST_2:%.*]], ptr [[SRC:%.*]]) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[A_SROA_0:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    store i32 123, ptr [[A_SROA_0]], align 4, !tbaa.struct [[TBAA_STRUCT19:![0-9]+]]
+; CHECK-NEXT:    store i32 123, ptr [[A_SROA_0]], align 4, !tbaa [[TBAA7]]
 ; CHECK-NEXT:    [[A_SROA_0_0_A_SROA_0_0_L:%.*]] = load i32, ptr [[A_SROA_0]], align 4
 ; CHECK-NEXT:    [[A_SROA_0_0_A_SROA_0_0_COPYLOAD:%.*]] = load volatile i32, ptr [[A_SROA_0]], align 4
 ; CHECK-NEXT:    store volatile i32 [[A_SROA_0_0_A_SROA_0_0_COPYLOAD]], ptr [[DST]], align 1
@@ -393,7 +393,7 @@ define <2 x i16> @shorten_vector_store_single_fields(ptr %dst, ptr %dst.2, ptr %
 ; CHECK-SAME: ptr [[DST:%.*]], ptr [[DST_2:%.*]], ptr [[SRC:%.*]]) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[A_SROA_0:%.*]] = alloca <2 x i32>, align 8
-; CHECK-NEXT:    store <2 x i32> <i32 1, i32 2>, ptr [[A_SROA_0]], align 8, !tbaa.struct [[TBAA_STRUCT19]]
+; CHECK-NEXT:    store <2 x i32> <i32 1, i32 2>, ptr [[A_SROA_0]], align 8, !tbaa.struct [[TBAA_STRUCT15:![0-9]+]]
 ; CHECK-NEXT:    [[A_SROA_0_0_A_SROA_0_0_L:%.*]] = load <2 x i16>, ptr [[A_SROA_0]], align 8
 ; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[DST]], ptr align 8 [[A_SROA_0]], i32 4, i1 true)
 ; CHECK-NEXT:    ret <2 x i16> [[A_SROA_0_0_A_SROA_0_0_L]]
@@ -429,11 +429,11 @@ define i32 @split_load_with_tbaa_struct(i32 %x, ptr %src, ptr %dst) {
 ; CHECK-NEXT:    [[A3_SROA_5_0_SRC_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 9
 ; CHECK-NEXT:    [[A3_SROA_5_0_COPYLOAD:%.*]] = load i8, ptr [[A3_SROA_5_0_SRC_SROA_IDX]], align 1
 ; CHECK-NEXT:    store i8 [[A3_SROA_5_0_COPYLOAD]], ptr [[A3_SROA_5]], align 1
-; CHECK-NEXT:    [[A3_SROA_0_0_A3_SROA_0_0_LOAD4_FCA_0_LOAD:%.*]] = load i16, ptr [[A3_SROA_0]], align 8, !tbaa.struct [[TBAA_STRUCT20:![0-9]+]]
+; CHECK-NEXT:    [[A3_SROA_0_0_A3_SROA_0_0_LOAD4_FCA_0_LOAD:%.*]] = load i16, ptr [[A3_SROA_0]], align 8, !tbaa.struct [[TBAA_STRUCT16:![0-9]+]]
 ; CHECK-NEXT:    [[LOAD4_FCA_0_INSERT:%.*]] = insertvalue { i16, float, i8 } poison, i16 [[A3_SROA_0_0_A3_SROA_0_0_LOAD4_FCA_0_LOAD]], 0
-; CHECK-NEXT:    [[A3_SROA_33_0_A3_SROA_33_4_LOAD4_FCA_1_LOAD:%.*]] = load float, ptr [[A3_SROA_33]], align 4, !tbaa.struct [[TBAA_STRUCT21:![0-9]+]]
+; CHECK-NEXT:    [[A3_SROA_33_0_A3_SROA_33_4_LOAD4_FCA_1_LOAD:%.*]] = load float, ptr [[A3_SROA_33]], align 4, !tbaa.struct [[TBAA_STRUCT17:![0-9]+]]
 ; CHECK-NEXT:    [[LOAD4_FCA_1_INSERT:%.*]] = insertvalue { i16, float, i8 } [[LOAD4_FCA_0_INSERT]], float [[A3_SROA_33_0_A3_SROA_33_4_LOAD4_FCA_1_LOAD]], 1
-; CHECK-NEXT:    [[A3_SROA_4_0_A3_SROA_4_8_LOAD4_FCA_2_LOAD:%.*]] = load i8, ptr [[A3_SROA_4]], align 8, !tbaa.struct [[TBAA_STRUCT15]]
+; CHECK-NEXT:    [[A3_SROA_4_0_A3_SROA_4_8_LOAD4_FCA_2_LOAD:%.*]] = load i8, ptr [[A3_SROA_4]], align 8, !tbaa [[TBAA7]]
 ; CHECK-NEXT:    [[LOAD4_FCA_2_INSERT:%.*]] = insertvalue { i16, float, i8 } [[LOAD4_FCA_1_INSERT]], i8 [[A3_SROA_4_0_A3_SROA_4_8_LOAD4_FCA_2_LOAD]], 2
 ; CHECK-NEXT:    [[UNWRAP2:%.*]] = extractvalue { i16, float, i8 } [[LOAD4_FCA_2_INSERT]], 1
 ; CHECK-NEXT:    [[VALCAST2:%.*]] = bitcast float [[UNWRAP2]] to i32
@@ -492,11 +492,11 @@ define i32 @split_store_with_tbaa_struct(i32 %x, ptr %src, ptr %dst) {
 ; CHECK-NEXT:    [[I_2:%.*]] = insertvalue { i16, float, i8 } [[I_1]], float 3.000000e+00, 1
 ; CHECK-NEXT:    [[I_3:%.*]] = insertvalue { i16, float, i8 } [[I_2]], i8 99, 2
 ; CHECK-NEXT:    [[I_3_FCA_0_EXTRACT:%.*]] = extractvalue { i16, float, i8 } [[I_3]], 0
-; CHECK-NEXT:    store i16 [[I_3_FCA_0_EXTRACT]], ptr [[A3_SROA_0]], align 8, !tbaa.struct [[TBAA_STRUCT20]]
+; CHECK-NEXT:    store i16 [[I_3_FCA_0_EXTRACT]], ptr [[A3_SROA_0]], align 8, !tbaa.struct [[TBAA_STRUCT16]]
 ; CHECK-NEXT:    [[I_3_FCA_1_EXTRACT:%.*]] = extractvalue { i16, float, i8 } [[I_3]], 1
-; CHECK-NEXT:    store float [[I_3_FCA_1_EXTRACT]], ptr [[A3_SROA_33]], align 4, !tbaa.struct [[TBAA_STRUCT21]]
+; CHECK-NEXT:    store float [[I_3_FCA_1_EXTRACT]], ptr [[A3_SROA_33]], align 4, !tbaa.struct [[TBAA_STRUCT17]]
 ; CHECK-NEXT:    [[I_3_FCA_2_EXTRACT:%.*]] = extractvalue { i16, float, i8 } [[I_3]], 2
-; CHECK-NEXT:    store i8 [[I_3_FCA_2_EXTRACT]], ptr [[A3_SROA_4]], align 8, !tbaa.struct [[TBAA_STRUCT15]]
+; CHECK-NEXT:    store i8 [[I_3_FCA_2_EXTRACT]], ptr [[A3_SROA_4]], align 8, !tbaa [[TBAA7]]
 ; CHECK-NEXT:    [[A3_SROA_0_0_A3_SROA_0_0_COPYLOAD1:%.*]] = load volatile i16, ptr [[A3_SROA_0]], align 8
 ; CHECK-NEXT:    store volatile i16 [[A3_SROA_0_0_A3_SROA_0_0_COPYLOAD1]], ptr [[DST]], align 1
 ; CHECK-NEXT:    [[A3_SROA_3_0_DST_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 2
@@ -553,4 +553,17 @@ declare void @llvm.memcpy.p0.p0.i64(ptr noalias nocapture writeonly, ptr noalias
 ; CHECK: [[META2]] = !{!"float", [[META3:![0-9]+]], i64 0}
 ; CHECK: [[META3]] = !{!"omnipotent char", [[META4:![0-9]+]], i64 0}
 ; CHECK: [[META4]] = !{!"Simple C++ TBAA"}
+; CHECK: [[TBAA_STRUCT5]] = !{i64 0, i64 4, [[TBAA1]]}
+; CHECK: [[TBAA_STRUCT6]] = !{i64 0, i64 8, [[TBAA7]], i64 8, i64 4, [[TBAA1]]}
+; CHECK: [[TBAA7]] = !{[[META8:![0-9]+]], [[META8]], i64 0}
+; CHECK: [[META8]] = !{!"v2f32", [[META3]], i64 0}
+; CHECK: [[TBAA_STRUCT9]] = !{i64 0, i64 4, [[TBAA1]], i64 4, i64 8, [[TBAA7]]}
+; CHECK: [[TBAA_STRUCT10]] = !{i64 0, i64 8, [[TBAA7]], i64 4, i64 8, [[TBAA1]]}
+; CHECK: [[TBAA_STRUCT11]] = !{i64 0, i64 2, [[TBAA1]], i64 2, i64 6, [[TBAA1]]}
+; CHECK: [[TBAA_STRUCT12]] = !{i64 0, i64 7, [[TBAA7]], i64 7, i64 1, [[TBAA7]]}
+; CHECK: [[TBAA_STRUCT13]] = !{i64 0, i64 2, [[TBAA1]], i64 2, i64 2, [[TBAA1]]}
+; CHECK: [[TBAA_STRUCT14]] = !{i64 0, i64 3, [[TBAA1]]}
+; CHECK: [[TBAA_STRUCT15]] = !{i64 0, i64 4, [[TBAA7]]}
+; CHECK: [[TBAA_STRUCT16]] = !{i64 0, i64 2, [[TBAA7]], i64 4, i64 4, [[TBAA7]], i64 8, i64 1, [[TBAA7]]}
+; CHECK: [[TBAA_STRUCT17]] = !{i64 0, i64 4, [[TBAA7]], i64 4, i64 1, [[TBAA7]]}
 ;.

>From 51c915cd9da740a4a93a5f86b4b6a381206ee480 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Fri, 16 Feb 2024 13:41:13 +0000
Subject: [PATCH 5/5] !fixup update clang test.

---
 clang/test/CodeGen/aarch64-ABI-align-packed.c | 21 ++++++++++++-------
 1 file changed, 14 insertions(+), 7 deletions(-)

diff --git a/clang/test/CodeGen/aarch64-ABI-align-packed.c b/clang/test/CodeGen/aarch64-ABI-align-packed.c
index 93b81d50261bf8..2b029f64589567 100644
--- a/clang/test/CodeGen/aarch64-ABI-align-packed.c
+++ b/clang/test/CodeGen/aarch64-ABI-align-packed.c
@@ -59,7 +59,7 @@ struct non_packed_struct gs_non_packed_struct;
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[S_NON_PACKED_STRUCT_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [1 x <8 x i16>] [[S_NON_PACKED_STRUCT_COERCE]], 0
 // CHECK-NEXT:    store double [[D8]], ptr @gd, align 8, !tbaa [[TBAA2:![0-9]+]]
-// CHECK-NEXT:    store <8 x i16> [[S_NON_PACKED_STRUCT_COERCE_FCA_0_EXTRACT]], ptr @gs_non_packed_struct, align 16, !tbaa.struct [[TBAA_STRUCT6:![0-9]+]]
+// CHECK-NEXT:    store <8 x i16> [[S_NON_PACKED_STRUCT_COERCE_FCA_0_EXTRACT]], ptr @gs_non_packed_struct, align 16, !tbaa [[TBAA6:![0-9]+]]
 // CHECK-NEXT:    ret void
 __attribute__((noinline)) void named_arg_non_packed_struct(double d0, double d1, double d2, double d3,
                                  double d4, double d5, double d6, double d7,
@@ -114,7 +114,7 @@ struct packed_struct gs_packed_struct;
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[S_PACKED_STRUCT_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [1 x <8 x i16>] [[S_PACKED_STRUCT_COERCE]], 0
 // CHECK-NEXT:    store double [[D8]], ptr @gd, align 8, !tbaa [[TBAA2]]
-// CHECK-NEXT:    store <8 x i16> [[S_PACKED_STRUCT_COERCE_FCA_0_EXTRACT]], ptr @gs_packed_struct, align 1, !tbaa.struct [[TBAA_STRUCT6]]
+// CHECK-NEXT:    store <8 x i16> [[S_PACKED_STRUCT_COERCE_FCA_0_EXTRACT]], ptr @gs_packed_struct, align 1, !tbaa [[TBAA6]]
 // CHECK-NEXT:    ret void
 __attribute__((noinline)) void named_arg_packed_struct(double d0, double d1, double d2, double d3,
                                  double d4, double d5, double d6, double d7,
@@ -169,7 +169,7 @@ struct packed_member gs_packed_member;
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[S_PACKED_MEMBER_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [1 x <8 x i16>] [[S_PACKED_MEMBER_COERCE]], 0
 // CHECK-NEXT:    store double [[D8]], ptr @gd, align 8, !tbaa [[TBAA2]]
-// CHECK-NEXT:    store <8 x i16> [[S_PACKED_MEMBER_COERCE_FCA_0_EXTRACT]], ptr @gs_packed_member, align 1, !tbaa.struct [[TBAA_STRUCT6]]
+// CHECK-NEXT:    store <8 x i16> [[S_PACKED_MEMBER_COERCE_FCA_0_EXTRACT]], ptr @gs_packed_member, align 1, !tbaa [[TBAA6]]
 // CHECK-NEXT:    ret void
 __attribute__((noinline)) void named_arg_packed_member(double d0, double d1, double d2, double d3,
                                  double d4, double d5, double d6, double d7,
@@ -224,7 +224,7 @@ struct aligned_struct_8 gs_aligned_struct_8;
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[S_ALIGNED_STRUCT_8_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [1 x <8 x i16>] [[S_ALIGNED_STRUCT_8_COERCE]], 0
 // CHECK-NEXT:    store double [[D8]], ptr @gd, align 8, !tbaa [[TBAA2]]
-// CHECK-NEXT:    store <8 x i16> [[S_ALIGNED_STRUCT_8_COERCE_FCA_0_EXTRACT]], ptr @gs_aligned_struct_8, align 16, !tbaa.struct [[TBAA_STRUCT6]]
+// CHECK-NEXT:    store <8 x i16> [[S_ALIGNED_STRUCT_8_COERCE_FCA_0_EXTRACT]], ptr @gs_aligned_struct_8, align 16, !tbaa [[TBAA6]]
 // CHECK-NEXT:    ret void
 __attribute__((noinline)) void named_arg_aligned_struct_8(double d0, double d1, double d2, double d3,
                                  double d4, double d5, double d6, double d7,
@@ -279,7 +279,7 @@ struct aligned_member_8 gs_aligned_member_8;
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[S_ALIGNED_MEMBER_8_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [1 x <8 x i16>] [[S_ALIGNED_MEMBER_8_COERCE]], 0
 // CHECK-NEXT:    store double [[D8]], ptr @gd, align 8, !tbaa [[TBAA2]]
-// CHECK-NEXT:    store <8 x i16> [[S_ALIGNED_MEMBER_8_COERCE_FCA_0_EXTRACT]], ptr @gs_aligned_member_8, align 16, !tbaa.struct [[TBAA_STRUCT6]]
+// CHECK-NEXT:    store <8 x i16> [[S_ALIGNED_MEMBER_8_COERCE_FCA_0_EXTRACT]], ptr @gs_aligned_member_8, align 16, !tbaa [[TBAA6]]
 // CHECK-NEXT:    ret void
 __attribute__((noinline)) void named_arg_aligned_member_8(double d0, double d1, double d2, double d3,
                                  double d4, double d5, double d6, double d7,
@@ -334,7 +334,7 @@ struct pragma_packed_struct_8 gs_pragma_packed_struct_8;
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[S_PRAGMA_PACKED_STRUCT_8_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [1 x <8 x i16>] [[S_PRAGMA_PACKED_STRUCT_8_COERCE]], 0
 // CHECK-NEXT:    store double [[D8]], ptr @gd, align 8, !tbaa [[TBAA2]]
-// CHECK-NEXT:    store <8 x i16> [[S_PRAGMA_PACKED_STRUCT_8_COERCE_FCA_0_EXTRACT]], ptr @gs_pragma_packed_struct_8, align 8, !tbaa.struct [[TBAA_STRUCT6]]
+// CHECK-NEXT:    store <8 x i16> [[S_PRAGMA_PACKED_STRUCT_8_COERCE_FCA_0_EXTRACT]], ptr @gs_pragma_packed_struct_8, align 8, !tbaa [[TBAA6]]
 // CHECK-NEXT:    ret void
 __attribute__((noinline)) void named_arg_pragma_packed_struct_8(double d0, double d1, double d2, double d3,
                                  double d4, double d5, double d6, double d7,
@@ -389,7 +389,7 @@ struct pragma_packed_struct_4 gs_pragma_packed_struct_4;
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[S_PRAGMA_PACKED_STRUCT_4_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [1 x <8 x i16>] [[S_PRAGMA_PACKED_STRUCT_4_COERCE]], 0
 // CHECK-NEXT:    store double [[D8]], ptr @gd, align 8, !tbaa [[TBAA2]]
-// CHECK-NEXT:    store <8 x i16> [[S_PRAGMA_PACKED_STRUCT_4_COERCE_FCA_0_EXTRACT]], ptr @gs_pragma_packed_struct_4, align 4, !tbaa.struct [[TBAA_STRUCT6]]
+// CHECK-NEXT:    store <8 x i16> [[S_PRAGMA_PACKED_STRUCT_4_COERCE_FCA_0_EXTRACT]], ptr @gs_pragma_packed_struct_4, align 4, !tbaa [[TBAA6]]
 // CHECK-NEXT:    ret void
 __attribute__((noinline)) void named_arg_pragma_packed_struct_4(double d0, double d1, double d2, double d3,
                                  double d4, double d5, double d6, double d7,
@@ -436,3 +436,10 @@ void test_pragma_packed_struct_4() {
     named_arg_pragma_packed_struct_4(1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, s_pragma_packed_struct_4);
     variadic_pragma_packed_struct_4(1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, s_pragma_packed_struct_4);
 }
+//.
+// CHECK: [[TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0}
+// CHECK: [[META3]] = !{!"double", [[META4:![0-9]+]], i64 0}
+// CHECK: [[META4]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0}
+// CHECK: [[META5]] = !{!"Simple C/C++ TBAA"}
+// CHECK: [[TBAA6]] = !{[[META4]], [[META4]], i64 0}
+//.



More information about the cfe-commits mailing list