[llvm] [IR][TBAA] Allow multiple fields with same offset in TBAA struct-path (PR #76356)

Bushev Dmitry via llvm-commits llvm-commits at lists.llvm.org
Mon Feb 12 09:07:26 PST 2024


https://github.com/dybv-sc updated https://github.com/llvm/llvm-project/pull/76356

>From 0376385a11fed3319427ee82100dd492cf584cf7 Mon Sep 17 00:00:00 2001
From: Dmitry Bushev <dmitry.bushev at syntacore.com>
Date: Mon, 25 Dec 2023 13:16:43 +0300
Subject: [PATCH] [IR][TBAA] Allow multiple fields with same offset in TBAA
 struct-path

Add support for multiple fields sharing the same offset in TBAA struct-path
metadata nodes. The primary goal is to let union-like structures
participate in TBAA struct-path resolution.
---
 llvm/docs/LangRef.rst                         |  17 +-
 llvm/include/llvm/IR/Verifier.h               |  11 +-
 llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp  | 103 ++++++++--
 llvm/lib/IR/Verifier.cpp                      | 143 ++++++++-----
 .../TypeBasedAliasAnalysis/aggregates.ll      |  20 ++
 .../TypeBasedAliasAnalysis/union-path-new.ll  | 190 ++++++++++++++++++
 .../TypeBasedAliasAnalysis/union-path-old.ll  | 190 ++++++++++++++++++
 llvm/test/Verifier/tbaa.ll                    |  10 +-
 8 files changed, 602 insertions(+), 82 deletions(-)
 create mode 100644 llvm/test/Analysis/TypeBasedAliasAnalysis/union-path-new.ll
 create mode 100644 llvm/test/Analysis/TypeBasedAliasAnalysis/union-path-old.ll

diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index fd2e3aacd0169c..b0d11d0450ebaf 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -6434,9 +6434,10 @@ tuples this way:
    undefined if ``Offset`` is non-zero.
 
  * If ``BaseTy`` is a struct type then ``ImmediateParent(BaseTy, Offset)``
-   is ``(NewTy, NewOffset)`` where ``NewTy`` is the type contained in
-   ``BaseTy`` at offset ``Offset`` and ``NewOffset`` is ``Offset`` adjusted
-   to be relative within that inner type.
+   is an array of ``(NewTy[N], NewOffset)`` where ``NewTy[N]`` is the Nth type
+   contained in ``BaseTy`` at offset ``Offset`` and ``NewOffset`` is
+   ``Offset`` adjusted to be relative within that inner type. Multiple types
+   at the same offset make it possible to describe union-like structures.
 
 A memory access with an access tag ``(BaseTy1, AccessTy1, Offset1)``
 aliases a memory access with an access tag ``(BaseTy2, AccessTy2,
@@ -6447,9 +6448,9 @@ As a concrete example, the type descriptor graph for the following program
 
 .. code-block:: c
 
-    struct Inner {
+    union Inner {
       int i;    // offset 0
-      float f;  // offset 4
+      float f;  // offset 0
     };
 
     struct Outer {
@@ -6461,7 +6462,7 @@ As a concrete example, the type descriptor graph for the following program
     void f(struct Outer* outer, struct Inner* inner, float* f, int* i, char* c) {
       outer->f = 0;            // tag0: (OuterStructTy, FloatScalarTy, 0)
       outer->inner_a.i = 0;    // tag1: (OuterStructTy, IntScalarTy, 12)
-      outer->inner_a.f = 0.0;  // tag2: (OuterStructTy, FloatScalarTy, 16)
+      outer->inner_a.f = 0.0;  // tag2: (OuterStructTy, FloatScalarTy, 12)
       *f = 0.0;                // tag3: (FloatScalarTy, FloatScalarTy, 0)
     }
 
@@ -6475,13 +6476,13 @@ type):
     FloatScalarTy = ("float", CharScalarTy, 0)
     DoubleScalarTy = ("double", CharScalarTy, 0)
     IntScalarTy = ("int", CharScalarTy, 0)
-    InnerStructTy = {"Inner" (IntScalarTy, 0), (FloatScalarTy, 4)}
+    InnerStructTy = {"Inner" (IntScalarTy, 0), (FloatScalarTy, 0)}
     OuterStructTy = {"Outer", (FloatScalarTy, 0), (DoubleScalarTy, 4),
                      (InnerStructTy, 12)}
 
 
 with (e.g.) ``ImmediateParent(OuterStructTy, 12)`` = ``(InnerStructTy,
-0)``, ``ImmediateParent(InnerStructTy, 0)`` = ``(IntScalarTy, 0)``, and
+0)``, ``ImmediateParent(InnerStructTy, 0)`` = ``(IntScalarTy, 0), (FloatScalarTy, 0)``, and
 ``ImmediateParent(IntScalarTy, 0)`` = ``(CharScalarTy, 0)``.
 
 .. _tbaa_node_representation:
diff --git a/llvm/include/llvm/IR/Verifier.h b/llvm/include/llvm/IR/Verifier.h
index b25f8eb77ee38b..95db2c4b16eca7 100644
--- a/llvm/include/llvm/IR/Verifier.h
+++ b/llvm/include/llvm/IR/Verifier.h
@@ -59,8 +59,15 @@ class TBAAVerifier {
 
   /// \name Helper functions used by \c visitTBAAMetadata.
   /// @{
-  MDNode *getFieldNodeFromTBAABaseNode(Instruction &I, const MDNode *BaseNode,
-                                       APInt &Offset, bool IsNewFormat);
+  std::vector<MDNode *> getFieldNodeFromTBAABaseNode(Instruction &I,
+                                                     const MDNode *BaseNode,
+                                                     APInt &Offset,
+                                                     bool IsNewFormat);
+  bool findAccessTypeNode(Instruction &I,
+                          SmallPtrSetImpl<const MDNode *> &StructPath,
+                          APInt Offset, bool IsNewFormat,
+                          const MDNode *AccessType, const MDNode *BaseNode,
+                          const MDNode *MD);
   TBAAVerifier::TBAABaseNodeSummary verifyTBAABaseNode(Instruction &I,
                                                        const MDNode *BaseNode,
                                                        bool IsNewFormat);
diff --git a/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp b/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp
index d05f42552e81de..12a529cd433a5d 100644
--- a/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp
+++ b/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp
@@ -121,6 +121,7 @@
 #include "llvm/Support/ErrorHandling.h"
 #include <cassert>
 #include <cstdint>
+#include <stack>
 
 using namespace llvm;
 
@@ -299,9 +300,10 @@ class TBAAStructTypeNode {
     return TBAAStructTypeNode(TypeNode);
   }
 
-  /// Get this TBAAStructTypeNode's field in the type DAG with
+  /// Get this TBAAStructTypeNode's fields in the type DAG with
   /// given offset. Update the offset to be relative to the field type.
-  TBAAStructTypeNode getField(uint64_t &Offset) const {
+  /// There could be multiple fields with the same offset.
+  std::vector<TBAAStructTypeNode> getField(uint64_t &Offset) const {
     bool NewFormat = isNewFormat();
     const ArrayRef<MDOperand> Operands = Node->operands();
     const unsigned NumOperands = Operands.size();
@@ -309,11 +311,11 @@ class TBAAStructTypeNode {
     if (NewFormat) {
       // New-format root and scalar type nodes have no fields.
       if (NumOperands < 6)
-        return TBAAStructTypeNode();
+        return {TBAAStructTypeNode()};
     } else {
       // Parent can be omitted for the root node.
       if (NumOperands < 2)
-        return TBAAStructTypeNode();
+        return {TBAAStructTypeNode()};
 
       // Fast path for a scalar type node and a struct type node with a single
       // field.
@@ -325,8 +327,8 @@ class TBAAStructTypeNode {
         Offset -= Cur;
         MDNode *P = dyn_cast_or_null<MDNode>(Operands[1]);
         if (!P)
-          return TBAAStructTypeNode();
-        return TBAAStructTypeNode(P);
+          return {TBAAStructTypeNode()};
+        return {TBAAStructTypeNode(P)};
       }
     }
 
@@ -336,6 +338,8 @@ class TBAAStructTypeNode {
     unsigned NumOpsPerField = NewFormat ? 3 : 2;
     unsigned TheIdx = 0;
 
+    std::vector<TBAAStructTypeNode> Ret;
+
     for (unsigned Idx = FirstFieldOpNo; Idx < NumOperands;
          Idx += NumOpsPerField) {
       uint64_t Cur =
@@ -353,10 +357,20 @@ class TBAAStructTypeNode {
     uint64_t Cur =
         mdconst::extract<ConstantInt>(Operands[TheIdx + 1])->getZExtValue();
     Offset -= Cur;
+
+    // Collect all fields that have the matching offset.
     MDNode *P = dyn_cast_or_null<MDNode>(Operands[TheIdx]);
-    if (!P)
-      return TBAAStructTypeNode();
-    return TBAAStructTypeNode(P);
+    Ret.emplace_back(P ? TBAAStructTypeNode(P) : TBAAStructTypeNode());
+
+    while (TheIdx > FirstFieldOpNo) {
+      TheIdx -= NumOpsPerField;
+      auto Val = mdconst::extract<ConstantInt>(Operands[TheIdx + 1]);
+      if (Cur != Val->getZExtValue())
+        break;
+      MDNode *P = dyn_cast_or_null<MDNode>(Operands[TheIdx]);
+      P ? Ret.emplace_back(P) : Ret.emplace_back();
+    }
+    return Ret;
   }
 };
 
@@ -572,6 +586,39 @@ static bool hasField(TBAAStructTypeNode BaseType,
   return false;
 }
 
+static bool rangeOverlap(std::pair<size_t, size_t> Range1,
+                         std::pair<size_t, size_t> Range2) {
+  return Range1.first < Range2.first + Range2.second &&
+         Range1.first + Range1.second > Range2.first;
+}
+
+/// Return true if two accesses to the given \p BaseType at \p Offset1 and
+/// at \p Offset2 may alias. This check ignores the size information that
+/// new-format type nodes carry, so it may be more conservative.
+static bool mayFieldAccessesAlias(TBAAStructTypeNode BaseType, uint64_t Offset1,
+                                  uint64_t Offset2) {
+  if (!BaseType.getNode())
+    return true;
+
+  auto PrevDiff = (long long)(Offset1) - (long long)(Offset2);
+  auto Fields1 = BaseType.getField(Offset1);
+  auto Fields2 = BaseType.getField(Offset2);
+  auto CurrentDiff = (long long)(Offset1) - (long long)(Offset2);
+
+  // If the distance between the offsets changed, the accesses are
+  // to different fields.
+  if (PrevDiff != CurrentDiff)
+    return false;
+
+  // Fields that share the same offset may differ in internal structure: in
+  // some of them both accesses may hit the same field, in others different
+  // ones. To be conservative, report MayAlias if any of the fields reports
+  // MayAlias.
+  return llvm::any_of(Fields1, [&](auto &FieldType) {
+    return mayFieldAccessesAlias(FieldType, Offset1, Offset2);
+  });
+}
+
 /// Return true if for two given accesses, one of the accessed objects may be a
 /// subobject of the other. The \p BaseTag and \p SubobjectTag parameters
 /// describe the accesses to the base object and the subobject respectively.
@@ -599,20 +646,38 @@ static bool mayBeAccessToSubobjectOf(TBAAStructTagNode BaseTag,
   // from the base type, follow the edge with the correct offset in the type DAG
   // and adjust the offset until we reach the field type or until we reach the
   // access type.
+  // If multiple fields have same offset in some base type, then scan each such
+  // field.
   bool NewFormat = BaseTag.isNewFormat();
   TBAAStructTypeNode BaseType(BaseTag.getBaseType());
   uint64_t OffsetInBase = BaseTag.getOffset();
 
-  for (;;) {
-    // In the old format there is no distinction between fields and parent
-    // types, so in this case we consider all nodes up to the root.
-    if (!BaseType.getNode()) {
-      assert(!NewFormat && "Did not see access type in access path!");
-      break;
-    }
+  SmallVector<std::pair<TBAAStructTypeNode, uint64_t>, 4> ToCheck;
+  ToCheck.emplace_back(BaseType, OffsetInBase);
+  while (!ToCheck.empty()) {
+    std::tie(BaseType, OffsetInBase) = ToCheck.back();
+    ToCheck.pop_back();
+
+    // If the root is reached, still check the remaining candidates.
+    // For the new format the access type is always expected to be found.
+    // For the old format all nodes up to the root are considered for every
+    // candidate.
+    if (!BaseType.getNode())
+      continue;
 
     if (BaseType.getNode() == SubobjectTag.getBaseType()) {
-      bool SameMemberAccess = OffsetInBase == SubobjectTag.getOffset();
+      bool SameMemberAccess;
+      uint64_t SubobjectOffset = SubobjectTag.getOffset();
+      if (NewFormat)
+        // If size information is available, check if their access locations
+        // overlap.
+        SameMemberAccess = rangeOverlap(
+            std::make_pair(OffsetInBase, BaseTag.getSize()),
+            std::make_pair(SubobjectOffset, SubobjectTag.getSize()));
+      else
+        // Else do a more conservative check.
+        SameMemberAccess =
+            mayFieldAccessesAlias(BaseType, OffsetInBase, SubobjectOffset);
       if (GenericTag) {
         *GenericTag = SameMemberAccess ? SubobjectTag.getNode() :
                                          createAccessTag(CommonType);
@@ -627,13 +692,15 @@ static bool mayBeAccessToSubobjectOf(TBAAStructTagNode BaseTag,
 
     // Follow the edge with the correct offset. Offset will be adjusted to
     // be relative to the field type.
-    BaseType = BaseType.getField(OffsetInBase);
+    for (auto &&F : BaseType.getField(OffsetInBase))
+      ToCheck.emplace_back(F, OffsetInBase);
   }
 
   // If the base object has a direct or indirect field of the subobject's type,
   // then this may be an access to that field. We need this to check now that
   // we support aggregates as access types.
   if (NewFormat) {
+    assert(BaseType.getNode() && "Did not see access type in access path!");
     // TBAAStructTypeNode BaseAccessType(BaseTag.getAccessType());
     TBAAStructTypeNode FieldType(SubobjectTag.getBaseType());
     if (hasField(BaseType, FieldType)) {
diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp
index b04d39c700a8f5..e0d543320059b7 100644
--- a/llvm/lib/IR/Verifier.cpp
+++ b/llvm/lib/IR/Verifier.cpp
@@ -6969,48 +6969,56 @@ bool TBAAVerifier::isValidScalarTBAANode(const MDNode *MD) {
   return Result;
 }
 
-/// Returns the field node at the offset \p Offset in \p BaseNode.  Update \p
-/// Offset in place to be the offset within the field node returned.
+/// Returns one or several field nodes at the offset \p Offset in \p BaseNode.
+/// Returns empty vector if \p BaseNode has no fields with specified offset.
+/// Update \p Offset in place to be the offset within the field node returned.
 ///
 /// We assume we've okayed \p BaseNode via \c verifyTBAABaseNode.
-MDNode *TBAAVerifier::getFieldNodeFromTBAABaseNode(Instruction &I,
-                                                   const MDNode *BaseNode,
-                                                   APInt &Offset,
-                                                   bool IsNewFormat) {
+std::vector<MDNode *> TBAAVerifier::getFieldNodeFromTBAABaseNode(
+    Instruction &I, const MDNode *BaseNode, APInt &Offset, bool IsNewFormat) {
   assert(BaseNode->getNumOperands() >= 2 && "Invalid base node!");
 
   // Scalar nodes have only one possible "field" -- their parent in the access
   // hierarchy.  Offset must be zero at this point, but our caller is supposed
   // to check that.
   if (BaseNode->getNumOperands() == 2)
-    return cast<MDNode>(BaseNode->getOperand(1));
+    return {cast<MDNode>(BaseNode->getOperand(1))};
 
   unsigned FirstFieldOpNo = IsNewFormat ? 3 : 1;
   unsigned NumOpsPerField = IsNewFormat ? 3 : 2;
+
+  unsigned LastIdx = BaseNode->getNumOperands() - NumOpsPerField;
   for (unsigned Idx = FirstFieldOpNo; Idx < BaseNode->getNumOperands();
            Idx += NumOpsPerField) {
     auto *OffsetEntryCI =
         mdconst::extract<ConstantInt>(BaseNode->getOperand(Idx + 1));
     if (OffsetEntryCI->getValue().ugt(Offset)) {
       if (Idx == FirstFieldOpNo) {
-        CheckFailed("Could not find TBAA parent in struct type node", &I,
-                    BaseNode, &Offset);
-        return nullptr;
+        return {};
       }
 
-      unsigned PrevIdx = Idx - NumOpsPerField;
-      auto *PrevOffsetEntryCI =
-          mdconst::extract<ConstantInt>(BaseNode->getOperand(PrevIdx + 1));
-      Offset -= PrevOffsetEntryCI->getValue();
-      return cast<MDNode>(BaseNode->getOperand(PrevIdx));
+      LastIdx = Idx - NumOpsPerField;
+      break;
     }
   }
 
-  unsigned LastIdx = BaseNode->getNumOperands() - NumOpsPerField;
   auto *LastOffsetEntryCI = mdconst::extract<ConstantInt>(
       BaseNode->getOperand(LastIdx + 1));
-  Offset -= LastOffsetEntryCI->getValue();
-  return cast<MDNode>(BaseNode->getOperand(LastIdx));
+  auto LastOffsetVal = LastOffsetEntryCI->getValue();
+  Offset -= LastOffsetVal;
+
+  std::vector<MDNode *> Ret;
+  Ret.emplace_back(cast<MDNode>(BaseNode->getOperand(LastIdx)));
+  while (LastIdx > FirstFieldOpNo) {
+    LastIdx -= NumOpsPerField;
+    LastOffsetEntryCI =
+        mdconst::extract<ConstantInt>(BaseNode->getOperand(LastIdx + 1));
+    if (LastOffsetEntryCI->getValue() != LastOffsetVal)
+      break;
+    Ret.emplace_back(cast<MDNode>(BaseNode->getOperand(LastIdx)));
+  }
+
+  return Ret;
 }
 
 static bool isNewFormatTBAATypeNode(llvm::MDNode *Type) {
@@ -7087,47 +7095,84 @@ bool TBAAVerifier::visitTBAAMetadata(Instruction &I, const MDNode *MD) {
   CheckTBAA(OffsetCI, "Offset must be constant integer", &I, MD);
 
   APInt Offset = OffsetCI->getValue();
-  bool SeenAccessTypeInPath = false;
 
-  SmallPtrSet<MDNode *, 4> StructPath;
+  SmallPtrSet<const MDNode *, 4> StructPath;
 
-  for (/* empty */; BaseNode && !IsRootTBAANode(BaseNode);
-       BaseNode = getFieldNodeFromTBAABaseNode(I, BaseNode, Offset,
-                                               IsNewFormat)) {
-    if (!StructPath.insert(BaseNode).second) {
-      CheckFailed("Cycle detected in struct path", &I, MD);
-      return false;
-    }
+  auto &&[Invalid, BaseNodeBitWidth] =
+      verifyTBAABaseNode(I, BaseNode, IsNewFormat);
 
-    bool Invalid;
-    unsigned BaseNodeBitWidth;
-    std::tie(Invalid, BaseNodeBitWidth) = verifyTBAABaseNode(I, BaseNode,
-                                                             IsNewFormat);
+  // If the base node is invalid in itself, then we've already printed all the
+  // errors we wanted to print.
+  if (Invalid)
+    return false;
 
-    // If the base node is invalid in itself, then we've already printed all the
-    // errors we wanted to print.
-    if (Invalid)
-      return false;
+  bool SeenAccessTypeInPath = BaseNode == AccessType;
+  if (SeenAccessTypeInPath) {
+    CheckTBAA(Offset == 0, "Offset not zero at the point of scalar access", &I,
+              MD, &Offset);
+    if (IsNewFormat)
+      return true;
+  }
 
-    SeenAccessTypeInPath |= BaseNode == AccessType;
+  CheckTBAA(findAccessTypeNode(I, StructPath, Offset, IsNewFormat, AccessType,
+                               BaseNode, MD) ||
+                SeenAccessTypeInPath,
+            "Did not see access type in access path!", &I, MD);
+  return true;
+}
 
-    if (isValidScalarTBAANode(BaseNode) || BaseNode == AccessType)
-      CheckTBAA(Offset == 0, "Offset not zero at the point of scalar access",
-                &I, MD, &Offset);
+bool TBAAVerifier::findAccessTypeNode(
+    Instruction &I, SmallPtrSetImpl<const MDNode *> &StructPath, APInt Offset,
+    bool IsNewFormat, const MDNode *AccessType, const MDNode *BaseNode,
+    const MDNode *MD) {
+  if (!BaseNode || IsRootTBAANode(BaseNode))
+    return false;
 
-    CheckTBAA(BaseNodeBitWidth == Offset.getBitWidth() ||
-                  (BaseNodeBitWidth == 0 && Offset == 0) ||
-                  (IsNewFormat && BaseNodeBitWidth == ~0u),
-              "Access bit-width not the same as description bit-width", &I, MD,
-              BaseNodeBitWidth, Offset.getBitWidth());
+  auto &&[Invalid, BaseNodeBitWidth] =
+      verifyTBAABaseNode(I, BaseNode, IsNewFormat);
 
-    if (IsNewFormat && SeenAccessTypeInPath)
-      break;
+  // If the base node is invalid in itself, then we've already printed all the
+  // errors we wanted to print.
+  if (Invalid)
+    return false;
+
+  // Offset at point of scalar access must be zero. Skip mismatched nodes.
+  if ((isValidScalarTBAANode(BaseNode) || BaseNode == AccessType) &&
+      Offset != 0)
+    return false;
+
+  CheckTBAA(BaseNodeBitWidth == Offset.getBitWidth() ||
+                (BaseNodeBitWidth == 0 && Offset == 0) ||
+                (IsNewFormat && BaseNodeBitWidth == ~0u),
+            "Access bit-width not the same as description bit-width", &I, MD,
+            BaseNodeBitWidth, Offset.getBitWidth());
+
+  bool SeenAccessTypeInPath = (BaseNode == AccessType && Offset == 0);
+
+  if (IsNewFormat && SeenAccessTypeInPath)
+    return true;
+
+  auto ProbableNodes =
+      getFieldNodeFromTBAABaseNode(I, BaseNode, Offset, IsNewFormat);
+
+  if (!StructPath.insert(BaseNode).second) {
+    CheckFailed("Cycle detected in struct path", &I, MD);
+    return false;
   }
 
-  CheckTBAA(SeenAccessTypeInPath, "Did not see access type in access path!", &I,
-            MD);
-  return true;
+  for (auto *PN : ProbableNodes) {
+    if (!PN || IsRootTBAANode(PN))
+      continue;
+
+    SmallPtrSet<const MDNode *, 4> StructPathCopy;
+    StructPathCopy.insert(StructPath.begin(), StructPath.end());
+
+    if (findAccessTypeNode(I, StructPathCopy, Offset, IsNewFormat, AccessType,
+                           PN, MD))
+      return true;
+  }
+
+  return SeenAccessTypeInPath;
 }
 
 char VerifierLegacyPass::ID = 0;
diff --git a/llvm/test/Analysis/TypeBasedAliasAnalysis/aggregates.ll b/llvm/test/Analysis/TypeBasedAliasAnalysis/aggregates.ll
index 4049c78049e036..422f8d80404687 100644
--- a/llvm/test/Analysis/TypeBasedAliasAnalysis/aggregates.ll
+++ b/llvm/test/Analysis/TypeBasedAliasAnalysis/aggregates.ll
@@ -105,6 +105,22 @@ entry:
   ret i32 %0
 }
 
+; C vs. D  =>  MayAlias.
+define i32 @f7(ptr %c, ptr %d) {
+entry:
+; CHECK-LABEL: f7
+; CHECK: MayAlias: store i16 7, {{.*}} <-> store i32 5,
+; OPT-LABEL: f7
+; OPT: store i32 5,
+; OPT: store i16 7,
+; OPT: load i32
+; OPT: ret i32
+  store i32 5, ptr %c, align 4, !tbaa !18  ; TAG_Union_int
+  store i16 7, ptr %d, align 4, !tbaa !17  ; TAG_Union_short
+  %0 = load i32, ptr %c, align 4, !tbaa !18  ; TAG_Union_int
+  ret i32 %0
+}
+
 !0 = !{!"root"}
 !1 = !{!0, i64 1, !"char"}
 !2 = !{!1, i64 4, !"int"}
@@ -128,3 +144,7 @@ entry:
 
 !14 = !{!4, i64 2, !"D", !11, i64 0, i64 2}
 !15 = !{!14, !14, i64 0, i64 2}  ; TAG_D
+
+!16 = !{!1, i64 2, !"Union", !11, i64 0, i64 2, !2, i64 0, i64 4}
+!17 = !{!16, !11, i64 0, i64 2}  ; TAG_Union_short
+!18 = !{!16, !2, i64 0, i64 4}  ; TAG_Union_int
diff --git a/llvm/test/Analysis/TypeBasedAliasAnalysis/union-path-new.ll b/llvm/test/Analysis/TypeBasedAliasAnalysis/union-path-new.ll
new file mode 100644
index 00000000000000..76ee80b259b951
--- /dev/null
+++ b/llvm/test/Analysis/TypeBasedAliasAnalysis/union-path-new.ll
@@ -0,0 +1,190 @@
+; RUN: opt < %s -aa-pipeline=tbaa -passes=aa-eval -evaluate-aa-metadata \
+; RUN:     -print-no-aliases -print-may-aliases -disable-output 2>&1 | \
+; RUN:     FileCheck %s
+; RUN: opt < %s -aa-pipeline=tbaa -passes=gvn -S | FileCheck %s --check-prefix=OPT
+;
+; Check various union use cases with new struct path TBAA.
+
+; IR generated from following C code:
+;
+; // Case 1:  Union with array field.
+;
+; union A{
+;  int a[5];
+;  double g;
+; };
+;
+; // MayAlias.
+; double f1(union A* a) {
+;   a->g = 2.0;
+;   a->a[1] = 5;
+;   return a->g;
+; }
+;
+; // Case 2: Union with struct and primitive type.
+;
+; struct S1{
+;   int a;
+;   float b;
+;   int c;
+; };
+; 
+; union S2{
+;   struct S1 c;
+;   double d;
+; };
+; 
+; // MayAlias.
+; double f2(union S2* u) {
+;   u->d = 2.0;
+;   u->c.b = 3.0;
+;   return u->d;
+; }
+; // NoAlias.
+; // Contrary to the old struct path, here TBAA
+; // reports NoAlias. (See union-path-old.ll.)
+; double f3(union S2* u) {
+;   u->d = 2.0;
+;   u->c.c = 3;
+;   return u->d;
+; }
+; 
+; // Case 3: Union of two structs.
+;
+; struct FloatS{
+;   float a;
+;   float b;
+; };
+; 
+; struct IntS{
+;  short a;
+;  short b;
+;  char c;
+; };
+; 
+; union SU {
+;   struct FloatS a;
+;   struct IntS b;
+; };
+; 
+; // NoAlias. 
+; float f4(union SU* u) {
+;   u->a.a = 3.0;
+;   u->b.c = 5;
+;   return u->a.a;
+; }
+;
+; // MayAlias.
+; float f5(union SU* u) {
+;   u->a.a = 3.0;
+;   u->b.b = 5;
+;   return u->a.a;
+; }
+
+
+
+define double @f1(ptr %0) {
+entry:
+; CHECK-LABEL: f1
+; CHECK: MayAlias:   store i32 5, {{.*}} <-> store double 2.0
+; OPT-LABEL: f1
+; OPT: store double 2.0
+; OPT: store i32 5,
+; OPT: load double
+; OPT: ret double
+  store double 2.000000e+00, ptr %0, align 8, !tbaa !5
+  %2 = getelementptr inbounds i8, ptr %0, i64 4
+  store i32 5, ptr %2, align 4, !tbaa !11
+  %3 = load double, ptr %0, align 8, !tbaa !5
+  ret double %3
+}
+
+define double @f2(ptr %0) {
+entry:
+; CHECK-LABEL: f2
+; CHECK: MayAlias:   store float 3.0{{.*}} <-> store double 2.0
+; OPT-LABEL: f2
+; OPT: store double 2.0
+; OPT: store float 3.0
+; OPT: load double
+; OPT: ret double
+  store double 2.000000e+00, ptr %0, align 8, !tbaa !12
+  %2 = getelementptr inbounds i8, ptr %0, i64 4
+  store float 3.000000e+00, ptr %2, align 4, !tbaa !16
+  %3 = load double, ptr %0, align 8, !tbaa !12
+  ret double %3
+}
+
+define double @f3(ptr %0) {
+entry:
+; CHECK-LABEL: f3
+; CHECK: NoAlias:   store i32 3,{{.*}} <-> store double 2.0
+; OPT-LABEL: f3
+; OPT: store double 2.0
+; OPT: store i32 3
+; OPT-NOT: load double
+; OPT: ret double 2.0
+  store double 2.000000e+00, ptr %0, align 8, !tbaa !12
+  %2 = getelementptr inbounds i8, ptr %0, i64 8
+  store i32 3, ptr %2, align 8, !tbaa !17
+  %3 = load double, ptr %0, align 8, !tbaa !12
+  ret double %3
+}
+
+define float @f4(ptr %0) {
+; CHECK-LABEL: f4
+; CHECK: NoAlias:   store i8 5{{.*}} <-> store float 3.0
+; OPT-LABEL: f4
+; OPT: store float 3.0
+; OPT: store i8 5
+; OPT-NOT: load
+; OPT: ret float 3.0
+  store float 3.000000e+00, ptr %0, align 4, !tbaa !18
+  %2 = getelementptr inbounds i8, ptr %0, i64 4
+  store i8 5, ptr %2, align 4, !tbaa !23
+  %3 = load float, ptr %0, align 4, !tbaa !18
+  ret float %3
+}
+
+define float @f5(ptr %0) {
+entry:
+; CHECK-LABEL: f5
+; CHECK: MayAlias: store i16 5, {{.*}} <-> store float 3.0
+; OPT-LABEL: f5
+; OPT: store float 3.0
+; OPT: store i16 5
+; OPT: load float
+; OPT: ret float
+  store float 3.000000e+00, ptr %0, align 4, !tbaa !18
+  %2 = getelementptr inbounds i8, ptr %0, i64 2
+  store i16 5, ptr %2, align 2, !tbaa !24
+  %3 = load float, ptr %0, align 4, !tbaa !18
+  ret float %3
+}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{i32 8, !"PIC Level", i32 2}
+!2 = !{i32 7, !"PIE Level", i32 2}
+!3 = !{i32 7, !"uwtable", i32 2}
+!4 = !{!""}
+!5 = !{!6, !10, i64 0, i64 8}
+!6 = !{!8, i64 24, !"A", !7, i64 0, i64 20, !10, i64 0, i64 8}
+!7 = !{!8, i64 4, !"int"}
+!8 = !{!9, i64 1, !"omnipotent char"}
+!9 = !{!"Simple C/C++ TBAA"}
+!10 = !{!8, i64 8, !"double"}
+!11 = !{!6, !7, i64 0, i64 4}
+!12 = !{!13, !10, i64 0, i64 8}
+!13 = !{!8, i64 8, !"S2", !14, i64 0, i64 8, !10, i64 0, i64 8}
+!14 = !{!8, i64 8, !"S1", !7, i64 0, i64 4, !15, i64 4, i64 4, !7, i64 8, i64 4}
+!15 = !{!8, i64 4, !"float"}
+!16 = !{!13, !15, i64 4, i64 4}
+!17 = !{!13, !7, i64 8, i64 4}
+!18 = !{!19, !15, i64 0, i64 4}
+!19 = !{!8, i64 8, !"SU", !20, i64 0, i64 8, !21, i64 0, i64 6}
+!20 = !{!8, i64 8, !"FloatS", !15, i64 0, i64 4, !15, i64 4, i64 4}
+!21 = !{!8, i64 6, !"IntS", !22, i64 0, i64 2, !22, i64 2, i64 2, !8, i64 4, i64 1}
+!22 = !{!8, i64 2, !"short"}
+!23 = !{!19, !8, i64 4, i64 1}
+!24 = !{!19, !22, i64 2, i64 2}
+
diff --git a/llvm/test/Analysis/TypeBasedAliasAnalysis/union-path-old.ll b/llvm/test/Analysis/TypeBasedAliasAnalysis/union-path-old.ll
new file mode 100644
index 00000000000000..b62e6e2243f7b6
--- /dev/null
+++ b/llvm/test/Analysis/TypeBasedAliasAnalysis/union-path-old.ll
@@ -0,0 +1,190 @@
+; RUN: opt < %s -aa-pipeline=tbaa -passes=aa-eval -evaluate-aa-metadata \
+; RUN:     -print-no-aliases -print-may-aliases -disable-output 2>&1 | \
+; RUN:     FileCheck %s
+; RUN: opt < %s -aa-pipeline=tbaa -passes=gvn -S | FileCheck %s --check-prefix=OPT
+;
+; Check various union use cases with old struct path TBAA.
+
+; IR generated from following C code:
+;
+; // Case 1:  Union with array field.
+;
+; union A{
+;  int a[5];
+;  double g;
+; };
+;
+; // MayAlias.
+; double f1(union A* a) {
+;   a->g = 2.0;
+;   a->a[1] = 5;
+;   return a->g;
+; }
+;
+; // Case 2: Union with struct and primitive type.
+;
+; struct S1{
+;   int a;
+;   float b;
+;   int c;
+; };
+; 
+; union S2{
+;   struct S1 c;
+;   double d;
+; };
+; 
+; // MayAlias.
+; double f2(union S2* u) {
+;   u->d = 2.0;
+;   u->c.b = 3.0;
+;   return u->d;
+; }
+; // MayAlias.
+; // Old struct path is conservative here.
+; // (For reference see union-path-new.ll)
+; double f3(union S2* u) {
+;   u->d = 2.0;
+;   u->c.c = 3;
+;   return u->d;
+; }
+; 
+; // Case 3: Union of two structs.
+;
+; struct FloatS{
+;   float a;
+;   float b;
+; };
+; 
+; struct IntS{
+;  short a;
+;  short b;
+;  char c;
+; };
+; 
+; union SU {
+;   struct FloatS a;
+;   struct IntS b;
+; };
+; 
+; // NoAlias. 
+; float f4(union SU* u) {
+;   u->a.a = 3.0;
+;   u->b.c = 5;
+;   return u->a.a;
+; }
+;
+; // MayAlias.
+; float f5(union SU* u) {
+;   u->a.a = 3.0;
+;   u->b.b = 5;
+;   return u->a.a;
+; }
+
+
+
+define double @f1(ptr %0) {
+entry:
+; CHECK-LABEL: f1
+; CHECK: MayAlias:   store i32 5, {{.*}} <-> store double 2.0
+; OPT-LABEL: f1
+; OPT: store double 2.0
+; OPT: store i32 5,
+; OPT: load double
+; OPT: ret double
+  store double 2.000000e+00, ptr %0, align 8, !tbaa !5
+  %2 = getelementptr inbounds i8, ptr %0, i64 4
+  store i32 5, ptr %2, align 4, !tbaa !11
+  %3 = load double, ptr %0, align 8, !tbaa !5
+  ret double %3
+}
+
+define double @f2(ptr %0) {
+entry:
+; CHECK-LABEL: f2
+; CHECK: MayAlias:   store float 3.0{{.*}} <-> store double 2.0
+; OPT-LABEL: f2
+; OPT: store double 2.0
+; OPT: store float 3.0
+; OPT: load double
+; OPT: ret double
+  store double 2.000000e+00, ptr %0, align 8, !tbaa !12
+  %2 = getelementptr inbounds i8, ptr %0, i64 4
+  store float 3.000000e+00, ptr %2, align 4, !tbaa !16
+  %3 = load double, ptr %0, align 8, !tbaa !12
+  ret double %3
+}
+
+define double @f3(ptr %0) {
+entry:
+; CHECK-LABEL: f3
+; CHECK: MayAlias:   store i32 3,{{.*}} <-> store double 2.0
+; OPT-LABEL: f3
+; OPT: store double 2.0
+; OPT: store i32 3
+; OPT: load double
+; OPT: ret double
+  store double 2.000000e+00, ptr %0, align 8, !tbaa !12
+  %2 = getelementptr inbounds i8, ptr %0, i64 8
+  store i32 3, ptr %2, align 8, !tbaa !17
+  %3 = load double, ptr %0, align 8, !tbaa !12
+  ret double %3
+}
+
+define float @f4(ptr %0) {
+; CHECK-LABEL: f4
+; CHECK: NoAlias:   store i8 5{{.*}} <-> store float 3.0
+; OPT-LABEL: f4
+; OPT: store float 3.0
+; OPT: store i8 5
+; OPT-NOT: load
+; OPT: ret float 3.0
+  store float 3.000000e+00, ptr %0, align 4, !tbaa !18
+  %2 = getelementptr inbounds i8, ptr %0, i64 4
+  store i8 5, ptr %2, align 4, !tbaa !23
+  %3 = load float, ptr %0, align 4, !tbaa !18
+  ret float %3
+}
+
+define float @f5(ptr %0) {
+entry:
+; CHECK-LABEL: f5
+; CHECK: MayAlias: store i16 5, {{.*}} <-> store float 3.0
+; OPT-LABEL: f5
+; OPT: store float 3.0
+; OPT: store i16 5
+; OPT: load float
+; OPT: ret float
+  store float 3.000000e+00, ptr %0, align 4, !tbaa !18
+  %2 = getelementptr inbounds i8, ptr %0, i64 2
+  store i16 5, ptr %2, align 2, !tbaa !24
+  %3 = load float, ptr %0, align 4, !tbaa !18
+  ret float %3
+}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{i32 8, !"PIC Level", i32 2}
+!2 = !{i32 7, !"PIE Level", i32 2}
+!3 = !{i32 7, !"uwtable", i32 2}
+!4 = !{!""}
+!5 = !{!6, !10, i64 0}
+!6 = !{!"A", !7, i64 0, !10, i64 0}
+!7 = !{!"int", !8, i64 0}
+!8 = !{!"omnipotent char", !9, i64 0}
+!9 = !{!"Simple C/C++ TBAA"}
+!10 = !{!"double", !8, i64 0}
+!11 = !{!6, !7, i64 0}
+!12 = !{!13, !10, i64 0}
+!13 = !{!"S2", !14, i64 0, !10, i64 0}
+!14 = !{!"S1", !7, i64 0, !15, i64 4, !7, i64 8}
+!15 = !{!"float", !8, i64 0}
+!16 = !{!13, !15, i64 4}
+!17 = !{!13, !7, i64 8}
+!18 = !{!19, !15, i64 0}
+!19 = !{!"SU", !20, i64 0, !21, i64 0}
+!20 = !{!"FloatS", !15, i64 0, !15, i64 4}
+!21 = !{!"IntS", !22, i64 0, !22, i64 2, !8, i64 4}
+!22 = !{!"short", !8, i64 0}
+!23 = !{!19, !8, i64 4}
+!24 = !{!19, !22, i64 2}
+
diff --git a/llvm/test/Verifier/tbaa.ll b/llvm/test/Verifier/tbaa.ll
index abaa415aed749b..107192542d55d9 100644
--- a/llvm/test/Verifier/tbaa.ll
+++ b/llvm/test/Verifier/tbaa.ll
@@ -61,15 +61,15 @@ define void @f_1(ptr %ptr) {
 ; CHECK: Cycle detected in struct path
 ; CHECK-NEXT:  store i32 0, ptr %ptr, align 4, !tbaa !{{[0-9]+}}
 
-; CHECK: Offset not zero at the point of scalar access
+; CHECK: Did not see access type in access path
+; CHECK-NEXT:  store i32 0, ptr %ptr, align 4, !tbaa !{{[0-9]+}}
+
+; CHECK: Did not see access type in access path
 ; CHECK-NEXT:  store i32 1, ptr %ptr, align 4, !tbaa !{{[0-9]+}}
 
-; CHECK: Offset not zero at the point of scalar access
+; CHECK: Did not see access type in access path
 ; CHECK-NEXT:  store i32 2, ptr %ptr, align 4, !tbaa !{{[0-9]+}}
 
-; CHECK: Could not find TBAA parent in struct type node
-; CHECK-NEXT:  store i32 3, ptr %ptr, align 4, !tbaa !{{[0-9]+}}
-
 ; CHECK: Did not see access type in access path!
 ; CHECK-NEXT:  store i32 3, ptr %ptr, align 4, !tbaa !{{[0-9]+}}
 



More information about the llvm-commits mailing list