[llvm-branch-commits] [llvm] 1860331 - [MemCpyOpt] Correctly merge alias scopes during call slot optimization

via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Thu Dec 3 09:29:10 PST 2020


Author: modimo
Date: 2020-12-03T09:23:37-08:00
New Revision: 18603319321a6c1b158800bcc60035ee01549516

URL: https://github.com/llvm/llvm-project/commit/18603319321a6c1b158800bcc60035ee01549516
DIFF: https://github.com/llvm/llvm-project/commit/18603319321a6c1b158800bcc60035ee01549516.diff

LOG: [MemCpyOpt] Correctly merge alias scopes during call slot optimization

When MemCpyOpt performs call slot optimization it will concatenate the `alias.scope` metadata between the function call and the memcpy. However, scoped AA relies on the domains in metadata to be maintained in a caller-callee relationship. Naive concatenation breaks this assumption leading to bad AA results.

The fix is to take the intersection of domains then union the scopes within those domains.

The original bug came from a case of rust bad codegen which uses this bad aliasing to perform additional memcpy optimizations. As show in the added test case `%src` got forwarded past its lifetime leading to a dereference of garbage data.

Testing
ninja check-llvm

Reviewed By: jeroen.dobbelaere

Differential Revision: https://reviews.llvm.org/D91576

Added: 
    llvm/test/Analysis/ScopedNoAliasAA/alias-scope-merging.ll
    llvm/test/Transforms/MemCpyOpt/callslot_badaa.ll

Modified: 
    llvm/include/llvm/Analysis/ScopedNoAliasAA.h
    llvm/lib/Analysis/ScopedNoAliasAA.cpp
    llvm/lib/IR/Metadata.cpp
    llvm/test/Transforms/GVN/noalias.ll
    llvm/test/Transforms/InstCombine/fold-phi-load-metadata.ll
    llvm/test/Transforms/NewGVN/noalias.ll

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/Analysis/ScopedNoAliasAA.h b/llvm/include/llvm/Analysis/ScopedNoAliasAA.h
index c55228eace4b..562640647918 100644
--- a/llvm/include/llvm/Analysis/ScopedNoAliasAA.h
+++ b/llvm/include/llvm/Analysis/ScopedNoAliasAA.h
@@ -25,6 +25,27 @@ class Function;
 class MDNode;
 class MemoryLocation;
 
+/// This is a simple wrapper around an MDNode which provides a higher-level
+/// interface by hiding the details of how alias analysis information is encoded
+/// in its operands.
+class AliasScopeNode {
+  const MDNode *Node = nullptr;
+
+public:
+  AliasScopeNode() = default;
+  explicit AliasScopeNode(const MDNode *N) : Node(N) {}
+
+  /// Get the MDNode for this AliasScopeNode.
+  const MDNode *getNode() const { return Node; }
+
+  /// Get the MDNode for this AliasScopeNode's domain.
+  const MDNode *getDomain() const {
+    if (Node->getNumOperands() < 2)
+      return nullptr;
+    return dyn_cast_or_null<MDNode>(Node->getOperand(1));
+  }
+};
+
 /// A simple AA result which uses scoped-noalias metadata to answer queries.
 class ScopedNoAliasAAResult : public AAResultBase<ScopedNoAliasAAResult> {
   friend AAResultBase<ScopedNoAliasAAResult>;

diff  --git a/llvm/lib/Analysis/ScopedNoAliasAA.cpp b/llvm/lib/Analysis/ScopedNoAliasAA.cpp
index 5e2aaab050af..dca18033e8fd 100644
--- a/llvm/lib/Analysis/ScopedNoAliasAA.cpp
+++ b/llvm/lib/Analysis/ScopedNoAliasAA.cpp
@@ -50,31 +50,6 @@ using namespace llvm;
 static cl::opt<bool> EnableScopedNoAlias("enable-scoped-noalias",
                                          cl::init(true), cl::Hidden);
 
-namespace {
-
-/// This is a simple wrapper around an MDNode which provides a higher-level
-/// interface by hiding the details of how alias analysis information is encoded
-/// in its operands.
-class AliasScopeNode {
-  const MDNode *Node = nullptr;
-
-public:
-  AliasScopeNode() = default;
-  explicit AliasScopeNode(const MDNode *N) : Node(N) {}
-
-  /// Get the MDNode for this AliasScopeNode.
-  const MDNode *getNode() const { return Node; }
-
-  /// Get the MDNode for this AliasScopeNode's domain.
-  const MDNode *getDomain() const {
-    if (Node->getNumOperands() < 2)
-      return nullptr;
-    return dyn_cast_or_null<MDNode>(Node->getOperand(1));
-  }
-};
-
-} // end anonymous namespace
-
 AliasResult ScopedNoAliasAAResult::alias(const MemoryLocation &LocA,
                                          const MemoryLocation &LocB,
                                          AAQueryInfo &AAQI) {

diff  --git a/llvm/lib/IR/Metadata.cpp b/llvm/lib/IR/Metadata.cpp
index a32d8f420eae..1787899d16a6 100644
--- a/llvm/lib/IR/Metadata.cpp
+++ b/llvm/lib/IR/Metadata.cpp
@@ -27,6 +27,7 @@
 #include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/Twine.h"
+#include "llvm/Analysis/ScopedNoAliasAA.h"
 #include "llvm/IR/Argument.h"
 #include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/Constant.h"
@@ -926,7 +927,32 @@ MDNode *MDNode::getMostGenericAliasScope(MDNode *A, MDNode *B) {
   if (!A || !B)
     return nullptr;
 
-  return concatenate(A, B);
+  // Take the intersection of domains then union the scopes
+  // within those domains
+  SmallPtrSet<const MDNode *, 16> ADomains;
+  SmallPtrSet<const MDNode *, 16> IntersectDomains;
+  SmallSetVector<Metadata *, 4> MDs;
+  for (const MDOperand &MDOp : A->operands())
+    if (const MDNode *NAMD = dyn_cast<MDNode>(MDOp))
+      if (const MDNode *Domain = AliasScopeNode(NAMD).getDomain())
+        ADomains.insert(Domain);
+
+  for (const MDOperand &MDOp : B->operands())
+    if (const MDNode *NAMD = dyn_cast<MDNode>(MDOp))
+      if (const MDNode *Domain = AliasScopeNode(NAMD).getDomain())
+        if (ADomains.contains(Domain)) {
+          IntersectDomains.insert(Domain);
+          MDs.insert(MDOp);
+        }
+
+  for (const MDOperand &MDOp : A->operands())
+    if (const MDNode *NAMD = dyn_cast<MDNode>(MDOp))
+      if (const MDNode *Domain = AliasScopeNode(NAMD).getDomain())
+        if (IntersectDomains.contains(Domain))
+          MDs.insert(MDOp);
+
+  return MDs.empty() ? nullptr
+                     : getOrSelfReference(A->getContext(), MDs.getArrayRef());
 }
 
 MDNode *MDNode::getMostGenericFPMath(MDNode *A, MDNode *B) {

diff  --git a/llvm/test/Analysis/ScopedNoAliasAA/alias-scope-merging.ll b/llvm/test/Analysis/ScopedNoAliasAA/alias-scope-merging.ll
new file mode 100644
index 000000000000..4c8369d30adb
--- /dev/null
+++ b/llvm/test/Analysis/ScopedNoAliasAA/alias-scope-merging.ll
@@ -0,0 +1,37 @@
+; RUN: opt < %s -S -memcpyopt | FileCheck --match-full-lines %s
+
+; Alias scopes are merged by taking the intersection of domains, then the union of the scopes within those domains
+define i8 @test(i8 %input) {
+  %tmp = alloca i8
+  %dst = alloca i8
+  %src = alloca i8
+; CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %dst, i8* align 8 %src, i64 1, i1 false), !alias.scope ![[SCOPE:[0-9]+]]
+  call void @llvm.lifetime.start.p0i8(i64 8, i8* nonnull %src), !noalias !4
+  store i8 %input, i8* %src
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %tmp, i8* align 8 %src, i64 1, i1 false), !alias.scope !0
+  call void @llvm.lifetime.end.p0i8(i64 8, i8* nonnull %src), !noalias !4
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %dst, i8* align 8 %tmp, i64 1, i1 false), !alias.scope !4
+  %ret_value = load i8, i8* %dst
+  ret i8 %ret_value
+}
+
+; Merged scope contains "callee0: %a" and "callee0 : %b"
+; CHECK-DAG: ![[CALLEE0_A:[0-9]+]] = distinct !{!{{[0-9]+}}, !{{[0-9]+}}, !"callee0: %a"}
+; CHECK-DAG: ![[CALLEE0_B:[0-9]+]] = distinct !{!{{[0-9]+}}, !{{[0-9]+}}, !"callee0: %b"}
+; CHECK-DAG: ![[SCOPE]] = !{![[CALLEE0_A]], ![[CALLEE0_B]]}
+
+declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture)
+declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture)
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8*, i8*, i64, i1)
+
+!0 = !{!1, !7}
+!1 = distinct !{!1, !3, !"callee0: %a"}
+!2 = distinct !{!2, !3, !"callee0: %b"}
+!3 = distinct !{!3, !"callee0"}
+
+!4 = !{!2, !5}
+!5 = distinct !{!5, !6, !"callee1: %a"}
+!6 = distinct !{!6, !"callee1"}
+
+!7 = distinct !{!7, !8, !"callee2: %a"}
+!8 = distinct !{!8, !"callee2"}

diff  --git a/llvm/test/Transforms/GVN/noalias.ll b/llvm/test/Transforms/GVN/noalias.ll
index 9cfcc9bbd9d4..90279042e6b0 100644
--- a/llvm/test/Transforms/GVN/noalias.ll
+++ b/llvm/test/Transforms/GVN/noalias.ll
@@ -5,7 +5,7 @@ define i32 @test1(i32* %p, i32* %q) {
 ; CHECK: load i32, i32* %p
 ; CHECK-NOT: noalias
 ; CHECK: %c = add i32 %a, %a
-  %a = load i32, i32* %p, !noalias !0
+  %a = load i32, i32* %p, !noalias !3
   %b = load i32, i32* %p
   %c = add i32 %a, %b
   ret i32 %c
@@ -13,31 +13,32 @@ define i32 @test1(i32* %p, i32* %q) {
 
 define i32 @test2(i32* %p, i32* %q) {
 ; CHECK-LABEL: @test2(i32* %p, i32* %q)
-; CHECK: load i32, i32* %p, align 4, !alias.scope !0
+; CHECK: load i32, i32* %p, align 4, !alias.scope ![[SCOPE1:[0-9]+]]
 ; CHECK: %c = add i32 %a, %a
-  %a = load i32, i32* %p, !alias.scope !0
-  %b = load i32, i32* %p, !alias.scope !0
+  %a = load i32, i32* %p, !alias.scope !3
+  %b = load i32, i32* %p, !alias.scope !3
   %c = add i32 %a, %b
   ret i32 %c
 }
 
-; FIXME: In this case we can do better than intersecting the scopes, and can
-; concatenate them instead. Both loads are in the same basic block, the first
-; makes the second safe to speculatively execute, and there are no calls that may
-; throw in between.
 define i32 @test3(i32* %p, i32* %q) {
 ; CHECK-LABEL: @test3(i32* %p, i32* %q)
-; CHECK: load i32, i32* %p, align 4, !alias.scope !1
+; CHECK: load i32, i32* %p, align 4, !alias.scope ![[SCOPE2:[0-9]+]]
 ; CHECK: %c = add i32 %a, %a
-  %a = load i32, i32* %p, !alias.scope !1
-  %b = load i32, i32* %p, !alias.scope !2
+  %a = load i32, i32* %p, !alias.scope !4
+  %b = load i32, i32* %p, !alias.scope !5
   %c = add i32 %a, %b
   ret i32 %c
 }
 
+; CHECK:   ![[SCOPE1]] = !{!{{[0-9]+}}}
+; CHECK:   ![[SCOPE2]] = !{!{{[0-9]+}}, !{{[0-9]+}}}
 declare i32 @foo(i32*) readonly
 
-!0 = !{!0}
-!1 = !{!1}
-!2 = !{!0, !1}
+!0 = distinct !{!0, !2, !"callee0: %a"}
+!1 = distinct !{!1, !2, !"callee0: %b"}
+!2 = distinct !{!2, !"callee0"}
 
+!3 = !{!0}
+!4 = !{!1}
+!5 = !{!0, !1}

diff  --git a/llvm/test/Transforms/InstCombine/fold-phi-load-metadata.ll b/llvm/test/Transforms/InstCombine/fold-phi-load-metadata.ll
index e5a1aa7362a5..7fa26b46e25d 100644
--- a/llvm/test/Transforms/InstCombine/fold-phi-load-metadata.ll
+++ b/llvm/test/Transforms/InstCombine/fold-phi-load-metadata.ll
@@ -40,10 +40,10 @@ return:                                           ; preds = %if.end, %if.then
 ; CHECK: ![[TBAA]] = !{![[TAG1:[0-9]+]], ![[TAG1]], i64 0}
 ; CHECK: ![[TAG1]] = !{!"int", !{{[0-9]+}}, i64 0}
 ; CHECK: ![[RANGE]] = !{i32 10, i32 25}
-; CHECK: ![[ALIAS_SCOPE]] = !{![[SCOPE0:[0-9]+]], ![[SCOPE2:[0-9]+]], ![[SCOPE1:[0-9]+]]}
+; CHECK: ![[ALIAS_SCOPE]] = !{![[SCOPE0:[0-9]+]], ![[SCOPE1:[0-9]+]], ![[SCOPE2:[0-9]+]]}
 ; CHECK: ![[SCOPE0]] = distinct !{![[SCOPE0]], !{{[0-9]+}}, !"scope0"}
-; CHECK: ![[SCOPE2]] = distinct !{![[SCOPE2]], !{{[0-9]+}}, !"scope2"}
 ; CHECK: ![[SCOPE1]] = distinct !{![[SCOPE1]], !{{[0-9]+}}, !"scope1"}
+; CHECK: ![[SCOPE2]] = distinct !{![[SCOPE2]], !{{[0-9]+}}, !"scope2"}
 ; CHECK: ![[NOALIAS]] = !{![[SCOPE3:[0-9]+]]}
 ; CHECK: ![[SCOPE3]] = distinct !{![[SCOPE3]], !{{[0-9]+}}, !"scope3"}
 

diff  --git a/llvm/test/Transforms/MemCpyOpt/callslot_badaa.ll b/llvm/test/Transforms/MemCpyOpt/callslot_badaa.ll
new file mode 100644
index 000000000000..346546f72c4c
--- /dev/null
+++ b/llvm/test/Transforms/MemCpyOpt/callslot_badaa.ll
@@ -0,0 +1,39 @@
+; RUN: opt < %s -S -memcpyopt | FileCheck --match-full-lines %s
+
+; Make sure callslot optimization merges alias.scope metadata correctly when it merges instructions.
+; Merging here naively generates:
+;  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %dst, i8* align 8 %src, i64 1, i1 false), !alias.scope !3
+;  call void @llvm.lifetime.end.p0i8(i64 8, i8* nonnull %src), !noalias !0
+;   ...
+;  !0 = !{!1}
+;  !1 = distinct !{!1, !2, !"callee1: %a"}
+;  !2 = distinct !{!2, !"callee1"}
+;  !3 = !{!1, !4}
+;  !4 = distinct !{!4, !5, !"callee0: %a"}
+;  !5 = distinct !{!5, !"callee0"}
+; Which is incorrect because the lifetime.end of %src will now "noalias" the above memcpy.
+define i8 @test(i8 %input) {
+  %tmp = alloca i8
+  %dst = alloca i8
+  %src = alloca i8
+; NOTE: we're matching the full line and looking for the lack of !alias.scope here
+; CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %dst, i8* align 8 %src, i64 1, i1 false)
+  call void @llvm.lifetime.start.p0i8(i64 8, i8* nonnull %src), !noalias !3
+  store i8 %input, i8* %src
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %tmp, i8* align 8 %src, i64 1, i1 false), !alias.scope !0
+  call void @llvm.lifetime.end.p0i8(i64 8, i8* nonnull %src), !noalias !3
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %dst, i8* align 8 %tmp, i64 1, i1 false), !alias.scope !3
+  %ret_value = load i8, i8* %dst
+  ret i8 %ret_value
+}
+
+declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture)
+declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture)
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8*, i8*, i64, i1)
+
+!0 = !{!1}
+!1 = distinct !{!1, !2, !"callee0: %a"}
+!2 = distinct !{!2, !"callee0"}
+!3 = !{!4}
+!4 = distinct !{!4, !5, !"callee1: %a"}
+!5 = distinct !{!5, !"callee1"}

diff  --git a/llvm/test/Transforms/NewGVN/noalias.ll b/llvm/test/Transforms/NewGVN/noalias.ll
index 23fc95d4ff8d..3e8bfa26affb 100644
--- a/llvm/test/Transforms/NewGVN/noalias.ll
+++ b/llvm/test/Transforms/NewGVN/noalias.ll
@@ -5,7 +5,7 @@ define i32 @test1(i32* %p, i32* %q) {
 ; CHECK: load i32, i32* %p
 ; CHECK-NOT: noalias
 ; CHECK: %c = add i32 %a, %a
-  %a = load i32, i32* %p, !noalias !0
+  %a = load i32, i32* %p, !noalias !3
   %b = load i32, i32* %p
   %c = add i32 %a, %b
   ret i32 %c
@@ -13,31 +13,32 @@ define i32 @test1(i32* %p, i32* %q) {
 
 define i32 @test2(i32* %p, i32* %q) {
 ; CHECK-LABEL: @test2(i32* %p, i32* %q)
-; CHECK: load i32, i32* %p, align 4, !alias.scope !0
+; CHECK: load i32, i32* %p, align 4, !alias.scope ![[SCOPE1:[0-9]+]]
 ; CHECK: %c = add i32 %a, %a
-  %a = load i32, i32* %p, !alias.scope !0
-  %b = load i32, i32* %p, !alias.scope !0
+  %a = load i32, i32* %p, !alias.scope !3
+  %b = load i32, i32* %p, !alias.scope !3
   %c = add i32 %a, %b
   ret i32 %c
 }
 
-; FIXME: In this case we can do better than intersecting the scopes, and can
-; concatenate them instead. Both loads are in the same basic block, the first
-; makes the second safe to speculatively execute, and there are no calls that may
-; throw in between.
 define i32 @test3(i32* %p, i32* %q) {
 ; CHECK-LABEL: @test3(i32* %p, i32* %q)
-; CHECK: load i32, i32* %p, align 4, !alias.scope !1
+; CHECK: load i32, i32* %p, align 4, !alias.scope ![[SCOPE2:[0-9]+]]
 ; CHECK: %c = add i32 %a, %a
-  %a = load i32, i32* %p, !alias.scope !1
-  %b = load i32, i32* %p, !alias.scope !2
+  %a = load i32, i32* %p, !alias.scope !4
+  %b = load i32, i32* %p, !alias.scope !5
   %c = add i32 %a, %b
   ret i32 %c
 }
 
+; CHECK:   ![[SCOPE1]] = !{!{{[0-9]+}}}
+; CHECK:   ![[SCOPE2]] = !{!{{[0-9]+}}, !{{[0-9]+}}}
 declare i32 @foo(i32*) readonly
 
-!0 = !{!0}
-!1 = !{!1}
-!2 = !{!0, !1}
+!0 = distinct !{!0, !2, !"callee0: %a"}
+!1 = distinct !{!1, !2, !"callee0: %b"}
+!2 = distinct !{!2, !"callee0"}
 
+!3 = !{!0}
+!4 = !{!1}
+!5 = !{!0, !1}


        


More information about the llvm-branch-commits mailing list