[llvm] b493124 - [MemorySSA] Support invariant.group metadata

Arthur Eubanks via llvm-commits llvm-commits at lists.llvm.org
Wed Sep 8 13:06:30 PDT 2021


Author: Arthur Eubanks
Date: 2021-09-08T13:06:12-07:00
New Revision: b493124ae2de5074acc8207155274f84732cd06b

URL: https://github.com/llvm/llvm-project/commit/b493124ae2de5074acc8207155274f84732cd06b
DIFF: https://github.com/llvm/llvm-project/commit/b493124ae2de5074acc8207155274f84732cd06b.diff

LOG: [MemorySSA] Support invariant.group metadata

The implementation is mostly copied from MemoryDependenceAnalysis. We
look at all loads and stores of the same pointer operand that carry
invariant.group metadata, treating bitcasts and zero GEPs of a pointer
as the same pointer value, and choose the most dominating such
instruction.
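
As a rough illustration (this sketch is not part of the patch and the
names are made up), the pattern the walker now understands looks like
the following: the dominating invariant.group store is reported as the
clobber of the later invariant.group load, even across a clobbering
call and through a zero GEP of the same pointer:

  declare void @clobber(i32*)

  define i32 @example(i32* %p) {
    ; Most dominating invariant.group access to %p.
    store i32 0, i32* %p, align 4, !invariant.group !0
    ; Would normally clobber the load below.
    call void @clobber(i32* %p)
    ; A zero GEP is treated as the same pointer value as %p.
    %q = getelementptr i32, i32* %p, i64 0
    ; The walker now reports the store, not the call, as this load's clobber.
    %v = load i32, i32* %q, align 4, !invariant.group !0
    ret i32 %v
  }
  !0 = !{}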

Since updating MemorySSA with invariant.group is non-trivial, the
invariant.group handling is not cached in any way for now and lives
entirely in the walker. The number of loads/stores with invariant.group
is small for now anyway; we can revisit this if it noticeably affects
compile times.

To keep invariant.group from affecting optimized uses,
optimizeUsesInBlock() must not use invariant.group in any way.

Co-authored-by: Piotr Padlewski <prazek at google.com>

Reviewed By: asbirlea, nikic, Prazek

Differential Revision: https://reviews.llvm.org/D109134

Added: 
    llvm/test/Transforms/NewGVN/invariant.group.ll

Modified: 
    llvm/lib/Analysis/MemorySSA.cpp
    llvm/test/Analysis/MemorySSA/invariant-groups.ll
    llvm/unittests/Analysis/MemorySSATest.cpp

Removed: 
    llvm/test/Transforms/NewGVN/invariant.group-xfail.ll


################################################################################
diff --git a/llvm/lib/Analysis/MemorySSA.cpp b/llvm/lib/Analysis/MemorySSA.cpp
index b43c45a26094f..4dc2a427caa26 100644
--- a/llvm/lib/Analysis/MemorySSA.cpp
+++ b/llvm/lib/Analysis/MemorySSA.cpp
@@ -1039,7 +1039,8 @@ template <class AliasAnalysisType> class MemorySSA::ClobberWalkerBase {
   // updated if a new clobber is found by this SkipSelf search. If this
   // additional query becomes heavily used we may decide to cache the result.
   // Walker instantiations will decide how to set the SkipSelf bool.
-  MemoryAccess *getClobberingMemoryAccessBase(MemoryAccess *, unsigned &, bool);
+  MemoryAccess *getClobberingMemoryAccessBase(MemoryAccess *, unsigned &, bool,
+                                              bool UseInvariantGroup = true);
 };
 
 /// A MemorySSAWalker that does AA walks to disambiguate accesses. It no
@@ -1064,6 +1065,11 @@ class MemorySSA::CachingWalker final : public MemorySSAWalker {
                                           unsigned &UWL) {
     return Walker->getClobberingMemoryAccessBase(MA, Loc, UWL);
   }
+  // This method is not accessible outside of this file.
+  MemoryAccess *getClobberingMemoryAccessWithoutInvariantGroup(MemoryAccess *MA,
+                                                               unsigned &UWL) {
+    return Walker->getClobberingMemoryAccessBase(MA, UWL, false, false);
+  }
 
   MemoryAccess *getClobberingMemoryAccess(MemoryAccess *MA) override {
     unsigned UpwardWalkLimit = MaxCheckLimit;
@@ -1460,10 +1466,13 @@ void MemorySSA::OptimizeUses::optimizeUsesInBlock(
     unsigned UpwardWalkLimit = MaxCheckLimit;
     while (UpperBound > LocInfo.LowerBound) {
       if (isa<MemoryPhi>(VersionStack[UpperBound])) {
-        // For phis, use the walker, see where we ended up, go there
+        // For phis, use the walker, see where we ended up, go there.
+        // The invariant.group handling in MemorySSA is ad-hoc and doesn't
+        // support updates, so don't use it to optimize uses.
         MemoryAccess *Result =
-            Walker->getClobberingMemoryAccess(MU, UpwardWalkLimit);
-        // We are guaranteed to find it or something is wrong
+            Walker->getClobberingMemoryAccessWithoutInvariantGroup(
+                MU, UpwardWalkLimit);
+        // We are guaranteed to find it or something is wrong.
         while (VersionStack[UpperBound] != Result) {
           assert(UpperBound != 0);
           --UpperBound;
@@ -2469,15 +2478,88 @@ MemorySSA::ClobberWalkerBase<AliasAnalysisType>::getClobberingMemoryAccessBase(
   return Clobber;
 }
 
+static const Instruction *
+getInvariantGroupClobberingInstruction(Instruction &I, DominatorTree &DT) {
+  if (!I.hasMetadata(LLVMContext::MD_invariant_group) || I.isVolatile())
+    return nullptr;
+
+  // We consider bitcasts and zero GEPs to be the same pointer value. Start by
+  // stripping bitcasts and zero GEPs, then we will recursively look at loads
+  // and stores through bitcasts and zero GEPs.
+  Value *PointerOperand = getLoadStorePointerOperand(&I)->stripPointerCasts();
+
+  // It's not safe to walk the use list of a global value because function
+  // passes aren't allowed to look outside their functions.
+  // FIXME: this could be fixed by filtering instructions from outside of
+  // current function.
+  if (isa<Constant>(PointerOperand))
+    return nullptr;
+
+  // Queue to process all pointers that are equivalent to load operand.
+  SmallVector<const Value *, 8> PointerUsesQueue;
+  PointerUsesQueue.push_back(PointerOperand);
+
+  const Instruction *MostDominatingInstruction = &I;
+
+  // FIXME: This loop is O(n^2) because dominates can be O(n) and in worst case
+  // we will see all the instructions. It may not matter in practice. If it
+  // does, we will have to support MemorySSA construction and updates.
+  while (!PointerUsesQueue.empty()) {
+    const Value *Ptr = PointerUsesQueue.pop_back_val();
+    assert(Ptr && !isa<GlobalValue>(Ptr) &&
+           "Null or GlobalValue should not be inserted");
+
+    for (const User *Us : Ptr->users()) {
+      auto *U = dyn_cast<Instruction>(Us);
+      if (!U || U == &I || !DT.dominates(U, MostDominatingInstruction))
+        continue;
+
+      // Add bitcasts and zero GEPs to queue.
+      if (isa<BitCastInst>(U)) {
+        PointerUsesQueue.push_back(U);
+        continue;
+      }
+      if (auto *GEP = dyn_cast<GetElementPtrInst>(U)) {
+        if (GEP->hasAllZeroIndices())
+          PointerUsesQueue.push_back(U);
+        continue;
+      }
+
+      // If we hit a load/store with an invariant.group metadata and the same
+      // pointer operand, we can assume that value pointed to by the pointer
+      // operand didn't change.
+      if (U->hasMetadata(LLVMContext::MD_invariant_group) &&
+          getLoadStorePointerOperand(U) == Ptr && !U->isVolatile()) {
+        MostDominatingInstruction = U;
+      }
+    }
+  }
+  return MostDominatingInstruction == &I ? nullptr : MostDominatingInstruction;
+}
+
 template <typename AliasAnalysisType>
 MemoryAccess *
 MemorySSA::ClobberWalkerBase<AliasAnalysisType>::getClobberingMemoryAccessBase(
-    MemoryAccess *MA, unsigned &UpwardWalkLimit, bool SkipSelf) {
+    MemoryAccess *MA, unsigned &UpwardWalkLimit, bool SkipSelf,
+    bool UseInvariantGroup) {
   auto *StartingAccess = dyn_cast<MemoryUseOrDef>(MA);
   // If this is a MemoryPhi, we can't do anything.
   if (!StartingAccess)
     return MA;
 
+  if (UseInvariantGroup) {
+    if (auto *I = getInvariantGroupClobberingInstruction(
+            *StartingAccess->getMemoryInst(), MSSA->getDomTree())) {
+      assert(isa<LoadInst>(I) || isa<StoreInst>(I));
+
+      auto *ClobberMA = MSSA->getMemoryAccess(I);
+      assert(ClobberMA);
+      if (isa<MemoryUse>(ClobberMA))
+        return ClobberMA->getDefiningAccess();
+      return ClobberMA;
+    }
+  }
+
   bool IsOptimized = false;
 
   // If this is an already optimized use or def, return the optimized result.

diff --git a/llvm/test/Analysis/MemorySSA/invariant-groups.ll b/llvm/test/Analysis/MemorySSA/invariant-groups.ll
index 8a7ba6eb56fc8..9a4bbb5c859b5 100644
--- a/llvm/test/Analysis/MemorySSA/invariant-groups.ll
+++ b/llvm/test/Analysis/MemorySSA/invariant-groups.ll
@@ -1,11 +1,44 @@
 ; RUN: opt -aa-pipeline=basic-aa -passes='print<memoryssa-walker>' -verify-memoryssa < %s 2>&1 | FileCheck %s
-;
-; Currently, MemorySSA doesn't support invariant groups. So, we should ignore
-; launder.invariant.group intrinsics entirely. We'll need to pay attention to
-; them when/if we decide to support invariant groups.
 
 @g = external global i32
 
+; CHECK-LABEL: define {{.*}} @global(
+define i32 @global() {
+; CHECK: 1 = MemoryDef(liveOnEntry)
+; CHECK-NEXT: store i32 0
+  store i32 0, i32* @g, align 4, !invariant.group !0
+
+; CHECK: 2 = MemoryDef(1)
+; CHECK-NEXT: call void @clobber
+  call void @clobber(i32* @g)
+
+; FIXME: this could be clobbered by 1 if we walked the instruction list for loads/stores to @g.
+; But we can't look at the uses of @g in a function analysis.
+; CHECK: MemoryUse(2) {{.*}} clobbered by 2
+; CHECK-NEXT: %1 = load i32
+  %1 = load i32, i32* @g, align 4, !invariant.group !0
+  ret i32 %1
+}
+
+; CHECK-LABEL: define {{.*}} @global2(
+define i32 @global2() {
+; CHECK: 1 = MemoryDef(liveOnEntry)
+; CHECK-NEXT: store i32 0
+  store i32 0, i32* inttoptr (i64 ptrtoint (i32* @g to i64) to i32*), align 4, !invariant.group !0
+
+; CHECK: 2 = MemoryDef(1)
+; CHECK-NEXT: call void @clobber
+  call void @clobber(i32* inttoptr (i64 ptrtoint (i32* @g to i64) to i32*))
+
+; FIXME: this could be clobbered by 1 if we walked the instruction list for loads/stores to @g.
+; But we can't look at the uses of @g in a function analysis.
+; CHECK: MemoryUse(2) {{.*}} clobbered by 2
+; CHECK-NEXT: %1 = load i32
+  %1 = load i32, i32* inttoptr (i64 ptrtoint (i32* @g to i64) to i32*), align 4, !invariant.group !0
+  ret i32 %1
+}
+
+; CHECK-LABEL: define {{.*}} @foo(
 define i32 @foo(i32* %a) {
 ; CHECK: 1 = MemoryDef(liveOnEntry)
 ; CHECK-NEXT: store i32 0
@@ -29,6 +62,41 @@ define i32 @foo(i32* %a) {
   ret i32 %2
 }
 
+; CHECK-LABEL: define {{.*}} @volatile1(
+define void @volatile1(i32* %a) {
+; CHECK: 1 = MemoryDef(liveOnEntry)
+; CHECK-NEXT: store i32 0
+  store i32 0, i32* %a, align 4, !invariant.group !0
+
+; CHECK: 2 = MemoryDef(1)
+; CHECK-NEXT: call void @clobber
+  call void @clobber(i32* %a)
+
+; CHECK: 3 = MemoryDef(2){{.*}} clobbered by 2
+; CHECK-NEXT: load volatile
+  %b = load volatile i32, i32* %a, align 4, !invariant.group !0
+
+  ret void
+}
+
+; CHECK-LABEL: define {{.*}} @volatile2(
+define void @volatile2(i32* %a) {
+; CHECK: 1 = MemoryDef(liveOnEntry)
+; CHECK-NEXT: store volatile i32 0
+  store volatile i32 0, i32* %a, align 4, !invariant.group !0
+
+; CHECK: 2 = MemoryDef(1)
+; CHECK-NEXT: call void @clobber
+  call void @clobber(i32* %a)
+
+; CHECK: MemoryUse(2){{.*}} clobbered by 2
+; CHECK-NEXT: load i32
+  %b = load i32, i32* %a, align 4, !invariant.group !0
+
+  ret void
+}
+
+; CHECK-LABEL: define {{.*}} @skipBarrier(
 define i32 @skipBarrier(i32* %a) {
 ; CHECK: 1 = MemoryDef(liveOnEntry)
 ; CHECK-NEXT: store i32 0
@@ -47,6 +115,7 @@ define i32 @skipBarrier(i32* %a) {
   ret i32 %2
 }
 
+; CHECK-LABEL: define {{.*}} @skipBarrier2(
 define i32 @skipBarrier2(i32* %a) {
 
 ; CHECK: MemoryUse(liveOnEntry)
@@ -67,8 +136,7 @@ define i32 @skipBarrier2(i32* %a) {
 ; CHECK-NEXT: store i32 1
   store i32 1, i32* @g, align 4
 
-; FIXME: based on invariant.group it should be MemoryUse(liveOnEntry)
-; CHECK: MemoryUse(2) {{.*}} clobbered by 2
+; CHECK: MemoryUse(2) {{.*}} clobbered by liveOnEntry
 ; CHECK-NEXT: %v3 = load i32
   %v3 = load i32, i32* %a32, align 4, !invariant.group !0
   %add = add nsw i32 %v2, %v3
@@ -76,6 +144,7 @@ define i32 @skipBarrier2(i32* %a) {
   ret i32 %add2
 }
 
+; CHECK-LABEL: define {{.*}} @handleInvariantGroups(
 define i32 @handleInvariantGroups(i32* %a) {
 ; CHECK: 1 = MemoryDef(liveOnEntry)
 ; CHECK-NEXT: store i32 0
@@ -98,14 +167,14 @@ define i32 @handleInvariantGroups(i32* %a) {
 ; CHECK-NEXT: store i32 2
   store i32 2, i32* @g, align 4
 
-; FIXME: This can be changed to MemoryUse(2)
-; CHECK: MemoryUse(4) {{.*}} clobbered by 4
+; CHECK: MemoryUse(4) {{.*}} clobbered by 2
 ; CHECK-NEXT: %3 = load i32
   %3 = load i32, i32* %a32, align 4, !invariant.group !0
   %add = add nsw i32 %2, %3
   ret i32 %add
 }
 
+; CHECK-LABEL: define {{.*}} @loop(
 define i32 @loop(i1 %a) {
 entry:
   %0 = alloca i32, align 4
@@ -118,15 +187,13 @@ entry:
   br i1 %a, label %Loop.Body, label %Loop.End
 
 Loop.Body:
-; FIXME: MemoryUse(1)
-; CHECK: MemoryUse(2) {{.*}} clobbered by 2
+; CHECK: MemoryUse(2) {{.*}} clobbered by 1
 ; CHECK-NEXT: %1 = load i32
   %1 = load i32, i32* %0, !invariant.group !0
   br i1 %a, label %Loop.End, label %Loop.Body
 
 Loop.End:
-; FIXME: MemoryUse(1)
-; CHECK: MemoryUse(2) {{.*}} clobbered by 2
+; CHECK: MemoryUse(2) {{.*}} clobbered by 1
 ; CHECK-NEXT: %2 = load
   %2 = load i32, i32* %0, align 4, !invariant.group !0
   br i1 %a, label %Ret, label %Loop.Body
@@ -135,6 +202,7 @@ Ret:
   ret i32 %2
 }
 
+; CHECK-LABEL: define {{.*}} @loop2(
 define i8 @loop2(i8* %p) {
 entry:
 ; CHECK: 1 = MemoryDef(liveOnEntry)
@@ -154,8 +222,7 @@ Loop.Body:
 ; CHECK-NEXT: %0 = load i8
   %0 = load i8, i8* %after, !invariant.group !0
 
-; FIXME: MemoryUse(1)
-; CHECK: MemoryUse(6) {{.*}} clobbered by 6
+; CHECK: MemoryUse(6) {{.*}} clobbered by 1
 ; CHECK-NEXT: %1 = load i8
   %1 = load i8, i8* %p, !invariant.group !0
 
@@ -169,8 +236,7 @@ Loop.End:
 ; CHECK-NEXT: %2 = load
   %2 = load i8, i8* %after, align 4, !invariant.group !0
 
-; FIXME: MemoryUse(1)
-; CHECK: MemoryUse(5) {{.*}} clobbered by 5
+; CHECK: MemoryUse(5) {{.*}} clobbered by 1
 ; CHECK-NEXT: %3 = load
   %3 = load i8, i8* %p, align 4, !invariant.group !0
   br i1 undef, label %Ret, label %Loop.Body
@@ -180,6 +246,7 @@ Ret:
 }
 
 
+; CHECK-LABEL: define {{.*}} @loop3(
 define i8 @loop3(i8* %p) {
 entry:
 ; CHECK: 1 = MemoryDef(liveOnEntry)
@@ -203,8 +270,7 @@ Loop.Body:
 ; CHECK-NEXT: call void @clobber8
   call void @clobber8(i8* %after)
 
-; FIXME: MemoryUse(8)
-; CHECK: MemoryUse(4) {{.*}} clobbered by 4
+; CHECK: MemoryUse(4) {{.*}} clobbered by 8
 ; CHECK-NEXT: %1 = load i8
   %1 = load i8, i8* %after, !invariant.group !0
 
@@ -214,8 +280,7 @@ Loop.next:
 ; CHECK-NEXT: call void @clobber8
   call void @clobber8(i8* %after)
 
-; FIXME: MemoryUse(8)
-; CHECK: MemoryUse(5) {{.*}} clobbered by 5
+; CHECK: MemoryUse(5) {{.*}} clobbered by 8
 ; CHECK-NEXT: %2 = load i8
   %2 = load i8, i8* %after, !invariant.group !0
 
@@ -230,8 +295,7 @@ Loop.End:
 ; CHECK-NEXT: call void @clobber8
   call void @clobber8(i8* %after)
 
-; FIXME: MemoryUse(7)
-; CHECK: MemoryUse(6) {{.*}} clobbered by 6
+; CHECK: MemoryUse(6) {{.*}} clobbered by 7
 ; CHECK-NEXT: %4 = load
   %4 = load i8, i8* %after, align 4, !invariant.group !0
   br i1 undef, label %Ret, label %Loop.Body
@@ -240,6 +304,7 @@ Ret:
   ret i8 %3
 }
 
+; CHECK-LABEL: define {{.*}} @loop4(
 define i8 @loop4(i8* %p) {
 entry:
 ; CHECK: 1 = MemoryDef(liveOnEntry)
@@ -263,8 +328,7 @@ Loop.Body:
 ; CHECK-NEXT: %1 = load i8
   %1 = load i8, i8* %after, !invariant.group !0
 
-; FIXME: MemoryUse(2)
-; CHECK: MemoryUse(6) {{.*}} clobbered by 6
+; CHECK: MemoryUse(6) {{.*}} clobbered by 1
 ; CHECK-NEXT: %2 = load i8
   %2 = load i8, i8* %p, !invariant.group !0
 
@@ -277,8 +341,7 @@ Loop.End:
 ; CHECK-NEXT: %3 = load
   %3 = load i8, i8* %after, align 4, !invariant.group !0
 
-; FIXME: MemoryUse(2)
-; CHECK: MemoryUse(5) {{.*}} clobbered by 5
+; CHECK: MemoryUse(5) {{.*}} clobbered by 1
 ; CHECK-NEXT: %4 = load
   %4 = load i8, i8* %p, align 4, !invariant.group !0
   br i1 undef, label %Ret, label %Loop.Body
@@ -288,7 +351,7 @@ Ret:
 }
 
 ; In the future we would like to CSE barriers if there is no clobber between.
-; CHECK-LABEL: define i8 @optimizable()
+; CHECK-LABEL: define {{.*}} @optimizable(
 define i8 @optimizable() {
 entry:
   %ptr = alloca i8
@@ -318,7 +381,7 @@ entry:
   ret i8 %v
 }
 
-; CHECK-LABEL: define i8 @unoptimizable2()
+; CHECK-LABEL: define {{.*}} @unoptimizable2()
 define i8 @unoptimizable2() {
   %ptr = alloca i8
 ; CHECK: 1 = MemoryDef(liveOnEntry)

diff --git a/llvm/test/Transforms/NewGVN/invariant.group-xfail.ll b/llvm/test/Transforms/NewGVN/invariant.group.ll
similarity index 85%
rename from llvm/test/Transforms/NewGVN/invariant.group-xfail.ll
rename to llvm/test/Transforms/NewGVN/invariant.group.ll
index 78e61a2f2b681..772c1c66bec2a 100644
--- a/llvm/test/Transforms/NewGVN/invariant.group-xfail.ll
+++ b/llvm/test/Transforms/NewGVN/invariant.group.ll
@@ -1,5 +1,4 @@
-; XFAIL: *
-; RUN: opt < %s -newgvn -S | FileCheck %s
+; RUN: opt < %s -passes=newgvn -S | FileCheck %s
 
 %struct.A = type { i32 (...)** }
 @_ZTV1A = available_externally unnamed_addr constant [3 x i8*] [i8* null, i8* bitcast (i8** @_ZTI1A to i8*), i8* bitcast (void (%struct.A*)* @_ZN1A3fooEv to i8*)], align 8
@@ -54,9 +53,6 @@ entry:
 
 ; CHECK-LABEL: define i1 @proveEqualityForStrip(
 define i1 @proveEqualityForStrip(i8* %a) {
-; FIXME: The first call could be also removed by GVN. Right now
-; DCE removes it. The second call is CSE'd with the first one.
-; CHECK: %b1 = call i8* @llvm.strip.invariant.group.p0i8(i8* %a)
   %b1 = call i8* @llvm.strip.invariant.group.p0i8(i8* %a)
 ; CHECK-NOT: llvm.strip.invariant.group
   %b2 = call i8* @llvm.strip.invariant.group.p0i8(i8* %a)
@@ -76,6 +72,7 @@ entry:
     ret i8 %a
 }
 
+; NewGVN doesn't support assumes.
 ; CHECK-LABEL: define void @indirectLoads() {
 define void @indirectLoads() {
 entry:
@@ -96,7 +93,7 @@ entry:
   %3 = load %struct.A*, %struct.A** %a, align 8
   %4 = bitcast %struct.A* %3 to void (%struct.A*)***
 
-; CHECK: call void @_ZN1A3fooEv(
+; FIXME: call void @_ZN1A3fooEv(
   %vtable1 = load void (%struct.A*)**, void (%struct.A*)*** %4, align 8, !invariant.group !0
   %vfn = getelementptr inbounds void (%struct.A*)*, void (%struct.A*)** %vtable1, i64 0
   %5 = load void (%struct.A*)*, void (%struct.A*)** %vfn, align 8
@@ -104,7 +101,7 @@ entry:
   %6 = load %struct.A*, %struct.A** %a, align 8
   %7 = bitcast %struct.A* %6 to void (%struct.A*)***
 
-; CHECK: call void @_ZN1A3fooEv(
+; FIXME: call void @_ZN1A3fooEv(
   %vtable2 = load void (%struct.A*)**, void (%struct.A*)*** %7, align 8, !invariant.group !0
   %vfn3 = getelementptr inbounds void (%struct.A*)*, void (%struct.A*)** %vtable2, i64 0
   %8 = load void (%struct.A*)*, void (%struct.A*)** %vfn3, align 8
@@ -116,19 +113,20 @@ entry:
   %vtable4 = load void (%struct.A*)**, void (%struct.A*)*** %10, align 8, !invariant.group !0
   %vfn5 = getelementptr inbounds void (%struct.A*)*, void (%struct.A*)** %vtable4, i64 0
   %11 = load void (%struct.A*)*, void (%struct.A*)** %vfn5, align 8
-; CHECK: call void @_ZN1A3fooEv(
+; FIXME: call void @_ZN1A3fooEv(
   call void %11(%struct.A* %9)
  
   %vtable5 = load i8**, i8*** %2, align 8, !invariant.group !0
   %vfn6 = getelementptr inbounds i8*, i8** %vtable5, i64 0
   %12 = bitcast i8** %vfn6 to void (%struct.A*)**
   %13 = load void (%struct.A*)*, void (%struct.A*)** %12, align 8
-; CHECK: call void @_ZN1A3fooEv(
+; FIXME: call void @_ZN1A3fooEv(
   call void %13(%struct.A* %9)
   
   ret void
 }
 
+; NewGVN won't CSE loads with different pointee types.
 ; CHECK-LABEL: define void @combiningBitCastWithLoad() {
 define void @combiningBitCastWithLoad() {
 entry:
@@ -145,7 +143,7 @@ entry:
   %cmp.vtables = icmp eq i8** %vtable, getelementptr inbounds ([3 x i8*], [3 x i8*]* @_ZTV1A, i64 0, i64 2)
   
   store %struct.A* %1, %struct.A** %a, align 8
-; CHECK-NOT: !invariant.group
+; FIXME-NOT: !invariant.group
   %3 = load %struct.A*, %struct.A** %a, align 8
   %4 = bitcast %struct.A* %3 to void (%struct.A*)***
 
@@ -163,7 +161,7 @@ enter:
   %ptr = alloca i8
   store i8 42, i8* %ptr
   call void @foo(i8* %ptr)
-; CHECK: %[[A:.*]] = load i8, i8* %ptr, !invariant.group
+; CHECK: %[[A:.*]] = load i8, i8* %ptr, align 1, !invariant.group
   %a = load i8, i8* %ptr, !invariant.group !0
 ; CHECK-NOT: load
   %b = load i8, i8* %ptr, !invariant.group !0
@@ -180,7 +178,7 @@ enter:
   %ptr = alloca i8
   store i8 42, i8* %ptr
   call void @foo(i8* %ptr)
-; CHECK: %[[D:.*]] = load i8, i8* %ptr, !invariant.group
+; CHECK: %[[D:.*]] = load i8, i8* %ptr, align 1, !invariant.group
   %c = load i8, i8* %ptr
 ; CHECK-NOT: load
   %d = load i8, i8* %ptr, !invariant.group !0
@@ -197,7 +195,7 @@ enter:
   %ptr = alloca i8
   store i8 42, i8* %ptr
   call void @foo(i8* %ptr)
-; CHECK: %[[E:.*]] = load i8, i8* %ptr, !invariant.group
+; CHECK: %[[E:.*]] = load i8, i8* %ptr, align 1, !invariant.group
   %e = load i8, i8* %ptr, !invariant.group !0
 ; CHECK-NOT: load
   %f = load i8, i8* %ptr
@@ -214,7 +212,7 @@ enter:
   %ptr = alloca i8
   store i8 42, i8* %ptr
   call void @foo(i8* %ptr)
-; CHECK: %[[E:.*]] = load i8, i8* %ptr, !invariant.group
+; CHECK: %[[E:.*]] = load i8, i8* %ptr, align 1, !invariant.group
   %e = load i8, i8* %ptr, !invariant.group !0
 ; CHECK-NOT: load
   %f = load i8, i8* %ptr, !invariant.group !0
@@ -251,16 +249,17 @@ entry:
     ret i8 %a
 }
 
+; NewGVN cares about the launder for some reason.
 ; CHECK-LABEL: define i8 @optimizable4() {
 define i8 @optimizable4() {
 entry:
     %ptr = alloca i8
-    store i8 42, i8* %ptr, !invariant.group !0
+    store i8 42, i8* %ptr
     %ptr2 = call i8* @llvm.launder.invariant.group.p0i8(i8* %ptr)
-; CHECK-NOT: load
-    %a = load i8, i8* %ptr2, !invariant.group !0
+; FIXME-NOT: load
+    %a = load i8, i8* %ptr2
     
-; CHECK: ret i8 42
+; FIXME: ret i8 42
     ret i8 %a
 }
 
@@ -276,7 +275,7 @@ entry:
     call void @bar(i8 %b)
 
     %c = load volatile i8, i8* %ptr, !invariant.group !0
-; FIXME: we could change %c to 42, preserving volatile load
+; We might be able to optimize this, but nobody cares
 ; CHECK: call void @bar(i8 %c)
     call void @bar(i8 %c)
 ; CHECK: ret i8 42
@@ -295,15 +294,15 @@ entry:
     call void @bar(i8 %b)
 
     %c = load volatile i8, i8* %ptr, !invariant.group !0
-; FIXME: we could change %c to 42, preserving volatile load
+; We might be able to optimize this, but nobody cares
 ; CHECK: call void @bar(i8 %c)
     call void @bar(i8 %c)
 ; CHECK: ret i8 42
     ret i8 %a
 }
 
-; CHECK-LABEL: define i8 @fun() {
-define i8 @fun() {
+; CHECK-LABEL: define void @fun() {
+define void @fun() {
 entry:
     %ptr = alloca i8
     store i8 42, i8* %ptr, !invariant.group !0
@@ -313,23 +312,10 @@ entry:
 ; CHECK: call void @bar(i8 42)
     call void @bar(i8 %a)
 
-    %newPtr = call i8* @getPointer(i8* %ptr) 
-    %c = load i8, i8* %newPtr, !invariant.group !0 ; Can't assume anything, because we only have information about %ptr
-; CHECK: call void @bar(i8 %c)
-    call void @bar(i8 %c)
-    
-    %unknownValue = load i8, i8* @unknownPtr
-; FIXME: Can assume that %unknownValue == 42
-; CHECK: store i8 %unknownValue, i8* %ptr, !invariant.group !0
-    store i8 %unknownValue, i8* %ptr, !invariant.group !0 
-
-    %newPtr2 = call i8* @llvm.launder.invariant.group.p0i8(i8* %ptr)
-; CHECK-NOT: load
-    %d = load i8, i8* %newPtr2, !invariant.group !0
-; CHECK: ret i8 %unknownValue
-    ret i8 %d
+    ret void
 }
 
+; FIXME: NewGVN doesn't run instsimplify on a load from a vtable definition?
 ; This test checks if invariant.group understands gep with zeros
 ; CHECK-LABEL: define void @testGEP0() {
 define void @testGEP0() {
@@ -347,7 +333,7 @@ define void @testGEP0() {
   %6 = bitcast %struct.A* %a to void (%struct.A*)***
   %7 = load void (%struct.A*)**, void (%struct.A*)*** %6, align 8, !invariant.group !0
   %8 = load void (%struct.A*)*, void (%struct.A*)** %7, align 8
-; CHECK: call void @_ZN1A3fooEv(%struct.A* nonnull %a)
+; FIXME: call void @_ZN1A3fooEv(%struct.A* nonnull %a)
   call void %8(%struct.A* nonnull %a)
   br label %_Z1gR1A.exit
 
@@ -360,10 +346,10 @@ _Z1gR1A.exit:                                     ; preds = %0, %5
 ; from the same function.
 ; CHECK-LABEL: define void @testGlobal() {
 define void @testGlobal() {
-; CHECK:  %a = load i8, i8* @unknownPtr, !invariant.group !0
+; CHECK:  %a = load i8, i8* @unknownPtr, align 1, !invariant.group !0
    %a = load i8, i8* @unknownPtr, !invariant.group !0
    call void @foo2(i8* @unknownPtr, i8 %a)
-; CHECK:  %1 = load i8, i8* @unknownPtr, !invariant.group !0
+; CHECK:  %1 = load i8, i8* @unknownPtr, align 1, !invariant.group !0
    %1 = load i8, i8* @unknownPtr, !invariant.group !0
    call void @bar(i8 %1)
 
@@ -378,12 +364,14 @@ define void @testGlobal() {
    call void @fooBit(i1* %b0, i1 %3)
    ret void
 }
-; And in the case it is not global
-; CHECK-LABEL: define void @testNotGlobal() {
-define void @testNotGlobal() {
+
+; Might be similar to above where NewGVN doesn't handle loads of different types from the same location.
+; Not super important anyway.
+; CHECK-LABEL: define void @testTrunc() {
+define void @testTrunc() {
    %a = alloca i8
    call void @foo(i8* %a)
-; CHECK:  %b = load i8, i8* %a, !invariant.group !0
+; CHECK:  %b = load i8, i8* %a, align 1, !invariant.group !0
    %b = load i8, i8* %a, !invariant.group !0
    call void @foo2(i8* %a, i8 %b)
 
@@ -393,16 +381,17 @@ define void @testNotGlobal() {
 
    %b0 = bitcast i8* %a to i1*
    call void @fooBit(i1* %b0, i1 1)
-; CHECK: %1 = trunc i8 %b to i1
+; FIXME: %1 = trunc i8 %b to i1
    %2 = load i1, i1* %b0, !invariant.group !0
-; CHECK-NEXT: call void @fooBit(i1* %b0, i1 %1)
+; FIXME-NEXT: call void @fooBit(i1* %b0, i1 %1)
    call void @fooBit(i1* %b0, i1 %2)
    %3 = load i1, i1* %b0, !invariant.group !0
-; CHECK-NEXT: call void @fooBit(i1* %b0, i1 %1)
+; FIXME-NEXT: call void @fooBit(i1* %b0, i1 %1)
    call void @fooBit(i1* %b0, i1 %3)
    ret void
 }
 
+; See comment in @testGEP0 on what NewGVN is lacking.
 ; CHECK-LABEL: define void @handling_loops()
 define void @handling_loops() {
   %a = alloca %struct.A, align 8
@@ -426,9 +415,9 @@ define void @handling_loops() {
   %8 = phi i8 [ %10, %._crit_edge ], [ 1, %._crit_edge.preheader ]
   %.pre = load void (%struct.A*)**, void (%struct.A*)*** %5, align 8, !invariant.group !0
   %9 = load void (%struct.A*)*, void (%struct.A*)** %.pre, align 8
-  ; CHECK: call void @_ZN1A3fooEv(%struct.A* nonnull %a)
+  ; FIXME: call void @_ZN1A3fooEv(%struct.A* nonnull %a)
   call void %9(%struct.A* nonnull %a) #3
-  ; CHECK-NOT: call void %
+  ; FIXME-NOT: call void %
   %10 = add nuw nsw i8 %8, 1
   %11 = load i8, i8* @unknownPtr, align 4
   %12 = icmp slt i8 %10, %11
@@ -451,10 +440,11 @@ declare void @_ZN1AC1Ev(%struct.A*)
 declare void @fooBit(i1*, i1)
 
 declare i8* @llvm.launder.invariant.group.p0i8(i8*)
+declare i8* @llvm.strip.invariant.group.p0i8(i8*)
 
 ; Function Attrs: nounwind
 declare void @llvm.assume(i1 %cmp.vtables) #0
 
 
 attributes #0 = { nounwind }
-!0 = !{}
\ No newline at end of file
+!0 = !{}

diff --git a/llvm/unittests/Analysis/MemorySSATest.cpp b/llvm/unittests/Analysis/MemorySSATest.cpp
index 3c8aa0159e628..959b60adb069d 100644
--- a/llvm/unittests/Analysis/MemorySSATest.cpp
+++ b/llvm/unittests/Analysis/MemorySSATest.cpp
@@ -1725,4 +1725,50 @@ TEST_F(MemorySSATest, TestLoopInvariantEntryBlockPointer) {
       EXPECT_TRUE(ItB->second.Size.getValue() == 8);
     }
   }
-}
\ No newline at end of file
+}
+
+TEST_F(MemorySSATest, TestInvariantGroup) {
+  SMDiagnostic E;
+  auto M = parseAssemblyString("declare void @f(i8*)\n"
+                               "define i8 @test(i8* %p) {\n"
+                               "entry:\n"
+                               "  store i8 42, i8* %p, !invariant.group !0\n"
+                               "  call void @f(i8* %p)\n"
+                               "  %v = load i8, i8* %p, !invariant.group !0\n"
+                               "  ret i8 %v\n"
+                               "}\n"
+                               "!0 = !{}",
+                               E, C);
+  ASSERT_TRUE(M);
+  F = M->getFunction("test");
+  ASSERT_TRUE(F);
+  setupAnalyses();
+  MemorySSA &MSSA = *Analyses->MSSA;
+  MemorySSAWalker *Walker = Analyses->Walker;
+
+  auto &BB = F->getEntryBlock();
+  auto &SI = cast<StoreInst>(*BB.begin());
+  auto &Call = cast<CallBase>(*std::next(BB.begin()));
+  auto &LI = cast<LoadInst>(*std::next(std::next(BB.begin())));
+
+  {
+    MemoryAccess *SAccess = MSSA.getMemoryAccess(&SI);
+    MemoryAccess *LAccess = MSSA.getMemoryAccess(&LI);
+    MemoryAccess *SClobber = Walker->getClobberingMemoryAccess(SAccess);
+    EXPECT_TRUE(MSSA.isLiveOnEntryDef(SClobber));
+    MemoryAccess *LClobber = Walker->getClobberingMemoryAccess(LAccess);
+    EXPECT_EQ(SAccess, LClobber);
+  }
+
+  // remove store and verify that the memory accesses still make sense
+  MemorySSAUpdater Updater(&MSSA);
+  Updater.removeMemoryAccess(&SI);
+  SI.eraseFromParent();
+
+  {
+    MemoryAccess *CallAccess = MSSA.getMemoryAccess(&Call);
+    MemoryAccess *LAccess = MSSA.getMemoryAccess(&LI);
+    MemoryAccess *LClobber = Walker->getClobberingMemoryAccess(LAccess);
+    EXPECT_EQ(CallAccess, LClobber);
+  }
+}

More information about the llvm-commits mailing list