[llvm] r342422 - [EarlyCSEwMemorySSA] Add MSSA verification and tests to make EarlyCSE failures easier to track.

Alina Sbirlea via llvm-commits llvm-commits at lists.llvm.org
Mon Sep 17 15:35:21 PDT 2018


Author: asbirlea
Date: Mon Sep 17 15:35:21 2018
New Revision: 342422

URL: http://llvm.org/viewvc/llvm-project?rev=342422&view=rev
Log:
[EarlyCSEwMemorySSA] Add MSSA verification and tests to make EarlyCSE failures easier to track.

Summary:
EarlyCSE can make IR changes that leave MemorySSA with accesses claiming to be optimized, but for which a subsequent MemorySSA run would compute a different optimized result.
Because optimized accesses rely on AA queries, we can't fix this in general without recomputing MemorySSA.
Add tests to track this, and a basic verification call to catch potential future failures.
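
For reference, the verification can be exercised on the new test with the same
flags as its RUN line (invocation shown for illustration only; the in-tree RUN
line below is authoritative):

  opt -early-cse-memssa -verify-memoryssa -disable-output \
      llvm/test/Transforms/EarlyCSE/preserve_memoryssa.ll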

Reviewers: george.burgess.iv, gberry

Subscribers: sanjoy, jlebar, Prazek, llvm-commits

Differential Revision: https://reviews.llvm.org/D51960

Added:
    llvm/trunk/test/Transforms/EarlyCSE/preserve_memoryssa.ll
Modified:
    llvm/trunk/lib/Transforms/Scalar/EarlyCSE.cpp

Modified: llvm/trunk/lib/Transforms/Scalar/EarlyCSE.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/EarlyCSE.cpp?rev=342422&r1=342421&r2=342422&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Scalar/EarlyCSE.cpp (original)
+++ llvm/trunk/lib/Transforms/Scalar/EarlyCSE.cpp Mon Sep 17 15:35:21 2018
@@ -604,6 +604,8 @@ private:
   void removeMSSA(Instruction *Inst) {
     if (!MSSA)
       return;
+    if (VerifyMemorySSA)
+      MSSA->verifyMemorySSA();
     // Removing a store here can leave MemorySSA in an unoptimized state by
     // creating MemoryPhis that have identical arguments and by creating
     // MemoryUses whose defining access is not an actual clobber.  We handle the
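
For context, VerifyMemorySSA is the existing flag declared in
llvm/Analysis/MemorySSA.h and set by -verify-memoryssa. The "optimized" state
the summary refers to is the per-access bookkeeping on MemoryUseOrDef. A
minimal sketch of inspecting that state (the helper name is illustrative, not
part of this patch):

  #include "llvm/Analysis/MemorySSA.h"
  #include "llvm/Support/raw_ostream.h"
  using namespace llvm;

  // Illustrative only: report which accesses in a block claim to be
  // optimized. A later MemorySSA run may compute a different optimized
  // access once EarlyCSE has changed the IR.
  static void dumpOptimizedState(MemorySSA &MSSA, BasicBlock &BB) {
    for (Instruction &I : BB)
      if (MemoryUseOrDef *MUD = MSSA.getMemoryAccess(&I))
        if (MUD->isOptimized())
          errs() << *MUD << " -> " << *MUD->getOptimized() << "\n";
  }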

Added: llvm/trunk/test/Transforms/EarlyCSE/preserve_memoryssa.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/EarlyCSE/preserve_memoryssa.ll?rev=342422&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/EarlyCSE/preserve_memoryssa.ll (added)
+++ llvm/trunk/test/Transforms/EarlyCSE/preserve_memoryssa.ll Mon Sep 17 15:35:21 2018
@@ -0,0 +1,137 @@
+; RUN: opt < %s -early-cse-memssa -verify-memoryssa -disable-output
+; REQUIRES: asserts
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; The tests below highlight scenarios where EarlyCSE does not preserve
+; MemorySSA's optimized accesses; they pass the current MemorySSA
+; verification.
+
+; Test 1:
+; Here AA cannot tell that the last load does not alias the only store. The
+; first two loads are a common expression; EarlyCSE removes the second one,
+; after which AA can see that the last load is a Use(LoE). Hence the access is
+; not optimized as it claims to be. Note that if we swap the GEP indices 2 and
+; 1, AA sees NoAlias for the last load even before the first two loads are
+; CSE'd.
+%struct.ImageParameters = type { i32, i32, i32 }
+@img = external global %struct.ImageParameters*, align 8
+define void @test1_macroblock() {
+entry:
+  ; MemoryUse(LoE)
+  %0 = load %struct.ImageParameters*, %struct.ImageParameters** @img, align 8
+
+  %Pos_2 = getelementptr inbounds %struct.ImageParameters, %struct.ImageParameters* %0, i64 0, i32 2
+  ; 1 = MemoryDef(LoE)
+  store i32 undef, i32* %Pos_2, align 8
+
+  ; MemoryUse(LoE)
+  %1 = load %struct.ImageParameters*, %struct.ImageParameters** @img, align 8
+
+  %Pos_1 = getelementptr inbounds %struct.ImageParameters, %struct.ImageParameters* %1, i64 0, i32 1
+  ; MemoryUse(1) MayAlias
+  %2 = load i32, i32* %Pos_1, align 4
+  unreachable
+}
+
+; Test 2:
+; EarlyCSE simplifies %string to undef. The Def and the Use used to be
+; MustAlias; with undef they become NoAlias, so the Use could be optimized
+; further to LoE. We could de-optimize uses of replaced instructions (see the
+; sketch after this patch), but in general that is not enough (see the next
+; tests).
+%struct.TermS = type { i32, i32, i32, i32, i32, i8* }
+define fastcc void @test2_term_string() {
+entry:
+  %string = getelementptr inbounds %struct.TermS, %struct.TermS* undef, i64 0, i32 5
+  ; 1 = MemoryDef(LoE)
+  store i8* undef, i8** %string, align 8
+  ; MemoryUse(1) MustAlias
+  %0 = load i8*, i8** %string, align 8
+  unreachable
+}
+
+; Test 3:
+; EarlyCSE simplifies %0 to undef, so the second Def now stores to undef, and
+; the last load (currently Use(2)) could be optimized further to LoE. When
+; replacing instructions, we could de-optimize all uses of the replaced
+; instruction and all uses of transitive accesses. However, even this does not
+; stop MemorySSA from being tripped up by AA (see Test 4).
+%struct.Grammar = type { i8*, i8*, %struct.anon }
+%struct.anon = type { i32, i32, %struct.Term**, [3 x %struct.Term*] }
+%struct.Term = type { i32 }
+
+define fastcc void @test3_term_string(%struct.Grammar* %g) {
+entry:
+  ; 1 = MemoryDef(LoE)
+  store i8* undef, i8** undef, align 8
+  ; MemoryUse(LoE)
+  %0 = load i8*, i8** undef, align 8
+  %arrayidx = getelementptr inbounds i8, i8* %0, i64 undef
+  ; 2 = MemoryDef(1)
+  store i8 0, i8* %arrayidx, align 1
+  %v = getelementptr inbounds %struct.Grammar, %struct.Grammar* %g, i64 0, i32 2, i32 2
+  ; MemoryUse(2) MayAlias
+  %1 = load %struct.Term**, %struct.Term*** %v, align 8
+  unreachable
+}
+
+; Test 4:
+; Removing dead/unused instructions in if.then274 makes AA smarter. Before the
+; removal, AA finds that %4 MayAlias the store above; after the removal, that
+; Use can be optimized to LoE. Hence after EarlyCSE there is an access that
+; claims to be optimized but could be optimized further.
+
+; We cannot escape such cases in general while relying on alias analysis.
+; The only fail-safe way to actually preserve MemorySSA when removing or
+; replacing instructions (i.e. to get the *same* MemorySSA as if it were
+; computed fresh for the updated IR) is to recompute it from scratch. What we
+; get now is still a correct update, but with accesses that claim to be
+; optimized and could be optimized further if MemorySSA were re-run on the IR.
+%struct.gnode.0.1.3.6.9.18.20.79 = type { i32, i32, i32, i32, i32, i32, i32, %struct.gnode.0.1.3.6.9.18.20.79* }
+@gnodeArray = external global %struct.gnode.0.1.3.6.9.18.20.79**, align 8
+
+define void @test4_shortest() {
+entry:
+  %exl.i = alloca [5 x i32], align 16
+  br i1 undef, label %if.then274, label %for.cond404
+
+if.then274:                                       ; preds = %if.end256
+  %0 = bitcast [5 x i32]* %exl.i to i8*
+  %arrayidx.i = getelementptr inbounds [5 x i32], [5 x i32]* %exl.i, i64 0, i64 1
+  %arrayidx1.i = getelementptr inbounds [5 x i32], [5 x i32]* %exl.i, i64 0, i64 2
+  %arrayidx2.i = getelementptr inbounds [5 x i32], [5 x i32]* %exl.i, i64 0, i64 3
+  %arrayidx3.i = getelementptr inbounds [5 x i32], [5 x i32]* %exl.i, i64 0, i64 4
+  %1 = bitcast [5 x i32]* %exl.i to i8*
+  %arrayidx.i1034 = getelementptr inbounds [5 x i32], [5 x i32]* %exl.i, i64 0, i64 1
+  %arrayidx1.i1035 = getelementptr inbounds [5 x i32], [5 x i32]* %exl.i, i64 0, i64 2
+  %arrayidx2.i1036 = getelementptr inbounds [5 x i32], [5 x i32]* %exl.i, i64 0, i64 3
+  %arrayidx3.i1037 = getelementptr inbounds [5 x i32], [5 x i32]* %exl.i, i64 0, i64 4
+  unreachable
+
+for.cond404:                                      ; preds = %if.end256
+  %2 = bitcast [5 x i32]* %exl.i to i8*
+  %arrayidx.i960 = getelementptr inbounds [5 x i32], [5 x i32]* %exl.i, i64 0, i64 1
+  %arrayidx1.i961 = getelementptr inbounds [5 x i32], [5 x i32]* %exl.i, i64 0, i64 2
+  %arrayidx2.i962 = getelementptr inbounds [5 x i32], [5 x i32]* %exl.i, i64 0, i64 3
+  ; 1 = MemoryDef(LoE)
+  store i32 undef, i32* %arrayidx2.i962, align 4
+  %arrayidx3.i963 = getelementptr inbounds [5 x i32], [5 x i32]* %exl.i, i64 0, i64 4
+
+  ; MemoryUse(LoE)
+  %3 = load %struct.gnode.0.1.3.6.9.18.20.79**, %struct.gnode.0.1.3.6.9.18.20.79*** @gnodeArray, align 8
+  %arrayidx6.i968 = getelementptr inbounds %struct.gnode.0.1.3.6.9.18.20.79*, %struct.gnode.0.1.3.6.9.18.20.79** %3, i64 undef
+  ; MemoryUse(1) MayAlias
+  %4 = load %struct.gnode.0.1.3.6.9.18.20.79*, %struct.gnode.0.1.3.6.9.18.20.79** %arrayidx6.i968, align 8
+  br i1 undef, label %for.cond26.preheader.i974, label %if.then20.for.body_crit_edge.i999
+
+for.cond26.preheader.i974:                        ; preds = %if.then20.i996
+  %5 = bitcast [5 x i32]* %exl.i to i8*
+  %arrayidx.i924 = getelementptr inbounds [5 x i32], [5 x i32]* %exl.i, i64 0, i64 1
+  %arrayidx1.i925 = getelementptr inbounds [5 x i32], [5 x i32]* %exl.i, i64 0, i64 2
+  %arrayidx2.i926 = getelementptr inbounds [5 x i32], [5 x i32]* %exl.i, i64 0, i64 3
+  %arrayidx3.i927 = getelementptr inbounds [5 x i32], [5 x i32]* %exl.i, i64 0, i64 4
+  unreachable
+
+if.then20.for.body_crit_edge.i999:                ; preds = %if.then20.i996
+  %arrayidx9.phi.trans.insert.i997 = getelementptr inbounds [5 x i32], [5 x i32]* %exl.i, i64 0, i64 undef
+  unreachable
+}
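
The de-optimization mentioned in the comments for tests 2 and 3 could look
roughly like the following: a sketch against the in-tree MemorySSA API
(llvm/Analysis/MemorySSA.h), not something this patch implements, and, as the
tests show, not sufficient in general:

  #include "llvm/Analysis/MemorySSA.h"
  using namespace llvm;

  // Conservatively drop the cached optimized state of every access that
  // uses MA, forcing clobbers to be recomputed on the next walker query.
  static void deoptimizeUsers(MemoryAccess *MA) {
    for (User *U : MA->users())
      if (auto *MUD = dyn_cast<MemoryUseOrDef>(U))
        if (MUD->isOptimized())
          MUD->resetOptimized();
  }

This resets direct users only; per test 3, uses of transitive accesses would
need the same treatment, and test 4 shows that even this cannot anticipate AA
getting smarter after unrelated deletions.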

More information about the llvm-commits mailing list