[llvm] r291351 - NewGVN: Make sure we properly lookup operand leaders while creating

Daniel Berlin via llvm-commits llvm-commits at lists.llvm.org
Sat Jan 7 08:55:14 PST 2017


Author: dannyb
Date: Sat Jan  7 10:55:14 2017
New Revision: 291351

URL: http://llvm.org/viewvc/llvm-project?rev=291351&view=rev
Log:
NewGVN: Make sure we properly lookup operand leaders while creating
congruence classes for stores, and then keep them up to date.  Add
testcases.

Added:
    llvm/trunk/test/Transforms/NewGVN/basic-cyclic-opt.ll
    llvm/trunk/test/Transforms/NewGVN/memory-handling.ll
Modified:
    llvm/trunk/lib/Transforms/Scalar/NewGVN.cpp

Modified: llvm/trunk/lib/Transforms/Scalar/NewGVN.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/NewGVN.cpp?rev=291351&r1=291350&r2=291351&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Scalar/NewGVN.cpp (original)
+++ llvm/trunk/lib/Transforms/Scalar/NewGVN.cpp Sat Jan  7 10:55:14 2017
@@ -337,8 +337,11 @@ private:
 
   // New instruction creation.
   void handleNewInstruction(Instruction *){};
+
+  // Various instruction touch utilities
   void markUsersTouched(Value *);
   void markMemoryUsersTouched(MemoryAccess *);
+  void markLeaderChangeTouched(CongruenceClass *CC);
 
   // Utilities.
   void cleanupTables();
@@ -1022,11 +1025,22 @@ void NewGVN::markMemoryUsersTouched(Memo
   }
 }
 
+// Touch the instructions that need to be updated after a congruence class has a
+// leader change, and mark changed values.
+void NewGVN::markLeaderChangeTouched(CongruenceClass *CC) {
+  for (auto M : CC->Members) {
+    if (auto *I = dyn_cast<Instruction>(M))
+      TouchedInstructions.set(InstrDFS[I]);
+    ChangedValues.insert(M);
+  }
+}
+
 // Perform congruence finding on a given value numbering expression.
 void NewGVN::performCongruenceFinding(Value *V, const Expression *E) {
   ValueToExpression[V] = E;
   // This is guaranteed to return something, since it will at least find
   // INITIAL.
+
   CongruenceClass *VClass = ValueToClass[V];
   assert(VClass && "Should have found a vclass");
   // Dead classes should have been eliminated from the mapping.
@@ -1045,14 +1059,17 @@ void NewGVN::performCongruenceFinding(Va
       place->second = NewClass;
 
       // Constants and variables should always be made the leader.
-      if (const auto *CE = dyn_cast<ConstantExpression>(E))
+      if (const auto *CE = dyn_cast<ConstantExpression>(E)) {
         NewClass->RepLeader = CE->getConstantValue();
-      else if (const auto *VE = dyn_cast<VariableExpression>(E))
-        NewClass->RepLeader = VE->getVariableValue();
-      else if (const auto *SE = dyn_cast<StoreExpression>(E))
-        NewClass->RepLeader = SE->getStoreInst()->getValueOperand();
-      else
+      } else if (const auto *SE = dyn_cast<StoreExpression>(E)) {
+        StoreInst *SI = SE->getStoreInst();
+        NewClass->RepLeader =
+            lookupOperandLeader(SI->getValueOperand(), SI, SI->getParent());
+      } else {
         NewClass->RepLeader = V;
+      }
+      assert(!isa<VariableExpression>(E) &&
+             "VariableExpression should have been handled already");
 
       EClass = NewClass;
       DEBUG(dbgs() << "Created new congruence class for " << *V
@@ -1091,14 +1108,11 @@ void NewGVN::performCongruenceFinding(Va
           ExpressionToClass.erase(VClass->DefiningExpr);
         }
       } else if (VClass->RepLeader == V) {
-        // FIXME: When the leader changes, the value numbering of
-        // everything may change, so we need to reprocess.
+        // When the leader changes, the value numbering of
+        // everything may change due to symbolization changes, so we need to
+        // reprocess.
         VClass->RepLeader = *(VClass->Members.begin());
-        for (auto M : VClass->Members) {
-          if (auto *I = dyn_cast<Instruction>(M))
-            TouchedInstructions.set(InstrDFS[I]);
-          ChangedValues.insert(M);
-        }
+        markLeaderChangeTouched(VClass);
       }
     }
 
@@ -1120,6 +1134,27 @@ void NewGVN::performCongruenceFinding(Va
         markMemoryUsersTouched(MA);
       }
     }
+  } else if (StoreInst *SI = dyn_cast<StoreInst>(V)) {
+    // There is, sadly, one complicating thing for stores.  Stores do not
+    // produce values, only consume them.  However, in order to make loads and
+    // stores value number the same, we ignore the value operand of the store.
+    // But the value operand will still be the leader of our class, and thus, it
+    // may change.  Because the store is a use, the store will get reprocessed,
+    // but nothing will change about it, and so nothing above will catch it
+    // (since the class will not change).  In order to make sure everything ends
+    // up okay, we need to recheck the leader of the class.  Since stores of
+    // different values value number differently due to different memorydefs, we
+    // are guaranteed the leader is always the same between stores in the same
+    // class.
+    DEBUG(dbgs() << "Checking store leader\n");
+    auto ProperLeader =
+        lookupOperandLeader(SI->getValueOperand(), SI, SI->getParent());
+    if (EClass->RepLeader != ProperLeader) {
+      DEBUG(dbgs() << "Store leader changed, fixing\n");
+      EClass->RepLeader = ProperLeader;
+      markLeaderChangeTouched(EClass);
+      markMemoryUsersTouched(MSSA->getMemoryAccess(SI));
+    }
   }
 }
 

Added: llvm/trunk/test/Transforms/NewGVN/basic-cyclic-opt.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/NewGVN/basic-cyclic-opt.ll?rev=291351&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/NewGVN/basic-cyclic-opt.ll (added)
+++ llvm/trunk/test/Transforms/NewGVN/basic-cyclic-opt.ll Sat Jan  7 10:55:14 2017
@@ -0,0 +1,235 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -basicaa -newgvn -S | FileCheck %s
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+
+;; Function Attrs: nounwind ssp uwtable
+;; We should eliminate the sub, and one of the phi nodes
+define void @vnum_test1(i32* %data) #0 {
+; CHECK-LABEL: @vnum_test1(
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    [[TMP:%.*]] = getelementptr inbounds i32, i32* [[DATA:%.*]], i64 3
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TMP]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[DATA]], i64 4
+; CHECK-NEXT:    [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4
+; CHECK-NEXT:    br label [[BB4:%.*]]
+; CHECK:       bb4:
+; CHECK-NEXT:    [[M_0:%.*]] = phi i32 [ [[TMP3]], [[BB:%.*]] ], [ [[TMP15:%.*]], [[BB17:%.*]] ]
+; CHECK-NEXT:    [[I_0:%.*]] = phi i32 [ 0, [[BB]] ], [ [[TMP18:%.*]], [[BB17]] ]
+; CHECK-NEXT:    [[TMP5:%.*]] = icmp slt i32 [[I_0]], [[TMP1]]
+; CHECK-NEXT:    br i1 [[TMP5]], label [[BB6:%.*]], label [[BB19:%.*]]
+; CHECK:       bb6:
+; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[DATA]], i64 2
+; CHECK-NEXT:    [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4
+; CHECK-NEXT:    [[TMP9:%.*]] = sext i32 [[TMP8]] to i64
+; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i32, i32* [[DATA]], i64 [[TMP9]]
+; CHECK-NEXT:    store i32 2, i32* [[TMP10]], align 4
+; CHECK-NEXT:    store i32 0, i32* [[DATA]], align 4
+; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i32, i32* [[DATA]], i64 1
+; CHECK-NEXT:    [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4
+; CHECK-NEXT:    [[TMP15]] = add nsw i32 [[M_0]], [[TMP14]]
+; CHECK-NEXT:    br label [[BB17]]
+; CHECK:       bb17:
+; CHECK-NEXT:    [[TMP18]] = add nsw i32 [[I_0]], 1
+; CHECK-NEXT:    br label [[BB4]]
+; CHECK:       bb19:
+; CHECK-NEXT:    ret void
+;
+bb:
+  %tmp = getelementptr inbounds i32, i32* %data, i64 3
+  %tmp1 = load i32, i32* %tmp, align 4
+  %tmp2 = getelementptr inbounds i32, i32* %data, i64 4
+  %tmp3 = load i32, i32* %tmp2, align 4
+  br label %bb4
+
+bb4:                                              ; preds = %bb17, %bb
+  %m.0 = phi i32 [ %tmp3, %bb ], [ %tmp15, %bb17 ]
+  %i.0 = phi i32 [ 0, %bb ], [ %tmp18, %bb17 ]
+  %n.0 = phi i32 [ %tmp3, %bb ], [ %tmp16, %bb17 ]
+  %tmp5 = icmp slt i32 %i.0, %tmp1
+  br i1 %tmp5, label %bb6, label %bb19
+
+bb6:                                              ; preds = %bb4
+  %tmp7 = getelementptr inbounds i32, i32* %data, i64 2
+  %tmp8 = load i32, i32* %tmp7, align 4
+  %tmp9 = sext i32 %tmp8 to i64
+  %tmp10 = getelementptr inbounds i32, i32* %data, i64 %tmp9
+  store i32 2, i32* %tmp10, align 4
+  %tmp11 = sub nsw i32 %m.0, %n.0
+  %tmp12 = getelementptr inbounds i32, i32* %data, i64 0
+  store i32 %tmp11, i32* %tmp12, align 4
+  %tmp13 = getelementptr inbounds i32, i32* %data, i64 1
+  %tmp14 = load i32, i32* %tmp13, align 4
+  %tmp15 = add nsw i32 %m.0, %tmp14
+  %tmp16 = add nsw i32 %n.0, %tmp14
+  br label %bb17
+
+bb17:                                             ; preds = %bb6
+  %tmp18 = add nsw i32 %i.0, 1
+  br label %bb4
+
+bb19:                                             ; preds = %bb4
+  ret void
+}
+
+;; Function Attrs: nounwind ssp uwtable
+;; We should eliminate the sub, one of the phi nodes, prove the store of the sub
+;; and the load of data are equivalent, that the load always produces constant 0, and
+;; delete the load replacing it with constant 0.
+define i32 @vnum_test2(i32* %data) #0 {
+; CHECK-LABEL: @vnum_test2(
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    [[TMP:%.*]] = getelementptr inbounds i32, i32* [[DATA:%.*]], i64 3
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TMP]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[DATA]], i64 4
+; CHECK-NEXT:    [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4
+; CHECK-NEXT:    br label [[BB4:%.*]]
+; CHECK:       bb4:
+; CHECK-NEXT:    [[M_0:%.*]] = phi i32 [ [[TMP3]], [[BB:%.*]] ], [ [[TMP15:%.*]], [[BB19:%.*]] ]
+; CHECK-NEXT:    [[I_0:%.*]] = phi i32 [ 0, [[BB]] ], [ [[TMP20:%.*]], [[BB19]] ]
+; CHECK-NEXT:    [[TMP5:%.*]] = icmp slt i32 [[I_0]], [[TMP1]]
+; CHECK-NEXT:    br i1 [[TMP5]], label [[BB6:%.*]], label [[BB21:%.*]]
+; CHECK:       bb6:
+; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[DATA]], i64 2
+; CHECK-NEXT:    [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4
+; CHECK-NEXT:    [[TMP9:%.*]] = sext i32 [[TMP8]] to i64
+; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i32, i32* [[DATA]], i64 [[TMP9]]
+; CHECK-NEXT:    store i32 2, i32* [[TMP10]], align 4
+; CHECK-NEXT:    store i32 0, i32* [[DATA]], align 4
+; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i32, i32* [[DATA]], i64 1
+; CHECK-NEXT:    [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4
+; CHECK-NEXT:    [[TMP15]] = add nsw i32 [[M_0]], [[TMP14]]
+; CHECK-NEXT:    br label [[BB19]]
+; CHECK:       bb19:
+; CHECK-NEXT:    [[TMP20]] = add nsw i32 [[I_0]], 1
+; CHECK-NEXT:    br label [[BB4]]
+; CHECK:       bb21:
+; CHECK-NEXT:    ret i32 0
+;
+bb:
+  %tmp = getelementptr inbounds i32, i32* %data, i64 3
+  %tmp1 = load i32, i32* %tmp, align 4
+  %tmp2 = getelementptr inbounds i32, i32* %data, i64 4
+  %tmp3 = load i32, i32* %tmp2, align 4
+  br label %bb4
+
+bb4:                                              ; preds = %bb19, %bb
+  %m.0 = phi i32 [ %tmp3, %bb ], [ %tmp15, %bb19 ]
+  %n.0 = phi i32 [ %tmp3, %bb ], [ %tmp16, %bb19 ]
+  %i.0 = phi i32 [ 0, %bb ], [ %tmp20, %bb19 ]
+  %p.0 = phi i32 [ undef, %bb ], [ %tmp18, %bb19 ]
+  %tmp5 = icmp slt i32 %i.0, %tmp1
+  br i1 %tmp5, label %bb6, label %bb21
+
+bb6:                                              ; preds = %bb4
+  %tmp7 = getelementptr inbounds i32, i32* %data, i64 2
+  %tmp8 = load i32, i32* %tmp7, align 4
+  %tmp9 = sext i32 %tmp8 to i64
+  %tmp10 = getelementptr inbounds i32, i32* %data, i64 %tmp9
+  store i32 2, i32* %tmp10, align 4
+  %tmp11 = sub nsw i32 %m.0, %n.0
+  %tmp12 = getelementptr inbounds i32, i32* %data, i64 0
+  store i32 %tmp11, i32* %tmp12, align 4
+  %tmp13 = getelementptr inbounds i32, i32* %data, i64 1
+  %tmp14 = load i32, i32* %tmp13, align 4
+  %tmp15 = add nsw i32 %m.0, %tmp14
+  %tmp16 = add nsw i32 %n.0, %tmp14
+  %tmp17 = getelementptr inbounds i32, i32* %data, i64 0
+  %tmp18 = load i32, i32* %tmp17, align 4
+  br label %bb19
+
+bb19:                                             ; preds = %bb6
+  %tmp20 = add nsw i32 %i.0, 1
+  br label %bb4
+
+bb21:                                             ; preds = %bb4
+  ret i32 %p.0
+}
+
+
+; Function Attrs: nounwind ssp uwtable
+;; Same as test 2, with a conditional store of m-n, so it has to also discover
+;; that data ends up with the same value no matter what branch is taken.
+define i32 @vnum_test3(i32* %data) #0 {
+; CHECK-LABEL: @vnum_test3(
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    [[TMP:%.*]] = getelementptr inbounds i32, i32* [[DATA:%.*]], i64 3
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TMP]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[DATA]], i64 4
+; CHECK-NEXT:    [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4
+; CHECK-NEXT:    br label [[BB4:%.*]]
+; CHECK:       bb4:
+; CHECK-NEXT:    [[N_0:%.*]] = phi i32 [ [[TMP3]], [[BB:%.*]] ], [ [[TMP19:%.*]], [[BB21:%.*]] ]
+; CHECK-NEXT:    [[I_0:%.*]] = phi i32 [ 0, [[BB]] ], [ [[TMP22:%.*]], [[BB21]] ]
+; CHECK-NEXT:    [[TMP5:%.*]] = icmp slt i32 [[I_0]], [[TMP1]]
+; CHECK-NEXT:    br i1 [[TMP5]], label [[BB6:%.*]], label [[BB23:%.*]]
+; CHECK:       bb6:
+; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[DATA]], i64 2
+; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i32, i32* [[DATA]], i64 5
+; CHECK-NEXT:    store i32 0, i32* [[TMP9]], align 4
+; CHECK-NEXT:    [[TMP10:%.*]] = icmp slt i32 [[I_0]], 30
+; CHECK-NEXT:    br i1 [[TMP10]], label [[BB11:%.*]], label [[BB14:%.*]]
+; CHECK:       bb11:
+; CHECK-NEXT:    store i32 0, i32* [[TMP9]], align 4
+; CHECK-NEXT:    br label [[BB14]]
+; CHECK:       bb14:
+; CHECK-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i32, i32* [[DATA]], i64 1
+; CHECK-NEXT:    [[TMP18:%.*]] = load i32, i32* [[TMP17]], align 4
+; CHECK-NEXT:    [[TMP19]] = add nsw i32 [[N_0]], [[TMP18]]
+; CHECK-NEXT:    br label [[BB21]]
+; CHECK:       bb21:
+; CHECK-NEXT:    [[TMP22]] = add nsw i32 [[I_0]], 1
+; CHECK-NEXT:    br label [[BB4]]
+; CHECK:       bb23:
+; CHECK-NEXT:    ret i32 0
+;
+bb:
+  %tmp = getelementptr inbounds i32, i32* %data, i64 3
+  %tmp1 = load i32, i32* %tmp, align 4
+  %tmp2 = getelementptr inbounds i32, i32* %data, i64 4
+  %tmp3 = load i32, i32* %tmp2, align 4
+  br label %bb4
+
+bb4:                                              ; preds = %bb21, %bb
+  %n.0 = phi i32 [ %tmp3, %bb ], [ %tmp20, %bb21 ]
+  %m.0 = phi i32 [ %tmp3, %bb ], [ %tmp19, %bb21 ]
+  %p.0 = phi i32 [ 0, %bb ], [ %tmp16, %bb21 ]
+  %i.0 = phi i32 [ 0, %bb ], [ %tmp22, %bb21 ]
+  %tmp5 = icmp slt i32 %i.0, %tmp1
+  br i1 %tmp5, label %bb6, label %bb23
+
+bb6:                                              ; preds = %bb4
+  %tmp7 = getelementptr inbounds i32, i32* %data, i64 2
+  %tmp8 = load i32, i32* %tmp7, align 4
+  %tmp9 = getelementptr inbounds i32, i32* %data, i64 5
+  store i32 0, i32* %tmp9, align 4
+  %tmp10 = icmp slt i32 %i.0, 30
+  br i1 %tmp10, label %bb11, label %bb14
+
+bb11:                                             ; preds = %bb6
+  %tmp12 = sub nsw i32 %m.0, %n.0
+  %tmp13 = getelementptr inbounds i32, i32* %data, i64 5
+  store i32 %tmp12, i32* %tmp13, align 4
+  br label %bb14
+
+bb14:                                             ; preds = %bb11, %bb6
+  %tmp15 = getelementptr inbounds i32, i32* %data, i64 5
+  %tmp16 = load i32, i32* %tmp15, align 4
+  %tmp17 = getelementptr inbounds i32, i32* %data, i64 1
+  %tmp18 = load i32, i32* %tmp17, align 4
+  %tmp19 = add nsw i32 %m.0, %tmp18
+  %tmp20 = add nsw i32 %n.0, %tmp18
+  br label %bb21
+
+bb21:                                             ; preds = %bb14
+  %tmp22 = add nsw i32 %i.0, 1
+  br label %bb4
+
+bb23:                                             ; preds = %bb4
+  ret i32 %p.0
+}
+
+attributes #0 = { nounwind ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!llvm.ident = !{!0, !0, !0}
+
+!0 = !{!"Apple LLVM version 6.0 (clang-600.0.56) (based on LLVM 3.5svn)"}

Added: llvm/trunk/test/Transforms/NewGVN/memory-handling.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/NewGVN/memory-handling.ll?rev=291351&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/NewGVN/memory-handling.ll (added)
+++ llvm/trunk/test/Transforms/NewGVN/memory-handling.ll Sat Jan  7 10:55:14 2017
@@ -0,0 +1,195 @@
+;; This test is really dependent on propagating a lot of memory info around, but in the end, not
+;; screwing up a single add.
+; RUN: opt < %s -basicaa -newgvn -S | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+%struct.Letter = type { i32, i32, i32, i32 }
+
+@alPhrase = external local_unnamed_addr global [26 x %struct.Letter], align 16
+@aqMainMask = external local_unnamed_addr global [2 x i64], align 16
+@aqMainSign = external local_unnamed_addr global [2 x i64], align 16
+@cchPhraseLength = external local_unnamed_addr global i32, align 4
+@auGlobalFrequency = external local_unnamed_addr global [26 x i32], align 16
+@.str.7 = external hidden unnamed_addr constant [28 x i8], align 1
+
+; Function Attrs: nounwind uwtable
+declare void @Fatal(i8*, i32) local_unnamed_addr #0
+
+; Function Attrs: nounwind readnone
+declare i16** @__ctype_b_loc() local_unnamed_addr #1
+
+; Function Attrs: nounwind uwtable
+define void @BuildMask(i8* nocapture readonly) local_unnamed_addr #0 {
+  tail call void @llvm.memset.p0i8.i64(i8* bitcast ([26 x %struct.Letter]* @alPhrase to i8*), i8 0, i64 416, i32 16, i1 false)
+  tail call void @llvm.memset.p0i8.i64(i8* bitcast ([2 x i64]* @aqMainMask to i8*), i8 0, i64 16, i32 16, i1 false)
+  tail call void @llvm.memset.p0i8.i64(i8* bitcast ([2 x i64]* @aqMainSign to i8*), i8 0, i64 16, i32 16, i1 false)
+  br label %.sink.split
+
+.sink.split:                                      ; preds = %14, %1
+  %.0 = phi i8* [ %0, %1 ], [ %.lcssa67, %14 ]
+  %.sink = phi i32 [ 0, %1 ], [ %23, %14 ]
+  store i32 %.sink, i32* @cchPhraseLength, align 4, !tbaa !1
+  br label %2
+
+; <label>:2:                                      ; preds = %6, %.sink.split
+  %.1 = phi i8* [ %.0, %.sink.split ], [ %3, %6 ]
+  %3 = getelementptr inbounds i8, i8* %.1, i64 1
+  %4 = load i8, i8* %.1, align 1, !tbaa !5
+  %5 = icmp eq i8 %4, 0
+  br i1 %5, label %.preheader.preheader, label %6
+
+.preheader.preheader:                             ; preds = %2
+  br label %.preheader
+
+; <label>:6:                                      ; preds = %2
+  %7 = tail call i16** @__ctype_b_loc() #4
+  %8 = load i16*, i16** %7, align 8, !tbaa !6
+  %9 = sext i8 %4 to i64
+  %10 = getelementptr inbounds i16, i16* %8, i64 %9
+  %11 = load i16, i16* %10, align 2, !tbaa !8
+  %12 = and i16 %11, 1024
+  %13 = icmp eq i16 %12, 0
+  br i1 %13, label %2, label %14
+
+; <label>:14:                                     ; preds = %6
+  %.lcssa67 = phi i8* [ %3, %6 ]
+  %.lcssa65 = phi i8 [ %4, %6 ]
+  %15 = sext i8 %.lcssa65 to i32
+  %16 = tail call i32 @tolower(i32 %15) #5
+  %17 = add nsw i32 %16, -97
+  %18 = sext i32 %17 to i64
+  %19 = getelementptr inbounds [26 x %struct.Letter], [26 x %struct.Letter]* @alPhrase, i64 0, i64 %18, i32 0
+  %20 = load i32, i32* %19, align 16, !tbaa !10
+  %21 = add i32 %20, 1
+  store i32 %21, i32* %19, align 16, !tbaa !10
+  %22 = load i32, i32* @cchPhraseLength, align 4, !tbaa !1
+  %23 = add nsw i32 %22, 1
+  br label %.sink.split
+
+.preheader:                                       ; preds = %58, %.preheader.preheader
+  %indvars.iv = phi i64 [ 0, %.preheader.preheader ], [ %indvars.iv.next, %58 ]
+  %.04961 = phi i32 [ %.2, %58 ], [ 0, %.preheader.preheader ]
+  %.05160 = phi i32 [ %.253, %58 ], [ 0, %.preheader.preheader ]
+  %24 = getelementptr inbounds [26 x %struct.Letter], [26 x %struct.Letter]* @alPhrase, i64 0, i64 %indvars.iv, i32 0
+  %25 = load i32, i32* %24, align 16, !tbaa !10
+  %26 = icmp eq i32 %25, 0
+  %27 = getelementptr inbounds [26 x i32], [26 x i32]* @auGlobalFrequency, i64 0, i64 %indvars.iv
+  br i1 %26, label %28, label %29
+
+; <label>:28:                                     ; preds = %.preheader
+  store i32 -1, i32* %27, align 4, !tbaa !1
+  br label %58
+
+; <label>:29:                                     ; preds = %.preheader
+  store i32 0, i32* %27, align 4, !tbaa !1
+  %30 = zext i32 %25 to i64
+  br i1 false, label %._crit_edge, label %.lr.ph.preheader
+
+.lr.ph.preheader:                                 ; preds = %29
+  br label %.lr.ph
+
+.lr.ph:                                           ; preds = %.lr.ph, %.lr.ph.preheader
+  %.04658 = phi i64 [ %32, %.lr.ph ], [ 1, %.lr.ph.preheader ]
+  %.04857 = phi i32 [ %31, %.lr.ph ], [ 1, %.lr.ph.preheader ]
+  %31 = add nuw nsw i32 %.04857, 1
+  %32 = shl i64 %.04658, 1
+  %33 = icmp ult i64 %30, %32
+  br i1 %33, label %._crit_edge.loopexit, label %.lr.ph
+
+._crit_edge.loopexit:                             ; preds = %.lr.ph
+  %.lcssa63 = phi i32 [ %31, %.lr.ph ]
+  %.lcssa = phi i64 [ %32, %.lr.ph ]
+  br label %._crit_edge
+
+._crit_edge:                                      ; preds = %._crit_edge.loopexit, %29
+  %.048.lcssa = phi i32 [ 1, %29 ], [ %.lcssa63, %._crit_edge.loopexit ]
+  %.046.lcssa = phi i64 [ 1, %29 ], [ %.lcssa, %._crit_edge.loopexit ]
+  %34 = add nsw i32 %.048.lcssa, %.04961
+  %35 = icmp ugt i32 %34, 64
+  br i1 %35, label %36, label %40
+
+; <label>:36:                                     ; preds = %._crit_edge
+; This testcase essentially comes down to this little add.
+; If we screw up the revisitation of the users of store of %sink above
+; we will end up propagating and simplifying this to 1 in the final output
+; because we keep an optimistic assumption we should not.
+; CHECK:  add i32 %.05160, 1
+  %37 = add i32 %.05160, 1
+  %38 = icmp ugt i32 %37, 1
+  br i1 %38, label %39, label %40
+
+; <label>:39:                                     ; preds = %36
+  tail call void @Fatal(i8* getelementptr inbounds ([28 x i8], [28 x i8]* @.str.7, i64 0, i64 0), i32 0)
+  br label %40
+
+; <label>:40:                                     ; preds = %39, %36, %._crit_edge
+  %.152 = phi i32 [ %.05160, %._crit_edge ], [ %37, %39 ], [ %37, %36 ]
+  %.150 = phi i32 [ %.04961, %._crit_edge ], [ 0, %39 ], [ 0, %36 ]
+  %41 = add i64 %.046.lcssa, 4294967295
+  %42 = trunc i64 %41 to i32
+  %43 = getelementptr inbounds [26 x %struct.Letter], [26 x %struct.Letter]* @alPhrase, i64 0, i64 %indvars.iv, i32 2
+  store i32 %42, i32* %43, align 8, !tbaa !12
+  %44 = zext i32 %.150 to i64
+  %.046. = shl i64 %.046.lcssa, %44
+  %45 = zext i32 %.152 to i64
+  %46 = getelementptr inbounds [2 x i64], [2 x i64]* @aqMainSign, i64 0, i64 %45
+  %47 = load i64, i64* %46, align 8, !tbaa !13
+  %48 = or i64 %47, %.046.
+  store i64 %48, i64* %46, align 8, !tbaa !13
+  %49 = load i32, i32* %24, align 16, !tbaa !10
+  %50 = zext i32 %49 to i64
+  %51 = shl i64 %50, %44
+  %52 = getelementptr inbounds [2 x i64], [2 x i64]* @aqMainMask, i64 0, i64 %45
+  %53 = load i64, i64* %52, align 8, !tbaa !13
+  %54 = or i64 %51, %53
+  store i64 %54, i64* %52, align 8, !tbaa !13
+  %55 = getelementptr inbounds [26 x %struct.Letter], [26 x %struct.Letter]* @alPhrase, i64 0, i64 %indvars.iv, i32 1
+  store i32 %.150, i32* %55, align 4, !tbaa !15
+  %56 = getelementptr inbounds [26 x %struct.Letter], [26 x %struct.Letter]* @alPhrase, i64 0, i64 %indvars.iv, i32 3
+  store i32 %.152, i32* %56, align 4, !tbaa !16
+  %57 = add nsw i32 %.150, %.048.lcssa
+  br label %58
+
+; <label>:58:                                     ; preds = %40, %28
+  %.253 = phi i32 [ %.05160, %28 ], [ %.152, %40 ]
+  %.2 = phi i32 [ %.04961, %28 ], [ %57, %40 ]
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp ne i64 %indvars.iv.next, 26
+  br i1 %exitcond, label %.preheader, label %59
+
+; <label>:59:                                     ; preds = %58
+  ret void
+}
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i32, i1) #2
+
+; Function Attrs: inlinehint nounwind readonly uwtable
+declare i32 @tolower(i32) local_unnamed_addr #3
+
+attributes #0 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind readnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { argmemonly nounwind }
+attributes #3 = { inlinehint nounwind readonly uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #4 = { nounwind readnone }
+attributes #5 = { nounwind readonly }
+
+!llvm.ident = !{!0}
+
+!0 = !{!"clang version 4.0.0 (http://llvm.org/git/clang.git 9b9db7fa41a1905899dbcbcc6cbdd05d2511da8e) (/Users/dannyb/sources/llvm-clean a3908a41623f6ac14ba8c04613d6c64e0544bb5d)"}
+!1 = !{!2, !2, i64 0}
+!2 = !{!"int", !3, i64 0}
+!3 = !{!"omnipotent char", !4, i64 0}
+!4 = !{!"Simple C/C++ TBAA"}
+!5 = !{!3, !3, i64 0}
+!6 = !{!7, !7, i64 0}
+!7 = !{!"any pointer", !3, i64 0}
+!8 = !{!9, !9, i64 0}
+!9 = !{!"short", !3, i64 0}
+!10 = !{!11, !2, i64 0}
+!11 = !{!"", !2, i64 0, !2, i64 4, !2, i64 8, !2, i64 12}
+!12 = !{!11, !2, i64 8}
+!13 = !{!14, !14, i64 0}
+!14 = !{!"long", !3, i64 0}
+!15 = !{!11, !2, i64 4}
+!16 = !{!11, !2, i64 12}




More information about the llvm-commits mailing list