[llvm-commits] [llvm] r92411 - in /llvm/trunk: lib/Transforms/Scalar/InstructionCombining.cpp test/Transforms/InstCombine/load-cmp.ll

Chris Lattner sabre at nondot.org
Sat Jan 2 00:12:07 PST 2010


Author: lattner
Date: Sat Jan  2 02:12:04 2010
New Revision: 92411

URL: http://llvm.org/viewvc/llvm-project?rev=92411&view=rev
Log:
Teach instcombine to fold compares of loads from constant
arrays with variable indices into a comparison of the index
with a constant.  The most common occurrence of this that
I see by far is stuff like:

if ("foobar"[i] == '\0') ...

which we compile into: if (i == 6), saving a load and 
materialization of the global address.  This also exposes 
loop trip count information to later passes in many cases.

This triggers hundreds of times in xalancbmk, which is where I first
noticed it, but it also triggers in many other apps.  Here are a few 
interesting ones from various apps:

@must_be_connected_without = internal constant [8 x i8*] [i8* getelementptr inbounds ([3 x i8]* @.str64320, i64 0, i64 0), i8* getelementptr inbounds ([3 x i8]* @.str27283, i64 0, i64 0), i8* getelementptr inbounds ([4 x i8]* @.str71327, i64 0, i64 0), i8* getelementptr inbounds ([4 x i8]* @.str72328, i64 0, i64 0), i8* getelementptr inbounds ([3 x i8]* @.str18274, i64 0, i64 0), i8* getelementptr inbounds ([6 x i8]* @.str11267, i64 0, i64 0), i8* getelementptr inbounds ([3 x i8]* @.str32288, i64 0, i64 0), i8* null], align 32 ; <[8 x i8*]*> [#uses=2]
  %scevgep.i = getelementptr [8 x i8*]* @must_be_connected_without, i64 0, i64 %indvar.i ; <i8**> [#uses=1]
  %17 = load ...
  %18 = icmp eq i8* %17, null                     ; <i1> [#uses=1]
-> icmp eq i64 %indvar.i, 7 


@yytable1095 = internal constant [84 x i8] c"\12\01(\05\06\07\08\09\0A\0B\0C\0D\0E1\0F\10\11266\1D: \10\11,-,0\03'\10\11B6\04\17&\18\1945\05\06\07\08\09\0A\0B\0C\0D\0E\1E\0F\10\11*\1A\1B\1C$3+>#%;<IJ=ADFEGH9KL\00\00\00C", align 32 ; <[84 x i8]*> [#uses=2]
  %57 = getelementptr inbounds [84 x i8]* @yytable1095, i64 0, i64 %56 ; <i8*> [#uses=1]
   %mode.0.in = getelementptr inbounds [9 x i32]* @mb_mode_table, i64 0, i64 %.pn ; <i32*> [#uses=1]
load ...
   %64 = icmp eq i8 %58, 4                         ; <i1> [#uses=1]
-> icmp eq i64 %.pn, 35             ; <i1> [#uses=0]


@gsm_DLB = internal constant [4 x i16] [i16 6554, i16 16384, i16 26214, i16 32767]
%scevgep.i = getelementptr [4 x i16]* @gsm_DLB, i64 0, i64 %indvar.i ; <i16*> [#uses=1]
%425 = load %scevgep.i
%426 = icmp eq i16 %425, -32768                 ; <i1> [#uses=0]
-> false


Added:
    llvm/trunk/test/Transforms/InstCombine/load-cmp.ll
Modified:
    llvm/trunk/lib/Transforms/Scalar/InstructionCombining.cpp

Modified: llvm/trunk/lib/Transforms/Scalar/InstructionCombining.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/InstructionCombining.cpp?rev=92411&r1=92410&r2=92411&view=diff

==============================================================================
--- llvm/trunk/lib/Transforms/Scalar/InstructionCombining.cpp (original)
+++ llvm/trunk/lib/Transforms/Scalar/InstructionCombining.cpp Sat Jan  2 02:12:04 2010
@@ -258,6 +258,8 @@
     Instruction *commonShiftTransforms(BinaryOperator &I);
     Instruction *FoldFCmp_IntToFP_Cst(FCmpInst &I, Instruction *LHSI,
                                       Constant *RHSC);
+    Instruction *FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP,
+                                              GlobalVariable *GV, CmpInst &ICI);
     Instruction *visitFCmpInst(FCmpInst &I);
     Instruction *visitICmpInst(ICmpInst &I);
     Instruction *visitICmpInstWithCastAndCast(ICmpInst &ICI);
@@ -6015,6 +6017,112 @@
   return new ICmpInst(Pred, LHSI->getOperand(0), RHSInt);
 }
 
+
+/// FoldCmpLoadFromIndexedGlobal - Called we see this pattern:
+///   cmp pred (load (gep GV, ...)), cmpcst
+/// where GV is a global variable with a constant initializer.  Try to simplify
+/// this into one or two simpler comparisons that do not need the load.  For
+/// example, we can optimize "icmp eq (load (gep "foo", 0, i)), 0" into
+/// "icmp eq i, 3".  We assume that eliminating a load is always goodness.
+Instruction *InstCombiner::
+FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV,
+                             CmpInst &ICI) {
+  
+  // There are many forms of this optimization we can handle, for now, just do
+  // the simple index into a single-dimensional array.
+  //
+  // Require: GEP GV, 0, i
+  if (GEP->getNumOperands() != 3 ||
+      !isa<ConstantInt>(GEP->getOperand(1)) ||
+      !cast<ConstantInt>(GEP->getOperand(1))->isZero())
+    return 0;
+  
+  ConstantArray *Init = dyn_cast<ConstantArray>(GV->getInitializer());
+  if (Init == 0 || Init->getNumOperands() > 1024) return 0;
+  
+  
+  // Variables for our state machines.
+  
+  // OnlyTrueElement - Used to emit a comparison of "i == 47", where 47 is the
+  // only index the condition is true for.  The values are -1 -> undef,
+  // -2 -> overdef, >= 0 -> that index is true.
+  int OnlyTrueElement = -1;
+
+  // OnlyFalseElement - Used to emit a comparison of "i != 47", where 47 is the
+  // only index the condition is false for.  The values are -1 -> undef,
+  // -2 -> overdef, >= 0 -> that index is false.
+  int OnlyFalseElement = -1;
+  
+  // Scan the array and see if one of our patterns matches.
+  Constant *CompareRHS = cast<Constant>(ICI.getOperand(1));
+  for (unsigned i = 0, e = Init->getNumOperands(); i != e; ++i) {
+    // Find out if the comparison would be true or false for the i'th element.
+    Constant *C = ConstantFoldCompareInstOperands(ICI.getPredicate(),
+                                                  Init->getOperand(i),
+                                                  CompareRHS, TD);
+    // If the result is undef for this element, ignore it.
+    if (isa<UndefValue>(C)) continue;
+    
+    // If we can't compute the result for any of the elements, we have to give
+    // up evaluating the entire conditional.
+    if (!isa<ConstantInt>(C)) return 0;
+    
+    // Otherwise, we know if the comparison is true or false for this element,
+    // update our state machines.
+    bool IsTrueForElt = !cast<ConstantInt>(C)->isZero();
+    
+    // State machine for single index comparison.
+    if (IsTrueForElt) {
+      // If undefined -> defined.  Otherwise -> overdefined.
+      OnlyTrueElement = OnlyTrueElement == -1 ? i : -2;
+    } else {
+      // If undefined -> defined.  Otherwise -> overdefined.
+      OnlyFalseElement = OnlyFalseElement == -1 ? i : -2;
+    }
+    
+    // If all of our states become overdefined, bail out early.
+    if (OnlyTrueElement == -2 && OnlyFalseElement == -2)
+      return 0;
+  }
+
+  // Now that we've scanned the entire array, emit our new comparison(s).  We
+  // order the state machines in complexity of the generated code.
+  if (OnlyTrueElement != -2) {
+    // None true -> false.
+    if (OnlyTrueElement == -1)
+      return ReplaceInstUsesWith(ICI, ConstantInt::getFalse(*Context));
+    
+    // True for one element -> 'i == 47'.
+    return new ICmpInst(ICmpInst::ICMP_EQ, GEP->getOperand(2),
+                        ConstantInt::get(GEP->getOperand(2)->getType(),
+                                         OnlyTrueElement));
+  }
+
+  if (OnlyFalseElement != -2) {
+    // None false -> true.
+    if (OnlyFalseElement == -1)
+      return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(*Context));
+    
+    return new ICmpInst(ICmpInst::ICMP_NE, GEP->getOperand(2),
+                        ConstantInt::get(GEP->getOperand(2)->getType(),
+                                         OnlyFalseElement));
+  }
+  
+  assert(0 && "Should have bailed out early");
+  
+  // TODO: FCMP.
+  
+  // TODO: Range check.
+  
+  // TODO: If the global array has 32 (or 64 if native!) or less entries, we
+  // can turn this into something like:
+  //  ((magicbitconstant >> i) & 1) != 0)
+  // where we populate magicbitconstant with 0101010 based on the comparison
+  // results.
+  return 0;
+}
+
+
 Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) {
   bool Changed = false;
   
@@ -6175,7 +6283,7 @@
     Value *A = 0, *B = 0;
     
     // (icmp ne/eq (sub A B) 0) -> (icmp ne/eq A, B)
-    if (I.isEquality() && CI->isNullValue() &&
+    if (I.isEquality() && CI->isZero() &&
         match(Op0, m_Sub(m_Value(A), m_Value(B)))) {
       // (icmp cond A B) if cond is equality
       return new ICmpInst(I.getPredicate(), A, B);
@@ -6475,6 +6583,17 @@
           return new ICmpInst(I.getPredicate(), LHSI->getOperand(0),
                         Constant::getNullValue(LHSI->getOperand(0)->getType()));
         break;
+
+      case Instruction::Load:
+        if (GetElementPtrInst *GEP =
+              dyn_cast<GetElementPtrInst>(LHSI->getOperand(0)))
+          if (GlobalVariable *GV = dyn_cast<GlobalVariable>(GEP->getOperand(0)))
+            if (GV->isConstant() && GV->hasDefinitiveInitializer() &&
+                !cast<LoadInst>(LHSI)->isVolatile()) {
+              if (Instruction *Res = FoldCmpLoadFromIndexedGlobal(GEP, GV, I))
+                return Res;
+            }
+        break;
       }
   }
 

Added: llvm/trunk/test/Transforms/InstCombine/load-cmp.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/load-cmp.ll?rev=92411&view=auto

==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/load-cmp.ll (added)
+++ llvm/trunk/test/Transforms/InstCombine/load-cmp.ll Sat Jan  2 02:12:04 2010
@@ -0,0 +1,25 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+ at G16 = internal constant [10 x i16] [i16 35, i16 82, i16 69, i16 81, i16 85, 
+                                     i16 73, i16 82, i16 69, i16 68, i16 0]
+
+define i1 @test1(i32 %X) {
+  %P = getelementptr [10 x i16]* @G16, i32 0, i32 %X
+  %Q = load i16* %P
+  %R = icmp eq i16 %Q, 0
+  ret i1 %R
+; CHECK: @test1
+; CHECK-NEXT: %R = icmp eq i32 %X, 9
+; CHECK-NEXT: ret i1 %R
+}
+
+define i1 @test2(i32 %X) {
+  %P = getelementptr [10 x i16]* @G16, i32 0, i32 %X
+  %Q = load i16* %P
+  %R = icmp slt i16 %Q, 85
+  ret i1 %R
+; CHECK: @test2
+; CHECK-NEXT: %R = icmp ne i32 %X, 4
+; CHECK-NEXT: ret i1 %R
+}
+





More information about the llvm-commits mailing list