[llvm-commits] [llvm] r92426 - in /llvm/trunk: lib/Transforms/Scalar/InstructionCombining.cpp test/Transforms/InstCombine/load-cmp.ll

Chris Lattner sabre at nondot.org
Sat Jan 2 13:50:25 PST 2010


Author: lattner
Date: Sat Jan  2 15:50:18 2010
New Revision: 92426

URL: http://llvm.org/viewvc/llvm-project?rev=92426&view=rev
Log:
Teach the table lookup optimization to generate range compares
when a consecutive sequence of elements all satisfies the 
predicate.  Like the double compare case, this generates better
code than the magic constant case and generalizes to more than
32/64 element array lookups.

Here are some examples where it triggers.  From 403.gcc, most
accesses to the rtx_class array are handled, e.g.:

@rtx_class = constant [153 x i8] c"xxxxxmmmmmmmmxxxxxxxxxxxxmxxxxxxiiixxxxxxxxxxxxxxxxxxxooxooooooxxoooooox3x2c21c2222ccc122222ccccaaaaaa<<<<<<<<<<<<<<<<<<111111111111bbooxxxxxxxxxxcc2211x", align 32 ; <[153 x i8]*> [#uses=547]
   %142 = icmp eq i8 %141, 105
@rtx_class = constant [153 x i8] c"xxxxxmmmmmmmmxxxxxxxxxxxxmxxxxxxiiixxxxxxxxxxxxxxxxxxxooxooooooxxoooooox3x2c21c2222ccc122222ccccaaaaaa<<<<<<<<<<<<<<<<<<111111111111bbooxxxxxxxxxxcc2211x", align 32 ; <[153 x i8]*> [#uses=543]
	   %165 = icmp eq i8 %164, 60      

Also, most of the 59-element arrays (mode_class/rid_to_yy, etc) 
optimized before are actually range compares.  This lets 32-bit
machines optimize them.

400.perlbmk has stuff like this:

400.perlbmk: PL_regkind, even for 32-bit:
@PL_regkind = constant [62 x i8] c"\00\00\02\02\02\06\06\06\06\09\09\0B\0B\0D\0E\0E\0E\11\12\12\14\14\16\16\18\18\1A\1A\1C\1C\1E\1F !!!$$&'((((,-.///88886789:;8$", align 32 ; <[62 x i8]*> [#uses=4]
	   %811 = icmp ne i8 %810, 33 

@PL_utf8skip = constant [256 x i8] c"\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\02\02\02\02\02\02\02\02\02\02\02\02\02\02\02\02\02\02\02\02\02\02\02\02\02\02\02\02\02\02\02\02\03\03\03\03\03\03\03\03\03\03\03\03\03\03\03\03\04\04\04\04\04\04\04\04\05\05\05\05\06\06\07\0D", align 32 ; <[256 x i8]*> [#uses=94]
	   %12 = icmp ult i8 %10, 2
           
etc.


Modified:
    llvm/trunk/lib/Transforms/Scalar/InstructionCombining.cpp
    llvm/trunk/test/Transforms/InstCombine/load-cmp.ll

Modified: llvm/trunk/lib/Transforms/Scalar/InstructionCombining.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/InstructionCombining.cpp?rev=92426&r1=92425&r2=92426&view=diff

==============================================================================
--- llvm/trunk/lib/Transforms/Scalar/InstructionCombining.cpp (original)
+++ llvm/trunk/lib/Transforms/Scalar/InstructionCombining.cpp Sat Jan  2 15:50:18 2010
@@ -6053,6 +6053,14 @@
   // form "i != 47 & i != 87".  Same state transitions as for true elements.
   int FirstFalseElement = Undefined, SecondFalseElement = Undefined;
   
+  /// TrueRangeEnd/FalseRangeEnd - In conjunction with First*Element, these
+  /// define a state machine that triggers for ranges of values that the index
+  /// is true or false for.  This triggers on things like "abbbbc"[i] == 'b'.
+  /// This is -2 when undefined, -3 when overdefined, and otherwise the last
+  /// index in the range (inclusive).  We use -2 for undefined here because we
+  /// use relative comparisons and don't want 0-1 to match -1.
+  int TrueRangeEnd = Undefined, FalseRangeEnd = Undefined;
+  
   // MagicBitvector - This is a magic bitvector where we set a bit if the
   // comparison is true for element 'i'.  If there are 64 elements or less in
   // the array, this will fully represent all the comparison results.
@@ -6067,7 +6075,15 @@
                                                   Init->getOperand(i),
                                                   CompareRHS, TD);
     // If the result is undef for this element, ignore it.
-    if (isa<UndefValue>(C)) continue;
+    if (isa<UndefValue>(C)) {
+      // Extend range state machines to cover this element in case there is an
+      // undef in the middle of the range.
+      if (TrueRangeEnd == (int)i-1)
+        TrueRangeEnd = i;
+      if (FalseRangeEnd == (int)i-1)
+        FalseRangeEnd = i;
+      continue;
+    }
     
     // If we can't compute the result for any of the elements, we have to give
     // up evaluating the entire conditional.
@@ -6077,32 +6093,54 @@
     // update our state machines.
     bool IsTrueForElt = !cast<ConstantInt>(C)->isZero();
     
-    // State machine for single index comparison.
+    // State machine for single/double/range index comparison.
     if (IsTrueForElt) {
       // Update the TrueElement state machine.
       if (FirstTrueElement == Undefined)
-        FirstTrueElement = i;
-      else if (SecondTrueElement == Undefined)
-        SecondTrueElement = i;
-      else
-        SecondTrueElement = Overdefined;
+        FirstTrueElement = TrueRangeEnd = i;  // First true element.
+      else {
+        // Update double-compare state machine.
+        if (SecondTrueElement == Undefined)
+          SecondTrueElement = i;
+        else
+          SecondTrueElement = Overdefined;
+        
+        // Update range state machine.
+        if (TrueRangeEnd == (int)i-1)
+          TrueRangeEnd = i;
+        else
+          TrueRangeEnd = Overdefined;
+      }
     } else {
       // Update the FalseElement state machine.
       if (FirstFalseElement == Undefined)
-        FirstFalseElement = i;
-      else if (SecondFalseElement == Undefined)
-        SecondFalseElement = i;
-      else
-        SecondFalseElement = Overdefined;
+        FirstFalseElement = FalseRangeEnd = i; // First false element.
+      else {
+        // Update double-compare state machine.
+        if (SecondFalseElement == Undefined)
+          SecondFalseElement = i;
+        else
+          SecondFalseElement = Overdefined;
+        
+        // Update range state machine.
+        if (FalseRangeEnd == (int)i-1)
+          FalseRangeEnd = i;
+        else
+          FalseRangeEnd = Overdefined;
+      }
     }
     
+    
     // If this element is in range, update our magic bitvector.
     if (i < 64 && IsTrueForElt)
       MagicBitvector |= 1ULL << i;
     
-    // If all of our states become overdefined, bail out early.
-    if (i >= 64 && SecondTrueElement == Overdefined &&
-        SecondFalseElement == Overdefined)
+    // If all of our states become overdefined, bail out early.  Since the
+    // predicate is expensive, only check it every 8 elements.  This is only
+    // really useful for really huge arrays.
+    if ((i & 8) == 0 && i >= 64 && SecondTrueElement == Overdefined &&
+        SecondFalseElement == Overdefined && TrueRangeEnd == Overdefined &&
+        FalseRangeEnd == Overdefined)
       return 0;
   }
 
@@ -6110,6 +6148,7 @@
   // order the state machines in complexity of the generated code.
   Value *Idx = GEP->getOperand(2);
 
+  
   // If the comparison is only true for one or two elements, emit direct
   // comparisons.
   if (SecondTrueElement != Overdefined) {
@@ -6150,6 +6189,37 @@
     return BinaryOperator::CreateAnd(C1, C2);
   }
   
+  // If the comparison can be replaced with a range comparison for the elements
+  // where it is true, emit the range check.
+  if (TrueRangeEnd != Overdefined) {
+    assert(TrueRangeEnd != FirstTrueElement && "Should emit single compare");
+    
+    // Generate (i-FirstTrue) <u (TrueRangeEnd-FirstTrue+1).
+    if (FirstTrueElement) {
+      Value *Offs = ConstantInt::get(Idx->getType(), -FirstTrueElement);
+      Idx = Builder->CreateAdd(Idx, Offs);
+    }
+    
+    Value *End = ConstantInt::get(Idx->getType(),
+                                  TrueRangeEnd-FirstTrueElement+1);
+    return new ICmpInst(ICmpInst::ICMP_ULT, Idx, End);
+  }
+  
+  // False range check.
+  if (FalseRangeEnd != Overdefined) {
+    assert(FalseRangeEnd != FirstFalseElement && "Should emit single compare");
+    // Generate (i-FirstFalse) >u (FalseRangeEnd-FirstFalse).
+    if (FirstFalseElement) {
+      Value *Offs = ConstantInt::get(Idx->getType(), -FirstFalseElement);
+      Idx = Builder->CreateAdd(Idx, Offs);
+    }
+    
+    Value *End = ConstantInt::get(Idx->getType(),
+                                  FalseRangeEnd-FirstFalseElement);
+    return new ICmpInst(ICmpInst::ICMP_UGT, Idx, End);
+  }
+  
+  
   // If a 32-bit or 64-bit magic bitvector captures the entire comparison state
   // of this load, replace it with computation that does:
   //   ((magic_cst >> i) & 1) != 0
@@ -6166,14 +6236,8 @@
     return new ICmpInst(ICmpInst::ICMP_NE, V, ConstantInt::get(Ty, 0));
   }
   
-  // TODO: Range check
-  // TODO: GEP 0, i, 4
   // TODO: A[i]&4 == 0
-  
-  //errs() << "XFORM: " << *GV << "\n";
-  //errs() << "\t" << *GEP << "\n";
-  //errs() << "\t " << ICI << "\n\n\n\n";
-  
+  // TODO: GEP 0, i, 4
   
   return 0;
 }

Modified: llvm/trunk/test/Transforms/InstCombine/load-cmp.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/load-cmp.ll?rev=92426&r1=92425&r2=92426&view=diff

==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/load-cmp.ll (original)
+++ llvm/trunk/test/Transforms/InstCombine/load-cmp.ll Sat Jan  2 15:50:18 2010
@@ -2,7 +2,8 @@
 
 @G16 = internal constant [10 x i16] [i16 35, i16 82, i16 69, i16 81, i16 85, 
                                      i16 73, i16 82, i16 69, i16 68, i16 0]
-@GD = internal constant [3 x double] [double 1.0, double 4.0, double -20.0]
+@GD = internal constant [6 x double]
+   [double -10.0, double 1.0, double 4.0, double 2.0, double -20.0, double -40.0]
 
 define i1 @test1(i32 %X) {
   %P = getelementptr [10 x i16]* @G16, i32 0, i32 %X
@@ -25,12 +26,12 @@
 }
 
 define i1 @test3(i32 %X) {
-  %P = getelementptr [3 x double]* @GD, i32 0, i32 %X
+  %P = getelementptr [6 x double]* @GD, i32 0, i32 %X
   %Q = load double* %P
   %R = fcmp oeq double %Q, 1.0
   ret i1 %R
 ; CHECK: @test3
-; CHECK-NEXT: %R = icmp eq i32 %X, 0
+; CHECK-NEXT: %R = icmp eq i32 %X, 1
 ; CHECK-NEXT: ret i1 %R
 }
 
@@ -57,3 +58,25 @@
 ; CHECK-NEXT: %R = or i1
 ; CHECK-NEXT: ret i1 %R
 }
+
+define i1 @test6(i32 %X) {
+  %P = getelementptr [6 x double]* @GD, i32 0, i32 %X
+  %Q = load double* %P
+  %R = fcmp ogt double %Q, 0.0
+  ret i1 %R
+; CHECK: @test6
+; CHECK-NEXT: add i32 %X, -1
+; CHECK-NEXT: %R = icmp ult i32 {{.*}}, 3
+; CHECK-NEXT: ret i1 %R
+}
+
+define i1 @test7(i32 %X) {
+  %P = getelementptr [6 x double]* @GD, i32 0, i32 %X
+  %Q = load double* %P
+  %R = fcmp olt double %Q, 0.0
+  ret i1 %R
+; CHECK: @test7
+; CHECK-NEXT: add i32 %X, -1
+; CHECK-NEXT: %R = icmp ugt i32 {{.*}}, 2
+; CHECK-NEXT: ret i1 %R
+}





More information about the llvm-commits mailing list