[llvm] r266200 - Simplify strlen to a subtraction for certain cases.

David L Kreitzer via llvm-commits llvm-commits at lists.llvm.org
Wed Apr 13 07:31:06 PDT 2016


Author: dlkreitz
Date: Wed Apr 13 09:31:06 2016
New Revision: 266200

URL: http://llvm.org/viewvc/llvm-project?rev=266200&view=rev
Log:
Simplify strlen to a subtraction for certain cases.

Patch by Li Huang (li1.huang at intel.com)

Differential Revision: http://reviews.llvm.org/D18230

Modified:
    llvm/trunk/include/llvm/Analysis/ValueTracking.h
    llvm/trunk/lib/Analysis/ValueTracking.cpp
    llvm/trunk/lib/Transforms/Utils/SimplifyLibCalls.cpp
    llvm/trunk/test/Transforms/InstCombine/strlen-1.ll

Modified: llvm/trunk/include/llvm/Analysis/ValueTracking.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Analysis/ValueTracking.h?rev=266200&r1=266199&r2=266200&view=diff
==============================================================================
--- llvm/trunk/include/llvm/Analysis/ValueTracking.h (original)
+++ llvm/trunk/include/llvm/Analysis/ValueTracking.h Wed Apr 13 09:31:06 2016
@@ -26,6 +26,7 @@ namespace llvm {
   class AssumptionCache;
   class DataLayout;
   class DominatorTree;
+  class GEPOperator;
   class Instruction;
   class Loop;
   class LoopInfo;
@@ -183,6 +184,10 @@ namespace llvm {
                                             DL);
   }
 
+  /// Returns true if the GEP is based on a pointer to a string (array of i8), 
+  /// and is indexing into this string.
+  bool isGEPBasedOnPointerToString(const GEPOperator *GEP);
+
   /// getConstantStringInfo - This function computes the length of a
   /// null-terminated C string pointed to by V.  If successful, it returns true
   /// and returns the string in Str.  If unsuccessful, it returns false.  This

Modified: llvm/trunk/lib/Analysis/ValueTracking.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Analysis/ValueTracking.cpp?rev=266200&r1=266199&r2=266200&view=diff
==============================================================================
--- llvm/trunk/lib/Analysis/ValueTracking.cpp (original)
+++ llvm/trunk/lib/Analysis/ValueTracking.cpp Wed Apr 13 09:31:06 2016
@@ -2650,6 +2650,24 @@ Value *llvm::GetPointerBaseWithConstantO
   return Ptr;
 }
 
+bool llvm::isGEPBasedOnPointerToString(const GEPOperator *GEP) {
+  // Require exactly three operands: the base pointer plus two indices.
+  if (GEP->getNumOperands() != 3)
+    return false;
+
+  // The GEP's source element type must be an array of i8.
+  ArrayType *AT = dyn_cast<ArrayType>(GEP->getSourceElementType());
+  if (!AT || !AT->getElementType()->isIntegerTy(8))
+    return false;
+
+  // The first index (operand 1) must be a constant integer with value 0, so
+  // that we are sure the GEP is indexing into the array itself.
+  const ConstantInt *FirstIdx = dyn_cast<ConstantInt>(GEP->getOperand(1));
+  if (!FirstIdx || !FirstIdx->isZero())
+    return false;
+
+  return true;
+} 
 
 /// This function computes the length of a null-terminated C string pointed to
 /// by V. If successful, it returns true and returns the string in Str.
@@ -2664,19 +2682,9 @@ bool llvm::getConstantStringInfo(const V
   // If the value is a GEP instruction or constant expression, treat it as an
   // offset.
   if (const GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
-    // Make sure the GEP has exactly three arguments.
-    if (GEP->getNumOperands() != 3)
-      return false;
-
-    // Make sure the index-ee is a pointer to array of i8.
-    ArrayType *AT = dyn_cast<ArrayType>(GEP->getSourceElementType());
-    if (!AT || !AT->getElementType()->isIntegerTy(8))
-      return false;
-
-    // Check to make sure that the first operand of the GEP is an integer and
-    // has value 0 so that we are sure we're indexing into the initializer.
-    const ConstantInt *FirstIdx = dyn_cast<ConstantInt>(GEP->getOperand(1));
-    if (!FirstIdx || !FirstIdx->isZero())
+    // The GEP operator must be based on a pointer to a string constant and
+    // must index into that string constant.
+    if (!isGEPBasedOnPointerToString(GEP))
       return false;
 
     // If the second index isn't a ConstantInt, then this is a variable index

Modified: llvm/trunk/lib/Transforms/Utils/SimplifyLibCalls.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Utils/SimplifyLibCalls.cpp?rev=266200&r1=266199&r2=266200&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Utils/SimplifyLibCalls.cpp (original)
+++ llvm/trunk/lib/Transforms/Utils/SimplifyLibCalls.cpp Wed Apr 13 09:31:06 2016
@@ -535,6 +535,57 @@ Value *LibCallSimplifier::optimizeStrLen
   if (uint64_t Len = GetStringLength(Src))
     return ConstantInt::get(CI->getType(), Len - 1);
 
+  // If s is a constant pointer pointing to a string literal, we can fold
+  // strlen(s + x) to strlen(s) - x, when x is known to be in the range 
+  // [0, strlen(s)] or the string has a single null terminator '\0' at the end.
+  // We only try to simplify strlen when the pointer s points to an array 
+  // of i8. Otherwise, we would need to scale the offset x before doing the
+  // subtraction. This will make the optimization more complex, and it's not 
+  // very useful because calling strlen for a pointer of other types is 
+  // very uncommon.
+  if (GEPOperator *GEP = dyn_cast<GEPOperator>(Src)) {
+    if (!isGEPBasedOnPointerToString(GEP))
+      return nullptr;
+
+    StringRef Str;
+    if (getConstantStringInfo(GEP->getOperand(0), Str, 0, false)) {
+      size_t NullTermIdx = Str.find('\0');
+      
+      // If the string does not have '\0', leave it to strlen to compute
+      // its length.
+      if (NullTermIdx == StringRef::npos)
+        return nullptr;
+     
+      Value *Offset = GEP->getOperand(2);
+      unsigned BitWidth = Offset->getType()->getIntegerBitWidth();
+      APInt KnownZero(BitWidth, 0);
+      APInt KnownOne(BitWidth, 0);
+      computeKnownBits(Offset, KnownZero, KnownOne, DL, 0, nullptr, CI, 
+                       nullptr);
+      KnownZero.flipAllBits();
+      size_t ArrSize = 
+             cast<ArrayType>(GEP->getSourceElementType())->getNumElements();
+
+      // KnownZero's bits are flipped, so zeros in KnownZero now represent
+      // bits known to be zeros in Offset, and ones in KnownZero represent
+      // bits unknown in Offset. Therefore, Offset is known to be in range
+      // [0, NullTermIdx] when the flipped KnownZero is non-negative and
+      // unsigned-less-than-or-equal-to NullTermIdx.
+      //
+      // If Offset is not provably in the range [0, NullTermIdx], we can still 
+      // optimize if we can prove that the program has undefined behavior when 
+      // Offset is outside that range. That is the case when GEP->getOperand(0) 
+      // is a pointer to an object whose memory extent is NullTermIdx+1.
+      if ((KnownZero.isNonNegative() && KnownZero.ule(NullTermIdx)) || 
+          (GEP->isInBounds() && isa<GlobalVariable>(GEP->getOperand(0)) &&
+           NullTermIdx == ArrSize - 1))
+        return B.CreateSub(ConstantInt::get(CI->getType(), NullTermIdx), 
+                           Offset);
+    }
+
+    return nullptr;
+  }
+
   // strlen(x?"foo":"bars") --> x ? 3 : 4
   if (SelectInst *SI = dyn_cast<SelectInst>(Src)) {
     uint64_t LenTrue = GetStringLength(SI->getTrueValue());

Modified: llvm/trunk/test/Transforms/InstCombine/strlen-1.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/strlen-1.ll?rev=266200&r1=266199&r2=266200&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/strlen-1.ll (original)
+++ llvm/trunk/test/Transforms/InstCombine/strlen-1.ll Wed Apr 13 09:31:06 2016
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by update_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; Test that the strlen library call simplifier works correctly.
 ;
 ; RUN: opt < %s -instcombine -S | FileCheck %s
@@ -113,9 +113,8 @@ define i32 @test_simplify9(i1 %x) {
 
 define i32 @test_simplify10(i32 %x) {
 ; CHECK-LABEL: @test_simplify10(
-; CHECK-NEXT:    [[HELLO_P:%.*]] = getelementptr inbounds [6 x i8], [6 x i8]* @hello, i32 0, i32 %x
-; CHECK-NEXT:    [[HELLO_L:%.*]] = call i32 @strlen(i8* [[HELLO_P]])
-; CHECK-NEXT:    ret i32 [[HELLO_L]]
+; CHECK-NEXT:    [[TMP1:%.*]] = sub i32 5, %x
+; CHECK-NEXT:    ret i32 [[TMP1]]
 ;
   %hello_p = getelementptr inbounds [6 x i8], [6 x i8]* @hello, i32 0, i32 %x
   %hello_l = call i32 @strlen(i8* %hello_p)
@@ -127,9 +126,8 @@ define i32 @test_simplify10(i32 %x) {
 define i32 @test_simplify11(i32 %x) {
 ; CHECK-LABEL: @test_simplify11(
 ; CHECK-NEXT:    [[AND:%.*]] = and i32 %x, 7
-; CHECK-NEXT:    [[HELLO_P:%.*]] = getelementptr inbounds [13 x i8], [13 x i8]* @null_hello_mid, i32 0, i32 [[AND]]
-; CHECK-NEXT:    [[HELLO_L:%.*]] = call i32 @strlen(i8* [[HELLO_P]])
-; CHECK-NEXT:    ret i32 [[HELLO_L]]
+; CHECK-NEXT:    [[TMP1:%.*]] = sub nsw i32 9, [[AND]]
+; CHECK-NEXT:    ret i32 [[TMP1]]
 ;
   %and = and i32 %x, 7
   %hello_p = getelementptr inbounds [13 x i8], [13 x i8]* @null_hello_mid, i32 0, i32 %and




More information about the llvm-commits mailing list