[llvm] a7df02f - [InstCombine] Make strlen optimization more resilient to different gep types. (#153623)

via llvm-commits llvm-commits at lists.llvm.org
Tue Aug 19 02:37:21 PDT 2025


Author: David Green
Date: 2025-08-19T10:37:17+01:00
New Revision: a7df02f83c74c7229a09b2f89a51b003075560ab

URL: https://github.com/llvm/llvm-project/commit/a7df02f83c74c7229a09b2f89a51b003075560ab
DIFF: https://github.com/llvm/llvm-project/commit/a7df02f83c74c7229a09b2f89a51b003075560ab.diff

LOG: [InstCombine] Make strlen optimization more resilient to different gep types. (#153623)

This makes the optimization in optimizeStringLength for strlen(gep
@glob, %x) -> sub endof@glob, %x a little more resilient, and maybe a
bit more correct for geps with non-array types.

Added: 
    

Modified: 
    llvm/include/llvm/Analysis/ValueTracking.h
    llvm/lib/Analysis/ValueTracking.cpp
    llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
    llvm/test/Transforms/InstCombine/strlen-1.ll
    llvm/test/Transforms/InstCombine/wcslen-1.ll

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/Analysis/ValueTracking.h b/llvm/include/llvm/Analysis/ValueTracking.h
index 02990a3cb44f7..15ff129deda13 100644
--- a/llvm/include/llvm/Analysis/ValueTracking.h
+++ b/llvm/include/llvm/Analysis/ValueTracking.h
@@ -359,11 +359,6 @@ GetPointerBaseWithConstantOffset(const Value *Ptr, int64_t &Offset,
                                           AllowNonInbounds);
 }
 
-/// Returns true if the GEP is based on a pointer to a string (array of
-// \p CharSize integers) and is indexing into this string.
-LLVM_ABI bool isGEPBasedOnPointerToString(const GEPOperator *GEP,
-                                          unsigned CharSize = 8);
-
 /// Represents offset+length into a ConstantDataArray.
 struct ConstantDataArraySlice {
   /// ConstantDataArray pointer. nullptr indicates a zeroinitializer (a valid

diff  --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
index 50e43a53def6c..8ea3a03b1b4ab 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -6356,27 +6356,6 @@ llvm::FindInsertedValue(Value *V, ArrayRef<unsigned> idx_range,
   return nullptr;
 }
 
-bool llvm::isGEPBasedOnPointerToString(const GEPOperator *GEP,
-                                       unsigned CharSize) {
-  // Make sure the GEP has exactly three arguments.
-  if (GEP->getNumOperands() != 3)
-    return false;
-
-  // Make sure the index-ee is a pointer to array of \p CharSize integers.
-  // CharSize.
-  ArrayType *AT = dyn_cast<ArrayType>(GEP->getSourceElementType());
-  if (!AT || !AT->getElementType()->isIntegerTy(CharSize))
-    return false;
-
-  // Check to make sure that the first operand of the GEP is an integer and
-  // has value 0 so that we are sure we're indexing into the initializer.
-  const ConstantInt *FirstIdx = dyn_cast<ConstantInt>(GEP->getOperand(1));
-  if (!FirstIdx || !FirstIdx->isZero())
-    return false;
-
-  return true;
-}
-
 // If V refers to an initialized global constant, set Slice either to
 // its initializer if the size of its elements equals ElementSize, or,
 // for ElementSize == 8, to its representation as an array of unsiged

diff  --git a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
index cc4eb2d1df8ca..2d6a748f45079 100644
--- a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -20,6 +20,7 @@
 #include "llvm/Analysis/Loads.h"
 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/Utils/Local.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/IR/AttributeMask.h"
 #include "llvm/IR/DataLayout.h"
@@ -977,8 +978,14 @@ Value *LibCallSimplifier::optimizeStringLength(CallInst *CI, IRBuilderBase &B,
   // it's not very useful because calling strlen for a pointer of other types is
   // very uncommon.
   if (GEPOperator *GEP = dyn_cast<GEPOperator>(Src)) {
-    // TODO: Handle subobjects.
-    if (!isGEPBasedOnPointerToString(GEP, CharSize))
+    unsigned BW = DL.getIndexTypeSizeInBits(GEP->getType());
+    SmallMapVector<Value *, APInt, 4> VarOffsets;
+    APInt ConstOffset(BW, 0);
+    assert(CharSize % 8 == 0 && "Expected a multiple of 8 sized CharSize");
+    // Check the gep is a single variable offset.
+    if (!GEP->collectOffset(DL, BW, VarOffsets, ConstOffset) ||
+        VarOffsets.size() != 1 || ConstOffset != 0 ||
+        VarOffsets.begin()->second != CharSize / 8)
       return nullptr;
 
     ConstantDataArraySlice Slice;
@@ -1000,10 +1007,8 @@ Value *LibCallSimplifier::optimizeStringLength(CallInst *CI, IRBuilderBase &B,
           return nullptr;
       }
 
-      Value *Offset = GEP->getOperand(2);
+      Value *Offset = VarOffsets.begin()->first;
       KnownBits Known = computeKnownBits(Offset, DL, nullptr, CI, nullptr);
-      uint64_t ArrSize =
-             cast<ArrayType>(GEP->getSourceElementType())->getNumElements();
 
       // If Offset is not provably in the range [0, NullTermIdx], we can still
       // optimize if we can prove that the program has undefined behavior when
@@ -1011,7 +1016,7 @@ Value *LibCallSimplifier::optimizeStringLength(CallInst *CI, IRBuilderBase &B,
       // is a pointer to an object whose memory extent is NullTermIdx+1.
       if ((Known.isNonNegative() && Known.getMaxValue().ule(NullTermIdx)) ||
           (isa<GlobalVariable>(GEP->getOperand(0)) &&
-           NullTermIdx == ArrSize - 1)) {
+           NullTermIdx == Slice.Length - 1)) {
         Offset = B.CreateSExtOrTrunc(Offset, CI->getType());
         return B.CreateSub(ConstantInt::get(CI->getType(), NullTermIdx),
                            Offset);

diff  --git a/llvm/test/Transforms/InstCombine/strlen-1.ll b/llvm/test/Transforms/InstCombine/strlen-1.ll
index 4616767997512..27f9e4ace3a7b 100644
--- a/llvm/test/Transforms/InstCombine/strlen-1.ll
+++ b/llvm/test/Transforms/InstCombine/strlen-1.ll
@@ -116,6 +116,25 @@ define i32 @test_simplify10_inbounds(i32 %x) {
   ret i32 %hello_l
 }
 
+define i32 @test_simplify10_inbounds_i8gep(i32 %x) {
+; CHECK-LABEL: @test_simplify10_inbounds_i8gep(
+; CHECK-NEXT:    [[HELLO_L:%.*]] = sub i32 5, [[X:%.*]]
+; CHECK-NEXT:    ret i32 [[HELLO_L]]
+;
+  %hello_p = getelementptr inbounds i8, ptr @hello, i32 %x
+  %hello_l = call i32 @strlen(ptr %hello_p)
+  ret i32 %hello_l
+}
+
+define i32 @test_simplify10_inbounds_i8gep_const() {
+; CHECK-LABEL: @test_simplify10_inbounds_i8gep_const(
+; CHECK-NEXT:    ret i32 3
+;
+  %hello_p = getelementptr inbounds i8, ptr @hello, i32 2
+  %hello_l = call i32 @strlen(ptr %hello_p)
+  ret i32 %hello_l
+}
+
 define i32 @test_simplify10_no_inbounds(i32 %x) {
 ; CHECK-LABEL: @test_simplify10_no_inbounds(
 ; CHECK-NEXT:    [[HELLO_L:%.*]] = sub i32 5, [[X:%.*]]
@@ -126,6 +145,16 @@ define i32 @test_simplify10_no_inbounds(i32 %x) {
   ret i32 %hello_l
 }
 
+define i32 @test_simplify10_no_inbounds_i8gep(i32 %x) {
+; CHECK-LABEL: @test_simplify10_no_inbounds_i8gep(
+; CHECK-NEXT:    [[HELLO_L:%.*]] = sub i32 5, [[X:%.*]]
+; CHECK-NEXT:    ret i32 [[HELLO_L]]
+;
+  %hello_p = getelementptr i8, ptr @hello, i32 %x
+  %hello_l = call i32 @strlen(ptr %hello_p)
+  ret i32 %hello_l
+}
+
 ; strlen(@null_hello_mid + (x & 7)) --> 9 - (x & 7)
 
 define i32 @test_simplify11(i32 %x) {

diff  --git a/llvm/test/Transforms/InstCombine/wcslen-1.ll b/llvm/test/Transforms/InstCombine/wcslen-1.ll
index 8a2b66d7bd8d3..ffd62564f1dde 100644
--- a/llvm/test/Transforms/InstCombine/wcslen-1.ll
+++ b/llvm/test/Transforms/InstCombine/wcslen-1.ll
@@ -118,6 +118,63 @@ define i64 @test_simplify10(i32 %x) {
   ret i64 %hello_l
 }
 
+define i64 @test_simplify10_gepi32(i64 %x) {
+; CHECK-LABEL: @test_simplify10_gepi32(
+; CHECK-NEXT:    [[HELLO_L:%.*]] = sub i64 5, [[X:%.*]]
+; CHECK-NEXT:    ret i64 [[HELLO_L]]
+;
+  %hello_p = getelementptr inbounds i32, ptr @hello, i64 %x
+  %hello_l = call i64 @wcslen(ptr %hello_p)
+  ret i64 %hello_l
+}
+
+define i64 @test_simplify10_gepi64(i32 %x) {
+; CHECK-LABEL: @test_simplify10_gepi64(
+; CHECK-NEXT:    [[TMP1:%.*]] = sext i32 [[X:%.*]] to i64
+; CHECK-NEXT:    [[HELLO_P:%.*]] = getelementptr inbounds i64, ptr @hello, i64 [[TMP1]]
+; CHECK-NEXT:    [[HELLO_L:%.*]] = call i64 @wcslen(ptr nonnull [[HELLO_P]])
+; CHECK-NEXT:    ret i64 [[HELLO_L]]
+;
+  %hello_p = getelementptr inbounds i64, ptr @hello, i32 %x
+  %hello_l = call i64 @wcslen(ptr %hello_p)
+  ret i64 %hello_l
+}
+
+define i64 @test_simplify10_gepi16(i64 %x) {
+; CHECK-LABEL: @test_simplify10_gepi16(
+; CHECK-NEXT:    [[HELLO_P:%.*]] = getelementptr inbounds i16, ptr @hello, i64 [[X:%.*]]
+; CHECK-NEXT:    [[HELLO_L:%.*]] = call i64 @wcslen(ptr nonnull [[HELLO_P]])
+; CHECK-NEXT:    ret i64 [[HELLO_L]]
+;
+  %hello_p = getelementptr inbounds i16, ptr @hello, i64 %x
+  %hello_l = call i64 @wcslen(ptr %hello_p)
+  ret i64 %hello_l
+}
+
+define i64 @test_simplify10_gepi8(i64 %x) {
+; CHECK-LABEL: @test_simplify10_gepi8(
+; CHECK-NEXT:    [[HELLO_P:%.*]] = getelementptr inbounds i8, ptr @hello, i64 [[TMP1:%.*]]
+; CHECK-NEXT:    [[HELLO_L:%.*]] = call i64 @wcslen(ptr nonnull [[HELLO_P]])
+; CHECK-NEXT:    ret i64 [[HELLO_L]]
+;
+  %hello_p = getelementptr inbounds i8, ptr @hello, i64 %x
+  %hello_l = call i64 @wcslen(ptr %hello_p)
+  ret i64 %hello_l
+}
+
+define i64 @test_simplify10_gepi8mul4(i64 %x) {
+; CHECK-LABEL: @test_simplify10_gepi8mul4(
+; CHECK-NEXT:    [[Y:%.*]] = shl i64 [[X:%.*]], 2
+; CHECK-NEXT:    [[HELLO_P:%.*]] = getelementptr inbounds i8, ptr @hello, i64 [[Y]]
+; CHECK-NEXT:    [[HELLO_L:%.*]] = call i64 @wcslen(ptr nonnull [[HELLO_P]])
+; CHECK-NEXT:    ret i64 [[HELLO_L]]
+;
+  %y = mul i64 %x, 4
+  %hello_p = getelementptr inbounds i8, ptr @hello, i64 %y
+  %hello_l = call i64 @wcslen(ptr %hello_p)
+  ret i64 %hello_l
+}
+
 ; wcslen(@null_hello_mid + (x & 7)) --> 9 - (x & 7)
 
 define i64 @test_simplify11(i32 %x) {
@@ -133,6 +190,33 @@ define i64 @test_simplify11(i32 %x) {
   ret i64 %hello_l
 }
 
+define i64 @test_simplify11_gepi32(i32 %x) {
+; CHECK-LABEL: @test_simplify11_gepi32(
+; CHECK-NEXT:    [[AND:%.*]] = and i32 [[X:%.*]], 7
+; CHECK-NEXT:    [[NARROW:%.*]] = sub nuw nsw i32 9, [[AND]]
+; CHECK-NEXT:    [[HELLO_L:%.*]] = zext nneg i32 [[NARROW]] to i64
+; CHECK-NEXT:    ret i64 [[HELLO_L]]
+;
+  %and = and i32 %x, 7
+  %hello_p = getelementptr inbounds i32, ptr @null_hello_mid, i32 %and
+  %hello_l = call i64 @wcslen(ptr %hello_p)
+  ret i64 %hello_l
+}
+
+define i64 @test_simplify11_gepi8(i32 %x) {
+; CHECK-LABEL: @test_simplify11_gepi8(
+; CHECK-NEXT:    [[AND:%.*]] = and i32 [[X:%.*]], 7
+; CHECK-NEXT:    [[TMP1:%.*]] = zext nneg i32 [[AND]] to i64
+; CHECK-NEXT:    [[HELLO_P:%.*]] = getelementptr inbounds nuw i8, ptr @null_hello_mid, i64 [[TMP1]]
+; CHECK-NEXT:    [[HELLO_L:%.*]] = call i64 @wcslen(ptr nonnull [[HELLO_P]])
+; CHECK-NEXT:    ret i64 [[HELLO_L]]
+;
+  %and = and i32 %x, 7
+  %hello_p = getelementptr inbounds i8, ptr @null_hello_mid, i32 %and
+  %hello_l = call i64 @wcslen(ptr %hello_p)
+  ret i64 %hello_l
+}
+
 ; Check cases that shouldn't be simplified.
 
 define i64 @test_no_simplify1() {


        


More information about the llvm-commits mailing list