[llvm] a7df02f - [InstCombine] Make strlen optimization more resilient to different gep types. (#153623)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Aug 19 02:37:21 PDT 2025
Author: David Green
Date: 2025-08-19T10:37:17+01:00
New Revision: a7df02f83c74c7229a09b2f89a51b003075560ab
URL: https://github.com/llvm/llvm-project/commit/a7df02f83c74c7229a09b2f89a51b003075560ab
DIFF: https://github.com/llvm/llvm-project/commit/a7df02f83c74c7229a09b2f89a51b003075560ab.diff
LOG: [InstCombine] Make strlen optimization more resilient to different gep types. (#153623)
This makes the optimization in optimizeStringLength for strlen(gep
@glob, %x) -> sub endof@glob, %x a little more resilient, and maybe a
bit more correct for geps with non-array types.
Added:
Modified:
llvm/include/llvm/Analysis/ValueTracking.h
llvm/lib/Analysis/ValueTracking.cpp
llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
llvm/test/Transforms/InstCombine/strlen-1.ll
llvm/test/Transforms/InstCombine/wcslen-1.ll
Removed:
################################################################################
diff --git a/llvm/include/llvm/Analysis/ValueTracking.h b/llvm/include/llvm/Analysis/ValueTracking.h
index 02990a3cb44f7..15ff129deda13 100644
--- a/llvm/include/llvm/Analysis/ValueTracking.h
+++ b/llvm/include/llvm/Analysis/ValueTracking.h
@@ -359,11 +359,6 @@ GetPointerBaseWithConstantOffset(const Value *Ptr, int64_t &Offset,
AllowNonInbounds);
}
-/// Returns true if the GEP is based on a pointer to a string (array of
-// \p CharSize integers) and is indexing into this string.
-LLVM_ABI bool isGEPBasedOnPointerToString(const GEPOperator *GEP,
- unsigned CharSize = 8);
-
/// Represents offset+length into a ConstantDataArray.
struct ConstantDataArraySlice {
/// ConstantDataArray pointer. nullptr indicates a zeroinitializer (a valid
diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
index 50e43a53def6c..8ea3a03b1b4ab 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -6356,27 +6356,6 @@ llvm::FindInsertedValue(Value *V, ArrayRef<unsigned> idx_range,
return nullptr;
}
-bool llvm::isGEPBasedOnPointerToString(const GEPOperator *GEP,
- unsigned CharSize) {
- // Make sure the GEP has exactly three arguments.
- if (GEP->getNumOperands() != 3)
- return false;
-
- // Make sure the index-ee is a pointer to array of \p CharSize integers.
- // CharSize.
- ArrayType *AT = dyn_cast<ArrayType>(GEP->getSourceElementType());
- if (!AT || !AT->getElementType()->isIntegerTy(CharSize))
- return false;
-
- // Check to make sure that the first operand of the GEP is an integer and
- // has value 0 so that we are sure we're indexing into the initializer.
- const ConstantInt *FirstIdx = dyn_cast<ConstantInt>(GEP->getOperand(1));
- if (!FirstIdx || !FirstIdx->isZero())
- return false;
-
- return true;
-}
-
// If V refers to an initialized global constant, set Slice either to
// its initializer if the size of its elements equals ElementSize, or,
// for ElementSize == 8, to its representation as an array of unsiged
diff --git a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
index cc4eb2d1df8ca..2d6a748f45079 100644
--- a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -20,6 +20,7 @@
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/Utils/Local.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/AttributeMask.h"
#include "llvm/IR/DataLayout.h"
@@ -977,8 +978,14 @@ Value *LibCallSimplifier::optimizeStringLength(CallInst *CI, IRBuilderBase &B,
// it's not very useful because calling strlen for a pointer of other types is
// very uncommon.
if (GEPOperator *GEP = dyn_cast<GEPOperator>(Src)) {
- // TODO: Handle subobjects.
- if (!isGEPBasedOnPointerToString(GEP, CharSize))
+ unsigned BW = DL.getIndexTypeSizeInBits(GEP->getType());
+ SmallMapVector<Value *, APInt, 4> VarOffsets;
+ APInt ConstOffset(BW, 0);
+ assert(CharSize % 8 == 0 && "Expected a multiple of 8 sized CharSize");
+ // Check the gep is a single variable offset.
+ if (!GEP->collectOffset(DL, BW, VarOffsets, ConstOffset) ||
+ VarOffsets.size() != 1 || ConstOffset != 0 ||
+ VarOffsets.begin()->second != CharSize / 8)
return nullptr;
ConstantDataArraySlice Slice;
@@ -1000,10 +1007,8 @@ Value *LibCallSimplifier::optimizeStringLength(CallInst *CI, IRBuilderBase &B,
return nullptr;
}
- Value *Offset = GEP->getOperand(2);
+ Value *Offset = VarOffsets.begin()->first;
KnownBits Known = computeKnownBits(Offset, DL, nullptr, CI, nullptr);
- uint64_t ArrSize =
- cast<ArrayType>(GEP->getSourceElementType())->getNumElements();
// If Offset is not provably in the range [0, NullTermIdx], we can still
// optimize if we can prove that the program has undefined behavior when
@@ -1011,7 +1016,7 @@ Value *LibCallSimplifier::optimizeStringLength(CallInst *CI, IRBuilderBase &B,
// is a pointer to an object whose memory extent is NullTermIdx+1.
if ((Known.isNonNegative() && Known.getMaxValue().ule(NullTermIdx)) ||
(isa<GlobalVariable>(GEP->getOperand(0)) &&
- NullTermIdx == ArrSize - 1)) {
+ NullTermIdx == Slice.Length - 1)) {
Offset = B.CreateSExtOrTrunc(Offset, CI->getType());
return B.CreateSub(ConstantInt::get(CI->getType(), NullTermIdx),
Offset);
diff --git a/llvm/test/Transforms/InstCombine/strlen-1.ll b/llvm/test/Transforms/InstCombine/strlen-1.ll
index 4616767997512..27f9e4ace3a7b 100644
--- a/llvm/test/Transforms/InstCombine/strlen-1.ll
+++ b/llvm/test/Transforms/InstCombine/strlen-1.ll
@@ -116,6 +116,25 @@ define i32 @test_simplify10_inbounds(i32 %x) {
ret i32 %hello_l
}
+define i32 @test_simplify10_inbounds_i8gep(i32 %x) {
+; CHECK-LABEL: @test_simplify10_inbounds_i8gep(
+; CHECK-NEXT: [[HELLO_L:%.*]] = sub i32 5, [[X:%.*]]
+; CHECK-NEXT: ret i32 [[HELLO_L]]
+;
+ %hello_p = getelementptr inbounds i8, ptr @hello, i32 %x
+ %hello_l = call i32 @strlen(ptr %hello_p)
+ ret i32 %hello_l
+}
+
+define i32 @test_simplify10_inbounds_i8gep_const() {
+; CHECK-LABEL: @test_simplify10_inbounds_i8gep_const(
+; CHECK-NEXT: ret i32 3
+;
+ %hello_p = getelementptr inbounds i8, ptr @hello, i32 2
+ %hello_l = call i32 @strlen(ptr %hello_p)
+ ret i32 %hello_l
+}
+
define i32 @test_simplify10_no_inbounds(i32 %x) {
; CHECK-LABEL: @test_simplify10_no_inbounds(
; CHECK-NEXT: [[HELLO_L:%.*]] = sub i32 5, [[X:%.*]]
@@ -126,6 +145,16 @@ define i32 @test_simplify10_no_inbounds(i32 %x) {
ret i32 %hello_l
}
+define i32 @test_simplify10_no_inbounds_i8gep(i32 %x) {
+; CHECK-LABEL: @test_simplify10_no_inbounds_i8gep(
+; CHECK-NEXT: [[HELLO_L:%.*]] = sub i32 5, [[X:%.*]]
+; CHECK-NEXT: ret i32 [[HELLO_L]]
+;
+ %hello_p = getelementptr i8, ptr @hello, i32 %x
+ %hello_l = call i32 @strlen(ptr %hello_p)
+ ret i32 %hello_l
+}
+
; strlen(@null_hello_mid + (x & 7)) --> 9 - (x & 7)
define i32 @test_simplify11(i32 %x) {
diff --git a/llvm/test/Transforms/InstCombine/wcslen-1.ll b/llvm/test/Transforms/InstCombine/wcslen-1.ll
index 8a2b66d7bd8d3..ffd62564f1dde 100644
--- a/llvm/test/Transforms/InstCombine/wcslen-1.ll
+++ b/llvm/test/Transforms/InstCombine/wcslen-1.ll
@@ -118,6 +118,63 @@ define i64 @test_simplify10(i32 %x) {
ret i64 %hello_l
}
+define i64 @test_simplify10_gepi32(i64 %x) {
+; CHECK-LABEL: @test_simplify10_gepi32(
+; CHECK-NEXT: [[HELLO_L:%.*]] = sub i64 5, [[X:%.*]]
+; CHECK-NEXT: ret i64 [[HELLO_L]]
+;
+ %hello_p = getelementptr inbounds i32, ptr @hello, i64 %x
+ %hello_l = call i64 @wcslen(ptr %hello_p)
+ ret i64 %hello_l
+}
+
+define i64 @test_simplify10_gepi64(i32 %x) {
+; CHECK-LABEL: @test_simplify10_gepi64(
+; CHECK-NEXT: [[TMP1:%.*]] = sext i32 [[X:%.*]] to i64
+; CHECK-NEXT: [[HELLO_P:%.*]] = getelementptr inbounds i64, ptr @hello, i64 [[TMP1]]
+; CHECK-NEXT: [[HELLO_L:%.*]] = call i64 @wcslen(ptr nonnull [[HELLO_P]])
+; CHECK-NEXT: ret i64 [[HELLO_L]]
+;
+ %hello_p = getelementptr inbounds i64, ptr @hello, i32 %x
+ %hello_l = call i64 @wcslen(ptr %hello_p)
+ ret i64 %hello_l
+}
+
+define i64 @test_simplify10_gepi16(i64 %x) {
+; CHECK-LABEL: @test_simplify10_gepi16(
+; CHECK-NEXT: [[HELLO_P:%.*]] = getelementptr inbounds i16, ptr @hello, i64 [[X:%.*]]
+; CHECK-NEXT: [[HELLO_L:%.*]] = call i64 @wcslen(ptr nonnull [[HELLO_P]])
+; CHECK-NEXT: ret i64 [[HELLO_L]]
+;
+ %hello_p = getelementptr inbounds i16, ptr @hello, i64 %x
+ %hello_l = call i64 @wcslen(ptr %hello_p)
+ ret i64 %hello_l
+}
+
+define i64 @test_simplify10_gepi8(i64 %x) {
+; CHECK-LABEL: @test_simplify10_gepi8(
+; CHECK-NEXT: [[HELLO_P:%.*]] = getelementptr inbounds i8, ptr @hello, i64 [[TMP1:%.*]]
+; CHECK-NEXT: [[HELLO_L:%.*]] = call i64 @wcslen(ptr nonnull [[HELLO_P]])
+; CHECK-NEXT: ret i64 [[HELLO_L]]
+;
+ %hello_p = getelementptr inbounds i8, ptr @hello, i64 %x
+ %hello_l = call i64 @wcslen(ptr %hello_p)
+ ret i64 %hello_l
+}
+
+define i64 @test_simplify10_gepi8mul4(i64 %x) {
+; CHECK-LABEL: @test_simplify10_gepi8mul4(
+; CHECK-NEXT: [[Y:%.*]] = shl i64 [[X:%.*]], 2
+; CHECK-NEXT: [[HELLO_P:%.*]] = getelementptr inbounds i8, ptr @hello, i64 [[Y]]
+; CHECK-NEXT: [[HELLO_L:%.*]] = call i64 @wcslen(ptr nonnull [[HELLO_P]])
+; CHECK-NEXT: ret i64 [[HELLO_L]]
+;
+ %y = mul i64 %x, 4
+ %hello_p = getelementptr inbounds i8, ptr @hello, i64 %y
+ %hello_l = call i64 @wcslen(ptr %hello_p)
+ ret i64 %hello_l
+}
+
; wcslen(@null_hello_mid + (x & 7)) --> 9 - (x & 7)
define i64 @test_simplify11(i32 %x) {
@@ -133,6 +190,33 @@ define i64 @test_simplify11(i32 %x) {
ret i64 %hello_l
}
+define i64 @test_simplify11_gepi32(i32 %x) {
+; CHECK-LABEL: @test_simplify11_gepi32(
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 7
+; CHECK-NEXT: [[NARROW:%.*]] = sub nuw nsw i32 9, [[AND]]
+; CHECK-NEXT: [[HELLO_L:%.*]] = zext nneg i32 [[NARROW]] to i64
+; CHECK-NEXT: ret i64 [[HELLO_L]]
+;
+ %and = and i32 %x, 7
+ %hello_p = getelementptr inbounds i32, ptr @null_hello_mid, i32 %and
+ %hello_l = call i64 @wcslen(ptr %hello_p)
+ ret i64 %hello_l
+}
+
+define i64 @test_simplify11_gepi8(i32 %x) {
+; CHECK-LABEL: @test_simplify11_gepi8(
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 7
+; CHECK-NEXT: [[TMP1:%.*]] = zext nneg i32 [[AND]] to i64
+; CHECK-NEXT: [[HELLO_P:%.*]] = getelementptr inbounds nuw i8, ptr @null_hello_mid, i64 [[TMP1]]
+; CHECK-NEXT: [[HELLO_L:%.*]] = call i64 @wcslen(ptr nonnull [[HELLO_P]])
+; CHECK-NEXT: ret i64 [[HELLO_L]]
+;
+ %and = and i32 %x, 7
+ %hello_p = getelementptr inbounds i8, ptr @null_hello_mid, i32 %and
+ %hello_l = call i64 @wcslen(ptr %hello_p)
+ ret i64 %hello_l
+}
+
; Check cases that shouldn't be simplified.
define i64 @test_no_simplify1() {
More information about the llvm-commits
mailing list