[llvm] 0d68ff8 - [InstCombine] Transform strrchr to memrchr for constant strings

Martin Sebor via llvm-commits llvm-commits at lists.llvm.org
Fri Jul 1 10:12:31 PDT 2022


Author: Martin Sebor
Date: 2022-07-01T11:10:00-06:00
New Revision: 0d68ff87d2b03522ac43646a491d731586518e4c

URL: https://github.com/llvm/llvm-project/commit/0d68ff87d2b03522ac43646a491d731586518e4c
DIFF: https://github.com/llvm/llvm-project/commit/0d68ff87d2b03522ac43646a491d731586518e4c.diff

LOG: [InstCombine] Transform strrchr to memrchr for constant strings

Add an emitter for the memrchr common extension and simplify the strrchr
call handler to use it. This enables transforming calls with the empty
string to the test C ? S : 0.

Reviewed By: nikic

Differential Revision: https://reviews.llvm.org/D128954

Added: 
    llvm/test/Transforms/InstCombine/strrchr-3.ll

Modified: 
    llvm/include/llvm/Transforms/Utils/BuildLibCalls.h
    llvm/lib/Transforms/Utils/BuildLibCalls.cpp
    llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
    llvm/test/Transforms/InstCombine/strcall-no-nul.ll
    llvm/test/Transforms/InstCombine/strrchr-1.ll

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/Transforms/Utils/BuildLibCalls.h b/llvm/include/llvm/Transforms/Utils/BuildLibCalls.h
index d993d73014c13..6ea195ce31acc 100644
--- a/llvm/include/llvm/Transforms/Utils/BuildLibCalls.h
+++ b/llvm/include/llvm/Transforms/Utils/BuildLibCalls.h
@@ -139,6 +139,10 @@ namespace llvm {
   Value *emitMemChr(Value *Ptr, Value *Val, Value *Len, IRBuilderBase &B,
                     const DataLayout &DL, const TargetLibraryInfo *TLI);
 
+  /// Emit a call to the memrchr function, analogously to emitMemChr.
+  Value *emitMemRChr(Value *Ptr, Value *Val, Value *Len, IRBuilderBase &B,
+                    const DataLayout &DL, const TargetLibraryInfo *TLI);
+
   /// Emit a call to the memcmp function.
   Value *emitMemCmp(Value *Ptr1, Value *Ptr2, Value *Len, IRBuilderBase &B,
                     const DataLayout &DL, const TargetLibraryInfo *TLI);

diff  --git a/llvm/lib/Transforms/Utils/BuildLibCalls.cpp b/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
index 666b2767f1954..c4a58f36c1715 100644
--- a/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
+++ b/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
@@ -1305,6 +1305,7 @@ FunctionCallee llvm::getOrInsertLibFunc(Module *M, const TargetLibraryInfo &TLI,
   case LibFunc_ldexpf:
   case LibFunc_ldexpl:
   case LibFunc_memchr:
+  case LibFunc_memrchr:
   case LibFunc_strchr:
     setArgExtAttr(*F, 1, TLI);
     break;
@@ -1538,6 +1539,15 @@ Value *llvm::emitMemChr(Value *Ptr, Value *Val, Value *Len, IRBuilderBase &B,
       {castToCStr(Ptr, B), Val, Len}, B, TLI);
 }
 
+Value *llvm::emitMemRChr(Value *Ptr, Value *Val, Value *Len, IRBuilderBase &B,
+                        const DataLayout &DL, const TargetLibraryInfo *TLI) {
+  LLVMContext &Context = B.GetInsertBlock()->getContext();
+  return emitLibCall(
+      LibFunc_memrchr, B.getInt8PtrTy(),
+      {B.getInt8PtrTy(), B.getInt32Ty(), DL.getIntPtrType(Context)},
+      {castToCStr(Ptr, B), Val, Len}, B, TLI);
+}
+
 Value *llvm::emitMemCmp(Value *Ptr1, Value *Ptr2, Value *Len, IRBuilderBase &B,
                         const DataLayout &DL, const TargetLibraryInfo *TLI) {
   LLVMContext &Context = B.GetInsertBlock()->getContext();

diff  --git a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
index 1ff3798147dee..f4306bb43dfdd 100644
--- a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -344,30 +344,24 @@ Value *LibCallSimplifier::optimizeStrChr(CallInst *CI, IRBuilderBase &B) {
 
 Value *LibCallSimplifier::optimizeStrRChr(CallInst *CI, IRBuilderBase &B) {
   Value *SrcStr = CI->getArgOperand(0);
-  ConstantInt *CharC = dyn_cast<ConstantInt>(CI->getArgOperand(1));
+  Value *CharVal = CI->getArgOperand(1);
+  ConstantInt *CharC = dyn_cast<ConstantInt>(CharVal);
   annotateNonNullNoUndefBasedOnAccess(CI, 0);
 
-  // Cannot fold anything if we're not looking for a constant.
-  if (!CharC)
-    return nullptr;
-
   StringRef Str;
   if (!getConstantStringInfo(SrcStr, Str)) {
     // strrchr(s, 0) -> strchr(s, 0)
-    if (CharC->isZero())
+    if (CharC && CharC->isZero())
       return copyFlags(*CI, emitStrChr(SrcStr, '\0', B, TLI));
     return nullptr;
   }
 
-  // Compute the offset.
-  size_t I = (0xFF & CharC->getSExtValue()) == 0
-                 ? Str.size()
-                 : Str.rfind(CharC->getSExtValue());
-  if (I == StringRef::npos) // Didn't find the char. Return null.
-    return Constant::getNullValue(CI->getType());
-
-  // strrchr(s+n,c) -> gep(s+n+i,c)
-  return B.CreateInBoundsGEP(B.getInt8Ty(), SrcStr, B.getInt64(I), "strrchr");
+  // Try to expand strrchr to the memrchr nonstandard extension if it's
+  // available, or simply fail otherwise.
+  uint64_t NBytes = Str.size() + 1;   // Include the terminating nul.
+  Type *IntPtrType = DL.getIntPtrType(CI->getContext());
+  Value *Size = ConstantInt::get(IntPtrType, NBytes);
+  return copyFlags(*CI, emitMemRChr(SrcStr, CharVal, Size, B, DL, TLI));
 }
 
 Value *LibCallSimplifier::optimizeStrCmp(CallInst *CI, IRBuilderBase &B) {

diff  --git a/llvm/test/Transforms/InstCombine/strcall-no-nul.ll b/llvm/test/Transforms/InstCombine/strcall-no-nul.ll
index 9f895437f94cc..e46ea737bb4da 100644
--- a/llvm/test/Transforms/InstCombine/strcall-no-nul.ll
+++ b/llvm/test/Transforms/InstCombine/strcall-no-nul.ll
@@ -101,7 +101,7 @@ define void @fold_strncmp_past_end(i32* %pcmp) {
 
 define i8* @fold_strrchr_past_end(i32 %c) {
 ; CHECK-LABEL: @fold_strrchr_past_end(
-; CHECK-NEXT:    ret i8* getelementptr inbounds ([5 x i8], [5 x i8]* @a5, i64 1, i64 0)
+; CHECK-NEXT:    ret i8* null
 ;
   %p5 = getelementptr [5 x i8], [5 x i8]* @a5, i32 0, i32 5
   %r = call i8* @strrchr(i8* %p5, i32 0)

diff  --git a/llvm/test/Transforms/InstCombine/strrchr-1.ll b/llvm/test/Transforms/InstCombine/strrchr-1.ll
index 2fd36601fff86..fdf6c30fc20f0 100644
--- a/llvm/test/Transforms/InstCombine/strrchr-1.ll
+++ b/llvm/test/Transforms/InstCombine/strrchr-1.ll
@@ -58,10 +58,10 @@ define void @test_simplify4() {
   ret void
 }
 
-define void @test_nosimplify1(i32 %chr) {
-; CHECK-LABEL: @test_nosimplify1(
-; CHECK-NEXT:    [[DST:%.*]] = call i8* @strrchr(i8* noundef nonnull dereferenceable(1) getelementptr inbounds ([14 x i8], [14 x i8]* @hello, i32 0, i32 0), i32 [[CHR:%.*]])
-; CHECK-NEXT:    store i8* [[DST]], i8** @chp, align 4
+define void @test_xform_to_memrchr(i32 %chr) {
+; CHECK-LABEL: @test_xform_to_memrchr(
+; CHECK-NEXT:    [[MEMRCHR:%.*]] = call i8* @memrchr(i8* noundef nonnull dereferenceable(14) getelementptr inbounds ([14 x i8], [14 x i8]* @hello, i32 0, i32 0), i32 [[CHR:%.*]], i32 14)
+; CHECK-NEXT:    store i8* [[MEMRCHR]], i8** @chp, align 4
 ; CHECK-NEXT:    ret void
 ;
 

diff  --git a/llvm/test/Transforms/InstCombine/strrchr-3.ll b/llvm/test/Transforms/InstCombine/strrchr-3.ll
new file mode 100644
index 0000000000000..390f83c3acde1
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/strrchr-3.ll
@@ -0,0 +1,62 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; Verify the strrchr("", c) to (unsigned char)c ? "" : 0 transformetion.
+;
+; RUN: opt < %s -passes=instcombine -S | FileCheck %s
+
+ at s10 = constant [11 x i8] c"0123456789\00"
+
+declare i8* @strrchr(i8*, i32)
+
+; Fold strrchr(s + 10, c) to (unsigned char)c ? 0 : s + 10.
+
+define i8* @fold_strrchr_sp10_x(i32 %c) {
+; CHECK-LABEL: @fold_strrchr_sp10_x(
+; CHECK-NEXT:    [[TMP1:%.*]] = trunc i32 [[C:%.*]] to i8
+; CHECK-NEXT:    [[MEMRCHR_CHAR0CMP:%.*]] = icmp eq i8 [[TMP1]], 0
+; CHECK-NEXT:    [[MEMRCHR_SEL:%.*]] = select i1 [[MEMRCHR_CHAR0CMP]], i8* getelementptr inbounds ([11 x i8], [11 x i8]* @s10, i64 0, i64 10), i8* null
+; CHECK-NEXT:    ret i8* [[MEMRCHR_SEL]]
+;
+  %psp10 = getelementptr [11 x i8], [11 x i8]* @s10, i32 0, i32 10
+  %pc = call i8* @strrchr(i8* %psp10, i32 %c)
+  ret i8* %pc
+}
+
+
+; Transform strrchr(s + 9, c) to [the equivalent of] memrchr(s + 9, c, 2).
+
+define i8* @call_strrchr_sp9_x(i32 %c) {
+; CHECK-LABEL: @call_strrchr_sp9_x(
+; CHECK-NEXT:    [[MEMRCHR:%.*]] = call i8* @memrchr(i8* noundef nonnull dereferenceable(2) getelementptr inbounds ([11 x i8], [11 x i8]* @s10, i64 0, i64 9), i32 [[C:%.*]], i64 2)
+; CHECK-NEXT:    ret i8* [[MEMRCHR]]
+;
+  %psp9 = getelementptr [11 x i8], [11 x i8]* @s10, i32 0, i32 9
+  %pc = call i8* @strrchr(i8* %psp9, i32 %c)
+  ret i8* %pc
+}
+
+
+; Do not transform strrchr(s + 2, c) (for short strings this could be
+; folded into a chain of OR expressions ala D128011).
+
+define i8* @call_strrchr_sp2_x(i32 %c) {
+; CHECK-LABEL: @call_strrchr_sp2_x(
+; CHECK-NEXT:    [[MEMRCHR:%.*]] = call i8* @memrchr(i8* noundef nonnull dereferenceable(9) getelementptr inbounds ([11 x i8], [11 x i8]* @s10, i64 0, i64 2), i32 [[C:%.*]], i64 9)
+; CHECK-NEXT:    ret i8* [[MEMRCHR]]
+;
+  %psp2 = getelementptr [11 x i8], [11 x i8]* @s10, i32 0, i32 2
+  %pc = call i8* @strrchr(i8* %psp2, i32 %c)
+  ret i8* %pc
+}
+
+
+; Do not transform strrchr(s + 1, c).
+
+define i8* @call_strrchr_sp1_x(i32 %c) {
+; CHECK-LABEL: @call_strrchr_sp1_x(
+; CHECK-NEXT:    [[MEMRCHR:%.*]] = call i8* @memrchr(i8* noundef nonnull dereferenceable(10) getelementptr inbounds ([11 x i8], [11 x i8]* @s10, i64 0, i64 1), i32 [[C:%.*]], i64 10)
+; CHECK-NEXT:    ret i8* [[MEMRCHR]]
+;
+  %psp1 = getelementptr [11 x i8], [11 x i8]* @s10, i32 0, i32 1
+  %pc = call i8* @strrchr(i8* %psp1, i32 %c)
+  ret i8* %pc
+}


        


More information about the llvm-commits mailing list