[llvm] f58cfac - [AggressiveInstCombine] Expand memchr with small constant strings (#98501)

via llvm-commits llvm-commits at lists.llvm.org
Tue Jul 16 09:25:39 PDT 2024


Author: Yingwei Zheng
Date: 2024-07-17T00:25:36+08:00
New Revision: f58cfacfafda0961396f92816692e9a316dec0c2

URL: https://github.com/llvm/llvm-project/commit/f58cfacfafda0961396f92816692e9a316dec0c2
DIFF: https://github.com/llvm/llvm-project/commit/f58cfacfafda0961396f92816692e9a316dec0c2.diff

LOG:  [AggressiveInstCombine] Expand memchr with small constant strings (#98501)

This patch converts a memchr call on a small constant string into a switch.
This avoids the overhead of the libcall and enables more folds (e.g.,
comparing the result with null).

References: https://en.cppreference.com/w/c/string/byte/memchr

Added: 
    llvm/test/Transforms/AggressiveInstCombine/memchr.ll

Modified: 
    llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
index 1e0b8d448b9d1..d5a38ec17a2a8 100644
--- a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
+++ b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
@@ -54,6 +54,11 @@ static cl::opt<unsigned> StrNCmpInlineThreshold(
     cl::desc("The maximum length of a constant string for a builtin string cmp "
              "call eligible for inlining. The default value is 3."));
 
+static cl::opt<unsigned> // Longest search length foldMemChr will expand.
+    MemChrInlineThreshold("memchr-inline-threshold", cl::init(3), cl::Hidden,
+                          cl::desc("The maximum length of a constant string to "
+                                   "inline a memchr call."));
+
 /// Match a pattern for a bitwise funnel/rotate operation that partially guards
 /// against undefined behavior by branching around the funnel-shift/rotation
 /// when the shift amount is 0.
@@ -1103,6 +1108,81 @@ void StrNCmpInliner::inlineCompare(Value *LHS, StringRef RHS, uint64_t N,
   }
 }
 
+/// Convert memchr with a small constant string into a switch on the searched
+/// byte: each distinct byte in the first n gets a case selecting the index of
+/// its first occurrence; the default yields null. Returns true on success.
+static bool foldMemChr(CallInst *Call, DomTreeUpdater *DTU,
+                       const DataLayout &DL) {
+  if (isa<Constant>(Call->getArgOperand(1)))
+    return false;
+
+  StringRef Str;
+  Value *Base = Call->getArgOperand(0);
+  if (!getConstantStringInfo(Base, Str, /*TrimAtNul=*/false))
+    return false;
+
+  // Require a constant n that does not exceed the size of the string.
+  auto *ConstInt = dyn_cast<ConstantInt>(Call->getArgOperand(2));
+  if (!ConstInt || ConstInt->getZExtValue() > Str.size())
+    return false;
+  uint64_t N = ConstInt->getZExtValue();
+  if (N > MemChrInlineThreshold)
+    return false;
+
+  BasicBlock *BB = Call->getParent();
+  BasicBlock *BBNext = SplitBlock(BB, Call, DTU);
+  IRBuilder<> IRB(BB);
+  IRB.SetCurrentDebugLocation(Call->getDebugLoc());
+  IntegerType *ByteTy = IRB.getInt8Ty();
+  BB->getTerminator()->eraseFromParent(); // Drop SplitBlock's branch.
+  SwitchInst *SI = IRB.CreateSwitch(
+      IRB.CreateTrunc(Call->getArgOperand(1), ByteTy), BBNext, N);
+  Type *IndexTy = DL.getIndexType(Call->getType());
+  SmallVector<DominatorTree::UpdateType, 8> Updates;
+
+  // Every case block feeds its index into one block computing Base + index.
+  BasicBlock *BBSuccess = BasicBlock::Create(
+      Call->getContext(), "memchr.success", BB->getParent(), BBNext);
+  IRB.SetInsertPoint(BBSuccess);
+  PHINode *IndexPHI = IRB.CreatePHI(IndexTy, N, "memchr.idx");
+  Value *FirstOccursLocation = IRB.CreateInBoundsPtrAdd(Base, IndexPHI);
+  IRB.CreateBr(BBNext);
+  if (DTU)
+    Updates.push_back({DominatorTree::Insert, BBSuccess, BBNext});
+
+  SmallPtrSet<ConstantInt *, 4> Cases;
+  for (uint64_t I = 0; I < N; ++I) {
+    // Go through unsigned char so bytes >= 0x80 are not sign-extended.
+    ConstantInt *CaseVal =
+        ConstantInt::get(ByteTy, static_cast<unsigned char>(Str[I]));
+    if (!Cases.insert(CaseVal).second)
+      continue; // Only the first occurrence of a byte gets a case.
+
+    BasicBlock *BBCase = BasicBlock::Create(Call->getContext(), "memchr.case",
+                                            BB->getParent(), BBSuccess);
+    SI->addCase(CaseVal, BBCase);
+    IRB.SetInsertPoint(BBCase);
+    IndexPHI->addIncoming(ConstantInt::get(IndexTy, I), BBCase);
+    IRB.CreateBr(BBSuccess);
+    if (DTU) {
+      Updates.push_back({DominatorTree::Insert, BB, BBCase});
+      Updates.push_back({DominatorTree::Insert, BBCase, BBSuccess});
+    }
+  }
+
+  // Merge the not-found (null) and found results at the top of BBNext.
+  PHINode *PHI =
+      PHINode::Create(Call->getType(), 2, Call->getName(), BBNext->begin());
+  PHI->addIncoming(Constant::getNullValue(Call->getType()), BB);
+  PHI->addIncoming(FirstOccursLocation, BBSuccess);
+  Call->replaceAllUsesWith(PHI);
+  Call->eraseFromParent();
+
+  if (DTU)
+    DTU->applyUpdates(Updates);
+  return true;
+}
+
 static bool foldLibCalls(Instruction &I, TargetTransformInfo &TTI,
                          TargetLibraryInfo &TLI, AssumptionCache &AC,
                          DominatorTree &DT, const DataLayout &DL,
@@ -1135,6 +1215,12 @@ static bool foldLibCalls(Instruction &I, TargetTransformInfo &TTI,
       return true;
     }
     break;
+  case LibFunc_memchr:
+    if (foldMemChr(CI, &DTU, DL)) {
+      MadeCFGChange = true;
+      return true;
+    }
+    break;
   default:;
   }
   return false;

diff  --git a/llvm/test/Transforms/AggressiveInstCombine/memchr.ll b/llvm/test/Transforms/AggressiveInstCombine/memchr.ll
new file mode 100644
index 0000000000000..2601b9f05a97f
--- /dev/null
+++ b/llvm/test/Transforms/AggressiveInstCombine/memchr.ll
@@ -0,0 +1,163 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -passes=aggressive-instcombine --memchr-inline-threshold=5 < %s | FileCheck %s
+
+@str = constant [5 x i8] c"01\002\00", align 1 ; bytes: '0' '1' NUL '2' NUL
+@str_long = constant [8 x i8] c"0123456\00", align 1 ; 8 bytes, longer than the RUN line's threshold of 5
+
+declare ptr @memchr(ptr, i32, i64)
+
+define i1 @test_memchr_null(i32 %x) {
+; CHECK-LABEL: define i1 @test_memchr_null(
+; CHECK-SAME: i32 [[X:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    [[TMP0:%.*]] = trunc i32 [[X]] to i8
+; CHECK-NEXT:    switch i8 [[TMP0]], label %[[ENTRY_SPLIT:.*]] [
+; CHECK-NEXT:      i8 48, label %[[MEMCHR_CASE:.*]]
+; CHECK-NEXT:      i8 49, label %[[MEMCHR_CASE1:.*]]
+; CHECK-NEXT:      i8 0, label %[[MEMCHR_CASE2:.*]]
+; CHECK-NEXT:      i8 50, label %[[MEMCHR_CASE3:.*]]
+; CHECK-NEXT:    ]
+; CHECK:       [[MEMCHR_CASE]]:
+; CHECK-NEXT:    br label %[[MEMCHR_SUCCESS:.*]]
+; CHECK:       [[MEMCHR_CASE1]]:
+; CHECK-NEXT:    br label %[[MEMCHR_SUCCESS]]
+; CHECK:       [[MEMCHR_CASE2]]:
+; CHECK-NEXT:    br label %[[MEMCHR_SUCCESS]]
+; CHECK:       [[MEMCHR_CASE3]]:
+; CHECK-NEXT:    br label %[[MEMCHR_SUCCESS]]
+; CHECK:       [[MEMCHR_SUCCESS]]:
+; CHECK-NEXT:    [[MEMCHR_IDX:%.*]] = phi i64 [ 0, %[[MEMCHR_CASE]] ], [ 1, %[[MEMCHR_CASE1]] ], [ 2, %[[MEMCHR_CASE2]] ], [ 3, %[[MEMCHR_CASE3]] ]
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr @str, i64 [[MEMCHR_IDX]]
+; CHECK-NEXT:    br label %[[ENTRY_SPLIT]]
+; CHECK:       [[ENTRY_SPLIT]]:
+; CHECK-NEXT:    [[MEMCHR4:%.*]] = phi ptr [ null, %[[ENTRY]] ], [ [[TMP1]], %[[MEMCHR_SUCCESS]] ]
+; CHECK-NEXT:    [[ISNULL:%.*]] = icmp eq ptr [[MEMCHR4]], null
+; CHECK-NEXT:    ret i1 [[ISNULL]]
+;
+entry: ; n covers all 5 bytes; the result is only compared against null
+  %memchr = call ptr @memchr(ptr @str, i32 %x, i64 5)
+  %isnull = icmp eq ptr %memchr, null
+  ret i1 %isnull
+}
+
+define ptr @test_memchr(i32 %x) {
+; CHECK-LABEL: define ptr @test_memchr(
+; CHECK-SAME: i32 [[X:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    [[TMP0:%.*]] = trunc i32 [[X]] to i8
+; CHECK-NEXT:    switch i8 [[TMP0]], label %[[ENTRY_SPLIT:.*]] [
+; CHECK-NEXT:      i8 48, label %[[MEMCHR_CASE:.*]]
+; CHECK-NEXT:      i8 49, label %[[MEMCHR_CASE1:.*]]
+; CHECK-NEXT:      i8 0, label %[[MEMCHR_CASE2:.*]]
+; CHECK-NEXT:      i8 50, label %[[MEMCHR_CASE3:.*]]
+; CHECK-NEXT:    ]
+; CHECK:       [[MEMCHR_CASE]]:
+; CHECK-NEXT:    br label %[[MEMCHR_SUCCESS:.*]]
+; CHECK:       [[MEMCHR_CASE1]]:
+; CHECK-NEXT:    br label %[[MEMCHR_SUCCESS]]
+; CHECK:       [[MEMCHR_CASE2]]:
+; CHECK-NEXT:    br label %[[MEMCHR_SUCCESS]]
+; CHECK:       [[MEMCHR_CASE3]]:
+; CHECK-NEXT:    br label %[[MEMCHR_SUCCESS]]
+; CHECK:       [[MEMCHR_SUCCESS]]:
+; CHECK-NEXT:    [[MEMCHR_IDX:%.*]] = phi i64 [ 0, %[[MEMCHR_CASE]] ], [ 1, %[[MEMCHR_CASE1]] ], [ 2, %[[MEMCHR_CASE2]] ], [ 3, %[[MEMCHR_CASE3]] ]
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr @str, i64 [[MEMCHR_IDX]]
+; CHECK-NEXT:    br label %[[ENTRY_SPLIT]]
+; CHECK:       [[ENTRY_SPLIT]]:
+; CHECK-NEXT:    [[MEMCHR4:%.*]] = phi ptr [ null, %[[ENTRY]] ], [ [[TMP1]], %[[MEMCHR_SUCCESS]] ]
+; CHECK-NEXT:    ret ptr [[MEMCHR4]]
+;
+entry: ; n == 5: four cases, because the NUL at index 4 repeats the byte at index 2
+  %memchr = call ptr @memchr(ptr @str, i32 %x, i64 5)
+  ret ptr %memchr
+}
+
+define ptr @test_memchr_smaller_n(i32 %x) {
+; CHECK-LABEL: define ptr @test_memchr_smaller_n(
+; CHECK-SAME: i32 [[X:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    [[TMP0:%.*]] = trunc i32 [[X]] to i8
+; CHECK-NEXT:    switch i8 [[TMP0]], label %[[ENTRY_SPLIT:.*]] [
+; CHECK-NEXT:      i8 48, label %[[MEMCHR_CASE:.*]]
+; CHECK-NEXT:      i8 49, label %[[MEMCHR_CASE1:.*]]
+; CHECK-NEXT:      i8 0, label %[[MEMCHR_CASE2:.*]]
+; CHECK-NEXT:    ]
+; CHECK:       [[MEMCHR_CASE]]:
+; CHECK-NEXT:    br label %[[MEMCHR_SUCCESS:.*]]
+; CHECK:       [[MEMCHR_CASE1]]:
+; CHECK-NEXT:    br label %[[MEMCHR_SUCCESS]]
+; CHECK:       [[MEMCHR_CASE2]]:
+; CHECK-NEXT:    br label %[[MEMCHR_SUCCESS]]
+; CHECK:       [[MEMCHR_SUCCESS]]:
+; CHECK-NEXT:    [[MEMCHR_IDX:%.*]] = phi i64 [ 0, %[[MEMCHR_CASE]] ], [ 1, %[[MEMCHR_CASE1]] ], [ 2, %[[MEMCHR_CASE2]] ]
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr @str, i64 [[MEMCHR_IDX]]
+; CHECK-NEXT:    br label %[[ENTRY_SPLIT]]
+; CHECK:       [[ENTRY_SPLIT]]:
+; CHECK-NEXT:    [[MEMCHR3:%.*]] = phi ptr [ null, %[[ENTRY]] ], [ [[TMP1]], %[[MEMCHR_SUCCESS]] ]
+; CHECK-NEXT:    ret ptr [[MEMCHR3]]
+;
+entry: ; n == 3: only the first three bytes of @str get cases
+  %memchr = call ptr @memchr(ptr @str, i32 %x, i64 3)
+  ret ptr %memchr
+}
+
+; negative tests
+
+define ptr @test_memchr_larger_n(i32 %x) {
+; CHECK-LABEL: define ptr @test_memchr_larger_n(
+; CHECK-SAME: i32 [[X:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[MEMCHR:%.*]] = call ptr @memchr(ptr @str, i32 [[X]], i64 6)
+; CHECK-NEXT:    ret ptr [[MEMCHR]]
+;
+entry: ; n == 6 exceeds the 5-byte string, so the call must be kept
+  %memchr = call ptr @memchr(ptr @str, i32 %x, i64 6)
+  ret ptr %memchr
+}
+
+define ptr @test_memchr_non_constant(i32 %x, ptr %str) {
+; CHECK-LABEL: define ptr @test_memchr_non_constant(
+; CHECK-SAME: i32 [[X:%.*]], ptr [[STR:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[MEMCHR:%.*]] = call ptr @memchr(ptr [[STR]], i32 [[X]], i64 5)
+; CHECK-NEXT:    ret ptr [[MEMCHR]]
+;
+entry: ; the searched memory is a function argument, not a constant string
+  %memchr = call ptr @memchr(ptr %str, i32 %x, i64 5)
+  ret ptr %memchr
+}
+
+define ptr @test_memchr_constant_ch() {
+; CHECK-LABEL: define ptr @test_memchr_constant_ch() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[MEMCHR:%.*]] = call ptr @memchr(ptr @str, i32 49, i64 5)
+; CHECK-NEXT:    ret ptr [[MEMCHR]]
+;
+entry: ; the searched character is a constant, so the call must be kept
+  %memchr = call ptr @memchr(ptr @str, i32 49, i64 5)
+  ret ptr %memchr
+}
+define ptr @test_memchr_dynamic_n(i32 %x, i64 %y) {
+; CHECK-LABEL: define ptr @test_memchr_dynamic_n(
+; CHECK-SAME: i32 [[X:%.*]], i64 [[Y:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[MEMCHR:%.*]] = call ptr @memchr(ptr @str, i32 [[X]], i64 [[Y]])
+; CHECK-NEXT:    ret ptr [[MEMCHR]]
+;
+entry: ; n is not a constant (i64 to match the declaration), so the call must be kept
+  %memchr = call ptr @memchr(ptr @str, i32 %x, i64 %y)
+  ret ptr %memchr
+}
+
+define ptr @test_memchr_long(i32 %x) {
+; CHECK-LABEL: define ptr @test_memchr_long(
+; CHECK-SAME: i32 [[X:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[MEMCHR:%.*]] = call ptr @memchr(ptr @str_long, i32 [[X]], i64 8)
+; CHECK-NEXT:    ret ptr [[MEMCHR]]
+;
+entry: ; n == 8 exceeds --memchr-inline-threshold=5, so the call must be kept
+  %memchr = call ptr @memchr(ptr @str_long, i32 %x, i64 8)
+  ret ptr %memchr
+}


        


More information about the llvm-commits mailing list