[llvm] 6321e4d - [SimplifyLibCalls] Transform memchr(STR, C, N) to chain of ORs

Dávid Bolvanský via llvm-commits llvm-commits at lists.llvm.org
Sun May 7 06:12:09 PDT 2023


Author: Dávid Bolvanský
Date: 2023-05-07T15:12:03+02:00
New Revision: 6321e4ddf7da1d2761ffa2ed368b4f8cc5b75b6e

URL: https://github.com/llvm/llvm-project/commit/6321e4ddf7da1d2761ffa2ed368b4f8cc5b75b6e
DIFF: https://github.com/llvm/llvm-project/commit/6321e4ddf7da1d2761ffa2ed368b4f8cc5b75b6e.diff

LOG: [SimplifyLibCalls] Transform memchr(STR, C, N) to chain of ORs

Motivation:

```
#include <string_view>

size_t findFirst_ABCDEF(std::string_view sv) {
  return sv.find_first_of("ABCDEF");
}
```

memchr("ABCDEF", C,  6) != NULL -> (C == 'A' || C == 'B' || C == 'C' || C == 'D' || C == 'E' || C == 'F') != 0

Reviewed By: nikic

Differential Revision: https://reviews.llvm.org/D128011

Added: 
    

Modified: 
    llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
    llvm/test/Transforms/InstCombine/memchr-7.ll
    llvm/test/Transforms/InstCombine/memchr.ll

Removed: 
    llvm/test/Transforms/PhaseOrdering/memchr-expansion.ll


################################################################################
diff  --git a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
index 65be1243cadab..400c316fc1a1d 100644
--- a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -1365,6 +1365,10 @@ Value *LibCallSimplifier::optimizeMemChr(CallInst *CI, IRBuilderBase &B) {
     return nullptr;
   }
 
+  bool OptForSize = CI->getFunction()->hasOptSize() ||
+                    llvm::shouldOptimizeForSize(CI->getParent(), PSI, BFI,
+                                                PGSOQueryType::IRPass);
+
   // If the char is variable but the input str and length are not we can turn
   // this memchr call into a simple bit field test. Of course this only works
   // when the return value is only checked against null.
@@ -1375,7 +1379,7 @@ Value *LibCallSimplifier::optimizeMemChr(CallInst *CI, IRBuilderBase &B) {
   // memchr("\r\n", C, 2) != nullptr -> (1 << C & ((1 << '\r') | (1 << '\n')))
   // != 0
   //   after bounds check.
-  if (Str.empty() || !isOnlyUsedInZeroEqualityComparison(CI))
+  if (OptForSize || Str.empty() || !isOnlyUsedInZeroEqualityComparison(CI))
     return nullptr;
 
   unsigned char Max =
@@ -1387,8 +1391,34 @@ Value *LibCallSimplifier::optimizeMemChr(CallInst *CI, IRBuilderBase &B) {
   // FIXME: On a 64 bit architecture this prevents us from using the
   // interesting range of alpha ascii chars. We could do better by emitting
   // two bitfields or shifting the range by 64 if no lower chars are used.
-  if (!DL.fitsInLegalInteger(Max + 1))
-    return nullptr;
+  if (!DL.fitsInLegalInteger(Max + 1)) {
+    // Build chain of ORs
+    // Transform:
+    //    memchr("abcd", C, 4) != nullptr
+    // to:
+    //    (C == 'a' || C == 'b' || C == 'c' || C == 'd') != 0
+    std::string SortedStr = Str.str();
+    llvm::sort(SortedStr);
+    // Compute the number of of non-contiguous ranges.
+    unsigned NonContRanges = 1;
+    for (size_t i = 1; i < SortedStr.size(); ++i) {
+      if (SortedStr[i] > SortedStr[i - 1] + 1) {
+        NonContRanges++;
+      }
+    }
+
+    // Restrict this optimization to profitable cases with one or two range
+    // checks.
+    if (NonContRanges > 2)
+      return nullptr;
+
+    SmallVector<Value *> CharCompares;
+    for (char C : SortedStr)
+      CharCompares.push_back(
+          B.CreateICmpEQ(CharVal, ConstantInt::get(CharVal->getType(), C)));
+
+    return B.CreateIntToPtr(B.CreateOr(CharCompares), CI->getType());
+  }
 
   // For the bit field use a power-of-2 type with at least 8 bits to avoid
   // creating unnecessary illegal types.

diff  --git a/llvm/test/Transforms/InstCombine/memchr-7.ll b/llvm/test/Transforms/InstCombine/memchr-7.ll
index 27f251cb06d96..c9bd069debf17 100644
--- a/llvm/test/Transforms/InstCombine/memchr-7.ll
+++ b/llvm/test/Transforms/InstCombine/memchr-7.ll
@@ -1,17 +1,22 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -passes=instcombine -S | FileCheck %s
+; RUN: opt < %s -passes=instcombine -instcombine-infinite-loop-threshold=2 -S | FileCheck %s
 
 @.str = private unnamed_addr constant [27 x i8] c"abcdefghijklmnopqrstuvwxyz\00", align 1
 @.str.1 = private unnamed_addr constant [2 x i8] c"\0D\0A", align 1
+ at .str.2 = private unnamed_addr constant [10 x i8] c"abcdefmno\00", align 1
+ at .str.3 = private unnamed_addr constant [10 x i8] c"abcijkmno\00", align 1
+ at .str.4 = private unnamed_addr constant [7 x i8] c"mnabcc\00", align 1
 
 declare ptr @strchr(ptr, i32)
 declare ptr @memchr(ptr, i32, i64)
 
 define zeroext i1 @strchr_to_memchr_n_equals_len(i32 %c) {
 ; CHECK-LABEL: @strchr_to_memchr_n_equals_len(
-; CHECK-NEXT:    [[MEMCHR:%.*]] = tail call ptr @memchr(ptr noundef nonnull dereferenceable(1) @.str, i32 [[C:%.*]], i64 27)
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ne ptr [[MEMCHR]], null
-; CHECK-NEXT:    ret i1 [[CMP]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i32 [[C:%.*]], 0
+; CHECK-NEXT:    [[TMP2:%.*]] = add i32 [[C]], -97
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ult i32 [[TMP2]], 26
+; CHECK-NEXT:    [[TMP4:%.*]] = or i1 [[TMP1]], [[TMP3]]
+; CHECK-NEXT:    ret i1 [[TMP4]]
 ;
   %call = tail call ptr @strchr(ptr nonnull dereferenceable(27) @.str, i32 %c)
   %cmp = icmp ne ptr %call, null
@@ -33,9 +38,9 @@ define zeroext i1 @memchr_n_equals_len(i32 %c) {
 
 define zeroext i1 @memchr_n_less_than_len(i32 %c) {
 ; CHECK-LABEL: @memchr_n_less_than_len(
-; CHECK-NEXT:    [[CALL:%.*]] = tail call ptr @memchr(ptr noundef nonnull dereferenceable(1) @.str, i32 [[C:%.*]], i64 15)
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ne ptr [[CALL]], null
-; CHECK-NEXT:    ret i1 [[CMP]]
+; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[C:%.*]], -97
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ult i32 [[TMP1]], 15
+; CHECK-NEXT:    ret i1 [[TMP2]]
 ;
   %call = tail call ptr @memchr(ptr @.str, i32 %c, i64 15)
   %cmp = icmp ne ptr %call, null
@@ -45,9 +50,11 @@ define zeroext i1 @memchr_n_less_than_len(i32 %c) {
 
 define zeroext i1 @memchr_n_more_than_len(i32 %c) {
 ; CHECK-LABEL: @memchr_n_more_than_len(
-; CHECK-NEXT:    [[CALL:%.*]] = tail call ptr @memchr(ptr noundef nonnull dereferenceable(1) @.str, i32 [[C:%.*]], i64 30)
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ne ptr [[CALL]], null
-; CHECK-NEXT:    ret i1 [[CMP]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i32 [[C:%.*]], 0
+; CHECK-NEXT:    [[TMP2:%.*]] = add i32 [[C]], -97
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ult i32 [[TMP2]], 26
+; CHECK-NEXT:    [[TMP4:%.*]] = or i1 [[TMP1]], [[TMP3]]
+; CHECK-NEXT:    ret i1 [[TMP4]]
 ;
   %call = tail call ptr @memchr(ptr @.str, i32 %c, i64 30)
   %cmp = icmp ne ptr %call, null
@@ -103,3 +110,45 @@ define zeroext i1 @memchr_n_equals_len2_minsize(i32 %c) minsize {
   %cmp = icmp eq ptr %call, null
   ret i1 %cmp
 }
+
+; Positive test - 2 non-contiguous ranges
+define zeroext i1 @strchr_to_memchr_2_non_cont_ranges(i32 %c) {
+; CHECK-LABEL: @strchr_to_memchr_2_non_cont_ranges(
+; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[C:%.*]], -97
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ult i32 [[TMP1]], 6
+; CHECK-NEXT:    [[TMP3:%.*]] = add i32 [[C]], -109
+; CHECK-NEXT:    [[TMP4:%.*]] = icmp ult i32 [[TMP3]], 3
+; CHECK-NEXT:    [[TMP5:%.*]] = or i1 [[TMP2]], [[TMP4]]
+; CHECK-NEXT:    ret i1 [[TMP5]]
+;
+  %call = tail call ptr @memchr(ptr @.str.2, i32 %c, i64 9)
+  %cmp = icmp ne ptr %call, null
+  ret i1 %cmp
+}
+
+; Positive test - 2 non-contiguous ranges with char duplication
+define zeroext i1 @strchr_to_memchr_2_non_cont_ranges_char_dup(i32 %c) {
+; CHECK-LABEL: @strchr_to_memchr_2_non_cont_ranges_char_dup(
+; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[C:%.*]], -97
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ult i32 [[TMP1]], 3
+; CHECK-NEXT:    [[TMP3:%.*]] = add i32 [[C]], -109
+; CHECK-NEXT:    [[TMP4:%.*]] = icmp ult i32 [[TMP3]], 2
+; CHECK-NEXT:    [[TMP5:%.*]] = or i1 [[TMP2]], [[TMP4]]
+; CHECK-NEXT:    ret i1 [[TMP5]]
+;
+  %call = tail call ptr @memchr(ptr @.str.4, i32 %c, i64 6)
+  %cmp = icmp ne ptr %call, null
+  ret i1 %cmp
+}
+
+; Negative test - more than 2 non-contiguous ranges
+define zeroext i1 @strchr_to_memchr_3_non_cont_ranges(i32 %c) {
+; CHECK-LABEL: @strchr_to_memchr_3_non_cont_ranges(
+; CHECK-NEXT:    [[CALL:%.*]] = tail call ptr @memchr(ptr noundef nonnull dereferenceable(1) @.str.3, i32 [[C:%.*]], i64 9)
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ne ptr [[CALL]], null
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %call = tail call ptr @memchr(ptr @.str.3, i32 %c, i64 9)
+  %cmp = icmp ne ptr %call, null
+  ret i1 %cmp
+}

diff  --git a/llvm/test/Transforms/InstCombine/memchr.ll b/llvm/test/Transforms/InstCombine/memchr.ll
index 7929d435b2484..213238cfdea1c 100644
--- a/llvm/test/Transforms/InstCombine/memchr.ll
+++ b/llvm/test/Transforms/InstCombine/memchr.ll
@@ -174,9 +174,9 @@ define i1 @test14(i32 %C) {
 
 define i1 @test15(i32 %C) {
 ; CHECK-LABEL: @test15(
-; CHECK-NEXT:    [[DST:%.*]] = call ptr @memchr(ptr noundef nonnull dereferenceable(1) @negative, i32 [[C:%.*]], i32 3)
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ne ptr [[DST]], null
-; CHECK-NEXT:    ret i1 [[CMP]]
+; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[C:%.*]], 2
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ult i32 [[TMP1]], 3
+; CHECK-NEXT:    ret i1 [[TMP2]]
 ;
   %dst = call ptr @memchr(ptr @negative, i32 %C, i32 3)
   %cmp = icmp ne ptr %dst, null

diff  --git a/llvm/test/Transforms/PhaseOrdering/memchr-expansion.ll b/llvm/test/Transforms/PhaseOrdering/memchr-expansion.ll
deleted file mode 100644
index 2994ccaef0073..0000000000000
--- a/llvm/test/Transforms/PhaseOrdering/memchr-expansion.ll
+++ /dev/null
@@ -1,56 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -passes='instcombine,simplifycfg' -S < %s  | FileCheck %s
-
- at .str = private constant [7 x i8] c"abcdef\00", align 1
-
-declare ptr @memchr(ptr noundef, i32 noundef, i64 noundef)
-declare i32 @foo()
-
-define i32 @memchr_to_switch_br(i32 %a) {
-; CHECK-LABEL: @memchr_to_switch_br(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[CALL:%.*]] = call ptr @memchr(ptr noundef nonnull dereferenceable(1) @.str, i32 noundef [[A:%.*]], i64 noundef 6)
-; CHECK-NEXT:    [[CMP_NOT:%.*]] = icmp eq ptr [[CALL]], null
-; CHECK-NEXT:    br i1 [[CMP_NOT]], label [[RETURN:%.*]], label [[IF_THEN:%.*]]
-; CHECK:       if.then:
-; CHECK-NEXT:    [[CALL1:%.*]] = call i32 @foo()
-; CHECK-NEXT:    br label [[RETURN]]
-; CHECK:       return:
-; CHECK-NEXT:    [[RETVAL_0:%.*]] = phi i32 [ [[CALL1]], [[IF_THEN]] ], [ -1, [[ENTRY:%.*]] ]
-; CHECK-NEXT:    ret i32 [[RETVAL_0]]
-;
-entry:
-  %call = call ptr @memchr(ptr noundef nonnull @.str, i32 noundef %a, i64 noundef 6) #3
-  %cmp.not = icmp eq ptr %call, null
-  br i1 %cmp.not, label %return, label %if.then
-
-if.then:                                          ; preds = %entry
-  %call1 = call i32 @foo() #3
-  br label %return
-
-return:                                           ; preds = %entry, %if.then
-  %retval.0 = phi i32 [ %call1, %if.then ], [ -1, %entry ]
-  ret i32 %retval.0
-}
-
-define i1 @memchr_to_switch_eq_ret(i32 %c) {
-; CHECK-LABEL: @memchr_to_switch_eq_ret(
-; CHECK-NEXT:    [[P:%.*]] = tail call ptr @memchr(ptr noundef nonnull dereferenceable(1) @.str, i32 noundef [[C:%.*]], i64 noundef 6)
-; CHECK-NEXT:    [[RET:%.*]] = icmp eq ptr [[P]], null
-; CHECK-NEXT:    ret i1 [[RET]]
-;
-  %p = tail call ptr @memchr(ptr noundef nonnull @.str, i32 noundef %c, i64 noundef 6)
-  %ret = icmp eq ptr %p, null
-  ret i1 %ret
-}
-
-define i1 @memchr_to_switch_ne_ret(i32 %c) {
-; CHECK-LABEL: @memchr_to_switch_ne_ret(
-; CHECK-NEXT:    [[P:%.*]] = tail call ptr @memchr(ptr noundef nonnull dereferenceable(1) @.str, i32 noundef [[C:%.*]], i64 noundef 6)
-; CHECK-NEXT:    [[RET:%.*]] = icmp ne ptr [[P]], null
-; CHECK-NEXT:    ret i1 [[RET]]
-;
-  %p = tail call ptr @memchr(ptr noundef nonnull @.str, i32 noundef %c, i64 noundef 6)
-  %ret = icmp ne ptr %p, null
-  ret i1 %ret
-}


        


More information about the llvm-commits mailing list