[llvm] [Transforms] Mirror optimizeStrRChr with optimizeStrChr (PR #77685)

via llvm-commits llvm-commits at lists.llvm.org
Wed Jan 10 17:22:38 PST 2024


https://github.com/AtariDreams updated https://github.com/llvm/llvm-project/pull/77685

>From ff795beb9c39ffb6cd07e835ea48f8b51bc60903 Mon Sep 17 00:00:00 2001
From: Rose <83477269+AtariDreams at users.noreply.github.com>
Date: Wed, 10 Jan 2024 15:44:36 -0500
Subject: [PATCH 1/2] [Transforms] Mirror optimizeStrRChr with optimizeStrChr

strchr and strrchr have a lot in common, so most of the optimizations applied to one can be applied to the other. The only difference is that strrchr works backwards: the "end" of the string is the first argument and the "start" is the null terminator. We can therefore perform the same transformations with the same checks, especially since memrchr does the same as memchr but searches backwards from the null terminator.
---
 .../lib/Transforms/Utils/SimplifyLibCalls.cpp | 58 +++++++++++++++----
 1 file changed, 46 insertions(+), 12 deletions(-)

diff --git a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
index a7cd68e860e467..a16a8d80c54ba4 100644
--- a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -483,8 +483,7 @@ Value *LibCallSimplifier::optimizeStrChr(CallInst *CI, IRBuilderBase &B) {
     Type *SizeTTy = IntegerType::get(CI->getContext(), SizeTBits);
     return copyFlags(*CI,
                      emitMemChr(SrcStr, CharVal, // include nul.
-                                ConstantInt::get(SizeTTy, Len), B,
-                                DL, TLI));
+                                ConstantInt::get(SizeTTy, Len), B, DL, TLI));
   }
 
   if (CharC->isZero()) {
@@ -523,22 +522,57 @@ Value *LibCallSimplifier::optimizeStrRChr(CallInst *CI, IRBuilderBase &B) {
   ConstantInt *CharC = dyn_cast<ConstantInt>(CharVal);
   annotateNonNullNoUndefBasedOnAccess(CI, 0);
 
+  if (!CharC) {
+    uint64_t Len = GetStringLength(SrcStr);
+    if (Len)
+      annotateDereferenceableBytes(CI, 0, Len);
+    else
+      return nullptr;
+
+    Function *Callee = CI->getCalledFunction();
+    FunctionType *FT = Callee->getFunctionType();
+    unsigned IntBits = TLI->getIntSize();
+    if (!FT->getParamType(1)->isIntegerTy(IntBits)) // memrchr needs 'int'.
+      return nullptr;
+
+    unsigned SizeTBits = TLI->getSizeTSize(*CI->getModule());
+    Type *SizeTTy = IntegerType::get(CI->getContext(), SizeTBits);
+
+    // Try to expand strrchr to the memrchr nonstandard extension if it's
+    // available, or simply fail otherwise.
+    return copyFlags(*CI,
+                     emitMemRChr(SrcStr, CharVal, // include nul.
+                                 ConstantInt::get(SizeTTy, Len), B, DL, TLI));
+  }
+
+  if (CharC->isZero()) {
+    Value *NullPtr = Constant::getNullValue(CI->getType());
+    if (isOnlyUsedInEqualityComparison(CI, NullPtr))
+      // Pre-empt the transformation to strlen below and fold
+      // strrchr(A, '\0') == null to false.
+      return B.CreateIntToPtr(B.getTrue(), CI->getType());
+  }
+
+  // Otherwise, the character is a constant, see if the first argument is
+  // a string literal.  If so, we can constant fold.
   StringRef Str;
   if (!getConstantStringInfo(SrcStr, Str)) {
-    // strrchr(s, 0) -> strchr(s, 0)
-    if (CharC && CharC->isZero())
-      return copyFlags(*CI, emitStrChr(SrcStr, '\0', B, TLI));
+    if (CharC->isZero()) // strrchr(p, 0) -> p + strlen(p)
+      if (Value *StrLen = emitStrLen(SrcStr, B, DL, TLI))
+        return B.CreateInBoundsGEP(B.getInt8Ty(), SrcStr, StrLen, "strrchr");
     return nullptr;
   }
 
-  unsigned SizeTBits = TLI->getSizeTSize(*CI->getModule());
-  Type *SizeTTy = IntegerType::get(CI->getContext(), SizeTBits);
+  // Compute the offset, make sure to handle the case when we're searching for
+  // zero (a weird way to spell strlen).
+  size_t I = (0xFF & CharC->getSExtValue()) == 0
+                 ? Str.size()
+                 : Str.rfind(CharC->getSExtValue());
+  if (I == StringRef::npos) // Didn't find the char.  strrchr returns null.
+    return Constant::getNullValue(CI->getType());
 
-  // Try to expand strrchr to the memrchr nonstandard extension if it's
-  // available, or simply fail otherwise.
-  uint64_t NBytes = Str.size() + 1;   // Include the terminating nul.
-  Value *Size = ConstantInt::get(SizeTTy, NBytes);
-  return copyFlags(*CI, emitMemRChr(SrcStr, CharVal, Size, B, DL, TLI));
+  // strrchr(s+n,c)  -> gep(s+n+i,c)
+  return B.CreateInBoundsGEP(B.getInt8Ty(), SrcStr, B.getInt64(I), "strrchr");
 }
 
 Value *LibCallSimplifier::optimizeStrCmp(CallInst *CI, IRBuilderBase &B) {

>From 6b31ce114c444cd8bf141f53619d9c383e82920e Mon Sep 17 00:00:00 2001
From: Rose <83477269+AtariDreams at users.noreply.github.com>
Date: Wed, 10 Jan 2024 20:21:57 -0500
Subject: [PATCH 2/2] Fold strrchr(a5 + 5, '\0') to null

Either both the strrchr and strchr transforms should resolve to null, or neither of them should, because strchr(a5 + 5, '\0') is also undefined behavior, yet the compiler is lenient about it.

If this is not desirable, we should perhaps have both return poison values instead.
---
 llvm/test/Transforms/InstCombine/strcall-no-nul.ll | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/llvm/test/Transforms/InstCombine/strcall-no-nul.ll b/llvm/test/Transforms/InstCombine/strcall-no-nul.ll
index fef06a03639c7c..ca07e4cb0f18e2 100644
--- a/llvm/test/Transforms/InstCombine/strcall-no-nul.ll
+++ b/llvm/test/Transforms/InstCombine/strcall-no-nul.ll
@@ -101,11 +101,11 @@ define void @fold_strncmp_past_end(ptr %pcmp) {
 }
 
 
-; Fold strrchr(a5 + 5, '\0') to poison (it's UB).
+; Fold strrchr(a5 + 5, '\0') to a5 + 5 (the pointer just past the end of a5).
 
 define ptr @fold_strrchr_past_end(i32 %c) {
 ; CHECK-LABEL: @fold_strrchr_past_end(
-; CHECK-NEXT:    ret ptr poison
+; CHECK-NEXT:    ret ptr getelementptr inbounds ([5 x i8], ptr @a5, i64 1, i64 0)
 ;
   %p5 = getelementptr [5 x i8], ptr @a5, i32 0, i32 5
   %r = call ptr @strrchr(ptr %p5, i32 0)



More information about the llvm-commits mailing list