[clang] [clang][bytecode] Implement __builtin_{memchr,strchr,char_memchr} (PR #130420)

Timm Baeder via cfe-commits cfe-commits at lists.llvm.org
Sat Mar 8 07:17:20 PST 2025


https://github.com/tbaederr created https://github.com/llvm/llvm-project/pull/130420

LLVM has recently started to use `__builtin_memchr` at compile time, so implement it. This still needs some work, but the basics are done.

>From 956594d8c47169a9f45eb2aae03085f79d295390 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Timm=20B=C3=A4der?= <tbaeder at redhat.com>
Date: Sat, 8 Mar 2025 16:11:37 +0100
Subject: [PATCH] [clang][bytecode] Implement
 __builtin_{memchr,strchr,char_memchr}

---
 clang/lib/AST/ByteCode/InterpBuiltin.cpp      | 107 +++++++++++++++-
 clang/test/AST/ByteCode/builtin-functions.cpp | 118 ++++++++++++++++++
 2 files changed, 224 insertions(+), 1 deletion(-)

diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index 00f99745862ee..b8c4ef2f48a79 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -1960,13 +1960,103 @@ static bool interp__builtin_memcmp(InterpState &S, CodePtr OpPC,
 
   // However, if we read all the available bytes but were instructed to read
   // even more, diagnose this as a "read of dereferenced one-past-the-end
-  // pointer". This is what would happen if we called CheckRead() on every array
+  // pointer". This is what would happen if we called CheckLoad() on every array
   // element.
   S.FFDiag(S.Current->getSource(OpPC), diag::note_constexpr_access_past_end)
       << AK_Read << S.Current->getRange(OpPC);
   return false;
 }
 
+/// Evaluates memchr, strchr and __builtin_char_memchr: scans from the first
+/// argument for the character in the second (limited by the third, if any) and
+/// pushes a pointer to the first match, or a null pointer if there is none.
+static bool interp__builtin_memchr(InterpState &S, CodePtr OpPC,
+                                   const InterpFrame *Frame,
+                                   const Function *Func, const CallExpr *Call) {
+  unsigned ID = Func->getBuiltinID();
+  if (ID == Builtin::BImemchr || ID == Builtin::BIwcschr ||
+      ID == Builtin::BIstrchr || ID == Builtin::BIwmemchr)
+    diagnoseNonConstexprBuiltin(S, OpPC, ID);
+
+  const Pointer &Ptr = getParam<Pointer>(Frame, 0);
+  const PrimType DesiredT = *S.getContext().classify(Call->getArg(1));
+  APSInt Desired;
+  std::optional<APSInt> MaxLength;
+  if (Call->getNumArgs() == 3) {
+    // The length is peeked at offset 0; the character lies just beyond it.
+    const PrimType LengthT = *S.getContext().classify(Call->getArg(2));
+    MaxLength = peekToAPSInt(S.Stk, LengthT, 0);
+    Desired = peekToAPSInt(
+        S.Stk, DesiredT, align(primSize(LengthT)) + align(primSize(DesiredT)));
+  } else {
+    Desired = peekToAPSInt(S.Stk, DesiredT);
+  }
+
+  if (MaxLength && MaxLength->isZero()) {
+    S.Stk.push<Pointer>();
+    return true;
+  }
+
+  if (Ptr.isDummy())
+    return false;
+
+  // Null is only okay if the given size is 0.
+  if (Ptr.isZero()) {
+    S.FFDiag(S.Current->getSource(OpPC), diag::note_constexpr_access_null)
+        << AK_Read;
+    return false;
+  }
+
+  QualType ElemTy = Ptr.getFieldDesc()->isArray()
+                        ? Ptr.getFieldDesc()->getElemQualType()
+                        : Ptr.getFieldDesc()->getType();
+  bool IsRawByte = ID == Builtin::BImemchr || ID == Builtin::BI__builtin_memchr;
+
+  // Give up on byte-oriented matching against multibyte elements.
+  if (IsRawByte && !isOneByteCharacterType(ElemTy)) {
+    S.FFDiag(S.Current->getSource(OpPC),
+             diag::note_constexpr_memchr_unsupported)
+        << S.getASTContext().BuiltinInfo.getQuotedName(ID) << ElemTy;
+    return false;
+  }
+
+  const bool IsStrchr =
+      ID == Builtin::BIstrchr || ID == Builtin::BI__builtin_strchr;
+  const unsigned CharWidth = S.getASTContext().getCharWidth();
+  // strchr compares directly to the passed integer, and therefore
+  // always fails if given an int that is not a char.
+  if (IsStrchr && Desired != Desired.trunc(CharWidth).getSExtValue()) {
+    S.Stk.push<Pointer>();
+    return true;
+  }
+
+  uint64_t DesiredVal = Desired.trunc(CharWidth).getZExtValue();
+
+  // MaxLength counts elements starting at Ptr, which may point into the middle
+  // of an array, so measure progress relative to the starting index.
+  const size_t StartIndex = Ptr.getIndex();
+  for (size_t Index = StartIndex;; ++Index) {
+    if (MaxLength && Index - StartIndex == MaxLength->getZExtValue())
+      break;
+
+    const Pointer &ElemPtr = Index > 0 ? Ptr.atIndex(Index) : Ptr;
+    if (!CheckLoad(S, OpPC, ElemPtr))
+      return false;
+
+    unsigned char V = static_cast<unsigned char>(ElemPtr.deref<char>());
+    if (V == DesiredVal) {
+      S.Stk.push<Pointer>(ElemPtr);
+      return true;
+    }
+
+    if (IsStrchr && V == 0)
+      break;
+  }
+
+  S.Stk.push<Pointer>();
+  return true;
+}
+
 bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const Function *F,
                       const CallExpr *Call, uint32_t BuiltinID) {
   const InterpFrame *Frame = S.Current;
@@ -2445,6 +2535,21 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const Function *F,
       return false;
     break;
 
+  case Builtin::BImemchr:
+  case Builtin::BI__builtin_memchr:
+  case Builtin::BIstrchr:
+  case Builtin::BI__builtin_strchr:
+#if 0
+  case Builtin::BIwcschr:
+  case Builtin::BI__builtin_wcschr:
+  case Builtin::BImemchr:
+  case Builtin::BI__builtin_wmemchr:
+#endif
+  case Builtin::BI__builtin_char_memchr:
+    if (!interp__builtin_memchr(S, OpPC, Frame, F, Call))
+      return false;
+    break;
+
   default:
     S.FFDiag(S.Current->getLocation(OpPC),
              diag::note_invalid_subexpr_in_const_expr)
diff --git a/clang/test/AST/ByteCode/builtin-functions.cpp b/clang/test/AST/ByteCode/builtin-functions.cpp
index 75380f99901a2..dbff9164a91c1 100644
--- a/clang/test/AST/ByteCode/builtin-functions.cpp
+++ b/clang/test/AST/ByteCode/builtin-functions.cpp
@@ -18,6 +18,8 @@
 extern "C" {
   typedef decltype(sizeof(int)) size_t;
   extern size_t wcslen(const wchar_t *p);
+  extern void *memchr(const void *s, int c, size_t n);
+  extern char *strchr(const char *s, int c);
 }
 
 namespace strcmp {
@@ -1351,3 +1353,119 @@ namespace Memcmp {
   static_assert(__builtin_wmemcmp(L"abab\0banana", L"abab\0canada", 6) == -1);
   static_assert(__builtin_wmemcmp(L"abab\0banana", L"abab\0canada", 5) == 0);
 }
+
+namespace Memchr { // Constant-evaluation checks for the memchr builtins.
+  constexpr const char *kStr = "abca\xff\0d";
+  constexpr char kFoo[] = {'f', 'o', 'o'};
+
+  static_assert(__builtin_memchr(kStr, 'a', 0) == nullptr);
+  static_assert(__builtin_memchr(kStr, 'a', 1) == kStr);
+  static_assert(__builtin_memchr(kStr, '\0', 5) == nullptr); // The NUL is at index 5, just outside the limit.
+  static_assert(__builtin_memchr(kStr, '\0', 6) == kStr + 5);
+  static_assert(__builtin_memchr(kStr, '\xff', 8) == kStr + 4);
+  static_assert(__builtin_memchr(kStr, '\xff' + 256, 8) == kStr + 4); // Only the low byte of the value is compared.
+  static_assert(__builtin_memchr(kStr, '\xff' - 256, 8) == kStr + 4);
+  static_assert(__builtin_memchr(kFoo, 'x', 3) == nullptr);
+  static_assert(__builtin_memchr(kFoo, 'x', 4) == nullptr); // both-error {{not an integral constant}} \
+                                                            // both-note {{dereferenced one-past-the-end}}
+  static_assert(__builtin_memchr(nullptr, 'x', 3) == nullptr); // both-error {{not an integral constant}} \
+                                                               // both-note {{dereferenced null}}
+  static_assert(__builtin_memchr(nullptr, 'x', 0) == nullptr);
+
+  // Repeat the cases above for char8_t data when CHAR8_T is defined.
+#if defined(CHAR8_T)
+  constexpr const char8_t *kU8Str = u8"abca\xff\0d";
+  constexpr char8_t kU8Foo[] = {u8'f', u8'o', u8'o'};
+  static_assert(__builtin_memchr(kU8Str, u8'a', 0) == nullptr);
+  static_assert(__builtin_memchr(kU8Str, u8'a', 1) == kU8Str);
+  static_assert(__builtin_memchr(kU8Str, u8'\0', 5) == nullptr);
+  static_assert(__builtin_memchr(kU8Str, u8'\0', 6) == kU8Str + 5);
+  static_assert(__builtin_memchr(kU8Str, u8'\xff', 8) == kU8Str + 4);
+  static_assert(__builtin_memchr(kU8Str, u8'\xff' + 256, 8) == kU8Str + 4);
+  static_assert(__builtin_memchr(kU8Str, u8'\xff' - 256, 8) == kU8Str + 4);
+  static_assert(__builtin_memchr(kU8Foo, u8'x', 3) == nullptr);
+  static_assert(__builtin_memchr(kU8Foo, u8'x', 4) == nullptr); // both-error {{not an integral constant}} \
+                                                                // both-note {{dereferenced one-past-the-end}}
+  static_assert(__builtin_memchr(nullptr, u8'x', 3) == nullptr); // both-error {{not an integral constant}} \
+                                                                 // both-note {{dereferenced null}}
+  static_assert(__builtin_memchr(nullptr, u8'x', 0) == nullptr);
+#endif
+
+  extern struct Incomplete incomplete;
+  static_assert(__builtin_memchr(&incomplete, 0, 0u) == nullptr);
+  static_assert(__builtin_memchr(&incomplete, 0, 1u) == nullptr); // both-error {{not an integral constant}} \
+                                                                  // ref-note {{read of incomplete type 'struct Incomplete'}}
+  // The searched value is compared as an unsigned char: -16 finds 0xf0 and 0x80 finds -128.
+  const unsigned char &u1 = 0xf0;
+  auto &&i1 = (const signed char []){-128};
+  static_assert(__builtin_memchr(&u1, -(0x0f + 1), 1) == &u1);
+  static_assert(__builtin_memchr(i1, 0x80, 1) == i1);
+  // Element types other than one-byte character types (an enum and bool here) are diagnosed as not supported.
+  enum class E : unsigned char {};
+  struct EPair { E e, f; };
+  constexpr EPair ee{E{240}};
+  static_assert(__builtin_memchr(&ee.e, 240, 1) == &ee.e); // both-error {{constant}} \
+                                                           // both-note {{not supported}}
+
+  constexpr bool kBool[] = {false, true, false};
+  constexpr const bool *const kBoolPastTheEndPtr = kBool + 3;
+  static_assert(sizeof(bool) != 1u || __builtin_memchr(kBoolPastTheEndPtr - 3, 1, 99) == kBool + 1); // both-error {{constant}} \
+                                                                                                     // both-note {{not supported}}
+  static_assert(sizeof(bool) != 1u || __builtin_memchr(kBool + 1, 0, 99) == kBoolPastTheEndPtr - 1); // both-error {{constant}} \
+                                                                                                     // both-note {{not supported}}
+  static_assert(sizeof(bool) != 1u || __builtin_memchr(kBoolPastTheEndPtr - 3, -1, 3) == nullptr); // both-error {{constant}} \
+                                                                                                   // both-note {{not supported}}
+  static_assert(sizeof(bool) != 1u || __builtin_memchr(kBoolPastTheEndPtr, 0, 1) == nullptr); // both-error {{constant}} \
+                                                                                              // both-note {{not supported}}
+  // __builtin_char_memchr: the same cases; its result is also dereferenced and written through below.
+  static_assert(__builtin_char_memchr(kStr, 'a', 0) == nullptr);
+  static_assert(__builtin_char_memchr(kStr, 'a', 1) == kStr);
+  static_assert(__builtin_char_memchr(kStr, '\0', 5) == nullptr);
+  static_assert(__builtin_char_memchr(kStr, '\0', 6) == kStr + 5);
+  static_assert(__builtin_char_memchr(kStr, '\xff', 8) == kStr + 4);
+  static_assert(__builtin_char_memchr(kStr, '\xff' + 256, 8) == kStr + 4);
+  static_assert(__builtin_char_memchr(kStr, '\xff' - 256, 8) == kStr + 4);
+  static_assert(__builtin_char_memchr(kFoo, 'x', 3) == nullptr);
+  static_assert(__builtin_char_memchr(kFoo, 'x', 4) == nullptr); // both-error {{not an integral constant}} \
+                                                                 // both-note {{dereferenced one-past-the-end}}
+  static_assert(__builtin_char_memchr(nullptr, 'x', 3) == nullptr); // both-error {{not an integral constant}} \
+                                                                    // both-note {{dereferenced null}}
+  static_assert(__builtin_char_memchr(nullptr, 'x', 0) == nullptr);
+
+  static_assert(*__builtin_char_memchr(kStr, '\xff', 8) == '\xff');
+  constexpr bool char_memchr_mutable() {
+    char buffer[] = "mutable";
+    *__builtin_char_memchr(buffer, 't', 8) = 'r';
+    *__builtin_char_memchr(buffer, 'm', 8) = 'd';
+    return __builtin_strcmp(buffer, "durable") == 0;
+  }
+  static_assert(char_memchr_mutable());
+  // Calling the plain library memchr is rejected as a non-constexpr function.
+  constexpr bool b = !memchr("hello", 'h', 3); // both-error {{constant expression}} \
+                                               // both-note {{non-constexpr function 'memchr' cannot be used in a constant expression}}
+
+}
+
+namespace Strchr { // Constant-evaluation checks for __builtin_strchr.
+  constexpr const char *kStr = "abca\xff\0d";
+  constexpr char kFoo[] = {'f', 'o', 'o'};
+  static_assert(__builtin_strchr(kStr, 'a') == kStr);
+  static_assert(__builtin_strchr(kStr, 'b') == kStr + 1);
+  static_assert(__builtin_strchr(kStr, 'c') == kStr + 2);
+  static_assert(__builtin_strchr(kStr, 'd') == nullptr); // 'd' lies after the embedded NUL, where the search stops.
+  static_assert(__builtin_strchr(kStr, 'e') == nullptr);
+  static_assert(__builtin_strchr(kStr, '\0') == kStr + 5);
+  static_assert(__builtin_strchr(kStr, 'a' + 256) == nullptr); // A value that is not a char never matches.
+  static_assert(__builtin_strchr(kStr, 'a' - 256) == nullptr);
+  static_assert(__builtin_strchr(kStr, '\xff') == kStr + 4);
+  static_assert(__builtin_strchr(kStr, '\xff' + 256) == nullptr);
+  static_assert(__builtin_strchr(kStr, '\xff' - 256) == nullptr);
+  static_assert(__builtin_strchr(kFoo, 'o') == kFoo + 1);
+  static_assert(__builtin_strchr(kFoo, 'x') == nullptr); // both-error {{not an integral constant}} \
+                                                         // both-note {{dereferenced one-past-the-end}}
+  static_assert(__builtin_strchr(nullptr, 'x') == nullptr); // both-error {{not an integral constant}} \
+                                                            // both-note {{dereferenced null}}
+  // Calling the plain library strchr is rejected as a non-constexpr function.
+  constexpr bool a = !strchr("hello", 'h'); // both-error {{constant expression}} \
+                                            // both-note {{non-constexpr function 'strchr' cannot be used in a constant expression}}
+}



More information about the cfe-commits mailing list