[clang] [clang][bytecode] Implement __builtin_{memchr,strchr,char_memchr} (PR #130420)
via cfe-commits
cfe-commits at lists.llvm.org
Sat Mar 8 07:17:52 PST 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-clang
Author: Timm Baeder (tbaederr)
<details>
<summary>Changes</summary>
LLVM has recently started to use `__builtin_memchr` at compile time, so implement this. Still needs some work, but the basics are done.
---
Full diff: https://github.com/llvm/llvm-project/pull/130420.diff
2 Files Affected:
- (modified) clang/lib/AST/ByteCode/InterpBuiltin.cpp (+106-1)
- (modified) clang/test/AST/ByteCode/builtin-functions.cpp (+118)
``````````diff
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index 00f99745862ee..b8c4ef2f48a79 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -1960,13 +1960,103 @@ static bool interp__builtin_memcmp(InterpState &S, CodePtr OpPC,
// However, if we read all the available bytes but were instructed to read
// even more, diagnose this as a "read of dereferenced one-past-the-end
- // pointer". This is what would happen if we called CheckRead() on every array
+ // pointer". This is what would happen if we called CheckLoad() on every array
// element.
S.FFDiag(S.Current->getSource(OpPC), diag::note_constexpr_access_past_end)
<< AK_Read << S.Current->getRange(OpPC);
return false;
}
+/// Constant-evaluates memchr, strchr, __builtin_memchr, __builtin_strchr and
+/// __builtin_char_memchr.
+///
+/// The haystack pointer is read from parameter 0; the needle and, for the
+/// three-argument forms, the maximum length are peeked from the stack.
+/// On success this pushes a pointer to the first matching element (or a null
+/// pointer if nothing matched) and returns true. It returns false when the
+/// haystack is a dummy or null pointer, when a raw-byte search is requested
+/// on elements that are not one-byte characters, or when loading an element
+/// fails (e.g. a read past the end of the array).
+static bool interp__builtin_memchr(InterpState &S, CodePtr OpPC,
+                                   const InterpFrame *Frame,
+                                   const Function *Func, const CallExpr *Call) {
+  unsigned ID = Func->getBuiltinID();
+  // The non-__builtin_ library spellings are diagnosed as non-constexpr.
+  if (ID == Builtin::BImemchr || ID == Builtin::BIwcschr ||
+      ID == Builtin::BIstrchr || ID == Builtin::BIwmemchr)
+    diagnoseNonConstexprBuiltin(S, OpPC, ID);
+
+  const Pointer &Ptr = getParam<Pointer>(Frame, 0);
+  APSInt Desired;
+  std::optional<APSInt> MaxLength;
+  if (Call->getNumArgs() == 3) {
+    // memchr-style call: the length is on top of the stack and the needle
+    // sits directly below it.
+    MaxLength =
+        peekToAPSInt(S.Stk, *S.getContext().classify(Call->getArg(2)), 0);
+    Desired = peekToAPSInt(
+        S.Stk, *S.getContext().classify(Call->getArg(1)),
+        align(primSize(*S.getContext().classify(Call->getArg(2)))) +
+            align(primSize(*S.getContext().classify(Call->getArg(1)))));
+  } else {
+    // strchr-style call: only the needle is on the stack.
+    Desired = peekToAPSInt(S.Stk, *S.getContext().classify(Call->getArg(1)));
+  }
+
+  // A zero-length search never inspects the haystack, so it yields null even
+  // for null or otherwise unreadable pointers.
+  if (MaxLength && MaxLength->isZero()) {
+    S.Stk.push<Pointer>();
+    return true;
+  }
+
+  if (Ptr.isDummy())
+    return false;
+
+  // Null is only okay if the given size is 0.
+  if (Ptr.isZero()) {
+    S.FFDiag(S.Current->getSource(OpPC), diag::note_constexpr_access_null)
+        << AK_Read;
+    return false;
+  }
+
+  QualType ElemTy = Ptr.getFieldDesc()->isArray()
+                        ? Ptr.getFieldDesc()->getElemQualType()
+                        : Ptr.getFieldDesc()->getType();
+  bool IsRawByte = ID == Builtin::BImemchr || ID == Builtin::BI__builtin_memchr;
+
+  // Give up on byte-oriented matching against multibyte elements.
+  if (IsRawByte && !isOneByteCharacterType(ElemTy)) {
+    S.FFDiag(S.Current->getSource(OpPC),
+             diag::note_constexpr_memchr_unsupported)
+        << S.getASTContext().BuiltinInfo.getQuotedName(ID) << ElemTy;
+    return false;
+  }
+
+  if (ID == Builtin::BIstrchr || ID == Builtin::BI__builtin_strchr) {
+    // strchr compares directly to the passed integer, and therefore
+    // always fails if given an int that is not a char.
+    if (Desired !=
+        Desired.trunc(S.getASTContext().getCharWidth()).getSExtValue()) {
+      S.Stk.push<Pointer>();
+      return true;
+    }
+  }
+
+  // Elements are compared as unsigned char against the needle truncated to
+  // char width.
+  uint64_t DesiredVal =
+      Desired.trunc(S.getASTContext().getCharWidth()).getZExtValue();
+  bool StopAtZero =
+      (ID == Builtin::BIstrchr || ID == Builtin::BI__builtin_strchr);
+
+  // The haystack may already point into the middle of an array, so track the
+  // number of elements examined (Step) separately from the starting element
+  // index. MaxLength is relative to the starting element, not to the start
+  // of the underlying array.
+  size_t Index = Ptr.getIndex();
+  size_t Step = 0;
+  for (;;) {
+    const Pointer &ElemPtr =
+        (Index + Step) > 0 ? Ptr.atIndex(Index + Step) : Ptr;
+
+    // Fails (with a diagnostic) when reading past the end of the array.
+    if (!CheckLoad(S, OpPC, ElemPtr))
+      return false;
+
+    unsigned char V = static_cast<unsigned char>(ElemPtr.deref<char>());
+    if (V == DesiredVal) {
+      S.Stk.push<Pointer>(ElemPtr);
+      return true;
+    }
+
+    // strchr stops at the terminator; the terminator itself was already
+    // compared against the needle above.
+    if (StopAtZero && V == 0)
+      break;
+
+    ++Step;
+    if (MaxLength && Step == MaxLength->getZExtValue())
+      break;
+  }
+
+  // No match: the result is a null pointer.
+  S.Stk.push<Pointer>();
+  return true;
+}
+
bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const Function *F,
const CallExpr *Call, uint32_t BuiltinID) {
const InterpFrame *Frame = S.Current;
@@ -2445,6 +2535,21 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const Function *F,
return false;
break;
+ case Builtin::BImemchr:
+ case Builtin::BI__builtin_memchr:
+ case Builtin::BIstrchr:
+ case Builtin::BI__builtin_strchr:
+#if 0
+ case Builtin::BIwcschr:
+ case Builtin::BI__builtin_wcschr:
+ case Builtin::BImemchr:
+ case Builtin::BI__builtin_wmemchr:
+#endif
+ case Builtin::BI__builtin_char_memchr:
+ if (!interp__builtin_memchr(S, OpPC, Frame, F, Call))
+ return false;
+ break;
+
default:
S.FFDiag(S.Current->getLocation(OpPC),
diag::note_invalid_subexpr_in_const_expr)
diff --git a/clang/test/AST/ByteCode/builtin-functions.cpp b/clang/test/AST/ByteCode/builtin-functions.cpp
index 75380f99901a2..dbff9164a91c1 100644
--- a/clang/test/AST/ByteCode/builtin-functions.cpp
+++ b/clang/test/AST/ByteCode/builtin-functions.cpp
@@ -18,6 +18,8 @@
extern "C" {
typedef decltype(sizeof(int)) size_t;
extern size_t wcslen(const wchar_t *p);
+ extern void *memchr(const void *s, int c, size_t n);
+ extern char *strchr(const char *s, int c);
}
namespace strcmp {
@@ -1351,3 +1353,119 @@ namespace Memcmp {
static_assert(__builtin_wmemcmp(L"abab\0banana", L"abab\0canada", 6) == -1);
static_assert(__builtin_wmemcmp(L"abab\0banana", L"abab\0canada", 5) == 0);
}
+
+namespace Memchr { // compile-time tests for __builtin_memchr, __builtin_char_memchr and library memchr
+ constexpr const char *kStr = "abca\xff\0d"; // 7 characters plus the implicit terminator; embedded '\0' at index 5
+ constexpr char kFoo[] = {'f', 'o', 'o'}; // exactly 3 elements, no terminating '\0'
+
+ static_assert(__builtin_memchr(kStr, 'a', 0) == nullptr); // a zero-length search never matches
+ static_assert(__builtin_memchr(kStr, 'a', 1) == kStr);
+ static_assert(__builtin_memchr(kStr, '\0', 5) == nullptr); // the '\0' at index 5 lies outside the first 5 bytes
+ static_assert(__builtin_memchr(kStr, '\0', 6) == kStr + 5);
+ static_assert(__builtin_memchr(kStr, '\xff', 8) == kStr + 4);
+ static_assert(__builtin_memchr(kStr, '\xff' + 256, 8) == kStr + 4); // the needle is truncated to char width before comparing
+ static_assert(__builtin_memchr(kStr, '\xff' - 256, 8) == kStr + 4);
+ static_assert(__builtin_memchr(kFoo, 'x', 3) == nullptr);
+ static_assert(__builtin_memchr(kFoo, 'x', 4) == nullptr); // both-error {{not an integral constant}} \
+ // both-note {{dereferenced one-past-the-end}}
+ static_assert(__builtin_memchr(nullptr, 'x', 3) == nullptr); // both-error {{not an integral constant}} \
+ // both-note {{dereferenced null}}
+ static_assert(__builtin_memchr(nullptr, 'x', 0) == nullptr); // a null haystack is accepted when the length is 0
+
+
+#if defined(CHAR8_T) // same cases as above, on char8_t data
+ constexpr const char8_t *kU8Str = u8"abca\xff\0d";
+ constexpr char8_t kU8Foo[] = {u8'f', u8'o', u8'o'};
+ static_assert(__builtin_memchr(kU8Str, u8'a', 0) == nullptr);
+ static_assert(__builtin_memchr(kU8Str, u8'a', 1) == kU8Str);
+ static_assert(__builtin_memchr(kU8Str, u8'\0', 5) == nullptr);
+ static_assert(__builtin_memchr(kU8Str, u8'\0', 6) == kU8Str + 5);
+ static_assert(__builtin_memchr(kU8Str, u8'\xff', 8) == kU8Str + 4);
+ static_assert(__builtin_memchr(kU8Str, u8'\xff' + 256, 8) == kU8Str + 4);
+ static_assert(__builtin_memchr(kU8Str, u8'\xff' - 256, 8) == kU8Str + 4);
+ static_assert(__builtin_memchr(kU8Foo, u8'x', 3) == nullptr);
+ static_assert(__builtin_memchr(kU8Foo, u8'x', 4) == nullptr); // both-error {{not an integral constant}} \
+ // both-note {{dereferenced one-past-the-end}}
+ static_assert(__builtin_memchr(nullptr, u8'x', 3) == nullptr); // both-error {{not an integral constant}} \
+ // both-note {{dereferenced null}}
+ static_assert(__builtin_memchr(nullptr, u8'x', 0) == nullptr);
+#endif
+
+ extern struct Incomplete incomplete;
+ static_assert(__builtin_memchr(&incomplete, 0, 0u) == nullptr); // length 0 yields null without reading the incomplete object
+ static_assert(__builtin_memchr(&incomplete, 0, 1u) == nullptr); // both-error {{not an integral constant}} \
+ // ref-note {{read of incomplete type 'struct Incomplete'}}
+
+ const unsigned char &u1 = 0xf0;
+ auto &&i1 = (const signed char []){-128};
+ static_assert(__builtin_memchr(&u1, -(0x0f + 1), 1) == &u1); // -16 truncated to one byte is 0xf0
+ static_assert(__builtin_memchr(i1, 0x80, 1) == i1); // elements compare as unsigned char, so -128 matches 0x80
+
+ enum class E : unsigned char {};
+ struct EPair { E e, f; };
+ constexpr EPair ee{E{240}}; // searching this enum-typed field is expected to be diagnosed as unsupported
+ static_assert(__builtin_memchr(&ee.e, 240, 1) == &ee.e); // both-error {{constant}} \
+ // both-note {{not supported}}
+
+ constexpr bool kBool[] = {false, true, false}; // bool elements: every search below is expected to be diagnosed as unsupported
+ constexpr const bool *const kBoolPastTheEndPtr = kBool + 3;
+ static_assert(sizeof(bool) != 1u || __builtin_memchr(kBoolPastTheEndPtr - 3, 1, 99) == kBool + 1); // both-error {{constant}} \
+ // both-note {{not supported}}
+ static_assert(sizeof(bool) != 1u || __builtin_memchr(kBool + 1, 0, 99) == kBoolPastTheEndPtr - 1); // both-error {{constant}} \
+ // both-note {{not supported}}
+ static_assert(sizeof(bool) != 1u || __builtin_memchr(kBoolPastTheEndPtr - 3, -1, 3) == nullptr); // both-error {{constant}} \
+ // both-note {{not supported}}
+ static_assert(sizeof(bool) != 1u || __builtin_memchr(kBoolPastTheEndPtr, 0, 1) == nullptr); // both-error {{constant}} \
+ // both-note {{not supported}}
+
+ static_assert(__builtin_char_memchr(kStr, 'a', 0) == nullptr); // __builtin_char_memchr: same cases as the first group above
+ static_assert(__builtin_char_memchr(kStr, 'a', 1) == kStr);
+ static_assert(__builtin_char_memchr(kStr, '\0', 5) == nullptr);
+ static_assert(__builtin_char_memchr(kStr, '\0', 6) == kStr + 5);
+ static_assert(__builtin_char_memchr(kStr, '\xff', 8) == kStr + 4);
+ static_assert(__builtin_char_memchr(kStr, '\xff' + 256, 8) == kStr + 4);
+ static_assert(__builtin_char_memchr(kStr, '\xff' - 256, 8) == kStr + 4);
+ static_assert(__builtin_char_memchr(kFoo, 'x', 3) == nullptr);
+ static_assert(__builtin_char_memchr(kFoo, 'x', 4) == nullptr); // both-error {{not an integral constant}} \
+ // both-note {{dereferenced one-past-the-end}}
+ static_assert(__builtin_char_memchr(nullptr, 'x', 3) == nullptr); // both-error {{not an integral constant}} \
+ // both-note {{dereferenced null}}
+ static_assert(__builtin_char_memchr(nullptr, 'x', 0) == nullptr);
+
+ static_assert(*__builtin_char_memchr(kStr, '\xff', 8) == '\xff'); // the result can be dereferenced to read the match
+ constexpr bool char_memchr_mutable() { // the result can also be written through when the buffer is mutable
+ char buffer[] = "mutable";
+ *__builtin_char_memchr(buffer, 't', 8) = 'r';
+ *__builtin_char_memchr(buffer, 'm', 8) = 'd';
+ return __builtin_strcmp(buffer, "durable") == 0;
+ }
+ static_assert(char_memchr_mutable());
+
+ constexpr bool b = !memchr("hello", 'h', 3); // both-error {{constant expression}} \
+ // both-note {{non-constexpr function 'memchr' cannot be used in a constant expression}}
+
+}
+
+namespace Strchr { // compile-time tests for __builtin_strchr and library strchr
+ constexpr const char *kStr = "abca\xff\0d"; // embedded '\0' at index 5, followed by 'd'
+ constexpr char kFoo[] = {'f', 'o', 'o'}; // exactly 3 elements, no terminating '\0'
+ static_assert(__builtin_strchr(kStr, 'a') == kStr);
+ static_assert(__builtin_strchr(kStr, 'b') == kStr + 1);
+ static_assert(__builtin_strchr(kStr, 'c') == kStr + 2);
+ static_assert(__builtin_strchr(kStr, 'd') == nullptr); // 'd' sits after the embedded '\0', where the search stops
+ static_assert(__builtin_strchr(kStr, 'e') == nullptr);
+ static_assert(__builtin_strchr(kStr, '\0') == kStr + 5); // the terminator itself can be found
+ static_assert(__builtin_strchr(kStr, 'a' + 256) == nullptr); // a needle that does not fit in a char never matches
+ static_assert(__builtin_strchr(kStr, 'a' - 256) == nullptr);
+ static_assert(__builtin_strchr(kStr, '\xff') == kStr + 4);
+ static_assert(__builtin_strchr(kStr, '\xff' + 256) == nullptr);
+ static_assert(__builtin_strchr(kStr, '\xff' - 256) == nullptr);
+ static_assert(__builtin_strchr(kFoo, 'o') == kFoo + 1); // a match is found before the end of the unterminated array
+ static_assert(__builtin_strchr(kFoo, 'x') == nullptr); // both-error {{not an integral constant}} \
+ // both-note {{dereferenced one-past-the-end}}
+ static_assert(__builtin_strchr(nullptr, 'x') == nullptr); // both-error {{not an integral constant}} \
+ // both-note {{dereferenced null}}
+
+ constexpr bool a = !strchr("hello", 'h'); // both-error {{constant expression}} \
+ // both-note {{non-constexpr function 'strchr' cannot be used in a constant expression}}
+}
``````````
</details>
https://github.com/llvm/llvm-project/pull/130420
More information about the cfe-commits
mailing list