[llvm] [LLVM][IR] Fix a couple of StringRef OOB error in AsmParser (PR #151556)
Rahul Joshi via llvm-commits
llvm-commits at lists.llvm.org
Thu Jul 31 09:46:00 PDT 2025
https://github.com/jurahul created https://github.com/llvm/llvm-project/pull/151556
- Fix LLLexer::getNextChar() to not index OOB into the CurBuf.
- Add unit tests that exercise these cases using a heap allocated StringRef. They show asan failures.
>From b7f69b3f7d669f449c5fea65172ffd6aad3e8d85 Mon Sep 17 00:00:00 2001
From: Rahul Joshi <rjoshi at nvidia.com>
Date: Thu, 31 Jul 2025 09:43:11 -0700
Subject: [PATCH] [LLVM][IR] Fix a couple of StringRef OOB error in AsmParser
- Fix LLLexer::getNextChar() to not index OOB into the CurBuf.
- Add unit tests that exercise these cases using a heap allocated
StringRef. They show asan failures.
---
llvm/include/llvm/AsmParser/LLLexer.h | 1 +
llvm/lib/AsmParser/LLLexer.cpp | 44 ++++++++-------
llvm/lib/AsmParser/LLParser.cpp | 3 +-
llvm/lib/AsmParser/Parser.cpp | 3 +-
llvm/unittests/AsmParser/AsmParserTest.cpp | 64 ++++++++++++++++++----
5 files changed, 82 insertions(+), 33 deletions(-)
diff --git a/llvm/include/llvm/AsmParser/LLLexer.h b/llvm/include/llvm/AsmParser/LLLexer.h
index 501a7aefccd7f..fcefdb449f73e 100644
--- a/llvm/include/llvm/AsmParser/LLLexer.h
+++ b/llvm/include/llvm/AsmParser/LLLexer.h
@@ -94,6 +94,7 @@ namespace llvm {
lltok::Kind LexToken();
int getNextChar();
+ bool isLabelTail();
void SkipLineComment();
bool SkipCComment();
lltok::Kind ReadString(lltok::Kind kind);
diff --git a/llvm/lib/AsmParser/LLLexer.cpp b/llvm/lib/AsmParser/LLLexer.cpp
index 520c6a00a9c07..62d701a9f501a 100644
--- a/llvm/lib/AsmParser/LLLexer.cpp
+++ b/llvm/lib/AsmParser/LLLexer.cpp
@@ -150,9 +150,8 @@ static void UnEscapeLexed(std::string &Str) {
}
/// isLabelChar - Return true for [-a-zA-Z$._0-9].
-static bool isLabelChar(char C) {
- return isalnum(static_cast<unsigned char>(C)) || C == '-' || C == '$' ||
- C == '.' || C == '_';
+static bool isLabelChar(int C) {
+ return isalnum(C) || C == '-' || C == '$' || C == '.' || C == '_';
}
/// isLabelTail - Return true if this pointer points to a valid end of a label.
@@ -175,19 +174,24 @@ LLLexer::LLLexer(StringRef StartBuf, SourceMgr &SM, SMDiagnostic &Err,
}
int LLLexer::getNextChar() {
- char CurChar = *CurPtr++;
- switch (CurChar) {
- default: return (unsigned char)CurChar;
- case 0:
- // A nul character in the stream is either the end of the current buffer or
- // a random nul in the file. Disambiguate that here.
- if (CurPtr-1 != CurBuf.end())
- return 0; // Just whitespace.
-
- // Otherwise, return end of file.
- --CurPtr; // Another call to lex will return EOF again.
+ if (CurPtr == CurBuf.end())
return EOF;
+ return *CurPtr++;
+}
+
+/// isLabelTail - Return true if the buffer is valid end of a label. If not,
+/// it returns false and restores the current pointer to where it was.
+bool LLLexer::isLabelTail() {
+ const char *SavedPtr = CurPtr;
+ while (true) {
+ int CurChar = getNextChar();
+ if (CurChar == ':')
+ return true;
+ if (!isLabelChar(CurChar))
+ break;
}
+ CurPtr = SavedPtr;
+ return false;
}
lltok::Kind LLLexer::LexToken() {
@@ -215,8 +219,7 @@ lltok::Kind LLLexer::LexToken() {
case '%': return LexPercent();
case '"': return LexQuote();
case '.':
- if (const char *Ptr = isLabelTail(CurPtr)) {
- CurPtr = Ptr;
+ if (isLabelTail()) {
StrVal.assign(TokStart, CurPtr-1);
return lltok::LabelStr;
}
@@ -262,7 +265,8 @@ lltok::Kind LLLexer::LexToken() {
void LLLexer::SkipLineComment() {
while (true) {
- if (CurPtr[0] == '\n' || CurPtr[0] == '\r' || getNextChar() == EOF)
+ int CurChar = getNextChar();
+ if (CurChar == '\n' || CurChar == '\r' || CurChar == EOF)
return;
}
}
@@ -298,7 +302,7 @@ lltok::Kind LLLexer::LexAt() {
}
lltok::Kind LLLexer::LexDollar() {
- if (const char *Ptr = isLabelTail(TokStart)) {
+ if (const char *Ptr = ::isLabelTail(TokStart)) {
CurPtr = Ptr;
StrVal.assign(TokStart, CurPtr - 1);
return lltok::LabelStr;
@@ -1145,7 +1149,7 @@ lltok::Kind LLLexer::LexDigitOrNegative() {
if (!isdigit(static_cast<unsigned char>(TokStart[0])) &&
!isdigit(static_cast<unsigned char>(CurPtr[0]))) {
// Okay, this is not a number after the -, it's probably a label.
- if (const char *End = isLabelTail(CurPtr)) {
+ if (const char *End = ::isLabelTail(CurPtr)) {
StrVal.assign(TokStart, End-1);
CurPtr = End;
return lltok::LabelStr;
@@ -1172,7 +1176,7 @@ lltok::Kind LLLexer::LexDigitOrNegative() {
// Check to see if this really is a string label, e.g. "-1:".
if (isLabelChar(CurPtr[0]) || CurPtr[0] == ':') {
- if (const char *End = isLabelTail(CurPtr)) {
+ if (const char *End = ::isLabelTail(CurPtr)) {
StrVal.assign(TokStart, End-1);
CurPtr = End;
return lltok::LabelStr;
diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp
index 13bef1f62f1a9..1ad2cd61daaf6 100644
--- a/llvm/lib/AsmParser/LLParser.cpp
+++ b/llvm/lib/AsmParser/LLParser.cpp
@@ -6917,7 +6917,8 @@ bool LLParser::parseFunctionBody(Function &Fn, unsigned FunctionNumber,
ArrayRef<unsigned> UnnamedArgNums) {
if (Lex.getKind() != lltok::lbrace)
return tokError("expected '{' in function body");
- Lex.Lex(); // eat the {.
+ if (Lex.Lex() == lltok::Error) // eat the {.
+ return tokError("Invalid token");
PerFunctionState PFS(*this, Fn, FunctionNumber, UnnamedArgNums);
diff --git a/llvm/lib/AsmParser/Parser.cpp b/llvm/lib/AsmParser/Parser.cpp
index 07fdce981b084..a7e1e61a35037 100644
--- a/llvm/lib/AsmParser/Parser.cpp
+++ b/llvm/lib/AsmParser/Parser.cpp
@@ -26,7 +26,8 @@ static bool parseAssemblyInto(MemoryBufferRef F, Module *M,
SlotMapping *Slots, bool UpgradeDebugInfo,
DataLayoutCallbackTy DataLayoutCallback) {
SourceMgr SM;
- std::unique_ptr<MemoryBuffer> Buf = MemoryBuffer::getMemBuffer(F);
+ std::unique_ptr<MemoryBuffer> Buf =
+ MemoryBuffer::getMemBuffer(F, /*RequiresNullTerminator*/ false);
SM.AddNewSourceBuffer(std::move(Buf), SMLoc());
std::optional<LLVMContext> OptContext;
diff --git a/llvm/unittests/AsmParser/AsmParserTest.cpp b/llvm/unittests/AsmParser/AsmParserTest.cpp
index ce226705068af..27a4f78e49be4 100644
--- a/llvm/unittests/AsmParser/AsmParserTest.cpp
+++ b/llvm/unittests/AsmParser/AsmParserTest.cpp
@@ -22,31 +22,67 @@ using namespace llvm;
namespace {
+class HeapAllocatedStringRef {
+ std::unique_ptr<char[]> Data;
+ size_t Size;
+
+public:
+ HeapAllocatedStringRef(StringRef S) : Size(S.size()) {
+ Data = std::make_unique<char[]>(Size);
+ std::strncpy(Data.get(), S.data(), Size);
+ }
+ operator StringRef() const { return StringRef(Data.get(), Size); }
+};
+
TEST(AsmParserTest, NullTerminatedInput) {
LLVMContext Ctx;
- StringRef Source = "; Empty module \n";
+ StringRef Source = "; Empty module\n";
SMDiagnostic Error;
auto Mod = parseAssemblyString(Source, Error, Ctx);
EXPECT_TRUE(Mod != nullptr);
EXPECT_TRUE(Error.getMessage().empty());
-}
-#ifdef GTEST_HAS_DEATH_TEST
-#ifndef NDEBUG
+ Mod = parseAssemblyString(HeapAllocatedStringRef(Source), Error, Ctx);
+
+ EXPECT_TRUE(Mod != nullptr);
+ EXPECT_TRUE(Error.getMessage().empty());
+}
TEST(AsmParserTest, NonNullTerminatedInput) {
LLVMContext Ctx;
- StringRef Source = "; Empty module \n\1\2";
+ StringRef Source = "; Empty module\n\1\2";
+ SMDiagnostic Error;
+
+ auto Mod = parseAssemblyString(Source.drop_back(2), Error, Ctx);
+
+ EXPECT_TRUE(Mod != nullptr);
+ EXPECT_TRUE(Error.getMessage().empty());
+}
+
+TEST(AsmParserTest, CommentTerminatedWithEOF) {
+ LLVMContext Ctx;
+ StringRef Source = "; Empty module";
SMDiagnostic Error;
- std::unique_ptr<Module> Mod;
- EXPECT_DEATH(Mod = parseAssemblyString(Source.substr(0, Source.size() - 2),
- Error, Ctx),
- "Buffer is not null terminated!");
+
+ auto Mod = parseAssemblyString(HeapAllocatedStringRef(Source), Error, Ctx);
+
+ EXPECT_TRUE(Mod != nullptr);
+ EXPECT_TRUE(Error.getMessage().empty());
}
-#endif
-#endif
+TEST(AsmParserTest, LabelTailTerminatedWithEOF) {
+ LLVMContext Ctx;
+ StringRef Source = R"(
+ define void @main() {
+ .l12)";
+ SMDiagnostic Error;
+
+ auto Mod = parseAssemblyString(HeapAllocatedStringRef(Source), Error, Ctx);
+
+ EXPECT_EQ(Mod, nullptr);
+ EXPECT_EQ(Error.getMessage(), "Invalid token");
+}
TEST(AsmParserTest, SlotMappingTest) {
LLVMContext Ctx;
@@ -65,6 +101,12 @@ TEST(AsmParserTest, SlotMappingTest) {
EXPECT_EQ(Mapping.MetadataNodes.count(0), 1u);
EXPECT_EQ(Mapping.MetadataNodes.count(42), 1u);
EXPECT_EQ(Mapping.MetadataNodes.count(1), 0u);
+
+ Mod =
+ parseAssemblyString(HeapAllocatedStringRef(Source), Error, Ctx, &Mapping);
+
+ EXPECT_TRUE(Mod != nullptr);
+ EXPECT_TRUE(Error.getMessage().empty());
}
TEST(AsmParserTest, TypeAndConstantValueParsing) {
More information about the llvm-commits
mailing list