[llvm] [MC] AsmLexer assert buffer is null-terminated at CurBuf.end() (PR #154972)
Szymon Piotr Milczek via llvm-commits
llvm-commits at lists.llvm.org
Mon Oct 20 05:03:08 PDT 2025
https://github.com/smilczek updated https://github.com/llvm/llvm-project/pull/154972
>From 06920b802892d84430eaf54b44b4ca6f5abeb665 Mon Sep 17 00:00:00 2001
From: "Milczek, Szymon" <szymon.milczek at intel.com>
Date: Fri, 22 Aug 2025 17:44:51 +0200
Subject: [PATCH 1/3] [MCParser] AsmLexer assert buffer is null-terminated.
If the null terminator is included in the buffer length privided to
AsmLexer (where `CurBuf.end()` points to memory that doesn't belong to
the buffer), when `CurPtr == CurBuf.end()` AsmLexer can perform an
invalid read by dereferencing CurPtr.
Clearly AsmLexer expects a null-terminated buffer where the null
terminator is placed at memory pointed to by `CurBuf.end()`
This commit adds an assert as means of documentation.
---
llvm/lib/MC/MCParser/AsmLexer.cpp | 5 +++
llvm/unittests/MC/CMakeLists.txt | 3 +-
llvm/unittests/MC/Disassembler.cpp | 2 +-
llvm/unittests/MC/MCParser.cpp | 58 ++++++++++++++++++++++++++++++
4 files changed, 66 insertions(+), 2 deletions(-)
create mode 100644 llvm/unittests/MC/MCParser.cpp
diff --git a/llvm/lib/MC/MCParser/AsmLexer.cpp b/llvm/lib/MC/MCParser/AsmLexer.cpp
index 968ccf776440b..9a2fb12cc4b6a 100644
--- a/llvm/lib/MC/MCParser/AsmLexer.cpp
+++ b/llvm/lib/MC/MCParser/AsmLexer.cpp
@@ -120,6 +120,11 @@ AsmLexer::AsmLexer(const MCAsmInfo &MAI) : MAI(MAI) {
void AsmLexer::setBuffer(StringRef Buf, const char *ptr,
bool EndStatementAtEOF) {
+ // Null terminator must be part of the actual buffer. It must reside at
+ // `Buf.end()`. It must be safe to dereference `Buf.end()`.
+ assert(*Buf.end() == '\0' &&
+ "Buffer provided to AsmLexer lacks null terminator.");
+
CurBuf = Buf;
if (ptr)
diff --git a/llvm/unittests/MC/CMakeLists.txt b/llvm/unittests/MC/CMakeLists.txt
index da8e219113f46..95b3c4b5a96d1 100644
--- a/llvm/unittests/MC/CMakeLists.txt
+++ b/llvm/unittests/MC/CMakeLists.txt
@@ -8,6 +8,7 @@ set(LLVM_LINK_COMPONENTS
${LLVM_TARGETS_TO_BUILD}
MC
MCDisassembler
+ MCParser
Object
Support
TargetParser
@@ -18,8 +19,8 @@ add_llvm_unittest(MCTests
DwarfLineTables.cpp
DwarfLineTableHeaders.cpp
MCInstPrinter.cpp
+ MCParser.cpp
StringTableBuilderTest.cpp
TargetRegistry.cpp
MCDisassemblerTest.cpp
)
-
diff --git a/llvm/unittests/MC/Disassembler.cpp b/llvm/unittests/MC/Disassembler.cpp
index a02931a8acfea..f91113b6e2b00 100644
--- a/llvm/unittests/MC/Disassembler.cpp
+++ b/llvm/unittests/MC/Disassembler.cpp
@@ -1,4 +1,4 @@
-//===- llvm/unittest/Object/Disassembler.cpp ------------------------------===//
+//===- llvm/unittest/MC/Disassembler.cpp ----------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/llvm/unittests/MC/MCParser.cpp b/llvm/unittests/MC/MCParser.cpp
new file mode 100644
index 0000000000000..b9623a1655670
--- /dev/null
+++ b/llvm/unittests/MC/MCParser.cpp
@@ -0,0 +1,58 @@
+//===- llvm/unittest/MC/MCParser.cpp --------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCInstPrinter.h"
+#include "llvm/MC/MCParser/AsmLexer.h"
+#include "llvm/MC/MCParser/MCAsmParser.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCTargetOptions.h"
+#include "llvm/MC/TargetRegistry.h"
+#include "llvm/Support/TargetSelect.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/TargetParser/Host.h"
+#include "gtest/gtest.h"
+
+using namespace llvm;
+
+namespace {
+class MCAsmParserTest : public ::testing::Test {
+public:
+ std::unique_ptr<MCRegisterInfo> MRI;
+ std::unique_ptr<MCAsmInfo> MAI;
+
+ MCAsmParserTest() {
+ llvm::InitializeAllTargetInfos();
+ llvm::InitializeAllTargetMCs();
+
+ std::string TripleName = sys::getDefaultTargetTriple();
+ Triple TT(Triple::normalize(TripleName));
+
+ std::string ErrorStr;
+
+ const Target *TheTarget = TargetRegistry::lookupTarget(TT, ErrorStr);
+
+ if (!TheTarget)
+ return;
+
+ MRI.reset(TheTarget->createMCRegInfo(TT));
+ MCTargetOptions MCOptions;
+ MAI.reset(TheTarget->createMCAsmInfo(*MRI, TT, MCOptions));
+ }
+};
+} // namespace
+
+TEST_F(MCAsmParserTest, CheckNullTerminator) {
+ if (!MAI)
+ GTEST_SKIP();
+ AsmLexer Lexer(*MAI);
+ const char *Source = "ret\0 ";
+ StringRef SourceRef(Source, 4); // Include NULL terminator in buffer length
+ EXPECT_DEATH(Lexer.setBuffer(SourceRef),
+ "Buffer provided to AsmLexer lacks null terminator.");
+}
>From a1e34313df12e0521f6f76250393e19434f3e766 Mon Sep 17 00:00:00 2001
From: "Milczek, Szymon" <szymon.milczek at intel.com>
Date: Mon, 20 Oct 2025 13:01:34 +0200
Subject: [PATCH 2/3] remove unittest
---
llvm/unittests/MC/CMakeLists.txt | 3 +-
llvm/unittests/MC/Disassembler.cpp | 2 +-
llvm/unittests/MC/MCParser.cpp | 58 ------------------------------
3 files changed, 2 insertions(+), 61 deletions(-)
delete mode 100644 llvm/unittests/MC/MCParser.cpp
diff --git a/llvm/unittests/MC/CMakeLists.txt b/llvm/unittests/MC/CMakeLists.txt
index 95b3c4b5a96d1..da8e219113f46 100644
--- a/llvm/unittests/MC/CMakeLists.txt
+++ b/llvm/unittests/MC/CMakeLists.txt
@@ -8,7 +8,6 @@ set(LLVM_LINK_COMPONENTS
${LLVM_TARGETS_TO_BUILD}
MC
MCDisassembler
- MCParser
Object
Support
TargetParser
@@ -19,8 +18,8 @@ add_llvm_unittest(MCTests
DwarfLineTables.cpp
DwarfLineTableHeaders.cpp
MCInstPrinter.cpp
- MCParser.cpp
StringTableBuilderTest.cpp
TargetRegistry.cpp
MCDisassemblerTest.cpp
)
+
diff --git a/llvm/unittests/MC/Disassembler.cpp b/llvm/unittests/MC/Disassembler.cpp
index f91113b6e2b00..a02931a8acfea 100644
--- a/llvm/unittests/MC/Disassembler.cpp
+++ b/llvm/unittests/MC/Disassembler.cpp
@@ -1,4 +1,4 @@
-//===- llvm/unittest/MC/Disassembler.cpp ----------------------------------===//
+//===- llvm/unittest/Object/Disassembler.cpp ------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/llvm/unittests/MC/MCParser.cpp b/llvm/unittests/MC/MCParser.cpp
deleted file mode 100644
index b9623a1655670..0000000000000
--- a/llvm/unittests/MC/MCParser.cpp
+++ /dev/null
@@ -1,58 +0,0 @@
-//===- llvm/unittest/MC/MCParser.cpp --------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/MC/MCAsmInfo.h"
-#include "llvm/MC/MCInstPrinter.h"
-#include "llvm/MC/MCParser/AsmLexer.h"
-#include "llvm/MC/MCParser/MCAsmParser.h"
-#include "llvm/MC/MCRegisterInfo.h"
-#include "llvm/MC/MCTargetOptions.h"
-#include "llvm/MC/TargetRegistry.h"
-#include "llvm/Support/TargetSelect.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/TargetParser/Host.h"
-#include "gtest/gtest.h"
-
-using namespace llvm;
-
-namespace {
-class MCAsmParserTest : public ::testing::Test {
-public:
- std::unique_ptr<MCRegisterInfo> MRI;
- std::unique_ptr<MCAsmInfo> MAI;
-
- MCAsmParserTest() {
- llvm::InitializeAllTargetInfos();
- llvm::InitializeAllTargetMCs();
-
- std::string TripleName = sys::getDefaultTargetTriple();
- Triple TT(Triple::normalize(TripleName));
-
- std::string ErrorStr;
-
- const Target *TheTarget = TargetRegistry::lookupTarget(TT, ErrorStr);
-
- if (!TheTarget)
- return;
-
- MRI.reset(TheTarget->createMCRegInfo(TT));
- MCTargetOptions MCOptions;
- MAI.reset(TheTarget->createMCAsmInfo(*MRI, TT, MCOptions));
- }
-};
-} // namespace
-
-TEST_F(MCAsmParserTest, CheckNullTerminator) {
- if (!MAI)
- GTEST_SKIP();
- AsmLexer Lexer(*MAI);
- const char *Source = "ret\0 ";
- StringRef SourceRef(Source, 4); // Include NULL terminator in buffer length
- EXPECT_DEATH(Lexer.setBuffer(SourceRef),
- "Buffer provided to AsmLexer lacks null terminator.");
-}
>From 18ecc9ce9294ebf2097bd1dc1262904dafc22ad0 Mon Sep 17 00:00:00 2001
From: "Milczek, Szymon" <szymon.milczek at intel.com>
Date: Mon, 20 Oct 2025 13:58:25 +0200
Subject: [PATCH 3/3] Add comments
---
llvm/include/llvm/MC/MCParser/AsmLexer.h | 7 +++++++
llvm/lib/MC/MCParser/AsmLexer.cpp | 4 ++--
2 files changed, 9 insertions(+), 2 deletions(-)
diff --git a/llvm/include/llvm/MC/MCParser/AsmLexer.h b/llvm/include/llvm/MC/MCParser/AsmLexer.h
index 11d32fbb64702..e4cdb2ccd2e6e 100644
--- a/llvm/include/llvm/MC/MCParser/AsmLexer.h
+++ b/llvm/include/llvm/MC/MCParser/AsmLexer.h
@@ -45,6 +45,7 @@ class AsmLexer {
SmallVector<AsmToken, 1> CurTok;
const char *CurPtr = nullptr;
+ /// NULL-terminated buffer. NULL terminator must reside at `CurBuf.end()`.
StringRef CurBuf;
/// The location and description of the current error
@@ -191,6 +192,12 @@ class AsmLexer {
/// literals.
void setLexHLASMStrings(bool V) { LexHLASMStrings = V; }
+ /// Set buffer to be lexed.
+ /// `Buf` must be NULL-terminated. NULL terminator must reside at `Buf.end()`.
+ /// `ptr` if provided must be in range [`Buf.begin()`, `buf.end()`] or NULL.
+ /// Specifies where lexing of buffer should begin.
+ /// `EndStatementAtEOF` specifies whether `AsmToken::EndOfStatement` should be
+ /// returned upon reaching end of buffer.
LLVM_ABI void setBuffer(StringRef Buf, const char *ptr = nullptr,
bool EndStatementAtEOF = true);
diff --git a/llvm/lib/MC/MCParser/AsmLexer.cpp b/llvm/lib/MC/MCParser/AsmLexer.cpp
index 9a2fb12cc4b6a..6a89c1d09af09 100644
--- a/llvm/lib/MC/MCParser/AsmLexer.cpp
+++ b/llvm/lib/MC/MCParser/AsmLexer.cpp
@@ -120,8 +120,8 @@ AsmLexer::AsmLexer(const MCAsmInfo &MAI) : MAI(MAI) {
void AsmLexer::setBuffer(StringRef Buf, const char *ptr,
bool EndStatementAtEOF) {
- // Null terminator must be part of the actual buffer. It must reside at
- // `Buf.end()`. It must be safe to dereference `Buf.end()`.
+ // Buffer must be NULL-terminated. NULL terminator must reside at `Buf.end()`.
+ // It must be safe to dereference `Buf.end()`.
assert(*Buf.end() == '\0' &&
"Buffer provided to AsmLexer lacks null terminator.");
More information about the llvm-commits
mailing list