[llvm] [MC] AsmLexer assert buffer is null-terminated at CurBuf.end() (PR #154972)

Szymon Piotr Milczek via llvm-commits llvm-commits at lists.llvm.org
Mon Oct 20 04:58:15 PDT 2025


https://github.com/smilczek updated https://github.com/llvm/llvm-project/pull/154972

>From 06920b802892d84430eaf54b44b4ca6f5abeb665 Mon Sep 17 00:00:00 2001
From: "Milczek, Szymon" <szymon.milczek at intel.com>
Date: Fri, 22 Aug 2025 17:44:51 +0200
Subject: [PATCH 1/3] [MCParser] AsmLexer assert buffer is null-terminated.

If the null terminator is included in the buffer length privided to
AsmLexer (where `CurBuf.end()` points to memory that doesn't belong to
the buffer), when `CurPtr == CurBuf.end()` AsmLexer can perform an
invalid read by dereferencing CurPtr.

Clearly AsmLexer expects a null-terminated buffer where the null
terminator is placed at memory pointed to by `CurBuf.end()`

This commit adds an assert as means of documentation.
---
 llvm/lib/MC/MCParser/AsmLexer.cpp  |  5 +++
 llvm/unittests/MC/CMakeLists.txt   |  3 +-
 llvm/unittests/MC/Disassembler.cpp |  2 +-
 llvm/unittests/MC/MCParser.cpp     | 58 ++++++++++++++++++++++++++++++
 4 files changed, 66 insertions(+), 2 deletions(-)
 create mode 100644 llvm/unittests/MC/MCParser.cpp

diff --git a/llvm/lib/MC/MCParser/AsmLexer.cpp b/llvm/lib/MC/MCParser/AsmLexer.cpp
index 968ccf776440b..9a2fb12cc4b6a 100644
--- a/llvm/lib/MC/MCParser/AsmLexer.cpp
+++ b/llvm/lib/MC/MCParser/AsmLexer.cpp
@@ -120,6 +120,11 @@ AsmLexer::AsmLexer(const MCAsmInfo &MAI) : MAI(MAI) {
 
 void AsmLexer::setBuffer(StringRef Buf, const char *ptr,
                          bool EndStatementAtEOF) {
+  // Null terminator must be part of the actual buffer. It must reside at
+  // `Buf.end()`. It must be safe to dereference `Buf.end()`.
+  assert(*Buf.end() == '\0' &&
+         "Buffer provided to AsmLexer lacks null terminator.");
+
   CurBuf = Buf;
 
   if (ptr)
diff --git a/llvm/unittests/MC/CMakeLists.txt b/llvm/unittests/MC/CMakeLists.txt
index da8e219113f46..95b3c4b5a96d1 100644
--- a/llvm/unittests/MC/CMakeLists.txt
+++ b/llvm/unittests/MC/CMakeLists.txt
@@ -8,6 +8,7 @@ set(LLVM_LINK_COMPONENTS
   ${LLVM_TARGETS_TO_BUILD}
   MC
   MCDisassembler
+  MCParser
   Object
   Support
   TargetParser
@@ -18,8 +19,8 @@ add_llvm_unittest(MCTests
   DwarfLineTables.cpp
   DwarfLineTableHeaders.cpp
   MCInstPrinter.cpp
+  MCParser.cpp
   StringTableBuilderTest.cpp
   TargetRegistry.cpp
   MCDisassemblerTest.cpp
   )
-
diff --git a/llvm/unittests/MC/Disassembler.cpp b/llvm/unittests/MC/Disassembler.cpp
index a02931a8acfea..f91113b6e2b00 100644
--- a/llvm/unittests/MC/Disassembler.cpp
+++ b/llvm/unittests/MC/Disassembler.cpp
@@ -1,4 +1,4 @@
-//===- llvm/unittest/Object/Disassembler.cpp ------------------------------===//
+//===- llvm/unittest/MC/Disassembler.cpp ----------------------------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
diff --git a/llvm/unittests/MC/MCParser.cpp b/llvm/unittests/MC/MCParser.cpp
new file mode 100644
index 0000000000000..b9623a1655670
--- /dev/null
+++ b/llvm/unittests/MC/MCParser.cpp
@@ -0,0 +1,58 @@
+//===- llvm/unittest/MC/MCParser.cpp --------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCInstPrinter.h"
+#include "llvm/MC/MCParser/AsmLexer.h"
+#include "llvm/MC/MCParser/MCAsmParser.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCTargetOptions.h"
+#include "llvm/MC/TargetRegistry.h"
+#include "llvm/Support/TargetSelect.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/TargetParser/Host.h"
+#include "gtest/gtest.h"
+
+using namespace llvm;
+
+namespace {
+class MCAsmParserTest : public ::testing::Test {
+public:
+  std::unique_ptr<MCRegisterInfo> MRI;
+  std::unique_ptr<MCAsmInfo> MAI;
+
+  MCAsmParserTest() {
+    llvm::InitializeAllTargetInfos();
+    llvm::InitializeAllTargetMCs();
+
+    std::string TripleName = sys::getDefaultTargetTriple();
+    Triple TT(Triple::normalize(TripleName));
+
+    std::string ErrorStr;
+
+    const Target *TheTarget = TargetRegistry::lookupTarget(TT, ErrorStr);
+
+    if (!TheTarget)
+      return;
+
+    MRI.reset(TheTarget->createMCRegInfo(TT));
+    MCTargetOptions MCOptions;
+    MAI.reset(TheTarget->createMCAsmInfo(*MRI, TT, MCOptions));
+  }
+};
+} // namespace
+
+TEST_F(MCAsmParserTest, CheckNullTerminator) {
+  if (!MAI)
+    GTEST_SKIP();
+  AsmLexer Lexer(*MAI);
+  const char *Source = "ret\0 ";
+  StringRef SourceRef(Source, 4); // Include NULL terminator in buffer length
+  EXPECT_DEATH(Lexer.setBuffer(SourceRef),
+               "Buffer provided to AsmLexer lacks null terminator.");
+}

>From a1e34313df12e0521f6f76250393e19434f3e766 Mon Sep 17 00:00:00 2001
From: "Milczek, Szymon" <szymon.milczek at intel.com>
Date: Mon, 20 Oct 2025 13:01:34 +0200
Subject: [PATCH 2/3] remove unittest

---
 llvm/unittests/MC/CMakeLists.txt   |  3 +-
 llvm/unittests/MC/Disassembler.cpp |  2 +-
 llvm/unittests/MC/MCParser.cpp     | 58 ------------------------------
 3 files changed, 2 insertions(+), 61 deletions(-)
 delete mode 100644 llvm/unittests/MC/MCParser.cpp

diff --git a/llvm/unittests/MC/CMakeLists.txt b/llvm/unittests/MC/CMakeLists.txt
index 95b3c4b5a96d1..da8e219113f46 100644
--- a/llvm/unittests/MC/CMakeLists.txt
+++ b/llvm/unittests/MC/CMakeLists.txt
@@ -8,7 +8,6 @@ set(LLVM_LINK_COMPONENTS
   ${LLVM_TARGETS_TO_BUILD}
   MC
   MCDisassembler
-  MCParser
   Object
   Support
   TargetParser
@@ -19,8 +18,8 @@ add_llvm_unittest(MCTests
   DwarfLineTables.cpp
   DwarfLineTableHeaders.cpp
   MCInstPrinter.cpp
-  MCParser.cpp
   StringTableBuilderTest.cpp
   TargetRegistry.cpp
   MCDisassemblerTest.cpp
   )
+
diff --git a/llvm/unittests/MC/Disassembler.cpp b/llvm/unittests/MC/Disassembler.cpp
index f91113b6e2b00..a02931a8acfea 100644
--- a/llvm/unittests/MC/Disassembler.cpp
+++ b/llvm/unittests/MC/Disassembler.cpp
@@ -1,4 +1,4 @@
-//===- llvm/unittest/MC/Disassembler.cpp ----------------------------------===//
+//===- llvm/unittest/Object/Disassembler.cpp ------------------------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
diff --git a/llvm/unittests/MC/MCParser.cpp b/llvm/unittests/MC/MCParser.cpp
deleted file mode 100644
index b9623a1655670..0000000000000
--- a/llvm/unittests/MC/MCParser.cpp
+++ /dev/null
@@ -1,58 +0,0 @@
-//===- llvm/unittest/MC/MCParser.cpp --------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/MC/MCAsmInfo.h"
-#include "llvm/MC/MCInstPrinter.h"
-#include "llvm/MC/MCParser/AsmLexer.h"
-#include "llvm/MC/MCParser/MCAsmParser.h"
-#include "llvm/MC/MCRegisterInfo.h"
-#include "llvm/MC/MCTargetOptions.h"
-#include "llvm/MC/TargetRegistry.h"
-#include "llvm/Support/TargetSelect.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/TargetParser/Host.h"
-#include "gtest/gtest.h"
-
-using namespace llvm;
-
-namespace {
-class MCAsmParserTest : public ::testing::Test {
-public:
-  std::unique_ptr<MCRegisterInfo> MRI;
-  std::unique_ptr<MCAsmInfo> MAI;
-
-  MCAsmParserTest() {
-    llvm::InitializeAllTargetInfos();
-    llvm::InitializeAllTargetMCs();
-
-    std::string TripleName = sys::getDefaultTargetTriple();
-    Triple TT(Triple::normalize(TripleName));
-
-    std::string ErrorStr;
-
-    const Target *TheTarget = TargetRegistry::lookupTarget(TT, ErrorStr);
-
-    if (!TheTarget)
-      return;
-
-    MRI.reset(TheTarget->createMCRegInfo(TT));
-    MCTargetOptions MCOptions;
-    MAI.reset(TheTarget->createMCAsmInfo(*MRI, TT, MCOptions));
-  }
-};
-} // namespace
-
-TEST_F(MCAsmParserTest, CheckNullTerminator) {
-  if (!MAI)
-    GTEST_SKIP();
-  AsmLexer Lexer(*MAI);
-  const char *Source = "ret\0 ";
-  StringRef SourceRef(Source, 4); // Include NULL terminator in buffer length
-  EXPECT_DEATH(Lexer.setBuffer(SourceRef),
-               "Buffer provided to AsmLexer lacks null terminator.");
-}

>From c57126ba6f76ec47e68d3f7a5f230f7ade56290c Mon Sep 17 00:00:00 2001
From: "Milczek, Szymon" <szymon.milczek at intel.com>
Date: Mon, 20 Oct 2025 13:58:25 +0200
Subject: [PATCH 3/3] Add comments

---
 llvm/include/llvm/MC/MCParser/AsmLexer.h | 7 +++++++
 llvm/lib/MC/MCParser/AsmLexer.cpp        | 2 +-
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/llvm/include/llvm/MC/MCParser/AsmLexer.h b/llvm/include/llvm/MC/MCParser/AsmLexer.h
index 11d32fbb64702..e4cdb2ccd2e6e 100644
--- a/llvm/include/llvm/MC/MCParser/AsmLexer.h
+++ b/llvm/include/llvm/MC/MCParser/AsmLexer.h
@@ -45,6 +45,7 @@ class AsmLexer {
   SmallVector<AsmToken, 1> CurTok;
 
   const char *CurPtr = nullptr;
+  /// NULL-terminated buffer. NULL terminator must reside at `CurBuf.end()`.
   StringRef CurBuf;
 
   /// The location and description of the current error
@@ -191,6 +192,12 @@ class AsmLexer {
   /// literals.
   void setLexHLASMStrings(bool V) { LexHLASMStrings = V; }
 
+  /// Set buffer to be lexed.
+  /// `Buf` must be NULL-terminated. NULL terminator must reside at `Buf.end()`.
+  /// `ptr` if provided must be in range [`Buf.begin()`, `buf.end()`] or NULL.
+  /// Specifies where lexing of buffer should begin.
+  /// `EndStatementAtEOF` specifies whether `AsmToken::EndOfStatement` should be
+  /// returned upon reaching end of buffer.
   LLVM_ABI void setBuffer(StringRef Buf, const char *ptr = nullptr,
                           bool EndStatementAtEOF = true);
 
diff --git a/llvm/lib/MC/MCParser/AsmLexer.cpp b/llvm/lib/MC/MCParser/AsmLexer.cpp
index 9a2fb12cc4b6a..8bb7924fbf5f2 100644
--- a/llvm/lib/MC/MCParser/AsmLexer.cpp
+++ b/llvm/lib/MC/MCParser/AsmLexer.cpp
@@ -120,7 +120,7 @@ AsmLexer::AsmLexer(const MCAsmInfo &MAI) : MAI(MAI) {
 
 void AsmLexer::setBuffer(StringRef Buf, const char *ptr,
                          bool EndStatementAtEOF) {
-  // Null terminator must be part of the actual buffer. It must reside at
+  // NULL terminator must be part of the actual buffer. It must reside at
   // `Buf.end()`. It must be safe to dereference `Buf.end()`.
   assert(*Buf.end() == '\0' &&
          "Buffer provided to AsmLexer lacks null terminator.");



More information about the llvm-commits mailing list