[llvm] [MC] AsmLexer invalid read fix. (PR #154972)

Szymon Piotr Milczek via llvm-commits llvm-commits at lists.llvm.org
Wed Oct 15 10:36:51 PDT 2025


https://github.com/smilczek updated https://github.com/llvm/llvm-project/pull/154972

>From 2f5a950e5ca5cf81f00330b899613e03c43d7bf9 Mon Sep 17 00:00:00 2001
From: "Milczek, Szymon" <szymon.milczek at intel.com>
Date: Fri, 22 Aug 2025 17:44:51 +0200
Subject: [PATCH 1/2] [MCParser] AsmLexer assert buffer is null-terminated.

If the null terminator is included in the buffer length provided to
AsmLexer (where `CurBuf.end()` points to memory that doesn't belong to
the buffer), when `CurPtr == CurBuf.end()` AsmLexer can perform an
invalid read by dereferencing CurPtr.

Clearly AsmLexer expects a null-terminated buffer where the null
terminator is placed at memory pointed to by `CurBuf.end()`.

This commit adds an assert as a means of documentation.
---
 llvm/lib/MC/MCParser/AsmLexer.cpp     |  5 +++
 llvm/test/MC/AsmParser/invalid-read.s |  4 ++
 llvm/unittests/MC/CMakeLists.txt      |  3 +-
 llvm/unittests/MC/MCParser.cpp        | 61 +++++++++++++++++++++++++++
 4 files changed, 72 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/MC/AsmParser/invalid-read.s
 create mode 100644 llvm/unittests/MC/MCParser.cpp

diff --git a/llvm/lib/MC/MCParser/AsmLexer.cpp b/llvm/lib/MC/MCParser/AsmLexer.cpp
index 968ccf776440b..9a2fb12cc4b6a 100644
--- a/llvm/lib/MC/MCParser/AsmLexer.cpp
+++ b/llvm/lib/MC/MCParser/AsmLexer.cpp
@@ -120,6 +120,11 @@ AsmLexer::AsmLexer(const MCAsmInfo &MAI) : MAI(MAI) {
 
 void AsmLexer::setBuffer(StringRef Buf, const char *ptr,
                          bool EndStatementAtEOF) {
+  // Null terminator must be part of the actual buffer. It must reside at
+  // `Buf.end()`. It must be safe to dereference `Buf.end()`.
+  assert(*Buf.end() == '\0' &&
+         "Buffer provided to AsmLexer lacks null terminator.");
+
   CurBuf = Buf;
 
   if (ptr)
diff --git a/llvm/test/MC/AsmParser/invalid-read.s b/llvm/test/MC/AsmParser/invalid-read.s
new file mode 100644
index 0000000000000..9555554909dad
--- /dev/null
+++ b/llvm/test/MC/AsmParser/invalid-read.s
@@ -0,0 +1,4 @@
+# RUN: printf 'ret\0 ' > %t.s
+# RUN: llvm-mc %t.s --triple=x86_64 --as-lex | FileCheck %s
+
+# CHECK-NOT: ERROR: AddressSanitizer
diff --git a/llvm/unittests/MC/CMakeLists.txt b/llvm/unittests/MC/CMakeLists.txt
index da8e219113f46..95b3c4b5a96d1 100644
--- a/llvm/unittests/MC/CMakeLists.txt
+++ b/llvm/unittests/MC/CMakeLists.txt
@@ -8,6 +8,7 @@ set(LLVM_LINK_COMPONENTS
   ${LLVM_TARGETS_TO_BUILD}
   MC
   MCDisassembler
+  MCParser
   Object
   Support
   TargetParser
@@ -18,8 +19,8 @@ add_llvm_unittest(MCTests
   DwarfLineTables.cpp
   DwarfLineTableHeaders.cpp
   MCInstPrinter.cpp
+  MCParser.cpp
   StringTableBuilderTest.cpp
   TargetRegistry.cpp
   MCDisassemblerTest.cpp
   )
-
diff --git a/llvm/unittests/MC/MCParser.cpp b/llvm/unittests/MC/MCParser.cpp
new file mode 100644
index 0000000000000..592e93e9bfd5f
--- /dev/null
+++ b/llvm/unittests/MC/MCParser.cpp
@@ -0,0 +1,61 @@
+//===- llvm/unittests/MC/MCParser.cpp -------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCParser/MCAsmParser.h"
+#include "llvm/MC/MCParser/AsmLexer.h"
+#include "llvm/MC/MCInstPrinter.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCTargetOptions.h"
+#include "llvm/MC/TargetRegistry.h"
+#include "llvm/Support/TargetSelect.h"
+#include "llvm/Target/TargetMachine.h"
+#include "gtest/gtest.h"
+
+using namespace llvm;
+
+namespace {
+class MCAsmParserTest : public ::testing::Test {
+public:
+  std::unique_ptr<MCRegisterInfo> MRI; // Null when the target lookup fails.
+  std::unique_ptr<MCAsmInfo> MAI;      // Null when the target lookup fails.
+
+  MCAsmParserTest() {
+    llvm::InitializeAllTargetInfos();
+    llvm::InitializeAllTargetMCs();
+
+    StringRef TripleName = "x86_64-pc-linux";
+    Triple TT(TripleName);
+    std::string ErrorStr;
+
+    const Target *TheTarget = TargetRegistry::lookupTarget(TT, ErrorStr);
+
+    // Without an x86 target, return early and leave MRI and MAI null.
+    if (!TheTarget)
+      return;
+
+    MRI.reset(TheTarget->createMCRegInfo(TT));
+    MCTargetOptions MCOptions;
+    MAI.reset(TheTarget->createMCAsmInfo(*MRI, TT, MCOptions));
+  }
+};
+} // namespace
+
+TEST_F(MCAsmParserTest, InvalidRead) {
+  if (!MAI)
+    GTEST_SKIP(); // x86 target not built: the fixture left MAI null.
+  AsmLexer Lexer(*MAI);
+  const char *Source = "ret\0 ";
+  StringRef SourceRef(Source, 4); // Include null terminator in buffer length
+  Lexer.setBuffer(SourceRef);
+
+  bool Error = false;
+  while (Lexer.Lex().isNot(AsmToken::Eof)) // Lex to EOF, noting Error tokens.
+    Error |= Lexer.getTok().is(AsmToken::Error);
+  ASSERT_FALSE(Error);
+}

>From b05aa6207925af76401186e07907011c6b7d2314 Mon Sep 17 00:00:00 2001
From: "Milczek, Szymon" <szymon.milczek at intel.com>
Date: Wed, 15 Oct 2025 19:36:29 +0200
Subject: [PATCH 2/2] remove not working lit test

---
 llvm/test/MC/AsmParser/invalid-read.s | 4 ----
 1 file changed, 4 deletions(-)
 delete mode 100644 llvm/test/MC/AsmParser/invalid-read.s

diff --git a/llvm/test/MC/AsmParser/invalid-read.s b/llvm/test/MC/AsmParser/invalid-read.s
deleted file mode 100644
index 9555554909dad..0000000000000
--- a/llvm/test/MC/AsmParser/invalid-read.s
+++ /dev/null
@@ -1,4 +0,0 @@
-# RUN: printf 'ret\0 ' > %t.s
-# RUN: llvm-mc %t.s --triple=x86_64 --as-lex | FileCheck %s
-
-# CHECK-NOT: ERROR: AddressSanitizer



More information about the llvm-commits mailing list