[llvm] [MC] AsmLexer invalid read fix. (PR #154972)
Szymon Piotr Milczek via llvm-commits
llvm-commits at lists.llvm.org
Wed Oct 15 10:36:51 PDT 2025
https://github.com/smilczek updated https://github.com/llvm/llvm-project/pull/154972
>From 2f5a950e5ca5cf81f00330b899613e03c43d7bf9 Mon Sep 17 00:00:00 2001
From: "Milczek, Szymon" <szymon.milczek at intel.com>
Date: Fri, 22 Aug 2025 17:44:51 +0200
Subject: [PATCH 1/2] [MCParser] AsmLexer assert buffer is null-terminated.
If the null terminator is included in the buffer length provided to
AsmLexer (where `CurBuf.end()` points to memory that doesn't belong to
the buffer), when `CurPtr == CurBuf.end()` AsmLexer can perform an
invalid read by dereferencing CurPtr.
Clearly AsmLexer expects a null-terminated buffer where the null
terminator is placed at memory pointed to by `CurBuf.end()`.
This commit adds an assert as a means of documentation.
---
llvm/lib/MC/MCParser/AsmLexer.cpp | 5 +++
llvm/test/MC/AsmParser/invalid-read.s | 4 ++
llvm/unittests/MC/CMakeLists.txt | 3 +-
llvm/unittests/MC/MCParser.cpp | 61 +++++++++++++++++++++++++++
4 files changed, 72 insertions(+), 1 deletion(-)
create mode 100644 llvm/test/MC/AsmParser/invalid-read.s
create mode 100644 llvm/unittests/MC/MCParser.cpp
diff --git a/llvm/lib/MC/MCParser/AsmLexer.cpp b/llvm/lib/MC/MCParser/AsmLexer.cpp
index 968ccf776440b..9a2fb12cc4b6a 100644
--- a/llvm/lib/MC/MCParser/AsmLexer.cpp
+++ b/llvm/lib/MC/MCParser/AsmLexer.cpp
@@ -120,6 +120,11 @@ AsmLexer::AsmLexer(const MCAsmInfo &MAI) : MAI(MAI) {
void AsmLexer::setBuffer(StringRef Buf, const char *ptr,
bool EndStatementAtEOF) {
+ // Null terminator must be part of the actual buffer. It must reside at
+ // `Buf.end()`. It must be safe to dereference `Buf.end()`.
+ assert(*Buf.end() == '\0' &&
+ "Buffer provided to AsmLexer lacks null terminator.");
+
CurBuf = Buf;
if (ptr)
diff --git a/llvm/test/MC/AsmParser/invalid-read.s b/llvm/test/MC/AsmParser/invalid-read.s
new file mode 100644
index 0000000000000..9555554909dad
--- /dev/null
+++ b/llvm/test/MC/AsmParser/invalid-read.s
@@ -0,0 +1,4 @@
+# RUN: printf 'ret\0 ' > %t.s
+# RUN: llvm-mc %t.s --triple=x86_64 --as-lex | FileCheck %s
+
+# CHECK-NOT: ERROR: AddressSanitizer
diff --git a/llvm/unittests/MC/CMakeLists.txt b/llvm/unittests/MC/CMakeLists.txt
index da8e219113f46..95b3c4b5a96d1 100644
--- a/llvm/unittests/MC/CMakeLists.txt
+++ b/llvm/unittests/MC/CMakeLists.txt
@@ -8,6 +8,7 @@ set(LLVM_LINK_COMPONENTS
${LLVM_TARGETS_TO_BUILD}
MC
MCDisassembler
+ MCParser
Object
Support
TargetParser
@@ -18,8 +19,8 @@ add_llvm_unittest(MCTests
DwarfLineTables.cpp
DwarfLineTableHeaders.cpp
MCInstPrinter.cpp
+ MCParser.cpp
StringTableBuilderTest.cpp
TargetRegistry.cpp
MCDisassemblerTest.cpp
)
-
diff --git a/llvm/unittests/MC/MCParser.cpp b/llvm/unittests/MC/MCParser.cpp
new file mode 100644
index 0000000000000..592e93e9bfd5f
--- /dev/null
+++ b/llvm/unittests/MC/MCParser.cpp
@@ -0,0 +1,61 @@
+//===- llvm/unittests/MC/MCParser.cpp -------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCParser/MCAsmParser.h"
+#include "llvm/MC/MCParser/AsmLexer.h"
+#include "llvm/MC/MCInstPrinter.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCTargetOptions.h"
+#include "llvm/MC/TargetRegistry.h"
+#include "llvm/Support/TargetSelect.h"
+#include "llvm/Target/TargetMachine.h"
+#include "gtest/gtest.h"
+
+using namespace llvm;
+
+namespace {
+class MCAsmParserTest : public ::testing::Test {
+public:
+ std::unique_ptr<MCRegisterInfo> MRI;
+ std::unique_ptr<MCAsmInfo> MAI;
+
+ MCAsmParserTest() {
+ llvm::InitializeAllTargetInfos();
+ llvm::InitializeAllTargetMCs();
+
+ StringRef TripleName = "x86_64-pc-linux";
+ Triple TT(TripleName);
+ std::string ErrorStr;
+
+ const Target *TheTarget = TargetRegistry::lookupTarget(TT, ErrorStr);
+
+ // If we didn't build x86, do not run the test.
+ if (!TheTarget)
+ return;
+
+ MRI.reset(TheTarget->createMCRegInfo(TT));
+ MCTargetOptions MCOptions;
+ MAI.reset(TheTarget->createMCAsmInfo(*MRI, TT, MCOptions));
+ }
+};
+} // namespace
+
+TEST_F(MCAsmParserTest, InvalidRead) {
+ AsmLexer Lexer(*MAI);
+ const char* Source = "ret\0 ";
+ StringRef SourceRef(Source, 4); // Include null terminator in buffer length
+ Lexer.setBuffer(SourceRef);
+
+ bool Error = false;
+ while (Lexer.Lex().isNot(AsmToken::Eof)) {
+ if (Lexer.getTok().getKind() == AsmToken::Error)
+ Error = true;
+ }
+ ASSERT_TRUE(Error == false);
+}
>From b05aa6207925af76401186e07907011c6b7d2314 Mon Sep 17 00:00:00 2001
From: "Milczek, Szymon" <szymon.milczek at intel.com>
Date: Wed, 15 Oct 2025 19:36:29 +0200
Subject: [PATCH 2/2] remove not working lit test
---
llvm/test/MC/AsmParser/invalid-read.s | 4 ----
1 file changed, 4 deletions(-)
delete mode 100644 llvm/test/MC/AsmParser/invalid-read.s
diff --git a/llvm/test/MC/AsmParser/invalid-read.s b/llvm/test/MC/AsmParser/invalid-read.s
deleted file mode 100644
index 9555554909dad..0000000000000
--- a/llvm/test/MC/AsmParser/invalid-read.s
+++ /dev/null
@@ -1,4 +0,0 @@
-# RUN: printf 'ret\0 ' > %t.s
-# RUN: llvm-mc %t.s --triple=x86_64 --as-lex | FileCheck %s
-
-# CHECK-NOT: ERROR: AddressSanitizer
More information about the llvm-commits
mailing list