[llvm] Add support for the .base64 directive (fixes #165499) (PR #165549)

Ben Kallus via llvm-commits llvm-commits at lists.llvm.org
Mon Nov 3 13:08:09 PST 2025


https://github.com/kenballus updated https://github.com/llvm/llvm-project/pull/165549

>From 4ca5466ad319ade27c44903ab25f38e81efb5076 Mon Sep 17 00:00:00 2001
From: Ben Kallus <benjamin.p.kallus.gr at dartmouth.edu>
Date: Fri, 31 Oct 2025 09:06:05 -0400
Subject: [PATCH 1/3] MC/AsmParser: Add .base64 directive

---
 llvm/lib/MC/MCParser/AsmParser.cpp        | 25 +++++++++++++++++++++++
 llvm/test/MC/AsmParser/directive_base64.s | 12 +++++++++++
 2 files changed, 37 insertions(+)
 create mode 100644 llvm/test/MC/AsmParser/directive_base64.s

diff --git a/llvm/lib/MC/MCParser/AsmParser.cpp b/llvm/lib/MC/MCParser/AsmParser.cpp
index dd1bc2be5feb4..357b114f0b1e3 100644
--- a/llvm/lib/MC/MCParser/AsmParser.cpp
+++ b/llvm/lib/MC/MCParser/AsmParser.cpp
@@ -46,6 +46,7 @@
 #include "llvm/MC/MCSymbolMachO.h"
 #include "llvm/MC/MCTargetOptions.h"
 #include "llvm/MC/MCValue.h"
+#include "llvm/Support/Base64.h"
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/ErrorHandling.h"
@@ -530,6 +531,7 @@ class AsmParser : public MCAsmParser {
     DK_LTO_SET_CONDITIONAL,
     DK_CFI_MTE_TAGGED_FRAME,
     DK_MEMTAG,
+    DK_BASE64,
     DK_END
   };
 
@@ -552,6 +554,7 @@ class AsmParser : public MCAsmParser {
 
   // ".ascii", ".asciz", ".string"
   bool parseDirectiveAscii(StringRef IDVal, bool ZeroTerminated);
+  bool parseDirectiveBase64();                  // ".base64"
   bool parseDirectiveReloc(SMLoc DirectiveLoc); // ".reloc"
   bool parseDirectiveValue(StringRef IDVal,
                            unsigned Size);       // ".byte", ".long", ...
@@ -1953,6 +1956,8 @@ bool AsmParser::parseStatement(ParseStatementInfo &Info,
     case DK_ASCIZ:
     case DK_STRING:
       return parseDirectiveAscii(IDVal, true);
+    case DK_BASE64:
+      return parseDirectiveBase64();
     case DK_BYTE:
     case DK_DC_B:
       return parseDirectiveValue(IDVal, 1);
@@ -3076,6 +3081,25 @@ bool AsmParser::parseDirectiveAscii(StringRef IDVal, bool ZeroTerminated) {
   return parseMany(parseOp);
 }
 
+/// parseDirectiveBase64: decode the string operand and emit the raw bytes.
+///    ::= .base64 "string"
+bool AsmParser::parseDirectiveBase64() {
+  if (checkForValidSection() ||
+      check(getTok().isNot(AsmToken::String), "expected string")) {
+    return true;
+  }
+
+  std::vector<char> Decoded;
+  std::string const str = getTok().getStringContents().str();
+  if (str.empty() || decodeBase64(str, Decoded)) {
+    return true;
+  }
+
+  getStreamer().emitBytes(std::string(Decoded.begin(), Decoded.end()));
+  Lex();
+  return false;
+}
+
 /// parseDirectiveReloc
 ///  ::= .reloc expression , identifier [ , expression ]
 bool AsmParser::parseDirectiveReloc(SMLoc DirectiveLoc) {
@@ -5345,6 +5369,7 @@ void AsmParser::initializeDirectiveKindMap() {
   DirectiveKindMap[".asciz"] = DK_ASCIZ;
   DirectiveKindMap[".string"] = DK_STRING;
   DirectiveKindMap[".byte"] = DK_BYTE;
+  DirectiveKindMap[".base64"] = DK_BASE64;
   DirectiveKindMap[".short"] = DK_SHORT;
   DirectiveKindMap[".value"] = DK_VALUE;
   DirectiveKindMap[".2byte"] = DK_2BYTE;
diff --git a/llvm/test/MC/AsmParser/directive_base64.s b/llvm/test/MC/AsmParser/directive_base64.s
new file mode 100644
index 0000000000000..947895caa171f
--- /dev/null
+++ b/llvm/test/MC/AsmParser/directive_base64.s
@@ -0,0 +1,12 @@
+# RUN: llvm-mc -triple i386-unknown-unknown %s | FileCheck %s
+
+        .data
+# CHECK: TEST0:
+# CHECK: .byte 0
+TEST0:
+        .base64 "AA=="
+
+# CHECK: TEST1:
+# CHECK: .ascii "abcxyz"
+TEST1:
+        .base64 "YWJjeHl6"

>From b238faaf7602ab5e0d9173d4410c22e8efa28ada Mon Sep 17 00:00:00 2001
From: Ben Kallus <benjamin.p.kallus.gr at dartmouth.edu>
Date: Fri, 31 Oct 2025 17:23:51 -0400
Subject: [PATCH 2/3] Add negative tests and improve error handling.

---
 llvm/lib/MC/MCParser/AsmParser.cpp        |  8 +++++++-
 llvm/test/MC/AsmParser/directive_base64.s | 18 +++++++++++++++++-
 2 files changed, 24 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/MC/MCParser/AsmParser.cpp b/llvm/lib/MC/MCParser/AsmParser.cpp
index 439dcbcddb456..6e57035c6beef 100644
--- a/llvm/lib/MC/MCParser/AsmParser.cpp
+++ b/llvm/lib/MC/MCParser/AsmParser.cpp
@@ -3089,10 +3089,16 @@ bool AsmParser::parseDirectiveBase64() {
 
   std::vector<char> Decoded;
   std::string const str = getTok().getStringContents().str();
-  if (str.empty() || decodeBase64(str, Decoded)) {
+  if (check(str.empty(), "expected nonempty string")) {
     return true;
   }
 
+  llvm::Error e = decodeBase64(str, Decoded);
+  if (e) {
+    consumeError(std::move(e));
+    return Error(Lexer.getLoc(), "failed to base64 decode string data");
+  }
+
   getStreamer().emitBytes(std::string(Decoded.begin(), Decoded.end()));
   Lex();
   return false;
diff --git a/llvm/test/MC/AsmParser/directive_base64.s b/llvm/test/MC/AsmParser/directive_base64.s
index 947895caa171f..df67f7ce8b8ac 100644
--- a/llvm/test/MC/AsmParser/directive_base64.s
+++ b/llvm/test/MC/AsmParser/directive_base64.s
@@ -1,4 +1,5 @@
-# RUN: llvm-mc -triple i386-unknown-unknown %s | FileCheck %s
+# RUN: not llvm-mc -triple i386-unknown-unknown %s | FileCheck %s
+# RUN: not llvm-mc -triple i386-unknown-unknown -o /dev/null %s 2>&1 | FileCheck %s --check-prefix=CHECK-ERROR
 
         .data
 # CHECK: TEST0:
@@ -10,3 +11,18 @@ TEST0:
 # CHECK: .ascii "abcxyz"
 TEST1:
         .base64 "YWJjeHl6"
+
+# CHECK: TEST2:
+# CHECK-ERROR: error: expected string
+TEST2:
+        .base64 not-a-string
+
+# CHECK: TEST3:
+# CHECK-ERROR: error: failed to base64 decode string data
+TEST3:
+        .base64 "AA"
+
+# CHECK: TEST4:
+# CHECK-ERROR: error: expected nonempty string
+TEST4:
+        .base64 ""

>From 15e34e1a3052007e5666bdba729392740b42e5ea Mon Sep 17 00:00:00 2001
From: Ben Kallus <benjamin.p.kallus.gr at dartmouth.edu>
Date: Mon, 3 Nov 2025 16:07:56 -0500
Subject: [PATCH 3/3] Add consecutive base64 test. Make CHECK directives in
 base64 test more specific

---
 llvm/test/MC/AsmParser/directive_base64.s | 35 ++++++++++++++---------
 1 file changed, 21 insertions(+), 14 deletions(-)

diff --git a/llvm/test/MC/AsmParser/directive_base64.s b/llvm/test/MC/AsmParser/directive_base64.s
index df67f7ce8b8ac..483e82c61a69a 100644
--- a/llvm/test/MC/AsmParser/directive_base64.s
+++ b/llvm/test/MC/AsmParser/directive_base64.s
@@ -1,28 +1,35 @@
-# RUN: not llvm-mc -triple i386-unknown-unknown %s | FileCheck %s
-# RUN: not llvm-mc -triple i386-unknown-unknown -o /dev/null %s 2>&1 | FileCheck %s --check-prefix=CHECK-ERROR
+# RUN: llvm-mc -triple i386-unknown-unknown %s | FileCheck %s
+# RUN: not llvm-mc -triple i386-unknown-unknown -defsym=ERR=1 -o /dev/null %s 2>&1 | FileCheck %s --check-prefix=CHECK-ERROR
 
         .data
-# CHECK: TEST0:
-# CHECK: .byte 0
+# CHECK-LABEL: TEST0:
+# CHECK-NEXT: .byte 0
 TEST0:
         .base64 "AA=="
 
-# CHECK: TEST1:
-# CHECK: .ascii "abcxyz"
+# CHECK-LABEL: TEST1:
+# CHECK-NEXT: .ascii "abcxyz"
 TEST1:
         .base64 "YWJjeHl6"
 
-# CHECK: TEST2:
-# CHECK-ERROR: error: expected string
+# CHECK-LABEL: TEST2:
+# CHECK-NEXT: .byte 1
+# CHECK-EMPTY:
+# CHECK-NEXT: .byte 2
 TEST2:
-        .base64 not-a-string
+        .base64 "AQ=="
+        .base64 "Ag=="
 
-# CHECK: TEST3:
-# CHECK-ERROR: error: failed to base64 decode string data
+.ifdef ERR
+# CHECK-ERROR: [[#@LINE+2]]:17: error: expected string
 TEST3:
-        .base64 "AA"
+        .base64 not-a-string
 
-# CHECK: TEST4:
-# CHECK-ERROR: error: expected nonempty string
+# CHECK-ERROR: [[#@LINE+2]]:17: error: failed to base64 decode string data
 TEST4:
+        .base64 "AA"
+
+# CHECK-ERROR: [[#@LINE+2]]:17: error: expected nonempty string
+TEST5:
         .base64 ""
+.endif



More information about the llvm-commits mailing list