[Mlir-commits] [mlir] [mlir][ODS] Name obfuscation for private dialects (PR #198083)
Matthias Springer
llvmlistbot at llvm.org
Wed May 20 07:00:17 PDT 2026
https://github.com/matthias-springer updated https://github.com/llvm/llvm-project/pull/198083
>From 9d61990ba3a7b1016787d2d4f792428182207399 Mon Sep 17 00:00:00 2001
From: Matthias Springer <me at m-sp.org>
Date: Sat, 16 May 2026 09:36:32 +0000
Subject: [PATCH] strip op and pass names
---
mlir/CMakeLists.txt | 53 +++++
mlir/docs/PrivateNameObfuscation.md | 193 ++++++++++++++++++
mlir/include/mlir/TableGen/AttrOrTypeDef.h | 5 +
mlir/include/mlir/TableGen/Dialect.h | 5 +
mlir/include/mlir/TableGen/Operator.h | 5 +
mlir/include/mlir/TableGen/PrivateName.h | 79 +++++++
mlir/lib/TableGen/AttrOrTypeDef.cpp | 2 +
mlir/lib/TableGen/Dialect.cpp | 23 +++
mlir/lib/TableGen/Operator.cpp | 2 +
mlir/lib/Tools/mlir-tblgen/MlirTblgenMain.cpp | 2 +
.../mlir-tblgen/private-name-obfuscation.td | 174 ++++++++++++++++
.../mlir-tblgen/private-pass-obfuscation.td | 79 +++++++
mlir/tools/mlir-tblgen/AttrOrTypeDefGen.cpp | 18 +-
mlir/tools/mlir-tblgen/CMakeLists.txt | 1 +
mlir/tools/mlir-tblgen/DialectGen.cpp | 7 +-
mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp | 47 ++++-
mlir/tools/mlir-tblgen/PassGen.cpp | 46 ++++-
mlir/tools/mlir-tblgen/PrivateName.cpp | 160 +++++++++++++++
mlir/tools/mlir-tblgen/RewriterGen.cpp | 30 ++-
19 files changed, 895 insertions(+), 36 deletions(-)
create mode 100644 mlir/docs/PrivateNameObfuscation.md
create mode 100644 mlir/include/mlir/TableGen/PrivateName.h
create mode 100644 mlir/test/mlir-tblgen/private-name-obfuscation.td
create mode 100644 mlir/test/mlir-tblgen/private-pass-obfuscation.td
create mode 100644 mlir/tools/mlir-tblgen/PrivateName.cpp
diff --git a/mlir/CMakeLists.txt b/mlir/CMakeLists.txt
index 6d05fa50ecd05..8f2a66074fcda 100644
--- a/mlir/CMakeLists.txt
+++ b/mlir/CMakeLists.txt
@@ -152,6 +152,59 @@ set(MLIR_ENABLE_NVPTXCOMPILER 0 CACHE BOOL
set(MLIR_ENABLE_PDL_IN_PATTERNMATCH 1 CACHE BOOL "Enable PDL in PatternMatch")
+#-------------------------------------------------------------------------------
+# Private name obfuscation
+#
+# Three knobs, all supplied by the build (privacy of a dialect or a pass
+# depends on the tool consuming it, not on the source definition):
+#
+# MLIR_PRIVATE_NAME_OBFUSCATOR : a shell command that obfuscates one name.
+# mlir-tblgen pipes each name into this
+# command via stdin and uses the first
+# whitespace-delimited token of stdout
+# (prefixed with `_`) as the obfuscated
+# name.
+# MLIR_PRIVATE_DIALECTS : a list of dialect names whose ops,
+# attributes, and types should have their
+# mnemonics obfuscated.
+# MLIR_PRIVATE_PASSES : if ON, all passes are treated as private:
+# their arguments / names are obfuscated
+# and their description / per-option /
+# per-statistic descriptions are emitted
+# as empty strings.
+#
+# Obfuscation only happens for items that are private AND when
+# MLIR_PRIVATE_NAME_OBFUSCATOR is also set. This makes it harder to reverse
+# engineer the resulting binary while keeping registration, pattern
+# matching, bytecode, and IR printing internally consistent across
+# translation units of one build.
+#
+# Choosing a stable obfuscator is the user's responsibility: e.g.
+# `md5sum | awk '{print $1}'`, an HMAC with a salt, or any custom script.
+# The obfuscator must produce a unique output per unique input (no
+# collisions); hashing tools usually satisfy this in practice. Bytecode
+# produced by a build with one obfuscator is generally unreadable by any
+# build that uses a different obfuscator.
+#-------------------------------------------------------------------------------
+set(MLIR_PRIVATE_NAME_OBFUSCATOR "" CACHE STRING
+ "Shell command fed to mlir-tblgen for private-name obfuscation; empty disables obfuscation.")
+set(MLIR_PRIVATE_DIALECTS "" CACHE STRING
+ "Semicolon-separated list of dialect names whose ops/attrs/types are obfuscated.")
+option(MLIR_PRIVATE_PASSES
+ "Treat all passes as private (obfuscate names, empty descriptions)."
+ OFF)
+
+if(MLIR_PRIVATE_NAME_OBFUSCATOR)
+ list(APPEND LLVM_TABLEGEN_FLAGS "--mlir-private-name-obfuscator=${MLIR_PRIVATE_NAME_OBFUSCATOR}")
+endif()
+if(MLIR_PRIVATE_DIALECTS)
+ string(REPLACE ";" "," _mlir_private_dialects_csv "${MLIR_PRIVATE_DIALECTS}")
+ list(APPEND LLVM_TABLEGEN_FLAGS "--mlir-private-dialects=${_mlir_private_dialects_csv}")
+endif()
+if(MLIR_PRIVATE_PASSES)
+ list(APPEND LLVM_TABLEGEN_FLAGS "--mlir-private-passes")
+endif()
+
option(MLIR_INCLUDE_TESTS
"Generate build targets for the MLIR unit tests."
${LLVM_INCLUDE_TESTS})
diff --git a/mlir/docs/PrivateNameObfuscation.md b/mlir/docs/PrivateNameObfuscation.md
new file mode 100644
index 0000000000000..fa5c6beb0a3ea
--- /dev/null
+++ b/mlir/docs/PrivateNameObfuscation.md
@@ -0,0 +1,193 @@
+# Private Name Obfuscation
+
+[TOC]
+
+This page documents an opt-in TableGen + CMake mechanism for replacing the
+human-readable names of dialects, operations, attributes, types, and passes
+in a built MLIR binary with opaque identifiers. The intent is to make it
+harder to reverse-engineer a release binary while preserving a fully-readable
+internal/test build from the same source tree.
+
+The whole obfuscation logic is delegated to an external shell command
+supplied by the build, so MLIR itself does not embed any particular hash
+function or secret material.
+
+## What gets obfuscated
+
+Three knobs control what gets obfuscated:
+
+* `-DMLIR_PRIVATE_NAME_OBFUSCATOR=<shell-cmd>` — the external command used
+ to obfuscate one name at a time.
+* `-DMLIR_PRIVATE_DIALECTS=<d1;d2;...>` — the (semicolon-separated) list of
+ dialect namespaces whose items should be obfuscated.
+* `-DMLIR_PRIVATE_PASSES=ON` — when set, treat *every* pass as private.
+
+Whether a given dialect or pass is "private" is a property of the tool
+consuming it, not of the source definition, so the two privacy knobs are
+supplied by the build rather than authored in ODS.
+
+When `MLIR_PRIVATE_NAME_OBFUSCATOR` is set together with either of the
+other two, the matching items get the literals below replaced with opaque
+identifiers:
+
+| Source | Affected literal(s) |
+| -------------------------- | ------------------------------------------------------------ |
+| `Dialect` | `getDialectNamespace()`, dialect constructor |
+| `Op` | `getOperationName()`, adaptor `odsOpName`, error prefixes |
+| `AttrDef`, `TypeDef` | `getMnemonic()`, `name`, `dialectName`, alias `getAlias` |
+| `PassBase` / `Pass` | `getArgument()`, `getArgumentName()`, `getName()`, `getPassName()` |
+
+For each unique name `N`, mlir-tblgen runs roughly:
+
+```sh
+printf '%s' 'N' | <shell-cmd>
+```
+
+It then takes the first whitespace-delimited token from stdout, prefixes
+it with `_` (so the result is always a valid MLIR mnemonic / C++ identifier),
+and uses that as the obfuscated name. Results are cached, so the command is
+invoked at most once per unique name within a single mlir-tblgen invocation.
+
+For op / attribute / type names, the obfuscator is invoked separately on
+the dialect prefix and on the mnemonic, and the results are rejoined with
+a dot, so the runtime helper `OperationName::getDialectNamespace()` (which
+splits at the first `.`) keeps working.
+
+Obfuscated names are **deterministic** as long as the configured command
+is deterministic, so all translation units within one build agree on the
+obfuscated spelling. Pattern matching, `ConversionTarget`, the bytecode
+reader/writer (which uses whatever `getDialectNamespace()` /
+`getOperationName()` return), and dialect registration all keep working
+without source changes.
+
+Private ops also behave as if no `assemblyFormat` or
+`hasCustomAssemblyFormat` was specified when private-name obfuscation is
+enabled. ODS does not generate the custom/declarative `parse` and `print`
+methods for those ops. They still print in generic form, using the
+obfuscated operation name, and their custom textual syntax is rejected.
+
+Op argument names (the keys returned by `getAttributeNames()` / used by
+generated `StringAttr` accessors) are **not** obfuscated. They are dictionary
+keys with established meaning across patterns, properties, and serialization,
+so they remain in their original form even on private ops. Raw string lookups
+such as `op->getAttr("foo")` therefore keep working unchanged.
+
+When `MLIR_PRIVATE_PASSES=ON`, the descriptions returned by
+`getDescription()`, the per-option `cl::desc(...)` text, and the
+per-statistic description text are also emitted as empty strings (since
+plaintext descriptions would otherwise leak the meaning of the obfuscated
+name). Per-pass `register{PassName}()` helpers and the
+`mlirCreate{Group}{PassName}` / `mlirRegister{Group}{PassName}` C-API
+entries keep being emitted under their original C++ class names; they
+register and invoke the pass under its obfuscated textual argument and
+display name.
+
+## Build configuration
+
+```sh
+# Public/test/dev build: no obfuscation (default).
+cmake -G Ninja path/to/llvm \
+ -DLLVM_ENABLE_PROJECTS=mlir
+
+# Release build: obfuscate the listed dialects and all passes using an
+# external obfuscator command.
+cmake -G Ninja path/to/llvm \
+ -DLLVM_ENABLE_PROJECTS=mlir \
+ -DMLIR_PRIVATE_NAME_OBFUSCATOR="md5sum | awk '{print \$1}'" \
+ -DMLIR_PRIVATE_DIALECTS="my_internal_dialect;my_lowering_dialect" \
+ -DMLIR_PRIVATE_PASSES=ON
+```
+
+`MLIR_PRIVATE_NAME_OBFUSCATOR` controls private-name obfuscation: leave it
+empty to disable obfuscation, or set it to a shell command. The command
+receives each name on stdin (no trailing newline) and should write the
+obfuscated form to stdout. mlir-tblgen uses the first whitespace-delimited
+token of that output as the obfuscated name, so simple tools that emit
+extra fields (such as `md5sum`, which prints `<hash> -`) work out of the
+box.
+
+Choose any deterministic, build-stable obfuscator for production builds.
+The obfuscator must produce a unique output per unique input (no
+collisions); hashing tools usually satisfy this in practice but it is the
+user's responsibility to pick something appropriate. Examples:
+
+* `md5sum`
+* `sha256sum`
+* An HMAC pipeline with a build-private key:
+ `openssl dgst -sha256 -hmac "$RELEASE_SALT" -hex | awk '{print $2}'`
+* A custom script that mixes in a vendor-specific salt before hashing.
+
+The obfuscator command and the dialect list are fed directly to the
+`mlir-tblgen` command line as `--mlir-private-name-obfuscator=<cmd>` and
+`--mlir-private-dialects=<csv>`. They are **not** embedded in the resulting
+binary. Bytecode produced by a build with one (obfuscator, dialect-list)
+pair is generally unreadable by a build that uses a different pair, so
+pin both across releases that need binary compatibility.
+
+## Marking items as private
+
+Neither dialects nor passes are marked private in ODS — whether to obfuscate
+them depends on the tool consuming them (e.g., `scf` and `Canonicalizer` may
+be private in one downstream compiler and public in another). Privacy is
+configured entirely by the build:
+
+* A dialect becomes private by being listed in `MLIR_PRIVATE_DIALECTS`. All
+ ops, attributes, and types declared by a private dialect are obfuscated;
+ there is no per-op or per-def override.
+* All passes become private when `MLIR_PRIVATE_PASSES=ON`. The toggle is
+ global — there is no per-pass override either.
+
+```tablegen
+def MyDialect : Dialect {
+ let name = "mydialect";
+ let cppNamespace = "::my";
+}
+
+def MyOp : Op<MyDialect, "do_thing", []>; // obfuscated iff MyDialect is
+ // in MLIR_PRIVATE_DIALECTS
+
+def MyPass : Pass<"my-pass"> { // obfuscated iff
+ let summary = "Does the thing."; // MLIR_PRIVATE_PASSES=ON
+}
+```
+
+## Caveats
+
+* Hand-written code that compares `op->getName().getStringRef()` against a
+ spelled-out op name (e.g. `== "mydialect.do_thing"`) breaks under
+ obfuscation. Migrate such checks to `isa<my::DoThingOp>()` (which uses
+ TypeID and is unaffected) or compare against
+ `my::DoThingOp::getOperationName()` (which is itself obfuscated, so the
+ comparison still works).
+* Diagnostics and verifier messages naturally print the obfuscated names
+ because they use runtime `getName()` / `getDialect()->getNamespace()`
+ calls. The English skeleton text around the names is not stripped.
+* Python bindings emitted by `gen-python-op-bindings` are not adjusted by
+ this mechanism. Do not generate Python bindings for private dialects or
+ ops.
+* `LLVM_DEBUG`, `LDBG`, statistics (`LLVM_ENABLE_STATS`), and `--debug`
+ paths are already removed from a release build (`NDEBUG`); they don't
+ need separate handling.
+* The obfuscator is spawned via `popen` (or `_popen` on Windows), so the
+ command must be runnable by the host's default shell, and must be on
+ `PATH` when mlir-tblgen runs.
+
+## Audit checklist for downstream trees
+
+Before enabling private-name obfuscation in a downstream compiler, audit for
+hand-written string comparisons and textual pipeline dependencies. These
+patterns should generally be rewritten to use TypeID-based APIs, concrete op
+classes, or the generated `::getOperationName()` accessors:
+
+```sh
+rg 'getName\(\)\.getStringRef\(\).*==|==.*getName\(\)\.getStringRef\(\)' path/to/downstream
+rg 'OperationName\("[^"]+"' path/to/downstream
+rg 'RegisteredOperationName::lookup\("[^"]+"' path/to/downstream
+rg 'getOrLoadDialect\("[^"]+"' path/to/downstream
+rg 'PassInfo::lookup\("[^"]+"' path/to/downstream
+rg 'parsePassPipeline|--pass-pipeline|register.*Passes' path/to/downstream
+```
+
+Comparisons against generated names, such as
+`my::DoThingOp::getOperationName()`, remain valid because the generated
+method returns the obfuscated spelling in release builds.
diff --git a/mlir/include/mlir/TableGen/AttrOrTypeDef.h b/mlir/include/mlir/TableGen/AttrOrTypeDef.h
index 65992f9fef5e9..cd449cc26fa38 100644
--- a/mlir/include/mlir/TableGen/AttrOrTypeDef.h
+++ b/mlir/include/mlir/TableGen/AttrOrTypeDef.h
@@ -220,6 +220,11 @@ class AttrOrTypeDef {
/// using the mnemonic.
bool genMnemonicAlias() const;
+ /// Returns true if this attribute or type belongs to a dialect listed via
+ /// `--mlir-private-dialects`. Private def names are eligible for obfuscation
+ /// when private-name obfuscation is enabled in mlir-tblgen.
+ bool isPrivate() const;
+
/// Get the code location (for error printing).
ArrayRef<SMLoc> getLoc() const;
diff --git a/mlir/include/mlir/TableGen/Dialect.h b/mlir/include/mlir/TableGen/Dialect.h
index 30f9d690b678d..232d3e58d4f0a 100644
--- a/mlir/include/mlir/TableGen/Dialect.h
+++ b/mlir/include/mlir/TableGen/Dialect.h
@@ -88,6 +88,11 @@ class Dialect {
/// operations or types.
bool isExtensible() const;
+ /// Returns true if this dialect's namespace was listed as private via
+ /// `--mlir-private-dialects`. Private dialect, op, attribute, and type names
+ /// are eligible for obfuscation when private-name obfuscation is enabled.
+ bool isPrivate() const;
+
const llvm::DagInit *getDiscardableAttributes() const;
const llvm::Record *getDef() const { return def; }
diff --git a/mlir/include/mlir/TableGen/Operator.h b/mlir/include/mlir/TableGen/Operator.h
index f0514d8e61748..9d7bc2a08050d 100644
--- a/mlir/include/mlir/TableGen/Operator.h
+++ b/mlir/include/mlir/TableGen/Operator.h
@@ -86,6 +86,11 @@ class Operator {
/// format if its dialect name is not empty.
std::string getOperationName() const;
+ /// Returns true if this op's dialect is listed in `--mlir-private-dialects`.
+ /// When private-name obfuscation is enabled in mlir-tblgen, the op's dialect
+ /// prefix and mnemonic in its registered name are obfuscated.
+ bool isPrivate() const;
+
/// Returns this op's C++ class name.
StringRef getCppClassName() const;
diff --git a/mlir/include/mlir/TableGen/PrivateName.h b/mlir/include/mlir/TableGen/PrivateName.h
new file mode 100644
index 0000000000000..1082d52a492ef
--- /dev/null
+++ b/mlir/include/mlir/TableGen/PrivateName.h
@@ -0,0 +1,79 @@
+//===- PrivateName.h - Private name obfuscation for ODS ---------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Public API for ODS-driven obfuscation of dialect namespaces, op mnemonics,
+// AttrDef/TypeDef mnemonics, and pass arguments / names. Which dialects are
+// private is configured by the build via `--mlir-private-dialects`. Whether
+// passes are private is a single global toggle controlled by the build via
+// `--mlir-private-passes` (a pass's privacy depends on the tool consuming
+// it, just like a dialect's). Op argument attribute keys are intentionally
+// not obfuscated. For private passes, the description / per-option /
+// per-statistic descriptions are also emitted as empty strings when
+// obfuscation is enabled.
+//
+// All of the configuration / plumbing (the cl::opts, the obfuscator-
+// subprocess invocation, and the name cache) lives in
+// `mlir/tools/mlir-tblgen/PrivateName.cpp` so that callers of this header
+// only see the pure query API.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MLIR_TABLEGEN_PRIVATENAME_H_
+#define MLIR_TABLEGEN_PRIVATENAME_H_
+
+#include "mlir/Support/LLVM.h"
+#include "llvm/ADT/StringRef.h"
+
+#include <string>
+
+namespace mlir {
+namespace tblgen {
+
+/// Returns true if private-name obfuscation is enabled, i.e., a non-empty
+/// obfuscator command has been configured via `--mlir-private-name-obfuscator`.
+bool obfuscatePrivateNamesEnabled();
+
+/// Mark the dialect with the given namespace as private. Called by the
+/// mlir-tblgen driver once per value parsed from `--mlir-private-dialects`.
+void addPrivateDialect(StringRef dialectName);
+
+/// Returns true if the dialect with the given namespace has been listed as
+/// private via `--mlir-private-dialects` on the mlir-tblgen command line.
+/// Privacy of a dialect is a build-level concern (it depends on which tool
+/// the dialect is being compiled into), so it is not tracked in ODS.
+bool isDialectPrivate(StringRef dialectName);
+
+/// Returns true if `--mlir-private-passes` was set on the mlir-tblgen
+/// command line, i.e., all passes should be treated as private. Privacy of
+/// a pass is a build-level concern (it depends on which tool the pass is
+/// being compiled into), so it is not tracked in ODS.
+bool arePassesPrivate();
+
+/// Returns the obfuscated form of `name`. The returned StringRef is stable
+/// for the lifetime of the process. The configured obfuscator is invoked at
+/// most once per distinct name.
+StringRef obfuscatePrivateName(StringRef name);
+
+/// Yields `obfuscatePrivateName(name)` when `isPrivate` is set and obfuscation
+/// is enabled; otherwise hands back `name` unchanged.
+inline StringRef maybeObfuscate(StringRef name, bool isPrivate) {
+  if (isPrivate && obfuscatePrivateNamesEnabled())
+    return obfuscatePrivateName(name);
+  return name;
+}
+
+/// For a dotted name "dialect.mnemonic", obfuscates the dialect prefix and
+/// the mnemonic suffix independently and rejoins them with a dot. This keeps
+/// runtime parsing of the dialect prefix in `OperationName` working. Names
+/// without a '.' are obfuscated as-is.
+std::string maybeObfuscateDotted(StringRef name, bool isPrivate);
+
+} // namespace tblgen
+} // namespace mlir
+
+#endif // MLIR_TABLEGEN_PRIVATENAME_H_
diff --git a/mlir/lib/TableGen/AttrOrTypeDef.cpp b/mlir/lib/TableGen/AttrOrTypeDef.cpp
index bf835a860cd5b..6c3d31267e1c1 100644
--- a/mlir/lib/TableGen/AttrOrTypeDef.cpp
+++ b/mlir/lib/TableGen/AttrOrTypeDef.cpp
@@ -211,6 +211,8 @@ bool AttrOrTypeDef::genMnemonicAlias() const {
return def->getValueAsBit("genMnemonicAlias");
}
+bool AttrOrTypeDef::isPrivate() const { return getDialect().isPrivate(); }
+
ArrayRef<SMLoc> AttrOrTypeDef::getLoc() const { return def->getLoc(); }
bool AttrOrTypeDef::skipDefaultBuilders() const {
diff --git a/mlir/lib/TableGen/Dialect.cpp b/mlir/lib/TableGen/Dialect.cpp
index 7aaf4dd57c50e..ab0bc2aec0d60 100644
--- a/mlir/lib/TableGen/Dialect.cpp
+++ b/mlir/lib/TableGen/Dialect.cpp
@@ -11,6 +11,8 @@
//===----------------------------------------------------------------------===//
#include "mlir/TableGen/Dialect.h"
+#include "mlir/TableGen/PrivateName.h"
+#include "llvm/ADT/StringSet.h"
#include "llvm/TableGen/Error.h"
#include "llvm/TableGen/Record.h"
@@ -102,6 +104,27 @@ bool Dialect::isExtensible() const {
return def->getValueAsBit("isExtensible");
}
+/// Returns the process-wide set of dialect namespaces marked private via
+/// `--mlir-private-dialects`. Function-local static to avoid global
+/// constructors (MLIRTableGen is built with `-Werror=global-constructors`);
+/// `static` linkage instead of an anonymous namespace, per LLVM style.
+static llvm::StringSet<> &privateDialectStorage() {
+  static llvm::StringSet<> privateDialects;
+  return privateDialects;
+}
+
+/// Records `dialectName` in the private-dialect set.
+void mlir::tblgen::addPrivateDialect(StringRef dialectName) {
+  privateDialectStorage().insert(dialectName);
+}
+
+/// Returns true if `dialectName` was recorded by `addPrivateDialect`.
+bool mlir::tblgen::isDialectPrivate(StringRef dialectName) {
+  return privateDialectStorage().contains(dialectName);
+}
+
+bool Dialect::isPrivate() const { return tblgen::isDialectPrivate(getName()); }
+
const llvm::DagInit *Dialect::getDiscardableAttributes() const {
return def->getValueAsDag("discardableAttrs");
}
diff --git a/mlir/lib/TableGen/Operator.cpp b/mlir/lib/TableGen/Operator.cpp
index 82dfbcbfa4d4f..bf6836f546a23 100644
--- a/mlir/lib/TableGen/Operator.cpp
+++ b/mlir/lib/TableGen/Operator.cpp
@@ -68,6 +68,8 @@ std::string Operator::getOperationName() const {
return std::string(llvm::formatv("{0}.{1}", prefix, opName));
}
+bool Operator::isPrivate() const { return dialect.isPrivate(); }
+
std::string Operator::getAdaptorName() const {
return std::string(llvm::formatv("{0}Adaptor", getCppClassName()));
}
diff --git a/mlir/lib/Tools/mlir-tblgen/MlirTblgenMain.cpp b/mlir/lib/Tools/mlir-tblgen/MlirTblgenMain.cpp
index 64e86f2a62073..a84571c4b2177 100644
--- a/mlir/lib/Tools/mlir-tblgen/MlirTblgenMain.cpp
+++ b/mlir/lib/Tools/mlir-tblgen/MlirTblgenMain.cpp
@@ -151,6 +151,8 @@ int mlir::MlirTblgenMain(int argc, char **argv) {
llvm::cl::opt<const mlir::GenInfo *, true, mlir::GenNameParser> generator(
"", llvm::cl::desc("Generator to run"), cl::location(::generator));
+ // Private-name obfuscation and pass-metadata stripping are configured via
+ // `cl::opt`s registered in `PrivateName.cpp`.
cl::ParseCommandLineOptions(argc, argv);
return TableGenMain(
diff --git a/mlir/test/mlir-tblgen/private-name-obfuscation.td b/mlir/test/mlir-tblgen/private-name-obfuscation.td
new file mode 100644
index 0000000000000..ab7725f8053ae
--- /dev/null
+++ b/mlir/test/mlir-tblgen/private-name-obfuscation.td
@@ -0,0 +1,174 @@
+// Verifies that --mlir-private-name-obfuscator replaces the names of items
+// in dialects listed via --mlir-private-dialects with deterministic opaque
+// identifiers, and leaves items in other dialects unchanged.
+//
+// RUN: mlir-tblgen -gen-dialect-decls -dialect=publicd -I %S/../../include %s \
+// RUN: | FileCheck %s --check-prefix=PUBDD-OFF
+// RUN: mlir-tblgen -gen-dialect-decls -dialect=publicd -I %S/../../include %s \
+// RUN: --mlir-private-name-obfuscator=md5sum --mlir-private-dialects=privd \
+// RUN: | FileCheck %s --check-prefix=PUBDD-ON
+// RUN: mlir-tblgen -gen-dialect-decls -dialect=privd -I %S/../../include %s \
+// RUN: | FileCheck %s --check-prefix=PRIVDD-OFF
+// RUN: mlir-tblgen -gen-dialect-decls -dialect=privd -I %S/../../include %s \
+// RUN: --mlir-private-name-obfuscator=md5sum --mlir-private-dialects=privd \
+// RUN: | FileCheck %s --check-prefix=PRIVDD-ON
+// RUN: mlir-tblgen -gen-op-decls -dialect=privd -I %S/../../include %s \
+// RUN: --mlir-private-name-obfuscator=md5sum --mlir-private-dialects=privd \
+// RUN: | FileCheck %s --check-prefix=PRIVOP-ON
+// RUN: mlir-tblgen -gen-op-decls -dialect=publicd -I %S/../../include %s \
+// RUN: --mlir-private-name-obfuscator=md5sum --mlir-private-dialects=privd \
+// RUN: | FileCheck %s --check-prefix=PUBOP-ON
+// RUN: mlir-tblgen -gen-op-decls -I %S/../../include %s \
+// RUN: --mlir-private-name-obfuscator=md5sum --mlir-private-dialects=privd \
+// RUN: | FileCheck %s --check-prefix=PRIVFORMAT-ON
+// RUN: mlir-tblgen -gen-attrdef-decls -attrdefs-dialect=privd -I %S/../../include %s \
+// RUN: --mlir-private-name-obfuscator=md5sum --mlir-private-dialects=privd \
+// RUN: | FileCheck %s --check-prefix=ATTR-PRIV-ON
+// RUN: mlir-tblgen -gen-attrdef-decls -attrdefs-dialect=publicd -I %S/../../include %s \
+// RUN: --mlir-private-name-obfuscator=md5sum --mlir-private-dialects=privd \
+// RUN: | FileCheck %s --check-prefix=ATTR-PUB-ON
+// RUN: mlir-tblgen -gen-attrdef-defs -attrdefs-dialect=privd -I %S/../../include %s \
+// RUN: --mlir-private-name-obfuscator=md5sum --mlir-private-dialects=privd \
+// RUN: | FileCheck %s --check-prefix=ATTR-PRIV-DEF-ON
+// RUN: mlir-tblgen -gen-typedef-decls -typedefs-dialect=privd -I %S/../../include %s \
+// RUN: --mlir-private-name-obfuscator=md5sum --mlir-private-dialects=privd \
+// RUN: | FileCheck %s --check-prefix=TYPE-PRIV-ON
+// RUN: mlir-tblgen -gen-typedef-decls -typedefs-dialect=publicd -I %S/../../include %s \
+// RUN: --mlir-private-name-obfuscator=md5sum --mlir-private-dialects=privd \
+// RUN: | FileCheck %s --check-prefix=TYPE-PUB-ON
+// RUN: mlir-tblgen -gen-typedef-defs -typedefs-dialect=privd -I %S/../../include %s \
+// RUN: --mlir-private-name-obfuscator=md5sum --mlir-private-dialects=privd \
+// RUN: | FileCheck %s --check-prefix=TYPE-PRIV-DEF-ON
+//
+// Determinism: rerunning with the same obfuscator produces identical output.
+// RUN: mlir-tblgen -gen-dialect-decls -dialect=privd -I %S/../../include %s \
+// RUN: --mlir-private-name-obfuscator=md5sum --mlir-private-dialects=privd > %t.first
+// RUN: mlir-tblgen -gen-dialect-decls -dialect=privd -I %S/../../include %s \
+// RUN: --mlir-private-name-obfuscator=md5sum --mlir-private-dialects=privd > %t.second
+// RUN: cmp %t.first %t.second
+//
+// A different obfuscator produces different output.
+// RUN: mlir-tblgen -gen-dialect-decls -dialect=privd -I %S/../../include %s \
+// RUN: --mlir-private-name-obfuscator=sha1sum --mlir-private-dialects=privd > %t.alt
+// RUN: not cmp %t.first %t.alt
+
+include "mlir/IR/OpBase.td"
+include "mlir/IR/AttrTypeBase.td"
+
+def PublicDialect : Dialect {
+ let name = "publicd";
+ let cppNamespace = "::publicd";
+}
+
+def PrivateDialect : Dialect {
+ let name = "privd";
+ let cppNamespace = "::privd";
+}
+
+def Public_AddOp : Op<PublicDialect, "add", []>;
+def Public_PublicFormatOp : Op<PublicDialect, "format", []> {
+ let assemblyFormat = "attr-dict";
+}
+def Private_MulOp : Op<PrivateDialect, "mul", []>;
+def Private_PrivateFormatOp : Op<PrivateDialect, "format", []> {
+ let assemblyFormat = "attr-dict";
+}
+
+def Public_PublicAttr : AttrDef<PublicDialect, "Public"> {
+ let mnemonic = "public_attr";
+ let genMnemonicAlias = 1;
+}
+def Private_PrivateAttr : AttrDef<PrivateDialect, "Private"> {
+ let mnemonic = "private_attr";
+ let genMnemonicAlias = 1;
+}
+
+def Public_PublicType : TypeDef<PublicDialect, "Public"> {
+ let mnemonic = "public_type";
+ let genMnemonicAlias = 1;
+}
+def Private_PrivateType : TypeDef<PrivateDialect, "Private"> {
+ let mnemonic = "private_type";
+ let genMnemonicAlias = 1;
+}
+
+// The obfuscated form of each private TableGen name in this file is the
+// md5 hash of the name, prefixed with `_`:
+//
+// printf %s privd | md5sum -> 1e8d1fee65b16e634839dea9f85d3429
+// printf %s mul | md5sum -> 353942263d1bedfbe06b7bfa78226253
+// printf %s format | md5sum -> 1ddcb92ade31c8fbd370001f9b29a7d9
+// printf %s private_attr | md5sum -> d0f28b2f0516a6ff810258f8d1b6899d
+// printf %s private_type | md5sum -> ff03a49c6c57d69cd51f5420a768f9ef
+
+// Public dialect, obfuscation off: original namespace.
+// PUBDD-OFF: getDialectNamespace()
+// PUBDD-OFF: StringLiteral("publicd")
+
+// Public dialect, obfuscation on: still original namespace.
+// PUBDD-ON: getDialectNamespace()
+// PUBDD-ON: StringLiteral("publicd")
+
+// Private dialect, obfuscation off: original namespace.
+// PRIVDD-OFF: getDialectNamespace()
+// PRIVDD-OFF: StringLiteral("privd")
+
+// Private dialect, obfuscation on: md5(privd) with `_` prefix.
+// PRIVDD-ON: getDialectNamespace()
+// PRIVDD-ON: StringLiteral("_1e8d1fee65b16e634839dea9f85d3429")
+// PRIVDD-ON-NOT: StringLiteral("privd")
+
+// Op of private dialect, obfuscation on: both halves obfuscated. The first
+// private op encountered (alphabetically) is MulOp.
+// PRIVOP-ON: getOperationName()
+// PRIVOP-ON: StringLiteral("_1e8d1fee65b16e634839dea9f85d3429._353942263d1bedfbe06b7bfa78226253")
+// PRIVOP-ON-NOT: StringLiteral("privd.mul")
+// PRIVOP-ON-NOT: StringLiteral("privd.format")
+
+// Op of public dialect, obfuscation on: unchanged.
+// PUBOP-ON: getOperationName()
+// PUBOP-ON: StringLiteral("publicd.add")
+
+// Declarative assembly formats of private ops are not generated under private
+// obfuscation. They therefore fall back to generic printing and reject the
+// custom textual syntax.
+// PRIVFORMAT-ON-LABEL: class PrivateFormatOp : public
+// PRIVFORMAT-ON: _1e8d1fee65b16e634839dea9f85d3429._1ddcb92ade31c8fbd370001f9b29a7d9
+// PRIVFORMAT-ON-NOT: static ::mlir::ParseResult parse
+// PRIVFORMAT-ON-NOT: void print
+// PRIVFORMAT-ON-LABEL: class PublicFormatOp : public
+// PRIVFORMAT-ON: publicd.format
+// PRIVFORMAT-ON: static ::mlir::ParseResult parse
+// PRIVFORMAT-ON: void print
+
+// Attribute names and mnemonics of private dialects are obfuscated, while
+// public attributes remain unchanged.
+// ATTR-PRIV-ON-LABEL: class PrivateAttr : public
+// ATTR-PRIV-ON: static constexpr ::llvm::StringLiteral name = "_1e8d1fee65b16e634839dea9f85d3429._d0f28b2f0516a6ff810258f8d1b6899d";
+// ATTR-PRIV-ON: static constexpr ::llvm::StringLiteral dialectName = "_1e8d1fee65b16e634839dea9f85d3429";
+// ATTR-PRIV-ON: getMnemonic()
+// ATTR-PRIV-ON: return {"_d0f28b2f0516a6ff810258f8d1b6899d"};
+// ATTR-PRIV-ON-NOT: private_attr
+// ATTR-PRIV-DEF-ON: os << "_d0f28b2f0516a6ff810258f8d1b6899d";
+// ATTR-PRIV-DEF-ON-NOT: private_attr
+// ATTR-PUB-ON-LABEL: class PublicAttr : public
+// ATTR-PUB-ON: static constexpr ::llvm::StringLiteral name = "publicd.public_attr";
+// ATTR-PUB-ON: static constexpr ::llvm::StringLiteral dialectName = "publicd";
+// ATTR-PUB-ON: getMnemonic()
+// ATTR-PUB-ON: return {"public_attr"};
+
+// Type names and mnemonics of private dialects are obfuscated, while public
+// types remain unchanged.
+// TYPE-PRIV-ON-LABEL: class PrivateType : public
+// TYPE-PRIV-ON: static constexpr ::llvm::StringLiteral name = "_1e8d1fee65b16e634839dea9f85d3429._ff03a49c6c57d69cd51f5420a768f9ef";
+// TYPE-PRIV-ON: static constexpr ::llvm::StringLiteral dialectName = "_1e8d1fee65b16e634839dea9f85d3429";
+// TYPE-PRIV-ON: getMnemonic()
+// TYPE-PRIV-ON: return {"_ff03a49c6c57d69cd51f5420a768f9ef"};
+// TYPE-PRIV-ON-NOT: private_type
+// TYPE-PRIV-DEF-ON: os << "_ff03a49c6c57d69cd51f5420a768f9ef";
+// TYPE-PRIV-DEF-ON-NOT: private_type
+// TYPE-PUB-ON-LABEL: class PublicType : public
+// TYPE-PUB-ON: static constexpr ::llvm::StringLiteral name = "publicd.public_type";
+// TYPE-PUB-ON: static constexpr ::llvm::StringLiteral dialectName = "publicd";
+// TYPE-PUB-ON: getMnemonic()
+// TYPE-PUB-ON: return {"public_type"};
diff --git a/mlir/test/mlir-tblgen/private-pass-obfuscation.td b/mlir/test/mlir-tblgen/private-pass-obfuscation.td
new file mode 100644
index 0000000000000..968c893c75fc9
--- /dev/null
+++ b/mlir/test/mlir-tblgen/private-pass-obfuscation.td
@@ -0,0 +1,79 @@
+// Verifies that --mlir-private-passes + --mlir-private-name-obfuscator
+// obfuscates the argument and name of every pass, empties out the pass /
+// per-option / per-statistic descriptions, and keeps emitting the
+// registration / C-API helpers under each pass's (unchanged) C++ class
+// name.
+//
+// RUN: mlir-tblgen -gen-pass-decls -name=Test -I %S/../../include %s \
+// RUN: | FileCheck %s --check-prefix=BASELINE
+// RUN: mlir-tblgen -gen-pass-decls -name=Test -I %S/../../include %s \
+// RUN: --mlir-private-name-obfuscator=md5sum --mlir-private-passes \
+// RUN: | FileCheck %s --check-prefix=OBFUSCATED
+// RUN: mlir-tblgen -gen-pass-capi-header -prefix=Test -I %S/../../include %s \
+// RUN: --mlir-private-name-obfuscator=md5sum --mlir-private-passes \
+// RUN: | FileCheck %s --check-prefix=CAPI
+
+include "mlir/Pass/PassBase.td"
+
+// Pass with a summary and a single option. The checks below use it to
+// exercise hashing of the pass argument / name and stripping of the pass
+// summary and the option description.
+def FirstTestPass : Pass<"first-test-pass"> {
+ let summary = "First pass summary";
+ let options = [
+ Option<"opt", "first-opt", "int", "0", "First option description.">
+ ];
+}
+
+// Pass with only a summary (no options). It gives the checks below a second
+// pass to match after FirstTestPass.
+def SecondTestPass : Pass<"second-test-pass"> {
+ let summary = "Second pass summary";
+}
+
+// Per-pass blocks emerge in alphabetical order, and registration helpers
+// follow the same order.
+//
+// The obfuscated forms used below are the md5 hashes of the original
+// names, prefixed with `_`:
+//
+// printf %s first-test-pass | md5sum -> 21ebd1b31ed5dbc81e5aa9e2fed732f3
+// printf %s FirstTestPass | md5sum -> 6f7c29e76fd9f6f176504b48059b6b8b
+// printf %s second-test-pass | md5sum -> ba8fce64443d8c815b0ed7477793587c
+// printf %s SecondTestPass | md5sum -> 6b8d928968d2a98e9bc5af1bffb90f20
+//
+// Baseline (no flags): names and descriptions are emitted verbatim, both
+// passes are registered.
+// BASELINE: getArgument() const override { return "first-test-pass"; }
+// BASELINE: getDescription() const override { return R"PD(First pass summary)PD"; }
+// BASELINE: getName() const override { return "FirstTestPass"; }
+// BASELINE: ::llvm::cl::desc(R"PO(First option description.)PO")
+// BASELINE: getArgument() const override { return "second-test-pass"; }
+// BASELINE: getDescription() const override { return R"PD(Second pass summary)PD"; }
+// BASELINE: getName() const override { return "SecondTestPass"; }
+// BASELINE: inline void registerFirstTestPass()
+// BASELINE: inline void registerSecondTestPass()
+
+// Obfuscated: every pass argument and name is hashed, and every pass
+// summary (returned by `getDescription()`) and per-option `cl::desc` is
+// emitted as the empty string. The C++ class names -- and the per-pass
+// registration helpers that use them -- are unchanged.
+// OBFUSCATED: getArgument() const override { return "_21ebd1b31ed5dbc81e5aa9e2fed732f3"; }
+// OBFUSCATED: getDescription() const override { return R"PD()PD"; }
+// OBFUSCATED: getName() const override { return "_6f7c29e76fd9f6f176504b48059b6b8b"; }
+// OBFUSCATED: ::llvm::cl::desc(R"PO()PO")
+// OBFUSCATED: getArgument() const override { return "_ba8fce64443d8c815b0ed7477793587c"; }
+// OBFUSCATED: getDescription() const override { return R"PD()PD"; }
+// OBFUSCATED: getName() const override { return "_6b8d928968d2a98e9bc5af1bffb90f20"; }
+// OBFUSCATED-NOT: First pass summary
+// OBFUSCATED-NOT: First option description.
+// OBFUSCATED-NOT: Second pass summary
+// OBFUSCATED-NOT: return "first-test-pass"
+// OBFUSCATED-NOT: return "FirstTestPass"
+// OBFUSCATED-NOT: return "second-test-pass"
+// OBFUSCATED-NOT: return "SecondTestPass"
+// OBFUSCATED: inline void registerFirstTestPass()
+// OBFUSCATED: inline void registerSecondTestPass()
+
+// C-API entry points are emitted for every pass. The C symbol uses the
+// pass's C++ class name (unaffected by obfuscation) so downstream linkage
+// is stable across builds with different obfuscators.
+// CAPI: mlirCreateTestFirstTestPass
+// CAPI: mlirRegisterTestFirstTestPass
+// CAPI: mlirCreateTestSecondTestPass
+// CAPI: mlirRegisterTestSecondTestPass
diff --git a/mlir/tools/mlir-tblgen/AttrOrTypeDefGen.cpp b/mlir/tools/mlir-tblgen/AttrOrTypeDefGen.cpp
index 64f35e7fef6d3..a30aadc4aee38 100644
--- a/mlir/tools/mlir-tblgen/AttrOrTypeDefGen.cpp
+++ b/mlir/tools/mlir-tblgen/AttrOrTypeDefGen.cpp
@@ -13,6 +13,7 @@
#include "mlir/TableGen/Format.h"
#include "mlir/TableGen/GenInfo.h"
#include "mlir/TableGen/Interfaces.h"
+#include "mlir/TableGen/PrivateName.h"
#include "llvm/ADT/SmallVectorExtras.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/Support/CommandLine.h"
@@ -328,15 +329,20 @@ void DefGen::emitName() {
auto *typeDef = cast<TypeDef>(&def);
name = typeDef->getTypeName();
}
+ // The full name is "dialect.mnemonic"; obfuscate dialect and mnemonic
+ // halves independently so that runtime parsing of the dialect prefix still
+ // works.
std::string nameDecl =
- strfmt("static constexpr ::llvm::StringLiteral name = \"{0}\";\n", name);
+ strfmt("static constexpr ::llvm::StringLiteral name = \"{0}\";\n",
+ tblgen::maybeObfuscateDotted(name, def.isPrivate()));
defCls.declare<ExtraClassDeclaration>(std::move(nameDecl));
}
void DefGen::emitDialectName() {
std::string decl =
strfmt("static constexpr ::llvm::StringLiteral dialectName = \"{0}\";\n",
- def.getDialect().getName());
+ tblgen::maybeObfuscate(def.getDialect().getName(),
+ def.getDialect().isPrivate()));
defCls.declare<ExtraClassDeclaration>(std::move(decl));
}
@@ -448,7 +454,9 @@ void DefGen::emitInvariantsVerifier(bool hasImpl, bool hasCustomVerifier) {
void DefGen::emitParserPrinter() {
auto *mnemonic = defCls.addStaticMethod<Method::Constexpr>(
"::llvm::StringLiteral", "getMnemonic");
- mnemonic->body().indent() << strfmt("return {\"{0}\"};", *def.getMnemonic());
+ mnemonic->body().indent()
+ << strfmt("return {\"{0}\"};",
+ tblgen::maybeObfuscate(*def.getMnemonic(), def.isPrivate()));
// Declare the parser and printer, if needed.
bool hasAssemblyFormat = def.getAssemblyFormat().has_value();
@@ -705,7 +713,9 @@ void DefGen::emitMnemonicAliasMethod() {
SmallVector<MethodParameter> params{{"::llvm::raw_ostream &", "os"}};
Method *m = defCls.addMethod<Method::Const>("::mlir::OpAsmAliasResult",
"getAlias", std::move(params));
- m->body().indent() << strfmt("os << \"{0}\";\n", *def.getMnemonic())
+ m->body().indent() << strfmt("os << \"{0}\";\n",
+ tblgen::maybeObfuscate(*def.getMnemonic(),
+ def.isPrivate()))
<< "return ::mlir::OpAsmAliasResult::OverridableAlias;\n";
}
diff --git a/mlir/tools/mlir-tblgen/CMakeLists.txt b/mlir/tools/mlir-tblgen/CMakeLists.txt
index d7087cba3c874..7d37b3554f137 100644
--- a/mlir/tools/mlir-tblgen/CMakeLists.txt
+++ b/mlir/tools/mlir-tblgen/CMakeLists.txt
@@ -31,6 +31,7 @@ add_tablegen(mlir-tblgen MLIR
PassCAPIGen.cpp
PassDocGen.cpp
PassGen.cpp
+ PrivateName.cpp
RewriterGen.cpp
SPIRVUtilsGen.cpp
TosaUtilsGen.cpp
diff --git a/mlir/tools/mlir-tblgen/DialectGen.cpp b/mlir/tools/mlir-tblgen/DialectGen.cpp
index 8eecad39f49f3..dd90efc731aeb 100644
--- a/mlir/tools/mlir-tblgen/DialectGen.cpp
+++ b/mlir/tools/mlir-tblgen/DialectGen.cpp
@@ -18,6 +18,7 @@
#include "mlir/TableGen/GenInfo.h"
#include "mlir/TableGen/Interfaces.h"
#include "mlir/TableGen/Operator.h"
+#include "mlir/TableGen/PrivateName.h"
#include "mlir/TableGen/Trait.h"
#include "llvm/ADT/Sequence.h"
#include "llvm/ADT/StringExtras.h"
@@ -250,7 +251,9 @@ static void emitDialectDecl(Dialect &dialect, raw_ostream &os) {
tblgen::emitSummaryAndDescComments(os, dialect.getSummary(),
dialect.getDescription(),
/*terminateCmment=*/false);
- os << llvm::formatv(dialectDeclBeginStr, cppName, dialect.getName(),
+ StringRef emittedDialectName =
+ maybeObfuscate(dialect.getName(), dialect.isPrivate());
+ os << llvm::formatv(dialectDeclBeginStr, cppName, emittedDialectName,
superClassName);
// If the dialect requested the default attribute printer and parser, emit
@@ -289,7 +292,7 @@ static void emitDialectDecl(Dialect &dialect, raw_ostream &os) {
attrPair.first, /*capitalizeFirst=*/false);
os << llvm::formatv(discardableAttrHelperDecl, camelNameUpper,
attrPair.first, attrPair.second, camelName,
- dialect.getName());
+ emittedDialectName);
}
if (std::optional<StringRef> extraDecl = dialect.getExtraClassDeclaration())
diff --git a/mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp b/mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp
index 2cb47d084ce69..5672991e0df97 100644
--- a/mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp
+++ b/mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp
@@ -24,6 +24,7 @@
#include "mlir/TableGen/GenInfo.h"
#include "mlir/TableGen/Interfaces.h"
#include "mlir/TableGen/Operator.h"
+#include "mlir/TableGen/PrivateName.h"
#include "mlir/TableGen/Property.h"
#include "mlir/TableGen/Region.h"
#include "mlir/TableGen/SideEffects.h"
@@ -273,6 +274,23 @@ static std::string getArgumentName(const Operator &op, int index) {
return std::string(formatv("{0}_{1}", generatedArgName, index));
}
+/// Decides whether `op` should be emitted as though it declared neither a
+/// custom nor a declarative assembly format. This is the case only for
+/// private ops, and only when private-name obfuscation is enabled; callers
+/// use the answer to skip emitting the op's custom parser / printer / format.
+static bool shouldStripPrivateAssemblyFormat(const Operator &op) {
+  if (!op.isPrivate())
+    return false;
+  return tblgen::obfuscatePrivateNamesEnabled();
+}
+
+/// Picks the label that is embedded into generated verifier text. For private
+/// ops in obfuscation builds the user-authored operand/result/region/etc.
+/// name is replaced by an empty label so that it does not end up in release
+/// binaries; in every other case `name` is passed through untouched.
+static StringRef getEmittedDiagnosticName(const Operator &op, StringRef name) {
+  const bool hideName =
+      op.isPrivate() && tblgen::obfuscatePrivateNamesEnabled();
+  return hideName ? StringRef("") : name;
+}
+
// Returns true if we can use unwrapped value for the given `attr` in builders.
static bool canUseUnwrappedRawValue(const tblgen::Attribute &attr) {
return attr.getReturnType() != attr.getStorageType() &&
@@ -381,7 +399,9 @@ class OpOrAdaptorHelper {
return [this](raw_ostream &os) -> raw_ostream & {
if (emitForOp)
return os << "emitOpError(\"";
- return os << formatv("emitError(loc, \"'{0}' op ", op.getOperationName());
+ return os << formatv("emitError(loc, \"'{0}' op ",
+ tblgen::maybeObfuscateDotted(op.getOperationName(),
+ op.isPrivate()));
};
}
@@ -1168,7 +1188,8 @@ OpEmitter::OpEmitter(const Operator &op,
genFolderDecls();
genTypeInterfaceMethods();
genOpInterfaceMethods();
- generateOpFormat(op, opClass, emitHelper.hasProperties());
+ if (!shouldStripPrivateAssemblyFormat(op))
+ generateOpFormat(op, opClass, emitHelper.hasProperties());
genSideEffectInterfaceMethods();
}
void OpEmitter::emitDecl(
@@ -1210,6 +1231,7 @@ void OpEmitter::genAttrNameGetters() {
emitHelper.getAttrMetadata();
bool hasOperandSegmentsSize =
op.getTrait("::mlir::OpTrait::AttrSizedOperandSegments");
+
// Emit the getAttributeNames method.
{
auto *method = opClass.addStaticInlineMethod(
@@ -3835,6 +3857,9 @@ void OpEmitter::genTypeInterfaceMethods() {
}
void OpEmitter::genParser() {
+ if (shouldStripPrivateAssemblyFormat(op))
+ return;
+
if (hasStringAttribute(def, "assemblyFormat"))
return;
@@ -3851,6 +3876,9 @@ void OpEmitter::genParser() {
}
void OpEmitter::genPrinter() {
+ if (shouldStripPrivateAssemblyFormat(op))
+ return;
+
if (hasStringAttribute(def, "assemblyFormat"))
return;
@@ -3978,8 +4006,8 @@ void OpEmitter::genOperandResultVerifier(MethodBody &body,
" if (::mlir::failed(::mlir::OpTrait::impl::verifyValueSizeAttr("
"*this, \"{0}\", \"{1}\", valueGroup{2}.size())))\n"
" return ::mlir::failure();\n",
- value.constraint.getVariadicOfVariadicSegmentSizeAttr(), value.name,
- staticValue.index());
+ value.constraint.getVariadicOfVariadicSegmentSizeAttr(),
+ getEmittedDiagnosticName(op, value.name), staticValue.index());
}
// Otherwise, if there is no predicate there is nothing left to do.
@@ -4031,7 +4059,8 @@ void OpEmitter::genRegionVerifier(MethodBody &body) {
: formatv(getSingleRegion, it.index()).str();
auto constraintFn =
staticVerifierEmitter.getRegionConstraintFn(region.constraint);
- body << formatv(verifyRegion, getRegion, constraintFn, region.name);
+ body << formatv(verifyRegion, getRegion, constraintFn,
+ getEmittedDiagnosticName(op, region.name));
}
body << " }\n";
}
@@ -4069,7 +4098,7 @@ void OpEmitter::genSuccessorVerifier(MethodBody &body) {
auto constraintFn =
staticVerifierEmitter.getSuccessorConstraintFn(successor.constraint);
body << formatv(verifySuccessor, getSuccessor, constraintFn,
- successor.name);
+ getEmittedDiagnosticName(op, successor.name));
}
body << " }\n";
}
@@ -4160,7 +4189,9 @@ void OpEmitter::genOpNameGetter() {
auto *method = opClass.addStaticMethod<Method::Constexpr>(
"::llvm::StringLiteral", "getOperationName");
ERROR_IF_PRUNED(method, "getOperationName", op);
- method->body() << " return ::llvm::StringLiteral(\"" << op.getOperationName()
+ method->body() << " return ::llvm::StringLiteral(\""
+ << tblgen::maybeObfuscateDotted(op.getOperationName(),
+ op.isPrivate())
<< "\");";
}
@@ -4427,7 +4458,7 @@ OpOperandAdaptorEmitter::OpOperandAdaptorEmitter(
body.indent() << "if (odsAttrs)\n";
body.indent() << formatv(
"odsOpName.emplace(\"{0}\", odsAttrs.getContext());\n",
- op.getOperationName());
+ tblgen::maybeObfuscateDotted(op.getOperationName(), op.isPrivate()));
paramList.insert(paramList.begin(), MethodParameter("RangeT", "values"));
auto *constructor = genericAdaptor.addConstructor(paramList);
diff --git a/mlir/tools/mlir-tblgen/PassGen.cpp b/mlir/tools/mlir-tblgen/PassGen.cpp
index e4ae78f022405..d134af08bc4d1 100644
--- a/mlir/tools/mlir-tblgen/PassGen.cpp
+++ b/mlir/tools/mlir-tblgen/PassGen.cpp
@@ -13,6 +13,7 @@
#include "mlir/TableGen/GenInfo.h"
#include "mlir/TableGen/Pass.h"
+#include "mlir/TableGen/PrivateName.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FormatVariadic.h"
@@ -186,11 +187,14 @@ static void emitRegistrations(llvm::ArrayRef<Pass> passes, raw_ostream &os) {
/// The code snippet used to generate the start of a pass base class.
///
-/// {0}: The def name of the pass record.
+/// {0}: The def name of the pass record (used as the C++ class identifier).
/// {1}: The base class for the pass.
-/// {2): The command line argument for the pass.
-/// {3}: The summary for the pass.
+/// {2}: The command line argument for the pass (possibly obfuscated).
+/// {3}: The summary for the pass (possibly emptied for private passes).
/// {4}: The dependent dialects registration.
+/// {5}: The display name returned by `getName()` / `getPassName()` (possibly
+/// obfuscated). Distinct from {0} so the C++ class identifier remains
+/// stable when the displayed name is obfuscated.
const char *const baseClassBegin = R"(
template <typename DerivedT>
class {0}Base : public {1} {
@@ -214,9 +218,9 @@ class {0}Base : public {1} {
/// Returns the derived pass name.
static constexpr ::llvm::StringLiteral getPassName() {
- return ::llvm::StringLiteral("{0}");
+ return ::llvm::StringLiteral("{5}");
}
- ::llvm::StringRef getName() const override { return "{0}"; }
+ ::llvm::StringRef getName() const override { return "{5}"; }
/// Support isa/dyn_cast functionality for the derived pass class.
static bool classof(const ::mlir::Pass *pass) {{
@@ -282,13 +286,16 @@ std::unique_ptr<::mlir::Pass> create{0}({0}Options options) {{
/// Emit the declarations for each of the pass options.
static void emitPassOptionDecls(const Pass &pass, raw_ostream &os) {
+ bool stripDescriptions =
+ tblgen::arePassesPrivate() && tblgen::obfuscatePrivateNamesEnabled();
for (const PassOption &opt : pass.getOptions()) {
os.indent(2) << "::mlir::Pass::"
<< (opt.isListOption() ? "ListOption" : "Option");
+ StringRef desc = stripDescriptions ? StringRef("") : opt.getDescription();
os << formatv(R"(<{0}> {1}{{*this, "{2}", ::llvm::cl::desc(R"PO({3})PO"))",
opt.getType(), opt.getCppVariableName(), opt.getArgument(),
- opt.getDescription().trim());
+ desc.trim());
if (std::optional<StringRef> defaultVal = opt.getDefaultValue())
os << ", ::llvm::cl::init(" << defaultVal << ")";
if (std::optional<StringRef> additionalFlags = opt.getAdditionalFlags())
@@ -299,11 +306,13 @@ static void emitPassOptionDecls(const Pass &pass, raw_ostream &os) {
/// Emit the declarations for each of the pass statistics.
static void emitPassStatisticDecls(const Pass &pass, raw_ostream &os) {
+ bool stripDescriptions =
+ tblgen::arePassesPrivate() && tblgen::obfuscatePrivateNamesEnabled();
for (const PassStatistic &stat : pass.getStatistics()) {
+ StringRef desc = stripDescriptions ? StringRef("") : stat.getDescription();
os << formatv(
" ::mlir::Pass::Statistic {0}{{this, \"{1}\", R\"PS({2})PS\"};\n",
- stat.getCppVariableName(), stat.getName(),
- stat.getDescription().trim());
+ stat.getCppVariableName(), stat.getName(), desc.trim());
}
}
@@ -334,10 +343,25 @@ static void emitPassDefs(const Pass &pass, raw_ostream &os) {
"\n ");
}
+ // Privacy-aware substitutions for the base class template. When the
+ // build sets `--mlir-private-passes` and `--mlir-private-name-obfuscator`,
+ // the display name returned by `getName()` / `getPassName()` and the CLI
+ // argument returned by `getArgument()` are obfuscated, and the summary
+ // returned by `getDescription()` is emitted as the empty string. The
+ // per-pass C++ class name and the generated `register*Pass()` /
+ // `mlirCreate*` / `mlirRegister*` helpers are unaffected.
+ bool passesArePrivate = tblgen::arePassesPrivate();
+ std::string displayName =
+ tblgen::maybeObfuscate(passName, passesArePrivate).str();
+ std::string argument =
+ tblgen::maybeObfuscate(pass.getArgument(), passesArePrivate).str();
+ std::string summary = pass.getSummary().trim().str();
+ if (passesArePrivate && tblgen::obfuscatePrivateNamesEnabled())
+ summary.clear();
+
os << "namespace impl {\n";
- os << formatv(baseClassBegin, passName, pass.getBaseClass(),
- pass.getArgument(), pass.getSummary().trim(),
- dependentDialectRegistrations);
+ os << formatv(baseClassBegin, passName, pass.getBaseClass(), argument,
+ summary, dependentDialectRegistrations, displayName);
if (ArrayRef<PassOption> options = pass.getOptions(); !options.empty()) {
os.indent(2) << formatv("{0}Base({0}Options options) : {0}Base() {{\n",
diff --git a/mlir/tools/mlir-tblgen/PrivateName.cpp b/mlir/tools/mlir-tblgen/PrivateName.cpp
new file mode 100644
index 0000000000000..dbe11012943f8
--- /dev/null
+++ b/mlir/tools/mlir-tblgen/PrivateName.cpp
@@ -0,0 +1,160 @@
+//===- PrivateName.cpp - Private name obfuscation for mlir-tblgen ---------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Plumbing for the `--mlir-private-name-obfuscator` mlir-tblgen flag. This
+// is opt-in release-engineering tooling for downstream MLIR builds; the
+// rest of the mlir-tblgen code generator does not need to know about it.
+//
+// When the user passes `--mlir-private-name-obfuscator=<cmd>`, mlir-tblgen
+// pipes each name marked `isPrivate` into `<cmd>` via stdin, reads the
+// first whitespace-delimited token of stdout, and uses that token (with a
+// leading `_`) as the obfuscated form of the name. Results are cached for
+// the lifetime of the process.
+//
+//===----------------------------------------------------------------------===//
+
+#include "mlir/TableGen/PrivateName.h"
+
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
+
+#include <cstdio>
+#include <string>
+
+#ifdef _WIN32
+#define MLIR_TBLGEN_POPEN _popen
+#define MLIR_TBLGEN_PCLOSE _pclose
+#else
+#define MLIR_TBLGEN_POPEN popen
+#define MLIR_TBLGEN_PCLOSE pclose
+#endif
+
+using namespace mlir;
+using namespace llvm;
+
+namespace {
+/// `--mlir-private-name-obfuscator=<cmd>`: the shell command that maps a
+/// private name to its obfuscated form. The default is the empty string,
+/// which leaves obfuscation disabled (see `obfuscatePrivateNamesEnabled()`).
+llvm::cl::opt<std::string> privateNameObfuscator(
+    "mlir-private-name-obfuscator",
+    llvm::cl::desc(
+        "Shell command used to obfuscate the dialect namespace, op mnemonic, "
+        "AttrDef/TypeDef mnemonic, and pass argument/name of private "
+        "TableGen items. The set of private dialects is controlled by "
+        "`--mlir-private-dialects`; passes are treated as private when "
+        "`--mlir-private-passes` is set. For each name, mlir-tblgen runs "
+        "`printf %s <name> | <cmd>` and uses the first whitespace-delimited "
+        "token of stdout (prefixed with `_`) as the obfuscated form. An "
+        "empty value disables obfuscation."),
+    llvm::cl::init(""));
+
+/// `--mlir-private-dialects=<ns>[,<ns>...]`: dialect namespaces to treat as
+/// private. The option callback hands every value it receives to
+/// `mlir::tblgen::addPrivateDialect`.
+llvm::cl::list<std::string> privateDialects(
+    "mlir-private-dialects",
+    llvm::cl::desc(
+        "Comma-separated list of dialect namespaces whose ops, attributes, "
+        "and types are obfuscated when `--mlir-private-name-obfuscator` is "
+        "also set. Op argument attribute keys are not obfuscated."),
+    llvm::cl::CommaSeparated,
+    llvm::cl::cb<void, std::string>(
+        [](const std::string &dialectNamespace) {
+          mlir::tblgen::addPrivateDialect(dialectNamespace);
+        }));
+
+/// `--mlir-private-passes`: off by default. When set, all passes are treated
+/// as private; the flag is exposed to the generators via
+/// `arePassesPrivate()`.
+llvm::cl::opt<bool> privatePasses(
+    "mlir-private-passes", llvm::cl::init(false),
+    llvm::cl::desc(
+        "Treat all passes as private. When `--mlir-private-name-obfuscator` "
+        "is also set, pass arguments / names are replaced with opaque "
+        "identifiers and pass / option / statistic descriptions are emitted "
+        "as empty strings."));
+
+/// Gives access to the memo table that maps each original name to its
+/// obfuscated form. The table is a function-local static, so there is a
+/// single instance for the whole process.
+llvm::StringMap<std::string> &obfuscationCache() {
+  static llvm::StringMap<std::string> nameToObfuscated;
+  return nameToObfuscated;
+}
+
+/// Writes `s` onto the end of `out` as one single-quoted shell word. A single
+/// quote inside `s` cannot appear within single quotes, so each one is
+/// emitted as `'\''`: close the quote, add an escaped quote, reopen the quote.
+void appendShellSingleQuoted(std::string &out, StringRef s) {
+  out += '\'';
+  for (char ch : s) {
+    if (ch != '\'') {
+      out += ch;
+      continue;
+    }
+    out += "'\\''";
+  }
+  out += '\'';
+}
+
+/// Invokes `printf '%s' <name> | <obfuscator>` through `popen` and returns
+/// `_` followed by the first whitespace-delimited token the pipeline wrote to
+/// stdout.
+///
+/// Aborts via `llvm::report_fatal_error` if the command cannot be spawned, if
+/// its output cannot be read completely, if it exits with a non-zero status,
+/// or if it prints no token at all.
+std::string runPrivateNameObfuscator(StringRef name) {
+  // `name` is single-quoted so that the shell hands it to `printf` verbatim.
+  // The obfuscator is appended as-is because it is itself a shell command.
+  // NOTE(review): the quoting and `printf` assume a POSIX shell; presumably
+  // this does not work unchanged with `_popen` on Windows -- confirm.
+  std::string cmd = "printf '%s' ";
+  appendShellSingleQuoted(cmd, name);
+  cmd.append(" | ");
+  cmd.append(privateNameObfuscator);
+
+  FILE *pipe = MLIR_TBLGEN_POPEN(cmd.c_str(), "r");
+  if (!pipe)
+    llvm::report_fatal_error(
+        "--mlir-private-name-obfuscator: failed to spawn obfuscator command");
+
+  std::string out;
+  char buf[256];
+  while (size_t n = std::fread(buf, 1, sizeof(buf), pipe))
+    out.append(buf, n);
+  // `fread` returns 0 both at end-of-file and on a read error. Record which
+  // one ended the loop while the stream is still open, so that a truncated
+  // read is never mistaken for the obfuscator's complete output.
+  const bool readFailed = std::ferror(pipe) != 0;
+  int rc = MLIR_TBLGEN_PCLOSE(pipe);
+  if (readFailed)
+    llvm::report_fatal_error("--mlir-private-name-obfuscator: failed to read "
+                             "obfuscator command output");
+  if (rc != 0)
+    llvm::report_fatal_error("--mlir-private-name-obfuscator: obfuscator "
+                             "command exited with non-zero status");
+
+  // Keep only the first token: tools such as `md5sum` print the digest
+  // followed by further fields.
+  StringRef rest = StringRef(out).ltrim();
+  size_t end = rest.find_first_of(" \t\r\n");
+  StringRef token = (end == StringRef::npos) ? rest : rest.substr(0, end);
+  if (token.empty())
+    llvm::report_fatal_error("--mlir-private-name-obfuscator: obfuscator "
+                             "command produced empty output");
+
+  std::string result;
+  result.reserve(1 + token.size());
+  result.push_back('_');
+  result.append(token.data(), token.size());
+  return result;
+}
+} // namespace
+
+/// Reports whether an obfuscator command was supplied, i.e. whether
+/// `--mlir-private-name-obfuscator` holds a non-empty value.
+bool mlir::tblgen::obfuscatePrivateNamesEnabled() {
+  const std::string &obfuscatorCmd = privateNameObfuscator;
+  return !obfuscatorCmd.empty();
+}
+
+/// Reports the current value of the `--mlir-private-passes` flag.
+bool mlir::tblgen::arePassesPrivate() {
+  return privatePasses.getValue();
+}
+
+/// Maps `name` to its obfuscated form. An empty name is handed back as-is.
+/// Otherwise the result is looked up in `obfuscationCache()`, and the
+/// obfuscator command is run only for names that are not cached yet. The
+/// returned reference points at the string stored in the cache.
+StringRef mlir::tblgen::obfuscatePrivateName(StringRef name) {
+  if (name.empty())
+    return name;
+
+  llvm::StringMap<std::string> &memo = obfuscationCache();
+  auto cached = memo.find(name);
+  if (cached == memo.end()) {
+    // First request for this name: run the obfuscator and remember the result.
+    cached = memo.try_emplace(name, runPrivateNameObfuscator(name)).first;
+  }
+  return cached->second;
+}
+
+/// Obfuscates a possibly dotted name such as `dialect.mnemonic`. The name is
+/// returned verbatim unless it is private and obfuscation is enabled. A name
+/// without a `.` is obfuscated as a whole; otherwise the text before the
+/// first `.` and the text after it are obfuscated separately and rejoined
+/// with a `.`.
+std::string mlir::tblgen::maybeObfuscateDotted(StringRef name, bool isPrivate) {
+  if (!(isPrivate && obfuscatePrivateNamesEnabled()))
+    return name.str();
+
+  const size_t dotPos = name.find('.');
+  if (dotPos == StringRef::npos)
+    return obfuscatePrivateName(name).str();
+
+  // Only the first dot separates the two halves; any later dots stay inside
+  // the second half.
+  std::string obfuscated = obfuscatePrivateName(name.substr(0, dotPos)).str();
+  obfuscated.push_back('.');
+  obfuscated += obfuscatePrivateName(name.substr(dotPos + 1)).str();
+  return obfuscated;
+}
diff --git a/mlir/tools/mlir-tblgen/RewriterGen.cpp b/mlir/tools/mlir-tblgen/RewriterGen.cpp
index e3043708a46d1..f63c0257ec17e 100644
--- a/mlir/tools/mlir-tblgen/RewriterGen.cpp
+++ b/mlir/tools/mlir-tblgen/RewriterGen.cpp
@@ -19,6 +19,7 @@
#include "mlir/TableGen/Operator.h"
#include "mlir/TableGen/Pattern.h"
#include "mlir/TableGen/Predicate.h"
+#include "mlir/TableGen/PrivateName.h"
#include "mlir/TableGen/Property.h"
#include "mlir/TableGen/Type.h"
#include "llvm/ADT/FunctionExtras.h"
@@ -727,7 +728,9 @@ void PatternEmitter::emitOperandMatch(DagNode tree, StringRef opName,
verifier, opName, self.str(),
formatv(
"\"operand {0} of op '{1}' failed to satisfy constraint: '{2}'\"",
- operandIndex, op.getOperationName(),
+ operandIndex,
+ tblgen::maybeObfuscateDotted(op.getOperationName(),
+ op.isPrivate()),
escapeString(constraint.getSummary()))
.str());
}
@@ -911,8 +914,9 @@ void PatternEmitter::emitAttributeMatch(DagNode tree, StringRef castedName,
emitMatchCheck(castedName, tgfmt("tblgen_attr", &fmtCtx),
formatv("\"expected op '{0}' to have attribute '{1}' "
"of type '{2}'\"",
- op.getOperationName(), namedAttr->name,
- attr.getStorageType()));
+ tblgen::maybeObfuscateDotted(op.getOperationName(),
+ op.isPrivate()),
+ namedAttr->name, attr.getStorageType()));
}
auto matcher = tree.getArgAsLeaf(argIndex);
@@ -940,10 +944,12 @@ void PatternEmitter::emitAttributeMatch(DagNode tree, StringRef castedName,
}
emitStaticVerifierCall(
verifier, castedName, "tblgen_attr",
- formatv("\"op '{0}' attribute '{1}' failed to satisfy constraint: "
- "'{2}'\"",
- op.getOperationName(), namedAttr->name,
- escapeString(matcher.getAsConstraint().getSummary()))
+ formatv(
+ "\"op '{0}' attribute '{1}' failed to satisfy constraint: "
+ "'{2}'\"",
+ tblgen::maybeObfuscateDotted(op.getOperationName(), op.isPrivate()),
+ namedAttr->name,
+ escapeString(matcher.getAsConstraint().getSummary()))
.str());
}
@@ -980,10 +986,12 @@ void PatternEmitter::emitPropertyMatch(DagNode tree, StringRef castedName,
StringRef verifier = staticMatcherHelper.getVerifierName(matcher);
emitStaticVerifierCall(
verifier, castedName, "tblgen_prop",
- formatv("\"op '{0}' property '{1}' failed to satisfy constraint: "
- "'{2}'\"",
- op.getOperationName(), namedProp->name,
- escapeString(matcher.getAsConstraint().getSummary()))
+ formatv(
+ "\"op '{0}' property '{1}' failed to satisfy constraint: "
+ "'{2}'\"",
+ tblgen::maybeObfuscateDotted(op.getOperationName(), op.isPrivate()),
+ namedProp->name,
+ escapeString(matcher.getAsConstraint().getSummary()))
.str());
}
More information about the Mlir-commits
mailing list