[llvm-branch-commits] [clang] [clang] "modular_format" attribute for functions using format strings (PR #147431)

Daniel Thornburgh via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Mon Jul 28 15:01:00 PDT 2025


https://github.com/mysterymath updated https://github.com/llvm/llvm-project/pull/147431

>From 92dad1410839afdc18ef2d92b3b9055d72f85188 Mon Sep 17 00:00:00 2001
From: Daniel Thornburgh <dthorn at google.com>
Date: Tue, 10 Jun 2025 14:06:53 -0700
Subject: [PATCH 1/5] [clang] "modular_format" attribute for functions using
 format strings

This provides a C language version of the new IR modular-format
attribute. This, in concert with the format attribute, allows a library
function to declare that a modular version of its implementation is
available.

See issue #146159 for context.
---
 clang/include/clang/Basic/Attr.td     | 11 +++++++++++
 clang/include/clang/Basic/AttrDocs.td | 25 +++++++++++++++++++++++++
 clang/lib/CodeGen/CGCall.cpp          | 12 ++++++++++++
 clang/lib/Sema/SemaDeclAttr.cpp       | 27 +++++++++++++++++++++++++++
 4 files changed, 75 insertions(+)

diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td
index 27fea7dea0a5e..bed878a10424c 100644
--- a/clang/include/clang/Basic/Attr.td
+++ b/clang/include/clang/Basic/Attr.td
@@ -5182,3 +5182,14 @@ def NonString : InheritableAttr {
   let Subjects = SubjectList<[Var, Field]>;
   let Documentation = [NonStringDocs];
 }
+
+def ModularFormat : InheritableAttr {
+  let Spellings = [Clang<"modular_format">];
+  let Args = [
+    IdentifierArgument<"ModularImplFn">,
+    StringArgument<"ImplName">,
+    VariadicStringArgument<"Aspects">
+  ];
+  let Subjects = SubjectList<[Function]>;
+  let Documentation = [ModularFormatDocs];
+}
diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td
index 43442f177ab7b..3c325ce2462cb 100644
--- a/clang/include/clang/Basic/AttrDocs.td
+++ b/clang/include/clang/Basic/AttrDocs.td
@@ -9427,3 +9427,28 @@ diagnostics with code like:
   __attribute__((nonstring)) char NotAStr[3] = "foo"; // Not diagnosed
   }];
 }
+
+def ModularFormatDocs : Documentation {
+  let Category = DocCatFunction;
+  let Content = [{
+The ``modular_format`` attribute can be applied to a function that bears the
+``format`` attribute to indicate that the implementation is modular on the
+format string argument. When the format argument for a given call is constant,
+the compiler may redirect the call to the symbol given as the first argument to
+the attribute (the modular implementation function).
+
+The second argument is a implementation name, and the remaining arguments are
+aspects of the format string for the compiler to report. If the compiler does
+not understand a aspect, it must summarily report that the format string has
+that aspect.
+
+The compiler reports an aspect by issing a relocation for the symbol
+`<impl_name>_<aspect>``. This arranges for code and data needed to support the
+aspect of the implementation to be brought into the link to satisfy weak
+references in the modular implemenation function.
+
+The following aspects are currently supported:
+
+- ``float``: The call has a floating point argument
+  }];
+}
diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp
index a06455d25b1ef..9e8929b5a56ae 100644
--- a/clang/lib/CodeGen/CGCall.cpp
+++ b/clang/lib/CodeGen/CGCall.cpp
@@ -2569,6 +2569,18 @@ void CodeGenModule::ConstructAttributeList(StringRef Name,
 
     if (TargetDecl->hasAttr<ArmLocallyStreamingAttr>())
       FuncAttrs.addAttribute("aarch64_pstate_sm_body");
+
+    if (auto *ModularFormat = TargetDecl->getAttr<ModularFormatAttr>()) {
+      // TODO: Error checking
+      FormatAttr *Format = TargetDecl->getAttr<FormatAttr>();
+      std::string FormatIdx = std::to_string(Format->getFormatIdx());
+      std::string FirstArg = std::to_string(Format->getFirstArg());
+      SmallVector<StringRef> Args = {
+          FormatIdx, FirstArg, ModularFormat->getModularImplFn()->getName(),
+          ModularFormat->getImplName()};
+      llvm::append_range(Args, ModularFormat->aspects());
+      FuncAttrs.addAttribute("modular-format", llvm::join(Args, ","));
+    }
   }
 
   // Attach "no-builtins" attributes to:
diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp
index eba29e609cb05..b70ffd7c35f7b 100644
--- a/clang/lib/Sema/SemaDeclAttr.cpp
+++ b/clang/lib/Sema/SemaDeclAttr.cpp
@@ -6897,6 +6897,29 @@ static void handleVTablePointerAuthentication(Sema &S, Decl *D,
       CustomDiscriminationValue));
 }
 
+static void handleModularFormat(Sema &S, Decl *D, const ParsedAttr &AL) {
+  StringRef ImplName;
+  if (!S.checkStringLiteralArgumentAttr(AL, 1, ImplName))
+    return;
+  SmallVector<StringRef> Aspects;
+  for (unsigned I = 2, E = AL.getNumArgs(); I != E; ++I) {
+    StringRef Aspect;
+    if (!S.checkStringLiteralArgumentAttr(AL, I, Aspect))
+      return;
+    Aspects.push_back(Aspect);
+  }
+
+  // Store aspects sorted and without duplicates.
+  llvm::sort(Aspects);
+  Aspects.erase(llvm::unique(Aspects), Aspects.end());
+
+  // TODO: Type checking on identifier
+  // TODO: Merge attributes
+  D->addAttr(::new (S.Context) ModularFormatAttr(
+      S.Context, AL, AL.getArgAsIdent(0)->getIdentifierInfo(), ImplName,
+      Aspects.data(), Aspects.size()));
+}
+
 //===----------------------------------------------------------------------===//
 // Top Level Sema Entry Points
 //===----------------------------------------------------------------------===//
@@ -7821,6 +7844,10 @@ ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D, const ParsedAttr &AL,
   case ParsedAttr::AT_VTablePointerAuthentication:
     handleVTablePointerAuthentication(S, D, AL);
     break;
+
+  case ParsedAttr::AT_ModularFormat:
+    handleModularFormat(S, D, AL);
+    break;
   }
 }
 

>From e39ecf6519ae050d289d0b1a48d03ccffb74ca75 Mon Sep 17 00:00:00 2001
From: Daniel Thornburgh <dthorn at google.com>
Date: Tue, 15 Jul 2025 11:28:20 -0700
Subject: [PATCH 2/5] Update docs to account for clang inferring format
 attribute

---
 clang/include/clang/Basic/AttrDocs.td | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td
index 3c325ce2462cb..4ca3d654e651c 100644
--- a/clang/include/clang/Basic/AttrDocs.td
+++ b/clang/include/clang/Basic/AttrDocs.td
@@ -9432,10 +9432,11 @@ def ModularFormatDocs : Documentation {
   let Category = DocCatFunction;
   let Content = [{
 The ``modular_format`` attribute can be applied to a function that bears the
-``format`` attribute to indicate that the implementation is modular on the
-format string argument. When the format argument for a given call is constant,
-the compiler may redirect the call to the symbol given as the first argument to
-the attribute (the modular implementation function).
+``format`` attribute (or to a standard library function for which Clang infers
+it) to indicate that the implementation is modular on the format string
+argument. When the format string for a given call is constant, the compiler may
+redirect the call to the symbol given as the first argument to the attribute
+(the modular implementation function).
 
 The second argument is a implementation name, and the remaining arguments are
 aspects of the format string for the compiler to report. If the compiler does

>From 60ffa721aca0efaaa5d041f9e22d128b2c6a5854 Mon Sep 17 00:00:00 2001
From: Daniel Thornburgh <dthorn at google.com>
Date: Wed, 16 Jul 2025 15:19:37 -0700
Subject: [PATCH 3/5] Add an example to clang attr doc

---
 clang/include/clang/Basic/AttrDocs.td | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td
index 4ca3d654e651c..b3839f6d5e059 100644
--- a/clang/include/clang/Basic/AttrDocs.td
+++ b/clang/include/clang/Basic/AttrDocs.td
@@ -9444,10 +9444,18 @@ not understand a aspect, it must summarily report that the format string has
 that aspect.
 
 The compiler reports an aspect by issing a relocation for the symbol
-`<impl_name>_<aspect>``. This arranges for code and data needed to support the
+``<impl_name>_<aspect>``. This arranges for code and data needed to support the
 aspect of the implementation to be brought into the link to satisfy weak
 references in the modular implemenation function.
 
+For example, say ``printf`` is annotated with
+``modular_format(__modular_printf, "__printf", "float")``. Then, a call to
+``printf(var, 42)`` would be untouched. A call to ``printf("%d", 42)`` would
+become a call to ``__modular_printf`` with the same arguments, as would
+``printf("%f", 42.0)``. The latter would be accompanied by a strong
+relocation against the symbol ``__printf_float``, which would bring floating
+point support for ``printf`` into the link.
+
 The following aspects are currently supported:
 
 - ``float``: The call has a floating point argument

>From eaa29e6527e18e61f42ae9626767cb2f0c5a7728 Mon Sep 17 00:00:00 2001
From: Daniel Thornburgh <dthorn at google.com>
Date: Tue, 22 Jul 2025 13:35:46 -0700
Subject: [PATCH 4/5] Emit the new type arg from format attr

---
 clang/lib/CodeGen/CGCall.cpp | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp
index 9e8929b5a56ae..c6190e56a049f 100644
--- a/clang/lib/CodeGen/CGCall.cpp
+++ b/clang/lib/CodeGen/CGCall.cpp
@@ -2573,10 +2573,12 @@ void CodeGenModule::ConstructAttributeList(StringRef Name,
     if (auto *ModularFormat = TargetDecl->getAttr<ModularFormatAttr>()) {
       // TODO: Error checking
       FormatAttr *Format = TargetDecl->getAttr<FormatAttr>();
+      StringRef Type = Format->getType()->getName();
       std::string FormatIdx = std::to_string(Format->getFormatIdx());
       std::string FirstArg = std::to_string(Format->getFirstArg());
       SmallVector<StringRef> Args = {
-          FormatIdx, FirstArg, ModularFormat->getModularImplFn()->getName(),
+          Type, FormatIdx, FirstArg,
+          ModularFormat->getModularImplFn()->getName(),
           ModularFormat->getImplName()};
       llvm::append_range(Args, ModularFormat->aspects());
       FuncAttrs.addAttribute("modular-format", llvm::join(Args, ","));

>From b9753554665250f6ecd2a6643d5b8f088b6612ac Mon Sep 17 00:00:00 2001
From: Daniel Thornburgh <dthorn at google.com>
Date: Tue, 22 Jul 2025 15:01:56 -0700
Subject: [PATCH 5/5] Correct typos

---
 clang/include/clang/Basic/AttrDocs.td | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td
index b3839f6d5e059..88c9e42cdba8a 100644
--- a/clang/include/clang/Basic/AttrDocs.td
+++ b/clang/include/clang/Basic/AttrDocs.td
@@ -9440,10 +9440,10 @@ function).
 
 The second argument is a implementation name, and the remaining arguments are
 aspects of the format string for the compiler to report. If the compiler does
-not understand a aspect, it must summarily report that the format string has
+not understand an aspect, it must summarily report that the format string has
 that aspect.
 
-The compiler reports an aspect by issing a relocation for the symbol
+The compiler reports an aspect by issuing a relocation for the symbol
 ``<impl_name>_<aspect>``. This arranges for code and data needed to support the
 aspect of the implementation to be brought into the link to satisfy weak
 references in the modular implemenation function.



More information about the llvm-branch-commits mailing list