[llvm-branch-commits] [clang] [clang] "modular_format" attribute for functions using format strings (PR #147431)
Daniel Thornburgh via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Mon Nov 3 16:55:25 PST 2025
https://github.com/mysterymath updated https://github.com/llvm/llvm-project/pull/147431
>From a9ac2282d609b7aaca4f7d733960301602e1637b Mon Sep 17 00:00:00 2001
From: Daniel Thornburgh <dthorn at google.com>
Date: Tue, 10 Jun 2025 14:06:53 -0700
Subject: [PATCH 1/8] [clang] "modular_format" attribute for functions using
format strings
This provides a C language version of the new IR modular-format
attribute. This, in concert with the format attribute, allows a library
function to declare that a modular version of its implementation is
available.
See issue #146159 for context.
---
clang/include/clang/Basic/Attr.td | 11 +++++++++++
clang/include/clang/Basic/AttrDocs.td | 25 +++++++++++++++++++++++++
clang/lib/CodeGen/CGCall.cpp | 12 ++++++++++++
clang/lib/Sema/SemaDeclAttr.cpp | 27 +++++++++++++++++++++++++++
4 files changed, 75 insertions(+)
diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td
index 22e60aa9fe312..69f5bf5bba461 100644
--- a/clang/include/clang/Basic/Attr.td
+++ b/clang/include/clang/Basic/Attr.td
@@ -5290,3 +5290,14 @@ def NonString : InheritableAttr {
let Subjects = SubjectList<[Var, Field]>;
let Documentation = [NonStringDocs];
}
+
+def ModularFormat : InheritableAttr {
+ let Spellings = [Clang<"modular_format">];
+ let Args = [
+ IdentifierArgument<"ModularImplFn">,
+ StringArgument<"ImplName">,
+ VariadicStringArgument<"Aspects">
+ ];
+ let Subjects = SubjectList<[Function]>;
+ let Documentation = [ModularFormatDocs];
+}
diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td
index e0bbda083b5cf..ebf1a45dbbb50 100644
--- a/clang/include/clang/Basic/AttrDocs.td
+++ b/clang/include/clang/Basic/AttrDocs.td
@@ -9635,3 +9635,28 @@ silence diagnostics with code like:
__attribute__((nonstring)) char NotAStr[3] = "foo"; // Not diagnosed
}];
}
+
+def ModularFormatDocs : Documentation {
+ let Category = DocCatFunction;
+ let Content = [{
+The ``modular_format`` attribute can be applied to a function that bears the
+``format`` attribute to indicate that the implementation is modular on the
+format string argument. When the format argument for a given call is constant,
+the compiler may redirect the call to the symbol given as the first argument to
+the attribute (the modular implementation function).
+
+The second argument is a implementation name, and the remaining arguments are
+aspects of the format string for the compiler to report. If the compiler does
+not understand a aspect, it must summarily report that the format string has
+that aspect.
+
+The compiler reports an aspect by issing a relocation for the symbol
+`<impl_name>_<aspect>``. This arranges for code and data needed to support the
+aspect of the implementation to be brought into the link to satisfy weak
+references in the modular implemenation function.
+
+The following aspects are currently supported:
+
+- ``float``: The call has a floating point argument
+ }];
+}
diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp
index 741fa44713ac8..67765f7fab28b 100644
--- a/clang/lib/CodeGen/CGCall.cpp
+++ b/clang/lib/CodeGen/CGCall.cpp
@@ -2557,6 +2557,18 @@ void CodeGenModule::ConstructAttributeList(StringRef Name,
if (TargetDecl->hasAttr<ArmLocallyStreamingAttr>())
FuncAttrs.addAttribute("aarch64_pstate_sm_body");
+
+ if (auto *ModularFormat = TargetDecl->getAttr<ModularFormatAttr>()) {
+ // TODO: Error checking
+ FormatAttr *Format = TargetDecl->getAttr<FormatAttr>();
+ std::string FormatIdx = std::to_string(Format->getFormatIdx());
+ std::string FirstArg = std::to_string(Format->getFirstArg());
+ SmallVector<StringRef> Args = {
+ FormatIdx, FirstArg, ModularFormat->getModularImplFn()->getName(),
+ ModularFormat->getImplName()};
+ llvm::append_range(Args, ModularFormat->aspects());
+ FuncAttrs.addAttribute("modular-format", llvm::join(Args, ","));
+ }
}
// Attach "no-builtins" attributes to:
diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp
index e6f8748db7644..8fcfb38661a8f 100644
--- a/clang/lib/Sema/SemaDeclAttr.cpp
+++ b/clang/lib/Sema/SemaDeclAttr.cpp
@@ -6783,6 +6783,29 @@ static void handleVTablePointerAuthentication(Sema &S, Decl *D,
CustomDiscriminationValue));
}
+static void handleModularFormat(Sema &S, Decl *D, const ParsedAttr &AL) {
+ StringRef ImplName;
+ if (!S.checkStringLiteralArgumentAttr(AL, 1, ImplName))
+ return;
+ SmallVector<StringRef> Aspects;
+ for (unsigned I = 2, E = AL.getNumArgs(); I != E; ++I) {
+ StringRef Aspect;
+ if (!S.checkStringLiteralArgumentAttr(AL, I, Aspect))
+ return;
+ Aspects.push_back(Aspect);
+ }
+
+ // Store aspects sorted and without duplicates.
+ llvm::sort(Aspects);
+ Aspects.erase(llvm::unique(Aspects), Aspects.end());
+
+ // TODO: Type checking on identifier
+ // TODO: Merge attributes
+ D->addAttr(::new (S.Context) ModularFormatAttr(
+ S.Context, AL, AL.getArgAsIdent(0)->getIdentifierInfo(), ImplName,
+ Aspects.data(), Aspects.size()));
+}
+
//===----------------------------------------------------------------------===//
// Top Level Sema Entry Points
//===----------------------------------------------------------------------===//
@@ -7711,6 +7734,10 @@ ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D, const ParsedAttr &AL,
case ParsedAttr::AT_VTablePointerAuthentication:
handleVTablePointerAuthentication(S, D, AL);
break;
+
+ case ParsedAttr::AT_ModularFormat:
+ handleModularFormat(S, D, AL);
+ break;
}
}
>From dbd48a1bd45493e22c8c6603de94ed0d09cf8041 Mon Sep 17 00:00:00 2001
From: Daniel Thornburgh <dthorn at google.com>
Date: Tue, 15 Jul 2025 11:28:20 -0700
Subject: [PATCH 2/8] Update docs to account for clang inferring format
attribute
---
clang/include/clang/Basic/AttrDocs.td | 9 +++++----
1 file changed, 5 insertions(+), 4 deletions(-)
diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td
index ebf1a45dbbb50..33787c8d682c1 100644
--- a/clang/include/clang/Basic/AttrDocs.td
+++ b/clang/include/clang/Basic/AttrDocs.td
@@ -9640,10 +9640,11 @@ def ModularFormatDocs : Documentation {
let Category = DocCatFunction;
let Content = [{
The ``modular_format`` attribute can be applied to a function that bears the
-``format`` attribute to indicate that the implementation is modular on the
-format string argument. When the format argument for a given call is constant,
-the compiler may redirect the call to the symbol given as the first argument to
-the attribute (the modular implementation function).
+``format`` attribute (or standard library functions) to indicate that the
+implementation is modular on the format string argument. When the format string
+for a given call is constant, the compiler may redirect the call to the symbol
+given as the first argument to the attribute (the modular implementation
+function).
The second argument is a implementation name, and the remaining arguments are
aspects of the format string for the compiler to report. If the compiler does
>From 753b076ffbc507dca239def7ad2ceb8fa99be28b Mon Sep 17 00:00:00 2001
From: Daniel Thornburgh <dthorn at google.com>
Date: Wed, 16 Jul 2025 15:19:37 -0700
Subject: [PATCH 3/8] Add an example to clang attr doc
---
clang/include/clang/Basic/AttrDocs.td | 10 +++++++++-
1 file changed, 9 insertions(+), 1 deletion(-)
diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td
index 33787c8d682c1..422946b1a24b6 100644
--- a/clang/include/clang/Basic/AttrDocs.td
+++ b/clang/include/clang/Basic/AttrDocs.td
@@ -9652,10 +9652,18 @@ not understand a aspect, it must summarily report that the format string has
that aspect.
The compiler reports an aspect by issing a relocation for the symbol
-`<impl_name>_<aspect>``. This arranges for code and data needed to support the
+``<impl_name>_<aspect>``. This arranges for code and data needed to support the
aspect of the implementation to be brought into the link to satisfy weak
references in the modular implemenation function.
+For example, say ``printf`` is annotated with
+``modular_format(__modular_printf, __printf, float)``. Then, a call to
+``printf(var, 42)`` would be untouched. A call to ``printf("%d", 42)`` would
+become a call to ``__modular_printf`` with the same arguments, as would
+``printf("%f", 42.0)``. The latter would be accompanied with a strong
+relocation against the symbol ``__printf_float``, which would bring floating
+point support for ``printf`` into the link.
+
The following aspects are currently supported:
- ``float``: The call has a floating point argument
>From 3adc15b13f47d76a52b828858985cc94c58038a9 Mon Sep 17 00:00:00 2001
From: Daniel Thornburgh <dthorn at google.com>
Date: Tue, 22 Jul 2025 13:35:46 -0700
Subject: [PATCH 4/8] Emit the new type arg from format attr
---
clang/lib/CodeGen/CGCall.cpp | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp
index 67765f7fab28b..4ecadd2d55236 100644
--- a/clang/lib/CodeGen/CGCall.cpp
+++ b/clang/lib/CodeGen/CGCall.cpp
@@ -2561,10 +2561,12 @@ void CodeGenModule::ConstructAttributeList(StringRef Name,
if (auto *ModularFormat = TargetDecl->getAttr<ModularFormatAttr>()) {
// TODO: Error checking
FormatAttr *Format = TargetDecl->getAttr<FormatAttr>();
+ StringRef Type = Format->getType()->getName();
std::string FormatIdx = std::to_string(Format->getFormatIdx());
std::string FirstArg = std::to_string(Format->getFirstArg());
SmallVector<StringRef> Args = {
- FormatIdx, FirstArg, ModularFormat->getModularImplFn()->getName(),
+ Type, FormatIdx, FirstArg,
+ ModularFormat->getModularImplFn()->getName(),
ModularFormat->getImplName()};
llvm::append_range(Args, ModularFormat->aspects());
FuncAttrs.addAttribute("modular-format", llvm::join(Args, ","));
>From caa3b334322958b9c663a1e29cab733aea6a517f Mon Sep 17 00:00:00 2001
From: Daniel Thornburgh <dthorn at google.com>
Date: Tue, 22 Jul 2025 15:01:56 -0700
Subject: [PATCH 5/8] Correct typos
---
clang/include/clang/Basic/AttrDocs.td | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td
index 422946b1a24b6..e7f1e919d5b8a 100644
--- a/clang/include/clang/Basic/AttrDocs.td
+++ b/clang/include/clang/Basic/AttrDocs.td
@@ -9648,10 +9648,10 @@ function).
The second argument is a implementation name, and the remaining arguments are
aspects of the format string for the compiler to report. If the compiler does
-not understand a aspect, it must summarily report that the format string has
+not understand an aspect, it must summarily report that the format string has
that aspect.
-The compiler reports an aspect by issing a relocation for the symbol
+The compiler reports an aspect by issuing a relocation for the symbol
``<impl_name>_<aspect>``. This arranges for code and data needed to support the
aspect of the implementation to be brought into the link to satisfy weak
references in the modular implemenation function.
>From 29ae289365eb3274d2048ab87cd4fbe7c60f3329 Mon Sep 17 00:00:00 2001
From: Daniel Thornburgh <dthorn at google.com>
Date: Thu, 17 Jul 2025 15:56:10 -0700
Subject: [PATCH 6/8] Tests for successful format string passthrough
---
clang/test/CodeGen/attr-modular-format.c | 18 ++++++++++++++++++
1 file changed, 18 insertions(+)
create mode 100644 clang/test/CodeGen/attr-modular-format.c
diff --git a/clang/test/CodeGen/attr-modular-format.c b/clang/test/CodeGen/attr-modular-format.c
new file mode 100644
index 0000000000000..7d0580def41e9
--- /dev/null
+++ b/clang/test/CodeGen/attr-modular-format.c
@@ -0,0 +1,18 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-unknown -emit-llvm %s -o - | FileCheck %s
+
+int printf(const char *fmt, ...) __attribute__((modular_format(__modular_printf, "__printf", "float")));
+int myprintf(const char *fmt, ...) __attribute__((modular_format(__modular_printf, "__printf", "float"), format(printf, 1, 2)));
+
+// CHECK-LABEL: define dso_local void @test_inferred_format(
+// CHECK: {{.*}} = call i32 (ptr, ...) @printf(ptr noundef @.str) #[[ATTR:[0-9]+]]
+void test_inferred_format(void) {
+ printf("hello");
+}
+
+// CHECK-LABEL: define dso_local void @test_explicit_format(
+// CHECK: {{.*}} = call i32 (ptr, ...) @myprintf(ptr noundef @.str) #[[ATTR:[0-9]+]]
+void test_explicit_format(void) {
+ myprintf("hello");
+}
+
+// CHECK: attributes #[[ATTR]] = { "modular-format"="printf,1,2,__modular_printf,__printf,float" }
>From 2f524faa4a0e8491da2feddd02ee35d7c3a7b503 Mon Sep 17 00:00:00 2001
From: Daniel Thornburgh <dthorn at google.com>
Date: Fri, 5 Sep 2025 17:10:37 -0700
Subject: [PATCH 7/8] Add redeclaration test
---
clang/lib/Sema/SemaDeclAttr.cpp | 1 -
clang/test/CodeGen/attr-modular-format.c | 10 ++++++++++
2 files changed, 10 insertions(+), 1 deletion(-)
diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp
index 8fcfb38661a8f..b04e9ea5bd2b6 100644
--- a/clang/lib/Sema/SemaDeclAttr.cpp
+++ b/clang/lib/Sema/SemaDeclAttr.cpp
@@ -6800,7 +6800,6 @@ static void handleModularFormat(Sema &S, Decl *D, const ParsedAttr &AL) {
Aspects.erase(llvm::unique(Aspects), Aspects.end());
// TODO: Type checking on identifier
- // TODO: Merge attributes
D->addAttr(::new (S.Context) ModularFormatAttr(
S.Context, AL, AL.getArgAsIdent(0)->getIdentifierInfo(), ImplName,
Aspects.data(), Aspects.size()));
diff --git a/clang/test/CodeGen/attr-modular-format.c b/clang/test/CodeGen/attr-modular-format.c
index 7d0580def41e9..2c647214b3bca 100644
--- a/clang/test/CodeGen/attr-modular-format.c
+++ b/clang/test/CodeGen/attr-modular-format.c
@@ -15,4 +15,14 @@ void test_explicit_format(void) {
myprintf("hello");
}
+int redecl(const char *fmt, ...) __attribute__((modular_format(__first_impl, "__first", "one"), format(printf, 1, 2)));
+int redecl(const char *fmt, ...) __attribute__((modular_format(__second_impl, "__second", "two", "three")));
+
+// CHECK-LABEL: define dso_local void @test_redecl(
+// CHECK: {{.*}} = call i32 (ptr, ...) @redecl(ptr noundef @.str) #[[ATTR_REDECL:[0-9]+]]
+void test_redecl(void) {
+ redecl("hello");
+}
+
// CHECK: attributes #[[ATTR]] = { "modular-format"="printf,1,2,__modular_printf,__printf,float" }
+// CHECK: attributes #[[ATTR_REDECL]] = { "modular-format"="printf,1,2,__second_impl,__second,three,two" }
>From 093966386cdf8cf9c5695e8b93b1e439bb7ea8e3 Mon Sep 17 00:00:00 2001
From: Daniel Thornburgh <dthorn at google.com>
Date: Mon, 3 Nov 2025 16:48:55 -0800
Subject: [PATCH 8/8] Clarify and correct docs
---
clang/include/clang/Basic/AttrDocs.td | 23 ++++++++++++-----------
clang/lib/Sema/SemaDeclAttr.cpp | 1 -
2 files changed, 12 insertions(+), 12 deletions(-)
diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td
index e7f1e919d5b8a..625f815aa892c 100644
--- a/clang/include/clang/Basic/AttrDocs.td
+++ b/clang/include/clang/Basic/AttrDocs.td
@@ -9641,23 +9641,24 @@ def ModularFormatDocs : Documentation {
let Content = [{
The ``modular_format`` attribute can be applied to a function that bears the
``format`` attribute (or standard library functions) to indicate that the
-implementation is modular on the format string argument. When the format string
-for a given call is constant, the compiler may redirect the call to the symbol
-given as the first argument to the attribute (the modular implementation
-function).
+implementation is "modular", that is, that the implementation is logically
+divided into a number of named aspects. When the compiler can determine that
+not all aspects of the implementation are needed for a given call, the compiler
+may redirect the call to the identifier given as the first argument to the
+attribute (the modular implementation function).
The second argument is a implementation name, and the remaining arguments are
aspects of the format string for the compiler to report. If the compiler does
-not understand an aspect, it must summarily report that the format string has
-that aspect.
+not understand an aspect, it must summarily consider any call to require that
+aspect.
-The compiler reports an aspect by issuing a relocation for the symbol
-``<impl_name>_<aspect>``. This arranges for code and data needed to support the
-aspect of the implementation to be brought into the link to satisfy weak
-references in the modular implemenation function.
+The compiler reports that a call requires an aspect by issuing a relocation for
+the symbol ``<impl_name>_<aspect>`` at the point of the call. This arranges for
+code and data needed to support the aspect of the implementation to be brought
+into the link to satisfy weak references in the modular implementation function.
For example, say ``printf`` is annotated with
-``modular_format(__modular_printf, __printf, float)``. Then, a call to
+``modular_format(__modular_printf, "__printf", "float")``. Then, a call to
``printf(var, 42)`` would be untouched. A call to ``printf("%d", 42)`` would
become a call to ``__modular_printf`` with the same arguments, as would
``printf("%f", 42.0)``. The latter would be accompanied with a strong
diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp
index b04e9ea5bd2b6..de7f0990879a2 100644
--- a/clang/lib/Sema/SemaDeclAttr.cpp
+++ b/clang/lib/Sema/SemaDeclAttr.cpp
@@ -6799,7 +6799,6 @@ static void handleModularFormat(Sema &S, Decl *D, const ParsedAttr &AL) {
llvm::sort(Aspects);
Aspects.erase(llvm::unique(Aspects), Aspects.end());
- // TODO: Type checking on identifier
D->addAttr(::new (S.Context) ModularFormatAttr(
S.Context, AL, AL.getArgAsIdent(0)->getIdentifierInfo(), ImplName,
Aspects.data(), Aspects.size()));
More information about the llvm-branch-commits
mailing list