[clang] [clang] Validate ABI tag attributes (PR #84272)
Виктор Чернякин via cfe-commits
cfe-commits at lists.llvm.org
Wed Mar 6 18:49:26 PST 2024
https://github.com/LocalSpook created https://github.com/llvm/llvm-project/pull/84272
This PR adds checks to make sure the ABI tag given in `[[gnu::abi_tag("")]]` attribute is valid.
Example errors:
```text
<stdin>:1:16: error: ABI tag cannot be empty
1 | [[gnu::abi_tag("")]] void f();
| ^~
```
```text
<stdin>:1:17: error: character '9' is not allowed at the start of an ABI tag
1 | [[gnu::abi_tag("99")]] void f();
| ^
```
```text
<stdin>:1:18: error: character 'Ж' is not allowed in ABI tags
1 | [[gnu::abi_tag("AЖ")]] void f();
| ^
```
The Itanium ABI specification says that an ABI tag can be any identifier, but this PR rejects tags with non-ASCII characters because I couldn't figure out a nice way of validating them. The code that validates identifiers (and supports Unicode, extensions like `$` in identifers, etc.) is private in Lexer, and special-casing `abi_tag` there seems incorrect to me. Eventually I decided to do the checks in `handleAbiTagAttr()` in Sema. I'm new here, so I could just be missing something.
Fixes #83462.
Relevant section of the Itanium ABI specification: https://itanium-cxx-abi.github.io/cxx-abi/abi.html#mangle.abi-tag
---
I don't have commit permissions, so someone will have to commit for me.
I'd like the author tag to be `Victor Chernyakin <chernyakin.victor.j at outlook.com>`
(https://llvm.org/docs/DeveloperPolicy.html#obtaining-commit-access told me to say this).
>From 944de3307437ae3fedfea0e46bca3ccd839f20fc Mon Sep 17 00:00:00 2001
From: Victor Chernyakin <chernyakin.victor.j at outlook.com>
Date: Wed, 6 Mar 2024 04:38:42 -0800
Subject: [PATCH] [clang] Validate ABI tag attributes
Before, ABI tags were not validated at all,
which could lead to invalid manglings:
[[gnu::abi_tag("")]] void f() -> _Z1fB0v
[[gnu::abi_tag("999")]] void f() -> _Z1fB3999v
[[gnu::abi_tag("-")]] void f() -> _Z1fB1-v
Fixes #83462.
---
.../clang/Basic/DiagnosticSemaKinds.td | 4 +++
clang/lib/Sema/SemaDeclAttr.cpp | 31 ++++++++++++++++++-
clang/test/Sema/attr-abi-tag.cpp | 9 ++++++
3 files changed, 43 insertions(+), 1 deletion(-)
create mode 100644 clang/test/Sema/attr-abi-tag.cpp
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index b007ff7d8ccf0f..18b8332ea7d329 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -5890,6 +5890,10 @@ def err_definition_of_explicitly_defaulted_member : Error<
def err_redefinition_extern_inline : Error<
"redefinition of a 'extern inline' function %0 is not supported in "
"%select{C99 mode|C++}1">;
+def err_empty_abi_tag : Error<
+ "ABI tag cannot be empty">;
+def err_invalid_char_in_abi_tag : Error<
+ "character '%0' is not allowed %select{at the start of an ABI tag|in ABI tags}1">;
def warn_attr_abi_tag_namespace : Warning<
"'abi_tag' attribute on %select{non-inline|anonymous}0 namespace ignored">,
InGroup<IgnoredAttributes>;
diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp
index 397b5db0dc0669..b7423de07835f0 100644
--- a/clang/lib/Sema/SemaDeclAttr.cpp
+++ b/clang/lib/Sema/SemaDeclAttr.cpp
@@ -44,6 +44,7 @@
#include "llvm/ADT/StringExtras.h"
#include "llvm/IR/Assumptions.h"
#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/Support/ConvertUTF.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
@@ -7425,11 +7426,39 @@ static void handleMSConstexprAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
D->addAttr(::new (S.Context) MSConstexprAttr(S.Context, AL));
}
+static bool checkValidAbiTag(Sema &S, const Expr *Exp, StringRef Tag) {
+ if (Tag.empty()) {
+ S.Diag(Exp->getExprLoc(), diag::err_empty_abi_tag) << Exp->getSourceRange();
+ return false;
+ }
+
+ for (size_t I = 0; I < Tag.size(); ++I) {
+ if (!isAsciiIdentifierContinue(Tag[I])) {
+ S.Diag(Exp->getBeginLoc().getLocWithOffset(1 + I),
+ diag::err_invalid_char_in_abi_tag)
+ << Tag.substr(I, llvm::getNumBytesForUTF8(Tag[I]))
+ << /*not allowed at all*/ 1;
+ return false;
+ }
+ }
+
+ if (!isAsciiIdentifierStart(Tag[0])) {
+ S.Diag(Exp->getBeginLoc().getLocWithOffset(1),
+ diag::err_invalid_char_in_abi_tag)
+ << Tag.substr(0, llvm::getNumBytesForUTF8(Tag[0]))
+ << /*not allowed at the start*/ 0;
+ return false;
+ }
+
+ return true;
+}
+
static void handleAbiTagAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
SmallVector<StringRef, 4> Tags;
for (unsigned I = 0, E = AL.getNumArgs(); I != E; ++I) {
StringRef Tag;
- if (!S.checkStringLiteralArgumentAttr(AL, I, Tag))
+ if (!S.checkStringLiteralArgumentAttr(AL, I, Tag) ||
+ !checkValidAbiTag(S, AL.getArgAsExpr(I), Tag))
return;
Tags.push_back(Tag);
}
diff --git a/clang/test/Sema/attr-abi-tag.cpp b/clang/test/Sema/attr-abi-tag.cpp
new file mode 100644
index 00000000000000..b4f3aade2027bd
--- /dev/null
+++ b/clang/test/Sema/attr-abi-tag.cpp
@@ -0,0 +1,9 @@
+// RUN: %clang_cc1 -verify -fsyntax-only %s
+
+[[gnu::abi_tag("")]] void f1(); // expected-error {{ABI tag cannot be empty}}
+[[gnu::abi_tag("9A")]] void f2(); // expected-error {{character '9' is not allowed at the start of an ABI tag}}
+[[gnu::abi_tag("0")]] void f3(); // expected-error {{character '0' is not allowed at the start of an ABI tag}}
+[[gnu::abi_tag("猫A")]] void f4(); // expected-error {{character '猫' is not allowed in ABI tags}}
+[[gnu::abi_tag("A𨭎")]] void f5(); // expected-error {{character '𨭎' is not allowed in ABI tags}}
+[[gnu::abi_tag("AB")]] void f6();
+[[gnu::abi_tag("A1")]] void f7();
More information about the cfe-commits
mailing list