[clang] [clang] Validate ABI tag attributes (PR #84272)

Виктор Чернякин via cfe-commits cfe-commits at lists.llvm.org
Wed Mar 6 18:49:26 PST 2024


https://github.com/LocalSpook created https://github.com/llvm/llvm-project/pull/84272

This PR adds checks to make sure the ABI tag given in `[[gnu::abi_tag("")]]` attribute is valid.

Example errors:

```text
<stdin>:1:16: error: ABI tag cannot be empty
    1 | [[gnu::abi_tag("")]] void f();
      |                ^~
```

```text
<stdin>:1:17: error: character '9' is not allowed at the start of an ABI tag
    1 | [[gnu::abi_tag("99")]] void f();
      |                 ^
```

```text
<stdin>:1:18: error: character 'Ж' is not allowed in ABI tags
    1 | [[gnu::abi_tag("AЖ")]] void f();
      |                  ^
```

The Itanium ABI specification says that an ABI tag can be any identifier, but this PR rejects tags with non-ASCII characters because I couldn't figure out a nice way of validating them. The code that validates identifiers (and supports Unicode, extensions like `$` in identifers, etc.) is private in Lexer, and special-casing `abi_tag` there seems incorrect to me. Eventually I decided to do the checks in `handleAbiTagAttr()` in Sema. I'm new here, so I could just be missing something.

Fixes #83462.

Relevant section of the Itanium ABI specification: https://itanium-cxx-abi.github.io/cxx-abi/abi.html#mangle.abi-tag

---

I don't have commit permissions, so someone will have to commit for me.
I'd like the author tag to be `Victor Chernyakin <chernyakin.victor.j at outlook.com>`
(https://llvm.org/docs/DeveloperPolicy.html#obtaining-commit-access told me to say this).

>From 944de3307437ae3fedfea0e46bca3ccd839f20fc Mon Sep 17 00:00:00 2001
From: Victor Chernyakin <chernyakin.victor.j at outlook.com>
Date: Wed, 6 Mar 2024 04:38:42 -0800
Subject: [PATCH] [clang] Validate ABI tag attributes

Before, ABI tags were not validated at all,
which could lead to invalid manglings:

[[gnu::abi_tag("")]]    void f() -> _Z1fB0v
[[gnu::abi_tag("999")]] void f() -> _Z1fB3999v
[[gnu::abi_tag("-")]]   void f() -> _Z1fB1-v

Fixes #83462.
---
 .../clang/Basic/DiagnosticSemaKinds.td        |  4 +++
 clang/lib/Sema/SemaDeclAttr.cpp               | 31 ++++++++++++++++++-
 clang/test/Sema/attr-abi-tag.cpp              |  9 ++++++
 3 files changed, 43 insertions(+), 1 deletion(-)
 create mode 100644 clang/test/Sema/attr-abi-tag.cpp

diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index b007ff7d8ccf0f..18b8332ea7d329 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -5890,6 +5890,10 @@ def err_definition_of_explicitly_defaulted_member : Error<
 def err_redefinition_extern_inline : Error<
   "redefinition of a 'extern inline' function %0 is not supported in "
   "%select{C99 mode|C++}1">;
+def err_empty_abi_tag : Error<
+  "ABI tag cannot be empty">;
+def err_invalid_char_in_abi_tag : Error<
+  "character '%0' is not allowed %select{at the start of an ABI tag|in ABI tags}1">;
 def warn_attr_abi_tag_namespace : Warning<
   "'abi_tag' attribute on %select{non-inline|anonymous}0 namespace ignored">,
   InGroup<IgnoredAttributes>;
diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp
index 397b5db0dc0669..b7423de07835f0 100644
--- a/clang/lib/Sema/SemaDeclAttr.cpp
+++ b/clang/lib/Sema/SemaDeclAttr.cpp
@@ -44,6 +44,7 @@
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/IR/Assumptions.h"
 #include "llvm/MC/MCSectionMachO.h"
+#include "llvm/Support/ConvertUTF.h"
 #include "llvm/Support/Error.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
@@ -7425,11 +7426,39 @@ static void handleMSConstexprAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
   D->addAttr(::new (S.Context) MSConstexprAttr(S.Context, AL));
 }
 
+static bool checkValidAbiTag(Sema &S, const Expr *Exp, StringRef Tag) {
+  if (Tag.empty()) {
+    S.Diag(Exp->getExprLoc(), diag::err_empty_abi_tag) << Exp->getSourceRange();
+    return false;
+  }
+
+  for (size_t I = 0; I < Tag.size(); ++I) {
+    if (!isAsciiIdentifierContinue(Tag[I])) {
+      S.Diag(Exp->getBeginLoc().getLocWithOffset(1 + I),
+             diag::err_invalid_char_in_abi_tag)
+          << Tag.substr(I, llvm::getNumBytesForUTF8(Tag[I]))
+          << /*not allowed at all*/ 1;
+      return false;
+    }
+  }
+
+  if (!isAsciiIdentifierStart(Tag[0])) {
+    S.Diag(Exp->getBeginLoc().getLocWithOffset(1),
+           diag::err_invalid_char_in_abi_tag)
+        << Tag.substr(0, llvm::getNumBytesForUTF8(Tag[0]))
+        << /*not allowed at the start*/ 0;
+    return false;
+  }
+
+  return true;
+}
+
 static void handleAbiTagAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
   SmallVector<StringRef, 4> Tags;
   for (unsigned I = 0, E = AL.getNumArgs(); I != E; ++I) {
     StringRef Tag;
-    if (!S.checkStringLiteralArgumentAttr(AL, I, Tag))
+    if (!S.checkStringLiteralArgumentAttr(AL, I, Tag) ||
+        !checkValidAbiTag(S, AL.getArgAsExpr(I), Tag))
       return;
     Tags.push_back(Tag);
   }
diff --git a/clang/test/Sema/attr-abi-tag.cpp b/clang/test/Sema/attr-abi-tag.cpp
new file mode 100644
index 00000000000000..b4f3aade2027bd
--- /dev/null
+++ b/clang/test/Sema/attr-abi-tag.cpp
@@ -0,0 +1,9 @@
+// RUN: %clang_cc1 -verify -fsyntax-only %s
+
+[[gnu::abi_tag("")]]    void f1();  // expected-error {{ABI tag cannot be empty}}
+[[gnu::abi_tag("9A")]]  void f2();  // expected-error {{character '9' is not allowed at the start of an ABI tag}}
+[[gnu::abi_tag("0")]]   void f3();  // expected-error {{character '0' is not allowed at the start of an ABI tag}}
+[[gnu::abi_tag("猫A")]] void f4();  // expected-error {{character '猫' is not allowed in ABI tags}}
+[[gnu::abi_tag("A𨭎")]] void f5();  // expected-error {{character '𨭎' is not allowed in ABI tags}}
+[[gnu::abi_tag("AB")]]  void f6();
+[[gnu::abi_tag("A1")]]  void f7();



More information about the cfe-commits mailing list