[clang] 4cc791b - [Clang] Add __datasizeof (#67805)

via cfe-commits cfe-commits at lists.llvm.org
Mon Nov 13 02:00:11 PST 2023


Author: philnik777
Date: 2023-11-13T11:00:07+01:00
New Revision: 4cc791bc98e075879f8c379f17e0b0369d57a40d

URL: https://github.com/llvm/llvm-project/commit/4cc791bc98e075879f8c379f17e0b0369d57a40d
DIFF: https://github.com/llvm/llvm-project/commit/4cc791bc98e075879f8c379f17e0b0369d57a40d.diff

LOG: [Clang] Add __datasizeof (#67805)

The data size is required to correctly implement the `memmove`
optimization for `std::copy`, `std::move`, etc., as well as to replace
`__compressed_pair` with `[[no_unique_address]]` in libc++. Since the
compiler already knows the data size, we can avoid some complexity by
exposing that information.

Added: 
    clang/test/CodeGenCXX/datasizeof.cpp
    clang/test/SemaCXX/datasizeof.cpp

Modified: 
    clang/docs/LanguageExtensions.rst
    clang/docs/ReleaseNotes.rst
    clang/include/clang/Basic/Features.def
    clang/include/clang/Basic/TokenKinds.def
    clang/lib/AST/ExprConstant.cpp
    clang/lib/AST/ItaniumMangle.cpp
    clang/lib/CodeGen/CGExprScalar.cpp
    clang/lib/Parse/ParseExpr.cpp
    clang/lib/Sema/SemaExpr.cpp

Removed: 
    


################################################################################
diff  --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst
index 30e288f986782fd..294210c6ac140a9 100644
--- a/clang/docs/LanguageExtensions.rst
+++ b/clang/docs/LanguageExtensions.rst
@@ -424,6 +424,18 @@ Builtin Macros
   "UTF-16" or "UTF-32" (but may change in the future if the
   ``-fwide-exec-charset="Encoding-Name"`` option is implemented.)
 
+Implementation-defined keywords
+===============================
+
+__datasizeof
+------------
+
+``__datasizeof`` behaves like ``sizeof``, except that it returns the size of the
+type ignoring tail padding.
+
+..
+  FIXME: This should list all the keyword extensions
+
 .. _langext-vectors:
 
 Vectors and Extended Vectors

diff  --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 74358219ba9fb22..66f82de69099533 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -217,6 +217,8 @@ Non-comprehensive list of changes in this release
   (e.g., ``uint16x8_t``), this returns the constant number of elements at compile-time.
   For scalable vectors, e.g., SVE or RISC-V V, the number of elements is not known at compile-time and is
   determined at runtime.
+* The ``__datasizeof`` keyword has been added. It is similar to ``sizeof``
+  except that it returns the size of a type ignoring tail padding.
 
 New Compiler Flags
 ------------------

diff  --git a/clang/include/clang/Basic/Features.def b/clang/include/clang/Basic/Features.def
index da77aee8de36990..adaf2e413f2f6de 100644
--- a/clang/include/clang/Basic/Features.def
+++ b/clang/include/clang/Basic/Features.def
@@ -277,6 +277,7 @@ EXTENSION(gnu_asm_goto_with_outputs_full, LangOpts.GNUAsm)
 EXTENSION(matrix_types, LangOpts.MatrixTypes)
 EXTENSION(matrix_types_scalar_division, true)
 EXTENSION(cxx_attributes_on_using_declarations, LangOpts.CPlusPlus11)
+EXTENSION(datasizeof, LangOpts.CPlusPlus)
 
 FEATURE(builtin_headers_in_system_modules, LangOpts.BuiltinHeadersInSystemModules)
 FEATURE(cxx_abi_relative_vtable, LangOpts.CPlusPlus && LangOpts.RelativeCXXABIVTables)

diff  --git a/clang/include/clang/Basic/TokenKinds.def b/clang/include/clang/Basic/TokenKinds.def
index 3ce317d318f9bb6..6cb4b3f250c4032 100644
--- a/clang/include/clang/Basic/TokenKinds.def
+++ b/clang/include/clang/Basic/TokenKinds.def
@@ -310,6 +310,7 @@ KEYWORD(return                      , KEYALL)
 KEYWORD(short                       , KEYALL)
 KEYWORD(signed                      , KEYALL)
 UNARY_EXPR_OR_TYPE_TRAIT(sizeof, SizeOf, KEYALL)
+UNARY_EXPR_OR_TYPE_TRAIT(__datasizeof, DataSizeOf, KEYCXX)
 KEYWORD(static                      , KEYALL)
 KEYWORD(struct                      , KEYALL)
 KEYWORD(switch                      , KEYALL)

diff  --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index e16fec6109e744e..4aa8045bc93be71 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -3184,9 +3184,14 @@ static bool HandleLValueIndirectMember(EvalInfo &Info, const Expr *E,
   return true;
 }
 
+enum class SizeOfType {
+  SizeOf,
+  DataSizeOf,
+};
+
 /// Get the size of the given type in char units.
-static bool HandleSizeof(EvalInfo &Info, SourceLocation Loc,
-                         QualType Type, CharUnits &Size) {
+static bool HandleSizeof(EvalInfo &Info, SourceLocation Loc, QualType Type,
+                         CharUnits &Size, SizeOfType SOT = SizeOfType::SizeOf) {
   // sizeof(void), __alignof__(void), sizeof(function) = 1 as a gcc
   // extension.
   if (Type->isVoidType() || Type->isFunctionType()) {
@@ -3206,7 +3211,10 @@ static bool HandleSizeof(EvalInfo &Info, SourceLocation Loc,
     return false;
   }
 
-  Size = Info.Ctx.getTypeSizeInChars(Type);
+  if (SOT == SizeOfType::SizeOf)
+    Size = Info.Ctx.getTypeSizeInChars(Type);
+  else
+    Size = Info.Ctx.getTypeInfoDataSizeInChars(Type).Width;
   return true;
 }
 
@@ -13689,6 +13697,7 @@ bool IntExprEvaluator::VisitUnaryExprOrTypeTraitExpr(
       return Success(1, E);
   }
 
+  case UETT_DataSizeOf:
   case UETT_SizeOf: {
     QualType SrcTy = E->getTypeOfArgument();
     // C++ [expr.sizeof]p2: "When applied to a reference or a reference type,
@@ -13697,8 +13706,11 @@ bool IntExprEvaluator::VisitUnaryExprOrTypeTraitExpr(
       SrcTy = Ref->getPointeeType();
 
     CharUnits Sizeof;
-    if (!HandleSizeof(Info, E->getExprLoc(), SrcTy, Sizeof))
+    if (!HandleSizeof(Info, E->getExprLoc(), SrcTy, Sizeof,
+                      E->getKind() == UETT_DataSizeOf ? SizeOfType::DataSizeOf
+                                                      : SizeOfType::SizeOf)) {
       return false;
+    }
     return Success(Sizeof, E);
   }
   case UETT_OpenMPRequiredSimdAlign:

diff  --git a/clang/lib/AST/ItaniumMangle.cpp b/clang/lib/AST/ItaniumMangle.cpp
index 5ac8c2e447cdb5a..6c5217f0da11e6c 100644
--- a/clang/lib/AST/ItaniumMangle.cpp
+++ b/clang/lib/AST/ItaniumMangle.cpp
@@ -28,6 +28,7 @@
 #include "clang/AST/Mangle.h"
 #include "clang/AST/TypeLoc.h"
 #include "clang/Basic/ABI.h"
+#include "clang/Basic/DiagnosticAST.h"
 #include "clang/Basic/Module.h"
 #include "clang/Basic/SourceManager.h"
 #include "clang/Basic/TargetInfo.h"
@@ -5068,6 +5069,14 @@ void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity,
       Out << 'a';
       MangleAlignofSizeofArg();
       break;
+    case UETT_DataSizeOf: {
+      DiagnosticsEngine &Diags = Context.getDiags();
+      unsigned DiagID =
+          Diags.getCustomDiagID(DiagnosticsEngine::Error,
+                                "cannot yet mangle __datasizeof expression");
+      Diags.Report(DiagID);
+      return;
+    }
     case UETT_VecStep: {
       DiagnosticsEngine &Diags = Context.getDiags();
       unsigned DiagID = Diags.getCustomDiagID(DiagnosticsEngine::Error,

diff  --git a/clang/lib/CodeGen/CGExprScalar.cpp b/clang/lib/CodeGen/CGExprScalar.cpp
index 1a7a3f97bb779a0..05a3c80fbdd03ab 100644
--- a/clang/lib/CodeGen/CGExprScalar.cpp
+++ b/clang/lib/CodeGen/CGExprScalar.cpp
@@ -3053,9 +3053,10 @@ Value *
 ScalarExprEmitter::VisitUnaryExprOrTypeTraitExpr(
                               const UnaryExprOrTypeTraitExpr *E) {
   QualType TypeToSize = E->getTypeOfArgument();
-  if (E->getKind() == UETT_SizeOf) {
+  if (auto Kind = E->getKind();
+      Kind == UETT_SizeOf || Kind == UETT_DataSizeOf) {
     if (const VariableArrayType *VAT =
-          CGF.getContext().getAsVariableArrayType(TypeToSize)) {
+            CGF.getContext().getAsVariableArrayType(TypeToSize)) {
       if (E->isArgumentType()) {
         // sizeof(type) - make sure to emit the VLA size.
         CGF.EmitVariablyModifiedType(TypeToSize);

diff  --git a/clang/lib/Parse/ParseExpr.cpp b/clang/lib/Parse/ParseExpr.cpp
index 53fba3b2f59242b..9e05394e8d07dd6 100644
--- a/clang/lib/Parse/ParseExpr.cpp
+++ b/clang/lib/Parse/ParseExpr.cpp
@@ -1460,6 +1460,9 @@ ExprResult Parser::ParseCastExpression(CastParseKind ParseKind,
                            // unary-expression: '__alignof' '(' type-name ')'
   case tok::kw_sizeof:     // unary-expression: 'sizeof' unary-expression
                            // unary-expression: 'sizeof' '(' type-name ')'
+  // unary-expression: '__datasizeof' unary-expression
+  // unary-expression: '__datasizeof' '(' type-name ')'
+  case tok::kw___datasizeof:
   case tok::kw_vec_step:   // unary-expression: OpenCL 'vec_step' expression
   // unary-expression: '__builtin_omp_required_simd_align' '(' type-name ')'
   case tok::kw___builtin_omp_required_simd_align:
@@ -2307,6 +2310,8 @@ Parser::ParsePostfixExpressionSuffix(ExprResult LHS) {
 ///       unary-expression:  [C99 6.5.3]
 ///         'sizeof' unary-expression
 ///         'sizeof' '(' type-name ')'
+/// [Clang] '__datasizeof' unary-expression
+/// [Clang] '__datasizeof' '(' type-name ')'
 /// [GNU]   '__alignof' unary-expression
 /// [GNU]   '__alignof' '(' type-name ')'
 /// [C11]   '_Alignof' '(' type-name ')'
@@ -2335,8 +2340,8 @@ Parser::ParseExprAfterUnaryExprOrTypeTrait(const Token &OpTok,
                                            SourceRange &CastRange) {
 
   assert(OpTok.isOneOf(tok::kw_typeof, tok::kw_typeof_unqual, tok::kw_sizeof,
-                       tok::kw___alignof, tok::kw_alignof, tok::kw__Alignof,
-                       tok::kw_vec_step,
+                       tok::kw___datasizeof, tok::kw___alignof, tok::kw_alignof,
+                       tok::kw__Alignof, tok::kw_vec_step,
                        tok::kw___builtin_omp_required_simd_align,
                        tok::kw___builtin_vectorelements) &&
          "Not a typeof/sizeof/alignof/vec_step expression!");
@@ -2347,8 +2352,8 @@ Parser::ParseExprAfterUnaryExprOrTypeTrait(const Token &OpTok,
   if (Tok.isNot(tok::l_paren)) {
     // If construct allows a form without parenthesis, user may forget to put
     // pathenthesis around type name.
-    if (OpTok.isOneOf(tok::kw_sizeof, tok::kw___alignof, tok::kw_alignof,
-                      tok::kw__Alignof)) {
+    if (OpTok.isOneOf(tok::kw_sizeof, tok::kw___datasizeof, tok::kw___alignof,
+                      tok::kw_alignof, tok::kw__Alignof)) {
       if (isTypeIdUnambiguously()) {
         DeclSpec DS(AttrFactory);
         ParseSpecifierQualifierList(DS);
@@ -2451,14 +2456,16 @@ ExprResult Parser::ParseSYCLUniqueStableNameExpression() {
 ///         'sizeof' unary-expression
 ///         'sizeof' '(' type-name ')'
 /// [C++11] 'sizeof' '...' '(' identifier ')'
+/// [Clang] '__datasizeof' unary-expression
+/// [Clang] '__datasizeof' '(' type-name ')'
 /// [GNU]   '__alignof' unary-expression
 /// [GNU]   '__alignof' '(' type-name ')'
 /// [C11]   '_Alignof' '(' type-name ')'
 /// [C++11] 'alignof' '(' type-id ')'
 /// \endverbatim
 ExprResult Parser::ParseUnaryExprOrTypeTraitExpression() {
-  assert(Tok.isOneOf(tok::kw_sizeof, tok::kw___alignof, tok::kw_alignof,
-                     tok::kw__Alignof, tok::kw_vec_step,
+  assert(Tok.isOneOf(tok::kw_sizeof, tok::kw___datasizeof, tok::kw___alignof,
+                     tok::kw_alignof, tok::kw__Alignof, tok::kw_vec_step,
                      tok::kw___builtin_omp_required_simd_align,
                      tok::kw___builtin_vectorelements) &&
          "Not a sizeof/alignof/vec_step expression!");
@@ -2531,16 +2538,29 @@ ExprResult Parser::ParseUnaryExprOrTypeTraitExpression() {
                                                           CastRange);
 
   UnaryExprOrTypeTrait ExprKind = UETT_SizeOf;
-  if (OpTok.isOneOf(tok::kw_alignof, tok::kw__Alignof))
+  switch (OpTok.getKind()) {
+  case tok::kw_alignof:
+  case tok::kw__Alignof:
     ExprKind = UETT_AlignOf;
-  else if (OpTok.is(tok::kw___alignof))
+    break;
+  case tok::kw___alignof:
     ExprKind = UETT_PreferredAlignOf;
-  else if (OpTok.is(tok::kw_vec_step))
+    break;
+  case tok::kw_vec_step:
     ExprKind = UETT_VecStep;
-  else if (OpTok.is(tok::kw___builtin_omp_required_simd_align))
+    break;
+  case tok::kw___builtin_omp_required_simd_align:
     ExprKind = UETT_OpenMPRequiredSimdAlign;
-  else if (OpTok.is(tok::kw___builtin_vectorelements))
+    break;
+  case tok::kw___datasizeof:
+    ExprKind = UETT_DataSizeOf;
+    break;
+  case tok::kw___builtin_vectorelements:
     ExprKind = UETT_VectorElements;
+    break;
+  default:
+    break;
+  }
 
   if (isCastExpr)
     return Actions.ActOnUnaryExprOrTypeTraitExpr(OpTok.getLocation(),

diff  --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
index 87e5b63e453f6ef..5b0c4439fd1710c 100644
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -4444,8 +4444,9 @@ bool Sema::CheckUnaryExprOrTypeTraitOperand(Expr *E,
   assert(!ExprTy->isReferenceType());
 
   bool IsUnevaluatedOperand =
-      (ExprKind == UETT_SizeOf || ExprKind == UETT_AlignOf ||
-       ExprKind == UETT_PreferredAlignOf || ExprKind == UETT_VecStep);
+      (ExprKind == UETT_SizeOf || ExprKind == UETT_DataSizeOf ||
+       ExprKind == UETT_AlignOf || ExprKind == UETT_PreferredAlignOf ||
+       ExprKind == UETT_VecStep);
   if (IsUnevaluatedOperand) {
     ExprResult Result = CheckUnevaluatedOperand(E);
     if (Result.isInvalid())

diff  --git a/clang/test/CodeGenCXX/datasizeof.cpp b/clang/test/CodeGenCXX/datasizeof.cpp
new file mode 100644
index 000000000000000..5a8f4fc79bdf054
--- /dev/null
+++ b/clang/test/CodeGenCXX/datasizeof.cpp
@@ -0,0 +1,19 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-gnu-linux -emit-llvm %s -o - | FileCheck %s
+
+// CHECK-LABEL: define dso_local noundef i32 @_Z4testi(
+// CHECK-SAME: i32 noundef [[I:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[I_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    store i32 [[I]], ptr [[I_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[I_ADDR]], align 4
+// CHECK-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP0]], 1
+// CHECK-NEXT:    store i32 [[INC]], ptr [[I_ADDR]], align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
+// CHECK-NEXT:    [[TMP2:%.*]] = mul nuw i64 4, [[TMP1]]
+// CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[I_ADDR]], align 4
+// CHECK-NEXT:    ret i32 [[TMP3]]
+//
+int test(int i) {
+  (void)__datasizeof(int[i++]);
+  return i;
+}

diff  --git a/clang/test/SemaCXX/datasizeof.cpp b/clang/test/SemaCXX/datasizeof.cpp
new file mode 100644
index 000000000000000..f96660d2028d078
--- /dev/null
+++ b/clang/test/SemaCXX/datasizeof.cpp
@@ -0,0 +1,53 @@
+// RUN: %clang_cc1 -fsyntax-only -triple x86_64-linux-gnu -verify %s
+
+#if !__has_extension(datasizeof)
+#  error "Expected datasizeof extension"
+#endif
+
+struct HasPadding {
+  int i;
+  char c;
+};
+
+struct HasUsablePadding {
+  int i;
+  char c;
+
+  HasUsablePadding() {}
+};
+
+struct Incomplete; // expected-note {{forward declaration of 'Incomplete'}}
+
+static_assert(__datasizeof(int) == 4);
+static_assert(__datasizeof(HasPadding) == 8);
+static_assert(__datasizeof(HasUsablePadding) == 5);
+static_assert(__datasizeof(void)); // expected-error {{invalid application of '__datasizeof' to an incomplete type 'void'}}
+static_assert(__datasizeof(Incomplete)); // expected-error {{invalid application of '__datasizeof' to an incomplete type 'Incomplete'}}
+
+static_assert([] {
+  int* p = nullptr;
+  HasPadding* p2 = nullptr;
+  HasUsablePadding* p3 = nullptr;
+  static_assert(__datasizeof(*p) == 4);
+  static_assert(__datasizeof *p == 4);
+  static_assert(__datasizeof(*p2) == 8);
+  static_assert(__datasizeof(*p3) == 5);
+
+  return true;
+}());
+
+template <typename Ty>
+constexpr int data_size_of() {
+  return __datasizeof(Ty);
+}
+static_assert(data_size_of<int>() == __datasizeof(int));
+static_assert(data_size_of<HasPadding>() == __datasizeof(HasPadding));
+static_assert(data_size_of<HasUsablePadding>() == __datasizeof(HasUsablePadding));
+
+struct S {
+  int i = __datasizeof(S);
+  float f;
+  char c;
+};
+
+static_assert(S{}.i == 9);


        


More information about the cfe-commits mailing list