[clang] c1cfa17 - [Clang] Emit TBAA info for enums in C (#73326)

via cfe-commits cfe-commits at lists.llvm.org
Fri Dec 8 04:58:44 PST 2023


Author: David Sherwood
Date: 2023-12-08T12:58:39Z
New Revision: c1cfa1757c208cd15efec3541aadea6bec52092d

URL: https://github.com/llvm/llvm-project/commit/c1cfa1757c208cd15efec3541aadea6bec52092d
DIFF: https://github.com/llvm/llvm-project/commit/c1cfa1757c208cd15efec3541aadea6bec52092d.diff

LOG: [Clang] Emit TBAA info for enums in C (#73326)

When emitting TBAA information for enums in C code we currently just
treat the data as an 'omnipotent char'. However, with C strict aliasing
this means we fail to optimise certain cases. For example, in the
SPEC2017 xz benchmark there are structs that contain arrays of enums,
and clang pessmistically assumes that accesses to those enums could
alias with other struct members that have a different type.

According to

https://en.cppreference.com/w/c/language/enum

enums should be treated as 'int' types unless explicitly specified (C23)
or if 'int' would not be large enough to hold all the enumerated values.
In the latter case the compiler is free to choose a suitable integer
that would hold all such values.

When compiling C code this patch generates TBAA information for the enum
by using an equivalent integer of the size clang has already chosen for
the enum. I have ignored C++ for now because the rules are more complex.

New test added here:

  clang/test/CodeGen/tbaa.c

Added: 
    clang/test/CodeGen/tbaa.c

Modified: 
    clang/docs/ReleaseNotes.rst
    clang/lib/CodeGen/CodeGenTBAA.cpp

Removed: 
    


################################################################################
diff  --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 5c044658197139..2403a3d48a09bb 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -204,6 +204,9 @@ C Language Changes
   number of elements in the flexible array member. This information can improve
   the results of the array bound sanitizer and the
   ``__builtin_dynamic_object_size`` builtin.
+- Enums will now be represented in TBAA metadata using their actual underlying
+  integer type. Previously they were treated as chars, which meant they could
+  alias with all other types.
 
 C23 Feature Support
 ^^^^^^^^^^^^^^^^^^^

diff  --git a/clang/lib/CodeGen/CodeGenTBAA.cpp b/clang/lib/CodeGen/CodeGenTBAA.cpp
index 5906b14dd93cf0..dc288bc3f6157a 100644
--- a/clang/lib/CodeGen/CodeGenTBAA.cpp
+++ b/clang/lib/CodeGen/CodeGenTBAA.cpp
@@ -196,11 +196,14 @@ llvm::MDNode *CodeGenTBAA::getTypeInfoHelper(const Type *Ty) {
   // Enum types are distinct types. In C++ they have "underlying types",
   // however they aren't related for TBAA.
   if (const EnumType *ETy = dyn_cast<EnumType>(Ty)) {
+    if (!Features.CPlusPlus)
+      return getTypeInfo(ETy->getDecl()->getIntegerType());
+
     // In C++ mode, types have linkage, so we can rely on the ODR and
     // on their mangled names, if they're external.
     // TODO: Is there a way to get a program-wide unique name for a
     // decl with local linkage or no linkage?
-    if (!Features.CPlusPlus || !ETy->getDecl()->isExternallyVisible())
+    if (!ETy->getDecl()->isExternallyVisible())
       return getChar();
 
     SmallString<256> OutName;

diff  --git a/clang/test/CodeGen/tbaa.c b/clang/test/CodeGen/tbaa.c
new file mode 100644
index 00000000000000..0ab81f60a71941
--- /dev/null
+++ b/clang/test/CodeGen/tbaa.c
@@ -0,0 +1,116 @@
+// RUN: %clang_cc1 -triple x86_64-apple-darwin -O1 -no-struct-path-tbaa -disable-llvm-passes %s -emit-llvm -o - | FileCheck %s
+// RUN: %clang_cc1 -triple x86_64-apple-darwin -O1 -disable-llvm-passes %s -emit-llvm -o - | FileCheck %s -check-prefixes=PATH
+// RUN: %clang_cc1 -triple x86_64-apple-darwin -O0 -disable-llvm-passes %s -emit-llvm -o - | FileCheck %s -check-prefix=NO-TBAA
+// RUN: %clang_cc1 -triple x86_64-apple-darwin -O1 -relaxed-aliasing -disable-llvm-passes %s -emit-llvm -o - | FileCheck %s -check-prefix=NO-TBAA
+// Test TBAA metadata generated by front-end.
+//
+// NO-TBAA-NOT: !tbaa
+
+typedef unsigned char uint8_t;
+typedef unsigned short uint16_t;
+typedef unsigned int uint32_t;
+typedef unsigned long long uint64_t;
+
+typedef enum {
+  RED_AUTO_32,
+  GREEN_AUTO_32,
+  BLUE_AUTO_32
+} EnumAuto32;
+
+typedef enum {
+  RED_AUTO_64,
+  GREEN_AUTO_64,
+  BLUE_AUTO_64 = 0x100000000ull
+} EnumAuto64;
+
+typedef enum : uint16_t {
+  RED_16,
+  GREEN_16,
+  BLUE_16
+} Enum16;
+
+typedef enum : uint8_t {
+  RED_8,
+  GREEN_8,
+  BLUE_8
+} Enum8;
+
+uint32_t g0(EnumAuto32 *E, uint32_t *val) {
+// CHECK-LABEL: define{{.*}} i32 @g0(
+// CHECK: store i32 5, ptr %{{.*}}, align 4, !tbaa [[TAG_i32:!.*]]
+// CHECK: store i32 0, ptr %{{.*}}, align 4, !tbaa [[TAG_i32]]
+// CHECK: load i32, ptr %{{.*}}, align 4, !tbaa [[TAG_i32]]
+// PATH-LABEL: define{{.*}} i32 @g0(
+// PATH: store i32 5, ptr %{{.*}}, align 4, !tbaa [[TAG_i32:!.*]]
+// PATH: store i32 0, ptr %{{.*}}, align 4, !tbaa [[TAG_i32]]
+// PATH: load i32, ptr %{{.*}}, align 4, !tbaa [[TAG_i32]]
+  *val = 5;
+  *E = RED_AUTO_32;
+  return *val;
+}
+
+uint64_t g1(EnumAuto64 *E, uint64_t *val) {
+// CHECK-LABEL: define{{.*}} i64 @g1(
+// CHECK: store i64 5, ptr %{{.*}}, align 8, !tbaa [[TAG_i64:!.*]]
+// CHECK: store i64 0, ptr %{{.*}}, align 8, !tbaa [[TAG_long:!.*]]
+// CHECK: load i64, ptr %{{.*}}, align 8, !tbaa [[TAG_i64]]
+// PATH-LABEL: define{{.*}} i64 @g1(
+// PATH: store i64 5, ptr %{{.*}}, align 8, !tbaa [[TAG_i64:!.*]]
+// PATH: store i64 0, ptr %{{.*}}, align 8, !tbaa [[TAG_long:!.*]]
+// PATH: load i64, ptr %{{.*}}, align 8, !tbaa [[TAG_i64]]
+  *val = 5;
+  *E = RED_AUTO_64;
+  return *val;
+}
+
+uint16_t g2(Enum16 *E, uint16_t *val) {
+// CHECK-LABEL: define{{.*}} i16 @g2(
+// CHECK: store i16 5, ptr %{{.*}}, align 2, !tbaa [[TAG_i16:!.*]]
+// CHECK: store i16 0, ptr %{{.*}}, align 2, !tbaa [[TAG_i16]]
+// CHECK: load i16, ptr %{{.*}}, align 2, !tbaa [[TAG_i16]]
+// PATH-LABEL: define{{.*}} i16 @g2(
+// PATH: store i16 5, ptr %{{.*}}, align 2, !tbaa [[TAG_i16:!.*]]
+// PATH: store i16 0, ptr %{{.*}}, align 2, !tbaa [[TAG_i16]]
+// PATH: load i16, ptr %{{.*}}, align 2, !tbaa [[TAG_i16]]
+  *val = 5;
+  *E = RED_16;
+  return *val;
+}
+
+uint8_t g3(Enum8 *E, uint8_t *val) {
+// CHECK-LABEL: define{{.*}} i8 @g3(
+// CHECK: store i8 5, ptr %{{.*}}, align 1, !tbaa [[TAG_i8:!.*]]
+// CHECK: store i8 0, ptr %{{.*}}, align 1, !tbaa [[TAG_i8]]
+// CHECK: load i8, ptr %{{.*}}, align 1, !tbaa [[TAG_i8]]
+// PATH-LABEL: define{{.*}} i8 @g3(
+// PATH: store i8 5, ptr %{{.*}}, align 1, !tbaa [[TAG_i8:!.*]]
+// PATH: store i8 0, ptr %{{.*}}, align 1, !tbaa [[TAG_i8]]
+// PATH: load i8, ptr %{{.*}}, align 1, !tbaa [[TAG_i8]]
+  *val = 5;
+  *E = RED_8;
+  return *val;
+}
+
+// CHECK: [[TYPE_char:!.*]] = !{!"omnipotent char", [[TAG_c_tbaa:!.*]],
+// CHECK: [[TAG_c_tbaa]] = !{!"Simple C/C++ TBAA"}
+// CHECK: [[TAG_i32]] = !{[[TYPE_i32:!.*]], [[TYPE_i32]], i64 0}
+// CHECK: [[TYPE_i32]] = !{!"int", [[TYPE_char]],
+// CHECK: [[TAG_i64]] = !{[[TYPE_i64:!.*]], [[TYPE_i64]], i64 0}
+// CHECK: [[TYPE_i64]] = !{!"long long", [[TYPE_char]],
+// CHECK: [[TAG_long]] = !{[[TYPE_long:!.*]], [[TYPE_long]], i64 0}
+// CHECK: [[TYPE_long]] = !{!"long", [[TYPE_char]],
+// CHECK: [[TAG_i16]] = !{[[TYPE_i16:!.*]], [[TYPE_i16]], i64 0}
+// CHECK: [[TYPE_i16]] = !{!"short", [[TYPE_char]],
+// CHECK: [[TAG_i8]] = !{[[TYPE_i8:!.*]], [[TYPE_char]], i64 0}
+
+// PATH: [[TYPE_char:!.*]] = !{!"omnipotent char", [[TAG_c_tbaa:!.*]],
+// PATH: [[TAG_c_tbaa]] = !{!"Simple C/C++ TBAA"}
+// PATH: [[TAG_i32]] = !{[[TYPE_i32:!.*]], [[TYPE_i32]], i64 0}
+// PATH: [[TYPE_i32]] = !{!"int", [[TYPE_char]],
+// PATH: [[TAG_i64]] = !{[[TYPE_i64:!.*]], [[TYPE_i64]], i64 0}
+// PATH: [[TYPE_i64]] = !{!"long long", [[TYPE_char]],
+// PATH: [[TAG_long]] = !{[[TYPE_long:!.*]], [[TYPE_long]], i64 0}
+// PATH: [[TYPE_long]] = !{!"long", [[TYPE_char]],
+// PATH: [[TAG_i16]] = !{[[TYPE_i16:!.*]], [[TYPE_i16]], i64 0}
+// PATH: [[TYPE_i16]] = !{!"short", [[TYPE_char]],
+// PATH: [[TAG_i8]] = !{[[TYPE_i8:!.*]], [[TYPE_char]], i64 0}


        


More information about the cfe-commits mailing list