[llvm-branch-commits] [clang] ec03323 - [clang] Fix some clang->llvm type cache invalidation issues

Tom Stellard via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Thu Jun 2 20:40:39 PDT 2022


Author: Arthur Eubanks
Date: 2022-06-02T20:40:20-07:00
New Revision: ec0332328bd6311b740ce39e684ef3f9674edec2

URL: https://github.com/llvm/llvm-project/commit/ec0332328bd6311b740ce39e684ef3f9674edec2
DIFF: https://github.com/llvm/llvm-project/commit/ec0332328bd6311b740ce39e684ef3f9674edec2.diff

LOG: [clang] Fix some clang->llvm type cache invalidation issues

Take the following as an example

  struct z {
    z (*p)();
  };

  z f();

When we attempt to get the LLVM type of f, we recurse into z. z itself
has a function pointer with the same type as f. Given the recursion,
Clang simply treats z::p as a pointer to an empty struct `{}*`. The
LLVM type of f is as expected. So we have two different potential
LLVM types for a given Clang type. If we store one of those into the
cache, when we access the cache with a different context (e.g. we
are/aren't recursing on z) we may get an incorrect result. There is some
attempt to clear the cache in these cases, but it doesn't seem to handle
all cases.

This change makes it so we only use the cache when we are not in the
middle of laying out a record or processing a function, i.e. when
`noRecordsBeingLaidOut() && FunctionsBeingProcessed.empty()` holds. Being
inside such a context is exactly when we may choose a different LLVM type
for a given Clang type. LLVM types for builtin types are never recursive,
so they are always safe to cache.

This allows us to clear the type cache less often (as seen with the
removal of one of the calls to `TypeCache.clear()`). We still need to
clear it when we use a placeholder type and later replace it with the
final type, because other dependent types then need to be recalculated.

I've added a check that the cached type matches what we compute. It
triggered in this test case without the fix. It's currently not
check-clang clean so it's not on by default for something like expensive
checks builds.

This change uncovered another issue where the LLVM types for an argument
and its local temporary don't match. For example in type-cache-3, when
expanding z::dc's argument into a temporary alloca, we ConvertType() the
type of z::p which is `void ({}*)*`, which doesn't match the alloca GEP
type of `{}*`.

No noticeable compile time changes:
https://llvm-compile-time-tracker.com/compare.php?from=3918dd6b8acf8c5886b9921138312d1c638b2937&to=50bdec9836ed40e38ece0657f3058e730adffc4c&stat=instructions

Fixes #53465.

Reviewed By: rnk

Differential Revision: https://reviews.llvm.org/D118744

(cherry picked from commit 45084eab5e63550bf2fdbf325d8fa5498263130d)

Added: 
    clang/test/CodeGenCXX/type-cache-2.cpp
    clang/test/CodeGenCXX/type-cache-3.cpp
    clang/test/CodeGenCXX/type-cache.cpp

Modified: 
    clang/lib/CodeGen/CGBuilder.h
    clang/lib/CodeGen/CGCall.cpp
    clang/lib/CodeGen/CodeGenTypes.cpp

Removed: 
    


################################################################################
diff  --git a/clang/lib/CodeGen/CGBuilder.h b/clang/lib/CodeGen/CGBuilder.h
index 7c9f41e84eaf5..06b2da146603f 100644
--- a/clang/lib/CodeGen/CGBuilder.h
+++ b/clang/lib/CodeGen/CGBuilder.h
@@ -9,10 +9,11 @@
 #ifndef LLVM_CLANG_LIB_CODEGEN_CGBUILDER_H
 #define LLVM_CLANG_LIB_CODEGEN_CGBUILDER_H
 
-#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/IRBuilder.h"
 #include "Address.h"
 #include "CodeGenTypeCache.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Type.h"
 
 namespace clang {
 namespace CodeGen {

diff  --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp
index a37ff8844e885..34f7a421c9333 100644
--- a/clang/lib/CodeGen/CGCall.cpp
+++ b/clang/lib/CodeGen/CGCall.cpp
@@ -38,6 +38,7 @@
 #include "llvm/IR/InlineAsm.h"
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Type.h"
 #include "llvm/Transforms/Utils/Local.h"
 using namespace clang;
 using namespace CodeGen;
@@ -1056,10 +1057,19 @@ void CodeGenFunction::ExpandTypeFromArgs(QualType Ty, LValue LV,
     // Call EmitStoreOfScalar except when the lvalue is a bitfield to emit a
     // primitive store.
     assert(isa<NoExpansion>(Exp.get()));
-    if (LV.isBitField())
-      EmitStoreThroughLValue(RValue::get(&*AI++), LV);
-    else
-      EmitStoreOfScalar(&*AI++, LV);
+    llvm::Value *Arg = &*AI++;
+    if (LV.isBitField()) {
+      EmitStoreThroughLValue(RValue::get(Arg), LV);
+    } else {
+      // TODO: currently some places are inconsistent in what LLVM pointer
+      // type they use (see D118744). Once clang uses opaque pointers, all
+      // LLVM pointer types will be the same and we can remove this check.
+      if (Arg->getType()->isPointerTy()) {
+        Address Addr = LV.getAddress(*this);
+        Arg = Builder.CreateBitCast(Arg, Addr.getElementType());
+      }
+      EmitStoreOfScalar(Arg, LV);
+    }
   }
 }
 

diff  --git a/clang/lib/CodeGen/CodeGenTypes.cpp b/clang/lib/CodeGen/CodeGenTypes.cpp
index 4839e22c4b144..7a8a7c916473e 100644
--- a/clang/lib/CodeGen/CodeGenTypes.cpp
+++ b/clang/lib/CodeGen/CodeGenTypes.cpp
@@ -25,9 +25,20 @@
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/Module.h"
+
 using namespace clang;
 using namespace CodeGen;
 
+#ifndef NDEBUG
+#include "llvm/Support/CommandLine.h"
+// TODO: turn on by default when defined(EXPENSIVE_CHECKS) once check-clang is
+// -verify-type-cache clean.
+static llvm::cl::opt<bool> VerifyTypeCache(
+    "verify-type-cache",
+    llvm::cl::desc("Verify that the type cache matches the computed type"),
+    llvm::cl::init(false), llvm::cl::Hidden);
+#endif
+
 CodeGenTypes::CodeGenTypes(CodeGenModule &cgm)
   : CGM(cgm), Context(cgm.getContext()), TheModule(cgm.getModule()),
     Target(cgm.getTarget()), TheCXXABI(cgm.getCXXABI()),
@@ -382,9 +393,6 @@ llvm::Type *CodeGenTypes::ConvertFunctionTypeInternal(QualType QFT) {
 
   RecordsBeingLaidOut.erase(Ty);
 
-  if (SkippedLayout)
-    TypeCache.clear();
-
   if (RecordsBeingLaidOut.empty())
     while (!DeferredRecords.empty())
       ConvertRecordDeclType(DeferredRecords.pop_back_val());
@@ -415,11 +423,29 @@ llvm::Type *CodeGenTypes::ConvertType(QualType T) {
   if (const RecordType *RT = dyn_cast<RecordType>(Ty))
     return ConvertRecordDeclType(RT->getDecl());
 
-  // See if type is already cached.
-  llvm::DenseMap<const Type *, llvm::Type *>::iterator TCI = TypeCache.find(Ty);
-  // If type is found in map then use it. Otherwise, convert type T.
-  if (TCI != TypeCache.end())
-    return TCI->second;
+  // The LLVM type we return for a given Clang type may not always be the same,
+  // most notably when dealing with recursive structs. We mark these potential
+  // cases with ShouldUseCache below. Builtin types cannot be recursive.
+  // TODO: when clang uses LLVM opaque pointers we won't be able to represent
+  // recursive types with LLVM types, making this logic much simpler.
+  llvm::Type *CachedType = nullptr;
+  bool ShouldUseCache =
+      Ty->isBuiltinType() ||
+      (noRecordsBeingLaidOut() && FunctionsBeingProcessed.empty());
+  if (ShouldUseCache) {
+    llvm::DenseMap<const Type *, llvm::Type *>::iterator TCI =
+        TypeCache.find(Ty);
+    if (TCI != TypeCache.end())
+      CachedType = TCI->second;
+    if (CachedType) {
+#ifndef NDEBUG
+      if (!VerifyTypeCache)
+        return CachedType;
+#else
+      return CachedType;
+#endif
+    }
+  }
 
   // If we don't have it in the cache, convert it now.
   llvm::Type *ResultType = nullptr;
@@ -797,7 +823,15 @@ llvm::Type *CodeGenTypes::ConvertType(QualType T) {
 
   assert(ResultType && "Didn't convert a type?");
 
-  TypeCache[Ty] = ResultType;
+#ifndef NDEBUG
+  if (CachedType) {
+    assert(CachedType == ResultType &&
+           "Cached type doesn't match computed type");
+  }
+#endif
+
+  if (ShouldUseCache)
+    TypeCache[Ty] = ResultType;
   return ResultType;
 }
 

diff  --git a/clang/test/CodeGenCXX/type-cache-2.cpp b/clang/test/CodeGenCXX/type-cache-2.cpp
new file mode 100644
index 0000000000000..bc672f7951660
--- /dev/null
+++ b/clang/test/CodeGenCXX/type-cache-2.cpp
@@ -0,0 +1,12 @@
+// RUN: %clang_cc1 -mllvm -verify-type-cache -emit-llvm %s -o - -triple i386-pc-windows-msvc19.16.0 | FileCheck %s
+// REQUIRES: asserts, x86-registered-target
+
+// CHECK: call void @"?dc@z@@SAXU1@@Z"
+struct z {
+  static void dc(z);
+  void (*p)(z);
+};
+
+void f() {
+  z::dc({});
+}

diff  --git a/clang/test/CodeGenCXX/type-cache-3.cpp b/clang/test/CodeGenCXX/type-cache-3.cpp
new file mode 100644
index 0000000000000..2cbb63b143deb
--- /dev/null
+++ b/clang/test/CodeGenCXX/type-cache-3.cpp
@@ -0,0 +1,16 @@
+// RUN: %clang_cc1 -mllvm -verify-type-cache -emit-llvm %s -o - -triple i386-pc-windows-msvc19.16.0 | FileCheck %s
+// REQUIRES: asserts, x86-registered-target
+
+// CHECK-LABEL: define {{.*}}@"?f@@YAXXZ"(
+// CHECK: call void @"?dc@z@@SAXU1@@Z"
+
+// CHECK-LABEL: define {{.*}}@"?dc@z@@SAXU1@@Z"(
+// CHECK: store void ({}*)* %{{.*}}, void ({}*)** %{{.*}}
+struct z {
+  static void dc(z) {}
+  void (*p)(z);
+};
+
+void f() {
+  z::dc({});
+}

diff  --git a/clang/test/CodeGenCXX/type-cache.cpp b/clang/test/CodeGenCXX/type-cache.cpp
new file mode 100644
index 0000000000000..02caee8e0f625
--- /dev/null
+++ b/clang/test/CodeGenCXX/type-cache.cpp
@@ -0,0 +1,14 @@
+// RUN: %clang_cc1 -mllvm -verify-type-cache -emit-llvm %s -o - -triple i386-pc-windows-msvc19.16.0 | FileCheck %s
+// REQUIRES: asserts, x86-registered-target
+
+// CHECK: call {}* @"?f@@YA?AUz@@XZ"()
+
+struct z {
+  z (*p)();
+};
+
+z f();
+
+void g() {
+  f();
+}


        


More information about the llvm-branch-commits mailing list