[clang] [llvm] Add option to generate additional debug info for expression dereferencing pointer to pointers. (PR #81545)

William Junda Huang via cfe-commits cfe-commits at lists.llvm.org
Wed Mar 20 12:56:06 PDT 2024


https://github.com/huangjd updated https://github.com/llvm/llvm-project/pull/81545

>From f2c82758e1cba7773e41d941d2812c829c339675 Mon Sep 17 00:00:00 2001
From: William Huang <williamjhuang at google.com>
Date: Mon, 12 Feb 2024 02:27:13 -0500
Subject: [PATCH 1/9] Add option to generate additional info for expression
 containing pointer of pointers.

Such an expression does not correspond to a variable in the source code
and thus does not have a debug location. However, the user may want to
collect sampling counters for memory accesses to analyze the usage
frequency of class members. By enabling -fdebug-info-for-pointer-type, a
pseudo variable and its debug info are generated in place whenever
there's an intermediate expression with pointer access.
---
 clang/include/clang/Basic/DebugOptions.def |  4 ++
 clang/include/clang/Driver/Options.td      |  4 ++
 clang/lib/CodeGen/CGDebugInfo.cpp          | 16 +++++
 clang/lib/CodeGen/CGDebugInfo.h            |  6 ++
 clang/lib/CodeGen/CGDecl.cpp               |  4 ++
 clang/lib/CodeGen/CGExpr.cpp               | 79 ++++++++++++++++++++++
 clang/lib/CodeGen/CodeGenFunction.h        |  5 ++
 clang/lib/Driver/ToolChains/Clang.cpp      |  3 +
 8 files changed, 121 insertions(+)

diff --git a/clang/include/clang/Basic/DebugOptions.def b/clang/include/clang/Basic/DebugOptions.def
index 7cd3edf08a17ea..6dd09f46842077 100644
--- a/clang/include/clang/Basic/DebugOptions.def
+++ b/clang/include/clang/Basic/DebugOptions.def
@@ -129,6 +129,10 @@ DEBUGOPT(CodeViewCommandLine, 1, 0)
 /// Whether emit extra debug info for sample pgo profile collection.
 DEBUGOPT(DebugInfoForProfiling, 1, 0)
 
+/// Whether to generate pseudo variables and their debug info for intermediate
+/// pointer accesses.
+DEBUGOPT(DebugInfoForPointerType, 1, 0)
+
 /// Whether to emit .debug_gnu_pubnames section instead of .debug_pubnames.
 DEBUGOPT(DebugNameTable, 2, 0)
 
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index 7f4fa33748faca..96b22d3f7640dd 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -1675,6 +1675,10 @@ defm debug_info_for_profiling : BoolFOption<"debug-info-for-profiling",
   PosFlag<SetTrue, [], [ClangOption, CC1Option],
           "Emit extra debug info to make sample profile more accurate">,
   NegFlag<SetFalse>>;
+def fdebug_info_for_pointer_type : Flag<["-"], "fdebug-info-for-pointer-type">,
+  Group<f_Group>, Visibility<[ClangOption, CC1Option]>,
+  HelpText<"Generate pseudo variables and their debug info for intermediate pointer accesses">,
+  MarshallingInfoFlag<CodeGenOpts<"DebugInfoForPointerType">>;
 def fprofile_instr_generate : Flag<["-"], "fprofile-instr-generate">,
     Group<f_Group>, Visibility<[ClangOption, CLOption]>,
     HelpText<"Generate instrumented code to collect execution counts into default.profraw file (overridden by '=' form of option or LLVM_PROFILE_FILE env var)">;
diff --git a/clang/lib/CodeGen/CGDebugInfo.cpp b/clang/lib/CodeGen/CGDebugInfo.cpp
index 0f3f684d61dc94..6ce40da22dc97d 100644
--- a/clang/lib/CodeGen/CGDebugInfo.cpp
+++ b/clang/lib/CodeGen/CGDebugInfo.cpp
@@ -5636,6 +5636,22 @@ void CGDebugInfo::EmitExternalVariable(llvm::GlobalVariable *Var,
   Var->addDebugInfo(GVE);
 }
 
+void CGDebugInfo::EmitPseudoVariable(llvm::AllocaInst *Alloca, QualType Ty,
+                                     SourceLocation Loc) {
+  llvm::DIFile *Unit = getOrCreateFile(Loc);
+  unsigned Line = getLineNumber(Loc);
+  unsigned Column = getColumnNumber(Loc);
+  llvm::DILocalVariable *D = DBuilder.createAutoVariable(
+      LexicalBlockStack.back(), Alloca->getName(), getOrCreateFile(Loc), Line,
+      getOrCreateType(Ty, Unit));
+  llvm::DILocation *DIL =
+      llvm::DILocation::get(CGM.getLLVMContext(), Line, Column,
+                            LexicalBlockStack.back(), CurInlinedAt);
+  SmallVector<uint64_t> Expr;
+  DBuilder.insertDeclare(Alloca, D, DBuilder.createExpression(Expr), DIL,
+                         Alloca->getParent());
+}
+
 void CGDebugInfo::EmitGlobalAlias(const llvm::GlobalValue *GV,
                                   const GlobalDecl GD) {
 
diff --git a/clang/lib/CodeGen/CGDebugInfo.h b/clang/lib/CodeGen/CGDebugInfo.h
index 7b60e94555d060..a2c484f50b2bc5 100644
--- a/clang/lib/CodeGen/CGDebugInfo.h
+++ b/clang/lib/CodeGen/CGDebugInfo.h
@@ -529,6 +529,12 @@ class CGDebugInfo {
   /// Emit information about an external variable.
   void EmitExternalVariable(llvm::GlobalVariable *GV, const VarDecl *Decl);
 
+  /// Emit debug information for a pseudo variable assigned to the value of an
+  /// intermediate expression, so that a performance counter can track the usage
+  /// of a specific expression of interest.
+  void EmitPseudoVariable(llvm::AllocaInst *Alloca, QualType Ty,
+                          SourceLocation Loc);
+
   /// Emit information about global variable alias.
   void EmitGlobalAlias(const llvm::GlobalValue *GV, const GlobalDecl Decl);
 
diff --git a/clang/lib/CodeGen/CGDecl.cpp b/clang/lib/CodeGen/CGDecl.cpp
index bbe14ef4c17244..5f7b2529179003 100644
--- a/clang/lib/CodeGen/CGDecl.cpp
+++ b/clang/lib/CodeGen/CGDecl.cpp
@@ -793,6 +793,10 @@ void CodeGenFunction::EmitScalarInit(const Expr *init, const ValueDecl *D,
   Qualifiers::ObjCLifetime lifetime = lvalue.getObjCLifetime();
   if (!lifetime) {
     llvm::Value *value = EmitScalarExpr(init);
+    if (CGM.getCodeGenOpts().getDebugInfo() >
+            llvm::codegenoptions::DebugLineTablesOnly &&
+        CGM.getCodeGenOpts().DebugInfoForPointerType)
+      value = UnemitPseudoVariable(value);
     if (capturedByInit)
       drillIntoBlockVariable(*this, lvalue, cast<VarDecl>(D));
     EmitNullabilityCheck(lvalue, value, init->getExprLoc());
diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index c5f6b6d3a99f0b..b979c0830c5b34 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -951,6 +951,58 @@ static llvm::Value *getArrayIndexingBound(CodeGenFunction &CGF,
   return nullptr;
 }
 
+/// When a pseudo variable is created for %1, it generates these instructions
+/// in sequence and return %2:
+/// %pseudo = alloca Ty
+/// call void @llvm.dbg.declare(metadata ptr %pseudo, metadata, metadata)
+/// store Ty %1, ptr %pseudo
+/// %2 = load ptr, ptr %pseudo
+/// To undo, we detect and remove this sequence, and replace %2 back to %1.
+llvm::Value *CodeGenFunction::UnemitPseudoVariable(llvm::Value *V) {
+  if (!getDebugInfo())
+    return V;
+
+  if (llvm::LoadInst *Load = dyn_cast<llvm::LoadInst>(V)) {
+    llvm::Value *PseudoVar = Load->getPointerOperand();
+    if (llvm::StoreInst *Store =
+            dyn_cast<llvm::StoreInst>(Load->getPrevNode())) {
+      if (Store->getPointerOperand() != PseudoVar)
+        return V;
+      llvm::Value *OriginalValue = Store->getValueOperand();
+      if (llvm::CallInst *DbgCall =
+              dyn_cast<llvm::CallInst>(Store->getPrevNode())) {
+        if (DbgCall->getCalledFunction() !=
+                llvm::Intrinsic::getDeclaration(&CGM.getModule(),
+                                                llvm::Intrinsic::dbg_declare) ||
+            DbgCall->getNumOperands() != 4)
+          return V;
+        for (int i = 0; i < 3; i++) {
+          if (!isa<llvm::MetadataAsValue>(DbgCall->getArgOperand(i)))
+            return V;
+        }
+        if (llvm::MetadataAsValue *Metadata =
+                dyn_cast<llvm::MetadataAsValue>(DbgCall->getOperand(0))) {
+          if (llvm::ValueAsMetadata *Value =
+                  dyn_cast<llvm::ValueAsMetadata>(Metadata->getMetadata())) {
+            if (Value->getValue() != PseudoVar)
+              return V;
+            if (llvm::AllocaInst *Alloca =
+                    dyn_cast<llvm::AllocaInst>(DbgCall->getPrevNode())) {
+              V->replaceAllUsesWith(OriginalValue);
+              Load->eraseFromParent();
+              Store->eraseFromParent();
+              DbgCall->eraseFromParent();
+              Alloca->eraseFromParent();
+              return OriginalValue;
+            }
+          }
+        }
+      }
+    }
+  }
+  return V;
+}
+
 namespace {
 
 /// \p StructAccessBase returns the base \p Expr of a field access. It returns
@@ -2015,6 +2067,29 @@ llvm::Value *CodeGenFunction::EmitLoadOfScalar(Address Addr, bool Volatile,
                         llvm::MDNode::get(getLLVMContext(), std::nullopt));
     }
 
+  // if -g2 or above and -fdebug-info-for-pointer-type are enabled, emit
+  // additional debug info for loads in an intermediate expression, which allows
+  // a performance counter to deduce the type of the value being loaded, even if
+  // it does not correspond to a variable in the source code.
+  // Since there is no variable correspond to an intermediate expression, we
+  // create a pseudo variable for it and emit its debug info, as if the
+  // expression were written in SSA form.
+  if (CGM.getCodeGenOpts().getDebugInfo() > llvm::codegenoptions::DebugLineTablesOnly &&
+      CGM.getCodeGenOpts().DebugInfoForPointerType) {
+    if (CGDebugInfo *DI = getDebugInfo())
+      // We only generate this debug info if loading from GEP, not from other
+      // cases such as loading a function argument.
+      if (isa<llvm::GetElementPtrInst>(Load->getOperand(0))) {
+        const llvm::DebugLoc &DebugLoc = Load->getDebugLoc();
+        llvm::AllocaInst *PseudoVar = Builder.CreateAlloca(
+            Load->getType(), nullptr, Twine("pseudo_").concat(Twine(DebugLoc.getLine())).concat("_").concat(Twine(DebugLoc.getCol())));
+        DI->EmitPseudoVariable(PseudoVar, Ty, Loc);
+        Address PseudoVarAddr(PseudoVar, Load->getType(), Addr.getAlignment());
+        Builder.CreateStore(Load, PseudoVarAddr);
+        Load = Builder.CreateLoad(PseudoVarAddr);
+      }
+  }
+
   return EmitFromMemory(Load, Ty);
 }
 
@@ -5569,6 +5644,10 @@ LValue CodeGenFunction::EmitBinaryOperatorLValue(const BinaryOperator *E) {
     }
 
     RValue RV = EmitAnyExpr(E->getRHS());
+    if (CGM.getCodeGenOpts().getDebugInfo() > llvm::codegenoptions::DebugLineTablesOnly &&
+        CGM.getCodeGenOpts().DebugInfoForPointerType)
+      if (isa<DeclRefExpr>(E->getLHS()) && RV.isScalar())
+        RV = RValue::get(UnemitPseudoVariable(RV.getScalarVal()));
     LValue LV = EmitCheckedLValue(E->getLHS(), TCK_Store);
     if (RV.isScalar())
       EmitNullabilityCheck(LV, RV.getScalarVal(), E->getExprLoc());
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index 143ad64e8816b1..36a572ace2ef64 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -3104,6 +3104,11 @@ class CodeGenFunction : public CodeGenTypeCache {
   /// Get the record field index as represented in debug info.
   unsigned getDebugInfoFIndex(const RecordDecl *Rec, unsigned FieldIndex);
 
+  /// When the result of EmitLoadOfScalar is immediately assigned to a declared
+  /// variable, the pseudo variable emitted for it (when the flag
+  /// -fdebug-info-for-pointer-type is specified) should be undone since there
+  /// is already a debug value emitted for the declared variable.
+  llvm::Value *UnemitPseudoVariable(llvm::Value *V);
 
   //===--------------------------------------------------------------------===//
   //                            Declaration Emission
diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index bcba7cbbdb58c2..7882c4f1225f1f 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -4256,6 +4256,9 @@ renderDebugOptions(const ToolChain &TC, const Driver &D, const llvm::Triple &T,
   // decision should be made in the driver as well though.
   llvm::DebuggerKind DebuggerTuning = TC.getDefaultDebuggerTuning();
 
+  if (Args.hasArg(options::OPT_fdebug_info_for_pointer_type))
+    CmdArgs.push_back("-fdebug-info-for-pointer-type");
+
   bool SplitDWARFInlining =
       Args.hasFlag(options::OPT_fsplit_dwarf_inlining,
                    options::OPT_fno_split_dwarf_inlining, false);

>From 95235b97774f8700ee9108e057ac360e86c79597 Mon Sep 17 00:00:00 2001
From: William Huang <williamjhuang at google.com>
Date: Wed, 6 Mar 2024 05:59:07 -0500
Subject: [PATCH 2/9] Code cleanup

---
 clang/lib/CodeGen/CGDebugInfo.cpp |  9 ++++
 clang/lib/CodeGen/CGDebugInfo.h   |  7 +++
 clang/lib/CodeGen/CGDecl.cpp      |  5 +-
 clang/lib/CodeGen/CGExpr.cpp      | 80 ++++++++++++++-----------------
 4 files changed, 52 insertions(+), 49 deletions(-)

diff --git a/clang/lib/CodeGen/CGDebugInfo.cpp b/clang/lib/CodeGen/CGDebugInfo.cpp
index 6ce40da22dc97d..7aa96332e495b3 100644
--- a/clang/lib/CodeGen/CGDebugInfo.cpp
+++ b/clang/lib/CodeGen/CGDebugInfo.cpp
@@ -5652,6 +5652,15 @@ void CGDebugInfo::EmitPseudoVariable(llvm::AllocaInst *Alloca, QualType Ty,
                          Alloca->getParent());
 }
 
+llvm::MDNode *CGDebugInfo::GetPseudoVariableAnnotation() {
+  if (!PseudoVariableAnnotation)
+    PseudoVariableAnnotation =
+        llvm::MDNode::get(CGM.getLLVMContext(),
+                          llvm::MDString::get(CGM.getLLVMContext(),
+                                              "fdebug-info-for-pointer-type"));
+  return PseudoVariableAnnotation;
+}
+
 void CGDebugInfo::EmitGlobalAlias(const llvm::GlobalValue *GV,
                                   const GlobalDecl GD) {
 
diff --git a/clang/lib/CodeGen/CGDebugInfo.h b/clang/lib/CodeGen/CGDebugInfo.h
index a2c484f50b2bc5..047095073c6965 100644
--- a/clang/lib/CodeGen/CGDebugInfo.h
+++ b/clang/lib/CodeGen/CGDebugInfo.h
@@ -170,6 +170,9 @@ class CGDebugInfo {
   /// The key is coroutine real parameters, value is DIVariable in LLVM IR.
   Param2DILocTy ParamDbgMappings;
 
+  /// Cached object for GetPseudoVariableAnnotation().
+  llvm::MDNode *PseudoVariableAnnotation = nullptr;
+
   /// Helper functions for getOrCreateType.
   /// @{
   /// Currently the checksum of an interface includes the number of
@@ -535,6 +538,10 @@ class CGDebugInfo {
   void EmitPseudoVariable(llvm::AllocaInst *Alloca, QualType Ty,
                           SourceLocation Loc);
 
+  /// Get the special annotation tag that indicates the instruction is
+  /// associated with EmitPseudoVariable.
+  llvm::MDNode *GetPseudoVariableAnnotation();
+
   /// Emit information about global variable alias.
   void EmitGlobalAlias(const llvm::GlobalValue *GV, const GlobalDecl Decl);
 
diff --git a/clang/lib/CodeGen/CGDecl.cpp b/clang/lib/CodeGen/CGDecl.cpp
index 5f7b2529179003..04cd0eaddb83ad 100644
--- a/clang/lib/CodeGen/CGDecl.cpp
+++ b/clang/lib/CodeGen/CGDecl.cpp
@@ -793,10 +793,7 @@ void CodeGenFunction::EmitScalarInit(const Expr *init, const ValueDecl *D,
   Qualifiers::ObjCLifetime lifetime = lvalue.getObjCLifetime();
   if (!lifetime) {
     llvm::Value *value = EmitScalarExpr(init);
-    if (CGM.getCodeGenOpts().getDebugInfo() >
-            llvm::codegenoptions::DebugLineTablesOnly &&
-        CGM.getCodeGenOpts().DebugInfoForPointerType)
-      value = UnemitPseudoVariable(value);
+    value = UnemitPseudoVariable(value);
     if (capturedByInit)
       drillIntoBlockVariable(*this, lvalue, cast<VarDecl>(D));
     EmitNullabilityCheck(lvalue, value, init->getExprLoc());
diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index b979c0830c5b34..43c962da7a3e3e 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -957,45 +957,26 @@ static llvm::Value *getArrayIndexingBound(CodeGenFunction &CGF,
 /// call void @llvm.dbg.declare(metadata ptr %pseudo, metadata, metadata)
 /// store Ty %1, ptr %pseudo
 /// %2 = load ptr, ptr %pseudo
-/// To undo, we detect and remove this sequence, and replace %2 back to %1.
+/// To undo, we detect and remove this sequence, and replace %2 back with %1.
 llvm::Value *CodeGenFunction::UnemitPseudoVariable(llvm::Value *V) {
-  if (!getDebugInfo())
-    return V;
-
-  if (llvm::LoadInst *Load = dyn_cast<llvm::LoadInst>(V)) {
-    llvm::Value *PseudoVar = Load->getPointerOperand();
-    if (llvm::StoreInst *Store =
-            dyn_cast<llvm::StoreInst>(Load->getPrevNode())) {
-      if (Store->getPointerOperand() != PseudoVar)
-        return V;
-      llvm::Value *OriginalValue = Store->getValueOperand();
-      if (llvm::CallInst *DbgCall =
-              dyn_cast<llvm::CallInst>(Store->getPrevNode())) {
-        if (DbgCall->getCalledFunction() !=
-                llvm::Intrinsic::getDeclaration(&CGM.getModule(),
-                                                llvm::Intrinsic::dbg_declare) ||
-            DbgCall->getNumOperands() != 4)
-          return V;
-        for (int i = 0; i < 3; i++) {
-          if (!isa<llvm::MetadataAsValue>(DbgCall->getArgOperand(i)))
-            return V;
-        }
-        if (llvm::MetadataAsValue *Metadata =
-                dyn_cast<llvm::MetadataAsValue>(DbgCall->getOperand(0))) {
-          if (llvm::ValueAsMetadata *Value =
-                  dyn_cast<llvm::ValueAsMetadata>(Metadata->getMetadata())) {
-            if (Value->getValue() != PseudoVar)
-              return V;
-            if (llvm::AllocaInst *Alloca =
-                    dyn_cast<llvm::AllocaInst>(DbgCall->getPrevNode())) {
-              V->replaceAllUsesWith(OriginalValue);
-              Load->eraseFromParent();
-              Store->eraseFromParent();
-              DbgCall->eraseFromParent();
-              Alloca->eraseFromParent();
-              return OriginalValue;
-            }
+  if (CGDebugInfo *DI = getDebugInfo()) {
+    if (llvm::LoadInst *Load = dyn_cast<llvm::LoadInst>(V)) {
+      if (llvm::MDNode *Tag =
+              Load->getMetadata(llvm::LLVMContext::MD_annotation)) {
+        if (Tag == DI->GetPseudoVariableAnnotation()) {
+          llvm::Value *PseudoVar = Load->getPointerOperand();
+          llvm::AllocaInst *Alloca = dyn_cast<llvm::AllocaInst>(PseudoVar);
+          llvm::StoreInst *Store =
+              dyn_cast_if_present<llvm::StoreInst>(Load->getPrevNode());
+          assert(Store && Store->getPointerOperand() == PseudoVar);
+          llvm::Value *OriginalValue = Store->getValueOperand();
+          V->replaceAllUsesWith(OriginalValue);
+          assert(Store->getPrevNode()->getPrevNode() == PseudoVar);
+          auto It = Load->getIterator();
+          for (int i = 0; i < 4; i++) {
+            (It--)->eraseFromParent();
           }
+          return OriginalValue;
         }
       }
     }
@@ -2074,20 +2055,31 @@ llvm::Value *CodeGenFunction::EmitLoadOfScalar(Address Addr, bool Volatile,
   // Since there is no variable correspond to an intermediate expression, we
   // create a pseudo variable for it and emit its debug info, as if the
   // expression were written in SSA form.
-  if (CGM.getCodeGenOpts().getDebugInfo() > llvm::codegenoptions::DebugLineTablesOnly &&
+  if (CGM.getCodeGenOpts().getDebugInfo() >
+          llvm::codegenoptions::DebugLineTablesOnly &&
       CGM.getCodeGenOpts().DebugInfoForPointerType) {
-    if (CGDebugInfo *DI = getDebugInfo())
+    if (CGDebugInfo *DI = getDebugInfo()) {
       // We only generate this debug info if loading from GEP, not from other
       // cases such as loading a function argument.
       if (isa<llvm::GetElementPtrInst>(Load->getOperand(0))) {
         const llvm::DebugLoc &DebugLoc = Load->getDebugLoc();
-        llvm::AllocaInst *PseudoVar = Builder.CreateAlloca(
-            Load->getType(), nullptr, Twine("pseudo_").concat(Twine(DebugLoc.getLine())).concat("_").concat(Twine(DebugLoc.getCol())));
+        llvm::AllocaInst *PseudoVar =
+            Builder.CreateAlloca(Load->getType(), nullptr,
+                                 Twine("pseudo_")
+                                     .concat(Twine(DebugLoc.getLine()))
+                                     .concat("_")
+                                     .concat(Twine(DebugLoc.getCol())));
         DI->EmitPseudoVariable(PseudoVar, Ty, Loc);
         Address PseudoVarAddr(PseudoVar, Load->getType(), Addr.getAlignment());
         Builder.CreateStore(Load, PseudoVarAddr);
         Load = Builder.CreateLoad(PseudoVarAddr);
+        // Set a special metadata tag to this instruction, in the case we need
+        // to revert it because there is already a destination variable for the
+        // load.
+        Load->setMetadata(llvm::LLVMContext::MD_annotation,
+                          DI->GetPseudoVariableAnnotation());
       }
+    }
   }
 
   return EmitFromMemory(Load, Ty);
@@ -5644,10 +5636,8 @@ LValue CodeGenFunction::EmitBinaryOperatorLValue(const BinaryOperator *E) {
     }
 
     RValue RV = EmitAnyExpr(E->getRHS());
-    if (CGM.getCodeGenOpts().getDebugInfo() > llvm::codegenoptions::DebugLineTablesOnly &&
-        CGM.getCodeGenOpts().DebugInfoForPointerType)
-      if (isa<DeclRefExpr>(E->getLHS()) && RV.isScalar())
-        RV = RValue::get(UnemitPseudoVariable(RV.getScalarVal()));
+    if (isa<DeclRefExpr>(E->getLHS()) && RV.isScalar())
+      RV = RValue::get(UnemitPseudoVariable(RV.getScalarVal()));
     LValue LV = EmitCheckedLValue(E->getLHS(), TCK_Store);
     if (RV.isScalar())
       EmitNullabilityCheck(LV, RV.getScalarVal(), E->getExprLoc());

>From 37d3c1b74d1931819de211bbdb9ff07bf7c092d7 Mon Sep 17 00:00:00 2001
From: William Huang <williamjhuang at google.com>
Date: Wed, 13 Mar 2024 18:07:42 -0400
Subject: [PATCH 3/9] Rewrite code to emit debug info for pointer instead of
 pointee, as requested by kernel dev.

---
 clang/lib/CodeGen/CGDebugInfo.cpp   | 68 ++++++++++++++++++++++------
 clang/lib/CodeGen/CGDebugInfo.h     | 17 +++----
 clang/lib/CodeGen/CGDecl.cpp        |  1 -
 clang/lib/CodeGen/CGExpr.cpp        | 69 -----------------------------
 clang/lib/CodeGen/CGExprScalar.cpp  | 19 +++++++-
 clang/lib/CodeGen/CodeGenFunction.h |  5 ---
 llvm/include/llvm/IR/DIBuilder.h    |  2 +
 7 files changed, 79 insertions(+), 102 deletions(-)

diff --git a/clang/lib/CodeGen/CGDebugInfo.cpp b/clang/lib/CodeGen/CGDebugInfo.cpp
index 7aa96332e495b3..1aaed9e9f15eb7 100644
--- a/clang/lib/CodeGen/CGDebugInfo.cpp
+++ b/clang/lib/CodeGen/CGDebugInfo.cpp
@@ -5636,29 +5636,69 @@ void CGDebugInfo::EmitExternalVariable(llvm::GlobalVariable *Var,
   Var->addDebugInfo(GVE);
 }
 
-void CGDebugInfo::EmitPseudoVariable(llvm::AllocaInst *Alloca, QualType Ty,
+void CGDebugInfo::EmitPseudoVariable(CGBuilderTy &Builder,
+                                     llvm::Instruction *Value, QualType Ty,
                                      SourceLocation Loc) {
+  // Only when -g2 or above is specified, debug info for variables will be
+  // generated.
+  if (CGM.getCodeGenOpts().getDebugInfo() <=
+      llvm::codegenoptions::DebugLineTablesOnly)
+    return;
+
   llvm::DIFile *Unit = getOrCreateFile(Loc);
+  llvm::DIType *Type = getOrCreateType(Ty, Unit);
+
+  // Check if Value is already a declared variable and has debug info, in this
+  // case we have nothing to do. Clang emits declared variable as alloca, and
+  // it is loaded upon use, so we identify such pattern here.
+  if (llvm::LoadInst *Load = dyn_cast<llvm::LoadInst>(Value)) {
+    llvm::Value *Var = Load->getPointerOperand();
+    if (llvm::Metadata *MDValue = llvm::ValueAsMetadata::getIfExists(Var)) {
+      if (llvm::Value *DbgValue = llvm::MetadataAsValue::getIfExists(
+              CGM.getLLVMContext(), MDValue)) {
+        for (llvm::User *U : DbgValue->users()) {
+          if (llvm::CallInst *DbgDeclare = dyn_cast<llvm::CallInst>(U)) {
+            if (DbgDeclare->getCalledFunction() == DBuilder.GetDeclareFn() &&
+                DbgDeclare->getArgOperand(0) == DbgValue) {
+              // There can be implicit type cast applied on a variable if it is
+              // an opaque ptr, in this case its debug info may not match the
+              // actual type of object being used as in the next instruction, so
+              // we will need to emit a pseudo variable for type-casted value.
+              llvm::DILocalVariable *MDNode = dyn_cast<llvm::DILocalVariable>(
+                  dyn_cast<llvm::MetadataAsValue>(DbgDeclare->getOperand(1))
+                      ->getMetadata());
+              if (MDNode->getType() == Type)
+                return;
+            }
+          }
+        }
+      }
+    }
+  }
+
+  // Insert a sequence of instructions to materialize Value on the stack.
+  auto SaveInsertionPoint = Builder.saveIP();
+  Builder.SetInsertPoint(++(Value->getIterator()));
+  llvm::AllocaInst *PseudoVar = Builder.CreateAlloca(Value->getType());
+  Address PseudoVarAddr(PseudoVar, Value->getType(),
+                        CharUnits::fromQuantity(PseudoVar->getAlign()));
+  llvm::LoadInst *Load = Builder.CreateLoad(PseudoVarAddr);
+  Value->replaceAllUsesWith(Load);
+  Builder.SetInsertPoint(Load);
+  Builder.CreateStore(Value, PseudoVarAddr);
+
+  // Emit debug info for materialized Value.
   unsigned Line = getLineNumber(Loc);
   unsigned Column = getColumnNumber(Loc);
   llvm::DILocalVariable *D = DBuilder.createAutoVariable(
-      LexicalBlockStack.back(), Alloca->getName(), getOrCreateFile(Loc), Line,
-      getOrCreateType(Ty, Unit));
+      LexicalBlockStack.back(), "pseudo_var", Unit, Line, Type);
   llvm::DILocation *DIL =
       llvm::DILocation::get(CGM.getLLVMContext(), Line, Column,
                             LexicalBlockStack.back(), CurInlinedAt);
   SmallVector<uint64_t> Expr;
-  DBuilder.insertDeclare(Alloca, D, DBuilder.createExpression(Expr), DIL,
-                         Alloca->getParent());
-}
-
-llvm::MDNode *CGDebugInfo::GetPseudoVariableAnnotation() {
-  if (!PseudoVariableAnnotation)
-    PseudoVariableAnnotation =
-        llvm::MDNode::get(CGM.getLLVMContext(),
-                          llvm::MDString::get(CGM.getLLVMContext(),
-                                              "fdebug-info-for-pointer-type"));
-  return PseudoVariableAnnotation;
+  DBuilder.insertDeclare(PseudoVar, D, DBuilder.createExpression(Expr), DIL,
+                         Load);
+  Builder.restoreIP(SaveInsertionPoint);
 }
 
 void CGDebugInfo::EmitGlobalAlias(const llvm::GlobalValue *GV,
diff --git a/clang/lib/CodeGen/CGDebugInfo.h b/clang/lib/CodeGen/CGDebugInfo.h
index 047095073c6965..6452861a1be104 100644
--- a/clang/lib/CodeGen/CGDebugInfo.h
+++ b/clang/lib/CodeGen/CGDebugInfo.h
@@ -170,9 +170,6 @@ class CGDebugInfo {
   /// The key is coroutine real parameters, value is DIVariable in LLVM IR.
   Param2DILocTy ParamDbgMappings;
 
-  /// Cached object for GetPseudoVariableAnnotation().
-  llvm::MDNode *PseudoVariableAnnotation = nullptr;
-
   /// Helper functions for getOrCreateType.
   /// @{
   /// Currently the checksum of an interface includes the number of
@@ -532,15 +529,11 @@ class CGDebugInfo {
   /// Emit information about an external variable.
   void EmitExternalVariable(llvm::GlobalVariable *GV, const VarDecl *Decl);
 
-  /// Emit debug information for a pseudo variable assigned to the value of an
-  /// intermediate expression, so that a performance counter can track the usage
-  /// of a specific expression of interest.
-  void EmitPseudoVariable(llvm::AllocaInst *Alloca, QualType Ty,
-                          SourceLocation Loc);
-
-  /// Get the special annotation tag that indicates the instruction is
-  /// associated with EmitPseudoVariable.
-  llvm::MDNode *GetPseudoVariableAnnotation();
+  /// Emit a pseudo variable and debug info for an intermediate value if it does
+  /// not correspond to a variable in the source code, so that a profiler can
+  /// track more accurate usage of certain instructions of interest.
+  void EmitPseudoVariable(CGBuilderTy &Builder, llvm::Instruction *Value,
+                          QualType Ty, SourceLocation Loc);
 
   /// Emit information about global variable alias.
   void EmitGlobalAlias(const llvm::GlobalValue *GV, const GlobalDecl Decl);
diff --git a/clang/lib/CodeGen/CGDecl.cpp b/clang/lib/CodeGen/CGDecl.cpp
index 04cd0eaddb83ad..bbe14ef4c17244 100644
--- a/clang/lib/CodeGen/CGDecl.cpp
+++ b/clang/lib/CodeGen/CGDecl.cpp
@@ -793,7 +793,6 @@ void CodeGenFunction::EmitScalarInit(const Expr *init, const ValueDecl *D,
   Qualifiers::ObjCLifetime lifetime = lvalue.getObjCLifetime();
   if (!lifetime) {
     llvm::Value *value = EmitScalarExpr(init);
-    value = UnemitPseudoVariable(value);
     if (capturedByInit)
       drillIntoBlockVariable(*this, lvalue, cast<VarDecl>(D));
     EmitNullabilityCheck(lvalue, value, init->getExprLoc());
diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index 43c962da7a3e3e..c5f6b6d3a99f0b 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -951,39 +951,6 @@ static llvm::Value *getArrayIndexingBound(CodeGenFunction &CGF,
   return nullptr;
 }
 
-/// When a pseudo variable is created for %1, it generates these instructions
-/// in sequence and return %2:
-/// %pseudo = alloca Ty
-/// call void @llvm.dbg.declare(metadata ptr %pseudo, metadata, metadata)
-/// store Ty %1, ptr %pseudo
-/// %2 = load ptr, ptr %pseudo
-/// To undo, we detect and remove this sequence, and replace %2 back with %1.
-llvm::Value *CodeGenFunction::UnemitPseudoVariable(llvm::Value *V) {
-  if (CGDebugInfo *DI = getDebugInfo()) {
-    if (llvm::LoadInst *Load = dyn_cast<llvm::LoadInst>(V)) {
-      if (llvm::MDNode *Tag =
-              Load->getMetadata(llvm::LLVMContext::MD_annotation)) {
-        if (Tag == DI->GetPseudoVariableAnnotation()) {
-          llvm::Value *PseudoVar = Load->getPointerOperand();
-          llvm::AllocaInst *Alloca = dyn_cast<llvm::AllocaInst>(PseudoVar);
-          llvm::StoreInst *Store =
-              dyn_cast_if_present<llvm::StoreInst>(Load->getPrevNode());
-          assert(Store && Store->getPointerOperand() == PseudoVar);
-          llvm::Value *OriginalValue = Store->getValueOperand();
-          V->replaceAllUsesWith(OriginalValue);
-          assert(Store->getPrevNode()->getPrevNode() == PseudoVar);
-          auto It = Load->getIterator();
-          for (int i = 0; i < 4; i++) {
-            (It--)->eraseFromParent();
-          }
-          return OriginalValue;
-        }
-      }
-    }
-  }
-  return V;
-}
-
 namespace {
 
 /// \p StructAccessBase returns the base \p Expr of a field access. It returns
@@ -2048,40 +2015,6 @@ llvm::Value *CodeGenFunction::EmitLoadOfScalar(Address Addr, bool Volatile,
                         llvm::MDNode::get(getLLVMContext(), std::nullopt));
     }
 
-  // if -g2 or above and -fdebug-info-for-pointer-type are enabled, emit
-  // additional debug info for loads in an intermediate expression, which allows
-  // a performance counter to deduce the type of the value being loaded, even if
-  // it does not correspond to a variable in the source code.
-  // Since there is no variable correspond to an intermediate expression, we
-  // create a pseudo variable for it and emit its debug info, as if the
-  // expression were written in SSA form.
-  if (CGM.getCodeGenOpts().getDebugInfo() >
-          llvm::codegenoptions::DebugLineTablesOnly &&
-      CGM.getCodeGenOpts().DebugInfoForPointerType) {
-    if (CGDebugInfo *DI = getDebugInfo()) {
-      // We only generate this debug info if loading from GEP, not from other
-      // cases such as loading a function argument.
-      if (isa<llvm::GetElementPtrInst>(Load->getOperand(0))) {
-        const llvm::DebugLoc &DebugLoc = Load->getDebugLoc();
-        llvm::AllocaInst *PseudoVar =
-            Builder.CreateAlloca(Load->getType(), nullptr,
-                                 Twine("pseudo_")
-                                     .concat(Twine(DebugLoc.getLine()))
-                                     .concat("_")
-                                     .concat(Twine(DebugLoc.getCol())));
-        DI->EmitPseudoVariable(PseudoVar, Ty, Loc);
-        Address PseudoVarAddr(PseudoVar, Load->getType(), Addr.getAlignment());
-        Builder.CreateStore(Load, PseudoVarAddr);
-        Load = Builder.CreateLoad(PseudoVarAddr);
-        // Set a special metadata tag to this instruction, in the case we need
-        // to revert it because there is already a destination variable for the
-        // load.
-        Load->setMetadata(llvm::LLVMContext::MD_annotation,
-                          DI->GetPseudoVariableAnnotation());
-      }
-    }
-  }
-
   return EmitFromMemory(Load, Ty);
 }
 
@@ -5636,8 +5569,6 @@ LValue CodeGenFunction::EmitBinaryOperatorLValue(const BinaryOperator *E) {
     }
 
     RValue RV = EmitAnyExpr(E->getRHS());
-    if (isa<DeclRefExpr>(E->getLHS()) && RV.isScalar())
-      RV = RValue::get(UnemitPseudoVariable(RV.getScalarVal()));
     LValue LV = EmitCheckedLValue(E->getLHS(), TCK_Store);
     if (RV.isScalar())
       EmitNullabilityCheck(LV, RV.getScalarVal(), E->getExprLoc());
diff --git a/clang/lib/CodeGen/CGExprScalar.cpp b/clang/lib/CodeGen/CGExprScalar.cpp
index 181b15e9c7d0a7..0d64607bac399a 100644
--- a/clang/lib/CodeGen/CGExprScalar.cpp
+++ b/clang/lib/CodeGen/CGExprScalar.cpp
@@ -1787,7 +1787,24 @@ Value *ScalarExprEmitter::VisitMemberExpr(MemberExpr *E) {
     }
   }
 
-  return EmitLoadOfLValue(E);
+  llvm::Value *Result = EmitLoadOfLValue(E);
+
+  // If -fdebug-info-for-pointer-type is specified, emit a pseudo variable and
+  // its debug info for the pointer, even if there is no variable associated
+  // with the pointer's expression.
+  if (CGF.CGM.getCodeGenOpts().DebugInfoForPointerType && CGF.getDebugInfo()) {
+    if (llvm::LoadInst *Load = dyn_cast<llvm::LoadInst>(Result)) {
+      if (llvm::GetElementPtrInst *GEP =
+              dyn_cast<llvm::GetElementPtrInst>(Load->getPointerOperand())) {
+        if (llvm::Instruction *Pointer =
+                dyn_cast<llvm::Instruction>(GEP->getPointerOperand())) {
+          CGF.getDebugInfo()->EmitPseudoVariable(
+              Builder, Pointer, E->getBase()->getType(), E->getExprLoc());
+        }
+      }
+    }
+  }
+  return Result;
 }
 
 Value *ScalarExprEmitter::VisitArraySubscriptExpr(ArraySubscriptExpr *E) {
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index 36a572ace2ef64..143ad64e8816b1 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -3104,11 +3104,6 @@ class CodeGenFunction : public CodeGenTypeCache {
   /// Get the record field index as represented in debug info.
   unsigned getDebugInfoFIndex(const RecordDecl *Rec, unsigned FieldIndex);
 
-  /// When the result of EmitLoadOfScalar is immediately assigned to a declared
-  /// variable, the pseudo variable emitted for it (when the flag
-  /// -fdebug-info-for-pointer-type is specified) should be undone since there
-  /// is already a debug value emitted for the declared variable.
-  llvm::Value *UnemitPseudoVariable(llvm::Value *V);
 
   //===--------------------------------------------------------------------===//
   //                            Declaration Emission
diff --git a/llvm/include/llvm/IR/DIBuilder.h b/llvm/include/llvm/IR/DIBuilder.h
index edec161b397155..e420f73e152907 100644
--- a/llvm/include/llvm/IR/DIBuilder.h
+++ b/llvm/include/llvm/IR/DIBuilder.h
@@ -1024,6 +1024,8 @@ namespace llvm {
       N->replaceAllUsesWith(Replacement);
       return Replacement;
     }
+
+    Function *GetDeclareFn() { return DeclareFn; }
   };
 
   // Create wrappers for C Binding types (see CBindingWrapping.h).

>From 75faf06a3e604444fabacec1d0df65cce47d1d95 Mon Sep 17 00:00:00 2001
From: William Huang <williamjhuang at google.com>
Date: Thu, 14 Mar 2024 04:57:54 -0400
Subject: [PATCH 4/9] More precise debug loc

---
 clang/lib/CodeGen/CGDebugInfo.cpp  | 10 +++++-----
 clang/lib/CodeGen/CGDebugInfo.h    |  2 +-
 clang/lib/CodeGen/CGExprScalar.cpp |  2 +-
 3 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/clang/lib/CodeGen/CGDebugInfo.cpp b/clang/lib/CodeGen/CGDebugInfo.cpp
index 1aaed9e9f15eb7..f28e83a3b99ec2 100644
--- a/clang/lib/CodeGen/CGDebugInfo.cpp
+++ b/clang/lib/CodeGen/CGDebugInfo.cpp
@@ -5637,15 +5637,14 @@ void CGDebugInfo::EmitExternalVariable(llvm::GlobalVariable *Var,
 }
 
 void CGDebugInfo::EmitPseudoVariable(CGBuilderTy &Builder,
-                                     llvm::Instruction *Value, QualType Ty,
-                                     SourceLocation Loc) {
+                                     llvm::Instruction *Value, QualType Ty) {
   // Only when -g2 or above is specified, debug info for variables will be
   // generated.
   if (CGM.getCodeGenOpts().getDebugInfo() <=
       llvm::codegenoptions::DebugLineTablesOnly)
     return;
 
-  llvm::DIFile *Unit = getOrCreateFile(Loc);
+  llvm::DIFile *Unit = Builder.getCurrentDebugLocation()->getFile();
   llvm::DIType *Type = getOrCreateType(Ty, Unit);
 
   // Check if Value is already a declared variable and has debug info, in this
@@ -5679,6 +5678,7 @@ void CGDebugInfo::EmitPseudoVariable(CGBuilderTy &Builder,
   // Insert a sequence of instructions to materialize Value on the stack.
   auto SaveInsertionPoint = Builder.saveIP();
   Builder.SetInsertPoint(++(Value->getIterator()));
+  Builder.SetCurrentDebugLocation(Value->getDebugLoc());
   llvm::AllocaInst *PseudoVar = Builder.CreateAlloca(Value->getType());
   Address PseudoVarAddr(PseudoVar, Value->getType(),
                         CharUnits::fromQuantity(PseudoVar->getAlign()));
@@ -5688,8 +5688,8 @@ void CGDebugInfo::EmitPseudoVariable(CGBuilderTy &Builder,
   Builder.CreateStore(Value, PseudoVarAddr);
 
   // Emit debug info for materialized Value.
-  unsigned Line = getLineNumber(Loc);
-  unsigned Column = getColumnNumber(Loc);
+  unsigned Line = Builder.getCurrentDebugLocation().getLine();
+  unsigned Column = Builder.getCurrentDebugLocation().getCol();
   llvm::DILocalVariable *D = DBuilder.createAutoVariable(
       LexicalBlockStack.back(), "pseudo_var", Unit, Line, Type);
   llvm::DILocation *DIL =
diff --git a/clang/lib/CodeGen/CGDebugInfo.h b/clang/lib/CodeGen/CGDebugInfo.h
index 6452861a1be104..2756ea1e7b4f1e 100644
--- a/clang/lib/CodeGen/CGDebugInfo.h
+++ b/clang/lib/CodeGen/CGDebugInfo.h
@@ -533,7 +533,7 @@ class CGDebugInfo {
   /// not correspond to a variable in the source code, so that a profiler can
   /// track more accurate usage of certain instructions of interest.
   void EmitPseudoVariable(CGBuilderTy &Builder, llvm::Instruction *Value,
-                          QualType Ty, SourceLocation Loc);
+                          QualType Ty);
 
   /// Emit information about global variable alias.
   void EmitGlobalAlias(const llvm::GlobalValue *GV, const GlobalDecl Decl);
diff --git a/clang/lib/CodeGen/CGExprScalar.cpp b/clang/lib/CodeGen/CGExprScalar.cpp
index 0d64607bac399a..3fc615336a30d2 100644
--- a/clang/lib/CodeGen/CGExprScalar.cpp
+++ b/clang/lib/CodeGen/CGExprScalar.cpp
@@ -1799,7 +1799,7 @@ Value *ScalarExprEmitter::VisitMemberExpr(MemberExpr *E) {
         if (llvm::Instruction *Pointer =
                 dyn_cast<llvm::Instruction>(GEP->getPointerOperand())) {
           CGF.getDebugInfo()->EmitPseudoVariable(
-              Builder, Pointer, E->getBase()->getType(), E->getExprLoc());
+              Builder, Pointer, E->getBase()->getType());
         }
       }
     }

>From 404bd99a38e88909d5f9378cc716024cdf64adfe Mon Sep 17 00:00:00 2001
From: William Huang <williamjhuang at google.com>
Date: Thu, 14 Mar 2024 23:17:02 -0400
Subject: [PATCH 5/9] Correct debug type info for "a.b" expr

---
 clang/lib/CodeGen/CGExprScalar.cpp | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/clang/lib/CodeGen/CGExprScalar.cpp b/clang/lib/CodeGen/CGExprScalar.cpp
index 3fc615336a30d2..cad79e77052842 100644
--- a/clang/lib/CodeGen/CGExprScalar.cpp
+++ b/clang/lib/CodeGen/CGExprScalar.cpp
@@ -1798,8 +1798,10 @@ Value *ScalarExprEmitter::VisitMemberExpr(MemberExpr *E) {
               dyn_cast<llvm::GetElementPtrInst>(Load->getPointerOperand())) {
         if (llvm::Instruction *Pointer =
                 dyn_cast<llvm::Instruction>(GEP->getPointerOperand())) {
-          CGF.getDebugInfo()->EmitPseudoVariable(
-              Builder, Pointer, E->getBase()->getType());
+          QualType Ty = E->getBase()->getType();
+          if (!E->isArrow())
+            Ty = CGF.getContext().getPointerType(Ty);
+          CGF.getDebugInfo()->EmitPseudoVariable(Builder, Pointer, Ty);
         }
       }
     }

>From 630137e311aa68a42ff57a69306f518933f090ca Mon Sep 17 00:00:00 2001
From: William Huang <williamjhuang at google.com>
Date: Thu, 14 Mar 2024 23:44:39 -0400
Subject: [PATCH 6/9] Add test case

---
 .../test/CodeGenCXX/debug-info-ptr-to-ptr.cpp | 120 ++++++++++++++++++
 1 file changed, 120 insertions(+)
 create mode 100644 clang/test/CodeGenCXX/debug-info-ptr-to-ptr.cpp

diff --git a/clang/test/CodeGenCXX/debug-info-ptr-to-ptr.cpp b/clang/test/CodeGenCXX/debug-info-ptr-to-ptr.cpp
new file mode 100644
index 00000000000000..6758ef445d463a
--- /dev/null
+++ b/clang/test/CodeGenCXX/debug-info-ptr-to-ptr.cpp
@@ -0,0 +1,120 @@
+// Test debug info for intermediate value of a chained pointer deferencing
+// expression when the flag -fdebug-info-for-pointer-type is enabled.
+// RUN: %clang_cc1 %s -fdebug-info-for-pointer-type -debug-info-kind=constructor -S -emit-llvm -o - | FileCheck %s
+
+class A {
+public:
+  int i;
+  char c;
+  void *p;
+  int arr[3];
+};
+
+class B {
+public:
+  A* a;
+};
+
+class C {
+public:
+  B* b;
+  A* a;
+  A arr[10];
+};
+
+// CHECK-LABEL: define dso_local noundef i32 @{{.*}}func1{{.*}}(
+// CHECK:         [[A_ADDR:%.*]] = getelementptr inbounds %class.B, ptr {{%.*}}, i32 0, i32 0, !dbg [[DBG1:![0-9]+]]
+// CHECK-NEXT:    [[A:%.*]] = load ptr, ptr [[A_ADDR]], align {{.*}}, !dbg [[DBG1]]
+// CHECK-NEXT:    [[PSEUDO1:%.*]] = alloca ptr, align {{.*}}, !dbg [[DBG1]]
+// CHECK-NEXT:    store ptr [[A]], ptr [[PSEUDO1]], align {{.*}}, !dbg [[DBG1]]
+// CHECK-NEXT:    call void @llvm.dbg.declare(metadata ptr [[PSEUDO1]], metadata [[META1:![0-9]+]], metadata !DIExpression()), !dbg [[DBG1]]
+// CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[PSEUDO1]], align {{.*}}, !dbg [[DBG1]]
+// CHECK-NEXT:    {{%.*}} = getelementptr inbounds %class.A, ptr [[TMP1]], i32 0, i32 0,
+int func1(B *b) {
+  return b->a->i;
+}
+
+// Should generate a pseudo variable when pointer is type-casted.
+// CHECK-LABEL: define dso_local noundef ptr @{{.*}}func2{{.*}}(
+// CHECK:         call void @llvm.dbg.declare(metadata ptr [[B_ADDR:%.*]], metadata [[META2:![0-9]+]], metadata !DIExpression())
+// CHECK-NEXT:    [[B:%.*]] = load ptr, ptr [[B_ADDR]],
+// CHECK-NEXT:    [[PSEUDO1:%.*]] = alloca ptr,
+// CHECK-NEXT:    store ptr [[B]], ptr [[PSEUDO1]],
+// CHECK-NEXT:    call void @llvm.dbg.declare(metadata ptr [[PSEUDO1]], metadata [[META3:![0-9]+]], metadata !DIExpression())
+// CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[PSEUDO1]],
+// CHECK-NEXT:    {{%.*}} = getelementptr inbounds %class.B, ptr [[TMP1]], i32 0,
+A* func2(void *b) {
+  return ((B*)b)->a;
+}
+
+// Should not generate pseudo variable in this case.
+// CHECK-LABEL: define dso_local noundef i32 @{{.*}}func3{{.*}}(
+// CHECK:    call void @llvm.dbg.declare(metadata ptr [[B_ADDR:%.*]], metadata [[META4:![0-9]+]], metadata !DIExpression())
+// CHECK:    call void @llvm.dbg.declare(metadata ptr [[LOCAL1:%.*]], metadata [[META5:![0-9]+]], metadata !DIExpression())
+// CHECK-NOT: call void @llvm.dbg.declare(metadata ptr
+int func3(B *b) {
+  A *local1 = b->a;
+  return local1->i;
+}
+
+// CHECK-LABEL: define dso_local noundef signext i8 @{{.*}}func4{{.*}}(
+// CHECK:         [[A_ADDR:%.*]] = getelementptr inbounds %class.C, ptr {{%.*}}, i32 0, i32 1
+// CHECK-NEXT:    [[A:%.*]] = load ptr, ptr [[A_ADDR]],
+// CHECK-NEXT:    [[PSEUDO1:%.*]] = alloca ptr,
+// CHECK-NEXT:    store ptr [[A]], ptr [[PSEUDO1]],
+// CHECK-NEXT:    call void @llvm.dbg.declare(metadata ptr [[PSEUDO1]], metadata [[META6:![0-9]+]], metadata !DIExpression())
+// CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[PSEUDO1]],
+// CHECK-NEXT:    {{%.*}} = getelementptr inbounds %class.A, ptr [[TMP1]], i32 0, i32 0,
+// CHECK:         [[CALL:%.*]] = call noundef ptr @{{.*}}foo{{.*}}(
+// CHECK-NEXT:    [[PSEUDO2:%.*]] = alloca ptr,
+// CHECK-NEXT:    store ptr [[CALL]], ptr [[PSEUDO2]]
+// CHECK-NEXT:    call void @llvm.dbg.declare(metadata ptr [[PSEUDO2]], metadata [[META6]], metadata !DIExpression())
+// CHECK-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[PSEUDO2]]
+// CHECK-NEXT:    [[I1:%.*]] = getelementptr inbounds %class.A, ptr [[TMP2]], i32 0, i32 1
+char func4(C *c) {
+  extern A* foo(int x);
+  return foo(c->a->i)->c;
+}
+
+// CHECK-LABEL: define dso_local noundef signext i8 @{{.*}}func5{{.*}}(
+// CHECK:         call void @llvm.dbg.declare(metadata ptr {{%.*}}, metadata [[META7:![0-9]+]], metadata !DIExpression())
+// CHECK:         call void @llvm.dbg.declare(metadata ptr {{%.*}}, metadata [[META8:![0-9]+]], metadata !DIExpression())
+// CHECK:         [[A_ADDR:%.*]] = getelementptr inbounds %class.A, ptr {{%.*}}, i64 {{%.*}},
+// CHECK-NEXT:    [[PSEUDO1:%.*]] = alloca ptr,
+// CHECK-NEXT:    store ptr [[A_ADDR]], ptr [[PSEUDO1]],
+// CHECK-NEXT:    call void @llvm.dbg.declare(metadata ptr [[PSEUDO1]], metadata [[META9:![0-9]+]], metadata !DIExpression())
+// CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[PSEUDO1]],
+// CHECK-NEXT:    {{%.*}} = getelementptr inbounds %class.A, ptr [[TMP1]], i32 0, i32 1,
+char func5(void *arr, int n) {
+  return ((A*)arr)[n].c;
+}
+
+// CHECK-LABEL: define dso_local noundef i32 @{{.*}}func6{{.*}}(
+// CHECK:         call void @llvm.dbg.declare(metadata ptr {{%.*}}, metadata [[META10:![0-9]+]], metadata !DIExpression())
+// CHECK:         call void @llvm.dbg.declare(metadata ptr {{%.*}}, metadata [[META11:![0-9]+]], metadata !DIExpression())
+int func6(B &b) {
+  return reinterpret_cast<A&>(b).i;
+}
+
+// CHECK-DAG: [[META_A:![0-9]+]] = distinct !DICompositeType(tag: DW_TAG_class_type, name: "A",
+// CHECK-DAG: [[META_AP:![0-9]+]] = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: [[META_A]],
+// CHECK-DAG: [[META_B:![0-9]+]] = distinct !DICompositeType(tag: DW_TAG_class_type, name: "B",
+// CHECK-DAG: [[META_BP:![0-9]+]] = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: [[META_B]],
+// CHECK-DAG: [[META_C:![0-9]+]] = distinct !DICompositeType(tag: DW_TAG_class_type, name: "C",
+// CHECK-DAG: [[META_CP:![0-9]+]] = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: [[META_C]],
+// CHECK-DAG: [[META_VP:![0-9]+]] = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: null,
+// CHECK-DAG: [[META_I32:![0-9]+]] = !DIBasicType(name: "int", size: 32,
+// CHECK-DAG: [[META_BR:![0-9]+]] = !DIDerivedType(tag: DW_TAG_reference_type, baseType: [[META_B]],
+
+// CHECK-DAG: [[DBG1]] = !DILocation(line: 34, column: 13,
+// CHECK-DAG: [[META1]] = !DILocalVariable(name: "pseudo_var", scope: {{.*}}, file: {{.*}}, line: 34, type: [[META_AP]])
+// CHECK-DAG: [[META2]] = !DILocalVariable(name: "b", arg: 1, scope: {{.*}}, file: {{.*}}, line: 46, type: [[META_VP]])
+// CHECK-DAG: [[META3]] = !DILocalVariable(name: "pseudo_var", scope: {{.*}}, file: {{.*}}, line: 47, type: [[META_BP]])
+// CHECK-DAG: [[META4]] = !DILocalVariable(name: "b", arg: 1, scope: {{.*}}, file: {{.*}}, line: 55, type: [[META_BP]])
+// CHECK-DAG: [[META5]] = !DILocalVariable(name: "local1", scope: {{.*}}, file: {{.*}}, line: 56, type: [[META_AP]])
+// CHECK-DAG: [[META6]] = !DILocalVariable(name: "pseudo_var", scope: {{.*}}, file: {{.*}}, line: 76, type: [[META_AP]])
+// CHECK-DAG: [[META7]] = !DILocalVariable(name: "arr", arg: 1, scope: {{.*}}, file: {{.*}}, line: 88, type: [[META_VP]])
+// CHECK-DAG: [[META8]] = !DILocalVariable(name: "n", arg: 2, scope: {{.*}}, file: {{.*}}, line: 88, type: [[META_I32]])
+// CHECK-DAG: [[META9]] = !DILocalVariable(name: "pseudo_var", scope: {{.*}}, file: {{.*}}, line: 89, type: [[META_AP]])
+// CHECK-DAG: [[META10]] = !DILocalVariable(name: "b", arg: 1, scope: {{.*}}, file: {{.*}}, line: 95, type: [[META_BR]])
+// CHECK-DAG: [[META11]] = !DILocalVariable(name: "pseudo_var", scope: {{.*}}, file: {{.*}}, line: 96, type: [[META_AP]])

>From 6a1e4e81f4c8c9973ba11502cd6d73cd04151ee6 Mon Sep 17 00:00:00 2001
From: William Huang <williamjhuang at google.com>
Date: Tue, 19 Mar 2024 02:53:01 -0400
Subject: [PATCH 7/9] Handle the case of an invoke instruction, and the case
 where the instruction that gets a pseudo variable is at the end of its BB

---
 clang/lib/CodeGen/CGDebugInfo.cpp | 17 +++++++++++++----
 1 file changed, 13 insertions(+), 4 deletions(-)

diff --git a/clang/lib/CodeGen/CGDebugInfo.cpp b/clang/lib/CodeGen/CGDebugInfo.cpp
index f28e83a3b99ec2..1c42c4deb8745b 100644
--- a/clang/lib/CodeGen/CGDebugInfo.cpp
+++ b/clang/lib/CodeGen/CGDebugInfo.cpp
@@ -5644,7 +5644,12 @@ void CGDebugInfo::EmitPseudoVariable(CGBuilderTy &Builder,
       llvm::codegenoptions::DebugLineTablesOnly)
     return;
 
-  llvm::DIFile *Unit = Builder.getCurrentDebugLocation()->getFile();
+  // Not supported for invoke instruction.
+  if (Value->isTerminator())
+    return;
+
+  llvm::DebugLoc DL = Builder.getCurrentDebugLocation();
+  llvm::DIFile *Unit = DL->getFile();
   llvm::DIType *Type = getOrCreateType(Ty, Unit);
 
   // Check if Value is already a declared variable and has debug info, in this
@@ -5677,7 +5682,11 @@ void CGDebugInfo::EmitPseudoVariable(CGBuilderTy &Builder,
 
   // Insert a sequence of instructions to materialize Value on the stack.
   auto SaveInsertionPoint = Builder.saveIP();
-  Builder.SetInsertPoint(++(Value->getIterator()));
+  llvm::Instruction *Next = Value->getIterator()->getNextNode();
+  if (Next)
+    Builder.SetInsertPoint(Next);
+  else
+    Builder.SetInsertPoint(Value->getParent());
   Builder.SetCurrentDebugLocation(Value->getDebugLoc());
   llvm::AllocaInst *PseudoVar = Builder.CreateAlloca(Value->getType());
   Address PseudoVarAddr(PseudoVar, Value->getType(),
@@ -5688,8 +5697,8 @@ void CGDebugInfo::EmitPseudoVariable(CGBuilderTy &Builder,
   Builder.CreateStore(Value, PseudoVarAddr);
 
   // Emit debug info for materialized Value.
-  unsigned Line = Builder.getCurrentDebugLocation().getLine();
-  unsigned Column = Builder.getCurrentDebugLocation().getCol();
+  unsigned Line = DL.getLine();
+  unsigned Column = DL.getCol();
   llvm::DILocalVariable *D = DBuilder.createAutoVariable(
       LexicalBlockStack.back(), "pseudo_var", Unit, Line, Type);
   llvm::DILocation *DIL =

>From a0dab26bee51f3ec533bec9303462c69a34dc461 Mon Sep 17 00:00:00 2001
From: William Huang <williamjhuang at google.com>
Date: Tue, 19 Mar 2024 21:31:17 -0400
Subject: [PATCH 8/9] Fix incorrect debug loc after builder emits pseudo
 variable in some cases

---
 clang/lib/CodeGen/CGDebugInfo.cpp | 27 +++++++++++++++------------
 1 file changed, 15 insertions(+), 12 deletions(-)

diff --git a/clang/lib/CodeGen/CGDebugInfo.cpp b/clang/lib/CodeGen/CGDebugInfo.cpp
index 1c42c4deb8745b..a9a384165b63db 100644
--- a/clang/lib/CodeGen/CGDebugInfo.cpp
+++ b/clang/lib/CodeGen/CGDebugInfo.cpp
@@ -5644,12 +5644,7 @@ void CGDebugInfo::EmitPseudoVariable(CGBuilderTy &Builder,
       llvm::codegenoptions::DebugLineTablesOnly)
     return;
 
-  // Not supported for invoke instruction.
-  if (Value->isTerminator())
-    return;
-
-  llvm::DebugLoc DL = Builder.getCurrentDebugLocation();
-  llvm::DIFile *Unit = DL->getFile();
+  llvm::DIFile *Unit = Builder.getCurrentDebugLocation()->getFile();
   llvm::DIType *Type = getOrCreateType(Ty, Unit);
 
   // Check if Value is already a declared variable and has debug info, in this
@@ -5680,14 +5675,20 @@ void CGDebugInfo::EmitPseudoVariable(CGBuilderTy &Builder,
     }
   }
 
-  // Insert a sequence of instructions to materialize Value on the stack.
+  // Find the correct location to insert a sequence of instructions to
+  // materialize Value on the stack.
   auto SaveInsertionPoint = Builder.saveIP();
-  llvm::Instruction *Next = Value->getIterator()->getNextNode();
-  if (Next)
+  if (llvm::InvokeInst *Invoke = dyn_cast<llvm::InvokeInst>(Value))
+    Builder.SetInsertPoint(Invoke->getNormalDest()->begin());
+  else if (llvm::Instruction *Next = Value->getIterator()->getNextNode())
     Builder.SetInsertPoint(Next);
   else
     Builder.SetInsertPoint(Value->getParent());
-  Builder.SetCurrentDebugLocation(Value->getDebugLoc());
+  auto SaveDebugLoc = Builder.getCurrentDebugLocation();
+  llvm::DebugLoc DL = Value->getDebugLoc();
+  if (DL.get())
+    Builder.SetCurrentDebugLocation(DL);
+
   llvm::AllocaInst *PseudoVar = Builder.CreateAlloca(Value->getType());
   Address PseudoVarAddr(PseudoVar, Value->getType(),
                         CharUnits::fromQuantity(PseudoVar->getAlign()));
@@ -5697,8 +5698,8 @@ void CGDebugInfo::EmitPseudoVariable(CGBuilderTy &Builder,
   Builder.CreateStore(Value, PseudoVarAddr);
 
   // Emit debug info for materialized Value.
-  unsigned Line = DL.getLine();
-  unsigned Column = DL.getCol();
+  unsigned Line = Builder.getCurrentDebugLocation().getLine();
+  unsigned Column = Builder.getCurrentDebugLocation().getCol();
   llvm::DILocalVariable *D = DBuilder.createAutoVariable(
       LexicalBlockStack.back(), "pseudo_var", Unit, Line, Type);
   llvm::DILocation *DIL =
@@ -5707,7 +5708,9 @@ void CGDebugInfo::EmitPseudoVariable(CGBuilderTy &Builder,
   SmallVector<uint64_t> Expr;
   DBuilder.insertDeclare(PseudoVar, D, DBuilder.createExpression(Expr), DIL,
                          Load);
+
   Builder.restoreIP(SaveInsertionPoint);
+  Builder.SetCurrentDebugLocation(SaveDebugLoc);
 }
 
 void CGDebugInfo::EmitGlobalAlias(const llvm::GlobalValue *GV,

>From 4f6750bea635520d9892c6695993b005af34bc1f Mon Sep 17 00:00:00 2001
From: William Huang <williamjhuang at google.com>
Date: Wed, 20 Mar 2024 14:39:14 -0400
Subject: [PATCH 9/9] Merge this feature into -fdebug-info-for-profiling option
 instead of having a new flag

---
 clang/include/clang/Basic/DebugOptions.def      | 4 ----
 clang/include/clang/Driver/Options.td           | 4 ----
 clang/lib/CodeGen/CGDebugInfo.cpp               | 3 ++-
 clang/lib/CodeGen/CGExprScalar.cpp              | 8 ++++----
 clang/lib/Driver/ToolChains/Clang.cpp           | 3 ---
 clang/test/CodeGenCXX/debug-info-ptr-to-ptr.cpp | 2 +-
 llvm/include/llvm/IR/DIBuilder.h                | 2 --
 7 files changed, 7 insertions(+), 19 deletions(-)

diff --git a/clang/include/clang/Basic/DebugOptions.def b/clang/include/clang/Basic/DebugOptions.def
index 6dd09f46842077..7cd3edf08a17ea 100644
--- a/clang/include/clang/Basic/DebugOptions.def
+++ b/clang/include/clang/Basic/DebugOptions.def
@@ -129,10 +129,6 @@ DEBUGOPT(CodeViewCommandLine, 1, 0)
 /// Whether emit extra debug info for sample pgo profile collection.
 DEBUGOPT(DebugInfoForProfiling, 1, 0)
 
-/// Whether to generate pseudo variables and their debug info for intermediate
-/// pointer accesses.
-DEBUGOPT(DebugInfoForPointerType, 1, 0)
-
 /// Whether to emit .debug_gnu_pubnames section instead of .debug_pubnames.
 DEBUGOPT(DebugNameTable, 2, 0)
 
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index 96b22d3f7640dd..7f4fa33748faca 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -1675,10 +1675,6 @@ defm debug_info_for_profiling : BoolFOption<"debug-info-for-profiling",
   PosFlag<SetTrue, [], [ClangOption, CC1Option],
           "Emit extra debug info to make sample profile more accurate">,
   NegFlag<SetFalse>>;
-def fdebug_info_for_pointer_type : Flag<["-"], "fdebug-info-for-pointer-type">,
-  Group<f_Group>, Visibility<[ClangOption, CC1Option]>,
-  HelpText<"Generate pseudo variables and their debug info for intermediate pointer accesses">,
-  MarshallingInfoFlag<CodeGenOpts<"DebugInfoForPointerType">>;
 def fprofile_instr_generate : Flag<["-"], "fprofile-instr-generate">,
     Group<f_Group>, Visibility<[ClangOption, CLOption]>,
     HelpText<"Generate instrumented code to collect execution counts into default.profraw file (overridden by '=' form of option or LLVM_PROFILE_FILE env var)">;
diff --git a/clang/lib/CodeGen/CGDebugInfo.cpp b/clang/lib/CodeGen/CGDebugInfo.cpp
index a9a384165b63db..514807f6e22a06 100644
--- a/clang/lib/CodeGen/CGDebugInfo.cpp
+++ b/clang/lib/CodeGen/CGDebugInfo.cpp
@@ -5657,7 +5657,8 @@ void CGDebugInfo::EmitPseudoVariable(CGBuilderTy &Builder,
               CGM.getLLVMContext(), MDValue)) {
         for (llvm::User *U : DbgValue->users()) {
           if (llvm::CallInst *DbgDeclare = dyn_cast<llvm::CallInst>(U)) {
-            if (DbgDeclare->getCalledFunction() == DBuilder.GetDeclareFn() &&
+            if (DbgDeclare->getCalledFunction()->getIntrinsicID() ==
+                    llvm::Intrinsic::dbg_declare &&
                 DbgDeclare->getArgOperand(0) == DbgValue) {
               // There can be implicit type cast applied on a variable if it is
               // an opaque ptr, in this case its debug info may not match the
diff --git a/clang/lib/CodeGen/CGExprScalar.cpp b/clang/lib/CodeGen/CGExprScalar.cpp
index cad79e77052842..41f5465b23c1d2 100644
--- a/clang/lib/CodeGen/CGExprScalar.cpp
+++ b/clang/lib/CodeGen/CGExprScalar.cpp
@@ -1789,10 +1789,10 @@ Value *ScalarExprEmitter::VisitMemberExpr(MemberExpr *E) {
 
   llvm::Value *Result = EmitLoadOfLValue(E);
 
-  // If -fdebug-info-for-pointer-type is specified, emit a pseudo variable and
-  // its debug info for the pointer, even if there is no variable associated
-  // with the pointer's expression.
-  if (CGF.CGM.getCodeGenOpts().DebugInfoForPointerType && CGF.getDebugInfo()) {
+  // If -fdebug-info-for-profiling is specified, emit a pseudo variable and its
+  // debug info for the pointer, even if there is no variable associated with
+  // the pointer's expression.
+  if (CGF.CGM.getCodeGenOpts().DebugInfoForProfiling && CGF.getDebugInfo()) {
     if (llvm::LoadInst *Load = dyn_cast<llvm::LoadInst>(Result)) {
       if (llvm::GetElementPtrInst *GEP =
               dyn_cast<llvm::GetElementPtrInst>(Load->getPointerOperand())) {
diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index 7882c4f1225f1f..bcba7cbbdb58c2 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -4256,9 +4256,6 @@ renderDebugOptions(const ToolChain &TC, const Driver &D, const llvm::Triple &T,
   // decision should be made in the driver as well though.
   llvm::DebuggerKind DebuggerTuning = TC.getDefaultDebuggerTuning();
 
-  if (Args.hasArg(options::OPT_fdebug_info_for_pointer_type))
-    CmdArgs.push_back("-fdebug-info-for-pointer-type");
-
   bool SplitDWARFInlining =
       Args.hasFlag(options::OPT_fsplit_dwarf_inlining,
                    options::OPT_fno_split_dwarf_inlining, false);
diff --git a/clang/test/CodeGenCXX/debug-info-ptr-to-ptr.cpp b/clang/test/CodeGenCXX/debug-info-ptr-to-ptr.cpp
index 6758ef445d463a..28bbc137cce677 100644
--- a/clang/test/CodeGenCXX/debug-info-ptr-to-ptr.cpp
+++ b/clang/test/CodeGenCXX/debug-info-ptr-to-ptr.cpp
@@ -1,6 +1,6 @@
 // Test debug info for intermediate value of a chained pointer deferencing
 // expression when the flag -fdebug-info-for-pointer-type is enabled.
-// RUN: %clang_cc1 %s -fdebug-info-for-pointer-type -debug-info-kind=constructor -S -emit-llvm -o - | FileCheck %s
+// RUN: %clang_cc1 %s -fdebug-info-for-profiling -debug-info-kind=constructor -S -emit-llvm -o - | FileCheck %s
 
 class A {
 public:
diff --git a/llvm/include/llvm/IR/DIBuilder.h b/llvm/include/llvm/IR/DIBuilder.h
index e420f73e152907..edec161b397155 100644
--- a/llvm/include/llvm/IR/DIBuilder.h
+++ b/llvm/include/llvm/IR/DIBuilder.h
@@ -1024,8 +1024,6 @@ namespace llvm {
       N->replaceAllUsesWith(Replacement);
       return Replacement;
     }
-
-    Function *GetDeclareFn() { return DeclareFn; }
   };
 
   // Create wrappers for C Binding types (see CBindingWrapping.h).



More information about the cfe-commits mailing list