[clang] 7aa1fa0 - Reland "[dwarf] Emit a DIGlobalVariable for constant strings."

Mitch Phillips via cfe-commits cfe-commits at lists.llvm.org
Wed May 18 13:58:56 PDT 2022


Author: Mitch Phillips
Date: 2022-05-18T13:56:45-07:00
New Revision: 7aa1fa0a0a07f7949d2d77c099aab43cf9b75a91

URL: https://github.com/llvm/llvm-project/commit/7aa1fa0a0a07f7949d2d77c099aab43cf9b75a91
DIFF: https://github.com/llvm/llvm-project/commit/7aa1fa0a0a07f7949d2d77c099aab43cf9b75a91.diff

LOG: Reland "[dwarf] Emit a DIGlobalVariable for constant strings."

An upcoming patch will extend llvm-symbolizer to provide the source line
information for global variables. The goal is to move AddressSanitizer
off of internal debug info for symbolization onto the DWARF standard
(and doing a clean-up in the process). Currently, ASan reports the line
information for constant strings if a memory safety bug happens around
them. We want to keep this behaviour, so we need to emit debuginfo for
these variables as well.

Reviewed By: dblaikie, rnk, aprantl

Differential Revision: https://reviews.llvm.org/D123534

Added: 
    clang/test/CodeGen/debug-info-variables.c
    llvm/test/DebugInfo/COFF/global-no-strings.ll

Modified: 
    clang/lib/CodeGen/CGDebugInfo.cpp
    clang/lib/CodeGen/CGDebugInfo.h
    clang/lib/CodeGen/CodeGenModule.cpp
    clang/test/VFS/external-names.c
    llvm/lib/AsmParser/LLParser.cpp
    llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
    llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp

Removed: 
    llvm/test/Assembler/invalid-diglobalvariable-missing-name.ll


################################################################################
diff  --git a/clang/lib/CodeGen/CGDebugInfo.cpp b/clang/lib/CodeGen/CGDebugInfo.cpp
index 3d73bfb8ce79..753427029441 100644
--- a/clang/lib/CodeGen/CGDebugInfo.cpp
+++ b/clang/lib/CodeGen/CGDebugInfo.cpp
@@ -5132,7 +5132,7 @@ std::string CGDebugInfo::GetName(const Decl *D, bool Qualified) const {
     return Name;
   codegenoptions::DebugTemplateNamesKind TemplateNamesKind =
       CGM.getCodeGenOpts().getDebugSimpleTemplateNames();
-  
+
   if (!CGM.getCodeGenOpts().hasReducedDebugInfo())
     TemplateNamesKind = codegenoptions::DebugTemplateNamesKind::Full;
 
@@ -5459,6 +5459,21 @@ void CGDebugInfo::EmitGlobalAlias(const llvm::GlobalValue *GV,
   ImportedDeclCache[GD.getCanonicalDecl().getDecl()].reset(ImportDI);
 }
 
+void CGDebugInfo::AddStringLiteralDebugInfo(llvm::GlobalVariable *GV,
+                                            const StringLiteral *S) {
+  SourceLocation Loc = S->getStrTokenLoc(0);
+  PresumedLoc PLoc = CGM.getContext().getSourceManager().getPresumedLoc(Loc);
+  if (!PLoc.isValid())
+    return;
+
+  llvm::DIFile *File = getOrCreateFile(Loc);
+  llvm::DIGlobalVariableExpression *Debug =
+      DBuilder.createGlobalVariableExpression(
+          nullptr, StringRef(), StringRef(), getOrCreateFile(Loc),
+          getLineNumber(Loc), getOrCreateType(S->getType(), File), true);
+  GV->addDebugInfo(Debug);
+}
+
 llvm::DIScope *CGDebugInfo::getCurrentContextDescriptor(const Decl *D) {
   if (!LexicalBlockStack.empty())
     return LexicalBlockStack.back();

diff  --git a/clang/lib/CodeGen/CGDebugInfo.h b/clang/lib/CodeGen/CGDebugInfo.h
index 8984f3eb1d7a..38e3fa5b2fa9 100644
--- a/clang/lib/CodeGen/CGDebugInfo.h
+++ b/clang/lib/CodeGen/CGDebugInfo.h
@@ -533,6 +533,14 @@ class CGDebugInfo {
   /// Emit an @import declaration.
   void EmitImportDecl(const ImportDecl &ID);
 
+  /// DebugInfo isn't attached to string literals by default. While certain
+  /// aspects of debuginfo aren't useful for string literals (like a name), it's
+  /// nice to be able to symbolize the line and column information. This is
+  /// especially useful for sanitizers, as it allows symbolization of
+  /// heap-buffer-overflows on constant strings.
+  void AddStringLiteralDebugInfo(llvm::GlobalVariable *GV,
+                                 const StringLiteral *S);
+
   /// Emit C++ namespace alias.
   llvm::DIImportedEntity *EmitNamespaceAlias(const NamespaceAliasDecl &NA);
 

diff  --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp
index f8bf210dc0e2..703cf4edf5f5 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -5670,6 +5670,11 @@ CodeGenModule::GetAddrOfConstantStringFromLiteral(const StringLiteral *S,
   }
 
   auto GV = GenerateStringLiteral(C, LT, *this, GlobalVariableName, Alignment);
+
+  CGDebugInfo *DI = getModuleDebugInfo();
+  if (DI && getCodeGenOpts().hasReducedDebugInfo())
+    DI->AddStringLiteralDebugInfo(GV, S);
+
   if (Entry)
     *Entry = GV;
 

diff  --git a/clang/test/CodeGen/debug-info-variables.c b/clang/test/CodeGen/debug-info-variables.c
new file mode 100644
index 000000000000..8ec60ff7c1d9
--- /dev/null
+++ b/clang/test/CodeGen/debug-info-variables.c
@@ -0,0 +1,20 @@
+// RUN: %clang_cc1 %s -debug-info-kind=standalone -S -emit-llvm -o - | FileCheck %s
+
+// CHECK: DIGlobalVariable(name: "global",{{.*}} line: [[@LINE+1]]
+int global = 42;
+
+// CHECK: DIGlobalVariable({{.*}}line: [[@LINE+4]],{{.*}} type: [[TYPEID:![0-9]+]]
+// CHECK: [[TYPEID]] = !DICompositeType(tag: DW_TAG_array_type, baseType: [[BASETYPE:![0-9]+]]
+// CHECK: [[BASETYPE]] = !DIBasicType(name: "char"
+const char* s() {
+  return "1234567890";
+}
+
+// Note: Windows has `q -> p -> r` ordering and Linux has `p -> q -> r`.
+// CHECK-DAG: DILocalVariable(name: "p"
+// CHECK-DAG: DILocalVariable(name: "q"
+// CHECK-DAG: DILocalVariable(name: "r"
+int sum(int p, int q) {
+  int r = p + q;
+  return r;
+}

diff  --git a/clang/test/VFS/external-names.c b/clang/test/VFS/external-names.c
index c50d240a32eb..964b174ee57e 100644
--- a/clang/test/VFS/external-names.c
+++ b/clang/test/VFS/external-names.c
@@ -30,8 +30,8 @@
 // Debug info
 
 // RUN: %clang_cc1 -I %t -ivfsoverlay %t.external.yaml -triple %itanium_abi_triple -debug-info-kind=limited -emit-llvm %s -o - | FileCheck -check-prefix=CHECK-DEBUG-EXTERNAL %s
-// CHECK-DEBUG-EXTERNAL: !DISubprogram({{.*}}file: ![[Num:[0-9]+]]
-// CHECK-DEBUG-EXTERNAL: ![[Num]] = !DIFile(filename: "{{[^"]*}}Inputs{{..?}}external-names.h"
+// CHECK-DEBUG-EXTERNAL: ![[Num:[0-9]+]] = !DIFile(filename: "{{[^"]*}}Inputs{{..?}}external-names.h"
+// CHECK-DEBUG-EXTERNAL: !DISubprogram({{.*}}file: ![[Num]]
 
 // RUN: %clang_cc1 -I %t -ivfsoverlay %t.yaml -triple %itanium_abi_triple -debug-info-kind=limited -emit-llvm %s -o - | FileCheck -check-prefix=CHECK-DEBUG %s
 // CHECK-DEBUG-NOT: Inputs

diff  --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp
index d6d8743e13f3..5076c0bff44d 100644
--- a/llvm/lib/AsmParser/LLParser.cpp
+++ b/llvm/lib/AsmParser/LLParser.cpp
@@ -5012,7 +5012,7 @@ bool LLParser::parseDITemplateValueParameter(MDNode *&Result, bool IsDistinct) {
 ///                         declaration: !4, align: 8)
 bool LLParser::parseDIGlobalVariable(MDNode *&Result, bool IsDistinct) {
 #define VISIT_MD_FIELDS(OPTIONAL, REQUIRED)                                    \
-  REQUIRED(name, MDStringField, (/* AllowEmpty */ false));                     \
+  OPTIONAL(name, MDStringField, (/* AllowEmpty */ false));                     \
   OPTIONAL(scope, MDField, );                                                  \
   OPTIONAL(linkageName, MDStringField, );                                      \
   OPTIONAL(file, MDField, );                                                   \

diff  --git a/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
index 83d77cbeeac6..ece4167c791c 100644
--- a/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
@@ -3175,6 +3175,11 @@ void CodeViewDebug::collectGlobalVariableInfo() {
     for (const auto *GVE : CU->getGlobalVariables()) {
       const DIGlobalVariable *DIGV = GVE->getVariable();
       const DIExpression *DIE = GVE->getExpression();
+      // Don't emit string literals in CodeView, as the only useful parts are
+      // generally the filename and line number, which isn't possible to output
+      // in CodeView. String literals should be the only unnamed GlobalVariable
+      // with debug info.
+      if (DIGV->getName().empty()) continue;
 
       if ((DIE->getNumElements() == 2) &&
           (DIE->getElement(0) == dwarf::DW_OP_plus_uconst))

diff  --git a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
index 06fd724b2197..b2095ae5d11d 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
@@ -165,7 +165,9 @@ DIE *DwarfCompileUnit::getOrCreateGlobalVariableDIE(
   } else {
     DeclContext = GV->getScope();
     // Add name and type.
-    addString(*VariableDIE, dwarf::DW_AT_name, GV->getDisplayName());
+    StringRef DisplayName = GV->getDisplayName();
+    if (!DisplayName.empty())
+      addString(*VariableDIE, dwarf::DW_AT_name, GV->getDisplayName());
     if (GTy)
       addType(*VariableDIE, GTy);
 

diff  --git a/llvm/test/Assembler/invalid-diglobalvariable-missing-name.ll b/llvm/test/Assembler/invalid-diglobalvariable-missing-name.ll
deleted file mode 100644
index baf4d73d94f4..000000000000
--- a/llvm/test/Assembler/invalid-diglobalvariable-missing-name.ll
+++ /dev/null
@@ -1,4 +0,0 @@
-; RUN: not llvm-as < %s -disable-output 2>&1 | FileCheck %s
-
-; CHECK: <stdin>:[[@LINE+1]]:24: error: missing required field 'name'
-!0 = !DIGlobalVariable()

diff  --git a/llvm/test/DebugInfo/COFF/global-no-strings.ll b/llvm/test/DebugInfo/COFF/global-no-strings.ll
new file mode 100644
index 000000000000..bc3a3a11dd13
--- /dev/null
+++ b/llvm/test/DebugInfo/COFF/global-no-strings.ll
@@ -0,0 +1,59 @@
+; RUN: llc < %s | FileCheck %s
+; RUN: llc < %s -filetype=obj | llvm-readobj - --codeview | FileCheck %s
+
+; This test ensures that no debuginfo is emitted for the constant "123456789"
+; string. These global variables have debug expressions because DWARF has the
+; ability to tie them to a file name and line number, but this isn't possible
+; in CodeView, so we make sure it's omitted to save file size.
+;
+; The various CodeView outputs should have a description of "my_string", but not
+; the string contents itself.
+
+; C++ source to regenerate:
+; $ cat a.cpp
+; char* my_string =
+;   "12345679";
+;
+; $ clang-cl a.cpp /GS- /Z7 /GR- /clang:-S /clang:-emit-llvm
+
+; CHECK-NOT: S_LDATA
+; CHECK: S_GDATA
+; CHECK-NOT: S_LDATA
+; CHECK-NOT: S_GDATA
+
+; ModuleID = '/tmp/a.c'
+source_filename = "/tmp/a.c"
+target datalayout = "e-m:w-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-windows-msvc19.20.0"
+
+$"??_C at _08HCBFHPJA@12345679?$AA@" = comdat any
+
+@"??_C at _08HCBFHPJA@12345679?$AA@" = linkonce_odr dso_local unnamed_addr constant [9 x i8] c"12345679\00", comdat, align 1, !dbg !0
+ at my_string = dso_local global ptr @"??_C at _08HCBFHPJA@12345679?$AA@", align 8, !dbg !7
+
+!llvm.dbg.cu = !{!9}
+!llvm.linker.options = !{!13, !14}
+!llvm.module.flags = !{!15, !16, !17, !18, !19}
+!llvm.ident = !{!20}
+
+!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression())
+!1 = distinct !DIGlobalVariable(scope: null, file: !2, line: 2, type: !3, isLocal: true, isDefinition: true)
+!2 = !DIFile(filename: "/tmp/a.c", directory: "", checksumkind: CSK_MD5, checksum: "b972961d64de3c90497767110ab58eb6")
+!3 = !DICompositeType(tag: DW_TAG_array_type, baseType: !4, size: 72, elements: !5)
+!4 = !DIBasicType(name: "char", size: 8, encoding: DW_ATE_signed_char)
+!5 = !{!6}
+!6 = !DISubrange(count: 9)
+!7 = !DIGlobalVariableExpression(var: !8, expr: !DIExpression())
+!8 = distinct !DIGlobalVariable(name: "my_string", scope: !9, file: !2, line: 1, type: !12, isLocal: false, isDefinition: true)
+!9 = distinct !DICompileUnit(language: DW_LANG_C99, file: !10, producer: "clang version 15.0.0 (https://github.com/llvm/llvm-project.git 7c1c0849f8a1a6f1bf5f5b554484bbf8b0debd0a)", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, globals: !11, splitDebugInlining: false, nameTableKind: None)
+!10 = !DIFile(filename: "/tmp/a.c", directory: "/usr/local/google/home/mitchp/llvm-build/opt", checksumkind: CSK_MD5, checksum: "b972961d64de3c90497767110ab58eb6")
+!11 = !{!0, !7}
+!12 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !4, size: 64)
+!13 = !{!"/DEFAULTLIB:libcmt.lib"}
+!14 = !{!"/DEFAULTLIB:oldnames.lib"}
+!15 = !{i32 2, !"CodeView", i32 1}
+!16 = !{i32 2, !"Debug Info Version", i32 3}
+!17 = !{i32 1, !"wchar_size", i32 2}
+!18 = !{i32 7, !"PIC Level", i32 2}
+!19 = !{i32 7, !"uwtable", i32 2}
+!20 = !{!"clang version 15.0.0 (https://github.com/llvm/llvm-project.git 7c1c0849f8a1a6f1bf5f5b554484bbf8b0debd0a)"}


        


More information about the cfe-commits mailing list