[clang] [compiler-rt] [llvm] [ASan] Add metadata to renamed instructions so ASan doesn't use the i… (PR #119387)

via llvm-commits llvm-commits at lists.llvm.org
Tue Jan 7 09:13:10 PST 2025


https://github.com/gbMattN updated https://github.com/llvm/llvm-project/pull/119387

>From 8781ff2355750ae61d140620b1f6862537de07e3 Mon Sep 17 00:00:00 2001
From: gbMattN <matthew.nagy at sony.com>
Date: Tue, 10 Dec 2024 15:01:37 +0000
Subject: [PATCH 1/7] [ASan] Add metadata to renamed instructions so ASan
 doesn't use the incorrect name

---
 llvm/lib/IR/ValueSymbolTable.cpp                         | 8 ++++++++
 llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp | 7 ++++++-
 2 files changed, 14 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/IR/ValueSymbolTable.cpp b/llvm/lib/IR/ValueSymbolTable.cpp
index a020acf22a96c5..81bb3f3c5a5e35 100644
--- a/llvm/lib/IR/ValueSymbolTable.cpp
+++ b/llvm/lib/IR/ValueSymbolTable.cpp
@@ -123,6 +123,14 @@ ValueName *ValueSymbolTable::createValueName(StringRef Name, Value *V) {
   }
 
   // Otherwise, there is a naming conflict.  Rename this value.
+  // If we are renaming an instruction, ASan needs to know for it to serialize
+  // properly
+  if (auto *I = dyn_cast<Instruction>(V)) {
+    MDString *trueNameMetadata = MDString::get(V->getContext(), Name);
+    llvm::MDTuple *tuple =
+        llvm::MDTuple::get(V->getContext(), trueNameMetadata);
+    I->setMetadata("OriginalName", tuple);
+  }
   SmallString<256> UniqueName(Name.begin(), Name.end());
   return makeUniqueName(V, UniqueName);
 }
diff --git a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
index cb84588318496c..c696cc38167cd4 100644
--- a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -3430,7 +3430,12 @@ void FunctionStackPoisoner::processStaticAllocas() {
   SmallVector<ASanStackVariableDescription, 16> SVD;
   SVD.reserve(AllocaVec.size());
   for (AllocaInst *AI : AllocaVec) {
-    ASanStackVariableDescription D = {AI->getName().data(),
+    std::string Name = AI->getName().data();
+    if (AI->hasMetadata("OriginalName")) {
+      MDTuple *tuple = dyn_cast<MDTuple>(AI->getMetadata("OriginalName"));
+      Name = dyn_cast<MDString>(tuple->getOperand(0))->getString();
+    }
+    ASanStackVariableDescription D = {Name.c_str(),
                                       ASan.getAllocaSizeInBytes(*AI),
                                       0,
                                       AI->getAlign().value(),

>From 25efafa3d67afb6a9107fdd502f5f6e4f40c311c Mon Sep 17 00:00:00 2001
From: gbMattN <matthew.nagy at sony.com>
Date: Wed, 11 Dec 2024 11:44:01 +0000
Subject: [PATCH 2/7] [bugfix] Fixed string pointer being used out of scope

---
 llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
index c696cc38167cd4..2051fa94678175 100644
--- a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -3430,12 +3430,12 @@ void FunctionStackPoisoner::processStaticAllocas() {
   SmallVector<ASanStackVariableDescription, 16> SVD;
   SVD.reserve(AllocaVec.size());
   for (AllocaInst *AI : AllocaVec) {
-    std::string Name = AI->getName().data();
+    const char* Name = AI->getName().data();
     if (AI->hasMetadata("OriginalName")) {
       MDTuple *tuple = dyn_cast<MDTuple>(AI->getMetadata("OriginalName"));
-      Name = dyn_cast<MDString>(tuple->getOperand(0))->getString();
+      Name = dyn_cast<MDString>(tuple->getOperand(0))->getString().data();
     }
-    ASanStackVariableDescription D = {Name.c_str(),
+    ASanStackVariableDescription D = {Name,
                                       ASan.getAllocaSizeInBytes(*AI),
                                       0,
                                       AI->getAlign().value(),

>From c15ec2fe9bbe4855272a519b7f6dccca57a294eb Mon Sep 17 00:00:00 2001
From: gbMattN <matthew.nagy at sony.com>
Date: Tue, 17 Dec 2024 16:47:11 +0000
Subject: [PATCH 3/7] Now only emit metadata when using a ASan, and tag it with
 an enum rather than a string

---
 clang/lib/CodeGen/CGExpr.cpp                    |  8 ++++++++
 llvm/include/llvm/IR/FixedMetadataKinds.def     |  1 +
 llvm/lib/IR/ValueSymbolTable.cpp                |  8 --------
 .../Instrumentation/AddressSanitizer.cpp        | 17 +++++++----------
 4 files changed, 16 insertions(+), 18 deletions(-)

diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index 5fccc9cbb37ec1..d8fdacf30e12e3 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -137,6 +137,14 @@ llvm::AllocaInst *CodeGenFunction::CreateTempAlloca(llvm::Type *Ty,
     Alloca =
         new llvm::AllocaInst(Ty, CGM.getDataLayout().getAllocaAddrSpace(),
                              ArraySize, Name, AllocaInsertPt->getIterator());
+  if (Alloca->getName() != Name.str() &&
+      SanOpts.Mask & SanitizerKind::Address) {
+
+    llvm::LLVMContext &ctx = Alloca->getContext();
+    llvm::MDString *trueNameMetadata = llvm::MDString::get(ctx, Name.str());
+    llvm::MDTuple *tuple = llvm::MDTuple::get(ctx, trueNameMetadata);
+    Alloca->setMetadata(llvm::LLVMContext::MD_unaltered_name, tuple);
+  }
   if (Allocas) {
     Allocas->Add(Alloca);
   }
diff --git a/llvm/include/llvm/IR/FixedMetadataKinds.def b/llvm/include/llvm/IR/FixedMetadataKinds.def
index df572e8791e13b..41fa34bf09ff65 100644
--- a/llvm/include/llvm/IR/FixedMetadataKinds.def
+++ b/llvm/include/llvm/IR/FixedMetadataKinds.def
@@ -53,3 +53,4 @@ LLVM_FIXED_MD_KIND(MD_DIAssignID, "DIAssignID", 38)
 LLVM_FIXED_MD_KIND(MD_coro_outside_frame, "coro.outside.frame", 39)
 LLVM_FIXED_MD_KIND(MD_mmra, "mmra", 40)
 LLVM_FIXED_MD_KIND(MD_noalias_addrspace, "noalias.addrspace", 41)
+LLVM_FIXED_MD_KIND(MD_unaltered_name, "unaltered.name", 42)
diff --git a/llvm/lib/IR/ValueSymbolTable.cpp b/llvm/lib/IR/ValueSymbolTable.cpp
index 81bb3f3c5a5e35..a020acf22a96c5 100644
--- a/llvm/lib/IR/ValueSymbolTable.cpp
+++ b/llvm/lib/IR/ValueSymbolTable.cpp
@@ -123,14 +123,6 @@ ValueName *ValueSymbolTable::createValueName(StringRef Name, Value *V) {
   }
 
   // Otherwise, there is a naming conflict.  Rename this value.
-  // If we are renaming an instruction, ASan needs to know for it to serialize
-  // properly
-  if (auto *I = dyn_cast<Instruction>(V)) {
-    MDString *trueNameMetadata = MDString::get(V->getContext(), Name);
-    llvm::MDTuple *tuple =
-        llvm::MDTuple::get(V->getContext(), trueNameMetadata);
-    I->setMetadata("OriginalName", tuple);
-  }
   SmallString<256> UniqueName(Name.begin(), Name.end());
   return makeUniqueName(V, UniqueName);
 }
diff --git a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
index 2051fa94678175..87f79bdaa16429 100644
--- a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -3430,18 +3430,15 @@ void FunctionStackPoisoner::processStaticAllocas() {
   SmallVector<ASanStackVariableDescription, 16> SVD;
   SVD.reserve(AllocaVec.size());
   for (AllocaInst *AI : AllocaVec) {
-    const char* Name = AI->getName().data();
-    if (AI->hasMetadata("OriginalName")) {
-      MDTuple *tuple = dyn_cast<MDTuple>(AI->getMetadata("OriginalName"));
+    const char *Name = AI->getName().data();
+    if (AI->hasMetadata(LLVMContext::MD_unaltered_name)) {
+      MDTuple *tuple =
+          dyn_cast<MDTuple>(AI->getMetadata(LLVMContext::MD_unaltered_name));
       Name = dyn_cast<MDString>(tuple->getOperand(0))->getString().data();
     }
-    ASanStackVariableDescription D = {Name,
-                                      ASan.getAllocaSizeInBytes(*AI),
-                                      0,
-                                      AI->getAlign().value(),
-                                      AI,
-                                      0,
-                                      0};
+    ASanStackVariableDescription D = {
+        Name, ASan.getAllocaSizeInBytes(*AI), 0, AI->getAlign().value(), AI, 0,
+        0};
     SVD.push_back(D);
   }
 

>From 38f210b3b25ff05c8c61a96a564fa8a83c7a03ea Mon Sep 17 00:00:00 2001
From: gbMattN <matthew.nagy at sony.com>
Date: Wed, 18 Dec 2024 11:17:44 +0000
Subject: [PATCH 4/7] Added a test

---
 .../asan/TestCases/shadowed-stack-serialization.cpp | 13 +++++++++++++
 1 file changed, 13 insertions(+)
 create mode 100644 compiler-rt/test/asan/TestCases/shadowed-stack-serialization.cpp

diff --git a/compiler-rt/test/asan/TestCases/shadowed-stack-serialization.cpp b/compiler-rt/test/asan/TestCases/shadowed-stack-serialization.cpp
new file mode 100644
index 00000000000000..f4d9ad5f6ea5f7
--- /dev/null
+++ b/compiler-rt/test/asan/TestCases/shadowed-stack-serialization.cpp
@@ -0,0 +1,13 @@
+// RUN: %clangxx_asan -O0 %s -o %t
+// RUN: not %run %t 2>&1 | FileCheck %s
+
+int main()
+{
+	int x;
+	{
+		int x;
+		delete &x;
+	}
+}
+
+// CHECK: [32, 36) 'x'

>From 3a5c89ac2b6aa98d1a090a54605191263e6a0c71 Mon Sep 17 00:00:00 2001
From: gbMattN <matthew.nagy at sony.com>
Date: Tue, 7 Jan 2025 13:34:51 +0000
Subject: [PATCH 5/7] fixup

---
 .../asan/TestCases/shadowed-stack-serialization.cpp | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/compiler-rt/test/asan/TestCases/shadowed-stack-serialization.cpp b/compiler-rt/test/asan/TestCases/shadowed-stack-serialization.cpp
index f4d9ad5f6ea5f7..f2706c671c2612 100644
--- a/compiler-rt/test/asan/TestCases/shadowed-stack-serialization.cpp
+++ b/compiler-rt/test/asan/TestCases/shadowed-stack-serialization.cpp
@@ -1,13 +1,12 @@
 // RUN: %clangxx_asan -O0 %s -o %t
 // RUN: not %run %t 2>&1 | FileCheck %s
 
-int main()
-{
-	int x;
-	{
-		int x;
-		delete &x;
-	}
+int main() {
+  int x;
+  {
+    int x;
+    delete &x;
+  }
 }
 
 // CHECK: [32, 36) 'x'

>From 788906779002cbafc9a00d9810bc105455ae905e Mon Sep 17 00:00:00 2001
From: gbMattN <matthew.nagy at sony.com>
Date: Tue, 7 Jan 2025 17:10:22 +0000
Subject: [PATCH 6/7] Rework to use annotations rather than a new metadata type

---
 clang/lib/CodeGen/CGExpr.cpp                  |  6 +-----
 llvm/include/llvm/IR/FixedMetadataKinds.def   |  1 -
 .../Instrumentation/AddressSanitizer.cpp      | 19 +++++++++++++++----
 3 files changed, 16 insertions(+), 10 deletions(-)

diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index d8fdacf30e12e3..2689cc6f0104e4 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -139,11 +139,7 @@ llvm::AllocaInst *CodeGenFunction::CreateTempAlloca(llvm::Type *Ty,
                              ArraySize, Name, AllocaInsertPt->getIterator());
   if (Alloca->getName() != Name.str() &&
       SanOpts.Mask & SanitizerKind::Address) {
-
-    llvm::LLVMContext &ctx = Alloca->getContext();
-    llvm::MDString *trueNameMetadata = llvm::MDString::get(ctx, Name.str());
-    llvm::MDTuple *tuple = llvm::MDTuple::get(ctx, trueNameMetadata);
-    Alloca->setMetadata(llvm::LLVMContext::MD_unaltered_name, tuple);
+    Alloca->addAnnotationMetadata({"alloca_name_altered", Name.str()});
   }
   if (Allocas) {
     Allocas->Add(Alloca);
diff --git a/llvm/include/llvm/IR/FixedMetadataKinds.def b/llvm/include/llvm/IR/FixedMetadataKinds.def
index 41fa34bf09ff65..df572e8791e13b 100644
--- a/llvm/include/llvm/IR/FixedMetadataKinds.def
+++ b/llvm/include/llvm/IR/FixedMetadataKinds.def
@@ -53,4 +53,3 @@ LLVM_FIXED_MD_KIND(MD_DIAssignID, "DIAssignID", 38)
 LLVM_FIXED_MD_KIND(MD_coro_outside_frame, "coro.outside.frame", 39)
 LLVM_FIXED_MD_KIND(MD_mmra, "mmra", 40)
 LLVM_FIXED_MD_KIND(MD_noalias_addrspace, "noalias.addrspace", 41)
-LLVM_FIXED_MD_KIND(MD_unaltered_name, "unaltered.name", 42)
diff --git a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
index 87f79bdaa16429..7a693c1aee1e72 100644
--- a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -3431,10 +3431,21 @@ void FunctionStackPoisoner::processStaticAllocas() {
   SVD.reserve(AllocaVec.size());
   for (AllocaInst *AI : AllocaVec) {
     const char *Name = AI->getName().data();
-    if (AI->hasMetadata(LLVMContext::MD_unaltered_name)) {
-      MDTuple *tuple =
-          dyn_cast<MDTuple>(AI->getMetadata(LLVMContext::MD_unaltered_name));
-      Name = dyn_cast<MDString>(tuple->getOperand(0))->getString().data();
+    if (AI->hasMetadata(LLVMContext::MD_annotation)) {
+      MDTuple *Annotation = (MDTuple *)AI->getMetadata(LLVMContext::MD_annotation);
+      for (int i = 0; i < Annotation->getNumOperands(); i++) {
+        if (auto Tuple = dyn_cast<MDTuple>(Annotation->getOperand(i))) {
+          for (int i = 0; i < Tuple->getNumOperands(); i++) {
+            if (auto stringMetadata = dyn_cast<MDString>(Tuple->getOperand(i))) {
+              if (stringMetadata->getString() == "alloca_name_altered") {
+                Name = ((MDString *)Tuple->getOperand(i + 1).get())
+                          ->getString()
+                          .data();
+              }
+            }
+          }
+        }
+      }
     }
     ASanStackVariableDescription D = {
         Name, ASan.getAllocaSizeInBytes(*AI), 0, AI->getAlign().value(), AI, 0,

>From b16f97d07b944ddf36dcdea4ba3d50bc40b52fab Mon Sep 17 00:00:00 2001
From: gbMattN <matthew.nagy at sony.com>
Date: Tue, 7 Jan 2025 17:12:57 +0000
Subject: [PATCH 7/7] Use StringRef rather than const char*

---
 llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
index 7a693c1aee1e72..1db4c00f3b28b4 100644
--- a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -3430,7 +3430,7 @@ void FunctionStackPoisoner::processStaticAllocas() {
   SmallVector<ASanStackVariableDescription, 16> SVD;
   SVD.reserve(AllocaVec.size());
   for (AllocaInst *AI : AllocaVec) {
-    const char *Name = AI->getName().data();
+    StringRef Name = AI->getName();
     if (AI->hasMetadata(LLVMContext::MD_annotation)) {
       MDTuple *Annotation = (MDTuple *)AI->getMetadata(LLVMContext::MD_annotation);
       for (int i = 0; i < Annotation->getNumOperands(); i++) {
@@ -3439,8 +3439,7 @@ void FunctionStackPoisoner::processStaticAllocas() {
             if (auto stringMetadata = dyn_cast<MDString>(Tuple->getOperand(i))) {
               if (stringMetadata->getString() == "alloca_name_altered") {
                 Name = ((MDString *)Tuple->getOperand(i + 1).get())
-                          ->getString()
-                          .data();
+                          ->getString();
               }
             }
           }
@@ -3448,7 +3447,7 @@ void FunctionStackPoisoner::processStaticAllocas() {
       }
     }
     ASanStackVariableDescription D = {
-        Name, ASan.getAllocaSizeInBytes(*AI), 0, AI->getAlign().value(), AI, 0,
+        Name.data(), ASan.getAllocaSizeInBytes(*AI), 0, AI->getAlign().value(), AI, 0,
         0};
     SVD.push_back(D);
   }



More information about the llvm-commits mailing list