[llvm] Fix: Distinguish CFI Metadata Checks in MergeFunctions Pass (PR #65963)

Oskar Wirga via llvm-commits llvm-commits at lists.llvm.org
Tue Sep 12 07:11:46 PDT 2023


https://github.com/oskarwirga updated https://github.com/llvm/llvm-project/pull/65963:

>From ca15c7303ab99ab8f4d1f058df523dbded2ca155 Mon Sep 17 00:00:00 2001
From: Oskar Wirga <10386631+oskarwirga at users.noreply.github.com>
Date: Mon, 11 Sep 2023 14:25:40 +0100
Subject: [PATCH 1/2] Fix: Distinguish CFI Metadata Checks in MergeFunctions
 Pass

This diff fixes an issue in the MergeFunctions pass where two different Control Flow Integrity (CFI) metadata checks were incorrectly considered identical. These merges would lead to runtime violations down the line as two separate objects contained a single destructor which itself contained checks for only one of the objects.

Here I update the comparison logic to take into account the metadata at llvm.type.test checks. Now, only truly identical checks will be considered for merging, thus preserving the integrity of each check.

Previous discussion: https://reviews.llvm.org/D154119
---
 .../Transforms/Utils/FunctionComparator.h     |  3 ++
 llvm/lib/Transforms/IPO/MergeFunctions.cpp    | 16 ++++--
 .../Transforms/Utils/FunctionComparator.cpp   | 49 +++++++++++++++++++
 .../MergeFunc/cfi-function-merging.ll         | 38 ++++++++++++++
 4 files changed, 101 insertions(+), 5 deletions(-)
 create mode 100644 llvm/test/Transforms/MergeFunc/cfi-function-merging.ll

diff --git a/llvm/include/llvm/Transforms/Utils/FunctionComparator.h b/llvm/include/llvm/Transforms/Utils/FunctionComparator.h
index c28f868039a1f7b..5b96a18861df878 100644
--- a/llvm/include/llvm/Transforms/Utils/FunctionComparator.h
+++ b/llvm/include/llvm/Transforms/Utils/FunctionComparator.h
@@ -99,6 +99,9 @@ class FunctionComparator {
   /// Test whether the two functions have equivalent behaviour.
   int compare();
 
+  /// Test whether the two functions have equivalent distinct metadata.
+  static int cmpDistinctMetadata(const Function *L, const Function *R);
+
 protected:
   /// Start the comparison.
   void beginCompare() {
diff --git a/llvm/lib/Transforms/IPO/MergeFunctions.cpp b/llvm/lib/Transforms/IPO/MergeFunctions.cpp
index 312a8df440bf1e3..6b0f481d1f43749 100644
--- a/llvm/lib/Transforms/IPO/MergeFunctions.cpp
+++ b/llvm/lib/Transforms/IPO/MergeFunctions.cpp
@@ -401,11 +401,17 @@ bool MergeFunctions::runOnModule(Module &M) {
 
   auto S = HashedFuncs.begin();
   for (auto I = HashedFuncs.begin(), IE = HashedFuncs.end(); I != IE; ++I) {
-    // If the hash value matches the previous value or the next one, we must
-    // consider merging it. Otherwise it is dropped and never considered again.
-    if ((I != S && std::prev(I)->first == I->first) ||
-        (std::next(I) != IE && std::next(I)->first == I->first) ) {
-      Deferred.push_back(WeakTrackingVH(I->second));
+    if ((I != S && std::prev(I)->first == I->first)) {
+      if (!FunctionComparator::cmpDistinctMetadata(std::prev(I)->second,
+                                                   I->second))
+        Deferred.push_back(WeakTrackingVH(I->second));
+      continue;
+    }
+    if ((std::next(I) != IE && std::next(I)->first == I->first)) {
+      if (!FunctionComparator::cmpDistinctMetadata(std::next(I)->second,
+                                                   I->second))
+        Deferred.push_back(WeakTrackingVH(I->second));
+      continue;
     }
   }
 
diff --git a/llvm/lib/Transforms/Utils/FunctionComparator.cpp b/llvm/lib/Transforms/Utils/FunctionComparator.cpp
index b1d74f67377e27f..defa125de7134df 100644
--- a/llvm/lib/Transforms/Utils/FunctionComparator.cpp
+++ b/llvm/lib/Transforms/Utils/FunctionComparator.cpp
@@ -23,6 +23,7 @@
 #include "llvm/IR/Constant.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugInfoMetadata.h"
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/GlobalValue.h"
@@ -836,6 +837,54 @@ int FunctionComparator::cmpValues(const Value *L, const Value *R) const {
   return cmpNumbers(LeftSN.first->second, RightSN.first->second);
 }
 
+int FunctionComparator::cmpDistinctMetadata(const Function *L,
+                                            const Function *R) {
+  // Iterate over the basic blocks and instructions in Function L
+  for (const BasicBlock &BBL : *L) {
+    for (const Instruction &InstL : BBL) {
+      for (unsigned i = 0, e = InstL.getNumOperands(); i != e; ++i) {
+        if (MetadataAsValue *MDL =
+                dyn_cast<MetadataAsValue>(InstL.getOperand(i))) {
+          if (MDNode *MDNodeL = dyn_cast<MDNode>(MDL->getMetadata())) {
+            // Now iterate over the basic blocks and instructions in Function R
+            // to compare against each distinct metadata value in Function L
+            for (const BasicBlock &BBR : *R) {
+              for (const Instruction &InstR : BBR) {
+                for (unsigned j = 0, f = InstR.getNumOperands(); j != f; ++j) {
+                  if (MetadataAsValue *MDR =
+                          dyn_cast<MetadataAsValue>(InstR.getOperand(j))) {
+                    if (MDNode *MDNodeR =
+                            dyn_cast<MDNode>(MDR->getMetadata())) {
+                      // Compare distinct metadata
+                      if (MDNodeL->isDistinct() && MDNodeR->isDistinct()) {
+                        if (isa<DINode>(MDNodeL) && isa<DINode>(MDNodeR)) {
+                          continue; // Skip to the next iteration, they are
+                                    // equivalent
+                        }
+                        return -1;
+                      }
+                      if (MDNodeL->isDistinct()) {
+                        return 1;
+                      }
+                      if (MDNodeR->isDistinct()) {
+                        return -1;
+                      }
+                    }
+                  }
+                }
+              }
+            }
+          }
+        }
+      }
+    }
+  }
+
+  // If we reached here, there were either no distinct metadata or they were all
+  // debug info
+  return 0;
+}
+
 // Test whether two basic blocks have equivalent behaviour.
 int FunctionComparator::cmpBasicBlocks(const BasicBlock *BBL,
                                        const BasicBlock *BBR) const {
diff --git a/llvm/test/Transforms/MergeFunc/cfi-function-merging.ll b/llvm/test/Transforms/MergeFunc/cfi-function-merging.ll
new file mode 100644
index 000000000000000..a3d81f156f753b4
--- /dev/null
+++ b/llvm/test/Transforms/MergeFunc/cfi-function-merging.ll
@@ -0,0 +1,38 @@
+;; Check the cases involving internal CFI instrumented functions where we do not expect functions to be merged.
+; RUN: opt -S -passes=mergefunc < %s | FileCheck %s
+
+
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64-none-linux-android28"
+
+; Function Attrs: nocallback nofree nosync nounwind readnone speculatable willreturn
+declare i1 @llvm.type.test(ptr, metadata) #6
+
+define internal void @A__on_zero_sharedEv(ptr noundef nonnull align 8 dereferenceable(32) %this) {
+; CHECK-LABEL: @A__on_zero_sharedEv
+entry:
+  %this.addr = alloca ptr, align 8
+  store ptr %this, ptr %this.addr, align 8
+  %this1 = load ptr, ptr %this.addr, align 8
+  %vtable = load ptr, ptr %this1, align 8
+  %0 = call i1 @llvm.type.test(ptr %vtable, metadata !11), !nosanitize !47
+  ret void
+}
+
+; Function Attrs: mustprogress noinline nounwind optnone uwtable
+define internal void @B__on_zero_sharedEv(ptr noundef nonnull align 8 dereferenceable(32) %this) {
+; CHECK-LABEL: @B__on_zero_sharedEv
+entry:
+  %this.addr = alloca ptr, align 8
+  store ptr %this, ptr %this.addr, align 8
+  %this1 = load ptr, ptr %this.addr, align 8
+  %vtable = load ptr, ptr %this1, align 8
+  %0 = call i1 @llvm.type.test(ptr %vtable, metadata !22), !nosanitize !47
+  ret void
+}
+
+!10 = !{i64 16, !11}
+!11 = distinct !{}
+!21 = !{i64 16, !22}
+!22 = distinct !{}
+!47 = !{}

>From d3032df933bf9dfabc51b3666b5690766de744d9 Mon Sep 17 00:00:00 2001
From: Oskar Wirga <10386631+oskarwirga at users.noreply.github.com>
Date: Mon, 11 Sep 2023 14:25:40 +0100
Subject: [PATCH 2/2] Fix: Distinguish CFI Metadata Checks in MergeFunctions
 Pass

This diff fixes an issue in the MergeFunctions pass where two different Control Flow Integrity (CFI) metadata checks were incorrectly considered identical. These merges would lead to runtime violations down the line as two separate objects contained a single destructor which itself contained checks for only one of the objects.

Here I update the comparison logic to take into account the metadata at llvm.type.test checks. Now, only truly identical checks will be considered for merging, thus preserving the integrity of each check.

Previous discussion: https://reviews.llvm.org/D154119
---
 .../Transforms/Utils/FunctionComparator.h     |  4 ++
 llvm/lib/Transforms/IPO/MergeFunctions.cpp    | 16 ++++--
 .../Transforms/Utils/FunctionComparator.cpp   | 51 +++++++++++++++++++
 .../MergeFunc/cfi-function-merging.ll         | 38 ++++++++++++++
 4 files changed, 104 insertions(+), 5 deletions(-)
 create mode 100644 llvm/test/Transforms/MergeFunc/cfi-function-merging.ll

diff --git a/llvm/include/llvm/Transforms/Utils/FunctionComparator.h b/llvm/include/llvm/Transforms/Utils/FunctionComparator.h
index c28f868039a1f7b..3359ed92b33a41e 100644
--- a/llvm/include/llvm/Transforms/Utils/FunctionComparator.h
+++ b/llvm/include/llvm/Transforms/Utils/FunctionComparator.h
@@ -99,6 +99,10 @@ class FunctionComparator {
   /// Test whether the two functions have equivalent behaviour.
   int compare();
 
+  /// Test whether the two functions have equivalent distinct metadata. True
+  /// means they are mergeable.
+  static bool mergeableDistinctMetadata(const Function *L, const Function *R);
+
 protected:
   /// Start the comparison.
   void beginCompare() {
diff --git a/llvm/lib/Transforms/IPO/MergeFunctions.cpp b/llvm/lib/Transforms/IPO/MergeFunctions.cpp
index 312a8df440bf1e3..e3c5442a0d3149d 100644
--- a/llvm/lib/Transforms/IPO/MergeFunctions.cpp
+++ b/llvm/lib/Transforms/IPO/MergeFunctions.cpp
@@ -401,11 +401,17 @@ bool MergeFunctions::runOnModule(Module &M) {
 
   auto S = HashedFuncs.begin();
   for (auto I = HashedFuncs.begin(), IE = HashedFuncs.end(); I != IE; ++I) {
-    // If the hash value matches the previous value or the next one, we must
-    // consider merging it. Otherwise it is dropped and never considered again.
-    if ((I != S && std::prev(I)->first == I->first) ||
-        (std::next(I) != IE && std::next(I)->first == I->first) ) {
-      Deferred.push_back(WeakTrackingVH(I->second));
+    if ((I != S && std::prev(I)->first == I->first)) {
+      if (FunctionComparator::mergeableDistinctMetadata(std::prev(I)->second,
+                                                        I->second))
+        Deferred.push_back(WeakTrackingVH(I->second));
+      continue;
+    }
+    if ((std::next(I) != IE && std::next(I)->first == I->first)) {
+      if (FunctionComparator::mergeableDistinctMetadata(std::next(I)->second,
+                                                        I->second))
+        Deferred.push_back(WeakTrackingVH(I->second));
+      continue;
     }
   }
 
diff --git a/llvm/lib/Transforms/Utils/FunctionComparator.cpp b/llvm/lib/Transforms/Utils/FunctionComparator.cpp
index b1d74f67377e27f..086aeda13d249fc 100644
--- a/llvm/lib/Transforms/Utils/FunctionComparator.cpp
+++ b/llvm/lib/Transforms/Utils/FunctionComparator.cpp
@@ -23,6 +23,7 @@
 #include "llvm/IR/Constant.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugInfoMetadata.h"
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/GlobalValue.h"
@@ -836,6 +837,56 @@ int FunctionComparator::cmpValues(const Value *L, const Value *R) const {
   return cmpNumbers(LeftSN.first->second, RightSN.first->second);
 }
 
+// This function iterates over two functions and compares their
+// metadata to make sure we aren't merging functions which have
+// distinct metadata that is different. We don't care about
+// debug data here, it shouldn't change MIR like CFI would.
+bool FunctionComparator::mergeableDistinctMetadata(const Function *L,
+                                                   const Function *R) {
+  auto BBLIt = L->begin();
+  auto BBLEnd = L->end();
+  auto BBRIt = R->begin();
+  auto BBREnd = R->end();
+
+  for (; BBLIt != BBLEnd && BBRIt != BBREnd; ++BBLIt, ++BBRIt) {
+    auto InstLIt = BBLIt->begin();
+    auto InstLEnd = BBLIt->end();
+    auto InstRIt = BBRIt->begin();
+    auto InstREnd = BBRIt->end();
+
+    for (; InstLIt != InstLEnd && InstRIt != InstREnd; ++InstLIt, ++InstRIt) {
+      for (unsigned i = 0, e = InstLIt->getNumOperands(); i != e; ++i) {
+        MetadataAsValue *MDL =
+            dyn_cast<MetadataAsValue>(InstLIt->getOperand(i));
+        if (!MDL)
+          continue;
+
+        MDNode *MDNodeL = dyn_cast<MDNode>(MDL->getMetadata());
+        if (!MDNodeL || !MDNodeL->isDistinct())
+          continue;
+
+        MetadataAsValue *MDR =
+            dyn_cast<MetadataAsValue>(InstRIt->getOperand(i));
+        if (!MDR)
+          return false;
+
+        MDNode *MDNodeR = dyn_cast<MDNode>(MDR->getMetadata());
+        if (!MDNodeR || !MDNodeR->isDistinct())
+          return false;
+
+        if (isa<DINode>(MDNodeL) && isa<DINode>(MDNodeR))
+          continue;
+        if (MDNodeL == MDNodeR)
+          continue;
+
+        return false;
+      }
+    }
+  }
+
+  return true;
+}
+
 // Test whether two basic blocks have equivalent behaviour.
 int FunctionComparator::cmpBasicBlocks(const BasicBlock *BBL,
                                        const BasicBlock *BBR) const {
diff --git a/llvm/test/Transforms/MergeFunc/cfi-function-merging.ll b/llvm/test/Transforms/MergeFunc/cfi-function-merging.ll
new file mode 100644
index 000000000000000..a3d81f156f753b4
--- /dev/null
+++ b/llvm/test/Transforms/MergeFunc/cfi-function-merging.ll
@@ -0,0 +1,38 @@
+;; Check the cases involving internal CFI instrumented functions where we do not expect functions to be merged.
+; RUN: opt -S -passes=mergefunc < %s | FileCheck %s
+
+
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64-none-linux-android28"
+
+; Function Attrs: nocallback nofree nosync nounwind readnone speculatable willreturn
+declare i1 @llvm.type.test(ptr, metadata) #6
+
+define internal void @A__on_zero_sharedEv(ptr noundef nonnull align 8 dereferenceable(32) %this) {
+; CHECK-LABEL: @A__on_zero_sharedEv
+entry:
+  %this.addr = alloca ptr, align 8
+  store ptr %this, ptr %this.addr, align 8
+  %this1 = load ptr, ptr %this.addr, align 8
+  %vtable = load ptr, ptr %this1, align 8
+  %0 = call i1 @llvm.type.test(ptr %vtable, metadata !11), !nosanitize !47
+  ret void
+}
+
+; Function Attrs: mustprogress noinline nounwind optnone uwtable
+define internal void @B__on_zero_sharedEv(ptr noundef nonnull align 8 dereferenceable(32) %this) {
+; CHECK-LABEL: @B__on_zero_sharedEv
+entry:
+  %this.addr = alloca ptr, align 8
+  store ptr %this, ptr %this.addr, align 8
+  %this1 = load ptr, ptr %this.addr, align 8
+  %vtable = load ptr, ptr %this1, align 8
+  %0 = call i1 @llvm.type.test(ptr %vtable, metadata !22), !nosanitize !47
+  ret void
+}
+
+!10 = !{i64 16, !11}
+!11 = distinct !{}
+!21 = !{i64 16, !22}
+!22 = distinct !{}
+!47 = !{}



More information about the llvm-commits mailing list