[llvm] 6baaad7 - [Bitcode] Include indirect users of BlockAddresses in bitcode

Nick Desaulniers via llvm-commits llvm-commits at lists.llvm.org
Tue May 10 16:04:07 PDT 2022


Author: Wende Tan
Date: 2022-05-10T16:03:42-07:00
New Revision: 6baaad740a5abb4bfcff022a8114abb4eea66a2d

URL: https://github.com/llvm/llvm-project/commit/6baaad740a5abb4bfcff022a8114abb4eea66a2d
DIFF: https://github.com/llvm/llvm-project/commit/6baaad740a5abb4bfcff022a8114abb4eea66a2d.diff

LOG: [Bitcode] Include indirect users of BlockAddresses in bitcode

The original fix (commit 23ec5782c3cc) of
https://github.com/llvm/llvm-project/issues/52787 only adds `Function`s
that have `Instruction`s that directly use `BlockAddress`es into the
bitcode (`FUNC_CODE_BLOCKADDR_USERS`).

However, in either @rickyz's original reproducing code:

```
void f(long);

__attribute__((noinline)) static void fun(long x) {
  f(x + 1);
}

void repro(void) {
  fun(({
    label:
      (long)&&label;
  }));
}
```

```
...
define dso_local void @repro() #0 {
entry:
  br label %label

label:                                            ; preds = %entry
  tail call fastcc void @fun()
  ret void
}

define internal fastcc void @fun() unnamed_addr #1 {
entry:
  tail call void @f(i64 add (i64 ptrtoint (i8* blockaddress(@repro, %label) to i64), i64 1)) #3
  ret void
}
...
```

or the xfs and overlayfs in the Linux kernel, `BlockAddress`es (e.g.,
`i8* blockaddress(@repro, %label)`) may first compose `ConstantExpr`s
(e.g., `i64 ptrtoint (i8* blockaddress(@repro, %label) to i64)`) and
then used by `Instruction`s. This case is not handled by the original
fix.

This patch adds *indirect* users of `BlockAddress`es, i.e., the
`Instruction`s using some `Constant`s which further use the
`BlockAddress`es, into the bitcode as well, by doing depth-first
searches.

Fixes: https://github.com/llvm/llvm-project/issues/52787
Fixes: 23ec5782c3cc ("[Bitcode] materialize Functions early when BlockAddress taken")

Reviewed By: nickdesaulniers

Differential Revision: https://reviews.llvm.org/D124878

Added: 
    llvm/test/Bitcode/blockaddress-aggregate-users.ll
    llvm/test/Bitcode/blockaddress-expr-users.ll
    llvm/test/Bitcode/blockaddress-globalvalue-users.ll
    llvm/test/Bitcode/blockaddress-nested-users.ll

Modified: 
    llvm/lib/Bitcode/Writer/BitcodeWriter.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
index dcfe12a26903..1288c91c4eba 100644
--- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -19,6 +19,7 @@
 #include "llvm/ADT/None.h"
 #include "llvm/ADT/Optional.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/SmallVector.h"
@@ -3365,7 +3366,7 @@ void ModuleBitcodeWriter::writeFunction(
   bool NeedsMetadataAttachment = F.hasMetadata();
 
   DILocation *LastDL = nullptr;
-  SmallPtrSet<Function *, 4> BlockAddressUsers;
+  SmallSetVector<Function *, 4> BlockAddressUsers;
 
   // Finally, emit all the instructions, in order.
   for (const BasicBlock &BB : F) {
@@ -3401,11 +3402,24 @@ void ModuleBitcodeWriter::writeFunction(
     }
 
     if (BlockAddress *BA = BlockAddress::lookup(&BB)) {
-      for (User *U : BA->users()) {
-        if (auto *I = dyn_cast<Instruction>(U)) {
-          Function *P = I->getParent()->getParent();
-          if (P != &F)
-            BlockAddressUsers.insert(P);
+      SmallVector<Value *, 16> BlockAddressUsersStack { BA };
+      SmallPtrSet<Value *, 16> BlockAddressUsersVisited { BA };
+
+      while (!BlockAddressUsersStack.empty()) {
+        Value *V = BlockAddressUsersStack.pop_back_val();
+
+        for (User *U : V->users()) {
+          if ((isa<ConstantAggregate>(U) || isa<ConstantExpr>(U)) &&
+              !BlockAddressUsersVisited.contains(U)) {
+            BlockAddressUsersStack.push_back(U);
+            BlockAddressUsersVisited.insert(U);
+          }
+
+          if (auto *I = dyn_cast<Instruction>(U)) {
+            Function *P = I->getParent()->getParent();
+            if (P != &F)
+              BlockAddressUsers.insert(P);
+          }
         }
       }
     }

diff  --git a/llvm/test/Bitcode/blockaddress-aggregate-users.ll b/llvm/test/Bitcode/blockaddress-aggregate-users.ll
new file mode 100644
index 000000000000..febfc6dd0638
--- /dev/null
+++ b/llvm/test/Bitcode/blockaddress-aggregate-users.ll
@@ -0,0 +1,40 @@
+; RUN: llvm-as %s -o %t.bc
+; RUN: llvm-bcanalyzer -dump %t.bc | FileCheck %s
+; RUN: llvm-dis %t.bc
+
+; There's a curious case where blockaddress constants may refer to functions
+; outside of the function they're used in. There's a special bitcode function
+; code, FUNC_CODE_BLOCKADDR_USERS, used to signify that this is the case.
+
+; The intent of this test is two-fold:
+; 1. Ensure we produce BLOCKADDR_USERS bitcode function code on the first fn,
+;    @repro, since @fun and @fun2 both refer to @repro via blockaddress
+;    constants.
+; 2. Ensure we can round-trip serializing+desearlizing such case.
+
+; CHECK: <FUNCTION_BLOCK
+; CHECK: <BLOCKADDR_USERS op0=2 op1=1
+; CHECK: <FUNCTION_BLOCK
+; CHECK: <FUNCTION_BLOCK
+
+%struct.ptrs = type { i8*, i8* }
+
+define void @repro() {
+  br label %label
+
+label:
+  call void @fun()
+  ret void
+}
+
+define void @fun() noinline {
+  call void @f(%struct.ptrs { i8* blockaddress(@repro, %label), i8* blockaddress(@repro, %label) })
+  ret void
+}
+
+define void @fun2() noinline {
+  call void @f(%struct.ptrs { i8* blockaddress(@repro, %label), i8* blockaddress(@repro, %label) })
+  ret void
+}
+
+declare void @f(%struct.ptrs)

diff  --git a/llvm/test/Bitcode/blockaddress-expr-users.ll b/llvm/test/Bitcode/blockaddress-expr-users.ll
new file mode 100644
index 000000000000..838365f3317e
--- /dev/null
+++ b/llvm/test/Bitcode/blockaddress-expr-users.ll
@@ -0,0 +1,38 @@
+; RUN: llvm-as %s -o %t.bc
+; RUN: llvm-bcanalyzer -dump %t.bc | FileCheck %s
+; RUN: llvm-dis %t.bc
+
+; There's a curious case where blockaddress constants may refer to functions
+; outside of the function they're used in. There's a special bitcode function
+; code, FUNC_CODE_BLOCKADDR_USERS, used to signify that this is the case.
+
+; The intent of this test is two-fold:
+; 1. Ensure we produce BLOCKADDR_USERS bitcode function code on the first fn,
+;    @repro, since @fun and @fun2 both refer to @repro via blockaddress
+;    constants.
+; 2. Ensure we can round-trip serializing+desearlizing such case.
+
+; CHECK: <FUNCTION_BLOCK
+; CHECK: <BLOCKADDR_USERS op0=2 op1=1
+; CHECK: <FUNCTION_BLOCK
+; CHECK: <FUNCTION_BLOCK
+
+define void @repro() {
+  br label %label
+
+label:
+  call void @fun()
+  ret void
+}
+
+define void @fun() noinline {
+  call void @f(i64 ptrtoint (i8* blockaddress(@repro, %label) to i64))
+  ret void
+}
+
+define void @fun2() noinline {
+  call void @f(i64 ptrtoint (i8* blockaddress(@repro, %label) to i64))
+  ret void
+}
+
+declare void @f(i64)

diff  --git a/llvm/test/Bitcode/blockaddress-globalvalue-users.ll b/llvm/test/Bitcode/blockaddress-globalvalue-users.ll
new file mode 100644
index 000000000000..2d91bd5f84f3
--- /dev/null
+++ b/llvm/test/Bitcode/blockaddress-globalvalue-users.ll
@@ -0,0 +1,38 @@
+; RUN: llvm-as %s -o %t.bc
+; RUN: llvm-bcanalyzer -dump %t.bc | FileCheck %s
+; RUN: llvm-dis %t.bc
+
+; There's a curious case where blockaddress constants may refer to functions
+; outside of the function they're used in. There's a special bitcode function
+; code, FUNC_CODE_BLOCKADDR_USERS, used to signify that this is the case.
+
+; The intent of this test is two-fold:
+; 1. Ensure we do not produce BLOCKADDR_USERS bitcode function code on the first
+;    fn, @repro, by accident, when @fun and @fun2 use a global value, @foo,
+;    which is initialized to @repro's blockaddress constants.
+; 2. Ensure we can round-trip serializing+desearlizing such case.
+
+; CHECK: <FUNCTION_BLOCK
+; CHECK-NOT: <BLOCKADDR_USERS
+
+ at foo = global i8* blockaddress(@repro, %label)
+
+define void @repro() {
+  br label %label
+
+label:
+  call void @fun()
+  ret void
+}
+
+define void @fun() noinline {
+  call void @f(i8** @foo)
+  ret void
+}
+
+define void @fun2() noinline {
+  call void @f(i8** @foo)
+  ret void
+}
+
+declare void @f(i8**)
\ No newline at end of file

diff  --git a/llvm/test/Bitcode/blockaddress-nested-users.ll b/llvm/test/Bitcode/blockaddress-nested-users.ll
new file mode 100644
index 000000000000..b5c8f9371f0d
--- /dev/null
+++ b/llvm/test/Bitcode/blockaddress-nested-users.ll
@@ -0,0 +1,38 @@
+; RUN: llvm-as %s -o %t.bc
+; RUN: llvm-bcanalyzer -dump %t.bc | FileCheck %s
+; RUN: llvm-dis %t.bc
+
+; There's a curious case where blockaddress constants may refer to functions
+; outside of the function they're used in. There's a special bitcode function
+; code, FUNC_CODE_BLOCKADDR_USERS, used to signify that this is the case.
+
+; The intent of this test is two-fold:
+; 1. Ensure we produce BLOCKADDR_USERS bitcode function code on the first fn,
+;    @repro, since @fun and @fun2 both refer to @repro via blockaddress
+;    constants.
+; 2. Ensure we can round-trip serializing+desearlizing such case.
+
+; CHECK: <FUNCTION_BLOCK
+; CHECK: <BLOCKADDR_USERS op0=1 op1=2
+; CHECK: <FUNCTION_BLOCK
+; CHECK: <FUNCTION_BLOCK
+
+define void @repro() {
+  br label %label
+
+label:
+  call void @fun()
+  ret void
+}
+
+define void @fun() noinline {
+  call void @f(i64 add (i64 ptrtoint (i8* blockaddress(@repro, %label) to i64), i64 1))
+  ret void
+}
+
+define void @fun2() noinline {
+  call void @f(i64 add (i64 ptrtoint (i8* blockaddress(@repro, %label) to i64), i64 2))
+  ret void
+}
+
+declare void @f(i64)


        


More information about the llvm-commits mailing list