[llvm] [C API] Add blockaddress getters to C API (PR #77390)

Benji Smith via llvm-commits llvm-commits at lists.llvm.org
Fri Feb 9 16:04:30 PST 2024


https://github.com/Benjins updated https://github.com/llvm/llvm-project/pull/77390

>From 43db89c2bc2a42ff1e88deb99b7214e3e37e317c Mon Sep 17 00:00:00 2001
From: Benji Smith <6193112+Benjins at users.noreply.github.com>
Date: Sun, 7 Jan 2024 20:16:25 -0500
Subject: [PATCH 1/4] Refactor llvm-c-test to pre-declare all basic blocks for
 all functions

This is necessary for being able to clone blockaddress values, since they can
reference basic blocks in other functions. We declare all basic blocks up
front, similar to how all functions are declared, and look them up by name
when the function itself is cloned.
---
 llvm/tools/llvm-c-test/echo.cpp | 46 +++++++++++++++++++++++++++++++--
 1 file changed, 44 insertions(+), 2 deletions(-)

diff --git a/llvm/tools/llvm-c-test/echo.cpp b/llvm/tools/llvm-c-test/echo.cpp
index bc708e2d472edd..5d9e26f875e5da 100644
--- a/llvm/tools/llvm-c-test/echo.cpp
+++ b/llvm/tools/llvm-c-test/echo.cpp
@@ -238,6 +238,20 @@ static LLVMValueRef clone_constant(LLVMValueRef Cst, LLVMModuleRef M) {
   return Ret;
 }
 
+static LLVMBasicBlockRef find_bb_in_func(LLVMValueRef Fn, const char *BBName) {
+  LLVMBasicBlockRef CurBB = LLVMGetFirstBasicBlock(Fn);
+  while (CurBB != nullptr) {
+
+    const char *CurBBName = LLVMGetBasicBlockName(CurBB);
+    if (strcmp(CurBBName, BBName) == 0)
+      return CurBB;
+
+    CurBB = LLVMGetNextBasicBlock(CurBB);
+  }
+
+  return nullptr;
+}
+
 static LLVMValueRef clone_constant_impl(LLVMValueRef Cst, LLVMModuleRef M) {
   if (!LLVMIsAConstant(Cst))
     report_fatal_error("Expected a constant");
@@ -432,6 +446,17 @@ static LLVMValueRef clone_inline_asm(LLVMValueRef Asm, LLVMModuleRef M) {
                           CanUnwind);
 }
 
+static LLVMBasicBlockRef declare_bb_in_func(LLVMValueRef DstFn,
+                                            LLVMBasicBlockRef Src) {
+  const char *Name = LLVMGetBasicBlockName(Src);
+
+  if (find_bb_in_func(DstFn, Name) != nullptr)
+    report_fatal_error("Trying to re-declare existing basic block");
+
+  LLVMBasicBlockRef DstBB = LLVMAppendBasicBlock(DstFn, Name);
+  return DstBB;
+}
+
 struct FunCloner {
   LLVMValueRef Fun;
   LLVMModuleRef M;
@@ -1042,8 +1067,15 @@ struct FunCloner {
     if (Name != VName)
       report_fatal_error("Basic block name mismatch");
 
-    LLVMBasicBlockRef BB = LLVMAppendBasicBlock(Fun, Name);
-    return BBMap[Src] = BB;
+    // Scan for existing basic blocks that we forward-declared
+    // If a basic block is not cached in BBMap already, then it should exist
+    // in Fun, since we should have pre-declared all basic blocks earlier in
+    // declare_symbols
+    if (LLVMBasicBlockRef ExistingBB = find_bb_in_func(Fun, Name))
+      return BBMap[Src] = ExistingBB;
+
+    report_fatal_error("Trying to declare new basic block");
+    return nullptr;
   }
 
   LLVMBasicBlockRef CloneBB(LLVMBasicBlockRef Src) {
@@ -1188,6 +1220,16 @@ static void declare_symbols(LLVMModuleRef Src, LLVMModuleRef M) {
       }
     }
 
+    // Declare any basic blocks in this function:
+    // We need to do this here, in case any blockaddress value's are used,
+    // in which case we may reference basic blocks in any function
+    // Therefore, declare them before actually cloning any function
+    LLVMBasicBlockRef CurSrcBB = LLVMGetFirstBasicBlock(Cur);
+    while (CurSrcBB != nullptr) {
+      declare_bb_in_func(F, CurSrcBB);
+      CurSrcBB = LLVMGetNextBasicBlock(CurSrcBB);
+    }
+
     Next = LLVMGetNextFunction(Cur);
     if (Next == nullptr) {
       if (Cur != End)

>From 68a433cdda2cc5e310f36a424b37feaf93052e96 Mon Sep 17 00:00:00 2001
From: Benji Smith <6193112+Benjins at users.noreply.github.com>
Date: Sun, 7 Jan 2024 20:43:25 -0500
Subject: [PATCH 2/4] Add LLVMGetBlockAddressFunction and
 LLVMGetBlockAddressBasicBlock getters

This allows for accessing the function/basic block that a blockaddress constant
refers to

Tests are added for it in the llvm-c-test echo.ll file
---
 llvm/docs/ReleaseNotes.rst        |  3 +++
 llvm/include/llvm-c/Core.h        | 10 ++++++++++
 llvm/lib/IR/Core.cpp              |  8 ++++++++
 llvm/test/Bindings/llvm-c/echo.ll | 26 ++++++++++++++++++++++++++
 llvm/tools/llvm-c-test/echo.cpp   | 16 ++++++++++++++++
 5 files changed, 63 insertions(+)

diff --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst
index 52610e7de18751..24dc9614d97dc5 100644
--- a/llvm/docs/ReleaseNotes.rst
+++ b/llvm/docs/ReleaseNotes.rst
@@ -246,6 +246,9 @@ Changes to the C API
   the fast-math flags of an instruction, as well as ``LLVMCanValueUseFastMathFlags``
   for checking if an instruction can use such flags
 
+* Added ``LLVMGetBlockAddressFunction`` and ``LLVMGetBlockAddressBasicBlock``
+  functions for accessing the values in a blockaddress constant
+
 Changes to the CodeGen infrastructure
 -------------------------------------
 
diff --git a/llvm/include/llvm-c/Core.h b/llvm/include/llvm-c/Core.h
index 83530ae7b51324..1b9683a1d490ad 100644
--- a/llvm/include/llvm-c/Core.h
+++ b/llvm/include/llvm-c/Core.h
@@ -2328,6 +2328,16 @@ LLVMValueRef LLVMConstShuffleVector(LLVMValueRef VectorAConstant,
                                     LLVMValueRef MaskConstant);
 LLVMValueRef LLVMBlockAddress(LLVMValueRef F, LLVMBasicBlockRef BB);
 
+/**
+ * Gets the function associated with a given BlockAddress constant value
+ */
+LLVMValueRef LLVMGetBlockAddressFunction(LLVMValueRef BlockAddr);
+
+/**
+ * Gets the basic block associated with a given BlockAddress constant value
+ */
+LLVMBasicBlockRef LLVMGetBlockAddressBasicBlock(LLVMValueRef BlockAddr);
+
 /** Deprecated: Use LLVMGetInlineAsm instead. */
 LLVMValueRef LLVMConstInlineAsm(LLVMTypeRef Ty,
                                 const char *AsmString, const char *Constraints,
diff --git a/llvm/lib/IR/Core.cpp b/llvm/lib/IR/Core.cpp
index fb30fbce0ba22e..d6d159ab8b9e83 100644
--- a/llvm/lib/IR/Core.cpp
+++ b/llvm/lib/IR/Core.cpp
@@ -1805,6 +1805,14 @@ LLVMValueRef LLVMBlockAddress(LLVMValueRef F, LLVMBasicBlockRef BB) {
   return wrap(BlockAddress::get(unwrap<Function>(F), unwrap(BB)));
 }
 
+LLVMValueRef LLVMGetBlockAddressFunction(LLVMValueRef BlockAddr) {
+  return wrap(unwrap<BlockAddress>(BlockAddr)->getFunction());
+}
+
+LLVMBasicBlockRef LLVMGetBlockAddressBasicBlock(LLVMValueRef BlockAddr) {
+  return wrap(unwrap<BlockAddress>(BlockAddr)->getBasicBlock());
+}
+
 /*--.. Operations on global variables, functions, and aliases (globals) ....--*/
 
 LLVMModuleRef LLVMGetGlobalParent(LLVMValueRef Global) {
diff --git a/llvm/test/Bindings/llvm-c/echo.ll b/llvm/test/Bindings/llvm-c/echo.ll
index be0207599478b8..c69c921823a510 100644
--- a/llvm/test/Bindings/llvm-c/echo.ll
+++ b/llvm/test/Bindings/llvm-c/echo.ll
@@ -334,6 +334,32 @@ define void @test_fast_math_flags_call_outer(float %a) {
   ret void
 }
 
+define ptr @test_block_address_01() {
+entry:
+  br label %block_0
+block_0:
+  ret ptr blockaddress(@test_block_address_01, %block_0)
+}
+
+define ptr @test_block_address_02() {
+entry:
+  ret ptr blockaddress(@test_block_address_01, %block_0)
+}
+
+define ptr @test_block_address_03() {
+entry:
+  br label %block_1
+block_1:
+  ret ptr blockaddress(@test_block_address_04, %block_2)
+}
+
+define ptr @test_block_address_04() {
+entry:
+  br label %block_2
+block_2:
+  ret ptr blockaddress(@test_block_address_03, %block_1)
+}
+
 !llvm.dbg.cu = !{!0, !2}
 !llvm.module.flags = !{!3}
 
diff --git a/llvm/tools/llvm-c-test/echo.cpp b/llvm/tools/llvm-c-test/echo.cpp
index 5d9e26f875e5da..153001e0a23931 100644
--- a/llvm/tools/llvm-c-test/echo.cpp
+++ b/llvm/tools/llvm-c-test/echo.cpp
@@ -387,6 +387,22 @@ static LLVMValueRef clone_constant_impl(LLVMValueRef Cst, LLVMModuleRef M) {
     return LLVMConstVector(Elts.data(), EltCount);
   }
 
+  if (LLVMIsABlockAddress(Cst)) {
+    check_value_kind(Cst, LLVMBlockAddressValueKind);
+    LLVMValueRef SrcFunc = LLVMGetBlockAddressFunction(Cst);
+    LLVMBasicBlockRef SrcBB = LLVMGetBlockAddressBasicBlock(Cst);
+
+    LLVMValueRef DstFunc = clone_constant(SrcFunc, M);
+
+    LLVMBasicBlockRef DstBB =
+        find_bb_in_func(DstFunc, LLVMGetBasicBlockName(SrcBB));
+    if (DstBB == nullptr)
+      report_fatal_error(
+          "Could not find basic block with expected name for blockaddress");
+
+    return LLVMBlockAddress(DstFunc, DstBB);
+  }
+
   // At this point, if it's not a constant expression, it's a kind of constant
   // which is not supported
   if (!LLVMIsAConstantExpr(Cst))

>From a042c5f83b84077ff6780cfd12cbe4cc4f73efe5 Mon Sep 17 00:00:00 2001
From: Benji Smith <6193112+Benjins at users.noreply.github.com>
Date: Thu, 8 Feb 2024 08:32:07 -0500
Subject: [PATCH 3/4] Address documentation formatting PR feedback

---
 llvm/docs/ReleaseNotes.rst | 2 +-
 llvm/include/llvm-c/Core.h | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst
index ea373c10df94ba..474a93996c5b42 100644
--- a/llvm/docs/ReleaseNotes.rst
+++ b/llvm/docs/ReleaseNotes.rst
@@ -111,7 +111,7 @@ Changes to the C API
 --------------------
 
 * Added ``LLVMGetBlockAddressFunction`` and ``LLVMGetBlockAddressBasicBlock``
-  functions for accessing the values in a blockaddress constant
+  functions for accessing the values in a blockaddress constant.
 
 Changes to the CodeGen infrastructure
 -------------------------------------
diff --git a/llvm/include/llvm-c/Core.h b/llvm/include/llvm-c/Core.h
index 1b9683a1d490ad..09746bdaf0c94e 100644
--- a/llvm/include/llvm-c/Core.h
+++ b/llvm/include/llvm-c/Core.h
@@ -2329,12 +2329,12 @@ LLVMValueRef LLVMConstShuffleVector(LLVMValueRef VectorAConstant,
 LLVMValueRef LLVMBlockAddress(LLVMValueRef F, LLVMBasicBlockRef BB);
 
 /**
- * Gets the function associated with a given BlockAddress constant value
+ * Gets the function associated with a given BlockAddress constant value.
  */
 LLVMValueRef LLVMGetBlockAddressFunction(LLVMValueRef BlockAddr);
 
 /**
- * Gets the basic block associated with a given BlockAddress constant value
+ * Gets the basic block associated with a given BlockAddress constant value.
  */
 LLVMBasicBlockRef LLVMGetBlockAddressBasicBlock(LLVMValueRef BlockAddr);
 

>From 19978bcdb89dd0150ebeed78c0c8372668d768bf Mon Sep 17 00:00:00 2001
From: Benji Smith <6193112+Benjins at users.noreply.github.com>
Date: Fri, 9 Feb 2024 18:34:06 -0500
Subject: [PATCH 4/4] Add support for unnamed basic blocks for blockaddress in
 echo.cpp

blockaddress constants can use unnamed basic blocks if the blockaddress is in
the same function. To handle those cases, we re-use the BBMap that is in the
FunCloner. In other cases, we can look up the basic block by name as before.

This also means we only pre-declare named basic blocks, and unnamed basic
blocks are lazily declared as normal

There are more tests to check some of these cases. These are now moved to
basic_blocks.ll to keep it more contained
---
 llvm/test/Bindings/llvm-c/basic_blocks.ll | 49 ++++++++++++
 llvm/test/Bindings/llvm-c/echo.ll         | 26 -------
 llvm/tools/llvm-c-test/echo.cpp           | 90 +++++++++++++++++------
 3 files changed, 118 insertions(+), 47 deletions(-)
 create mode 100644 llvm/test/Bindings/llvm-c/basic_blocks.ll

diff --git a/llvm/test/Bindings/llvm-c/basic_blocks.ll b/llvm/test/Bindings/llvm-c/basic_blocks.ll
new file mode 100644
index 00000000000000..418807b407db80
--- /dev/null
+++ b/llvm/test/Bindings/llvm-c/basic_blocks.ll
@@ -0,0 +1,49 @@
+; RUN: llvm-as < %s | llvm-dis > %t.orig
+; RUN: llvm-as < %s | llvm-c-test --echo > %t.echo
+; RUN: diff -w %t.orig %t.echo
+;
+source_filename = "/test/Bindings/basic_blocks.ll"
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+
+define ptr @test_block_address_01() {
+entry:
+  br label %block_0
+block_0:
+  ret ptr blockaddress(@test_block_address_01, %block_0)
+}
+
+define ptr @test_block_address_02() {
+entry:
+  ret ptr blockaddress(@test_block_address_01, %block_0)
+}
+
+define ptr @test_block_address_03() {
+entry:
+  br label %block_1
+block_1:
+  ret ptr blockaddress(@test_block_address_04, %block_2)
+}
+
+define ptr @test_block_address_04() {
+entry:
+  br label %block_2
+block_2:
+  ret ptr blockaddress(@test_block_address_03, %block_1)
+}
+
+define ptr @test_block_address_unnamed_01(i32 %0) {
+1:
+  br label %2
+2:
+  br label %block_0
+block_0:
+  ret ptr blockaddress(@test_block_address_unnamed_01, %2)
+}
+
+define void @test_block_address_global_01() {
+    br label %foo
+foo:
+    ret void
+}
+
+@block_addr_global = global ptr blockaddress(@test_block_address_global_01, %foo)
diff --git a/llvm/test/Bindings/llvm-c/echo.ll b/llvm/test/Bindings/llvm-c/echo.ll
index c69c921823a510..be0207599478b8 100644
--- a/llvm/test/Bindings/llvm-c/echo.ll
+++ b/llvm/test/Bindings/llvm-c/echo.ll
@@ -334,32 +334,6 @@ define void @test_fast_math_flags_call_outer(float %a) {
   ret void
 }
 
-define ptr @test_block_address_01() {
-entry:
-  br label %block_0
-block_0:
-  ret ptr blockaddress(@test_block_address_01, %block_0)
-}
-
-define ptr @test_block_address_02() {
-entry:
-  ret ptr blockaddress(@test_block_address_01, %block_0)
-}
-
-define ptr @test_block_address_03() {
-entry:
-  br label %block_1
-block_1:
-  ret ptr blockaddress(@test_block_address_04, %block_2)
-}
-
-define ptr @test_block_address_04() {
-entry:
-  br label %block_2
-block_2:
-  ret ptr blockaddress(@test_block_address_03, %block_1)
-}
-
 !llvm.dbg.cu = !{!0, !2}
 !llvm.module.flags = !{!3}
 
diff --git a/llvm/tools/llvm-c-test/echo.cpp b/llvm/tools/llvm-c-test/echo.cpp
index 153001e0a23931..580cd201aa7b43 100644
--- a/llvm/tools/llvm-c-test/echo.cpp
+++ b/llvm/tools/llvm-c-test/echo.cpp
@@ -252,6 +252,13 @@ static LLVMBasicBlockRef find_bb_in_func(LLVMValueRef Fn, const char *BBName) {
   return nullptr;
 }
 
+// Returns true if the basic block has an actual name,
+// rather than an implicit numeric name
+static bool is_named_basic_block(LLVMBasicBlockRef BB) {
+  const char *Name = LLVMGetBasicBlockName(BB);
+  return Name != nullptr && *Name != '\0';
+}
+
 static LLVMValueRef clone_constant_impl(LLVMValueRef Cst, LLVMModuleRef M) {
   if (!LLVMIsAConstant(Cst))
     report_fatal_error("Expected a constant");
@@ -387,6 +394,8 @@ static LLVMValueRef clone_constant_impl(LLVMValueRef Cst, LLVMModuleRef M) {
     return LLVMConstVector(Elts.data(), EltCount);
   }
 
+  // Since we are outside the context of a function if we get here,
+  // we can only clone blockaddress values with named basic blocks
   if (LLVMIsABlockAddress(Cst)) {
     check_value_kind(Cst, LLVMBlockAddressValueKind);
     LLVMValueRef SrcFunc = LLVMGetBlockAddressFunction(Cst);
@@ -394,13 +403,13 @@ static LLVMValueRef clone_constant_impl(LLVMValueRef Cst, LLVMModuleRef M) {
 
     LLVMValueRef DstFunc = clone_constant(SrcFunc, M);
 
-    LLVMBasicBlockRef DstBB =
-        find_bb_in_func(DstFunc, LLVMGetBasicBlockName(SrcBB));
-    if (DstBB == nullptr)
-      report_fatal_error(
-          "Could not find basic block with expected name for blockaddress");
-
-    return LLVMBlockAddress(DstFunc, DstBB);
+    if (is_named_basic_block(SrcBB)) {
+      LLVMBasicBlockRef DstBB =
+          find_bb_in_func(DstFunc, LLVMGetBasicBlockName(SrcBB));
+      return LLVMBlockAddress(DstFunc, DstBB);
+    } else
+      report_fatal_error("Cannot clone unnamed blockaddress outside the "
+                         "context of the basic block's function");
   }
 
   // At this point, if it's not a constant expression, it's a kind of constant
@@ -464,13 +473,18 @@ static LLVMValueRef clone_inline_asm(LLVMValueRef Asm, LLVMModuleRef M) {
 
 static LLVMBasicBlockRef declare_bb_in_func(LLVMValueRef DstFn,
                                             LLVMBasicBlockRef Src) {
-  const char *Name = LLVMGetBasicBlockName(Src);
 
-  if (find_bb_in_func(DstFn, Name) != nullptr)
-    report_fatal_error("Trying to re-declare existing basic block");
+  if (is_named_basic_block(Src)) {
+    const char *Name = LLVMGetBasicBlockName(Src);
+
+    if (find_bb_in_func(DstFn, Name) != nullptr)
+      report_fatal_error("Trying to re-declare existing basic block");
+
+    LLVMBasicBlockRef DstBB = LLVMAppendBasicBlock(DstFn, Name);
+    return DstBB;
+  }
 
-  LLVMBasicBlockRef DstBB = LLVMAppendBasicBlock(DstFn, Name);
-  return DstBB;
+  return nullptr;
 }
 
 struct FunCloner {
@@ -491,11 +505,40 @@ struct FunCloner {
     return TypeCloner(M).Clone(Src);
   }
 
+  LLVMValueRef CloneBlockAddress(LLVMValueRef Src) {
+    check_value_kind(Src, LLVMBlockAddressValueKind);
+    LLVMValueRef SrcFunc = LLVMGetBlockAddressFunction(Src);
+    LLVMBasicBlockRef SrcBB = LLVMGetBlockAddressBasicBlock(Src);
+
+    LLVMValueRef DstFunc = clone_constant(SrcFunc, M);
+
+    LLVMBasicBlockRef DstBB = nullptr;
+    if (is_named_basic_block(SrcBB))
+      DstBB = find_bb_in_func(DstFunc, LLVMGetBasicBlockName(SrcBB));
+    else if (DstFunc == Fun)
+      DstBB = DeclareBB(SrcBB);
+    else
+      report_fatal_error("Cannot clone unnamed blockaddress outside the "
+                         "context of the basic block's function");
+
+    if (DstBB == nullptr)
+      report_fatal_error("Could not clone blockaddress");
+
+    return LLVMBlockAddress(DstFunc, DstBB);
+  }
+
   // Try to clone everything in the llvm::Value hierarchy.
   LLVMValueRef CloneValue(LLVMValueRef Src) {
     // First, the value may be constant.
-    if (LLVMIsAConstant(Src))
-      return clone_constant(Src, M);
+    if (LLVMIsAConstant(Src)) {
+      if (LLVMIsABlockAddress(Src))
+        // blockaddress values can reference unnamed basic blocks, but only
+        // inside that block's function. To support that case, we need access
+        // to BBMap in order to map from the Src basic block to our own
+        return CloneBlockAddress(Src);
+      else
+        return clone_constant(Src, M);
+    }
 
     // Function argument should always be in the map already.
     auto i = VMap.find(Src);
@@ -1085,13 +1128,18 @@ struct FunCloner {
 
     // Scan for existing basic blocks that we forward-declared
     // If a basic block is not cached in BBMap already, then it should exist
-    // in Fun, since we should have pre-declared all basic blocks earlier in
-    // declare_symbols
-    if (LLVMBasicBlockRef ExistingBB = find_bb_in_func(Fun, Name))
-      return BBMap[Src] = ExistingBB;
+    // in Fun, since we should have pre-declared all named basic blocks earlier
+    // in declare_symbols
+    if (is_named_basic_block(Src)) {
+      if (LLVMBasicBlockRef ExistingBB = find_bb_in_func(Fun, Name))
+        return BBMap[Src] = ExistingBB;
+      else
+        report_fatal_error("Expected named basic block to be pre-declared");
+    }
 
-    report_fatal_error("Trying to declare new basic block");
-    return nullptr;
+    // If a basic block is unnamed, then we need to create it here
+    LLVMBasicBlockRef BB = LLVMAppendBasicBlock(Fun, Name);
+    return BBMap[Src] = BB;
   }
 
   LLVMBasicBlockRef CloneBB(LLVMBasicBlockRef Src) {
@@ -1238,7 +1286,7 @@ static void declare_symbols(LLVMModuleRef Src, LLVMModuleRef M) {
 
     // Declare any basic blocks in this function:
     // We need to do this here, in case any blockaddress value's are used,
-    // in which case we may reference basic blocks in any function
+    // in which case we may reference any named basic blocks in any function
     // Therefore, declare them before actually cloning any function
     LLVMBasicBlockRef CurSrcBB = LLVMGetFirstBasicBlock(Cur);
     while (CurSrcBB != nullptr) {



More information about the llvm-commits mailing list