[clang] 9766fed - [DeadArgElim] Re-apply: Set unused arguments for internal functions

Quentin Colombet via cfe-commits cfe-commits at lists.llvm.org
Thu May 12 08:56:09 PDT 2022


Author: Quentin Colombet
Date: 2022-05-12T08:46:16-07:00
New Revision: 9766fed9c10e8ba2f67fad0a3e8b509a8064f7b3

URL: https://github.com/llvm/llvm-project/commit/9766fed9c10e8ba2f67fad0a3e8b509a8064f7b3
DIFF: https://github.com/llvm/llvm-project/commit/9766fed9c10e8ba2f67fad0a3e8b509a8064f7b3.diff

LOG: [DeadArgElim] Re-apply: Set unused arguments for internal functions

The re-apply includes fixes to clang tests that were missed in
the original commit.

Original message:
Prior to this patch, we would only set the unused arguments of external
functions to undef. The rationale was that unused arguments of internal
functions wouldn't need to be turned into undef arguments because they
should simply have been eliminated by the time we reach that code.

This is actually not true because there are plenty of cases where we can't
remove unused arguments. For instance, if the internal function is used in
an indirect call, it may not be possible to change the function signature.
Yet, for statically known call-sites we would still like to mark the unused
arguments as undef.

This patch enables the "set undef arguments" optimization on internal
functions when we encounter cases where internal functions cannot be
optimized, i.e., whenever an internal function is marked "live".

Differential Revision: https://reviews.llvm.org/D124699

Added: 
    llvm/test/Transforms/DeadArgElim/fct_ptr.ll

Modified: 
    clang/test/CodeGen/debug-info-block-vars.c
    clang/test/CodeGenObjCXX/nrvo.mm
    llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp

Removed: 
    


################################################################################
diff  --git a/clang/test/CodeGen/debug-info-block-vars.c b/clang/test/CodeGen/debug-info-block-vars.c
index dc522a807951..11c899fa3c81 100644
--- a/clang/test/CodeGen/debug-info-block-vars.c
+++ b/clang/test/CodeGen/debug-info-block-vars.c
@@ -11,7 +11,10 @@
 // CHECK: call void @llvm.dbg.declare(metadata i8** %.block_descriptor.addr,
 // CHECK-SAME:                        metadata !DIExpression())
 // CHECK-OPT-NOT: alloca
-// CHECK-OPT: call void @llvm.dbg.value(metadata i8* %.block_descriptor,
+// Since the block address is not used anywhere in this function,
+// the optimizer (DeadArgElim) has replaced all the false uses
+// (i.e., metadata users) with undef.
+// CHECK-OPT: call void @llvm.dbg.value(metadata i8* undef,
 // CHECK-OPT-SAME:                      metadata !DIExpression())
 void f(void) {
   a(^{

diff  --git a/clang/test/CodeGenObjCXX/nrvo.mm b/clang/test/CodeGenObjCXX/nrvo.mm
index 89d9ae9639cc..0e4b98996965 100644
--- a/clang/test/CodeGenObjCXX/nrvo.mm
+++ b/clang/test/CodeGenObjCXX/nrvo.mm
@@ -22,7 +22,11 @@ - (X)getNRVO {
 
 X blocksNRVO() {
   return ^{
-    // CHECK-LABEL: define internal void @___Z10blocksNRVOv_block_invoke
+    // With the optimizer enabled, the DeadArgElim pass is able to
+    // mark the block literal address argument as unused and later the
+    // related block_literal global variable is removed.
+    // This allows promoting this call to a fastcc call.
+    // CHECK-LABEL: define internal fastcc void @___Z10blocksNRVOv_block_invoke
     X x;
     // CHECK: call void @_ZN1XC1Ev
     // CHECK-NEXT: ret void

diff  --git a/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp b/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp
index 95f6a4f4fb57..a879a0fb30b3 100644
--- a/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp
+++ b/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp
@@ -268,9 +268,12 @@ bool DeadArgumentEliminationPass::RemoveDeadArgumentsFromCallers(Function &Fn) {
   if (!Fn.hasExactDefinition())
     return false;
 
-  // Functions with local linkage should already have been handled, except the
-  // fragile (variadic) ones which we can improve here.
-  if (Fn.hasLocalLinkage() && !Fn.getFunctionType()->isVarArg())
+  // Functions with local linkage should already have been handled, except if
+  // they are fully alive (e.g., called indirectly) and except for the fragile
+  // (variadic) ones. In these cases, we may still be able to improve their
+  // statically known call sites.
+  if ((Fn.hasLocalLinkage() && !LiveFunctions.count(&Fn)) &&
+      !Fn.getFunctionType()->isVarArg())
     return false;
 
   // Don't touch naked functions. The assembly might be using an argument, or

diff  --git a/llvm/test/Transforms/DeadArgElim/fct_ptr.ll b/llvm/test/Transforms/DeadArgElim/fct_ptr.ll
new file mode 100644
index 000000000000..2e352666c1f6
--- /dev/null
+++ b/llvm/test/Transforms/DeadArgElim/fct_ptr.ll
@@ -0,0 +1,67 @@
+; RUN: opt -S %s -deadargelim -o - | FileCheck %s
+; In this test, @internal_fct is used by an instruction
+; we don't know how to rewrite (the comparison that produces
+; %cmp1).
+; Because of that use, we used to bail out on removing the
+; unused arguments for this function.
+; Yet, we should still be able to rewrite the direct calls that are
+; statically known, by replacing the related arguments with undef.
+; This is what we check on the call that produces %res2.
+
+define i32 @call_indirect(i32 (i32, i32, i32)* readnone %fct_ptr, i32 %arg1, i32 %arg2, i32 %arg3) {
+; CHECK-LABEL: @call_indirect(
+; CHECK-NEXT:    [[CMP0:%.*]] = icmp eq i32 (i32, i32, i32)* [[FCT_PTR:%.*]], @external_fct
+; CHECK-NEXT:    br i1 [[CMP0]], label [[CALL_EXT:%.*]], label [[CHK2:%.*]]
+; CHECK:       call_ext:
+; CHECK-NEXT:    [[RES1:%.*]] = tail call i32 @external_fct(i32 undef, i32 [[ARG2:%.*]], i32 undef)
+; CHECK-NEXT:    br label [[END:%.*]]
+; CHECK:       chk2:
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp eq i32 (i32, i32, i32)* [[FCT_PTR]], @internal_fct
+; CHECK-NEXT:    br i1 [[CMP1]], label [[CALL_INT:%.*]], label [[CALL_OTHER:%.*]]
+; CHECK:       call_int:
+; CHECK-NEXT:    [[RES2:%.*]] = tail call i32 @internal_fct(i32 undef, i32 [[ARG2]], i32 undef)
+; CHECK-NEXT:    br label [[END]]
+; CHECK:       call_other:
+; CHECK-NEXT:    [[RES3:%.*]] = tail call i32 @other_fct(i32 [[ARG2]])
+; CHECK-NEXT:    br label [[END]]
+; CHECK:       end:
+; CHECK-NEXT:    [[FINAL_RES:%.*]] = phi i32 [ [[RES1]], [[CALL_EXT]] ], [ [[RES2]], [[CALL_INT]] ], [ [[RES3]], [[CALL_OTHER]] ]
+; CHECK-NEXT:    ret i32 [[FINAL_RES]]
+;
+  %cmp0 = icmp eq i32 (i32, i32, i32)* %fct_ptr, @external_fct
+  br i1 %cmp0, label %call_ext, label %chk2
+
+call_ext:
+  %res1 = tail call i32 @external_fct(i32 %arg1, i32 %arg2, i32 %arg3)
+  br label %end
+
+chk2:
+  %cmp1 = icmp eq i32 (i32, i32, i32)* %fct_ptr, @internal_fct
+  br i1 %cmp1, label %call_int, label %call_other
+
+call_int:
+  %res2 = tail call i32 @internal_fct(i32 %arg1, i32 %arg2, i32 %arg3)
+  br label %end
+
+call_other:
+  %res3 = tail call i32 @other_fct(i32 %arg1, i32 %arg2, i32 %arg3)
+  br label %end
+
+end:
+  %final_res = phi i32 [%res1, %call_ext], [%res2, %call_int], [%res3, %call_other]
+  ret i32 %final_res
+}
+
+
+define i32 @external_fct(i32 %unused_arg1, i32 %arg2, i32 %unused_arg3) {
+  ret i32 %arg2
+}
+
+define internal i32 @internal_fct(i32 %unused_arg1, i32 %arg2, i32 %unused_arg3) {
+  ret i32 %arg2
+}
+
+define internal i32 @other_fct(i32 %unused_arg1, i32 %arg2, i32 %unused_arg3) {
+  ret i32 %arg2
+}
+


        


More information about the cfe-commits mailing list