[llvm] 28d45c8 - [llvm][CallBrPrepare] use SSAUpdater to use intrinsic value

Nick Desaulniers via llvm-commits llvm-commits at lists.llvm.org
Thu Feb 16 18:04:00 PST 2023


Author: Nick Desaulniers
Date: 2023-02-16T17:58:34-08:00
New Revision: 28d45c843cd07514c9a7260d285ff59e14280ecf

URL: https://github.com/llvm/llvm-project/commit/28d45c843cd07514c9a7260d285ff59e14280ecf
DIFF: https://github.com/llvm/llvm-project/commit/28d45c843cd07514c9a7260d285ff59e14280ecf.diff

LOG: [llvm][CallBrPrepare] use SSAUpdater to use intrinsic value

Now that we've inserted a call to an intrinsic, we need to update
certain previous uses of CallBrInst values to use the value of this
intrinsic instead.

There are 3 cases to handle:
1. The @llvm.callbr.landingpad.<type>() intrinsic call is in the same
   BasicBlock as the use of the callbr we're replacing.
2. The use is dominated by the direct destination.
3. The use is not dominated by the direct destination, and may or may
   not be dominated by the indirect destination.

Part 2c of
https://discourse.llvm.org/t/rfc-syncing-asm-goto-with-outputs-with-gcc/65453/8.

Reviewed By: efriedma, void, jyknight

Differential Revision: https://reviews.llvm.org/D139970

Added: 
    

Modified: 
    llvm/lib/CodeGen/CallBrPrepare.cpp
    llvm/test/CodeGen/AArch64/callbr-prepare.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/CodeGen/CallBrPrepare.cpp b/llvm/lib/CodeGen/CallBrPrepare.cpp
index 39192d0ad37c..56c2db9392b7 100644
--- a/llvm/lib/CodeGen/CallBrPrepare.cpp
+++ b/llvm/lib/CodeGen/CallBrPrepare.cpp
@@ -34,6 +34,7 @@
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/iterator.h"
 #include "llvm/Analysis/CFG.h"
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/IR/BasicBlock.h"
@@ -46,6 +47,7 @@
 #include "llvm/InitializePasses.h"
 #include "llvm/Pass.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/SSAUpdater.h"
 
 using namespace llvm;
 
@@ -55,6 +57,10 @@ namespace {
 
 class CallBrPrepare : public FunctionPass {
   bool SplitCriticalEdges(ArrayRef<CallBrInst *> CBRs, DominatorTree &DT);
+  bool InsertIntrinsicCalls(ArrayRef<CallBrInst *> CBRs,
+                            DominatorTree &DT) const;
+  void UpdateSSA(DominatorTree &DT, CallBrInst *CBR, CallInst *Intrinsic,
+                 SSAUpdater &SSAUpdate) const;
 
 public:
   CallBrPrepare() : FunctionPass(ID) {}
@@ -108,23 +114,89 @@ bool CallBrPrepare::SplitCriticalEdges(ArrayRef<CallBrInst *> CBRs,
   return Changed;
 }
 
-static bool InsertIntrinsicCalls(ArrayRef<CallBrInst *> CBRs) {
+bool CallBrPrepare::InsertIntrinsicCalls(ArrayRef<CallBrInst *> CBRs,
+                                         DominatorTree &DT) const {
   bool Changed = false;
   SmallPtrSet<const BasicBlock *, 4> Visited;
   IRBuilder<> Builder(CBRs[0]->getContext());
   for (CallBrInst *CBR : CBRs) {
+    if (!CBR->getNumIndirectDests())
+      continue;
+
+    SSAUpdater SSAUpdate;
+    SSAUpdate.Initialize(CBR->getType(), CBR->getName());
+    SSAUpdate.AddAvailableValue(CBR->getParent(), CBR);
+    SSAUpdate.AddAvailableValue(CBR->getDefaultDest(), CBR);
+
     for (BasicBlock *IndDest : CBR->getIndirectDests()) {
       if (!Visited.insert(IndDest).second)
         continue;
       Builder.SetInsertPoint(&*IndDest->begin());
-      Builder.CreateIntrinsic(CBR->getType(), Intrinsic::callbr_landingpad,
-                              {CBR});
+      CallInst *Intrinsic = Builder.CreateIntrinsic(
+          CBR->getType(), Intrinsic::callbr_landingpad, {CBR});
+      SSAUpdate.AddAvailableValue(IndDest, Intrinsic);
+      UpdateSSA(DT, CBR, Intrinsic, SSAUpdate);
       Changed = true;
     }
   }
   return Changed;
 }
 
+static bool IsInSameBasicBlock(const Use &U, const BasicBlock *BB) {
+  const auto *I = dyn_cast<Instruction>(U.getUser());
+  return I && I->getParent() == BB;
+}
+
+static void PrintDebugDomInfo(const DominatorTree &DT, const Use &U,
+                              const BasicBlock *BB, bool IsDefaultDest) {
+  if (!isa<Instruction>(U.getUser())) // cast<Instruction> below requires it.
+    return;
+  const bool IsDominated = DT.dominates(BB, U);
+  LLVM_DEBUG(dbgs() << "Use: " << *U.getUser() << ", in block "
+                    << cast<Instruction>(U.getUser())->getParent()->getName()
+                    << ", is " << (IsDominated ? "" : "NOT ") << "dominated by "
+                    << BB->getName() << " (" << (IsDefaultDest ? "" : "in")
+                    << "direct)\n");
+}
+
+void CallBrPrepare::UpdateSSA(DominatorTree &DT, CallBrInst *CBR,
+                              CallInst *Intrinsic,
+                              SSAUpdater &SSAUpdate) const {
+
+  SmallPtrSet<Use *, 4> Visited;
+  BasicBlock *DefaultDest = CBR->getDefaultDest();
+  BasicBlock *LandingPad = Intrinsic->getParent();
+
+  SmallVector<Use *, 4> Uses(make_pointer_range(CBR->uses()));
+  for (Use *U : Uses) {
+    if (!Visited.insert(U).second)
+      continue;
+
+#ifndef NDEBUG
+    PrintDebugDomInfo(DT, *U, LandingPad, /*IsDefaultDest*/ false);
+    PrintDebugDomInfo(DT, *U, DefaultDest, /*IsDefaultDest*/ true);
+#endif
+
+    // Don't rewrite the use in the newly inserted intrinsic.
+    if (const auto *II = dyn_cast<IntrinsicInst>(U->getUser()))
+      if (II->getIntrinsicID() == Intrinsic::callbr_landingpad)
+        continue;
+
+    // If the Use is in the same BasicBlock as the Intrinsic call, replace
+    // the Use with the value of the Intrinsic call.
+    if (IsInSameBasicBlock(*U, LandingPad)) {
+      U->set(Intrinsic);
+      continue;
+    }
+
+    // If the Use is dominated by the default dest, do not touch it.
+    if (DT.dominates(DefaultDest, *U))
+      continue;
+
+    SSAUpdate.RewriteUse(*U);
+  }
+}
+
 bool CallBrPrepare::runOnFunction(Function &Fn) {
   bool Changed = false;
   SmallVector<CallBrInst *, 2> CBRs = FindCallBrs(Fn);
@@ -151,7 +223,7 @@ bool CallBrPrepare::runOnFunction(Function &Fn) {
   if (SplitCriticalEdges(CBRs, *DT))
     Changed = true;
 
-  if (InsertIntrinsicCalls(CBRs))
+  if (InsertIntrinsicCalls(CBRs, *DT))
     Changed = true;
 
   return Changed;

diff  --git a/llvm/test/CodeGen/AArch64/callbr-prepare.ll b/llvm/test/CodeGen/AArch64/callbr-prepare.ll
index b1faf186aeff..08b48d65c89a 100644
--- a/llvm/test/CodeGen/AArch64/callbr-prepare.ll
+++ b/llvm/test/CodeGen/AArch64/callbr-prepare.ll
@@ -18,7 +18,7 @@ define i32 @test0() {
 ; CHECK:       direct2:
 ; CHECK-NEXT:    ret i32 0
 ; CHECK:       indirect:
-; CHECK-NEXT:    [[OUT3:%.*]] = phi i32 [ [[OUT]], [[ENTRY_INDIRECT_CRIT_EDGE:%.*]] ], [ [[OUT2]], [[DIRECT_INDIRECT_CRIT_EDGE:%.*]] ]
+; CHECK-NEXT:    [[OUT3:%.*]] = phi i32 [ [[TMP0]], [[ENTRY_INDIRECT_CRIT_EDGE:%.*]] ], [ [[TMP1]], [[DIRECT_INDIRECT_CRIT_EDGE:%.*]] ]
 ; CHECK-NEXT:    ret i32 [[OUT3]]
 ;
 entry:
@@ -61,6 +61,8 @@ y:
 ; Don't split edges unless they are critical, and callbr produces output, and
 ; that output is used.
 ; Here we have output, but no critical edge.
+; That said, we ought to insert a callbr landing pad intrinsic call and update
+; to use the correct SSA value.
 define i32 @dont_split1() {
 ; CHECK-LABEL: @dont_split1(
 ; CHECK-NEXT:  entry:
@@ -70,7 +72,7 @@ define i32 @dont_split1() {
 ; CHECK-NEXT:    ret i32 42
 ; CHECK:       y:
 ; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.callbr.landingpad.i32(i32 [[TMP0]])
-; CHECK-NEXT:    ret i32 [[TMP0]]
+; CHECK-NEXT:    ret i32 [[TMP1]]
 ;
 entry:
   %0 = callbr i32 asm "", "=r,!i"()
@@ -146,7 +148,7 @@ define i32 @split_me0() {
 ; CHECK:       x:
 ; CHECK-NEXT:    br label [[Y]]
 ; CHECK:       y:
-; CHECK-NEXT:    [[TMP2:%.*]] = phi i32 [ [[TMP0]], [[ENTRY_Y_CRIT_EDGE:%.*]] ], [ 42, [[X]] ]
+; CHECK-NEXT:    [[TMP2:%.*]] = phi i32 [ [[TMP1]], [[ENTRY_Y_CRIT_EDGE:%.*]] ], [ 42, [[X]] ]
 ; CHECK-NEXT:    ret i32 [[TMP2]]
 ;
 entry:
@@ -177,7 +179,7 @@ define i32 @split_me1(i1 %z) {
 ; CHECK:       x:
 ; CHECK-NEXT:    ret i32 42
 ; CHECK:       v:
-; CHECK-NEXT:    [[TMP2:%.*]] = phi i32 [ [[TMP0]], [[W_V_CRIT_EDGE]] ], [ undef, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[TMP2:%.*]] = phi i32 [ [[TMP1]], [[W_V_CRIT_EDGE]] ], [ undef, [[ENTRY:%.*]] ]
 ; CHECK-NEXT:    ret i32 [[TMP2]]
 ;
 entry:
@@ -210,7 +212,7 @@ define i32 @split_me2(i1 %z) {
 ; CHECK:       x:
 ; CHECK-NEXT:    ret i32 42
 ; CHECK:       v:
-; CHECK-NEXT:    [[TMP2:%.*]] = phi i32 [ [[TMP0]], [[W_V_CRIT_EDGE]] ], [ 42, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[TMP2:%.*]] = phi i32 [ [[TMP1]], [[W_V_CRIT_EDGE]] ], [ 42, [[ENTRY:%.*]] ]
 ; CHECK-NEXT:    ret i32 [[TMP2]]
 ;
 entry:
@@ -227,3 +229,204 @@ v:
   %1 = phi i32 [ %0, %w ], [ 42, %entry ], [ %0, %w ]
   ret i32 %1
 }
+
+; Here we have a diamond with no phi.
+define i32 @dont_split4() {
+; CHECK-LABEL: @dont_split4(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = callbr i32 asm "", "=r,!i"()
+; CHECK-NEXT:    to label [[X:%.*]] [label %y]
+; CHECK:       x:
+; CHECK-NEXT:    br label [[OUT:%.*]]
+; CHECK:       y:
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.callbr.landingpad.i32(i32 [[TMP0]])
+; CHECK-NEXT:    br label [[OUT]]
+; CHECK:       out:
+; CHECK-NEXT:    [[TMP2:%.*]] = phi i32 [ [[TMP1]], [[Y:%.*]] ], [ [[TMP0]], [[X]] ]
+; CHECK-NEXT:    ret i32 [[TMP2]]
+;
+entry:
+  %0 = callbr i32 asm "", "=r,!i"()
+  to label %x [label %y]
+
+x:
+  br label %out
+
+y:
+  br label %out
+
+out:
+  ret i32 %0
+}
+
+; Triangle with no phi.
+define i32 @dont_split5() {
+; CHECK-LABEL: @dont_split5(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = callbr i32 asm "", "=r,!i"()
+; CHECK-NEXT:    to label [[OUT:%.*]] [label %y]
+; CHECK:       y:
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.callbr.landingpad.i32(i32 [[TMP0]])
+; CHECK-NEXT:    br label [[OUT]]
+; CHECK:       out:
+; CHECK-NEXT:    [[TMP2:%.*]] = phi i32 [ [[TMP1]], [[Y:%.*]] ], [ [[TMP0]], [[ENTRY:%.*]] ]
+; CHECK-NEXT:    ret i32 [[TMP2]]
+;
+entry:
+  %0 = callbr i32 asm "", "=r,!i"()
+  to label %out [label %y]
+
+y:
+  br label %out
+
+out:
+  ret i32 %0
+}
+
+; Triangle the other way with no phi.
+define i32 @split_me3() {
+; CHECK-LABEL: @split_me3(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = callbr i32 asm "", "=r,!i"()
+; CHECK-NEXT:    to label [[Y:%.*]] [label %entry.out_crit_edge]
+; CHECK:       entry.out_crit_edge:
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.callbr.landingpad.i32(i32 [[TMP0]])
+; CHECK-NEXT:    br label [[OUT:%.*]]
+; CHECK:       y:
+; CHECK-NEXT:    br label [[OUT]]
+; CHECK:       out:
+; CHECK-NEXT:    [[TMP2:%.*]] = phi i32 [ [[TMP1]], [[ENTRY_OUT_CRIT_EDGE:%.*]] ], [ [[TMP0]], [[Y]] ]
+; CHECK-NEXT:    ret i32 [[TMP2]]
+;
+entry:
+  %0 = callbr i32 asm "", "=r,!i"()
+  to label %y [label %out]
+
+y:
+  br label %out
+
+out:
+  ret i32 %0
+}
+
+; Test callbr looping back on itself.
+define i32 @dont_split6(i32 %0) {
+; CHECK-LABEL: @dont_split6(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[TMP1:%.*]] = phi i32 [ [[TMP0:%.*]], [[ENTRY:%.*]] ], [ [[TMP3:%.*]], [[LOOP_LOOP_CRIT_EDGE:%.*]] ]
+; CHECK-NEXT:    [[TMP2:%.*]] = callbr i32 asm "", "=r,0,!i"(i32 [[TMP1]])
+; CHECK-NEXT:    to label [[EXIT:%.*]] [label %loop.loop_crit_edge]
+; CHECK:       loop.loop_crit_edge:
+; CHECK-NEXT:    [[TMP3]] = call i32 @llvm.callbr.landingpad.i32(i32 [[TMP2]])
+; CHECK-NEXT:    br label [[LOOP]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret i32 0
+;
+entry:
+  br label %loop
+loop:
+  %1 = phi i32 [%0, %entry], [%2, %loop]
+  %2 = callbr i32 asm "", "=r,0,!i"(i32 %1) to label %exit [label %loop]
+exit:
+  ret i32 0
+}
+
+; Test same direct+indirect dest no phi.
+define i32 @split_me4() {
+; CHECK-LABEL: @split_me4(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = callbr i32 asm "", "=r,!i"()
+; CHECK-NEXT:    to label [[SAME:%.*]] [label %entry.same_crit_edge]
+; CHECK:       entry.same_crit_edge:
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.callbr.landingpad.i32(i32 [[TMP0]])
+; CHECK-NEXT:    br label [[SAME]]
+; CHECK:       same:
+; CHECK-NEXT:    [[TMP2:%.*]] = phi i32 [ [[TMP1]], [[ENTRY_SAME_CRIT_EDGE:%.*]] ], [ [[TMP0]], [[ENTRY:%.*]] ]
+; CHECK-NEXT:    ret i32 [[TMP2]]
+;
+entry:
+  %0 = callbr i32 asm "", "=r,!i"() to label %same [label %same]
+same:
+  ret i32 %0
+}
+
+; Test same direct+indirect dest w/ phi.
+define i32 @split_me5() {
+; CHECK-LABEL: @split_me5(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = callbr i32 asm "", "=r,!i"()
+; CHECK-NEXT:    to label [[SAME:%.*]] [label %entry.same_crit_edge]
+; CHECK:       entry.same_crit_edge:
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.callbr.landingpad.i32(i32 [[TMP0]])
+; CHECK-NEXT:    br label [[SAME]]
+; CHECK:       same:
+; CHECK-NEXT:    [[TMP2:%.*]] = phi i32 [ [[TMP1]], [[ENTRY_SAME_CRIT_EDGE:%.*]] ], [ [[TMP0]], [[ENTRY:%.*]] ]
+; CHECK-NEXT:    ret i32 [[TMP2]]
+;
+entry:
+  %0 = callbr i32 asm "", "=r,!i"() to label %same [label %same]
+same:
+  %1 = phi i32 [%0, %entry], [%0, %entry]
+  ret i32 %1
+}
+
+; "The Devil's cross" (i.e. two asm goto with conflicting physreg constraints
+; going to the same destination).
+define i64 @split_me6() {
+; CHECK-LABEL: @split_me6(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = callbr i64 asm "# $0 $1", "={dx},!i"()
+; CHECK-NEXT:    to label [[ASM_FALLTHROUGH:%.*]] [label %entry.foo_crit_edge]
+; CHECK:       entry.foo_crit_edge:
+; CHECK-NEXT:    [[TMP1:%.*]] = call i64 @llvm.callbr.landingpad.i64(i64 [[TMP0]])
+; CHECK-NEXT:    br label [[FOO:%.*]]
+; CHECK:       asm.fallthrough:
+; CHECK-NEXT:    [[TMP2:%.*]] = callbr i64 asm "# $0 $1", "={bx},!i"()
+; CHECK-NEXT:    to label [[FOO]] [label %asm.fallthrough.foo_crit_edge]
+; CHECK:       asm.fallthrough.foo_crit_edge:
+; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.callbr.landingpad.i64(i64 [[TMP2]])
+; CHECK-NEXT:    br label [[FOO]]
+; CHECK:       foo:
+; CHECK-NEXT:    [[X_0:%.*]] = phi i64 [ [[TMP1]], [[ENTRY_FOO_CRIT_EDGE:%.*]] ], [ [[TMP3]], [[ASM_FALLTHROUGH_FOO_CRIT_EDGE:%.*]] ], [ [[TMP2]], [[ASM_FALLTHROUGH]] ]
+; CHECK-NEXT:    ret i64 [[X_0]]
+;
+entry:
+  %0 = callbr i64 asm "# $0 $1", "={dx},!i"()
+  to label %asm.fallthrough [label %foo]
+
+asm.fallthrough:
+  %1 = callbr i64 asm "# $0 $1", "={bx},!i"()
+  to label %foo [label %foo]
+
+foo:
+  %x.0 = phi i64 [ %0, %entry ], [ %1, %asm.fallthrough ], [ %1, %asm.fallthrough ]
+  ret i64 %x.0
+}
+
+; Test the result of the callbr having multiple uses to avoid iterator
+; invalidation bugs in CallBrPrepare::UpdateSSA.
+define i32 @multiple_split() {
+; CHECK-LABEL: @multiple_split(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = callbr i32 asm "", "=r,!i"()
+; CHECK-NEXT:    to label [[X:%.*]] [label %y]
+; CHECK:       x:
+; CHECK-NEXT:    ret i32 42
+; CHECK:       y:
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.callbr.landingpad.i32(i32 [[TMP0]])
+; CHECK-NEXT:    [[TMP2:%.*]] = add nsw i32 [[TMP1]], [[TMP1]]
+; CHECK-NEXT:    ret i32 [[TMP2]]
+;
+entry:
+  %0 = callbr i32 asm "", "=r,!i"()
+  to label %x [label %y]
+
+x:
+  ret i32 42
+
+y:
+  %1 = add nsw i32 %0, %0
+  ret i32 %1
+}


        


More information about the llvm-commits mailing list