[llvm] 0efc9e5 - [ARM][MVE] More MVETailPredication debug messages. NFC.

Sjoerd Meijer via llvm-commits llvm-commits at lists.llvm.org
Mon Jan 6 01:58:37 PST 2020


Author: Sjoerd Meijer
Date: 2020-01-06T09:56:02Z
New Revision: 0efc9e5a8cc12b9cb30adf2a3dbb14ffbc60e338

URL: https://github.com/llvm/llvm-project/commit/0efc9e5a8cc12b9cb30adf2a3dbb14ffbc60e338
DIFF: https://github.com/llvm/llvm-project/commit/0efc9e5a8cc12b9cb30adf2a3dbb14ffbc60e338.diff

LOG: [ARM][MVE] More MVETailPredication debug messages. NFC.

I've added a few more debug messages to MVETailPredication because I wanted to
trace better which instructions are added/removed. And while I was at it, I
factored out one function which I thought was clearer, and have added some
comments to describe better the flow between MVETailPredication and
ARMLowOverheadLoops.

Differential Revision: https://reviews.llvm.org/D71549

Added: 
    

Modified: 
    llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp
    llvm/lib/Target/ARM/MVETailPredication.cpp

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp b/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp
index 136f7d7e8de0..31a98d86a54d 100644
--- a/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp
+++ b/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp
@@ -15,6 +15,10 @@
 /// - t2LoopDec - placed within in the loop body.
 /// - t2LoopEnd - the loop latch terminator.
 ///
+/// In addition to this, we also look for the presence of the VCTP instruction,
+/// which determines whether we can generate the tail-predicated low-overhead
+/// loop form.
+///
 //===----------------------------------------------------------------------===//
 
 #include "ARM.h"

diff --git a/llvm/lib/Target/ARM/MVETailPredication.cpp b/llvm/lib/Target/ARM/MVETailPredication.cpp
index 24bbc6236a4e..038c68739cdf 100644
--- a/llvm/lib/Target/ARM/MVETailPredication.cpp
+++ b/llvm/lib/Target/ARM/MVETailPredication.cpp
@@ -20,6 +20,11 @@
 /// - A tail-predicated loop, with implicit predication.
 /// - A loop containing multiple VCPT instructions, predicating multiple VPT
 ///   blocks of instructions operating on different vector types.
+///
+/// This pass inserts the VCTP intrinsic to represent the effect of
+/// tail predication. This will be picked up by the ARM Low-overhead loop pass,
+/// which performs the final transformation to a DLSTP or WLSTP tail-predicated
+/// loop.
 
 #include "ARM.h"
 #include "ARMSubtarget.h"
@@ -86,6 +91,12 @@ class MVETailPredication : public LoopPass {
   /// Is the icmp that generates an i1 vector, based upon a loop counter
   /// and a limit that is defined outside the loop.
   bool isTailPredicate(Instruction *Predicate, Value *NumElements);
+
+  /// Insert the intrinsic to represent the effect of tail predication.
+  void InsertVCTPIntrinsic(Instruction *Predicate,
+                           DenseMap<Instruction*, Instruction*> &NewPredicates,
+                           VectorType *VecTy,
+                           Value *NumElements);
 };
 
 } // end namespace
@@ -124,7 +135,7 @@ bool MVETailPredication::runOnLoop(Loop *L, LPPassManager&) {
   // The MVE and LOB extensions are combined to enable tail-predication, but
   // there's nothing preventing us from generating VCTP instructions for v8.1m.
   if (!ST->hasMVEIntegerOps() || !ST->hasV8_1MMainlineOps()) {
-    LLVM_DEBUG(dbgs() << "TP: Not a v8.1m.main+mve target.\n");
+    LLVM_DEBUG(dbgs() << "ARM TP: Not a v8.1m.main+mve target.\n");
     return false;
   }
 
@@ -149,7 +160,7 @@ bool MVETailPredication::runOnLoop(Loop *L, LPPassManager&) {
   // Look for the hardware loop intrinsic that sets the iteration count.
   IntrinsicInst *Setup = FindLoopIterations(Preheader);
 
-  // The test.set iteration could live in the pre- preheader.
+  // The test.set iteration could live in the pre-preheader.
   if (!Setup) {
     if (!Preheader->getSinglePredecessor())
       return false;
@@ -172,11 +183,9 @@ bool MVETailPredication::runOnLoop(Loop *L, LPPassManager&) {
   if (!Decrement)
     return false;
 
-  LLVM_DEBUG(dbgs() << "TP: Running on Loop: " << *L
-             << *Setup << "\n"
+  LLVM_DEBUG(dbgs() << "ARM TP: Running on Loop: " << *L << *Setup << "\n"
              << *Decrement << "\n");
-  bool Changed = TryConvert(Setup->getArgOperand(0));
-  return Changed;
+  return TryConvert(Setup->getArgOperand(0));
 }
 
 bool MVETailPredication::isTailPredicate(Instruction *I, Value *NumElements) {
@@ -235,7 +244,7 @@ bool MVETailPredication::isTailPredicate(Instruction *I, Value *NumElements) {
     return false;
 
   // Now back to searching inside the loop body...
-  // Find the add with takes the index iv and adds a constant vector to it. 
+  // Find the add with takes the index iv and adds a constant vector to it.
   Instruction *BroadcastSplat = nullptr;
   Constant *Const = nullptr;
   if (!match(Induction, m_Add(m_Instruction(BroadcastSplat),
@@ -270,14 +279,14 @@ bool MVETailPredication::isTailPredicate(Instruction *I, Value *NumElements) {
   Value *OnEntry = Phi->getIncomingValueForBlock(L->getLoopPreheader());
   if (!match(OnEntry, m_Zero()))
     return false;
-  
+
   Value *InLoop = Phi->getIncomingValueForBlock(L->getLoopLatch());
   unsigned Lanes = cast<VectorType>(Insert->getType())->getNumElements();
 
   Instruction *LHS = nullptr;
   if (!match(InLoop, m_Add(m_Instruction(LHS), m_SpecificInt(Lanes))))
     return false;
-  
+
   return LHS == Phi;
 }
 
@@ -299,7 +308,7 @@ bool MVETailPredication::IsPredicatedVectorLoop() {
         unsigned ElementWidth = VecTy->getScalarSizeInBits();
         // MVE vectors are 128-bit, but don't support 128 x i1.
         // TODO: Can we support vectors larger than 128-bits?
-        unsigned MaxWidth = TTI->getRegisterBitWidth(true); 
+        unsigned MaxWidth = TTI->getRegisterBitWidth(true);
         if (Lanes * ElementWidth > MaxWidth || Lanes == MaxWidth)
           return false;
         MaskedInsts.push_back(cast<IntrinsicInst>(&I));
@@ -400,19 +409,25 @@ Value* MVETailPredication::ComputeElements(Value *TripCount,
 // tail predicated loop.
 static void Cleanup(DenseMap<Instruction*, Instruction*> &NewPredicates,
                     SetVector<Instruction*> &MaybeDead, Loop *L) {
-  if (BasicBlock *Exit = L->getUniqueExitBlock()) {
-    for (auto &Pair : NewPredicates) {
-      Instruction *OldPred = Pair.first;
-      Instruction *NewPred = Pair.second;
-
-      for (auto &I : *Exit) {
-        if (I.isSameOperationAs(OldPred)) {
-          Instruction *PredClone = NewPred->clone();
-          PredClone->insertBefore(&I);
-          I.replaceAllUsesWith(PredClone);
-          MaybeDead.insert(&I);
-          break;
-        }
+  BasicBlock *Exit = L->getUniqueExitBlock();
+  if (!Exit) {
+    LLVM_DEBUG(dbgs() << "ARM TP: can't find loop exit block\n");
+    return;
+  }
+
+  for (auto &Pair : NewPredicates) {
+    Instruction *OldPred = Pair.first;
+    Instruction *NewPred = Pair.second;
+
+    for (auto &I : *Exit) {
+      if (I.isSameOperationAs(OldPred)) {
+        Instruction *PredClone = NewPred->clone();
+        PredClone->insertBefore(&I);
+        I.replaceAllUsesWith(PredClone);
+        MaybeDead.insert(&I);
+        LLVM_DEBUG(dbgs() << "ARM TP: replacing: "; I.dump();
+                   dbgs() << "ARM TP: with:      "; PredClone->dump());
+        break;
       }
     }
   }
@@ -433,23 +448,69 @@ static void Cleanup(DenseMap<Instruction*, Instruction*> &NewPredicates,
     Dead.insert(I);
   }
 
-  for (auto *I : Dead)
+  for (auto *I : Dead) {
+    LLVM_DEBUG(dbgs() << "ARM TP: removing dead insn: "; I->dump());
     I->eraseFromParent();
+  }
 
   for (auto I : L->blocks())
     DeleteDeadPHIs(I);
 }
 
+void MVETailPredication::InsertVCTPIntrinsic(Instruction *Predicate,
+    DenseMap<Instruction*, Instruction*> &NewPredicates,
+    VectorType *VecTy, Value *NumElements) {
+  IRBuilder<> Builder(L->getHeader()->getFirstNonPHI());
+  Module *M = L->getHeader()->getModule();
+  Type *Ty = IntegerType::get(M->getContext(), 32);
+
+  // Insert a phi to count the number of elements processed by the loop.
+  PHINode *Processed = Builder.CreatePHI(Ty, 2);
+  Processed->addIncoming(NumElements, L->getLoopPreheader());
+
+  // Insert the intrinsic to represent the effect of tail predication.
+  Builder.SetInsertPoint(cast<Instruction>(Predicate));
+  ConstantInt *Factor =
+    ConstantInt::get(cast<IntegerType>(Ty), VecTy->getNumElements());
+
+  Intrinsic::ID VCTPID;
+  switch (VecTy->getNumElements()) {
+  default:
+    llvm_unreachable("unexpected number of lanes");
+  case 4:  VCTPID = Intrinsic::arm_mve_vctp32; break;
+  case 8:  VCTPID = Intrinsic::arm_mve_vctp16; break;
+  case 16: VCTPID = Intrinsic::arm_mve_vctp8; break;
+
+    // FIXME: vctp64 currently not supported because the predicate
+    // vector wants to be <2 x i1>, but v2i1 is not a legal MVE
+    // type, so problems happen at isel time.
+    // Intrinsic::arm_mve_vctp64 exists for ACLE intrinsics
+    // purposes, but takes a v4i1 instead of a v2i1.
+  }
+  Function *VCTP = Intrinsic::getDeclaration(M, VCTPID);
+  Value *TailPredicate = Builder.CreateCall(VCTP, Processed);
+  Predicate->replaceAllUsesWith(TailPredicate);
+  NewPredicates[Predicate] = cast<Instruction>(TailPredicate);
+
+  // Add the incoming value to the new phi.
+  // TODO: This add likely already exists in the loop.
+  Value *Remaining = Builder.CreateSub(Processed, Factor);
+  Processed->addIncoming(Remaining, L->getLoopLatch());
+  LLVM_DEBUG(dbgs() << "ARM TP: Insert processed elements phi: "
+             << *Processed << "\n"
+             << "ARM TP: Inserted VCTP: " << *TailPredicate << "\n");
+}
+
 bool MVETailPredication::TryConvert(Value *TripCount) {
-  if (!IsPredicatedVectorLoop())
+  if (!IsPredicatedVectorLoop()) {
+    LLVM_DEBUG(dbgs() << "ARM TP: no masked instructions in loop");
     return false;
+  }
 
-  LLVM_DEBUG(dbgs() << "TP: Found predicated vector loop.\n");
+  LLVM_DEBUG(dbgs() << "ARM TP: Found predicated vector loop.\n");
 
   // Walk through the masked intrinsics and try to find whether the predicate
   // operand is generated from an induction variable.
-  Module *M = L->getHeader()->getModule();
-  Type *Ty = IntegerType::get(M->getContext(), 32);
   SetVector<Instruction*> Predicates;
   DenseMap<Instruction*, Instruction*> NewPredicates;
 
@@ -466,48 +527,14 @@ bool MVETailPredication::TryConvert(Value *TripCount) {
       continue;
 
     if (!isTailPredicate(Predicate, NumElements)) {
-      LLVM_DEBUG(dbgs() << "TP: Not tail predicate: " << *Predicate <<  "\n");
+      LLVM_DEBUG(dbgs() << "ARM TP: Not tail predicate: " << *Predicate << "\n");
       continue;
     }
 
-    LLVM_DEBUG(dbgs() << "TP: Found tail predicate: " << *Predicate << "\n");
+    LLVM_DEBUG(dbgs() << "ARM TP: Found tail predicate: " << *Predicate << "\n");
     Predicates.insert(Predicate);
 
-    // Insert a phi to count the number of elements processed by the loop.
-    IRBuilder<> Builder(L->getHeader()->getFirstNonPHI());
-    PHINode *Processed = Builder.CreatePHI(Ty, 2);
-    Processed->addIncoming(NumElements, L->getLoopPreheader());
-
-    // Insert the intrinsic to represent the effect of tail predication.
-    Builder.SetInsertPoint(cast<Instruction>(Predicate));
-    ConstantInt *Factor =
-      ConstantInt::get(cast<IntegerType>(Ty), VecTy->getNumElements());
-    Intrinsic::ID VCTPID;
-    switch (VecTy->getNumElements()) {
-    default:
-      llvm_unreachable("unexpected number of lanes");
-    case 4:  VCTPID = Intrinsic::arm_mve_vctp32; break;
-    case 8:  VCTPID = Intrinsic::arm_mve_vctp16; break;
-    case 16: VCTPID = Intrinsic::arm_mve_vctp8; break;
-
-      // FIXME: vctp64 currently not supported because the predicate
-      // vector wants to be <2 x i1>, but v2i1 is not a legal MVE
-      // type, so problems happen at isel time.
-      // Intrinsic::arm_mve_vctp64 exists for ACLE intrinsics
-      // purposes, but takes a v4i1 instead of a v2i1.
-    }
-    Function *VCTP = Intrinsic::getDeclaration(M, VCTPID);
-    Value *TailPredicate = Builder.CreateCall(VCTP, Processed);
-    Predicate->replaceAllUsesWith(TailPredicate);
-    NewPredicates[Predicate] = cast<Instruction>(TailPredicate);
-
-    // Add the incoming value to the new phi.
-    // TODO: This add likely already exists in the loop.
-    Value *Remaining = Builder.CreateSub(Processed, Factor);
-    Processed->addIncoming(Remaining, L->getLoopLatch());
-    LLVM_DEBUG(dbgs() << "TP: Insert processed elements phi: "
-               << *Processed << "\n"
-               << "TP: Inserted VCTP: " << *TailPredicate << "\n");
+    InsertVCTPIntrinsic(Predicate, NewPredicates, VecTy, NumElements);
   }
 
   // Now clean up.


        


More information about the llvm-commits mailing list