[llvm] r364770 - [Hexagon] Rework VLCR algorithm

Krzysztof Parzyszek via llvm-commits llvm-commits at lists.llvm.org
Mon Jul 1 06:50:48 PDT 2019


Author: kparzysz
Date: Mon Jul  1 06:50:47 2019
New Revision: 364770

URL: http://llvm.org/viewvc/llvm-project?rev=364770&view=rev
Log:
[Hexagon] Rework VLCR algorithm

Add code to catch pattern for commutative instructions for VLCR.

Patch by Suyog Sarda.

Added:
    llvm/trunk/test/CodeGen/Hexagon/hexagon_vector_loop_carried_reuse_commutative.ll
Modified:
    llvm/trunk/lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.cpp

Modified: llvm/trunk/lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.cpp?rev=364770&r1=364769&r2=364770&view=diff
==============================================================================
--- llvm/trunk/lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.cpp (original)
+++ llvm/trunk/lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.cpp Mon Jul  1 06:50:47 2019
@@ -238,10 +238,17 @@ namespace {
     // used over the backedge. This is teh value that gets reused from a
     // previous iteration.
     Instruction *BackedgeInst = nullptr;
+    std::map<Instruction *, DepChain *> DepChains;
+    int Iterations = -1;
 
     ReuseValue() = default;
 
-    void reset() { Inst2Replace = nullptr; BackedgeInst = nullptr; }
+    void reset() {
+      Inst2Replace = nullptr;
+      BackedgeInst = nullptr;
+      DepChains.clear();
+      Iterations = -1;
+    }
     bool isDefined() { return Inst2Replace != nullptr; }
   };
 
@@ -288,10 +295,10 @@ namespace {
     void findDepChainFromPHI(Instruction *I, DepChain &D);
     void reuseValue();
     Value *findValueInBlock(Value *Op, BasicBlock *BB);
-    bool isDepChainBtwn(Instruction *I1, Instruction *I2, int Iters);
-    DepChain *getDepChainBtwn(Instruction *I1, Instruction *I2);
+    DepChain *getDepChainBtwn(Instruction *I1, Instruction *I2, int Iters);
     bool isEquivalentOperation(Instruction *I1, Instruction *I2);
     bool canReplace(Instruction *I);
+    bool isCallInstCommutative(CallInst *C);
   };
 
 } // end anonymous namespace
@@ -326,6 +333,70 @@ bool HexagonVectorLoopCarriedReuse::runO
   return doVLCR();
 }
 
+bool HexagonVectorLoopCarriedReuse::isCallInstCommutative(CallInst *C) {
+  switch (C->getCalledFunction()->getIntrinsicID()) {
+    case Intrinsic::hexagon_V6_vaddb:
+    case Intrinsic::hexagon_V6_vaddb_128B:
+    case Intrinsic::hexagon_V6_vaddh:
+    case Intrinsic::hexagon_V6_vaddh_128B:
+    case Intrinsic::hexagon_V6_vaddw:
+    case Intrinsic::hexagon_V6_vaddw_128B:
+    case Intrinsic::hexagon_V6_vaddubh:
+    case Intrinsic::hexagon_V6_vaddubh_128B:
+    case Intrinsic::hexagon_V6_vadduhw:
+    case Intrinsic::hexagon_V6_vadduhw_128B:
+    case Intrinsic::hexagon_V6_vaddhw:
+    case Intrinsic::hexagon_V6_vaddhw_128B:
+    case Intrinsic::hexagon_V6_vmaxb:
+    case Intrinsic::hexagon_V6_vmaxb_128B:
+    case Intrinsic::hexagon_V6_vmaxh:
+    case Intrinsic::hexagon_V6_vmaxh_128B:
+    case Intrinsic::hexagon_V6_vmaxw:
+    case Intrinsic::hexagon_V6_vmaxw_128B:
+    case Intrinsic::hexagon_V6_vmaxub:
+    case Intrinsic::hexagon_V6_vmaxub_128B:
+    case Intrinsic::hexagon_V6_vmaxuh:
+    case Intrinsic::hexagon_V6_vmaxuh_128B:
+    case Intrinsic::hexagon_V6_vminub:
+    case Intrinsic::hexagon_V6_vminub_128B:
+    case Intrinsic::hexagon_V6_vminuh:
+    case Intrinsic::hexagon_V6_vminuh_128B:
+    case Intrinsic::hexagon_V6_vminb:
+    case Intrinsic::hexagon_V6_vminb_128B:
+    case Intrinsic::hexagon_V6_vminh:
+    case Intrinsic::hexagon_V6_vminh_128B:
+    case Intrinsic::hexagon_V6_vminw:
+    case Intrinsic::hexagon_V6_vminw_128B:
+    case Intrinsic::hexagon_V6_vmpyub:
+    case Intrinsic::hexagon_V6_vmpyub_128B:
+    case Intrinsic::hexagon_V6_vmpyuh:
+    case Intrinsic::hexagon_V6_vmpyuh_128B:
+    case Intrinsic::hexagon_V6_vavgub:
+    case Intrinsic::hexagon_V6_vavgub_128B:
+    case Intrinsic::hexagon_V6_vavgh:
+    case Intrinsic::hexagon_V6_vavgh_128B:
+    case Intrinsic::hexagon_V6_vavguh:
+    case Intrinsic::hexagon_V6_vavguh_128B:
+    case Intrinsic::hexagon_V6_vavgw:
+    case Intrinsic::hexagon_V6_vavgw_128B:
+    case Intrinsic::hexagon_V6_vavgb:
+    case Intrinsic::hexagon_V6_vavgb_128B:
+    case Intrinsic::hexagon_V6_vavguw:
+    case Intrinsic::hexagon_V6_vavguw_128B:
+    case Intrinsic::hexagon_V6_vabsdiffh:
+    case Intrinsic::hexagon_V6_vabsdiffh_128B:
+    case Intrinsic::hexagon_V6_vabsdiffub:
+    case Intrinsic::hexagon_V6_vabsdiffub_128B:
+    case Intrinsic::hexagon_V6_vabsdiffuh:
+    case Intrinsic::hexagon_V6_vabsdiffuh_128B:
+    case Intrinsic::hexagon_V6_vabsdiffw:
+    case Intrinsic::hexagon_V6_vabsdiffw_128B:
+      return true;
+    default:
+      return false;
+  }
+}
+
 bool HexagonVectorLoopCarriedReuse::isEquivalentOperation(Instruction *I1,
                                                           Instruction *I2) {
   if (!I1->isSameOperationAs(I2))
@@ -360,13 +431,19 @@ bool HexagonVectorLoopCarriedReuse::isEq
 
 bool HexagonVectorLoopCarriedReuse::canReplace(Instruction *I) {
   const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I);
-  if (II &&
-      (II->getIntrinsicID() == Intrinsic::hexagon_V6_hi ||
-       II->getIntrinsicID() == Intrinsic::hexagon_V6_lo)) {
+  if (!II)
+    return true;
+
+  switch (II->getIntrinsicID()) {
+  case Intrinsic::hexagon_V6_hi:
+  case Intrinsic::hexagon_V6_lo:
+  case Intrinsic::hexagon_V6_hi_128B:
+  case Intrinsic::hexagon_V6_lo_128B:
     LLVM_DEBUG(dbgs() << "Not considering for reuse: " << *II << "\n");
     return false;
+  default:
+    return true;
   }
-  return true;
 }
 void HexagonVectorLoopCarriedReuse::findValueToReuse() {
   for (auto *D : Dependences) {
@@ -427,34 +504,85 @@ void HexagonVectorLoopCarriedReuse::find
 
         int NumOperands = I->getNumOperands();
 
-        for (int OpNo = 0; OpNo < NumOperands; ++OpNo) {
-          Value *Op = I->getOperand(OpNo);
-          Value *BEOp = BEUser->getOperand(OpNo);
-
-          Instruction *OpInst = dyn_cast<Instruction>(Op);
-          if (!OpInst) {
-            if (Op == BEOp)
-              continue;
-            // Do not allow reuse to occur when the operands may be different
-            // values.
-            BEUser = nullptr;
-            break;
+        // Take the operands of the PNUser one by one and try to find a DepChain
+        // to any operand of the BEUser. As soon as some BEUser operand has a
+        // DepChain with the current PNUser operand, stop the inner matcher
+        // loop and move on to the next PNUser operand. If a PNUser operand has
+        // no DepChain with any BEUser operand, break out of the outer matcher
+        // loop, mark the BEUser as null and reset the ReuseCandidate. This
+        // ensures that a DepChain exists from every PNUser operand to some
+        // BEUser operand, and that the match is independent of the operand
+        // positions in PNUser and BEUser.
+        std::map<Instruction *, DepChain *> DepChains;
+        CallInst *C1 = dyn_cast<CallInst>(I);
+        if ((I && I->isCommutative()) || (C1 && isCallInstCommutative(C1))) {
+          bool Found = false;
+          for (int OpNo = 0; OpNo < NumOperands; ++OpNo) {
+            Value *Op = I->getOperand(OpNo);
+            Instruction *OpInst = dyn_cast<Instruction>(Op);
+            Found = false;
+            for (int T = 0; T < NumOperands; ++T) {
+              Value *BEOp = BEUser->getOperand(T);
+              Instruction *BEOpInst = dyn_cast<Instruction>(BEOp);
+              if (!OpInst && !BEOpInst) {
+                if (Op == BEOp) {
+                  Found = true;
+                  break;
+                }
+              }
+
+              if ((OpInst && !BEOpInst) || (!OpInst && BEOpInst))
+                continue;
+
+              DepChain *D = getDepChainBtwn(OpInst, BEOpInst, Iters);
+
+              if (D) {
+                Found = true;
+                DepChains[OpInst] = D;
+                break;
+              }
+            }
+            if (!Found) {
+              BEUser = nullptr;
+              break;
+            }
           }
+        } else {
 
-          Instruction *BEOpInst = dyn_cast<Instruction>(BEOp);
-
-          if (!isDepChainBtwn(OpInst, BEOpInst, Iters)) {
-            BEUser = nullptr;
-            break;
+          for (int OpNo = 0; OpNo < NumOperands; ++OpNo) {
+            Value *Op = I->getOperand(OpNo);
+            Value *BEOp = BEUser->getOperand(OpNo);
+
+            Instruction *OpInst = dyn_cast<Instruction>(Op);
+            if (!OpInst) {
+              if (Op == BEOp)
+                continue;
+              // Do not allow reuse to occur when the operands may be different
+              // values.
+              BEUser = nullptr;
+              break;
+            }
+
+            Instruction *BEOpInst = dyn_cast<Instruction>(BEOp);
+            DepChain *D = getDepChainBtwn(OpInst, BEOpInst, Iters);
+
+            if (D) {
+              DepChains[OpInst] = D;
+            } else {
+              BEUser = nullptr;
+              break;
+            }
           }
         }
         if (BEUser) {
           LLVM_DEBUG(dbgs() << "Found Value for reuse.\n");
           ReuseCandidate.Inst2Replace = I;
           ReuseCandidate.BackedgeInst = BEUser;
+          ReuseCandidate.DepChains = DepChains;
+          ReuseCandidate.Iterations = Iters;
           return;
-        } else
-          ReuseCandidate.reset();
+        }
+        ReuseCandidate.reset();
       }
     }
   }
@@ -474,27 +602,10 @@ void HexagonVectorLoopCarriedReuse::reus
   Instruction *Inst2Replace = ReuseCandidate.Inst2Replace;
   Instruction *BEInst = ReuseCandidate.BackedgeInst;
   int NumOperands = Inst2Replace->getNumOperands();
-  std::map<Instruction *, DepChain *> DepChains;
-  int Iterations = -1;
+  std::map<Instruction *, DepChain *> &DepChains = ReuseCandidate.DepChains;
+  int Iterations = ReuseCandidate.Iterations;
   BasicBlock *LoopPH = CurLoop->getLoopPreheader();
-
-  for (int i = 0; i < NumOperands; ++i) {
-    Instruction *I = dyn_cast<Instruction>(Inst2Replace->getOperand(i));
-    if(!I)
-      continue;
-    else {
-      Instruction *J = cast<Instruction>(BEInst->getOperand(i));
-      DepChain *D = getDepChainBtwn(I, J);
-
-      assert(D &&
-             "No DepChain between corresponding operands in ReuseCandidate\n");
-      if (Iterations == -1)
-        Iterations = D->iterations();
-      assert(Iterations == D->iterations() && "Iterations mismatch");
-      DepChains[I] = D;
-    }
-  }
-
+  assert(!DepChains.empty() && "No DepChains");
   LLVM_DEBUG(dbgs() << "reuseValue is making the following changes\n");
 
   SmallVector<Instruction *, 4> InstsInPreheader;
@@ -603,20 +714,11 @@ void HexagonVectorLoopCarriedReuse::find
   }
 }
 
-bool HexagonVectorLoopCarriedReuse::isDepChainBtwn(Instruction *I1,
-                                                      Instruction *I2,
-                                                      int Iters) {
-  for (auto *D : Dependences) {
-    if (D->front() == I1 && D->back() == I2 && D->iterations() == Iters)
-      return true;
-  }
-  return false;
-}
-
 DepChain *HexagonVectorLoopCarriedReuse::getDepChainBtwn(Instruction *I1,
-                                                            Instruction *I2) {
+                                                         Instruction *I2,
+                                                         int Iters) {
   for (auto *D : Dependences) {
-    if (D->front() == I1 && D->back() == I2)
+    if (D->front() == I1 && D->back() == I2 && D->iterations() == Iters)
       return D;
   }
   return nullptr;

Added: llvm/trunk/test/CodeGen/Hexagon/hexagon_vector_loop_carried_reuse_commutative.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Hexagon/hexagon_vector_loop_carried_reuse_commutative.ll?rev=364770&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/Hexagon/hexagon_vector_loop_carried_reuse_commutative.ll (added)
+++ llvm/trunk/test/CodeGen/Hexagon/hexagon_vector_loop_carried_reuse_commutative.ll Mon Jul  1 06:50:47 2019
@@ -0,0 +1,82 @@
+; RUN: opt -march=hexagon < %s -hexagon-vlcr -adce -S | FileCheck %s
+
+; CHECK: %v32.hexagon.vlcr = tail call <32 x i32> @llvm.hexagon.V6.vmaxub.128B
+
+target datalayout = "e-m:e-p:32:32:32-a:0-n16:32-i64:64:64-i32:32:32-i16:16:16-i1:8:8-f32:32:32-f64:64:64-v32:32:32-v64:64:64-v512:512:512-v1024:1024:1024-v2048:2048:2048"
+target triple = "hexagon"
+
+@g0 = external local_unnamed_addr global i32, align 4
+
+; Function Attrs: nounwind
+define void @f0(i8* noalias nocapture readonly %a0, i8* noalias nocapture %a1, i32 %a2) local_unnamed_addr #0 {
+b0:
+  %v0 = getelementptr inbounds i8, i8* %a0, i32 %a2
+  %v1 = mul nsw i32 %a2, 2
+  %v2 = getelementptr inbounds i8, i8* %a0, i32 %v1
+  %v3 = load i32, i32* @g0, align 4, !tbaa !0
+  %v4 = icmp sgt i32 %v3, 0
+  br i1 %v4, label %b1, label %b4
+
+b1:                                               ; preds = %b0
+  %v5 = bitcast i8* %v2 to <32 x i32>*
+  %v6 = load <32 x i32>, <32 x i32>* %v5, align 128, !tbaa !4
+  %v7 = getelementptr inbounds i8, i8* %v2, i32 128
+  %v8 = bitcast i8* %v7 to <32 x i32>*
+  %v9 = bitcast i8* %v0 to <32 x i32>*
+  %v10 = load <32 x i32>, <32 x i32>* %v9, align 128, !tbaa !4
+  %v11 = getelementptr inbounds i8, i8* %v0, i32 128
+  %v12 = bitcast i8* %v11 to <32 x i32>*
+  %v13 = bitcast i8* %a0 to <32 x i32>*
+  %v14 = load <32 x i32>, <32 x i32>* %v13, align 128, !tbaa !4
+  %v15 = getelementptr inbounds i8, i8* %a0, i32 128
+  %v16 = bitcast i8* %v15 to <32 x i32>*
+  %v17 = bitcast i8* %a1 to <32 x i32>*
+  br label %b2
+
+b2:                                               ; preds = %b2, %b1
+  %v18 = phi <32 x i32>* [ %v17, %b1 ], [ %v37, %b2 ]
+  %v19 = phi <32 x i32>* [ %v8, %b1 ], [ %v30, %b2 ]
+  %v20 = phi <32 x i32>* [ %v12, %b1 ], [ %v28, %b2 ]
+  %v21 = phi <32 x i32>* [ %v16, %b1 ], [ %v26, %b2 ]
+  %v22 = phi i32 [ 0, %b1 ], [ %v38, %b2 ]
+  %v23 = phi <32 x i32> [ %v14, %b1 ], [ %v27, %b2 ]
+  %v24 = phi <32 x i32> [ %v10, %b1 ], [ %v29, %b2 ]
+  %v25 = phi <32 x i32> [ %v6, %b1 ], [ %v31, %b2 ]
+  %v26 = getelementptr inbounds <32 x i32>, <32 x i32>* %v21, i32 1
+  %v27 = load <32 x i32>, <32 x i32>* %v21, align 128, !tbaa !4
+  %v28 = getelementptr inbounds <32 x i32>, <32 x i32>* %v20, i32 1
+  %v29 = load <32 x i32>, <32 x i32>* %v20, align 128, !tbaa !4
+  %v30 = getelementptr inbounds <32 x i32>, <32 x i32>* %v19, i32 1
+  %v31 = load <32 x i32>, <32 x i32>* %v19, align 128, !tbaa !4
+  %v32 = tail call <32 x i32> @llvm.hexagon.V6.vmaxub.128B(<32 x i32> %v23, <32 x i32> %v24)
+  %v33 = tail call <32 x i32> @llvm.hexagon.V6.vmaxub.128B(<32 x i32> %v32, <32 x i32> %v25)
+  %v34 = tail call <32 x i32> @llvm.hexagon.V6.vmaxub.128B(<32 x i32> %v29, <32 x i32> %v27)
+  %v35 = tail call <32 x i32> @llvm.hexagon.V6.vmaxub.128B(<32 x i32> %v34, <32 x i32> %v31)
+  %v36 = tail call <32 x i32> @llvm.hexagon.V6.valignbi.128B(<32 x i32> %v35, <32 x i32> %v33, i32 1)
+  %v37 = getelementptr inbounds <32 x i32>, <32 x i32>* %v18, i32 1
+  store <32 x i32> %v36, <32 x i32>* %v18, align 128, !tbaa !4
+  %v38 = add nuw nsw i32 %v22, 128
+  %v39 = icmp slt i32 %v38, %v3
+  br i1 %v39, label %b2, label %b3
+
+b3:                                               ; preds = %b2
+  br label %b4
+
+b4:                                               ; preds = %b3, %b0
+  ret void
+}
+
+; Function Attrs: nounwind readnone
+declare <32 x i32> @llvm.hexagon.V6.vmaxub.128B(<32 x i32>, <32 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <32 x i32> @llvm.hexagon.V6.valignbi.128B(<32 x i32>, <32 x i32>, i32) #1
+
+attributes #0 = { nounwind "target-cpu"="hexagonv60" "target-features"="+hvx,+hvx-length128b,-long-calls" }
+attributes #1 = { nounwind readnone }
+
+!0 = !{!1, !1, i64 0}
+!1 = !{!"int", !2, i64 0}
+!2 = !{!"omnipotent char", !3, i64 0}
+!3 = !{!"Simple C/C++ TBAA"}
+!4 = !{!2, !2, i64 0}




More information about the llvm-commits mailing list