[llvm] [AArch64] Add MATCH loops to LoopIdiomVectorizePass (PR #101976)

via llvm-commits llvm-commits at lists.llvm.org
Mon Aug 5 06:31:48 PDT 2024


github-actions[bot] wrote:

<!--LLVM CODE FORMAT COMMENT: {clang-format}-->


:warning: The C/C++ code formatter, clang-format, found issues in your code. :warning:

<details>
<summary>
You can test this locally with the following command:
</summary>

``````````bash
git-clang-format --diff fe855666ee27ad95e4a23fe80d735d0484a4ed13 a6e26ffeb9770df51b0338fb151d9b314b192343 --extensions cpp,h -- llvm/include/llvm/Analysis/TargetTransformInfo.h llvm/include/llvm/Analysis/TargetTransformInfoImpl.h llvm/lib/Analysis/TargetTransformInfo.cpp llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp llvm/lib/Target/AArch64/AArch64ISelLowering.cpp llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h llvm/lib/Transforms/Vectorize/LoopIdiomVectorize.cpp
``````````

</details>

<details>
<summary>
View the diff from clang-format here.
</summary>

``````````diff
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 050807142f..c7e1015a71 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -6122,7 +6122,7 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
     assert(Op1.getValueType() == Op2.getValueType() && "Type mismatch.");
     assert(Op1.getValueSizeInBits().getKnownMinValue() == 128 &&
            "Custom lower only works on 128-bit segments.");
-    assert((Op1.getValueType().getVectorElementType() == MVT::i8  ||
+    assert((Op1.getValueType().getVectorElementType() == MVT::i8 ||
             Op1.getValueType().getVectorElementType() == MVT::i16) &&
            "Custom lower only supports 8-bit or 16-bit characters.");
     assert(SegmentSize == MinNumElts && "Custom lower needs segment size to "
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 806dc856c5..949470678e 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -3841,7 +3841,7 @@ bool AArch64TTIImpl::hasVectorMatch(VectorType *VT, unsigned SegSize) const {
   if (ST->hasSVE2() && ST->isSVEAvailable() &&
       VT->getPrimitiveSizeInBits().getKnownMinValue() == 128 &&
       VT->getElementCount().getKnownMinValue() == SegSize &&
-      (VT->getElementCount().getKnownMinValue() ==  8 ||
+      (VT->getElementCount().getKnownMinValue() == 8 ||
        VT->getElementCount().getKnownMinValue() == 16))
     return true;
   return false;
diff --git a/llvm/lib/Transforms/Vectorize/LoopIdiomVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopIdiomVectorize.cpp
index a9683f08c5..9cd01417b0 100644
--- a/llvm/lib/Transforms/Vectorize/LoopIdiomVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopIdiomVectorize.cpp
@@ -149,14 +149,14 @@ private:
                              unsigned VF, unsigned CharWidth,
                              BasicBlock *ExitSucc, BasicBlock *ExitFail,
                              GetElementPtrInst *GEPA, GetElementPtrInst *GEPB,
-                             Value *StartA, Value *EndA,
-                             Value *StartB, Value *EndB);
+                             Value *StartA, Value *EndA, Value *StartB,
+                             Value *EndB);
 
   void transformFindFirstByte(PHINode *IndPhi, unsigned VF, unsigned CharWidth,
                               BasicBlock *ExitSucc, BasicBlock *ExitFail,
                               GetElementPtrInst *GEPA, GetElementPtrInst *GEPB,
-                              Value *StartA, Value *EndA,
-                              Value *StartB, Value *EndB);
+                              Value *StartA, Value *EndA, Value *StartB,
+                              Value *EndB);
   /// @}
 };
 } // anonymous namespace
@@ -1010,12 +1010,14 @@ bool LoopIdiomVectorize::recognizeFindFirstByte() {
   // We are expecting the following blocks below. For now, we will bail out for
   // anything deviating from this.
   //
-  // .preheader:                                       ; preds = %.preheader.preheader, %23
+  // .preheader:                                       ; preds =
+  // %.preheader.preheader, %23
   //   %14 = phi ptr [ %24, %23 ], [ %3, %.preheader.preheader ]
   //   %15 = load i8, ptr %14, align 1, !tbaa !14
   //   br label %19
   //
-  // 19:                                               ; preds = %16, %.preheader
+  // 19:                                               ; preds = %16,
+  // %.preheader
   //   %20 = phi ptr [ %7, %.preheader ], [ %17, %16 ]
   //   %21 = load i8, ptr %20, align 1, !tbaa !14
   //   %22 = icmp eq i8 %15, %21
@@ -1096,8 +1098,8 @@ bool LoopIdiomVectorize::recognizeFindFirstByte() {
   // values.
   PHINode *PNA = dyn_cast<PHINode>(A);
   PHINode *PNB = dyn_cast<PHINode>(B);
-  if (!PNA || PNA->getNumIncomingValues() != 2 ||
-      !PNB || PNB->getNumIncomingValues() != 2)
+  if (!PNA || PNA->getNumIncomingValues() != 2 || !PNB ||
+      PNB->getNumIncomingValues() != 2)
     return false;
 
   // One PHI comes from the outer loop, the other one from the inner loop.
@@ -1139,8 +1141,7 @@ bool LoopIdiomVectorize::recognizeFindFirstByte() {
   if (!match(InnerBB->getTerminator(),
              m_Br(m_ICmp(MatchPred, m_Specific(GEPB), m_Value(EndB)),
                   m_BasicBlock(OuterBB), m_Specific(MatchBB))) ||
-      MatchPred != ICmpInst::Predicate::ICMP_EQ ||
-      !CurLoop->contains(OuterBB))
+      MatchPred != ICmpInst::Predicate::ICMP_EQ || !CurLoop->contains(OuterBB))
     return false;
 
   // OuterBB should increment the address of the element we are looking for.
@@ -1152,8 +1153,7 @@ bool LoopIdiomVectorize::recognizeFindFirstByte() {
       MatchPred != ICmpInst::Predicate::ICMP_EQ)
     return false;
 
-  LLVM_DEBUG(dbgs() << "FOUND IDIOM IN LOOP: \n"
-                    << *CurLoop << "\n\n");
+  LLVM_DEBUG(dbgs() << "FOUND IDIOM IN LOOP: \n" << *CurLoop << "\n\n");
 
   transformFindFirstByte(IndPhi, VF, CharWidth, ExitSucc, ExitFail, GEPA, GEPB,
                          StartA, EndA, StartB, EndB);
@@ -1162,9 +1162,9 @@ bool LoopIdiomVectorize::recognizeFindFirstByte() {
 
 Value *LoopIdiomVectorize::expandFindFirstByte(
     IRBuilder<> &Builder, DomTreeUpdater &DTU, unsigned VF, unsigned CharWidth,
-    BasicBlock *ExitSucc, BasicBlock *ExitFail,
-    GetElementPtrInst *GEPA, GetElementPtrInst *GEPB,
-    Value *StartA, Value *EndA, Value *StartB, Value *EndB) {
+    BasicBlock *ExitSucc, BasicBlock *ExitFail, GetElementPtrInst *GEPA,
+    GetElementPtrInst *GEPB, Value *StartA, Value *EndA, Value *StartB,
+    Value *EndB) {
   // Set up some types and constants that we intend to reuse.
   auto *I64Ty = Builder.getInt64Ty();
   auto *I32Ty = Builder.getInt32Ty();
@@ -1248,10 +1248,10 @@ Value *LoopIdiomVectorize::expandFindFirstByte(
                                   GEPA->isInBounds());
   Value *CheckIncA = Builder.CreateICmpUGT(IncA, EndA);
   Value *SelA = Builder.CreateSelect(CheckIncA, EndA, IncA);
-  Value *PredA = Builder.CreateIntrinsic(
-      Intrinsic::get_active_lane_mask, {PredVTy, I64Ty},
-      {Builder.CreatePointerCast(PNA, I64Ty),
-       Builder.CreatePointerCast(SelA, I64Ty)});
+  Value *PredA =
+      Builder.CreateIntrinsic(Intrinsic::get_active_lane_mask, {PredVTy, I64Ty},
+                              {Builder.CreatePointerCast(PNA, I64Ty),
+                               Builder.CreatePointerCast(SelA, I64Ty)});
   Value *LoadA =
       Builder.CreateMaskedLoad(CharVTy, PNA, Align(1), PredA, Passthru);
   Value *PredBInit = Builder.CreateIntrinsic(
@@ -1266,8 +1266,8 @@ Value *LoopIdiomVectorize::expandFindFirstByte(
   PHINode *PredBFull = Builder.CreatePHI(PredVTy, 2);
   Value *CheckB = Builder.CreateICmpULT(PNB, EndB);
   Builder.CreateCondBr(CheckB, BB4, BB1);
-  DTU.applyUpdates({{DominatorTree::Insert, BB3, BB4},
-                    {DominatorTree::Insert, BB3, BB1}});
+  DTU.applyUpdates(
+      {{DominatorTree::Insert, BB3, BB4}, {DominatorTree::Insert, BB3, BB1}});
 
   // (4) Check load B.
   Builder.SetInsertPoint(BB4);
@@ -1275,8 +1275,8 @@ Value *LoopIdiomVectorize::expandFindFirstByte(
                                   GEPB->isInBounds());
   Value *IfNotFullB = Builder.CreateICmpUGT(IncB, EndB);
   Builder.CreateCondBr(IfNotFullB, BB6, BB5);
-  DTU.applyUpdates({{DominatorTree::Insert, BB4, BB6},
-                    {DominatorTree::Insert, BB4, BB5}});
+  DTU.applyUpdates(
+      {{DominatorTree::Insert, BB4, BB6}, {DominatorTree::Insert, BB4, BB5}});
 
   // (5) Full load B.
   Builder.SetInsertPoint(BB5);
@@ -1287,10 +1287,10 @@ Value *LoopIdiomVectorize::expandFindFirstByte(
 
   // (6) Partial load B.
   Builder.SetInsertPoint(BB6);
-  Value *PredBPart = Builder.CreateIntrinsic(
-      Intrinsic::get_active_lane_mask, {PredVTy, I64Ty},
-      {Builder.CreatePointerCast(PNB, I64Ty),
-       Builder.CreatePointerCast(EndB, I64Ty)});
+  Value *PredBPart =
+      Builder.CreateIntrinsic(Intrinsic::get_active_lane_mask, {PredVTy, I64Ty},
+                              {Builder.CreatePointerCast(PNB, I64Ty),
+                               Builder.CreatePointerCast(EndB, I64Ty)});
   Value *LoadBPart =
       Builder.CreateMaskedLoad(CharVTy, PNB, Align(1), PredBPart, Passthru);
   Value *LoadB0 = Builder.CreateExtractElement(LoadBPart, uint64_t(0));
@@ -1309,8 +1309,8 @@ Value *LoopIdiomVectorize::expandFindFirstByte(
       {LoadA, LoadB, PredA, ConstantInt::get(I32Ty, VF)});
   Value *IfAnyMatch = Builder.CreateOrReduce(MatchPred);
   Builder.CreateCondBr(IfAnyMatch, BB8, BB3);
-  DTU.applyUpdates({{DominatorTree::Insert, BB7, BB8},
-                    {DominatorTree::Insert, BB7, BB3}});
+  DTU.applyUpdates(
+      {{DominatorTree::Insert, BB7, BB8}, {DominatorTree::Insert, BB7, BB3}});
 
   // (8) Match success.
   Builder.SetInsertPoint(BB8);
@@ -1350,9 +1350,8 @@ Value *LoopIdiomVectorize::expandFindFirstByte(
 }
 
 void LoopIdiomVectorize::transformFindFirstByte(
-    PHINode *IndPhi, unsigned VF, unsigned CharWidth,
-    BasicBlock *ExitSucc, BasicBlock *ExitFail,
-    GetElementPtrInst *GEPA, GetElementPtrInst *GEPB,
+    PHINode *IndPhi, unsigned VF, unsigned CharWidth, BasicBlock *ExitSucc,
+    BasicBlock *ExitFail, GetElementPtrInst *GEPA, GetElementPtrInst *GEPB,
     Value *StartA, Value *EndA, Value *StartB, Value *EndB) {
   // Insert the find first byte code at the end of the preheader block.
   BasicBlock *Preheader = CurLoop->getLoopPreheader();
@@ -1362,8 +1361,8 @@ void LoopIdiomVectorize::transformFindFirstByte(
   Builder.SetCurrentDebugLocation(PHBranch->getDebugLoc());
 
   Value *MatchVal =
-      expandFindFirstByte(Builder, DTU, VF, CharWidth, ExitSucc, ExitFail,
-                          GEPA, GEPB, StartA, EndA, StartB, EndB);
+      expandFindFirstByte(Builder, DTU, VF, CharWidth, ExitSucc, ExitFail, GEPA,
+                          GEPB, StartA, EndA, StartB, EndB);
 
   assert(PHBranch->isUnconditional() &&
          "Expected preheader to terminate with an unconditional branch.");
@@ -1379,5 +1378,5 @@ void LoopIdiomVectorize::transformFindFirstByte(
   // want to reuse them to implement an alternative path for small arrays, for
   // example.
 
-  //dbgs() << *Preheader->getParent() << "\n";
+  // dbgs() << *Preheader->getParent() << "\n";
 }

``````````

</details>


https://github.com/llvm/llvm-project/pull/101976


More information about the llvm-commits mailing list