[llvm] f9d0b35 - [LV] Re-use already computed runtime VF in fixFixedOrderRecurrence.

Florian Hahn via llvm-commits llvm-commits at lists.llvm.org
Mon Apr 10 13:25:31 PDT 2023


Author: Florian Hahn
Date: 2023-04-10T21:25:12+01:00
New Revision: f9d0b35d2238b5d8977d1d920ee7311b33f0966c

URL: https://github.com/llvm/llvm-project/commit/f9d0b35d2238b5d8977d1d920ee7311b33f0966c
DIFF: https://github.com/llvm/llvm-project/commit/f9d0b35d2238b5d8977d1d920ee7311b33f0966c.diff

LOG: [LV] Re-use already computed runtime VF in fixFixedOrderRecurrence.

This was suggested as an independent cleanup in D147472.

This removes a redundant runtime VF computation when using scalable
vectors.

Added: 
    

Modified: 
    llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
    llvm/test/Transforms/LoopVectorize/scalable-first-order-recurrence.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index ccbafccd9fb1..2b7f1f7f5cd8 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -3861,13 +3861,14 @@ void InnerLoopVectorizer::fixFixedOrderRecurrence(
   Value *Incoming = State.get(PreviousDef, UF - 1);
   auto *ExtractForScalar = Incoming;
   auto *IdxTy = Builder.getInt32Ty();
+  Value *RuntimeVF = nullptr;
   if (VF.isVector()) {
     auto *One = ConstantInt::get(IdxTy, 1);
     Builder.SetInsertPoint(LoopMiddleBlock->getTerminator());
-    auto *RuntimeVF = getRuntimeVF(Builder, IdxTy, VF);
+    RuntimeVF = getRuntimeVF(Builder, IdxTy, VF);
     auto *LastIdx = Builder.CreateSub(RuntimeVF, One);
-    ExtractForScalar = Builder.CreateExtractElement(ExtractForScalar, LastIdx,
-                                                    "vector.recur.extract");
+    ExtractForScalar =
+        Builder.CreateExtractElement(Incoming, LastIdx, "vector.recur.extract");
   }
 
   auto RecurSplice = cast<VPInstruction>(*PhiR->user_begin());
@@ -3888,7 +3889,6 @@ void InnerLoopVectorizer::fixFixedOrderRecurrence(
     // LoopMiddleBlock, when the scalar loop is not run at all.
     Value *ExtractForPhiUsedOutsideLoop = nullptr;
     if (VF.isVector()) {
-      auto *RuntimeVF = getRuntimeVF(Builder, IdxTy, VF);
       auto *Idx = Builder.CreateSub(RuntimeVF, ConstantInt::get(IdxTy, 2));
       ExtractForPhiUsedOutsideLoop = Builder.CreateExtractElement(
           Incoming, Idx, "vector.recur.extract.for.phi");

diff  --git a/llvm/test/Transforms/LoopVectorize/scalable-first-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/scalable-first-order-recurrence.ll
index c5bbda6ba8ec..1cd6a47684a3 100644
--- a/llvm/test/Transforms/LoopVectorize/scalable-first-order-recurrence.ll
+++ b/llvm/test/Transforms/LoopVectorize/scalable-first-order-recurrence.ll
@@ -27,9 +27,7 @@ define i32 @recurrence_1(ptr nocapture readonly %a, ptr nocapture %b, i32 %n) {
 ; CHECK-VF4UF1: %[[MUL2:.*]] = mul i32 %[[VSCALE2]], 4
 ; CHECK-VF4UF1: %[[SUB2:.*]] = sub i32 %[[MUL2]], 1
 ; CHECK-VF4UF1: %[[VEC_RECUR_EXT:.*]] = extractelement <vscale x 4 x i32> %[[LOAD]], i32 %[[SUB2]]
-; CHECK-VF4UF1: %[[VSCALE3:.*]] = call i32 @llvm.vscale.i32()
-; CHECK-VF4UF1: %[[MUL3:.*]] = mul i32 %[[VSCALE3]], 4
-; CHECK-VF4UF1: %[[SUB3:.*]] = sub i32 %[[MUL3]], 2
+; CHECK-VF4UF1: %[[SUB3:.*]] = sub i32 %[[MUL2]], 2
 ; CHECK-VF4UF1: %[[VEC_RECUR_FOR_PHI:.*]] =  extractelement <vscale x 4 x i32> %[[LOAD]], i32 %[[SUB3]]
 entry:
   br label %for.preheader
@@ -216,9 +214,7 @@ define i32 @extract_second_last_iteration(ptr %cval, i32 %x)  {
 ; CHECK-VF4UF2: %[[MUL2:.*]] = mul i32 %[[VSCALE2]], 4
 ; CHECK-VF4UF2: %[[SUB2:.*]] = sub i32 %[[MUL2]], 1
 ; CHECK-VF4UF2: %vector.recur.extract = extractelement <vscale x 4 x i32> %[[ADD2]], i32 %[[SUB2]]
-; CHECK-VF4UF2: %[[VSCALE3:.*]] = call i32 @llvm.vscale.i32()
-; CHECK-VF4UF2: %[[MUL3:.*]] = mul i32 %[[VSCALE3]], 4
-; CHECK-VF4UF2: %[[SUB3:.*]] = sub i32 %[[MUL3]], 2
+; CHECK-VF4UF2: %[[SUB3:.*]] = sub i32 %[[MUL2]], 2
 ; CHECK-VF4UF2: %vector.recur.extract.for.phi = extractelement <vscale x 4 x i32> %[[ADD2]], i32 %[[SUB3]]
 entry:
   br label %for.body


        


More information about the llvm-commits mailing list