[llvm] 42b34fa - Recommit "[LV] Inline CreateSplatIV call for scalar VFs."

Fri Jan 14 11:04:56 PST 2022

Author: Florian Hahn
Date: 2022-01-14T19:03:49Z
New Revision: 42b34facfdfe4c280d5d90498409ca25d1750f16

URL: https://github.com/llvm/llvm-project/commit/42b34facfdfe4c280d5d90498409ca25d1750f16
DIFF: https://github.com/llvm/llvm-project/commit/42b34facfdfe4c280d5d90498409ca25d1750f16.diff

LOG: Recommit "[LV] Inline CreateSplatIV call for scalar VFs."

This reverts the revert commit 073c27b5e5851f13d99d383e047309299b68827d.

A reduced test case has been added in 5e4966cbae7ba5 and the code has
been updated to handle the case where getInductionOpcode returns
BinaryOpsEnd. In this case, the original code was always using
Instruction::Add. Do the same in the patch.

Note this commit may slightly change the value naming, because it now
also assigns the 'induction' name in the floating point case.

Added: 
    

Modified: 
    llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
    llvm/test/Transforms/LoopVectorize/float-induction.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index c5b886a708bc6..a2ee1e75f54f2 100644

--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -2333,19 +2333,7 @@ Value *InnerLoopVectorizer::getBroadcastInstrs(Value *V) {
 static Value *getStepVector(Value *Val, Value *StartIdx, Value *Step,
                             Instruction::BinaryOps BinOp, ElementCount VF,
                             IRBuilder<> &Builder) {
-  if (VF.isScalar()) {
-    // When unrolling and the VF is 1, we only need to add a simple scalar.
-    Type *Ty = Val->getType();
-    assert(!Ty->isVectorTy() && "Val must be a scalar");
-
-    if (Ty->isFloatingPointTy()) {
-      // Floating-point operations inherit FMF via the builder's flags.
-      Value *MulOp = Builder.CreateFMul(StartIdx, Step);
-      return Builder.CreateBinOp(BinOp, Val, MulOp);
-    }
-    return Builder.CreateAdd(Val, Builder.CreateMul(StartIdx, Step),
-                             "induction");
-  }
+  assert(VF.isVector() && "only vector VFs are supported");
 
   // Create and check the types.
   auto *ValVTy = cast<VectorType>(Val->getType());
@@ -2580,7 +2568,29 @@ void InnerLoopVectorizer::widenIntOrFpInduction(
   Value *Step = CreateStepValue(ID.getStep());
   if (State.VF.isScalar()) {
     Value *ScalarIV = CreateScalarIV(Step);
-    CreateSplatIV(ScalarIV, Step);
+    Type *ScalarTy = IntegerType::get(ScalarIV->getContext(),
+                                      Step->getType()->getScalarSizeInBits());
+
+    Instruction::BinaryOps IncOp = ID.getInductionOpcode();
+    if (IncOp == Instruction::BinaryOpsEnd)
+      IncOp = Instruction::Add;
+    for (unsigned Part = 0; Part < UF; ++Part) {
+      Value *StartIdx = ConstantInt::get(ScalarTy, Part);
+      Instruction::BinaryOps MulOp = Instruction::Mul;
+      if (Step->getType()->isFloatingPointTy()) {
+        StartIdx = Builder.CreateUIToFP(StartIdx, Step->getType());
+        MulOp = Instruction::FMul;
+      }
+
+      Value *Mul = Builder.CreateBinOp(MulOp, StartIdx, Step);
+      Value *EntryPart = Builder.CreateBinOp(IncOp, ScalarIV, Mul, "induction");
+      State.set(Def, EntryPart, Part);
+      if (Trunc) {
+        assert(!Step->getType()->isFloatingPointTy() &&
+               "fp inductions shouldn't be truncated");
+        addMetadata(EntryPart, Trunc);
+      }
+    }
     return;
   }
 

diff  --git a/llvm/test/Transforms/LoopVectorize/float-induction.ll b/llvm/test/Transforms/LoopVectorize/float-induction.ll
index ab243a1b9a925..00ae26cd6a83d 100644
--- a/llvm/test/Transforms/LoopVectorize/float-induction.ll
+++ b/llvm/test/Transforms/LoopVectorize/float-induction.ll
@@ -1603,11 +1603,11 @@ define void @non_primary_iv_float_scalar(float* %A, i64 %N) {
 ; VEC1_INTERL2-NEXT:    br label [[PRED_STORE_CONTINUE]]
 ; VEC1_INTERL2:       pred.store.continue:
 ; VEC1_INTERL2-NEXT:    br i1 [[TMP6]], label [[PRED_STORE_IF4:%.*]], label [[PRED_STORE_CONTINUE5]]
-; VEC1_INTERL2:       pred.store.if4:
+; VEC1_INTERL2:       pred.store.if6:
 ; VEC1_INTERL2-NEXT:    [[TMP7:%.*]] = fadd fast float [[TMP0]], 1.000000e+00
 ; VEC1_INTERL2-NEXT:    store float [[TMP7]], float* [[TMP2]], align 4
 ; VEC1_INTERL2-NEXT:    br label [[PRED_STORE_CONTINUE5]]
-; VEC1_INTERL2:       pred.store.continue5:
+; VEC1_INTERL2:       pred.store.continue7:
 ; VEC1_INTERL2-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
 ; VEC1_INTERL2-NEXT:    [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; VEC1_INTERL2-NEXT:    br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]