[libcxx-commits] [libcxxabi] [clang] [libc] [lld] [flang] [clang-tools-extra] [lldb] [libcxx] [compiler-rt] [llvm] [libclc] [VPlan] Add new VPScalarCastRecipe, use for IV & step trunc. (PR #78113)
Florian Hahn via libcxx-commits
libcxx-commits at lists.llvm.org
Thu Jan 25 15:06:48 PST 2024
https://github.com/fhahn updated https://github.com/llvm/llvm-project/pull/78113
>From 36b085f21b76d7bf7c9965a86a09d1cef4fe9329 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Sun, 14 Jan 2024 14:13:08 +0000
Subject: [PATCH 1/8] [VPlan] Add new VPUniformPerUFRecipe, use for step
truncation.
Add a new recipe to model uniform-per-UF instructions, without relying
on an underlying instruction. Initially, it supports uniform cast-ops
and is therefore storing the result type.
Not relying on an underlying instruction (as the current
VPReplicateRecipe does) allows creating instances without a corresponding
instruction.
In the future, the plan is to extend this recipe to handle all opcodes
needed to replace the uniform part of VPReplicateRecipe.
---
llvm/lib/Transforms/Vectorize/VPlan.h | 30 ++++++++++++
.../Transforms/Vectorize/VPlanAnalysis.cpp | 6 ++-
.../lib/Transforms/Vectorize/VPlanRecipes.cpp | 49 ++++++++++++++++---
.../Transforms/Vectorize/VPlanTransforms.cpp | 9 ++++
llvm/lib/Transforms/Vectorize/VPlanValue.h | 1 +
.../LoopVectorize/cast-induction.ll | 4 +-
.../interleave-and-scalarize-only.ll | 3 +-
.../pr46525-expander-insertpoint.ll | 2 +-
8 files changed, 93 insertions(+), 11 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 4b4f4911eb6415e..d5985224cccc488 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -1945,6 +1945,36 @@ class VPReplicateRecipe : public VPRecipeWithIRFlags, public VPValue {
}
};
+/// VPUniformPerUFRecipe represents an instruction with Opcode that is uniform
+/// per UF, i.e. it generates a single scalar instance per UF.
+/// TODO: at the moment, only Cast opcodes are supported, extend to support
+/// missing opcodes to replace uniform part of VPReplicateRecipe.
+class VPUniformPerUFRecipe : public VPRecipeBase, public VPValue {
+ unsigned Opcode;
+
+ /// Result type for the cast.
+ Type *ResultTy;
+
+ Value *generate(VPTransformState &State, unsigned Part);
+
+public:
+ VPUniformPerUFRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy)
+ : VPRecipeBase(VPDef::VPUniformPerUFSC, {Op}), VPValue(this),
+ Opcode(Opcode), ResultTy(ResultTy) {}
+
+ ~VPUniformPerUFRecipe() override = default;
+
+ VP_CLASSOF_IMPL(VPDef::VPWidenIntOrFpInductionSC)
+
+ void execute(VPTransformState &State) override;
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ /// Print the recipe.
+ void print(raw_ostream &O, const Twine &Indent,
+ VPSlotTracker &SlotTracker) const override;
+#endif
+};
+
/// A recipe for generating conditional branches on the bits of a mask.
class VPBranchOnMaskRecipe : public VPRecipeBase {
public:
diff --git a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
index 97a8a1803bbf5a5..d71b07039944500 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
@@ -230,7 +230,11 @@ Type *VPTypeAnalysis::inferScalarType(const VPValue *V) {
return V->getUnderlyingValue()->getType();
})
.Case<VPWidenCastRecipe>(
- [](const VPWidenCastRecipe *R) { return R->getResultType(); });
+ [](const VPWidenCastRecipe *R) { return R->getResultType(); })
+ .Case<VPExpandSCEVRecipe>([](const VPExpandSCEVRecipe *R) {
+ return R->getSCEV()->getType();
+ });
+
assert(ResultTy && "could not infer type for the given VPValue");
CachedTypes[V] = ResultTy;
return ResultTy;
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 1f844bce23102e2..423504e8f7e05e7 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -164,6 +164,8 @@ bool VPRecipeBase::mayHaveSideEffects() const {
auto *R = cast<VPReplicateRecipe>(this);
return R->getUnderlyingInstr()->mayHaveSideEffects();
}
+ case VPUniformPerUFSC:
+ return false;
default:
return true;
}
@@ -1117,13 +1119,7 @@ void VPScalarIVStepsRecipe::execute(VPTransformState &State) {
// Ensure step has the same type as that of scalar IV.
Type *BaseIVTy = BaseIV->getType()->getScalarType();
- if (BaseIVTy != Step->getType()) {
- // TODO: Also use VPDerivedIVRecipe when only the step needs truncating, to
- // avoid separate truncate here.
- assert(Step->getType()->isIntegerTy() &&
- "Truncation requires an integer step");
- Step = State.Builder.CreateTrunc(Step, BaseIVTy);
- }
+ assert(BaseIVTy == Step->getType());
// We build scalar steps for both integer and floating-point induction
// variables. Here, we determine the kind of arithmetic we will perform.
@@ -1469,6 +1465,45 @@ void VPReplicateRecipe::print(raw_ostream &O, const Twine &Indent,
}
#endif
+Value *VPUniformPerUFRecipe ::generate(VPTransformState &State, unsigned Part) {
+ switch (Opcode) {
+ case Instruction::SExt:
+ case Instruction::ZExt:
+ case Instruction::Trunc: {
+ Value *Op = State.get(getOperand(0), VPIteration(Part, 0));
+ return State.Builder.CreateCast(Instruction::CastOps(Opcode), Op, ResultTy);
+ }
+ default:
+ llvm_unreachable("opcode not implemented yet");
+ }
+}
+
+void VPUniformPerUFRecipe ::execute(VPTransformState &State) {
+ bool UniformAcrossUFs = all_of(operands(), [](VPValue *Op) {
+ return Op->isDefinedOutsideVectorRegions();
+ });
+ for (unsigned Part = 0; Part != State.UF; ++Part) {
+ Value *Res;
+ // Only generate a single instance, if the recipe is uniform across all UFs.
+ if (Part > 0 && UniformAcrossUFs)
+ Res = State.get(this, VPIteration(0, 0));
+ else
+ Res = generate(State, Part);
+ State.set(this, Res, VPIteration(Part, 0));
+ }
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+void VPUniformPerUFRecipe ::print(raw_ostream &O, const Twine &Indent,
+ VPSlotTracker &SlotTracker) const {
+ O << Indent << "UNIFORM-PER-UF ";
+ printAsOperand(O, SlotTracker);
+ O << " = " << Instruction::getOpcodeName(Opcode) << " ";
+ printOperands(O, SlotTracker);
+ O << " to " << *ResultTy;
+}
+#endif
+
void VPBranchOnMaskRecipe::execute(VPTransformState &State) {
assert(State.Instance && "Branch on Mask works only on single instance.");
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index b3694e74a385099..6ba8901e76aa50e 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -505,6 +505,15 @@ static VPValue *createScalarIVSteps(VPlan &Plan, const InductionDescriptor &ID,
HeaderVPBB->insert(BaseIV->getDefiningRecipe(), IP);
}
+ VPTypeAnalysis TypeInfo(SE.getContext());
+ if (TypeInfo.inferScalarType(BaseIV) != TypeInfo.inferScalarType(Step)) {
+ Step = new VPUniformPerUFRecipe(Instruction::Trunc, Step,
+ TypeInfo.inferScalarType(BaseIV));
+ auto *VecPreheader =
+ cast<VPBasicBlock>(Plan.getVectorLoopRegion()->getSinglePredecessor());
+ VecPreheader->appendRecipe(Step->getDefiningRecipe());
+ }
+
VPScalarIVStepsRecipe *Steps = new VPScalarIVStepsRecipe(ID, BaseIV, Step);
HeaderVPBB->insert(Steps, IP);
return Steps;
diff --git a/llvm/lib/Transforms/Vectorize/VPlanValue.h b/llvm/lib/Transforms/Vectorize/VPlanValue.h
index 8cc98f4abf933e0..009edea39a3c438 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanValue.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanValue.h
@@ -362,6 +362,7 @@ class VPDef {
// START: Phi-like recipes. Need to be kept together.
VPBlendSC,
VPPredInstPHISC,
+ VPUniformPerUFSC,
// START: SubclassID for recipes that inherit VPHeaderPHIRecipe.
// VPHeaderPHIRecipe need to be kept together.
VPCanonicalIVPHISC,
diff --git a/llvm/test/Transforms/LoopVectorize/cast-induction.ll b/llvm/test/Transforms/LoopVectorize/cast-induction.ll
index c5edf9831d7d905..4121a1399c47f58 100644
--- a/llvm/test/Transforms/LoopVectorize/cast-induction.ll
+++ b/llvm/test/Transforms/LoopVectorize/cast-induction.ll
@@ -83,12 +83,14 @@ define void @cast_variable_step(i64 %step) {
; VF4: middle.block:
;
; IC2-LABEL: @cast_variable_step(
+; IC2: [[TRUNC_STEP:%.+]] = trunc i64 %step to i32
+; IC2: br label %vector.body
+
; IC2-LABEL: vector.body:
; IC2-NEXT: [[INDEX:%.+]] = phi i64 [ 0, %vector.ph ]
; IC2-NEXT: [[MUL:%.+]] = mul i64 %index, %step
; IC2-NEXT: [[OFFSET_IDX:%.+]] = add i64 10, [[MUL]]
; IC2-NEXT: [[TRUNC_OFF:%.+]] = trunc i64 [[OFFSET_IDX]] to i32
-; IC2-NEXT: [[TRUNC_STEP:%.+]] = trunc i64 %step to i32
; IC2-NEXT: [[STEP0:%.+]] = mul i32 0, [[TRUNC_STEP]]
; IC2-NEXT: [[T0:%.+]] = add i32 [[TRUNC_OFF]], [[STEP0]]
; IC2-NEXT: [[STEP1:%.+]] = mul i32 1, [[TRUNC_STEP]]
diff --git a/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll b/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll
index 297cd2a7c12f9ad..6410a556589f94e 100644
--- a/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll
+++ b/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll
@@ -184,6 +184,7 @@ exit:
; DBG-NEXT: No successors
; DBG-EMPTY:
; DBG-NEXT: vector.ph:
+; DBG-NEXT: UNIFORM-PER-UF vp<[[CAST:%.+]]> = trunc ir<1> to i32
; DBG-NEXT: Successor(s): vector loop
; DBG-EMPTY:
; DBG-NEXT: <x1> vector loop: {
@@ -191,7 +192,7 @@ exit:
; DBG-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
; DBG-NEXT: FIRST-ORDER-RECURRENCE-PHI ir<%for> = phi ir<0>, vp<[[SCALAR_STEPS:.+]]>
; DBG-NEXT: vp<[[DERIVED_IV:%.+]]> = DERIVED-IV ir<0> + vp<[[CAN_IV]]> * ir<1> (truncated to i32)
-; DBG-NEXT: vp<[[SCALAR_STEPS]]> = SCALAR-STEPS vp<[[DERIVED_IV]]>, ir<1>
+; DBG-NEXT: vp<[[SCALAR_STEPS]]> = SCALAR-STEPS vp<[[DERIVED_IV]]>, vp<[[CAST]]>
; DBG-NEXT: EMIT vp<[[SPLICE:%.+]]> = first-order splice ir<%for>, vp<[[SCALAR_STEPS]]>
; DBG-NEXT: CLONE store vp<[[SPLICE]]>, ir<%dst>
; DBG-NEXT: EMIT vp<[[IV_INC:%.+]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]>
diff --git a/llvm/test/Transforms/LoopVectorize/pr46525-expander-insertpoint.ll b/llvm/test/Transforms/LoopVectorize/pr46525-expander-insertpoint.ll
index ea3de4a0fbb363e..f0220f5e766b232 100644
--- a/llvm/test/Transforms/LoopVectorize/pr46525-expander-insertpoint.ll
+++ b/llvm/test/Transforms/LoopVectorize/pr46525-expander-insertpoint.ll
@@ -43,7 +43,7 @@ define void @test(i16 %x, i64 %y, ptr %ptr) {
; CHECK-NEXT: [[V3:%.*]] = add i8 [[V2]], 1
; CHECK-NEXT: [[CMP15:%.*]] = icmp slt i8 [[V3]], 5
; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], [[INC]]
-; CHECK-NEXT: br i1 [[CMP15]], label [[LOOP]], label [[LOOP_EXIT]], !llvm.loop [[LOOP2:![0-9]+]]
+; CHECK-NEXT: br i1 [[CMP15]], label [[LOOP]], label [[LOOP_EXIT]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK: loop.exit:
; CHECK-NEXT: [[DIV_1:%.*]] = udiv i64 [[Y]], [[ADD]]
; CHECK-NEXT: [[V1:%.*]] = add i64 [[DIV_1]], 1
>From 6b3e52eebb0bc89e802c6d83afc2b2f79e5123a9 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Sun, 21 Jan 2024 21:05:36 +0000
Subject: [PATCH 2/8] !fixup specialize to VPScalarCastRecipe for now.
---
llvm/lib/Transforms/Vectorize/VPlan.h | 59 +++++++++----------
.../lib/Transforms/Vectorize/VPlanRecipes.cpp | 14 +++--
.../Transforms/Vectorize/VPlanTransforms.cpp | 4 +-
llvm/lib/Transforms/Vectorize/VPlanValue.h | 2 +-
.../interleave-and-scalarize-only.ll | 2 +-
5 files changed, 41 insertions(+), 40 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index ebdc4678853894d..59eccf135dbe434 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -859,6 +859,7 @@ class VPSingleDefRecipe : public VPRecipeBase, public VPValue {
case VPRecipeBase::VPWidenIntOrFpInductionSC:
case VPRecipeBase::VPWidenPointerInductionSC:
case VPRecipeBase::VPReductionPHISC:
+ case VPRecipeBase::VPScalarCastSC:
return true;
case VPRecipeBase::VPInterleaveSC:
case VPRecipeBase::VPBranchOnMaskSC:
@@ -1338,6 +1339,34 @@ class VPWidenCastRecipe : public VPRecipeWithIRFlags {
Type *getResultType() const { return ResultTy; }
};
+/// VPScalarCastRecipe is a recipe o create scalar cast instructions.
+class VPScalarCastRecipe : public VPRecipeBase, public VPValue {
+ /// Cast instruction opcode.
+ Instruction::CastOps Opcode;
+
+ /// Result type for the cast.
+ Type *ResultTy;
+
+ Value *generate(VPTransformState &State, unsigned Part);
+
+public:
+ VPScalarCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy)
+ : VPRecipeBase(VPDef::VPScalarCastSC, {Op}), VPValue(this),
+ Opcode(Opcode), ResultTy(ResultTy) {}
+
+ ~VPScalarCastRecipe() override = default;
+
+ VP_CLASSOF_IMPL(VPDef::VPWidenIntOrFpInductionSC)
+
+ void execute(VPTransformState &State) override;
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ /// Print the recipe.
+ void print(raw_ostream &O, const Twine &Indent,
+ VPSlotTracker &SlotTracker) const override;
+#endif
+};
+
/// A recipe for widening Call instructions.
class VPWidenCallRecipe : public VPSingleDefRecipe {
/// ID of the vector intrinsic to call when widening the call. If set the
@@ -2010,36 +2039,6 @@ class VPReplicateRecipe : public VPRecipeWithIRFlags {
}
};
-/// VPUniformPerUFRecipe represents an instruction with Opcode that is uniform
-/// per UF, i.e. it generates a single scalar instance per UF.
-/// TODO: at the moment, only Cast opcodes are supported, extend to support
-/// missing opcodes to replace uniform part of VPReplicateRecipe.
-class VPUniformPerUFRecipe : public VPRecipeBase, public VPValue {
- unsigned Opcode;
-
- /// Result type for the cast.
- Type *ResultTy;
-
- Value *generate(VPTransformState &State, unsigned Part);
-
-public:
- VPUniformPerUFRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy)
- : VPRecipeBase(VPDef::VPUniformPerUFSC, {Op}), VPValue(this),
- Opcode(Opcode), ResultTy(ResultTy) {}
-
- ~VPUniformPerUFRecipe() override = default;
-
- VP_CLASSOF_IMPL(VPDef::VPWidenIntOrFpInductionSC)
-
- void execute(VPTransformState &State) override;
-
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
- /// Print the recipe.
- void print(raw_ostream &O, const Twine &Indent,
- VPSlotTracker &SlotTracker) const override;
-#endif
-};
-
/// A recipe for generating conditional branches on the bits of a mask.
class VPBranchOnMaskRecipe : public VPRecipeBase {
public:
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index f6dd13c6375cfd9..fe93bae09f0d420 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -164,7 +164,7 @@ bool VPRecipeBase::mayHaveSideEffects() const {
auto *R = cast<VPReplicateRecipe>(this);
return R->getUnderlyingInstr()->mayHaveSideEffects();
}
- case VPUniformPerUFSC:
+ case VPScalarCastSC:
return false;
default:
return true;
@@ -1465,7 +1465,9 @@ void VPReplicateRecipe::print(raw_ostream &O, const Twine &Indent,
}
#endif
-Value *VPUniformPerUFRecipe ::generate(VPTransformState &State, unsigned Part) {
+Value *VPScalarCastRecipe ::generate(VPTransformState &State, unsigned Part) {
+ assert(vputils::onlyFirstLaneUsed(this) &&
+ "Codegen only implemented for first lane only.");
switch (Opcode) {
case Instruction::SExt:
case Instruction::ZExt:
@@ -1478,7 +1480,7 @@ Value *VPUniformPerUFRecipe ::generate(VPTransformState &State, unsigned Part) {
}
}
-void VPUniformPerUFRecipe ::execute(VPTransformState &State) {
+void VPScalarCastRecipe ::execute(VPTransformState &State) {
bool UniformAcrossUFs = all_of(operands(), [](VPValue *Op) {
return Op->isDefinedOutsideVectorRegions();
});
@@ -1494,9 +1496,9 @@ void VPUniformPerUFRecipe ::execute(VPTransformState &State) {
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-void VPUniformPerUFRecipe ::print(raw_ostream &O, const Twine &Indent,
- VPSlotTracker &SlotTracker) const {
- O << Indent << "UNIFORM-PER-UF ";
+void VPScalarCastRecipe ::print(raw_ostream &O, const Twine &Indent,
+ VPSlotTracker &SlotTracker) const {
+ O << Indent << "SCALAR-CAST ";
printAsOperand(O, SlotTracker);
O << " = " << Instruction::getOpcodeName(Opcode) << " ";
printOperands(O, SlotTracker);
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index c73058a24eb1553..c485eadf9e0f36a 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -506,8 +506,8 @@ static VPValue *createScalarIVSteps(VPlan &Plan, const InductionDescriptor &ID,
VPTypeAnalysis TypeInfo(SE.getContext());
if (TypeInfo.inferScalarType(BaseIV) != TypeInfo.inferScalarType(Step)) {
- Step = new VPUniformPerUFRecipe(Instruction::Trunc, Step,
- TypeInfo.inferScalarType(BaseIV));
+ Step = new VPScalarCastRecipe(Instruction::Trunc, Step,
+ TypeInfo.inferScalarType(BaseIV));
auto *VecPreheader =
cast<VPBasicBlock>(Plan.getVectorLoopRegion()->getSinglePredecessor());
VecPreheader->appendRecipe(Step->getDefiningRecipe());
diff --git a/llvm/lib/Transforms/Vectorize/VPlanValue.h b/llvm/lib/Transforms/Vectorize/VPlanValue.h
index 009edea39a3c438..bbbf2d3a965dbf7 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanValue.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanValue.h
@@ -362,7 +362,7 @@ class VPDef {
// START: Phi-like recipes. Need to be kept together.
VPBlendSC,
VPPredInstPHISC,
- VPUniformPerUFSC,
+ VPScalarCastSC,
// START: SubclassID for recipes that inherit VPHeaderPHIRecipe.
// VPHeaderPHIRecipe need to be kept together.
VPCanonicalIVPHISC,
diff --git a/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll b/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll
index 6410a556589f94e..c3bf2eee1dfc88c 100644
--- a/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll
+++ b/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll
@@ -184,7 +184,7 @@ exit:
; DBG-NEXT: No successors
; DBG-EMPTY:
; DBG-NEXT: vector.ph:
-; DBG-NEXT: UNIFORM-PER-UF vp<[[CAST:%.+]]> = trunc ir<1> to i32
+; DBG-NEXT: SCALAR-CAST vp<[[CAST:%.+]]> = trunc ir<1> to i32
; DBG-NEXT: Successor(s): vector loop
; DBG-EMPTY:
; DBG-NEXT: <x1> vector loop: {
>From 9331a454be3ca943244ddd02c934192eda98ec39 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Mon, 22 Jan 2024 12:49:52 +0000
Subject: [PATCH 3/8] !fixup address latest comments, thanks!
---
llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 1 +
llvm/lib/Transforms/Vectorize/VPlan.h | 10 ++++------
llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp | 7 +++----
llvm/lib/Transforms/Vectorize/VPlanValue.h | 2 +-
4 files changed, 9 insertions(+), 11 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 6ca93e15719fb27..7d1708b36a87865 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -2297,6 +2297,7 @@ emitTransformedIndex(IRBuilderBase &B, Value *Index, Value *StartValue,
? B.CreateSExtOrTrunc(Index, StepTy)
: B.CreateCast(Instruction::SIToFP, Index, StepTy);
if (CastedIndex != Index) {
+ assert(!isa<SExtInst>(CastedIndex));
CastedIndex->setName(CastedIndex->getName() + ".cast");
Index = CastedIndex;
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 59eccf135dbe434..4350b6f81e55801 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -1339,9 +1339,8 @@ class VPWidenCastRecipe : public VPRecipeWithIRFlags {
Type *getResultType() const { return ResultTy; }
};
-/// VPScalarCastRecipe is a recipe o create scalar cast instructions.
-class VPScalarCastRecipe : public VPRecipeBase, public VPValue {
- /// Cast instruction opcode.
+/// VPScalarCastRecipe is a recipe to create scalar cast instructions.
+class VPScalarCastRecipe : public VPSingleDefRecipe {
Instruction::CastOps Opcode;
/// Result type for the cast.
@@ -1351,17 +1350,16 @@ class VPScalarCastRecipe : public VPRecipeBase, public VPValue {
public:
VPScalarCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy)
- : VPRecipeBase(VPDef::VPScalarCastSC, {Op}), VPValue(this),
+ : VPSingleDefRecipe(VPDef::VPScalarCastSC, {Op}),
Opcode(Opcode), ResultTy(ResultTy) {}
~VPScalarCastRecipe() override = default;
- VP_CLASSOF_IMPL(VPDef::VPWidenIntOrFpInductionSC)
+ VP_CLASSOF_IMPL(VPDef::VPScalarCastSC)
void execute(VPTransformState &State) override;
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
- /// Print the recipe.
void print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const override;
#endif
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index fe93bae09f0d420..36c8f8e77e93534 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -117,6 +117,7 @@ bool VPRecipeBase::mayHaveSideEffects() const {
switch (getVPDefID()) {
case VPDerivedIVSC:
case VPPredInstPHISC:
+ case VPScalarCastSC:
return false;
case VPInstructionSC:
switch (cast<VPInstruction>(this)->getOpcode()) {
@@ -164,8 +165,6 @@ bool VPRecipeBase::mayHaveSideEffects() const {
auto *R = cast<VPReplicateRecipe>(this);
return R->getUnderlyingInstr()->mayHaveSideEffects();
}
- case VPScalarCastSC:
- return false;
default:
return true;
}
@@ -1119,7 +1118,7 @@ void VPScalarIVStepsRecipe::execute(VPTransformState &State) {
// Ensure step has the same type as that of scalar IV.
Type *BaseIVTy = BaseIV->getType()->getScalarType();
- assert(BaseIVTy == Step->getType());
+ assert(BaseIVTy == Step->getType() && "Types of BaseIV and Step must match!");
// We build scalar steps for both integer and floating-point induction
// variables. Here, we determine the kind of arithmetic we will perform.
@@ -1467,7 +1466,7 @@ void VPReplicateRecipe::print(raw_ostream &O, const Twine &Indent,
Value *VPScalarCastRecipe ::generate(VPTransformState &State, unsigned Part) {
assert(vputils::onlyFirstLaneUsed(this) &&
- "Codegen only implemented for first lane only.");
+ "Codegen only implemented for first lane.");
switch (Opcode) {
case Instruction::SExt:
case Instruction::ZExt:
diff --git a/llvm/lib/Transforms/Vectorize/VPlanValue.h b/llvm/lib/Transforms/Vectorize/VPlanValue.h
index bbbf2d3a965dbf7..c85f7715feaa2ab 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanValue.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanValue.h
@@ -350,6 +350,7 @@ class VPDef {
VPInterleaveSC,
VPReductionSC,
VPReplicateSC,
+ VPScalarCastSC,
VPScalarIVStepsSC,
VPVectorPointerSC,
VPWidenCallSC,
@@ -362,7 +363,6 @@ class VPDef {
// START: Phi-like recipes. Need to be kept together.
VPBlendSC,
VPPredInstPHISC,
- VPScalarCastSC,
// START: SubclassID for recipes that inherit VPHeaderPHIRecipe.
// VPHeaderPHIRecipe need to be kept together.
VPCanonicalIVPHISC,
>From 9988d78ee278fc6664c2f3c6073cddf88ca50755 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Mon, 22 Jan 2024 12:55:35 +0000
Subject: [PATCH 4/8] !fixup fix formatting
---
llvm/lib/Transforms/Vectorize/VPlan.h | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 4350b6f81e55801..6192dc09fe231df 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -1350,8 +1350,8 @@ class VPScalarCastRecipe : public VPSingleDefRecipe {
public:
VPScalarCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy)
- : VPSingleDefRecipe(VPDef::VPScalarCastSC, {Op}),
- Opcode(Opcode), ResultTy(ResultTy) {}
+ : VPSingleDefRecipe(VPDef::VPScalarCastSC, {Op}), Opcode(Opcode),
+ ResultTy(ResultTy) {}
~VPScalarCastRecipe() override = default;
>From 5500bdbe8c3576ad2b9f3f17166c4457d94bcb74 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Mon, 22 Jan 2024 21:51:21 +0000
Subject: [PATCH 5/8] !fixup address comments, use to truncate
VPDerivedIVRecipe.
---
.../Transforms/Vectorize/LoopVectorize.cpp | 7 -----
llvm/lib/Transforms/Vectorize/VPlan.h | 16 ++++------
.../Transforms/Vectorize/VPlanAnalysis.cpp | 2 ++
.../lib/Transforms/Vectorize/VPlanRecipes.cpp | 18 +++++++-----
.../Transforms/Vectorize/VPlanTransforms.cpp | 29 ++++++++++++++-----
5 files changed, 40 insertions(+), 32 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 7d1708b36a87865..cf8e98cbc38a86c 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -2297,7 +2297,6 @@ emitTransformedIndex(IRBuilderBase &B, Value *Index, Value *StartValue,
? B.CreateSExtOrTrunc(Index, StepTy)
: B.CreateCast(Instruction::SIToFP, Index, StepTy);
if (CastedIndex != Index) {
- assert(!isa<SExtInst>(CastedIndex));
CastedIndex->setName(CastedIndex->getName() + ".cast");
Index = CastedIndex;
}
@@ -9285,12 +9284,6 @@ void VPDerivedIVRecipe::execute(VPTransformState &State) {
State.Builder, CanonicalIV, getStartValue()->getLiveInIRValue(), Step,
Kind, cast_if_present<BinaryOperator>(FPBinOp));
DerivedIV->setName("offset.idx");
- if (TruncResultTy) {
- assert(TruncResultTy != DerivedIV->getType() &&
- Step->getType()->isIntegerTy() &&
- "Truncation requires an integer step");
- DerivedIV = State.Builder.CreateTrunc(DerivedIV, TruncResultTy);
- }
assert(DerivedIV != CanonicalIV && "IV didn't need transforming?");
State.set(this, DerivedIV, VPIteration(0, 0));
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 6192dc09fe231df..e6cbd81f0627821 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -1343,7 +1343,6 @@ class VPWidenCastRecipe : public VPRecipeWithIRFlags {
class VPScalarCastRecipe : public VPSingleDefRecipe {
Instruction::CastOps Opcode;
- /// Result type for the cast.
Type *ResultTy;
Value *generate(VPTransformState &State, unsigned Part);
@@ -1363,6 +1362,9 @@ class VPScalarCastRecipe : public VPSingleDefRecipe {
void print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const override;
#endif
+
+ /// Returns the result type of the cast.
+ Type *getResultType() const { return ResultTy; }
};
/// A recipe for widening Call instructions.
@@ -2347,10 +2349,6 @@ class VPWidenCanonicalIVRecipe : public VPSingleDefRecipe {
/// an IV with different start and step values, using Start + CanonicalIV *
/// Step.
class VPDerivedIVRecipe : public VPSingleDefRecipe {
- /// If not nullptr, the result of the induction will get truncated to
- /// TruncResultTy.
- Type *TruncResultTy;
-
/// Kind of the induction.
const InductionDescriptor::InductionKind Kind;
/// If not nullptr, the floating point induction binary operator. Must be set
@@ -2359,10 +2357,9 @@ class VPDerivedIVRecipe : public VPSingleDefRecipe {
public:
VPDerivedIVRecipe(const InductionDescriptor &IndDesc, VPValue *Start,
- VPCanonicalIVPHIRecipe *CanonicalIV, VPValue *Step,
- Type *TruncResultTy)
+ VPCanonicalIVPHIRecipe *CanonicalIV, VPValue *Step)
: VPSingleDefRecipe(VPDef::VPDerivedIVSC, {Start, CanonicalIV, Step}),
- TruncResultTy(TruncResultTy), Kind(IndDesc.getKind()),
+ Kind(IndDesc.getKind()),
FPBinOp(dyn_cast_or_null<FPMathOperator>(IndDesc.getInductionBinOp())) {
}
@@ -2381,8 +2378,7 @@ class VPDerivedIVRecipe : public VPSingleDefRecipe {
#endif
Type *getScalarType() const {
- return TruncResultTy ? TruncResultTy
- : getStartValue()->getLiveInIRValue()->getType();
+ return getStartValue()->getLiveInIRValue()->getType();
}
VPValue *getStartValue() const { return getOperand(0); }
diff --git a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
index d71b07039944500..515dc41a55ea1b0 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
@@ -231,6 +231,8 @@ Type *VPTypeAnalysis::inferScalarType(const VPValue *V) {
})
.Case<VPWidenCastRecipe>(
[](const VPWidenCastRecipe *R) { return R->getResultType(); })
+ .Case<VPScalarCastRecipe>(
+ [](const VPScalarCastRecipe *R) { return R->getResultType(); })
.Case<VPExpandSCEVRecipe>([](const VPExpandSCEVRecipe *R) {
return R->getSCEV()->getType();
});
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 36c8f8e77e93534..afad4f068dd80fc 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -1097,9 +1097,6 @@ void VPDerivedIVRecipe::print(raw_ostream &O, const Twine &Indent,
getCanonicalIV()->printAsOperand(O, SlotTracker);
O << " * ";
getStepValue()->printAsOperand(O, SlotTracker);
-
- if (TruncResultTy)
- O << " (truncated to " << *TruncResultTy << ")";
}
#endif
@@ -1464,6 +1461,12 @@ void VPReplicateRecipe::print(raw_ostream &O, const Twine &Indent,
}
#endif
+static bool isUniformAcrossVFsAndUFs(VPScalarCastRecipe *C) {
+ return C->isDefinedOutsideVectorRegions() ||
+ isa<VPDerivedIVRecipe>(C->getOperand(0)) ||
+ isa<VPCanonicalIVPHIRecipe>(C->getOperand(0));
+}
+
Value *VPScalarCastRecipe ::generate(VPTransformState &State, unsigned Part) {
assert(vputils::onlyFirstLaneUsed(this) &&
"Codegen only implemented for first lane.");
@@ -1480,13 +1483,12 @@ Value *VPScalarCastRecipe ::generate(VPTransformState &State, unsigned Part) {
}
void VPScalarCastRecipe ::execute(VPTransformState &State) {
- bool UniformAcrossUFs = all_of(operands(), [](VPValue *Op) {
- return Op->isDefinedOutsideVectorRegions();
- });
+ bool IsUniformAcrossVFsAndUFs = isUniformAcrossVFsAndUFs(this);
for (unsigned Part = 0; Part != State.UF; ++Part) {
Value *Res;
- // Only generate a single instance, if the recipe is uniform across all UFs.
- if (Part > 0 && UniformAcrossUFs)
+ // Only generate a single instance, if the recipe is uniform across UFs and
+ // VFs.
+ if (Part > 0 && IsUniformAcrossVFsAndUFs)
Res = State.get(this, VPIteration(0, 0));
else
Res = generate(State, Part);
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index c485eadf9e0f36a..49fc6431bb2b444 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -498,16 +498,31 @@ static VPValue *createScalarIVSteps(VPlan &Plan, const InductionDescriptor &ID,
VPCanonicalIVPHIRecipe *CanonicalIV = Plan.getCanonicalIV();
Type *TruncTy = TruncI ? TruncI->getType() : IVTy;
VPValue *BaseIV = CanonicalIV;
+ VPTypeAnalysis TypeInfo(SE.getContext());
+ Type *StepTy = TypeInfo.inferScalarType(Step);
if (!CanonicalIV->isCanonical(ID.getKind(), StartV, Step, TruncTy)) {
- BaseIV = new VPDerivedIVRecipe(ID, StartV, CanonicalIV, Step,
- TruncI ? TruncI->getType() : nullptr);
- HeaderVPBB->insert(BaseIV->getDefiningRecipe(), IP);
+ // If the induction needs transforming besides truncating, create a
+ // VPDerivedIVRecipe.
+ if (!CanonicalIV->isCanonical(ID.getKind(), StartV, Step, IVTy)) {
+ BaseIV = new VPDerivedIVRecipe(ID, StartV, CanonicalIV, Step);
+ HeaderVPBB->insert(BaseIV->getDefiningRecipe(), IP);
+ }
+ if (TypeInfo.inferScalarType(BaseIV) != TruncTy) {
+ assert(TypeInfo.inferScalarType(BaseIV)->getScalarSizeInBits() >
+ TruncTy->getScalarSizeInBits() &&
+ StepTy->isIntegerTy() && "Truncation requires an integer step");
+ auto *T = new VPScalarCastRecipe(Instruction::Trunc, BaseIV, TruncTy);
+ HeaderVPBB->insert(T, IP);
+ BaseIV = T;
+ }
}
- VPTypeAnalysis TypeInfo(SE.getContext());
- if (TypeInfo.inferScalarType(BaseIV) != TypeInfo.inferScalarType(Step)) {
- Step = new VPScalarCastRecipe(Instruction::Trunc, Step,
- TypeInfo.inferScalarType(BaseIV));
+ Type *BaseIVTy = TypeInfo.inferScalarType(BaseIV);
+ if (BaseIVTy != StepTy) {
+ assert(StepTy->getScalarSizeInBits() > BaseIVTy->getScalarSizeInBits() &&
+ "Not truncating.");
+
+ Step = new VPScalarCastRecipe(Instruction::Trunc, Step, BaseIVTy);
auto *VecPreheader =
cast<VPBasicBlock>(Plan.getVectorLoopRegion()->getSinglePredecessor());
VecPreheader->appendRecipe(Step->getDefiningRecipe());
>From f1f1eff0dfe84b2fc34d65460fb62480fbb2b9fb Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Thu, 25 Jan 2024 11:28:48 +0000
Subject: [PATCH 6/8] !fixup address latest comments, thanks!
---
llvm/lib/Transforms/Vectorize/VPlan.h | 5 +--
.../lib/Transforms/Vectorize/VPlanRecipes.cpp | 13 ++++--
.../Transforms/Vectorize/VPlanTransforms.cpp | 40 +++++++++----------
.../interleave-and-scalarize-only.ll | 4 +-
4 files changed, 32 insertions(+), 30 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index e6cbd81f0627821..6715f73e3fa20db 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -2283,10 +2283,9 @@ class VPCanonicalIVPHIRecipe : public VPHeaderPHIRecipe {
}
/// Check if the induction described by \p Kind, \p Start and \p Step is
- /// canonical, i.e. has the same start, step (of 1), and type as the
- /// canonical IV.
+ /// canonical, i.e. has the same start and step (of 1) as the canonical IV.
bool isCanonical(InductionDescriptor::InductionKind Kind, VPValue *Start,
- VPValue *Step, Type *Ty) const;
+ VPValue *Step) const;
};
/// A recipe for generating the active lane mask for the vector loop that is
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index afad4f068dd80fc..f993844c3d898b6 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -1461,6 +1461,11 @@ void VPReplicateRecipe::print(raw_ostream &O, const Twine &Indent,
}
#endif
+/// Checks if \p C is uniform across all VFs and UFs. It is considered as such
+/// if it is either defined outside the vector region or its operand is known to
+/// be uniform across all VFs and UFs (e.g. VPDerivedIV or VPCanonicalIVPHI).
+/// TODO: Uniformity should be associated with a VPValue and there should be a
+/// generic way to check.
static bool isUniformAcrossVFsAndUFs(VPScalarCastRecipe *C) {
return C->isDefinedOutsideVectorRegions() ||
isa<VPDerivedIVRecipe>(C->getOperand(0)) ||
@@ -1625,10 +1630,10 @@ void VPCanonicalIVPHIRecipe::print(raw_ostream &O, const Twine &Indent,
#endif
bool VPCanonicalIVPHIRecipe::isCanonical(
- InductionDescriptor::InductionKind Kind, VPValue *Start, VPValue *Step,
- Type *Ty) const {
- // The types must match and it must be an integer induction.
- if (Ty != getScalarType() || Kind != InductionDescriptor::IK_IntInduction)
+ InductionDescriptor::InductionKind Kind, VPValue *Start,
+ VPValue *Step) const {
+ // Must be an integer induction.
+ if (Kind != InductionDescriptor::IK_IntInduction)
return false;
// Start must match the start value of this canonical induction.
if (Start != getStartValue())
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 49fc6431bb2b444..a50e0c8bcd82963 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -491,32 +491,30 @@ void VPlanTransforms::removeDeadRecipes(VPlan &Plan) {
static VPValue *createScalarIVSteps(VPlan &Plan, const InductionDescriptor &ID,
ScalarEvolution &SE, Instruction *TruncI,
- Type *IVTy, VPValue *StartV,
- VPValue *Step) {
+ VPValue *StartV, VPValue *Step) {
VPBasicBlock *HeaderVPBB = Plan.getVectorLoopRegion()->getEntryBasicBlock();
auto IP = HeaderVPBB->getFirstNonPhi();
VPCanonicalIVPHIRecipe *CanonicalIV = Plan.getCanonicalIV();
- Type *TruncTy = TruncI ? TruncI->getType() : IVTy;
VPValue *BaseIV = CanonicalIV;
VPTypeAnalysis TypeInfo(SE.getContext());
Type *StepTy = TypeInfo.inferScalarType(Step);
- if (!CanonicalIV->isCanonical(ID.getKind(), StartV, Step, TruncTy)) {
- // If the induction needs transforming besides truncating, create a
- // VPDerivedIVRecipe.
- if (!CanonicalIV->isCanonical(ID.getKind(), StartV, Step, IVTy)) {
- BaseIV = new VPDerivedIVRecipe(ID, StartV, CanonicalIV, Step);
- HeaderVPBB->insert(BaseIV->getDefiningRecipe(), IP);
- }
- if (TypeInfo.inferScalarType(BaseIV) != TruncTy) {
- assert(TypeInfo.inferScalarType(BaseIV)->getScalarSizeInBits() >
- TruncTy->getScalarSizeInBits() &&
- StepTy->isIntegerTy() && "Truncation requires an integer step");
- auto *T = new VPScalarCastRecipe(Instruction::Trunc, BaseIV, TruncTy);
- HeaderVPBB->insert(T, IP);
- BaseIV = T;
- }
+ if (!CanonicalIV->isCanonical(ID.getKind(), StartV, Step)) {
+ BaseIV = new VPDerivedIVRecipe(ID, StartV, CanonicalIV, Step);
+ HeaderVPBB->insert(BaseIV->getDefiningRecipe(), IP);
+ }
+
+ // Truncate base induction if needed.
+ if (TruncI) {
+ Type *TruncTy = TruncI->getType();
+ assert(TypeInfo.inferScalarType(BaseIV)->getScalarSizeInBits() >
+ TruncTy->getScalarSizeInBits() &&
+ StepTy->isIntegerTy() && "Truncation requires an integer step");
+ auto *T = new VPScalarCastRecipe(Instruction::Trunc, BaseIV, TruncTy);
+ HeaderVPBB->insert(T, IP);
+ BaseIV = T;
}
+ // Truncate step if needed.
Type *BaseIVTy = TypeInfo.inferScalarType(BaseIV);
if (BaseIVTy != StepTy) {
assert(StepTy->getScalarSizeInBits() > BaseIVTy->getScalarSizeInBits() &&
@@ -547,9 +545,9 @@ void VPlanTransforms::optimizeInductions(VPlan &Plan, ScalarEvolution &SE) {
continue;
const InductionDescriptor &ID = WideIV->getInductionDescriptor();
- VPValue *Steps = createScalarIVSteps(
- Plan, ID, SE, WideIV->getTruncInst(), WideIV->getPHINode()->getType(),
- WideIV->getStartValue(), WideIV->getStepValue());
+ VPValue *Steps =
+ createScalarIVSteps(Plan, ID, SE, WideIV->getTruncInst(),
+ WideIV->getStartValue(), WideIV->getStepValue());
// Update scalar users of IV to use Step instead.
if (!HasOnlyVectorVFs)
diff --git a/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll b/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll
index c3bf2eee1dfc88c..81cc2024bb31a5f 100644
--- a/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll
+++ b/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll
@@ -191,8 +191,8 @@ exit:
; DBG-NEXT: vector.body:
; DBG-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
; DBG-NEXT: FIRST-ORDER-RECURRENCE-PHI ir<%for> = phi ir<0>, vp<[[SCALAR_STEPS:.+]]>
-; DBG-NEXT: vp<[[DERIVED_IV:%.+]]> = DERIVED-IV ir<0> + vp<[[CAN_IV]]> * ir<1> (truncated to i32)
-; DBG-NEXT: vp<[[SCALAR_STEPS]]> = SCALAR-STEPS vp<[[DERIVED_IV]]>, vp<[[CAST]]>
+; DBG-NEXT: SCALAR-CAST vp<[[TRUNC_IV:%.+]]> = trunc vp<[[CAN_IV]]> to i32
+; DBG-NEXT: vp<[[SCALAR_STEPS]]> = SCALAR-STEPS vp<[[TRUNC_IV]]>, vp<[[CAST]]>
; DBG-NEXT: EMIT vp<[[SPLICE:%.+]]> = first-order splice ir<%for>, vp<[[SCALAR_STEPS]]>
; DBG-NEXT: CLONE store vp<[[SPLICE]]>, ir<%dst>
; DBG-NEXT: EMIT vp<[[IV_INC:%.+]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]>
>From d38223249db3da1dc85b19c29a2488a3532f44eb Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Thu, 25 Jan 2024 17:56:40 +0000
Subject: [PATCH 7/8] !fixup address latest comments, thanks!
---
llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp | 1 +
.../Transforms/Vectorize/VPlanTransforms.cpp | 18 +++++++++---------
2 files changed, 10 insertions(+), 9 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index f993844c3d898b6..dd9d211ef87bc20 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -1479,6 +1479,7 @@ Value *VPScalarCastRecipe ::generate(VPTransformState &State, unsigned Part) {
case Instruction::SExt:
case Instruction::ZExt:
case Instruction::Trunc: {
+ // Note: SExt/ZExt not used yet.
Value *Op = State.get(getOperand(0), VPIteration(Part, 0));
return State.Builder.CreateCast(Instruction::CastOps(Opcode), Op, ResultTy);
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index a50e0c8bcd82963..4173e11380cbb62 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -495,15 +495,15 @@ static VPValue *createScalarIVSteps(VPlan &Plan, const InductionDescriptor &ID,
VPBasicBlock *HeaderVPBB = Plan.getVectorLoopRegion()->getEntryBasicBlock();
auto IP = HeaderVPBB->getFirstNonPhi();
VPCanonicalIVPHIRecipe *CanonicalIV = Plan.getCanonicalIV();
- VPValue *BaseIV = CanonicalIV;
- VPTypeAnalysis TypeInfo(SE.getContext());
- Type *StepTy = TypeInfo.inferScalarType(Step);
+ VPSingleDefRecipe *BaseIV = CanonicalIV;
if (!CanonicalIV->isCanonical(ID.getKind(), StartV, Step)) {
BaseIV = new VPDerivedIVRecipe(ID, StartV, CanonicalIV, Step);
- HeaderVPBB->insert(BaseIV->getDefiningRecipe(), IP);
+ HeaderVPBB->insert(BaseIV, IP);
}
// Truncate base induction if needed.
+ VPTypeAnalysis TypeInfo(SE.getContext());
+ Type *StepTy = TypeInfo.inferScalarType(Step);
if (TruncI) {
Type *TruncTy = TruncI->getType();
assert(TypeInfo.inferScalarType(BaseIV)->getScalarSizeInBits() >
@@ -515,12 +515,12 @@ static VPValue *createScalarIVSteps(VPlan &Plan, const InductionDescriptor &ID,
}
// Truncate step if needed.
- Type *BaseIVTy = TypeInfo.inferScalarType(BaseIV);
- if (BaseIVTy != StepTy) {
- assert(StepTy->getScalarSizeInBits() > BaseIVTy->getScalarSizeInBits() &&
- "Not truncating.");
+ Type *ResultTy = TypeInfo.inferScalarType(BaseIV);
+ if (ResultTy != StepTy) {
+ assert(StepTy->getScalarSizeInBits() > ResultTy->getScalarSizeInBits() &&
+ StepTy->isIntegerTy() && "Truncation requires an integer step");
- Step = new VPScalarCastRecipe(Instruction::Trunc, Step, BaseIVTy);
+ Step = new VPScalarCastRecipe(Instruction::Trunc, Step, ResultTy);
auto *VecPreheader =
cast<VPBasicBlock>(Plan.getVectorLoopRegion()->getSinglePredecessor());
VecPreheader->appendRecipe(Step->getDefiningRecipe());
>From 6efa5774fcf2cd2d6cee79f51b4213ffb1173c72 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Thu, 25 Jan 2024 23:05:57 +0000
Subject: [PATCH 8/8] !fixup address missed comments
---
.../Transforms/Vectorize/VPlanTransforms.cpp | 19 +++++++++----------
1 file changed, 9 insertions(+), 10 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 4173e11380cbb62..9cf5accf0192d36 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -503,23 +503,22 @@ static VPValue *createScalarIVSteps(VPlan &Plan, const InductionDescriptor &ID,
// Truncate base induction if needed.
VPTypeAnalysis TypeInfo(SE.getContext());
- Type *StepTy = TypeInfo.inferScalarType(Step);
+ Type *ResultTy = TypeInfo.inferScalarType(BaseIV);
if (TruncI) {
Type *TruncTy = TruncI->getType();
- assert(TypeInfo.inferScalarType(BaseIV)->getScalarSizeInBits() >
- TruncTy->getScalarSizeInBits() &&
- StepTy->isIntegerTy() && "Truncation requires an integer step");
- auto *T = new VPScalarCastRecipe(Instruction::Trunc, BaseIV, TruncTy);
- HeaderVPBB->insert(T, IP);
- BaseIV = T;
+ assert(ResultTy->getScalarSizeInBits() > TruncTy->getScalarSizeInBits() &&
+ "Not truncating.");
+ assert(ResultTy->isIntegerTy() && "Truncation requires an integer type");
+ BaseIV = new VPScalarCastRecipe(Instruction::Trunc, BaseIV, TruncTy);
+ HeaderVPBB->insert(BaseIV, IP);
}
// Truncate step if needed.
- Type *ResultTy = TypeInfo.inferScalarType(BaseIV);
+ Type *StepTy = TypeInfo.inferScalarType(Step);
if (ResultTy != StepTy) {
assert(StepTy->getScalarSizeInBits() > ResultTy->getScalarSizeInBits() &&
- StepTy->isIntegerTy() && "Truncation requires an integer step");
-
+ "Not truncating.");
+ assert(StepTy->isIntegerTy() && "Truncation requires an integer type");
Step = new VPScalarCastRecipe(Instruction::Trunc, Step, ResultTy);
auto *VecPreheader =
cast<VPBasicBlock>(Plan.getVectorLoopRegion()->getSinglePredecessor());
More information about the libcxx-commits
mailing list