[llvm] [VPlan] Add new VPUniformPerUFRecipe, use for step truncation. (PR #78113)
via llvm-commits
llvm-commits at lists.llvm.org
Sun Jan 14 12:17:05 PST 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-transforms
Author: Florian Hahn (fhahn)
<details>
<summary>Changes</summary>
Add a new recipe to model uniform-per-UF instructions, without relying on an underlying instruction. Initially, it supports uniform cast-ops and is therefore storing the result type.
Not relying on an underlying instruction (like the current VPReplicateRecipe) allows to create instances without a corresponding instruction.
In the future, to plan is to extend this recipe to handle all opcodes needed to replace the uniform part of VPReplicateRecipe.
---
Full diff: https://github.com/llvm/llvm-project/pull/78113.diff
8 Files Affected:
- (modified) llvm/lib/Transforms/Vectorize/VPlan.h (+30)
- (modified) llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp (+5-1)
- (modified) llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp (+42-7)
- (modified) llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp (+9)
- (modified) llvm/lib/Transforms/Vectorize/VPlanValue.h (+1)
- (modified) llvm/test/Transforms/LoopVectorize/cast-induction.ll (+3-1)
- (modified) llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll (+2-1)
- (modified) llvm/test/Transforms/LoopVectorize/pr46525-expander-insertpoint.ll (+1-1)
``````````diff
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 4b4f4911eb6415..d5985224cccc48 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -1945,6 +1945,36 @@ class VPReplicateRecipe : public VPRecipeWithIRFlags, public VPValue {
}
};
+/// VPUniformPerUFRecipe represents an instruction with Opcode that is uniform
+/// per UF, i.e. it generates a single scalar instance per UF.
+/// TODO: at the moment, only Cast opcodes are supported, extend to support
+/// missing opcodes to replace uniform part of VPReplicateRecipe.
+class VPUniformPerUFRecipe : public VPRecipeBase, public VPValue {
+ unsigned Opcode;
+
+ /// Result type for the cast.
+ Type *ResultTy;
+
+ Value *generate(VPTransformState &State, unsigned Part);
+
+public:
+ VPUniformPerUFRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy)
+ : VPRecipeBase(VPDef::VPUniformPerUFSC, {Op}), VPValue(this),
+ Opcode(Opcode), ResultTy(ResultTy) {}
+
+ ~VPUniformPerUFRecipe() override = default;
+
+ VP_CLASSOF_IMPL(VPDef::VPWidenIntOrFpInductionSC)
+
+ void execute(VPTransformState &State) override;
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ /// Print the recipe.
+ void print(raw_ostream &O, const Twine &Indent,
+ VPSlotTracker &SlotTracker) const override;
+#endif
+};
+
/// A recipe for generating conditional branches on the bits of a mask.
class VPBranchOnMaskRecipe : public VPRecipeBase {
public:
diff --git a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
index 97a8a1803bbf5a..d71b0703994450 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
@@ -230,7 +230,11 @@ Type *VPTypeAnalysis::inferScalarType(const VPValue *V) {
return V->getUnderlyingValue()->getType();
})
.Case<VPWidenCastRecipe>(
- [](const VPWidenCastRecipe *R) { return R->getResultType(); });
+ [](const VPWidenCastRecipe *R) { return R->getResultType(); })
+ .Case<VPExpandSCEVRecipe>([](const VPExpandSCEVRecipe *R) {
+ return R->getSCEV()->getType();
+ });
+
assert(ResultTy && "could not infer type for the given VPValue");
CachedTypes[V] = ResultTy;
return ResultTy;
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 1f844bce23102e..423504e8f7e05e 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -164,6 +164,8 @@ bool VPRecipeBase::mayHaveSideEffects() const {
auto *R = cast<VPReplicateRecipe>(this);
return R->getUnderlyingInstr()->mayHaveSideEffects();
}
+ case VPUniformPerUFSC:
+ return false;
default:
return true;
}
@@ -1117,13 +1119,7 @@ void VPScalarIVStepsRecipe::execute(VPTransformState &State) {
// Ensure step has the same type as that of scalar IV.
Type *BaseIVTy = BaseIV->getType()->getScalarType();
- if (BaseIVTy != Step->getType()) {
- // TODO: Also use VPDerivedIVRecipe when only the step needs truncating, to
- // avoid separate truncate here.
- assert(Step->getType()->isIntegerTy() &&
- "Truncation requires an integer step");
- Step = State.Builder.CreateTrunc(Step, BaseIVTy);
- }
+ assert(BaseIVTy == Step->getType());
// We build scalar steps for both integer and floating-point induction
// variables. Here, we determine the kind of arithmetic we will perform.
@@ -1469,6 +1465,45 @@ void VPReplicateRecipe::print(raw_ostream &O, const Twine &Indent,
}
#endif
+Value *VPUniformPerUFRecipe ::generate(VPTransformState &State, unsigned Part) {
+ switch (Opcode) {
+ case Instruction::SExt:
+ case Instruction::ZExt:
+ case Instruction::Trunc: {
+ Value *Op = State.get(getOperand(0), VPIteration(Part, 0));
+ return State.Builder.CreateCast(Instruction::CastOps(Opcode), Op, ResultTy);
+ }
+ default:
+ llvm_unreachable("opcode not implemented yet");
+ }
+}
+
+void VPUniformPerUFRecipe ::execute(VPTransformState &State) {
+ bool UniformAcrossUFs = all_of(operands(), [](VPValue *Op) {
+ return Op->isDefinedOutsideVectorRegions();
+ });
+ for (unsigned Part = 0; Part != State.UF; ++Part) {
+ Value *Res;
+ // Only generate a single instance, if the recipe is uniform across all UFs.
+ if (Part > 0 && UniformAcrossUFs)
+ Res = State.get(this, VPIteration(0, 0));
+ else
+ Res = generate(State, Part);
+ State.set(this, Res, VPIteration(Part, 0));
+ }
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+void VPUniformPerUFRecipe ::print(raw_ostream &O, const Twine &Indent,
+ VPSlotTracker &SlotTracker) const {
+ O << Indent << "UNIFORM-PER-UF ";
+ printAsOperand(O, SlotTracker);
+ O << " = " << Instruction::getOpcodeName(Opcode) << " ";
+ printOperands(O, SlotTracker);
+ O << " to " << *ResultTy;
+}
+#endif
+
void VPBranchOnMaskRecipe::execute(VPTransformState &State) {
assert(State.Instance && "Branch on Mask works only on single instance.");
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index b3694e74a38509..6ba8901e76aa50 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -505,6 +505,15 @@ static VPValue *createScalarIVSteps(VPlan &Plan, const InductionDescriptor &ID,
HeaderVPBB->insert(BaseIV->getDefiningRecipe(), IP);
}
+ VPTypeAnalysis TypeInfo(SE.getContext());
+ if (TypeInfo.inferScalarType(BaseIV) != TypeInfo.inferScalarType(Step)) {
+ Step = new VPUniformPerUFRecipe(Instruction::Trunc, Step,
+ TypeInfo.inferScalarType(BaseIV));
+ auto *VecPreheader =
+ cast<VPBasicBlock>(Plan.getVectorLoopRegion()->getSinglePredecessor());
+ VecPreheader->appendRecipe(Step->getDefiningRecipe());
+ }
+
VPScalarIVStepsRecipe *Steps = new VPScalarIVStepsRecipe(ID, BaseIV, Step);
HeaderVPBB->insert(Steps, IP);
return Steps;
diff --git a/llvm/lib/Transforms/Vectorize/VPlanValue.h b/llvm/lib/Transforms/Vectorize/VPlanValue.h
index 8cc98f4abf933e..009edea39a3c43 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanValue.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanValue.h
@@ -362,6 +362,7 @@ class VPDef {
// START: Phi-like recipes. Need to be kept together.
VPBlendSC,
VPPredInstPHISC,
+ VPUniformPerUFSC,
// START: SubclassID for recipes that inherit VPHeaderPHIRecipe.
// VPHeaderPHIRecipe need to be kept together.
VPCanonicalIVPHISC,
diff --git a/llvm/test/Transforms/LoopVectorize/cast-induction.ll b/llvm/test/Transforms/LoopVectorize/cast-induction.ll
index c5edf9831d7d90..4121a1399c47f5 100644
--- a/llvm/test/Transforms/LoopVectorize/cast-induction.ll
+++ b/llvm/test/Transforms/LoopVectorize/cast-induction.ll
@@ -83,12 +83,14 @@ define void @cast_variable_step(i64 %step) {
; VF4: middle.block:
;
; IC2-LABEL: @cast_variable_step(
+; IC2: [[TRUNC_STEP:%.+]] = trunc i64 %step to i32
+; IC2: br label %vector.body
+
; IC2-LABEL: vector.body:
; IC2-NEXT: [[INDEX:%.+]] = phi i64 [ 0, %vector.ph ]
; IC2-NEXT: [[MUL:%.+]] = mul i64 %index, %step
; IC2-NEXT: [[OFFSET_IDX:%.+]] = add i64 10, [[MUL]]
; IC2-NEXT: [[TRUNC_OFF:%.+]] = trunc i64 [[OFFSET_IDX]] to i32
-; IC2-NEXT: [[TRUNC_STEP:%.+]] = trunc i64 %step to i32
; IC2-NEXT: [[STEP0:%.+]] = mul i32 0, [[TRUNC_STEP]]
; IC2-NEXT: [[T0:%.+]] = add i32 [[TRUNC_OFF]], [[STEP0]]
; IC2-NEXT: [[STEP1:%.+]] = mul i32 1, [[TRUNC_STEP]]
diff --git a/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll b/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll
index 297cd2a7c12f9a..6410a556589f94 100644
--- a/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll
+++ b/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll
@@ -184,6 +184,7 @@ exit:
; DBG-NEXT: No successors
; DBG-EMPTY:
; DBG-NEXT: vector.ph:
+; DBG-NEXT: UNIFORM-PER-UF vp<[[CAST:%.+]]> = trunc ir<1> to i32
; DBG-NEXT: Successor(s): vector loop
; DBG-EMPTY:
; DBG-NEXT: <x1> vector loop: {
@@ -191,7 +192,7 @@ exit:
; DBG-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
; DBG-NEXT: FIRST-ORDER-RECURRENCE-PHI ir<%for> = phi ir<0>, vp<[[SCALAR_STEPS:.+]]>
; DBG-NEXT: vp<[[DERIVED_IV:%.+]]> = DERIVED-IV ir<0> + vp<[[CAN_IV]]> * ir<1> (truncated to i32)
-; DBG-NEXT: vp<[[SCALAR_STEPS]]> = SCALAR-STEPS vp<[[DERIVED_IV]]>, ir<1>
+; DBG-NEXT: vp<[[SCALAR_STEPS]]> = SCALAR-STEPS vp<[[DERIVED_IV]]>, vp<[[CAST]]>
; DBG-NEXT: EMIT vp<[[SPLICE:%.+]]> = first-order splice ir<%for>, vp<[[SCALAR_STEPS]]>
; DBG-NEXT: CLONE store vp<[[SPLICE]]>, ir<%dst>
; DBG-NEXT: EMIT vp<[[IV_INC:%.+]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]>
diff --git a/llvm/test/Transforms/LoopVectorize/pr46525-expander-insertpoint.ll b/llvm/test/Transforms/LoopVectorize/pr46525-expander-insertpoint.ll
index ea3de4a0fbb363..f0220f5e766b23 100644
--- a/llvm/test/Transforms/LoopVectorize/pr46525-expander-insertpoint.ll
+++ b/llvm/test/Transforms/LoopVectorize/pr46525-expander-insertpoint.ll
@@ -43,7 +43,7 @@ define void @test(i16 %x, i64 %y, ptr %ptr) {
; CHECK-NEXT: [[V3:%.*]] = add i8 [[V2]], 1
; CHECK-NEXT: [[CMP15:%.*]] = icmp slt i8 [[V3]], 5
; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], [[INC]]
-; CHECK-NEXT: br i1 [[CMP15]], label [[LOOP]], label [[LOOP_EXIT]], !llvm.loop [[LOOP2:![0-9]+]]
+; CHECK-NEXT: br i1 [[CMP15]], label [[LOOP]], label [[LOOP_EXIT]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK: loop.exit:
; CHECK-NEXT: [[DIV_1:%.*]] = udiv i64 [[Y]], [[ADD]]
; CHECK-NEXT: [[V1:%.*]] = add i64 [[DIV_1]], 1
``````````
</details>
https://github.com/llvm/llvm-project/pull/78113
More information about the llvm-commits
mailing list