[llvm] [LV] Account for vp_merge in out of loop EVL reductions in legacy cost model (PR #115903)
Luke Lau via llvm-commits
llvm-commits at lists.llvm.org
Wed Nov 13 23:18:33 PST 2024
https://github.com/lukel97 updated https://github.com/llvm/llvm-project/pull/115903
>From 9b7097cedf678a87d00e3249e95ded473a89d390 Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Wed, 13 Nov 2024 00:39:18 +0800
Subject: [PATCH 1/4] Precommit test
---
...rize-force-tail-with-evl-reduction-cost.ll | 34 +++++++++++++++++++
1 file changed, 34 insertions(+)
create mode 100644 llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reduction-cost.ll
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reduction-cost.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reduction-cost.ll
new file mode 100644
index 00000000000000..db4b4a4e5c5ed1
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reduction-cost.ll
@@ -0,0 +1,34 @@
+; RUN: opt -passes=loop-vectorize -debug-only=loop-vectorize \
+; RUN: -force-tail-folding-style=data-with-evl \
+; RUN: -prefer-predicate-over-epilogue=predicate-dont-vectorize \
+; RUN: -mtriple=riscv64 -mattr=+v -S < %s 2>&1 | FileCheck %s \
+; RUN: --check-prefix=EVL
+
+; RUN: opt -passes=loop-vectorize -debug-only=loop-vectorize \
+; RUN: -prefer-predicate-over-epilogue=predicate-dont-vectorize \
+; RUN: -mtriple=riscv64 -mattr=+v -S < %s 2>&1 | FileCheck %s \
+; RUN: --check-prefix=NO-EVL
+
+; EVL: Cost of 2 for VF vscale x 4: WIDEN-INTRINSIC vp<%{{.+}}> = call llvm.vp.merge(ir<true>, ir<%add>, ir<%rdx>, vp<%{{.+}}>)
+; EVL: LV: Found an estimated cost of 0 for VF vscale x 4 For instruction: %rdx = phi i32 [ %start, %entry ], [ %add, %for.body ]
+
+; NO-EVL: Cost of 0 for VF vscale x 4: EMIT vp<%{{.+}}> = select vp<%active.lane.mask>, ir<%add>, ir<%rdx>
+; NO-EVL: LV: Found an estimated cost of 0 for VF vscale x 4 For instruction: %rdx = phi i32 [ %start, %entry ], [ %add, %for.body ]
+
+define i32 @add(ptr %a, i64 %n, i32 %start) {
+entry:
+ br label %for.body
+
+for.body:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+ %rdx = phi i32 [ %start, %entry ], [ %add, %for.body ]
+ %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv
+ %0 = load i32, ptr %arrayidx, align 4
+ %add = add nsw i32 %0, %rdx
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond.not = icmp eq i64 %iv.next, %n
+ br i1 %exitcond.not, label %for.end, label %for.body
+
+for.end:
+ ret i32 %add
+}
>From 577290a0b4cbace14fa2aff0dae28a78b9835eae Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Wed, 13 Nov 2024 00:52:38 +0800
Subject: [PATCH 2/4] [LV] Account for vp_merge in out of loop EVL reductions
in legacy cost model
In #101641, support for out of loop reductions with EVL tail folding was added by transforming selects to vp_merges in transformRecipestoEVLRecipes.
Whilst the select was previously free, the vp_merge isn't: it incurs a cost on RISC-V in the VPlan cost model. The legacy cost model did not account for this cost, so the two models diverged and the "VPlan cost model and legacy cost model disagreed" assertion triggered when building 502.gcc_r from SPEC CPU 2017.
Neither the select nor the vp_merge recipe in the VPlan corresponds to an underlying instruction, so I thought it would make the most sense to fix this by adding the cost to the underlying phi instruction in getInstructionCost.
It's worth noting that on RISC-V this vp_merge won't actually generate any instructions: its mask is all true, so it will be folded away. We should update the cost model at some point to reflect that.
---
llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 10 ++++++++++
.../vectorize-force-tail-with-evl-reduction-cost.ll | 2 +-
2 files changed, 11 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 1ebc62f9843905..5c3afbe5214feb 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -6567,6 +6567,16 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I,
CmpInst::BAD_ICMP_PREDICATE, CostKind);
}
+ // When tail folding with EVL, if the phi is part of an out of loop reduction
+ // then it will be transformed into a wide vp_merge.
+ if (VF.isVector() && foldTailWithEVL() &&
+ Legal->getReductionVars().contains(Phi) && !isInLoopReduction(Phi)) {
+ IntrinsicCostAttributes ICA(
+ Intrinsic::vp_merge, ToVectorTy(Phi->getType(), VF),
+ {ToVectorTy(Type::getInt1Ty(Phi->getContext()), VF)});
+ return TTI.getIntrinsicInstrCost(ICA, CostKind);
+ }
+
return TTI.getCFInstrCost(Instruction::PHI, CostKind);
}
case Instruction::UDiv:
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reduction-cost.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reduction-cost.ll
index db4b4a4e5c5ed1..6d20731d2502b4 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reduction-cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reduction-cost.ll
@@ -10,7 +10,7 @@
; RUN: --check-prefix=NO-EVL
; EVL: Cost of 2 for VF vscale x 4: WIDEN-INTRINSIC vp<%{{.+}}> = call llvm.vp.merge(ir<true>, ir<%add>, ir<%rdx>, vp<%{{.+}}>)
-; EVL: LV: Found an estimated cost of 0 for VF vscale x 4 For instruction: %rdx = phi i32 [ %start, %entry ], [ %add, %for.body ]
+; EVL: LV: Found an estimated cost of 2 for VF vscale x 4 For instruction: %rdx = phi i32 [ %start, %entry ], [ %add, %for.body ]
; NO-EVL: Cost of 0 for VF vscale x 4: EMIT vp<%{{.+}}> = select vp<%active.lane.mask>, ir<%add>, ir<%rdx>
; NO-EVL: LV: Found an estimated cost of 0 for VF vscale x 4 For instruction: %rdx = phi i32 [ %start, %entry ], [ %add, %for.body ]
>From 6b6819dcd60a5cd9f9d5978b7f421ccfd6a896a7 Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Thu, 14 Nov 2024 16:10:23 +0900
Subject: [PATCH 3/4] Fix clang-format
---
llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 5c3afbe5214feb..d029997e9565f2 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -6567,8 +6567,8 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I,
CmpInst::BAD_ICMP_PREDICATE, CostKind);
}
- // When tail folding with EVL, if the phi is part of an out of loop reduction
- // then it will be transformed into a wide vp_merge.
+ // When tail folding with EVL, if the phi is part of an out of loop
+ // reduction then it will be transformed into a wide vp_merge.
if (VF.isVector() && foldTailWithEVL() &&
Legal->getReductionVars().contains(Phi) && !isInLoopReduction(Phi)) {
IntrinsicCostAttributes ICA(
>From c1cf0c2a4cd6d69cc15f7a8caec4b01aa5bf9997 Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Thu, 14 Nov 2024 16:18:12 +0900
Subject: [PATCH 4/4] Address review comments
---
...rize-force-tail-with-evl-reduction-cost.ll | 27 +++++++------------
1 file changed, 9 insertions(+), 18 deletions(-)
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reduction-cost.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reduction-cost.ll
index 6d20731d2502b4..aa1bb25af930d1 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reduction-cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reduction-cost.ll
@@ -1,34 +1,25 @@
; RUN: opt -passes=loop-vectorize -debug-only=loop-vectorize \
; RUN: -force-tail-folding-style=data-with-evl \
; RUN: -prefer-predicate-over-epilogue=predicate-dont-vectorize \
-; RUN: -mtriple=riscv64 -mattr=+v -S < %s 2>&1 | FileCheck %s \
-; RUN: --check-prefix=EVL
+; RUN: -mtriple=riscv64 -mattr=+v -S < %s 2>&1 | FileCheck %s
-; RUN: opt -passes=loop-vectorize -debug-only=loop-vectorize \
-; RUN: -prefer-predicate-over-epilogue=predicate-dont-vectorize \
-; RUN: -mtriple=riscv64 -mattr=+v -S < %s 2>&1 | FileCheck %s \
-; RUN: --check-prefix=NO-EVL
-
-; EVL: Cost of 2 for VF vscale x 4: WIDEN-INTRINSIC vp<%{{.+}}> = call llvm.vp.merge(ir<true>, ir<%add>, ir<%rdx>, vp<%{{.+}}>)
-; EVL: LV: Found an estimated cost of 2 for VF vscale x 4 For instruction: %rdx = phi i32 [ %start, %entry ], [ %add, %for.body ]
-
-; NO-EVL: Cost of 0 for VF vscale x 4: EMIT vp<%{{.+}}> = select vp<%active.lane.mask>, ir<%add>, ir<%rdx>
-; NO-EVL: LV: Found an estimated cost of 0 for VF vscale x 4 For instruction: %rdx = phi i32 [ %start, %entry ], [ %add, %for.body ]
+; CHECK: Cost of 2 for VF vscale x 4: WIDEN-INTRINSIC vp<%{{.+}}> = call llvm.vp.merge(ir<true>, ir<%add>, ir<%rdx>, vp<%{{.+}}>)
+; CHECK: LV: Found an estimated cost of 2 for VF vscale x 4 For instruction: %rdx = phi i32 [ %start, %entry ], [ %add, %loop ]
define i32 @add(ptr %a, i64 %n, i32 %start) {
entry:
- br label %for.body
+ br label %loop
-for.body:
- %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
- %rdx = phi i32 [ %start, %entry ], [ %add, %for.body ]
+loop:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+ %rdx = phi i32 [ %start, %entry ], [ %add, %loop ]
%arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv
%0 = load i32, ptr %arrayidx, align 4
%add = add nsw i32 %0, %rdx
%iv.next = add nuw nsw i64 %iv, 1
%exitcond.not = icmp eq i64 %iv.next, %n
- br i1 %exitcond.not, label %for.end, label %for.body
+ br i1 %exitcond.not, label %exit, label %loop
-for.end:
+exit:
ret i32 %add
}
More information about the llvm-commits
mailing list