[PATCH] D155876: [PowerPC] vector cost model add cost to extract i1
Roland Froese via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Thu Jul 20 11:53:45 PDT 2023
RolandF created this revision.
Herald added subscribers: shchenz, kbarton, hiraditya, nemanjai.
Herald added a project: All.
RolandF requested review of this revision.
Herald added subscribers: llvm-commits, wangpc.
Herald added a project: LLVM.
Try to avoid some unprofitable predication on PPC. Recognize in the cost model that computing on i1 values requires an extra mask or compare operation.
https://reviews.llvm.org/D155876
Files:
llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
llvm/test/Transforms/LoopVectorize/PowerPC/predcost.ll
Index: llvm/test/Transforms/LoopVectorize/PowerPC/predcost.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/LoopVectorize/PowerPC/predcost.ll
@@ -0,0 +1,45 @@
+; RUN: opt -ppc-vec-mask-cost=true -aa-pipeline=basic-aa -mcpu=pwr8 -S -passes=loop-vectorize < %s | FileCheck %s
+
+target datalayout = "e-m:e-Fn32-i64:64-n32:64-S128-v256:256:256-v512:512:512"
+target triple = "powerpc64le-unknown-linux-gnu"
+
+; Function Attrs: nofree norecurse nosync nounwind memory(argmem: readwrite)
+define dso_local void @_tc(ptr nocapture noundef %aaa, i64 noundef %bbb) local_unnamed_addr #0 {
+; CHECK-NOT: extractelement <16 x i1>
+entry:
+ %cmp7 = icmp sgt i64 %bbb, 0
+ br i1 %cmp7, label %for.body.preheader, label %for.cond.cleanup
+
+for.body.preheader: ; preds = %entry
+ br label %for.body
+
+for.cond.cleanup.loopexit: ; preds = %for.inc
+ br label %for.cond.cleanup
+
+for.cond.cleanup: ; preds = %for.cond.cleanup.loopexit, %entry
+ %arrayidx3 = getelementptr inbounds i8, ptr %aaa, i64 %bbb
+ store i8 0, ptr %arrayidx3, align 1
+ ret void
+
+for.body: ; preds = %for.body.preheader, %for.inc
+ %i.08 = phi i64 [ %inc, %for.inc ], [ 0, %for.body.preheader ]
+ %arrayidx = getelementptr inbounds i8, ptr %aaa, i64 %i.08
+ %0 = load i8, ptr %arrayidx, align 1
+ %cmp1 = icmp eq i8 %0, 0
+ br i1 %cmp1, label %if.then, label %for.inc
+
+if.then: ; preds = %for.body
+ store i8 32, ptr %arrayidx, align 1
+ br label %for.inc
+
+for.inc: ; preds = %for.body, %if.then
+ %inc = add nuw nsw i64 %i.08, 1
+ %exitcond.not = icmp eq i64 %inc, %bbb
+ br i1 %exitcond.not, label %for.cond.cleanup.loopexit, label %for.body
+}
+
+; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
+declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture)
+
+; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
+declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture)
Index: llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
===================================================================
--- llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+++ llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -27,6 +27,9 @@
#define DEBUG_TYPE "ppctti"
+static cl::opt<bool> VecMaskCost("ppc-vec-mask-cost",
+cl::desc("add masking cost for i1 vectors"), cl::init(true), cl::Hidden);
+
static cl::opt<bool> DisablePPCConstHoist("disable-ppc-constant-hoisting",
cl::desc("disable constant hoisting on PPC"), cl::init(false), cl::Hidden);
@@ -700,6 +703,9 @@
return Cost;
} else if (Val->getScalarType()->isIntegerTy() && Index != -1U) {
+ unsigned EltSize = Val->getScalarSizeInBits();
+ // Computing on 1 bit values requires extra mask or compare operations.
+ unsigned MaskCost = VecMaskCost && EltSize == 1 ? 1 : 0;
if (ST->hasP9Altivec()) {
if (ISD == ISD::INSERT_VECTOR_ELT)
// A move-to VSR and a permute/insert. Assume vector operation cost
@@ -721,12 +727,14 @@
// We need a vector extract (or mfvsrld). Assume vector operation cost.
// The cost of the load constant for a vector extract is disregarded
// (invariant, easily schedulable).
- return CostFactor;
+ return CostFactor + MaskCost;
} else if (ST->hasDirectMove())
// Assume permute has standard cost.
// Assume move-to/move-from VSR have 2x standard cost.
- return 3;
+ if (ISD == ISD::INSERT_VECTOR_ELT)
+ return 3;
+ return 3 + MaskCost;
}
// Estimated cost of a load-hit-store delay. This was obtained
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D155876.542620.patch
Type: text/x-patch
Size: 3846 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20230720/5c0a6713/attachment.bin>
More information about the llvm-commits mailing list