[PATCH] D124718: [VPlan] Do not create VPWidenCall recipes for scalar vector factors.
Florian Hahn via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Mon May 2 11:47:26 PDT 2022
This revision was automatically updated to reflect the committed changes.
Closed by commit rG0ef8ca6d88aa: [VPlan] Do not create VPWidenCall recipes for scalar vector factors. (authored by fhahn).
Changed prior to commit:
https://reviews.llvm.org/D124718?vs=426245&id=426468#toc
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D124718/new/
https://reviews.llvm.org/D124718
Files:
llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
llvm/test/Transforms/LoopVectorize/AArch64/strict-fadd.ll
llvm/test/Transforms/LoopVectorize/interleave-with-call.ll
Index: llvm/test/Transforms/LoopVectorize/interleave-with-call.ll
===================================================================
--- llvm/test/Transforms/LoopVectorize/interleave-with-call.ll
+++ llvm/test/Transforms/LoopVectorize/interleave-with-call.ll
@@ -12,7 +12,7 @@
; CHECK-NEXT: vector.body:
; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
; CHECK-NEXT: vp<[[IV_STEPS:%.]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<%start>, ir<1>
-; CHECK-NEXT: WIDEN-CALL ir<%min> = call @llvm.smin.i32(vp<[[IV_STEPS]]>, ir<65535>)
+; CHECK-NEXT: CLONE ir<%min> = call vp<[[IV_STEPS]]>, ir<65535>, ir<@llvm.smin.i32>
; CHECK-NEXT: CLONE ir<%arrayidx> = getelementptr ir<%dst>, vp<[[IV_STEPS]]>
; CHECK-NEXT: CLONE store ir<%min>, ir<%arrayidx>
; CHECK-NEXT: EMIT vp<[[INC:%.+]]> = VF * UF +(nuw) vp<[[CAN_IV]]>
@@ -27,8 +27,8 @@
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i32 %start, [[INDEX]]
; CHECK-NEXT: [[INDUCTION:%.*]] = add i32 [[OFFSET_IDX]], 0
; CHECK-NEXT: [[INDUCTION1:%.*]] = add i32 [[OFFSET_IDX]], 1
-; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.smin.i32(i32 [[INDUCTION]], i32 65535)
-; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.smin.i32(i32 [[INDUCTION1]], i32 65535)
+; CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.smin.i32(i32 [[INDUCTION]], i32 65535)
+; CHECK-NEXT: [[TMP2:%.*]] = tail call i32 @llvm.smin.i32(i32 [[INDUCTION1]], i32 65535)
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i32 [[INDUCTION]]
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[INDUCTION1]]
; CHECK-NEXT: store i32 [[TMP1]], ptr [[TMP3]], align 8
Index: llvm/test/Transforms/LoopVectorize/AArch64/strict-fadd.ll
===================================================================
--- llvm/test/Transforms/LoopVectorize/AArch64/strict-fadd.ll
+++ llvm/test/Transforms/LoopVectorize/AArch64/strict-fadd.ll
@@ -1139,10 +1139,10 @@
; CHECK-UNORDERED: [[LOAD5:%.*]] = load float, float*
; CHECK-UNORDERED: [[LOAD6:%.*]] = load float, float*
; CHECK-UNORDERED: [[LOAD7:%.*]] = load float, float*
-; CHECK-UNORDERED: [[FMULADD]] = call float @llvm.fmuladd.f32(float [[LOAD]], float [[LOAD4]], float [[VEC_PHI]])
-; CHECK-UNORDERED: [[FMULADD1]] = call float @llvm.fmuladd.f32(float [[LOAD1]], float [[LOAD5]], float [[VEC_PHI1]])
-; CHECK-UNORDERED: [[FMULADD2]] = call float @llvm.fmuladd.f32(float [[LOAD2]], float [[LOAD6]], float [[VEC_PHI2]])
-; CHECK-UNORDERED: [[FMULADD3]] = call float @llvm.fmuladd.f32(float [[LOAD3]], float [[LOAD7]], float [[VEC_PHI3]])
+; CHECK-UNORDERED: [[FMULADD]] = tail call float @llvm.fmuladd.f32(float [[LOAD]], float [[LOAD4]], float [[VEC_PHI]])
+; CHECK-UNORDERED: [[FMULADD1]] = tail call float @llvm.fmuladd.f32(float [[LOAD1]], float [[LOAD5]], float [[VEC_PHI1]])
+; CHECK-UNORDERED: [[FMULADD2]] = tail call float @llvm.fmuladd.f32(float [[LOAD2]], float [[LOAD6]], float [[VEC_PHI2]])
+; CHECK-UNORDERED: [[FMULADD3]] = tail call float @llvm.fmuladd.f32(float [[LOAD3]], float [[LOAD7]], float [[VEC_PHI3]])
; CHECK-UNORDERED-NOT: llvm.vector.reduce.fadd
; CHECK-UNORDERED: middle.block:
; CHECK-UNORDERED: [[BIN_RDX:%.*]] = fadd float [[FMULADD1]], [[FMULADD]]
Index: llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
===================================================================
--- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -8329,6 +8329,8 @@
return nullptr;
auto willWiden = [&](ElementCount VF) -> bool {
+ if (VF.isScalar())
+ return false;
Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI);
// The following case may be scalarized depending on the VF.
// The flag shows whether we use Intrinsic or a usual Call for vectorized
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D124718.426468.patch
Type: text/x-patch
Size: 3811 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20220502/8c39a32c/attachment.bin>
More information about the llvm-commits
mailing list