[PATCH] D130637: [LV] Don't predicate uniform mem op stores unneccessarily
Philip Reames via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Thu Jul 28 08:56:01 PDT 2022
This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Closed by commit rG82c1b136dbe1: [LV] Don't predicate uniform mem op stores unneccessarily (authored by reames).
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D130637/new/
https://reviews.llvm.org/D130637
Files:
llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
llvm/test/Transforms/LoopVectorize/pr46525-expander-insertpoint.ll
Index: llvm/test/Transforms/LoopVectorize/pr46525-expander-insertpoint.ll
===================================================================
--- llvm/test/Transforms/LoopVectorize/pr46525-expander-insertpoint.ll
+++ llvm/test/Transforms/LoopVectorize/pr46525-expander-insertpoint.ll
@@ -24,31 +24,14 @@
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], 2
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; CHECK-NEXT: [[IND_END:%.*]] = mul i64 [[N_VEC]], [[INC]]
-; CHECK-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i64 [[TMP2]], 1
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[TRIP_COUNT_MINUS_1]], i32 0
-; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE4:%.*]] ]
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x i64> poison, i64 [[INDEX]], i32 0
-; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT1]], <2 x i64> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT: [[VEC_IV:%.*]] = add <2 x i64> [[BROADCAST_SPLAT2]], <i64 0, i64 1>
-; CHECK-NEXT: [[TMP3:%.*]] = icmp ule <2 x i64> [[VEC_IV]], [[BROADCAST_SPLAT]]
-; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i1> [[TMP3]], i32 0
-; CHECK-NEXT: br i1 [[TMP4]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
-; CHECK: pred.store.if:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: store i32 0, i32* [[PTR:%.*]], align 4
-; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]]
-; CHECK: pred.store.continue:
-; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP3]], i32 1
-; CHECK-NEXT: br i1 [[TMP5]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4]]
-; CHECK: pred.store.if3:
; CHECK-NEXT: store i32 0, i32* [[PTR]], align 4
-; CHECK-NEXT: br label [[PRED_STORE_CONTINUE4]]
-; CHECK: pred.store.continue4:
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 2
-; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br i1 true, label [[LOOP_EXIT:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
Index: llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
===================================================================
--- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -1451,8 +1451,14 @@
// don't need to mark it as a predicated instruction. Tail folding may
// introduce additional predication, but we're guaranteed to always have
// at least one active lane. We call Legal->blockNeedsPredication here
- // because it doesn't query tail-folding.
- if (Legal->isUniformMemOp(*I) && isa<LoadInst>(I) &&
+ // because it doesn't query tail-folding. For stores, we need to prove
+ // both speculation safety (which follows from the same argument as loads),
+ // but also must prove the value being stored is correct. The easiest
+ // form of the later is to require that all values stored are the same.
+ if (Legal->isUniformMemOp(*I) &&
+ (isa<LoadInst>(I) ||
+ (isa<StoreInst>(I) &&
+ TheLoop->isLoopInvariant(cast<StoreInst>(I)->getValueOperand()))) &&
!Legal->blockNeedsPredication(I->getParent()))
return false;
if (!blockNeedsPredicationForAnyReason(I->getParent()))
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D130637.448354.patch
Type: text/x-patch
Size: 3994 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20220728/bc0ec983/attachment.bin>
More information about the llvm-commits
mailing list