[llvm] 9317975 - [VPlan] Match legacy behavior w.r.t. using pointer phis as scalar addrs.
Florian Hahn via llvm-commits
llvm-commits at lists.llvm.org
Mon Oct 20 03:09:53 PDT 2025
Author: Florian Hahn
Date: 2025-10-20T11:09:25+01:00
New Revision: 9317975a7abe8f863076576230a7aefe41c83180
URL: https://github.com/llvm/llvm-project/commit/9317975a7abe8f863076576230a7aefe41c83180
DIFF: https://github.com/llvm/llvm-project/commit/9317975a7abe8f863076576230a7aefe41c83180.diff
LOG: [VPlan] Match legacy behavior w.r.t. using pointer phis as scalar addrs.
When the legacy cost model scalarizes loads that are used as addresses
for other loads and stores, it only considers phi nodes that are direct
address operands of those loads/stores. Match this behavior in
isUsedByLoadStoreAddress to fix a divergence between the legacy and
VPlan-based cost models.
Added:
llvm/test/Transforms/LoopVectorize/X86/replicating-load-store-costs-max-bandwidth.ll
Modified:
llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index a865b2d1f9d3b..1f1b42bb9c19f 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -3162,7 +3162,17 @@ static bool isUsedByLoadStoreAddress(const VPUser *V) {
while (!WorkList.empty()) {
auto *Cur = dyn_cast<VPSingleDefRecipe>(WorkList.pop_back_val());
- if (!Cur || !Seen.insert(Cur).second || isa<VPBlendRecipe>(Cur))
+ if (!Cur || !Seen.insert(Cur).second)
+ continue;
+
+ auto *Blend = dyn_cast<VPBlendRecipe>(Cur);
+ // Skip blends that use V only through a compare by checking if any incoming
+ // value was already visited.
+ if (Blend && none_of(seq<unsigned>(0, Blend->getNumIncomingValues()),
+ [&](unsigned I) {
+ return Seen.contains(
+ Blend->getIncomingValue(I)->getDefiningRecipe());
+ }))
continue;
for (VPUser *U : Cur->users()) {
@@ -3183,7 +3193,13 @@ static bool isUsedByLoadStoreAddress(const VPUser *V) {
}
}
- append_range(WorkList, cast<VPSingleDefRecipe>(Cur)->users());
+ // The legacy cost model only supports scalarization loads/stores with phi
+ // addresses, if the phi is directly used as load/store address. Don't
+ // traverse further for Blends.
+ if (Blend)
+ continue;
+
+ append_range(WorkList, Cur->users());
}
return false;
}
diff --git a/llvm/test/Transforms/LoopVectorize/X86/replicating-load-store-costs-max-bandwidth.ll b/llvm/test/Transforms/LoopVectorize/X86/replicating-load-store-costs-max-bandwidth.ll
new file mode 100644
index 0000000000000..5011852f77e08
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/X86/replicating-load-store-costs-max-bandwidth.ll
@@ -0,0 +1,378 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt -p loop-vectorize -mtriple=x86_64-apple-macosx -vectorizer-maximize-bandwidth -S %s | FileCheck %s
+
+declare void @init(ptr captures(none), ptr captures(none), ptr captures(none))
+
+define void @replicating_store_with_phi_addr1(ptr noalias %array, i64 %N, i32 %x, i1 %cond) {
+; CHECK-LABEL: define void @replicating_store_with_phi_addr1(
+; CHECK-SAME: ptr noalias [[ARRAY:%.*]], i64 [[N:%.*]], i32 [[X:%.*]], i1 [[COND:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[PTR1:%.*]] = alloca i8, align 1
+; CHECK-NEXT: [[PTR2:%.*]] = alloca i8, align 1
+; CHECK-NEXT: [[PTR3:%.*]] = alloca i8, align 1
+; CHECK-NEXT: call void @init(ptr [[PTR1]], ptr [[PTR2]], ptr [[PTR3]])
+; CHECK-NEXT: br label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x ptr> poison, ptr [[PTR1]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x ptr> [[BROADCAST_SPLATINSERT]], <16 x ptr> poison, <16 x i32> zeroinitializer
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <16 x i1> poison, i1 [[COND]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <16 x i1> [[BROADCAST_SPLATINSERT1]], <16 x i1> poison, <16 x i32> zeroinitializer
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE32:.*]] ]
+; CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARRAY]], align 8
+; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
+; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP0]], align 4
+; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4
+; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4
+; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4
+; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP0]], align 4
+; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP0]], align 4
+; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP0]], align 4
+; CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP0]], align 4
+; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP0]], align 4
+; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP0]], align 4
+; CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP0]], align 4
+; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP0]], align 4
+; CHECK-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP0]], align 4
+; CHECK-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP0]], align 4
+; CHECK-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP0]], align 4
+; CHECK-NEXT: [[TMP17:%.*]] = insertelement <16 x i32> poison, i32 [[TMP1]], i32 0
+; CHECK-NEXT: [[TMP18:%.*]] = insertelement <16 x i32> [[TMP17]], i32 [[TMP2]], i32 1
+; CHECK-NEXT: [[TMP19:%.*]] = insertelement <16 x i32> [[TMP18]], i32 [[TMP3]], i32 2
+; CHECK-NEXT: [[TMP20:%.*]] = insertelement <16 x i32> [[TMP19]], i32 [[TMP4]], i32 3
+; CHECK-NEXT: [[TMP21:%.*]] = insertelement <16 x i32> [[TMP20]], i32 [[TMP5]], i32 4
+; CHECK-NEXT: [[TMP22:%.*]] = insertelement <16 x i32> [[TMP21]], i32 [[TMP6]], i32 5
+; CHECK-NEXT: [[TMP23:%.*]] = insertelement <16 x i32> [[TMP22]], i32 [[TMP7]], i32 6
+; CHECK-NEXT: [[TMP24:%.*]] = insertelement <16 x i32> [[TMP23]], i32 [[TMP8]], i32 7
+; CHECK-NEXT: [[TMP25:%.*]] = insertelement <16 x i32> [[TMP24]], i32 [[TMP9]], i32 8
+; CHECK-NEXT: [[TMP26:%.*]] = insertelement <16 x i32> [[TMP25]], i32 [[TMP10]], i32 9
+; CHECK-NEXT: [[TMP27:%.*]] = insertelement <16 x i32> [[TMP26]], i32 [[TMP11]], i32 10
+; CHECK-NEXT: [[TMP28:%.*]] = insertelement <16 x i32> [[TMP27]], i32 [[TMP12]], i32 11
+; CHECK-NEXT: [[TMP29:%.*]] = insertelement <16 x i32> [[TMP28]], i32 [[TMP13]], i32 12
+; CHECK-NEXT: [[TMP30:%.*]] = insertelement <16 x i32> [[TMP29]], i32 [[TMP14]], i32 13
+; CHECK-NEXT: [[TMP31:%.*]] = insertelement <16 x i32> [[TMP30]], i32 [[TMP15]], i32 14
+; CHECK-NEXT: [[TMP32:%.*]] = insertelement <16 x i32> [[TMP31]], i32 [[TMP16]], i32 15
+; CHECK-NEXT: [[TMP33:%.*]] = icmp sgt <16 x i32> [[TMP32]], zeroinitializer
+; CHECK-NEXT: [[TMP34:%.*]] = xor <16 x i1> [[TMP33]], splat (i1 true)
+; CHECK-NEXT: [[TMP35:%.*]] = icmp slt i32 [[X]], [[TMP1]]
+; CHECK-NEXT: [[TMP36:%.*]] = icmp slt i32 [[X]], [[TMP2]]
+; CHECK-NEXT: [[TMP37:%.*]] = icmp slt i32 [[X]], [[TMP3]]
+; CHECK-NEXT: [[TMP38:%.*]] = icmp slt i32 [[X]], [[TMP4]]
+; CHECK-NEXT: [[TMP39:%.*]] = icmp slt i32 [[X]], [[TMP5]]
+; CHECK-NEXT: [[TMP40:%.*]] = icmp slt i32 [[X]], [[TMP6]]
+; CHECK-NEXT: [[TMP41:%.*]] = icmp slt i32 [[X]], [[TMP7]]
+; CHECK-NEXT: [[TMP42:%.*]] = icmp slt i32 [[X]], [[TMP8]]
+; CHECK-NEXT: [[TMP43:%.*]] = icmp slt i32 [[X]], [[TMP9]]
+; CHECK-NEXT: [[TMP44:%.*]] = icmp slt i32 [[X]], [[TMP10]]
+; CHECK-NEXT: [[TMP45:%.*]] = icmp slt i32 [[X]], [[TMP11]]
+; CHECK-NEXT: [[TMP46:%.*]] = icmp slt i32 [[X]], [[TMP12]]
+; CHECK-NEXT: [[TMP47:%.*]] = icmp slt i32 [[X]], [[TMP13]]
+; CHECK-NEXT: [[TMP48:%.*]] = icmp slt i32 [[X]], [[TMP14]]
+; CHECK-NEXT: [[TMP49:%.*]] = icmp slt i32 [[X]], [[TMP15]]
+; CHECK-NEXT: [[TMP50:%.*]] = icmp slt i32 [[X]], [[TMP16]]
+; CHECK-NEXT: [[TMP51:%.*]] = select i1 [[TMP35]], ptr [[PTR2]], ptr [[PTR3]]
+; CHECK-NEXT: [[TMP52:%.*]] = select i1 [[TMP36]], ptr [[PTR2]], ptr [[PTR3]]
+; CHECK-NEXT: [[TMP53:%.*]] = select i1 [[TMP37]], ptr [[PTR2]], ptr [[PTR3]]
+; CHECK-NEXT: [[TMP54:%.*]] = select i1 [[TMP38]], ptr [[PTR2]], ptr [[PTR3]]
+; CHECK-NEXT: [[TMP55:%.*]] = select i1 [[TMP39]], ptr [[PTR2]], ptr [[PTR3]]
+; CHECK-NEXT: [[TMP56:%.*]] = select i1 [[TMP40]], ptr [[PTR2]], ptr [[PTR3]]
+; CHECK-NEXT: [[TMP57:%.*]] = select i1 [[TMP41]], ptr [[PTR2]], ptr [[PTR3]]
+; CHECK-NEXT: [[TMP58:%.*]] = select i1 [[TMP42]], ptr [[PTR2]], ptr [[PTR3]]
+; CHECK-NEXT: [[TMP59:%.*]] = select i1 [[TMP43]], ptr [[PTR2]], ptr [[PTR3]]
+; CHECK-NEXT: [[TMP60:%.*]] = select i1 [[TMP44]], ptr [[PTR2]], ptr [[PTR3]]
+; CHECK-NEXT: [[TMP61:%.*]] = select i1 [[TMP45]], ptr [[PTR2]], ptr [[PTR3]]
+; CHECK-NEXT: [[TMP62:%.*]] = select i1 [[TMP46]], ptr [[PTR2]], ptr [[PTR3]]
+; CHECK-NEXT: [[TMP63:%.*]] = select i1 [[TMP47]], ptr [[PTR2]], ptr [[PTR3]]
+; CHECK-NEXT: [[TMP64:%.*]] = select i1 [[TMP48]], ptr [[PTR2]], ptr [[PTR3]]
+; CHECK-NEXT: [[TMP65:%.*]] = select i1 [[TMP49]], ptr [[PTR2]], ptr [[PTR3]]
+; CHECK-NEXT: [[TMP66:%.*]] = select i1 [[TMP50]], ptr [[PTR2]], ptr [[PTR3]]
+; CHECK-NEXT: [[TMP67:%.*]] = insertelement <16 x ptr> poison, ptr [[TMP51]], i32 0
+; CHECK-NEXT: [[TMP68:%.*]] = insertelement <16 x ptr> [[TMP67]], ptr [[TMP52]], i32 1
+; CHECK-NEXT: [[TMP69:%.*]] = insertelement <16 x ptr> [[TMP68]], ptr [[TMP53]], i32 2
+; CHECK-NEXT: [[TMP70:%.*]] = insertelement <16 x ptr> [[TMP69]], ptr [[TMP54]], i32 3
+; CHECK-NEXT: [[TMP71:%.*]] = insertelement <16 x ptr> [[TMP70]], ptr [[TMP55]], i32 4
+; CHECK-NEXT: [[TMP72:%.*]] = insertelement <16 x ptr> [[TMP71]], ptr [[TMP56]], i32 5
+; CHECK-NEXT: [[TMP73:%.*]] = insertelement <16 x ptr> [[TMP72]], ptr [[TMP57]], i32 6
+; CHECK-NEXT: [[TMP74:%.*]] = insertelement <16 x ptr> [[TMP73]], ptr [[TMP58]], i32 7
+; CHECK-NEXT: [[TMP75:%.*]] = insertelement <16 x ptr> [[TMP74]], ptr [[TMP59]], i32 8
+; CHECK-NEXT: [[TMP76:%.*]] = insertelement <16 x ptr> [[TMP75]], ptr [[TMP60]], i32 9
+; CHECK-NEXT: [[TMP77:%.*]] = insertelement <16 x ptr> [[TMP76]], ptr [[TMP61]], i32 10
+; CHECK-NEXT: [[TMP78:%.*]] = insertelement <16 x ptr> [[TMP77]], ptr [[TMP62]], i32 11
+; CHECK-NEXT: [[TMP79:%.*]] = insertelement <16 x ptr> [[TMP78]], ptr [[TMP63]], i32 12
+; CHECK-NEXT: [[TMP80:%.*]] = insertelement <16 x ptr> [[TMP79]], ptr [[TMP64]], i32 13
+; CHECK-NEXT: [[TMP81:%.*]] = insertelement <16 x ptr> [[TMP80]], ptr [[TMP65]], i32 14
+; CHECK-NEXT: [[TMP82:%.*]] = insertelement <16 x ptr> [[TMP81]], ptr [[TMP66]], i32 15
+; CHECK-NEXT: [[TMP83:%.*]] = select <16 x i1> [[TMP33]], <16 x i1> [[BROADCAST_SPLAT2]], <16 x i1> zeroinitializer
+; CHECK-NEXT: [[TMP84:%.*]] = or <16 x i1> [[TMP83]], [[TMP34]]
+; CHECK-NEXT: [[PREDPHI:%.*]] = select <16 x i1> [[TMP33]], <16 x ptr> [[BROADCAST_SPLAT]], <16 x ptr> [[TMP82]]
+; CHECK-NEXT: [[TMP85:%.*]] = extractelement <16 x i1> [[TMP84]], i32 0
+; CHECK-NEXT: br i1 [[TMP85]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
+; CHECK: [[PRED_STORE_IF]]:
+; CHECK-NEXT: [[TMP86:%.*]] = extractelement <16 x ptr> [[PREDPHI]], i32 0
+; CHECK-NEXT: store i8 0, ptr [[TMP86]], align 1
+; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]]
+; CHECK: [[PRED_STORE_CONTINUE]]:
+; CHECK-NEXT: [[TMP87:%.*]] = extractelement <16 x i1> [[TMP84]], i32 1
+; CHECK-NEXT: br i1 [[TMP87]], label %[[PRED_STORE_IF3:.*]], label %[[PRED_STORE_CONTINUE4:.*]]
+; CHECK: [[PRED_STORE_IF3]]:
+; CHECK-NEXT: [[TMP88:%.*]] = extractelement <16 x ptr> [[PREDPHI]], i32 1
+; CHECK-NEXT: store i8 0, ptr [[TMP88]], align 1
+; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE4]]
+; CHECK: [[PRED_STORE_CONTINUE4]]:
+; CHECK-NEXT: [[TMP89:%.*]] = extractelement <16 x i1> [[TMP84]], i32 2
+; CHECK-NEXT: br i1 [[TMP89]], label %[[PRED_STORE_IF5:.*]], label %[[PRED_STORE_CONTINUE6:.*]]
+; CHECK: [[PRED_STORE_IF5]]:
+; CHECK-NEXT: [[TMP90:%.*]] = extractelement <16 x ptr> [[PREDPHI]], i32 2
+; CHECK-NEXT: store i8 0, ptr [[TMP90]], align 1
+; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE6]]
+; CHECK: [[PRED_STORE_CONTINUE6]]:
+; CHECK-NEXT: [[TMP91:%.*]] = extractelement <16 x i1> [[TMP84]], i32 3
+; CHECK-NEXT: br i1 [[TMP91]], label %[[PRED_STORE_IF7:.*]], label %[[PRED_STORE_CONTINUE8:.*]]
+; CHECK: [[PRED_STORE_IF7]]:
+; CHECK-NEXT: [[TMP92:%.*]] = extractelement <16 x ptr> [[PREDPHI]], i32 3
+; CHECK-NEXT: store i8 0, ptr [[TMP92]], align 1
+; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE8]]
+; CHECK: [[PRED_STORE_CONTINUE8]]:
+; CHECK-NEXT: [[TMP93:%.*]] = extractelement <16 x i1> [[TMP84]], i32 4
+; CHECK-NEXT: br i1 [[TMP93]], label %[[PRED_STORE_IF9:.*]], label %[[PRED_STORE_CONTINUE10:.*]]
+; CHECK: [[PRED_STORE_IF9]]:
+; CHECK-NEXT: [[TMP94:%.*]] = extractelement <16 x ptr> [[PREDPHI]], i32 4
+; CHECK-NEXT: store i8 0, ptr [[TMP94]], align 1
+; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE10]]
+; CHECK: [[PRED_STORE_CONTINUE10]]:
+; CHECK-NEXT: [[TMP95:%.*]] = extractelement <16 x i1> [[TMP84]], i32 5
+; CHECK-NEXT: br i1 [[TMP95]], label %[[PRED_STORE_IF11:.*]], label %[[PRED_STORE_CONTINUE12:.*]]
+; CHECK: [[PRED_STORE_IF11]]:
+; CHECK-NEXT: [[TMP96:%.*]] = extractelement <16 x ptr> [[PREDPHI]], i32 5
+; CHECK-NEXT: store i8 0, ptr [[TMP96]], align 1
+; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE12]]
+; CHECK: [[PRED_STORE_CONTINUE12]]:
+; CHECK-NEXT: [[TMP97:%.*]] = extractelement <16 x i1> [[TMP84]], i32 6
+; CHECK-NEXT: br i1 [[TMP97]], label %[[PRED_STORE_IF13:.*]], label %[[PRED_STORE_CONTINUE14:.*]]
+; CHECK: [[PRED_STORE_IF13]]:
+; CHECK-NEXT: [[TMP98:%.*]] = extractelement <16 x ptr> [[PREDPHI]], i32 6
+; CHECK-NEXT: store i8 0, ptr [[TMP98]], align 1
+; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE14]]
+; CHECK: [[PRED_STORE_CONTINUE14]]:
+; CHECK-NEXT: [[TMP99:%.*]] = extractelement <16 x i1> [[TMP84]], i32 7
+; CHECK-NEXT: br i1 [[TMP99]], label %[[PRED_STORE_IF15:.*]], label %[[PRED_STORE_CONTINUE16:.*]]
+; CHECK: [[PRED_STORE_IF15]]:
+; CHECK-NEXT: [[TMP100:%.*]] = extractelement <16 x ptr> [[PREDPHI]], i32 7
+; CHECK-NEXT: store i8 0, ptr [[TMP100]], align 1
+; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE16]]
+; CHECK: [[PRED_STORE_CONTINUE16]]:
+; CHECK-NEXT: [[TMP101:%.*]] = extractelement <16 x i1> [[TMP84]], i32 8
+; CHECK-NEXT: br i1 [[TMP101]], label %[[PRED_STORE_IF17:.*]], label %[[PRED_STORE_CONTINUE18:.*]]
+; CHECK: [[PRED_STORE_IF17]]:
+; CHECK-NEXT: [[TMP102:%.*]] = extractelement <16 x ptr> [[PREDPHI]], i32 8
+; CHECK-NEXT: store i8 0, ptr [[TMP102]], align 1
+; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE18]]
+; CHECK: [[PRED_STORE_CONTINUE18]]:
+; CHECK-NEXT: [[TMP103:%.*]] = extractelement <16 x i1> [[TMP84]], i32 9
+; CHECK-NEXT: br i1 [[TMP103]], label %[[PRED_STORE_IF19:.*]], label %[[PRED_STORE_CONTINUE20:.*]]
+; CHECK: [[PRED_STORE_IF19]]:
+; CHECK-NEXT: [[TMP104:%.*]] = extractelement <16 x ptr> [[PREDPHI]], i32 9
+; CHECK-NEXT: store i8 0, ptr [[TMP104]], align 1
+; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE20]]
+; CHECK: [[PRED_STORE_CONTINUE20]]:
+; CHECK-NEXT: [[TMP105:%.*]] = extractelement <16 x i1> [[TMP84]], i32 10
+; CHECK-NEXT: br i1 [[TMP105]], label %[[PRED_STORE_IF21:.*]], label %[[PRED_STORE_CONTINUE22:.*]]
+; CHECK: [[PRED_STORE_IF21]]:
+; CHECK-NEXT: [[TMP106:%.*]] = extractelement <16 x ptr> [[PREDPHI]], i32 10
+; CHECK-NEXT: store i8 0, ptr [[TMP106]], align 1
+; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE22]]
+; CHECK: [[PRED_STORE_CONTINUE22]]:
+; CHECK-NEXT: [[TMP107:%.*]] = extractelement <16 x i1> [[TMP84]], i32 11
+; CHECK-NEXT: br i1 [[TMP107]], label %[[PRED_STORE_IF23:.*]], label %[[PRED_STORE_CONTINUE24:.*]]
+; CHECK: [[PRED_STORE_IF23]]:
+; CHECK-NEXT: [[TMP108:%.*]] = extractelement <16 x ptr> [[PREDPHI]], i32 11
+; CHECK-NEXT: store i8 0, ptr [[TMP108]], align 1
+; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE24]]
+; CHECK: [[PRED_STORE_CONTINUE24]]:
+; CHECK-NEXT: [[TMP109:%.*]] = extractelement <16 x i1> [[TMP84]], i32 12
+; CHECK-NEXT: br i1 [[TMP109]], label %[[PRED_STORE_IF25:.*]], label %[[PRED_STORE_CONTINUE26:.*]]
+; CHECK: [[PRED_STORE_IF25]]:
+; CHECK-NEXT: [[TMP110:%.*]] = extractelement <16 x ptr> [[PREDPHI]], i32 12
+; CHECK-NEXT: store i8 0, ptr [[TMP110]], align 1
+; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE26]]
+; CHECK: [[PRED_STORE_CONTINUE26]]:
+; CHECK-NEXT: [[TMP111:%.*]] = extractelement <16 x i1> [[TMP84]], i32 13
+; CHECK-NEXT: br i1 [[TMP111]], label %[[PRED_STORE_IF27:.*]], label %[[PRED_STORE_CONTINUE28:.*]]
+; CHECK: [[PRED_STORE_IF27]]:
+; CHECK-NEXT: [[TMP112:%.*]] = extractelement <16 x ptr> [[PREDPHI]], i32 13
+; CHECK-NEXT: store i8 0, ptr [[TMP112]], align 1
+; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE28]]
+; CHECK: [[PRED_STORE_CONTINUE28]]:
+; CHECK-NEXT: [[TMP113:%.*]] = extractelement <16 x i1> [[TMP84]], i32 14
+; CHECK-NEXT: br i1 [[TMP113]], label %[[PRED_STORE_IF29:.*]], label %[[PRED_STORE_CONTINUE30:.*]]
+; CHECK: [[PRED_STORE_IF29]]:
+; CHECK-NEXT: [[TMP114:%.*]] = extractelement <16 x ptr> [[PREDPHI]], i32 14
+; CHECK-NEXT: store i8 0, ptr [[TMP114]], align 1
+; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE30]]
+; CHECK: [[PRED_STORE_CONTINUE30]]:
+; CHECK-NEXT: [[TMP115:%.*]] = extractelement <16 x i1> [[TMP84]], i32 15
+; CHECK-NEXT: br i1 [[TMP115]], label %[[PRED_STORE_IF31:.*]], label %[[PRED_STORE_CONTINUE32]]
+; CHECK: [[PRED_STORE_IF31]]:
+; CHECK-NEXT: [[TMP116:%.*]] = extractelement <16 x ptr> [[PREDPHI]], i32 15
+; CHECK-NEXT: store i8 0, ptr [[TMP116]], align 1
+; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE32]]
+; CHECK: [[PRED_STORE_CONTINUE32]]:
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
+; CHECK-NEXT: [[TMP117:%.*]] = icmp eq i64 [[INDEX_NEXT]], 96
+; CHECK-NEXT: br i1 [[TMP117]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: br label %[[SCALAR_PH:.*]]
+; CHECK: [[SCALAR_PH]]:
+; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
+; CHECK: [[LOOP_HEADER]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 96, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
+; CHECK-NEXT: [[EC:%.*]] = icmp ne i64 [[IV]], 99
+; CHECK-NEXT: br i1 [[EC]], label %[[LOOP_BODY:.*]], label %[[EXIT:.*]]
+; CHECK: [[LOOP_BODY]]:
+; CHECK-NEXT: [[ARRAY_PTR:%.*]] = load ptr, ptr [[ARRAY]], align 8
+; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[ARRAY_PTR]], align 4
+; CHECK-NEXT: [[CMP_POS:%.*]] = icmp sgt i32 [[VAL]], 0
+; CHECK-NEXT: br i1 [[CMP_POS]], label %[[ELSE:.*]], label %[[THEN:.*]]
+; CHECK: [[THEN]]:
+; CHECK-NEXT: [[CMP_X:%.*]] = icmp slt i32 [[X]], [[VAL]]
+; CHECK-NEXT: [[SELECT_PTR:%.*]] = select i1 [[CMP_X]], ptr [[PTR2]], ptr [[PTR3]]
+; CHECK-NEXT: br label %[[MERGE_AND_STORE:.*]]
+; CHECK: [[ELSE]]:
+; CHECK-NEXT: br i1 [[COND]], label %[[MERGE_AND_STORE]], label %[[LOOP_LATCH]]
+; CHECK: [[MERGE_AND_STORE]]:
+; CHECK-NEXT: [[PTR_PHI:%.*]] = phi ptr [ [[SELECT_PTR]], %[[THEN]] ], [ [[PTR1]], %[[ELSE]] ]
+; CHECK-NEXT: store i8 0, ptr [[PTR_PHI]], align 1
+; CHECK-NEXT: br label %[[LOOP_LATCH]]
+; CHECK: [[LOOP_LATCH]]:
+; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
+; CHECK-NEXT: br label %[[LOOP_HEADER]], !llvm.loop [[LOOP3:![0-9]+]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: ret void
+;
+entry:
+ %ptr1 = alloca i8, align 1
+ %ptr2 = alloca i8, align 1
+ %ptr3 = alloca i8, align 1
+ call void @init(ptr %ptr1, ptr %ptr2, ptr %ptr3)
+ br label %loop.header
+
+loop.header:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
+ %ec = icmp ne i64 %iv, 99
+ br i1 %ec, label %loop.body, label %exit
+
+loop.body:
+ %array.ptr = load ptr, ptr %array, align 8
+ %val = load i32, ptr %array.ptr, align 4
+ %cmp.pos = icmp sgt i32 %val, 0
+ br i1 %cmp.pos, label %else, label %then
+
+then:
+ %cmp.x = icmp slt i32 %x, %val
+ %select.ptr = select i1 %cmp.x, ptr %ptr2, ptr %ptr3
+ br label %merge.and.store
+
+else:
+ br i1 %cond, label %merge.and.store, label %loop.latch
+
+merge.and.store:
+ %ptr.phi = phi ptr [ %select.ptr, %then ], [ %ptr1, %else ]
+ store i8 0, ptr %ptr.phi, align 1
+ br label %loop.latch
+
+loop.latch:
+ %iv.next = add i64 %iv, 1
+ br label %loop.header
+
+exit:
+ ret void
+}
+
+define void @replicating_store_with_phi_addr2(ptr noalias %array, ptr noalias %base, i64 %N, i32 %x, i1 %cond) {
+; CHECK-LABEL: define void @replicating_store_with_phi_addr2(
+; CHECK-SAME: ptr noalias [[ARRAY:%.*]], ptr noalias [[BASE:%.*]], i64 [[N:%.*]], i32 [[X:%.*]], i1 [[COND:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[PTR1:%.*]] = alloca i8, align 1
+; CHECK-NEXT: [[PTR2:%.*]] = alloca i8, align 1
+; CHECK-NEXT: [[PTR3:%.*]] = alloca i8, align 1
+; CHECK-NEXT: call void @init(ptr [[PTR1]], ptr [[PTR2]], ptr [[PTR3]])
+; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
+; CHECK: [[LOOP_HEADER]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
+; CHECK-NEXT: [[EC:%.*]] = icmp ne i64 [[IV]], 99
+; CHECK-NEXT: br i1 [[EC]], label %[[LOOP_BODY:.*]], label %[[EXIT:.*]]
+; CHECK: [[LOOP_BODY]]:
+; CHECK-NEXT: [[GEP_ARRAY:%.*]] = getelementptr i32, ptr [[ARRAY]], i64 [[IV]]
+; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[GEP_ARRAY]], align 8
+; CHECK-NEXT: [[L_PTR:%.*]] = load ptr, ptr [[BASE]], align 4
+; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[L_PTR]], align 4
+; CHECK-NEXT: [[CMP_POS:%.*]] = icmp sgt i32 [[VAL]], 0
+; CHECK-NEXT: br i1 [[CMP_POS]], label %[[ELSE:.*]], label %[[THEN:.*]]
+; CHECK: [[THEN]]:
+; CHECK-NEXT: [[SEL_CMP:%.*]] = icmp slt i32 [[X]], [[L]]
+; CHECK-NEXT: [[SELECT_PTR:%.*]] = select i1 [[SEL_CMP]], ptr [[PTR2]], ptr [[PTR3]]
+; CHECK-NEXT: br label %[[MERGE_AND_STORE:.*]]
+; CHECK: [[ELSE]]:
+; CHECK-NEXT: br i1 [[COND]], label %[[MERGE_AND_STORE]], label %[[LOOP_LATCH]]
+; CHECK: [[MERGE_AND_STORE]]:
+; CHECK-NEXT: [[PTR_PHI:%.*]] = phi ptr [ [[SELECT_PTR]], %[[THEN]] ], [ [[PTR1]], %[[ELSE]] ]
+; CHECK-NEXT: store i8 0, ptr [[PTR_PHI]], align 1
+; CHECK-NEXT: br label %[[LOOP_LATCH]]
+; CHECK: [[LOOP_LATCH]]:
+; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
+; CHECK-NEXT: br label %[[LOOP_HEADER]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: ret void
+;
+entry:
+ %ptr1 = alloca i8, align 1
+ %ptr2 = alloca i8, align 1
+ %ptr3 = alloca i8, align 1
+ call void @init(ptr %ptr1, ptr %ptr2, ptr %ptr3)
+ br label %loop.header
+
+loop.header:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
+ %ec = icmp ne i64 %iv, 99
+ br i1 %ec, label %loop.body, label %exit
+
+loop.body:
+ %gep.array = getelementptr i32, ptr %array, i64 %iv
+ %l = load i32, ptr %gep.array, align 8
+ %l.ptr = load ptr, ptr %base, align 4
+ %val = load i32, ptr %l.ptr, align 4
+ %cmp.pos = icmp sgt i32 %val, 0
+ br i1 %cmp.pos, label %else, label %then
+
+then:
+ %sel.cmp = icmp slt i32 %x, %l
+ %select.ptr = select i1 %sel.cmp, ptr %ptr2, ptr %ptr3
+ br label %merge.and.store
+
+else:
+ br i1 %cond, label %merge.and.store, label %loop.latch
+
+merge.and.store:
+ %ptr.phi = phi ptr [ %select.ptr, %then ], [ %ptr1, %else ]
+ store i8 0, ptr %ptr.phi, align 1
+ br label %loop.latch
+
+loop.latch:
+ %iv.next = add i64 %iv, 1
+ br label %loop.header
+
+exit:
+ ret void
+}
+
+attributes #0 = { "target-cpu"="znver2" }
+
+!0 = distinct !{!0, !1}
+!1 = !{!"llvm.loop.vectorize.enable", i1 true}
+;.
+; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
+; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
+; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
+; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
+;.
More information about the llvm-commits
mailing list