[llvm] 40a72f8 - [VPlan] Support extracting any lane of uniform value.
Florian Hahn via llvm-commits
llvm-commits at lists.llvm.org
Fri Jun 14 14:16:58 PDT 2024
Author: Florian Hahn
Date: 2024-06-14T22:16:52+01:00
New Revision: 40a72f8cc414726a8be234a260650fd62354da21
URL: https://github.com/llvm/llvm-project/commit/40a72f8cc414726a8be234a260650fd62354da21
DIFF: https://github.com/llvm/llvm-project/commit/40a72f8cc414726a8be234a260650fd62354da21.diff
LOG: [VPlan] Support extracting any lane of uniform value.
If the value we are extracting a lane from is uniform, only the first
lane will be set. Return lane 0 for any requested lane.
This fixes a crash when trying to extract the last lane for a
first-order recurrence resume value.
Fixes https://github.com/llvm/llvm-project/issues/95520.
Added:
Modified:
llvm/lib/Transforms/Vectorize/VPlan.cpp
llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
llvm/test/Transforms/LoopVectorize/AArch64/reduction-recurrence-costs-sve.ll
llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains.ll
llvm/test/Transforms/LoopVectorize/first-order-recurrence-complex.ll
llvm/test/Transforms/LoopVectorize/first-order-recurrence-multiply-recurrences.ll
llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index f17be451e6846..348a2be5072b4 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -231,6 +231,11 @@ Value *VPTransformState::get(VPValue *Def, const VPIteration &Instance) {
return Data
.PerPartScalars[Def][Instance.Part][Instance.Lane.mapToCacheIndex(VF)];
}
+ if (!Instance.Lane.isFirstLane() &&
+ vputils::isUniformAfterVectorization(Def) &&
+ hasScalarValue(Def, {Instance.Part, VPLane::getFirstLane()})) {
+ return Data.PerPartScalars[Def][Instance.Part][0];
+ }
assert(hasVectorValue(Def, Instance.Part));
auto *VecPart = Data.PerPartOutput[Def][Instance.Part];
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 7a482455473e4..a3ff6395bb39e 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -577,7 +577,8 @@ Value *VPInstruction::generatePerPart(VPTransformState &State, unsigned Part) {
// When loop is unrolled without vectorizing, retrieve UF - Offset.
Res = State.get(getOperand(0), State.UF - Offset);
}
- Res->setName(Name);
+ if (isa<ExtractElementInst>(Res))
+ Res->setName(Name);
return Res;
}
case VPInstruction::LogicalAnd: {
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/reduction-recurrence-costs-sve.ll b/llvm/test/Transforms/LoopVectorize/AArch64/reduction-recurrence-costs-sve.ll
index 7bbb94a7856d5..79b3fb02bd0a2 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/reduction-recurrence-costs-sve.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/reduction-recurrence-costs-sve.ll
@@ -100,10 +100,6 @@ define i32 @chained_recurrences(i32 %x, i64 %y, ptr %src.1, i32 %z, ptr %src.2)
; DEFAULT: middle.block:
; DEFAULT-NEXT: [[BIN_RDX:%.*]] = or <vscale x 4 x i32> [[TMP58]], [[TMP57]]
; DEFAULT-NEXT: [[TMP60:%.*]] = call i32 @llvm.vector.reduce.or.nxv4i32(<vscale x 4 x i32> [[BIN_RDX]])
-; DEFAULT-NEXT: [[TMP61:%.*]] = call i32 @llvm.vscale.i32()
-; DEFAULT-NEXT: [[TMP62:%.*]] = mul i32 [[TMP61]], 4
-; DEFAULT-NEXT: [[TMP63:%.*]] = sub i32 [[TMP62]], 1
-; DEFAULT-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <vscale x 4 x i32> [[BROADCAST_SPLAT5]], i32 [[TMP63]]
; DEFAULT-NEXT: [[TMP64:%.*]] = call i32 @llvm.vscale.i32()
; DEFAULT-NEXT: [[TMP65:%.*]] = mul i32 [[TMP64]], 4
; DEFAULT-NEXT: [[TMP66:%.*]] = sub i32 [[TMP65]], 1
@@ -112,7 +108,7 @@ define i32 @chained_recurrences(i32 %x, i64 %y, ptr %src.1, i32 %z, ptr %src.2)
; DEFAULT-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
; DEFAULT: scalar.ph:
; DEFAULT-NEXT: [[SCALAR_RECUR_INIT14:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT13]], [[MIDDLE_BLOCK]] ]
-; DEFAULT-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
+; DEFAULT-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[TMP18]], [[MIDDLE_BLOCK]] ]
; DEFAULT-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
; DEFAULT-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[TMP60]], [[MIDDLE_BLOCK]] ]
; DEFAULT-NEXT: br label [[LOOP:%.*]]
@@ -225,10 +221,6 @@ define i32 @chained_recurrences(i32 %x, i64 %y, ptr %src.1, i32 %z, ptr %src.2)
; PRED-NEXT: br i1 [[TMP44]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; PRED: middle.block:
; PRED-NEXT: [[TMP45:%.*]] = call i32 @llvm.vector.reduce.or.nxv4i32(<vscale x 4 x i32> [[TMP42]])
-; PRED-NEXT: [[TMP46:%.*]] = call i32 @llvm.vscale.i32()
-; PRED-NEXT: [[TMP47:%.*]] = mul i32 [[TMP46]], 4
-; PRED-NEXT: [[TMP48:%.*]] = sub i32 [[TMP47]], 1
-; PRED-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <vscale x 4 x i32> [[BROADCAST_SPLAT]], i32 [[TMP48]]
; PRED-NEXT: [[TMP49:%.*]] = call i32 @llvm.vscale.i32()
; PRED-NEXT: [[TMP50:%.*]] = mul i32 [[TMP49]], 4
; PRED-NEXT: [[TMP51:%.*]] = sub i32 [[TMP50]], 1
@@ -236,7 +228,7 @@ define i32 @chained_recurrences(i32 %x, i64 %y, ptr %src.1, i32 %z, ptr %src.2)
; PRED-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
; PRED: scalar.ph:
; PRED-NEXT: [[SCALAR_RECUR_INIT9:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT8]], [[MIDDLE_BLOCK]] ]
-; PRED-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
+; PRED-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[TMP21]], [[MIDDLE_BLOCK]] ]
; PRED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
; PRED-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[TMP45]], [[MIDDLE_BLOCK]] ]
; PRED-NEXT: br label [[LOOP:%.*]]
diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains.ll
index 8dd1e71e1f1d0..ff97e15417aff 100644
--- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains.ll
+++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains.ll
@@ -657,13 +657,17 @@ define double @test_resinking_required(ptr %p, ptr noalias %a, ptr noalias %b) {
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 0
; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]]
; CHECK: middle.block:
-; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x double> [[BROADCAST_SPLAT]], i32 2
-; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x double> [[BROADCAST_SPLAT]], i32 3
-; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI6:%.*]] = extractelement <4 x double> [[BROADCAST_SPLAT4]], i32 2
-; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT5:%.*]] = extractelement <4 x double> [[BROADCAST_SPLAT4]], i32 3
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI10:%.*]] = extractelement <4 x double> [[TMP4]], i32 2
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT9:%.*]] = extractelement <4 x double> [[TMP4]], i32 3
; CHECK-NEXT: br i1 true, label %End, label %scalar.ph
+; CHECK: scalar.ph:
+; CHECK-NEXT: phi double [ 0.000000e+00, %Entry ], [ [[VECTOR_RECUR_EXTRACT9]], %middle.block ]
+; CHECK-NEXT: phi double [ 0.000000e+00, %Entry ], [ [[TMP3]], %middle.block ]
+; CHECK-NEXT: phi double [ 0.000000e+00, %Entry ], [ [[TMP0]], %middle.block ]
+; CHECK: End:
+; CHECK-NEXT: = phi double [ {{.+}}, %Loop ], [ [[TMP0]], %middle.block ]
+; CHECK-NEXT: = phi double [ {{.+}}, %Loop ], [ [[TMP3]], %middle.block ]
+; CHECK-NEXT: = phi double [ {{.+}}, %Loop ], [ [[VECTOR_RECUR_EXTRACT_FOR_PHI10]], %middle.block ]
;
Entry:
br label %Loop
diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-complex.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-complex.ll
index 6b1d38142b9ec..b24b34c698b59 100644
--- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-complex.ll
+++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-complex.ll
@@ -385,12 +385,10 @@ define void @instruction_with_2_FOR_operands(ptr noalias %A, ptr noalias %B, ptr
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
; CHECK: middle.block:
-; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x float> [[BROADCAST_SPLAT3]], i32 3
-; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT4:%.*]] = extractelement <4 x float> [[BROADCAST_SPLAT]], i32 3
; CHECK-NEXT: br i1 false, label [[BB74:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
-; CHECK-NEXT: [[SCALAR_RECUR_INIT5:%.*]] = phi float [ 1.000000e+00, [[BB:%.*]] ], [ [[VECTOR_RECUR_EXTRACT4]], [[MIDDLE_BLOCK]] ]
-; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi float [ 0.000000e+00, [[BB]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
+; CHECK-NEXT: [[SCALAR_RECUR_INIT5:%.*]] = phi float [ 1.000000e+00, [[BB:%.*]] ], [ [[TMP2]], [[MIDDLE_BLOCK]] ]
+; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi float [ 0.000000e+00, [[BB]] ], [ [[TMP4]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[BB]] ]
; CHECK-NEXT: br label [[BB13:%.*]]
; CHECK: bb13:
@@ -463,12 +461,10 @@ define void @instruction_with_2_FOR_operands_and_multiple_other_uses(ptr noalias
; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
; CHECK: middle.block:
-; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x float> [[BROADCAST_SPLAT3]], i32 3
-; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT4:%.*]] = extractelement <4 x float> [[BROADCAST_SPLAT]], i32 3
; CHECK-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
-; CHECK-NEXT: [[SCALAR_RECUR_INIT5:%.*]] = phi float [ 0.000000e+00, [[BB:%.*]] ], [ [[VECTOR_RECUR_EXTRACT4]], [[MIDDLE_BLOCK]] ]
-; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi float [ 0.000000e+00, [[BB]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
+; CHECK-NEXT: [[SCALAR_RECUR_INIT5:%.*]] = phi float [ 0.000000e+00, [[BB:%.*]] ], [ [[TMP1]], [[MIDDLE_BLOCK]] ]
+; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi float [ 0.000000e+00, [[BB]] ], [ [[TMP3]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[BB]] ]
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
@@ -556,12 +552,10 @@ define void @instruction_with_2_FOR_operands_and_multiple_other_uses_chain(ptr n
; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
; CHECK-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
; CHECK: middle.block:
-; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x float> [[BROADCAST_SPLAT3]], i32 3
-; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT4:%.*]] = extractelement <4 x float> [[BROADCAST_SPLAT]], i32 3
; CHECK-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
-; CHECK-NEXT: [[SCALAR_RECUR_INIT5:%.*]] = phi float [ 0.000000e+00, [[BB:%.*]] ], [ [[VECTOR_RECUR_EXTRACT4]], [[MIDDLE_BLOCK]] ]
-; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi float [ 0.000000e+00, [[BB]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
+; CHECK-NEXT: [[SCALAR_RECUR_INIT5:%.*]] = phi float [ 0.000000e+00, [[BB:%.*]] ], [ [[TMP1]], [[MIDDLE_BLOCK]] ]
+; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi float [ 0.000000e+00, [[BB]] ], [ [[TMP3]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[BB]] ]
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-multiply-recurrences.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-multiply-recurrences.ll
index 7e15f038114fd..544b9cfeb1bba 100644
--- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-multiply-recurrences.ll
+++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-multiply-recurrences.ll
@@ -121,12 +121,10 @@ define void @test_pr54223_sink_after_insertion_order(ptr noalias %a, ptr noalias
; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10000
; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: middle.block:
-; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x float> [[BROADCAST_SPLAT]], i32 3
-; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT4:%.*]] = extractelement <4 x float> [[BROADCAST_SPLAT3]], i32 3
; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
-; CHECK-NEXT: [[SCALAR_RECUR_INIT5:%.*]] = phi float [ 0.000000e+00, [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT4]], [[MIDDLE_BLOCK]] ]
-; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi float [ 0.000000e+00, [[ENTRY]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
+; CHECK-NEXT: [[SCALAR_RECUR_INIT5:%.*]] = phi float [ 0.000000e+00, [[ENTRY:%.*]] ], [ [[TMP4]], [[MIDDLE_BLOCK]] ]
+; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi float [ 0.000000e+00, [[ENTRY]] ], [ [[TMP2]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 10000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
@@ -246,10 +244,9 @@ define void @test_pr54233_for_depend_on_each_other(ptr noalias %a, ptr noalias %
; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[TMP4]], i32 3
-; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT2:%.*]] = extractelement <4 x i32> [[BROADCAST_SPLAT]], i32 3
; CHECK-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
-; CHECK-NEXT: [[SCALAR_RECUR_INIT3:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT2]], [[MIDDLE_BLOCK]] ]
+; CHECK-NEXT: [[SCALAR_RECUR_INIT3:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TMP1]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
; CHECK-NEXT: br label [[LOOP:%.*]]
diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll
index 665ac0e1fa445..b259375d911ea 100644
--- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll
+++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll
@@ -3750,5 +3750,106 @@ for.end:
ret void
}
+; Test case for https://github.com/llvm/llvm-project/issues/95520.
+define i32 @recurence_uniform_load(ptr %src, ptr noalias %dst) {
+; UNROLL-NO-IC-LABEL: @recurence_uniform_load(
+; UNROLL-NO-IC-NEXT: entry:
+; UNROLL-NO-IC-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; UNROLL-NO-IC: vector.ph:
+; UNROLL-NO-IC-NEXT: br label [[VECTOR_BODY:%.*]]
+; UNROLL-NO-IC: vector.body:
+; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ <i32 poison, i32 poison, i32 poison, i32 0>, [[VECTOR_PH]] ], [ [[BROADCAST_SPLAT:%.*]], [[VECTOR_BODY]] ]
+; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = load i32, ptr [[SRC:%.*]], align 4
+; UNROLL-NO-IC-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP0]], i64 0
+; UNROLL-NO-IC-NEXT: [[BROADCAST_SPLAT]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
+; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 8
+; UNROLL-NO-IC-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP37:![0-9]+]]
+; UNROLL-NO-IC: middle.block:
+; UNROLL-NO-IC-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
+; UNROLL-NO-IC: scalar.ph:
+; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TMP0]], [[MIDDLE_BLOCK]] ]
+; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 8, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
+; UNROLL-NO-IC-NEXT: br label [[LOOP:%.*]]
+; UNROLL-NO-IC: loop:
+; UNROLL-NO-IC-NEXT: [[PHI:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[LOOP]] ]
+; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[LOAD:%.*]], [[LOOP]] ]
+; UNROLL-NO-IC-NEXT: [[ADD]] = add i64 [[PHI]], 1
+; UNROLL-NO-IC-NEXT: [[LOAD]] = load i32, ptr [[SRC]], align 4
+; UNROLL-NO-IC-NEXT: [[ICMP:%.*]] = icmp ult i64 [[PHI]], 1
+; UNROLL-NO-IC-NEXT: br i1 [[ICMP]], label [[LOOP]], label [[EXIT]], !llvm.loop [[LOOP38:![0-9]+]]
+; UNROLL-NO-IC: exit:
+; UNROLL-NO-IC-NEXT: ret i32 0
+;
+; UNROLL-NO-VF-LABEL: @recurence_uniform_load(
+; UNROLL-NO-VF-NEXT: entry:
+; UNROLL-NO-VF-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; UNROLL-NO-VF: vector.ph:
+; UNROLL-NO-VF-NEXT: br label [[VECTOR_BODY:%.*]]
+; UNROLL-NO-VF: vector.body:
+; UNROLL-NO-VF-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; UNROLL-NO-VF-NEXT: [[VECTOR_RECUR:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP0:%.*]], [[VECTOR_BODY]] ]
+; UNROLL-NO-VF-NEXT: [[TMP0]] = load i32, ptr [[SRC:%.*]], align 4
+; UNROLL-NO-VF-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
+; UNROLL-NO-VF-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP37:![0-9]+]]
+; UNROLL-NO-VF: middle.block:
+; UNROLL-NO-VF-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
+; UNROLL-NO-VF: scalar.ph:
+; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TMP0]], [[MIDDLE_BLOCK]] ]
+; UNROLL-NO-VF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 2, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
+; UNROLL-NO-VF-NEXT: br label [[LOOP:%.*]]
+; UNROLL-NO-VF: loop:
+; UNROLL-NO-VF-NEXT: [[PHI:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[LOOP]] ]
+; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[LOAD:%.*]], [[LOOP]] ]
+; UNROLL-NO-VF-NEXT: [[ADD]] = add i64 [[PHI]], 1
+; UNROLL-NO-VF-NEXT: [[LOAD]] = load i32, ptr [[SRC]], align 4
+; UNROLL-NO-VF-NEXT: [[ICMP:%.*]] = icmp ult i64 [[PHI]], 1
+; UNROLL-NO-VF-NEXT: br i1 [[ICMP]], label [[LOOP]], label [[EXIT]], !llvm.loop [[LOOP38:![0-9]+]]
+; UNROLL-NO-VF: exit:
+; UNROLL-NO-VF-NEXT: ret i32 0
+;
+; SINK-AFTER-LABEL: @recurence_uniform_load(
+; SINK-AFTER-NEXT: entry:
+; SINK-AFTER-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; SINK-AFTER: vector.ph:
+; SINK-AFTER-NEXT: br label [[VECTOR_BODY:%.*]]
+; SINK-AFTER: vector.body:
+; SINK-AFTER-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; SINK-AFTER-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ <i32 poison, i32 poison, i32 poison, i32 0>, [[VECTOR_PH]] ], [ [[BROADCAST_SPLAT:%.*]], [[VECTOR_BODY]] ]
+; SINK-AFTER-NEXT: [[TMP0:%.*]] = load i32, ptr [[SRC:%.*]], align 4
+; SINK-AFTER-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP0]], i64 0
+; SINK-AFTER-NEXT: [[BROADCAST_SPLAT]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
+; SINK-AFTER-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4
+; SINK-AFTER-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP37:![0-9]+]]
+; SINK-AFTER: middle.block:
+; SINK-AFTER-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
+; SINK-AFTER: scalar.ph:
+; SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TMP0]], [[MIDDLE_BLOCK]] ]
+; SINK-AFTER-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 4, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
+; SINK-AFTER-NEXT: br label [[LOOP:%.*]]
+; SINK-AFTER: loop:
+; SINK-AFTER-NEXT: [[PHI:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[LOOP]] ]
+; SINK-AFTER-NEXT: [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[LOAD:%.*]], [[LOOP]] ]
+; SINK-AFTER-NEXT: [[ADD]] = add i64 [[PHI]], 1
+; SINK-AFTER-NEXT: [[LOAD]] = load i32, ptr [[SRC]], align 4
+; SINK-AFTER-NEXT: [[ICMP:%.*]] = icmp ult i64 [[PHI]], 1
+; SINK-AFTER-NEXT: br i1 [[ICMP]], label [[LOOP]], label [[EXIT]], !llvm.loop [[LOOP38:![0-9]+]]
+; SINK-AFTER: exit:
+; SINK-AFTER-NEXT: ret i32 0
+;
+entry:
+ br label %loop
+
+loop:
+ %phi = phi i64 [ 0, %entry ], [ %add, %loop ]
+ %recur = phi i32 [ 0, %entry ], [ %load, %loop ]
+ %add = add i64 %phi, 1
+ %load = load i32, ptr %src, align 4
+ %icmp = icmp ult i64 %phi, 1
+ br i1 %icmp, label %loop, label %exit
+
+exit:
+ ret i32 0
+}
!2 = !{!"branch_weights", i32 1, i32 1}
More information about the llvm-commits
mailing list