[llvm] 54cb879 - [LV] Add a load-focused version of the pr45679 test
Philip Reames via llvm-commits
llvm-commits at lists.llvm.org
Thu Jul 21 15:34:02 PDT 2022
Author: Philip Reames
Date: 2022-07-21T15:33:42-07:00
New Revision: 54cb87964d3513c0765d68bbae594d09f75e2e25
URL: https://github.com/llvm/llvm-project/commit/54cb87964d3513c0765d68bbae594d09f75e2e25
DIFF: https://github.com/llvm/llvm-project/commit/54cb87964d3513c0765d68bbae594d09f75e2e25.diff
LOG: [LV] Add a load-focused version of the pr45679 test
This is a reproducer for a bug in predicated instruction handling. The final generated code is correct, but the reasoning by which we get there isn't.
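
For context, the pattern the new checks pin down is sketched below: with tail folding by masking, each lane's load is sunk into a predicated block together with the store, and its result is merged back through a phi whose other incoming value is poison. This is a minimal excerpt modelled on the CHECK lines added further down; the value names (%lane.iv, %gep, %val, %merge) are illustrative, not taken from the actual output.

  pred.store.if:
    %gep = getelementptr inbounds i64, i64* %a, i64 %lane.iv   ; per-lane address
    %val = load i64, i64* %gep, align 8                        ; predicated scalar load
    store i64 %val, i64* %b, align 8                           ; store to the uniform destination
    br label %pred.store.continue

  pred.store.continue:
    ; merge of the predicated load; poison flows in when the lane is masked off
    %merge = phi i64 [ poison, %vector.body ], [ %val, %pred.store.if ]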
Added:
Modified:
llvm/test/Transforms/LoopVectorize/pr45679-fold-tail-by-masking.ll
Removed:
################################################################################
diff --git a/llvm/test/Transforms/LoopVectorize/pr45679-fold-tail-by-masking.ll b/llvm/test/Transforms/LoopVectorize/pr45679-fold-tail-by-masking.ll
index 78c22a903c15..f9a9cce0dda6 100644
--- a/llvm/test/Transforms/LoopVectorize/pr45679-fold-tail-by-masking.ll
+++ b/llvm/test/Transforms/LoopVectorize/pr45679-fold-tail-by-masking.ll
@@ -55,7 +55,7 @@ define void @pr45679(i32* %A) optsize {
; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], <i32 4, i32 4, i32 4, i32 4>
; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i32 [[INDEX_NEXT]], 16
-; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !0
+; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
@@ -67,7 +67,7 @@ define void @pr45679(i32* %A) optsize {
; CHECK-NEXT: store i32 13, i32* [[ARRAYIDX]], align 1
; CHECK-NEXT: [[RIVPLUS1]] = add nuw nsw i32 [[RIV]], 1
; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 [[RIVPLUS1]], 14
-; CHECK-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop !2
+; CHECK-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP2:![0-9]+]]
; CHECK: exit:
; CHECK-NEXT: ret void
;
@@ -117,7 +117,7 @@ define void @pr45679(i32* %A) optsize {
; VF2UF2-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4
; VF2UF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[STEP_ADD]], <i32 2, i32 2>
; VF2UF2-NEXT: [[TMP14:%.*]] = icmp eq i32 [[INDEX_NEXT]], 16
-; VF2UF2-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !0
+; VF2UF2-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; VF2UF2: middle.block:
; VF2UF2-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
; VF2UF2: scalar.ph:
@@ -129,7 +129,7 @@ define void @pr45679(i32* %A) optsize {
; VF2UF2-NEXT: store i32 13, i32* [[ARRAYIDX]], align 1
; VF2UF2-NEXT: [[RIVPLUS1]] = add nuw nsw i32 [[RIV]], 1
; VF2UF2-NEXT: [[COND:%.*]] = icmp eq i32 [[RIVPLUS1]], 14
-; VF2UF2-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop !2
+; VF2UF2-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP2:![0-9]+]]
; VF2UF2: exit:
; VF2UF2-NEXT: ret void
;
@@ -139,46 +139,46 @@ define void @pr45679(i32* %A) optsize {
; VF1UF4: vector.ph:
; VF1UF4-NEXT: br label [[VECTOR_BODY:%.*]]
; VF1UF4: vector.body:
-; VF1UF4-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE9:%.*]] ]
-; VF1UF4-NEXT: [[INDUCTION:%.*]] = add i32 [[INDEX]], 0
-; VF1UF4-NEXT: [[INDUCTION1:%.*]] = add i32 [[INDEX]], 1
-; VF1UF4-NEXT: [[INDUCTION2:%.*]] = add i32 [[INDEX]], 2
-; VF1UF4-NEXT: [[INDUCTION3:%.*]] = add i32 [[INDEX]], 3
-; VF1UF4-NEXT: [[TMP0:%.*]] = icmp ule i32 [[INDUCTION]], 13
-; VF1UF4-NEXT: [[TMP1:%.*]] = icmp ule i32 [[INDUCTION1]], 13
-; VF1UF4-NEXT: [[TMP2:%.*]] = icmp ule i32 [[INDUCTION2]], 13
-; VF1UF4-NEXT: [[TMP3:%.*]] = icmp ule i32 [[INDUCTION3]], 13
+; VF1UF4-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE12:%.*]] ]
+; VF1UF4-NEXT: [[VEC_IV:%.*]] = add i32 [[INDEX]], 0
+; VF1UF4-NEXT: [[VEC_IV4:%.*]] = add i32 [[INDEX]], 1
+; VF1UF4-NEXT: [[VEC_IV5:%.*]] = add i32 [[INDEX]], 2
+; VF1UF4-NEXT: [[VEC_IV6:%.*]] = add i32 [[INDEX]], 3
+; VF1UF4-NEXT: [[TMP0:%.*]] = icmp ule i32 [[VEC_IV]], 13
+; VF1UF4-NEXT: [[TMP1:%.*]] = icmp ule i32 [[VEC_IV4]], 13
+; VF1UF4-NEXT: [[TMP2:%.*]] = icmp ule i32 [[VEC_IV5]], 13
+; VF1UF4-NEXT: [[TMP3:%.*]] = icmp ule i32 [[VEC_IV6]], 13
; VF1UF4-NEXT: br i1 [[TMP0]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; VF1UF4: pred.store.if:
-; VF1UF4-NEXT: [[SUNK_IND0:%.*]] = add i32 [[INDEX]], 0
-; VF1UF4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[SUNK_IND0]]
+; VF1UF4-NEXT: [[INDUCTION:%.*]] = add i32 [[INDEX]], 0
+; VF1UF4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[INDUCTION]]
; VF1UF4-NEXT: store i32 13, i32* [[TMP4]], align 1
; VF1UF4-NEXT: br label [[PRED_STORE_CONTINUE]]
; VF1UF4: pred.store.continue:
-; VF1UF4-NEXT: br i1 [[TMP1]], label [[PRED_STORE_IF4:%.*]], label [[PRED_STORE_CONTINUE5:%.*]]
+; VF1UF4-NEXT: br i1 [[TMP1]], label [[PRED_STORE_IF7:%.*]], label [[PRED_STORE_CONTINUE8:%.*]]
; VF1UF4: pred.store.if7:
-; VF1UF4-NEXT: [[SUNK_IND1:%.*]] = add i32 [[INDEX]], 1
-; VF1UF4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[SUNK_IND1]]
+; VF1UF4-NEXT: [[INDUCTION1:%.*]] = add i32 [[INDEX]], 1
+; VF1UF4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[INDUCTION1]]
; VF1UF4-NEXT: store i32 13, i32* [[TMP5]], align 1
-; VF1UF4-NEXT: br label [[PRED_STORE_CONTINUE5]]
+; VF1UF4-NEXT: br label [[PRED_STORE_CONTINUE8]]
; VF1UF4: pred.store.continue8:
-; VF1UF4-NEXT: br i1 [[TMP2]], label [[PRED_STORE_IF6:%.*]], label [[PRED_STORE_CONTINUE7:%.*]]
+; VF1UF4-NEXT: br i1 [[TMP2]], label [[PRED_STORE_IF9:%.*]], label [[PRED_STORE_CONTINUE10:%.*]]
; VF1UF4: pred.store.if9:
-; VF1UF4-NEXT: [[SUNK_IND2:%.*]] = add i32 [[INDEX]], 2
-; VF1UF4-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[SUNK_IND2]]
+; VF1UF4-NEXT: [[INDUCTION2:%.*]] = add i32 [[INDEX]], 2
+; VF1UF4-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[INDUCTION2]]
; VF1UF4-NEXT: store i32 13, i32* [[TMP6]], align 1
-; VF1UF4-NEXT: br label [[PRED_STORE_CONTINUE7]]
+; VF1UF4-NEXT: br label [[PRED_STORE_CONTINUE10]]
; VF1UF4: pred.store.continue10:
-; VF1UF4-NEXT: br i1 [[TMP3]], label [[PRED_STORE_IF8:%.*]], label [[PRED_STORE_CONTINUE9]]
+; VF1UF4-NEXT: br i1 [[TMP3]], label [[PRED_STORE_IF11:%.*]], label [[PRED_STORE_CONTINUE12]]
; VF1UF4: pred.store.if11:
-; VF1UF4-NEXT: [[SUNK_IND3:%.*]] = add i32 [[INDEX]], 3
-; VF1UF4-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[SUNK_IND3]]
+; VF1UF4-NEXT: [[INDUCTION3:%.*]] = add i32 [[INDEX]], 3
+; VF1UF4-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[INDUCTION3]]
; VF1UF4-NEXT: store i32 13, i32* [[TMP7]], align 1
-; VF1UF4-NEXT: br label [[PRED_STORE_CONTINUE9]]
+; VF1UF4-NEXT: br label [[PRED_STORE_CONTINUE12]]
; VF1UF4: pred.store.continue12:
; VF1UF4-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4
; VF1UF4-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], 16
-; VF1UF4-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]]
+; VF1UF4-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; VF1UF4: middle.block:
; VF1UF4-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
; VF1UF4: scalar.ph:
@@ -190,7 +190,7 @@ define void @pr45679(i32* %A) optsize {
; VF1UF4-NEXT: store i32 13, i32* [[ARRAYIDX]], align 1
; VF1UF4-NEXT: [[RIVPLUS1]] = add nuw nsw i32 [[RIV]], 1
; VF1UF4-NEXT: [[COND:%.*]] = icmp eq i32 [[RIVPLUS1]], 14
-; VF1UF4-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]]
+; VF1UF4-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP2:![0-9]+]]
; VF1UF4: exit:
; VF1UF4-NEXT: ret void
;
@@ -208,3 +208,226 @@ loop:
exit:
ret void
}
+
+define void @load_variant(i64* noalias %a, i64* noalias %b) {
+; CHECK-LABEL: @load_variant(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK: vector.ph:
+; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
+; CHECK: vector.body:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE6:%.*]] ]
+; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE6]] ]
+; CHECK-NEXT: [[TMP0:%.*]] = icmp ule <4 x i64> [[VEC_IND]], <i64 13, i64 13, i64 13, i64 13>
+; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i32 0
+; CHECK-NEXT: br i1 [[TMP1]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
+; CHECK: pred.store.if:
+; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, i64* [[A:%.*]], i64 [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8
+; CHECK-NEXT: store i64 [[TMP4]], i64* [[B:%.*]], align 8
+; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]]
+; CHECK: pred.store.continue:
+; CHECK-NEXT: [[TMP5:%.*]] = phi i64 [ poison, [[VECTOR_BODY]] ], [ [[TMP4]], [[PRED_STORE_IF]] ]
+; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i1> [[TMP0]], i32 1
+; CHECK-NEXT: br i1 [[TMP6]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2:%.*]]
+; CHECK: pred.store.if1:
+; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 1
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 [[TMP7]]
+; CHECK-NEXT: [[TMP9:%.*]] = load i64, i64* [[TMP8]], align 8
+; CHECK-NEXT: store i64 [[TMP9]], i64* [[B]], align 8
+; CHECK-NEXT: br label [[PRED_STORE_CONTINUE2]]
+; CHECK: pred.store.continue2:
+; CHECK-NEXT: [[TMP10:%.*]] = phi i64 [ poison, [[PRED_STORE_CONTINUE]] ], [ [[TMP9]], [[PRED_STORE_IF1]] ]
+; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x i1> [[TMP0]], i32 2
+; CHECK-NEXT: br i1 [[TMP11]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4:%.*]]
+; CHECK: pred.store.if3:
+; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], 2
+; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 [[TMP12]]
+; CHECK-NEXT: [[TMP14:%.*]] = load i64, i64* [[TMP13]], align 8
+; CHECK-NEXT: store i64 [[TMP14]], i64* [[B]], align 8
+; CHECK-NEXT: br label [[PRED_STORE_CONTINUE4]]
+; CHECK: pred.store.continue4:
+; CHECK-NEXT: [[TMP15:%.*]] = phi i64 [ poison, [[PRED_STORE_CONTINUE2]] ], [ [[TMP14]], [[PRED_STORE_IF3]] ]
+; CHECK-NEXT: [[TMP16:%.*]] = extractelement <4 x i1> [[TMP0]], i32 3
+; CHECK-NEXT: br i1 [[TMP16]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6]]
+; CHECK: pred.store.if5:
+; CHECK-NEXT: [[TMP17:%.*]] = add i64 [[INDEX]], 3
+; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 [[TMP17]]
+; CHECK-NEXT: [[TMP19:%.*]] = load i64, i64* [[TMP18]], align 8
+; CHECK-NEXT: store i64 [[TMP19]], i64* [[B]], align 8
+; CHECK-NEXT: br label [[PRED_STORE_CONTINUE6]]
+; CHECK: pred.store.continue6:
+; CHECK-NEXT: [[TMP20:%.*]] = phi i64 [ poison, [[PRED_STORE_CONTINUE4]] ], [ [[TMP19]], [[PRED_STORE_IF5]] ]
+; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 4, i64 4, i64 4, i64 4>
+; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16
+; CHECK-NEXT: br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; CHECK: middle.block:
+; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
+; CHECK: scalar.ph:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 16, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK: for.body:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 [[IV]]
+; CHECK-NEXT: [[V:%.*]] = load i64, i64* [[ARRAYIDX]], align 8
+; CHECK-NEXT: store i64 [[V]], i64* [[B]], align 8
+; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 14
+; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
+; CHECK: for.end:
+; CHECK-NEXT: ret void
+;
+; VF2UF2-LABEL: @load_variant(
+; VF2UF2-NEXT: entry:
+; VF2UF2-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; VF2UF2: vector.ph:
+; VF2UF2-NEXT: br label [[VECTOR_BODY:%.*]]
+; VF2UF2: vector.body:
+; VF2UF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE7:%.*]] ]
+; VF2UF2-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE7]] ]
+; VF2UF2-NEXT: [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], <i64 2, i64 2>
+; VF2UF2-NEXT: [[TMP0:%.*]] = icmp ule <2 x i64> [[VEC_IND]], <i64 13, i64 13>
+; VF2UF2-NEXT: [[TMP1:%.*]] = icmp ule <2 x i64> [[STEP_ADD]], <i64 13, i64 13>
+; VF2UF2-NEXT: [[TMP2:%.*]] = extractelement <2 x i1> [[TMP0]], i32 0
+; VF2UF2-NEXT: br i1 [[TMP2]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
+; VF2UF2: pred.store.if:
+; VF2UF2-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0
+; VF2UF2-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, i64* [[A:%.*]], i64 [[TMP3]]
+; VF2UF2-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP4]], align 8
+; VF2UF2-NEXT: store i64 [[TMP5]], i64* [[B:%.*]], align 8
+; VF2UF2-NEXT: br label [[PRED_STORE_CONTINUE]]
+; VF2UF2: pred.store.continue:
+; VF2UF2-NEXT: [[TMP6:%.*]] = phi i64 [ poison, [[VECTOR_BODY]] ], [ [[TMP5]], [[PRED_STORE_IF]] ]
+; VF2UF2-NEXT: [[TMP7:%.*]] = extractelement <2 x i1> [[TMP0]], i32 1
+; VF2UF2-NEXT: br i1 [[TMP7]], label [[PRED_STORE_IF2:%.*]], label [[PRED_STORE_CONTINUE3:%.*]]
+; VF2UF2: pred.store.if2:
+; VF2UF2-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 1
+; VF2UF2-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 [[TMP8]]
+; VF2UF2-NEXT: [[TMP10:%.*]] = load i64, i64* [[TMP9]], align 8
+; VF2UF2-NEXT: store i64 [[TMP10]], i64* [[B]], align 8
+; VF2UF2-NEXT: br label [[PRED_STORE_CONTINUE3]]
+; VF2UF2: pred.store.continue3:
+; VF2UF2-NEXT: [[TMP11:%.*]] = phi i64 [ poison, [[PRED_STORE_CONTINUE]] ], [ [[TMP10]], [[PRED_STORE_IF2]] ]
+; VF2UF2-NEXT: [[TMP12:%.*]] = extractelement <2 x i1> [[TMP1]], i32 0
+; VF2UF2-NEXT: br i1 [[TMP12]], label [[PRED_STORE_IF4:%.*]], label [[PRED_STORE_CONTINUE5:%.*]]
+; VF2UF2: pred.store.if4:
+; VF2UF2-NEXT: [[TMP13:%.*]] = add i64 [[INDEX]], 2
+; VF2UF2-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 [[TMP13]]
+; VF2UF2-NEXT: [[TMP15:%.*]] = load i64, i64* [[TMP14]], align 8
+; VF2UF2-NEXT: store i64 [[TMP15]], i64* [[B]], align 8
+; VF2UF2-NEXT: br label [[PRED_STORE_CONTINUE5]]
+; VF2UF2: pred.store.continue5:
+; VF2UF2-NEXT: [[TMP16:%.*]] = phi i64 [ poison, [[PRED_STORE_CONTINUE3]] ], [ [[TMP15]], [[PRED_STORE_IF4]] ]
+; VF2UF2-NEXT: [[TMP17:%.*]] = extractelement <2 x i1> [[TMP1]], i32 1
+; VF2UF2-NEXT: br i1 [[TMP17]], label [[PRED_STORE_IF6:%.*]], label [[PRED_STORE_CONTINUE7]]
+; VF2UF2: pred.store.if6:
+; VF2UF2-NEXT: [[TMP18:%.*]] = add i64 [[INDEX]], 3
+; VF2UF2-NEXT: [[TMP19:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 [[TMP18]]
+; VF2UF2-NEXT: [[TMP20:%.*]] = load i64, i64* [[TMP19]], align 8
+; VF2UF2-NEXT: store i64 [[TMP20]], i64* [[B]], align 8
+; VF2UF2-NEXT: br label [[PRED_STORE_CONTINUE7]]
+; VF2UF2: pred.store.continue7:
+; VF2UF2-NEXT: [[TMP21:%.*]] = phi i64 [ poison, [[PRED_STORE_CONTINUE5]] ], [ [[TMP20]], [[PRED_STORE_IF6]] ]
+; VF2UF2-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4
+; VF2UF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[STEP_ADD]], <i64 2, i64 2>
+; VF2UF2-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16
+; VF2UF2-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; VF2UF2: middle.block:
+; VF2UF2-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
+; VF2UF2: scalar.ph:
+; VF2UF2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 16, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; VF2UF2-NEXT: br label [[FOR_BODY:%.*]]
+; VF2UF2: for.body:
+; VF2UF2-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
+; VF2UF2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 [[IV]]
+; VF2UF2-NEXT: [[V:%.*]] = load i64, i64* [[ARRAYIDX]], align 8
+; VF2UF2-NEXT: store i64 [[V]], i64* [[B]], align 8
+; VF2UF2-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; VF2UF2-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 14
+; VF2UF2-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
+; VF2UF2: for.end:
+; VF2UF2-NEXT: ret void
+;
+; VF1UF4-LABEL: @load_variant(
+; VF1UF4-NEXT: entry:
+; VF1UF4-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; VF1UF4: vector.ph:
+; VF1UF4-NEXT: br label [[VECTOR_BODY:%.*]]
+; VF1UF4: vector.body:
+; VF1UF4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE12:%.*]] ]
+; VF1UF4-NEXT: [[VEC_IV:%.*]] = add i64 [[INDEX]], 0
+; VF1UF4-NEXT: [[VEC_IV4:%.*]] = add i64 [[INDEX]], 1
+; VF1UF4-NEXT: [[VEC_IV5:%.*]] = add i64 [[INDEX]], 2
+; VF1UF4-NEXT: [[VEC_IV6:%.*]] = add i64 [[INDEX]], 3
+; VF1UF4-NEXT: [[TMP0:%.*]] = icmp ule i64 [[VEC_IV]], 13
+; VF1UF4-NEXT: [[TMP1:%.*]] = icmp ule i64 [[VEC_IV4]], 13
+; VF1UF4-NEXT: [[TMP2:%.*]] = icmp ule i64 [[VEC_IV5]], 13
+; VF1UF4-NEXT: [[TMP3:%.*]] = icmp ule i64 [[VEC_IV6]], 13
+; VF1UF4-NEXT: br i1 [[TMP0]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
+; VF1UF4: pred.store.if:
+; VF1UF4-NEXT: [[INDUCTION:%.*]] = add i64 [[INDEX]], 0
+; VF1UF4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, i64* [[A:%.*]], i64 [[INDUCTION]]
+; VF1UF4-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP4]], align 8
+; VF1UF4-NEXT: store i64 [[TMP5]], i64* [[B:%.*]], align 8
+; VF1UF4-NEXT: br label [[PRED_STORE_CONTINUE]]
+; VF1UF4: pred.store.continue:
+; VF1UF4-NEXT: br i1 [[TMP1]], label [[PRED_STORE_IF7:%.*]], label [[PRED_STORE_CONTINUE8:%.*]]
+; VF1UF4: pred.store.if7:
+; VF1UF4-NEXT: [[INDUCTION1:%.*]] = add i64 [[INDEX]], 1
+; VF1UF4-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 [[INDUCTION1]]
+; VF1UF4-NEXT: [[TMP7:%.*]] = load i64, i64* [[TMP6]], align 8
+; VF1UF4-NEXT: store i64 [[TMP7]], i64* [[B]], align 8
+; VF1UF4-NEXT: br label [[PRED_STORE_CONTINUE8]]
+; VF1UF4: pred.store.continue8:
+; VF1UF4-NEXT: br i1 [[TMP2]], label [[PRED_STORE_IF9:%.*]], label [[PRED_STORE_CONTINUE10:%.*]]
+; VF1UF4: pred.store.if9:
+; VF1UF4-NEXT: [[INDUCTION2:%.*]] = add i64 [[INDEX]], 2
+; VF1UF4-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 [[INDUCTION2]]
+; VF1UF4-NEXT: [[TMP9:%.*]] = load i64, i64* [[TMP8]], align 8
+; VF1UF4-NEXT: store i64 [[TMP9]], i64* [[B]], align 8
+; VF1UF4-NEXT: br label [[PRED_STORE_CONTINUE10]]
+; VF1UF4: pred.store.continue10:
+; VF1UF4-NEXT: br i1 [[TMP3]], label [[PRED_STORE_IF11:%.*]], label [[PRED_STORE_CONTINUE12]]
+; VF1UF4: pred.store.if11:
+; VF1UF4-NEXT: [[INDUCTION3:%.*]] = add i64 [[INDEX]], 3
+; VF1UF4-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 [[INDUCTION3]]
+; VF1UF4-NEXT: [[TMP11:%.*]] = load i64, i64* [[TMP10]], align 8
+; VF1UF4-NEXT: store i64 [[TMP11]], i64* [[B]], align 8
+; VF1UF4-NEXT: br label [[PRED_STORE_CONTINUE12]]
+; VF1UF4: pred.store.continue12:
+; VF1UF4-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4
+; VF1UF4-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16
+; VF1UF4-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
+; VF1UF4: middle.block:
+; VF1UF4-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
+; VF1UF4: scalar.ph:
+; VF1UF4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 16, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; VF1UF4-NEXT: br label [[FOR_BODY:%.*]]
+; VF1UF4: for.body:
+; VF1UF4-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
+; VF1UF4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 [[IV]]
+; VF1UF4-NEXT: [[V:%.*]] = load i64, i64* [[ARRAYIDX]], align 8
+; VF1UF4-NEXT: store i64 [[V]], i64* [[B]], align 8
+; VF1UF4-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; VF1UF4-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 14
+; VF1UF4-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; VF1UF4: for.end:
+; VF1UF4-NEXT: ret void
+;
+entry:
+ br label %for.body
+
+for.body:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+ %arrayidx = getelementptr inbounds i64, i64* %a, i64 %iv
+ %v = load i64, i64* %arrayidx
+ store i64 %v, i64* %b
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond.not = icmp eq i64 %iv.next, 14
+ br i1 %exitcond.not, label %for.end, label %for.body
+
+for.end:
+ ret void
+}