[llvm] 0d9e8f5 - [VPlan] Add more sinking/merging tests with predicated loads/stores.
Florian Hahn via llvm-commits
llvm-commits at lists.llvm.org
Sat Jun 12 07:37:08 PDT 2021
Author: Florian Hahn
Date: 2021-06-12T15:36:51+01:00
New Revision: 0d9e8f5f4b68252c6caa1ef81a30777b2f5d7242
URL: https://github.com/llvm/llvm-project/commit/0d9e8f5f4b68252c6caa1ef81a30777b2f5d7242
DIFF: https://github.com/llvm/llvm-project/commit/0d9e8f5f4b68252c6caa1ef81a30777b2f5d7242.diff
LOG: [VPlan] Add more sinking/merging tests with predicated loads/stores.
Added:
Modified:
llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll
Removed:
################################################################################
diff --git a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll
index 560971513733a..a3799c82d7dc2 100644
--- a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll
+++ b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll
@@ -232,50 +232,52 @@ define void @uniform_gep(i64 %k, i16* noalias %A, i16* noalias %B) {
; CHECK-NEXT: EMIT vp<%3> = icmp ule vp<%2> vp<%0>
; CHECK-NEXT: CLONE ir<%gep.A.uniform> = getelementptr ir<%A>, ir<0>
; CHECK-NEXT: Successor(s): pred.load
-
-; CHECK: <xVFxUF> pred.load: {
+; CHECK-EMPTY:
+; CHECK-NEXT: <xVFxUF> pred.load: {
; CHECK-NEXT: pred.load.entry:
; CHECK-NEXT: BRANCH-ON-MASK vp<%3>
; CHECK-NEXT: Successor(s): pred.load.if, pred.load.continue
; CHECK-NEXT: CondBit: vp<%3> (loop)
-
-; CHECK: pred.load.if:
+; CHECK-EMPTY:
+; CHECK-NEXT: pred.load.if:
; CHECK-NEXT: REPLICATE ir<%lv> = load ir<%gep.A.uniform>
; CHECK-NEXT: Successor(s): pred.load.continue
-
-; CHECK: pred.load.continue:
+; CHECK-EMPTY:
+; CHECK-NEXT: pred.load.continue:
; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<%6> = ir<%lv>
; CHECK-NEXT: No successors
; CHECK-NEXT: }
-
-; CHECK: loop.0:
+; CHECK-NEXT: Successor(s): loop.0
+; CHECK-EMPTY:
+; CHECK-NEXT: loop.0:
; CHECK-NEXT: WIDEN ir<%cmp> = icmp ir<%iv>, ir<%k>
; CHECK-NEXT: Successor(s): loop.then
-
-; CHECK: loop.then:
+; CHECK-EMPTY:
+; CHECK-NEXT: loop.then:
; CHECK-NEXT: EMIT vp<%8> = not ir<%cmp>
; CHECK-NEXT: EMIT vp<%9> = select vp<%3> vp<%8> ir<false>
; CHECK-NEXT: Successor(s): pred.store
-
-; CHECK: <xVFxUF> pred.store: {
+; CHECK-EMPTY:
+; CHECK-NEXT: <xVFxUF> pred.store: {
; CHECK-NEXT: pred.store.entry:
; CHECK-NEXT: BRANCH-ON-MASK vp<%9>
; CHECK-NEXT: Successor(s): pred.store.if, pred.store.continue
; CHECK-NEXT: CondBit: vp<%9> (loop.then)
-
-; CHECK: pred.store.if:
+; CHECK-EMPTY:
+; CHECK-NEXT: pred.store.if:
; CHECK-NEXT: REPLICATE ir<%gep.B> = getelementptr ir<%B>, ir<%iv>
; CHECK-NEXT: REPLICATE store vp<%6>, ir<%gep.B>
; CHECK-NEXT: Successor(s): pred.store.continue
-
-; CHECK: pred.store.continue:
+; CHECK-EMPTY:
+; CHECK-NEXT: pred.store.continue:
; CHECK-NEXT: No successors
; CHECK-NEXT: }
-
-; CHECK: loop.then.0:
+; CHECK-NEXT: Successor(s): loop.then.0
+; CHECK-EMPTY:
+; CHECK-NEXT: loop.then.0:
; CHECK-NEXT: Successor(s): loop.latch
-
-; CHECK: loop.latch:
+; CHECK-EMPTY:
+; CHECK-NEXT: loop.latch:
; CHECK-NEXT: No successors
; CHECK-NEXT: }
;
@@ -298,6 +300,309 @@ loop.latch:
%iv.next = add nsw i64 %iv, 1
%cmp179 = icmp slt i64 %iv.next, 32
br i1 %cmp179, label %loop, label %exit
+exit:
+ ret void
+}
+
+; Loop with a predicated load; the loaded value reaches an unconditional store via a phi.
+define void @pred_cfg1(i32 %k, i32 %j) {
+; CHECK-LABEL: LV: Checking a loop in "pred_cfg1"
+; CHECK: VPlan 'Initial VPlan for VF={2},UF>=1' {
+; CHECK-NEXT: loop:
+; CHECK-NEXT: WIDEN-INDUCTION %indvars.iv = phi 0, %indvars.iv.next
+; CHECK-NEXT: WIDEN ir<%c.1> = icmp ir<%indvars.iv>, ir<%j>
+; CHECK-NEXT: WIDEN ir<%mul> = mul ir<%indvars.iv>, ir<10>
+; CHECK-NEXT: Successor(s): then.0
+; CHECK-EMPTY:
+; CHECK-NEXT: then.0:
+; CHECK-NEXT: EMIT vp<%4> = icmp ule ir<%indvars.iv> vp<%0>
+; CHECK-NEXT: EMIT vp<%5> = select vp<%4> ir<%c.1> ir<false>
+; CHECK-NEXT: Successor(s): pred.load
+; CHECK-EMPTY:
+; CHECK-NEXT: <xVFxUF> pred.load: {
+; CHECK-NEXT: pred.load.entry:
+; CHECK-NEXT: BRANCH-ON-MASK vp<%5>
+; CHECK-NEXT: Successor(s): pred.load.if, pred.load.continue
+; CHECK-NEXT: CondBit: vp<%5> (then.0)
+; CHECK-EMPTY:
+; CHECK-NEXT: pred.load.if:
+; CHECK-NEXT: REPLICATE ir<%gep.b> = getelementptr ir<@b>, ir<0>, ir<%indvars.iv>
+; CHECK-NEXT: REPLICATE ir<%lv.b> = load ir<%gep.b> (S->V)
+; CHECK-NEXT: Successor(s): pred.load.continue
+; CHECK-EMPTY:
+; CHECK-NEXT: pred.load.continue:
+; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<%8> = ir<%lv.b>
+; CHECK-NEXT: No successors
+; CHECK-NEXT: }
+; CHECK-NEXT: Successor(s): then.0.0
+; CHECK-EMPTY:
+; CHECK-NEXT: then.0.0:
+; CHECK-NEXT: Successor(s): next.0
+; CHECK-EMPTY:
+; CHECK-NEXT: next.0:
+; CHECK-NEXT: EMIT vp<%9> = not ir<%c.1>
+; CHECK-NEXT: EMIT vp<%10> = select vp<%4> vp<%9> ir<false>
+; CHECK-NEXT: BLEND %p = ir<0>/vp<%10> vp<%8>/vp<%5>
+; CHECK-NEXT: EMIT vp<%12> = or vp<%5> vp<%10>
+; CHECK-NEXT: Successor(s): pred.store
+; CHECK-EMPTY:
+; CHECK-NEXT: <xVFxUF> pred.store: {
+; CHECK-NEXT: pred.store.entry:
+; CHECK-NEXT: BRANCH-ON-MASK vp<%12>
+; CHECK-NEXT: Successor(s): pred.store.if, pred.store.continue
+; CHECK-NEXT: CondBit: vp<%12> (next.0)
+; CHECK-EMPTY:
+; CHECK-NEXT: pred.store.if:
+; CHECK-NEXT: REPLICATE ir<%gep.a> = getelementptr ir<@a>, ir<0>, ir<%mul>
+; CHECK-NEXT: REPLICATE store ir<%p>, ir<%gep.a>
+; CHECK-NEXT: Successor(s): pred.store.continue
+; CHECK-EMPTY:
+; CHECK-NEXT: pred.store.continue:
+; CHECK-NEXT: No successors
+; CHECK-NEXT: }
+; CHECK-NEXT: Successor(s): next.0.0
+; CHECK-EMPTY:
+; CHECK-NEXT: next.0.0:
+; CHECK-NEXT: CLONE ir<%large> = icmp ir<%indvars.iv>, ir<8>
+; CHECK-NEXT: CLONE ir<%exitcond> = icmp ir<%indvars.iv>, ir<%k>
+; CHECK-NEXT: No successors
+; CHECK-NEXT: }
+;
+entry:
+ br label %loop
+
+loop:
+ %indvars.iv = phi i32 [ 0, %entry ], [ %indvars.iv.next, %next.0 ]
+ %gep.b = getelementptr inbounds [2048 x i32], [2048 x i32]* @b, i32 0, i32 %indvars.iv
+ %c.1 = icmp ult i32 %indvars.iv, %j
+ %mul = mul i32 %indvars.iv, 10
+ %gep.a = getelementptr inbounds [2048 x i32], [2048 x i32]* @a, i32 0, i32 %mul
+ br i1 %c.1, label %then.0, label %next.0
+
+then.0:
+ %lv.b = load i32, i32* %gep.b, align 4
+ br label %next.0
+
+next.0:
+ %p = phi i32 [ 0, %loop ], [ %lv.b, %then.0 ]
+ store i32 %p, i32* %gep.a, align 4
+ %indvars.iv.next = add i32 %indvars.iv, 1
+ %large = icmp sge i32 %indvars.iv, 8
+ %exitcond = icmp eq i32 %indvars.iv, %k
+ %realexit = or i1 %large, %exitcond
+ br i1 %realexit, label %exit, label %loop
+
+exit:
+ ret void
+}
+
+; Loop with a predicated load (guarded by %c.0) and a predicated store (guarded
+; by %c.1) in separate blocks; the store depends on the loaded value via a phi.
+define void @pred_cfg2(i32 %k, i32 %j) {
+; CHECK-LABEL: LV: Checking a loop in "pred_cfg2"
+; CHECK: VPlan 'Initial VPlan for VF={2},UF>=1' {
+; CHECK-NEXT: loop:
+; CHECK-NEXT: WIDEN-INDUCTION %indvars.iv = phi 0, %indvars.iv.next
+; CHECK-NEXT: WIDEN ir<%mul> = mul ir<%indvars.iv>, ir<10>
+; CHECK-NEXT: WIDEN ir<%c.0> = icmp ir<%indvars.iv>, ir<%j>
+; CHECK-NEXT: WIDEN ir<%c.1> = icmp ir<%indvars.iv>, ir<%j>
+; CHECK-NEXT: Successor(s): then.0
+; CHECK-EMPTY:
+; CHECK-NEXT: then.0:
+; CHECK-NEXT: EMIT vp<%5> = icmp ule ir<%indvars.iv> vp<%0>
+; CHECK-NEXT: EMIT vp<%6> = select vp<%5> ir<%c.0> ir<false>
+; CHECK-NEXT: Successor(s): pred.load
+; CHECK-EMPTY:
+; CHECK-NEXT: <xVFxUF> pred.load: {
+; CHECK-NEXT: pred.load.entry:
+; CHECK-NEXT: BRANCH-ON-MASK vp<%6>
+; CHECK-NEXT: Successor(s): pred.load.if, pred.load.continue
+; CHECK-NEXT: CondBit: vp<%6> (then.0)
+; CHECK-EMPTY:
+; CHECK-NEXT: pred.load.if:
+; CHECK-NEXT: REPLICATE ir<%gep.b> = getelementptr ir<@b>, ir<0>, ir<%indvars.iv>
+; CHECK-NEXT: REPLICATE ir<%lv.b> = load ir<%gep.b> (S->V)
+; CHECK-NEXT: Successor(s): pred.load.continue
+; CHECK-EMPTY:
+; CHECK-NEXT: pred.load.continue:
+; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<%9> = ir<%lv.b>
+; CHECK-NEXT: No successors
+; CHECK-NEXT: }
+; CHECK-NEXT: Successor(s): then.0.0
+; CHECK-EMPTY:
+; CHECK-NEXT: then.0.0:
+; CHECK-NEXT: Successor(s): next.0
+; CHECK-EMPTY:
+; CHECK-NEXT: next.0:
+; CHECK-NEXT: EMIT vp<%10> = not ir<%c.0>
+; CHECK-NEXT: EMIT vp<%11> = select vp<%5> vp<%10> ir<false>
+; CHECK-NEXT: BLEND %p = ir<0>/vp<%11> vp<%9>/vp<%6>
+; CHECK-NEXT: Successor(s): then.1
+; CHECK-EMPTY:
+; CHECK-NEXT: then.1:
+; CHECK-NEXT: EMIT vp<%13> = or vp<%6> vp<%11>
+; CHECK-NEXT: EMIT vp<%14> = select vp<%13> ir<%c.1> ir<false>
+; CHECK-NEXT: Successor(s): pred.store
+; CHECK-EMPTY:
+; CHECK-NEXT: <xVFxUF> pred.store: {
+; CHECK-NEXT: pred.store.entry:
+; CHECK-NEXT: BRANCH-ON-MASK vp<%14>
+; CHECK-NEXT: Successor(s): pred.store.if, pred.store.continue
+; CHECK-NEXT: CondBit: vp<%14> (then.1)
+; CHECK-EMPTY:
+; CHECK-NEXT: pred.store.if:
+; CHECK-NEXT: REPLICATE ir<%gep.a> = getelementptr ir<@a>, ir<0>, ir<%mul>
+; CHECK-NEXT: REPLICATE store ir<%p>, ir<%gep.a>
+; CHECK-NEXT: Successor(s): pred.store.continue
+; CHECK-EMPTY:
+; CHECK-NEXT: pred.store.continue:
+; CHECK-NEXT: No successors
+; CHECK-NEXT: }
+; CHECK-NEXT: Successor(s): then.1.0
+; CHECK-EMPTY:
+; CHECK-NEXT: then.1.0:
+; CHECK-NEXT: Successor(s): next.1
+; CHECK-EMPTY:
+; CHECK-NEXT: next.1:
+; CHECK-NEXT: CLONE ir<%large> = icmp ir<%indvars.iv>, ir<8>
+; CHECK-NEXT: CLONE ir<%exitcond> = icmp ir<%indvars.iv>, ir<%k>
+; CHECK-NEXT: No successors
+; CHECK-NEXT: }
+;
+entry:
+ br label %loop
+
+loop:
+ %indvars.iv = phi i32 [ 0, %entry ], [ %indvars.iv.next, %next.1 ]
+ %gep.b = getelementptr inbounds [2048 x i32], [2048 x i32]* @b, i32 0, i32 %indvars.iv
+ %mul = mul i32 %indvars.iv, 10
+ %gep.a = getelementptr inbounds [2048 x i32], [2048 x i32]* @a, i32 0, i32 %mul
+ %c.0 = icmp ult i32 %indvars.iv, %j
+ %c.1 = icmp ugt i32 %indvars.iv, %j
+ br i1 %c.0, label %then.0, label %next.0
+
+then.0:
+ %lv.b = load i32, i32* %gep.b, align 4
+ br label %next.0
+
+next.0:
+ %p = phi i32 [ 0, %loop ], [ %lv.b, %then.0 ]
+ br i1 %c.1, label %then.1, label %next.1
+
+then.1:
+ store i32 %p, i32* %gep.a, align 4
+ br label %next.1
+
+next.1:
+ %indvars.iv.next = add i32 %indvars.iv, 1
+ %large = icmp sge i32 %indvars.iv, 8
+ %exitcond = icmp eq i32 %indvars.iv, %k
+ %realexit = or i1 %large, %exitcond
+ br i1 %realexit, label %exit, label %loop
+
+exit:
+ ret void
+}
+
+; Loop with a predicated load and a predicated store in separate blocks, both
+; guarded by %c.0; the store writes constant 0 and does not use the loaded value.
+define void @pred_cfg3(i32 %k, i32 %j) {
+; CHECK-LABEL: LV: Checking a loop in "pred_cfg3"
+; CHECK: VPlan 'Initial VPlan for VF={2},UF>=1' {
+; CHECK-NEXT: loop:
+; CHECK-NEXT: WIDEN-INDUCTION %indvars.iv = phi 0, %indvars.iv.next
+; CHECK-NEXT: WIDEN ir<%mul> = mul ir<%indvars.iv>, ir<10>
+; CHECK-NEXT: WIDEN ir<%c.0> = icmp ir<%indvars.iv>, ir<%j>
+; CHECK-NEXT: Successor(s): then.0
+; CHECK-EMPTY:
+; CHECK-NEXT: then.0:
+; CHECK-NEXT: EMIT vp<%4> = icmp ule ir<%indvars.iv> vp<%0>
+; CHECK-NEXT: EMIT vp<%5> = select vp<%4> ir<%c.0> ir<false>
+; CHECK-NEXT: Successor(s): pred.load
+; CHECK-EMPTY:
+; CHECK-NEXT: <xVFxUF> pred.load: {
+; CHECK-NEXT: pred.load.entry:
+; CHECK-NEXT: BRANCH-ON-MASK vp<%5>
+; CHECK-NEXT: Successor(s): pred.load.if, pred.load.continue
+; CHECK-NEXT: CondBit: vp<%5> (then.0)
+; CHECK-EMPTY:
+; CHECK-NEXT: pred.load.if:
+; CHECK-NEXT: REPLICATE ir<%gep.b> = getelementptr ir<@b>, ir<0>, ir<%indvars.iv>
+; CHECK-NEXT: REPLICATE ir<%lv.b> = load ir<%gep.b>
+; CHECK-NEXT: Successor(s): pred.load.continue
+; CHECK-EMPTY:
+; CHECK-NEXT: pred.load.continue:
+; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<%8> = ir<%lv.b>
+; CHECK-NEXT: No successors
+; CHECK-NEXT: }
+; CHECK-NEXT: Successor(s): then.0.0
+; CHECK-EMPTY:
+; CHECK-NEXT: then.0.0:
+; CHECK-NEXT: Successor(s): next.0
+; CHECK-EMPTY:
+; CHECK-NEXT: next.0:
+; CHECK-NEXT: Successor(s): then.1
+; CHECK-EMPTY:
+; CHECK-NEXT: then.1:
+; CHECK-NEXT: EMIT vp<%9> = not ir<%c.0>
+; CHECK-NEXT: EMIT vp<%10> = select vp<%4> vp<%9> ir<false>
+; CHECK-NEXT: EMIT vp<%11> = or vp<%5> vp<%10>
+; CHECK-NEXT: EMIT vp<%12> = select vp<%11> ir<%c.0> ir<false>
+; CHECK-NEXT: Successor(s): pred.store
+; CHECK-EMPTY:
+; CHECK-NEXT: <xVFxUF> pred.store: {
+; CHECK-NEXT: pred.store.entry:
+; CHECK-NEXT: BRANCH-ON-MASK vp<%12>
+; CHECK-NEXT: Successor(s): pred.store.if, pred.store.continue
+; CHECK-NEXT: CondBit: vp<%12> (then.1)
+; CHECK-EMPTY:
+; CHECK-NEXT: pred.store.if:
+; CHECK-NEXT: REPLICATE ir<%gep.a> = getelementptr ir<@a>, ir<0>, ir<%mul>
+; CHECK-NEXT: REPLICATE store ir<0>, ir<%gep.a>
+; CHECK-NEXT: Successor(s): pred.store.continue
+; CHECK-EMPTY:
+; CHECK-NEXT: pred.store.continue:
+; CHECK-NEXT: No successors
+; CHECK-NEXT: }
+; CHECK-NEXT: Successor(s): then.1.0
+; CHECK-EMPTY:
+; CHECK-NEXT: then.1.0:
+; CHECK-NEXT: Successor(s): next.1
+; CHECK-EMPTY:
+; CHECK-NEXT: next.1:
+; CHECK-NEXT: CLONE ir<%large> = icmp ir<%indvars.iv>, ir<8>
+; CHECK-NEXT: CLONE ir<%exitcond> = icmp ir<%indvars.iv>, ir<%k>
+; CHECK-NEXT: No successors
+; CHECK-NEXT: }
+;
+entry:
+ br label %loop
+
+loop:
+ %indvars.iv = phi i32 [ 0, %entry ], [ %indvars.iv.next, %next.1 ]
+ %gep.b = getelementptr inbounds [2048 x i32], [2048 x i32]* @b, i32 0, i32 %indvars.iv
+ %mul = mul i32 %indvars.iv, 10
+ %gep.a = getelementptr inbounds [2048 x i32], [2048 x i32]* @a, i32 0, i32 %mul
+ %c.0 = icmp ult i32 %indvars.iv, %j
+ br i1 %c.0, label %then.0, label %next.0
+
+then.0:
+ %lv.b = load i32, i32* %gep.b, align 4
+ br label %next.0
+
+next.0:
+ br i1 %c.0, label %then.1, label %next.1
+
+then.1:
+ store i32 0, i32* %gep.a, align 4
+ br label %next.1
+
+next.1:
+ %indvars.iv.next = add i32 %indvars.iv, 1
+ %large = icmp sge i32 %indvars.iv, 8
+ %exitcond = icmp eq i32 %indvars.iv, %k
+ %realexit = or i1 %large, %exitcond
+ br i1 %realexit, label %exit, label %loop
exit:
ret void
More information about the llvm-commits mailing list