[llvm] 1f46499 - [SVE][LoopVectorize] Verify support for vectorizing loops with invariant loads
Kerry McLaughlin via llvm-commits
llvm-commits at lists.llvm.org
Thu Mar 25 07:28:12 PDT 2021
Author: Kerry McLaughlin
Date: 2021-03-25T14:10:21Z
New Revision: 1f4649969062fd2b43dcc09c79eb977c83749caa
URL: https://github.com/llvm/llvm-project/commit/1f4649969062fd2b43dcc09c79eb977c83749caa
DIFF: https://github.com/llvm/llvm-project/commit/1f4649969062fd2b43dcc09c79eb977c83749caa.diff
LOG: [SVE][LoopVectorize] Verify support for vectorizing loops with invariant loads
D95598 added a cost model for broadcast shuffle, which should enable loops
such as the following to vectorize, where the load of b[42] is invariant
and can be done using a scalar load + splat:
for (int i=0; i<n; ++i)
a[i] = b[i] + b[42];
This patch adds tests to verify that we can vectorize such loops.
Reviewed By: joechrisellis
Differential Revision: https://reviews.llvm.org/D98506
Added:
llvm/test/Transforms/LoopVectorize/AArch64/sve-inv-loads.ll
Modified:
llvm/test/Transforms/LoopVectorize/AArch64/sve-cond-inv-loads.ll
Removed:
################################################################################
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-cond-inv-loads.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-cond-inv-loads.ll
index 37e70adf64f8e..2536c6c85deab 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-cond-inv-loads.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-cond-inv-loads.ll
@@ -76,6 +76,47 @@ exit: ; preds = %for.inc
ret void
}
+define void @invariant_load_cond(i32* noalias nocapture %a, i32* nocapture readonly %b, i32* nocapture readonly %cond, i64 %n) {
+; CHECK-LABEL: @invariant_load_cond
+; CHECK: vector.body
+; CHECK: %[[GEP:.*]] = getelementptr inbounds i32, i32* %b, i64 42
+; CHECK-NEXT: %[[SPLATINS:.*]] = insertelement <vscale x 4 x i32*> poison, i32* %[[GEP]], i32 0
+; CHECK-NEXT: %[[SPLAT:.*]] = shufflevector <vscale x 4 x i32*> %[[SPLATINS]], <vscale x 4 x i32*> poison, <vscale x 4 x i32> zeroinitializer
+; CHECK: %[[LOAD:.*]] = load <vscale x 4 x i32>, <vscale x 4 x i32>*
+; CHECK-NEXT: %[[ICMP:.*]] = icmp ne <vscale x 4 x i32> %[[LOAD]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 0, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+; CHECK: %[[MASKED_LOAD:.*]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0nxv4i32(<vscale x 4 x i32>* %[[BITCAST:.*]], i32 4, <vscale x 4 x i1> %[[ICMP]], <vscale x 4 x i32> poison)
+; CHECK-NEXT: %[[MASKED_GATHER:.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0i32(<vscale x 4 x i32*> %[[SPLAT]], i32 4, <vscale x 4 x i1> %[[ICMP]], <vscale x 4 x i32> undef)
+; CHECK-NEXT: %[[ADD:.*]] = add nsw <vscale x 4 x i32> %[[MASKED_GATHER]], %[[MASKED_LOAD]]
+; CHECK: call void @llvm.masked.store.nxv4i32.p0nxv4i32(<vscale x 4 x i32> %[[ADD]], <vscale x 4 x i32>* %[[BITCAST1:.*]], i32 4, <vscale x 4 x i1> %[[ICMP]])
+entry:
+ br label %for.body
+
+for.body:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.inc ]
+ %arrayidx1 = getelementptr inbounds i32, i32* %b, i64 42
+ %arrayidx2 = getelementptr inbounds i32, i32* %cond, i64 %iv
+ %0 = load i32, i32* %arrayidx2, align 4
+ %tobool.not = icmp eq i32 %0, 0
+ br i1 %tobool.not, label %for.inc, label %if.then
+
+if.then:
+ %arrayidx3 = getelementptr inbounds i32, i32* %b, i64 %iv
+ %1 = load i32, i32* %arrayidx3, align 4
+ %2 = load i32, i32* %arrayidx1, align 4
+ %add = add nsw i32 %2, %1
+ %arrayidx4 = getelementptr inbounds i32, i32* %a, i64 %iv
+ store i32 %add, i32* %arrayidx4, align 4
+ br label %for.inc
+
+for.inc:
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond.not = icmp eq i64 %iv.next, %n
+ br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0
+
+for.end:
+ ret void
+}
+
!0 = distinct !{!0, !1, !2, !3, !4, !5}
!1 = !{!"llvm.loop.mustprogress"}
!2 = !{!"llvm.loop.vectorize.width", i32 4}
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-inv-loads.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-inv-loads.ll
new file mode 100644
index 0000000000000..2514fddc3e38c
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-inv-loads.ll
@@ -0,0 +1,41 @@
+; RUN: opt -S -loop-vectorize -mattr=+sve -mtriple aarch64-linux-gnu < %s 2>%t | FileCheck %s
+
+; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t
+
+; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it.
+; WARN-NOT: warning
+
+define void @invariant_load(i64 %n, i32* noalias nocapture %a, i32* nocapture readonly %b) {
+; CHECK-LABEL: @invariant_load
+; CHECK: vector.body:
+; CHECK: %[[GEP:.*]] = getelementptr inbounds i32, i32* %b, i64 42
+; CHECK-NEXT: %[[INVLOAD:.*]] = load i32, i32* %[[GEP]]
+; CHECK-NEXT: %[[SPLATINS:.*]] = insertelement <vscale x 4 x i32> poison, i32 %[[INVLOAD]], i32 0
+; CHECK-NEXT: %[[SPLAT:.*]] = shufflevector <vscale x 4 x i32> %[[SPLATINS]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
+; CHECK: %[[LOAD:.*]] = load <vscale x 4 x i32>, <vscale x 4 x i32>*
+; CHECK-NEXT: %[[ADD:.*]] = add nsw <vscale x 4 x i32> %[[SPLAT]], %[[LOAD]]
+; CHECK: store <vscale x 4 x i32> %[[ADD]], <vscale x 4 x i32>*
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body.lr.ph, %for.body
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+ %arrayidx = getelementptr inbounds i32, i32* %b, i64 42
+ %0 = load i32, i32* %arrayidx, align 4
+ %arrayidx1 = getelementptr inbounds i32, i32* %b, i64 %iv
+ %1 = load i32, i32* %arrayidx1, align 4
+ %add = add nsw i32 %0, %1
+ %arrayidx2 = getelementptr inbounds i32, i32* %a, i64 %iv
+ store i32 %add, i32* %arrayidx2, align 4
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond.not = icmp eq i64 %iv.next, %n
+ br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !1
+
+for.end: ; preds = %for.body
+ ret void
+}
+
+!1 = distinct !{!1, !2, !3, !4}
+!2 = !{!"llvm.loop.vectorize.width", i32 4}
+!3 = !{!"llvm.loop.interleave.count", i32 1}
+!4 = !{!"llvm.loop.vectorize.scalable.enable", i1 true}
More information about the llvm-commits
mailing list