[llvm] 1f46499 - [SVE][LoopVectorize] Verify support for vectorizing loops with invariant loads

Thu Mar 25 07:28:12 PDT 2021

Author: Kerry McLaughlin
Date: 2021-03-25T14:10:21Z
New Revision: 1f4649969062fd2b43dcc09c79eb977c83749caa

URL: https://github.com/llvm/llvm-project/commit/1f4649969062fd2b43dcc09c79eb977c83749caa
DIFF: https://github.com/llvm/llvm-project/commit/1f4649969062fd2b43dcc09c79eb977c83749caa.diff

LOG: [SVE][LoopVectorize] Verify support for vectorizing loops with invariant loads

D95598 added a cost model for broadcast shuffle, which should enable loops
such as the following to vectorize, where the load of b[42] is invariant
and can be done using a scalar load + splat:

  for (int i=0; i<n; ++i)
    a[i] = b[i] + b[42];

This patch adds tests to verify that we can vectorize such loops.

Reviewed By: joechrisellis

Differential Revision: https://reviews.llvm.org/D98506

Added: 
    llvm/test/Transforms/LoopVectorize/AArch64/sve-inv-loads.ll

Modified: 
    llvm/test/Transforms/LoopVectorize/AArch64/sve-cond-inv-loads.ll

Removed: 
    


################################################################################
diff  --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-cond-inv-loads.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-cond-inv-loads.ll
index 37e70adf64f8e..2536c6c85deab 100644

--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-cond-inv-loads.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-cond-inv-loads.ll
@@ -76,6 +76,47 @@ exit:                        ; preds = %for.inc
   ret void
 }
 
+define void @invariant_load_cond(i32* noalias nocapture %a, i32* nocapture readonly %b, i32* nocapture readonly %cond, i64 %n) {
+; CHECK-LABEL: @invariant_load_cond
+; CHECK: vector.body
+; CHECK: %[[GEP:.*]] = getelementptr inbounds i32, i32* %b, i64 42
+; CHECK-NEXT: %[[SPLATINS:.*]] = insertelement <vscale x 4 x i32*> poison, i32* %[[GEP]], i32 0
+; CHECK-NEXT: %[[SPLAT:.*]] = shufflevector <vscale x 4 x i32*> %[[SPLATINS]], <vscale x 4 x i32*> poison, <vscale x 4 x i32> zeroinitializer
+; CHECK: %[[LOAD:.*]] = load <vscale x 4 x i32>, <vscale x 4 x i32>*
+; CHECK-NEXT: %[[ICMP:.*]] = icmp ne <vscale x 4 x i32> %[[LOAD]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 0, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+; CHECK: %[[MASKED_LOAD:.*]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0nxv4i32(<vscale x 4 x i32>* %[[BITCAST:.*]], i32 4, <vscale x 4 x i1> %[[ICMP]], <vscale x 4 x i32> poison)
+; CHECK-NEXT: %[[MASKED_GATHER:.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0i32(<vscale x 4 x i32*> %[[SPLAT]], i32 4, <vscale x 4 x i1> %[[ICMP]], <vscale x 4 x i32> undef)
+; CHECK-NEXT: %[[ADD:.*]] = add nsw <vscale x 4 x i32> %[[MASKED_GATHER]], %[[MASKED_LOAD]]
+; CHECK: call void @llvm.masked.store.nxv4i32.p0nxv4i32(<vscale x 4 x i32> %[[ADD]], <vscale x 4 x i32>* %[[BITCAST1:.*]], i32 4, <vscale x 4 x i1> %[[ICMP]])
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.inc ]
+  %arrayidx1 = getelementptr inbounds i32, i32* %b, i64 42
+  %arrayidx2 = getelementptr inbounds i32, i32* %cond, i64 %iv
+  %0 = load i32, i32* %arrayidx2, align 4
+  %tobool.not = icmp eq i32 %0, 0
+  br i1 %tobool.not, label %for.inc, label %if.then
+
+if.then:
+  %arrayidx3 = getelementptr inbounds i32, i32* %b, i64 %iv
+  %1 = load i32, i32* %arrayidx3, align 4
+  %2 = load i32, i32* %arrayidx1, align 4
+  %add = add nsw i32 %2, %1
+  %arrayidx4 = getelementptr inbounds i32, i32* %a, i64 %iv
+  store i32 %add, i32* %arrayidx4, align 4
+  br label %for.inc
+
+for.inc:
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond.not = icmp eq i64 %iv.next, %n
+  br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0
+
+for.end:
+  ret void
+}
+
 !0 = distinct !{!0, !1, !2, !3, !4, !5}
 !1 = !{!"llvm.loop.mustprogress"}
 !2 = !{!"llvm.loop.vectorize.width", i32 4}

diff  --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-inv-loads.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-inv-loads.ll
new file mode 100644
index 0000000000000..2514fddc3e38c
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-inv-loads.ll
@@ -0,0 +1,41 @@
+; RUN: opt -S -loop-vectorize -mattr=+sve -mtriple aarch64-linux-gnu < %s 2>%t | FileCheck %s
+
+; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t
+
+; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it.
+; WARN-NOT: warning
+
+define void @invariant_load(i64 %n, i32* noalias nocapture %a, i32* nocapture readonly %b) {
+; CHECK-LABEL: @invariant_load
+; CHECK: vector.body:
+; CHECK: %[[GEP:.*]] = getelementptr inbounds i32, i32* %b, i64 42
+; CHECK-NEXT: %[[INVLOAD:.*]] = load i32, i32* %[[GEP]]
+; CHECK-NEXT: %[[SPLATINS:.*]] = insertelement <vscale x 4 x i32> poison, i32 %[[INVLOAD]], i32 0
+; CHECK-NEXT: %[[SPLAT:.*]] = shufflevector <vscale x 4 x i32> %[[SPLATINS]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
+; CHECK: %[[LOAD:.*]] = load <vscale x 4 x i32>, <vscale x 4 x i32>*
+; CHECK-NEXT: %[[ADD:.*]] = add nsw <vscale x 4 x i32> %[[SPLAT]], %[[LOAD]]
+; CHECK: store <vscale x 4 x i32> %[[ADD]], <vscale x 4 x i32>*
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds i32, i32* %b, i64 42
+  %0 = load i32, i32* %arrayidx, align 4
+  %arrayidx1 = getelementptr inbounds i32, i32* %b, i64 %iv
+  %1 = load i32, i32* %arrayidx1, align 4
+  %add = add nsw i32 %0, %1
+  %arrayidx2 = getelementptr inbounds i32, i32* %a, i64 %iv
+  store i32 %add, i32* %arrayidx2, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond.not = icmp eq i64 %iv.next, %n
+  br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !1
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+
+!1 = distinct !{!1, !2, !3, !4}
+!2 = !{!"llvm.loop.vectorize.width", i32 4}
+!3 = !{!"llvm.loop.interleave.count", i32 1}
+!4 = !{!"llvm.loop.vectorize.scalable.enable", i1 true}