[llvm] [LV] Linear argument tests for vectorization of function calls (PR #73936)
Graham Hunter via llvm-commits
llvm-commits at lists.llvm.org
Tue Dec 5 05:42:58 PST 2023
https://github.com/huntergr-arm updated https://github.com/llvm/llvm-project/pull/73936
>From 557b422bbcb5c2f2051c806a99c8d2e249717525 Mon Sep 17 00:00:00 2001
From: Graham Hunter <graham.hunter at arm.com>
Date: Wed, 11 Oct 2023 17:06:09 +0100
Subject: [PATCH 1/3] [LV] Linear argument tests for vectorization of function
calls
---
.../AArch64/vector-call-linear-args.ll | 275 ++++++++++++++++++
1 file changed, 275 insertions(+)
create mode 100644 llvm/test/Transforms/LoopVectorize/AArch64/vector-call-linear-args.ll
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/vector-call-linear-args.ll b/llvm/test/Transforms/LoopVectorize/AArch64/vector-call-linear-args.ll
new file mode 100644
index 0000000000000..ef6b8e1d83f38
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/vector-call-linear-args.ll
@@ -0,0 +1,275 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
+; RUN: opt < %s -passes=loop-vectorize,instsimplify -force-vector-interleave=1 -S | FileCheck %s
+
+target triple = "aarch64-unknown-linux-gnu"
+
+; A call whose argument can remain a scalar because it's sequential and only the
+; starting value is required.
+define void @test_linear(ptr noalias %a, ptr readnone %b, i64 %n) #0 {
+; CHECK-LABEL: define void @test_linear
+; CHECK-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK: for.body:
+; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[GEPB:%.*]] = getelementptr i64, ptr [[B]], i64 [[INDVARS_IV]]
+; CHECK-NEXT: [[CALL:%.*]] = call i64 @foo(ptr [[GEPB]]) #[[ATTR1:[0-9]+]]
+; CHECK-NEXT: [[GEPA:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDVARS_IV]]
+; CHECK-NEXT: store i64 [[CALL]], ptr [[GEPA]], align 8
+; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]]
+; CHECK: for.cond.cleanup:
+; CHECK-NEXT: ret void
+;
+entry:
+ br label %for.body
+
+for.body:
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %gepb = getelementptr i64, ptr %b, i64 %indvars.iv
+ %call = call i64 @foo(ptr %gepb) #1
+ %gepa = getelementptr inbounds i64, ptr %a, i64 %indvars.iv
+ store i64 %call, ptr %gepa
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond = icmp eq i64 %indvars.iv.next, %n
+ br i1 %exitcond, label %for.cond.cleanup, label %for.body
+
+for.cond.cleanup:
+ ret void
+}
+
+define void @test_linear_with_mask(ptr noalias %a, ptr readnone %b, i64 %n) #0 {
+; CHECK-LABEL: define void @test_linear_with_mask
+; CHECK-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK: for.body:
+; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[GEPB:%.*]] = getelementptr i64, ptr [[B]], i64 [[INDVARS_IV]]
+; CHECK-NEXT: [[CALL:%.*]] = call i64 @foo(ptr [[GEPB]]) #[[ATTR2:[0-9]+]]
+; CHECK-NEXT: [[GEPA:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDVARS_IV]]
+; CHECK-NEXT: store i64 [[CALL]], ptr [[GEPA]], align 8
+; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]]
+; CHECK: for.cond.cleanup:
+; CHECK-NEXT: ret void
+;
+entry:
+ br label %for.body
+
+for.body:
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %gepb = getelementptr i64, ptr %b, i64 %indvars.iv
+ %call = call i64 @foo(ptr %gepb) #2
+ %gepa = getelementptr inbounds i64, ptr %a, i64 %indvars.iv
+ store i64 %call, ptr %gepa
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond = icmp eq i64 %indvars.iv.next, %n
+ br i1 %exitcond, label %for.cond.cleanup, label %for.body
+
+for.cond.cleanup:
+ ret void
+}
+
+define void @test_linear_with_vector(ptr noalias %a, ptr readnone %b, ptr readonly %c, i64 %n) #0 {
+; CHECK-LABEL: define void @test_linear_with_vector
+; CHECK-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], ptr readonly [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK: for.body:
+; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[GEPC:%.*]] = getelementptr i32, ptr [[C]], i64 [[INDVARS_IV]]
+; CHECK-NEXT: [[DATA:%.*]] = load i32, ptr [[GEPC]], align 8
+; CHECK-NEXT: [[GEPB:%.*]] = getelementptr i32, ptr [[B]], i64 [[INDVARS_IV]]
+; CHECK-NEXT: [[CALL:%.*]] = call i32 @baz(i32 [[DATA]], ptr [[GEPB]]) #[[ATTR3:[0-9]+]]
+; CHECK-NEXT: [[GEPA:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]]
+; CHECK-NEXT: store i32 [[CALL]], ptr [[GEPA]], align 8
+; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]]
+; CHECK: for.cond.cleanup:
+; CHECK-NEXT: ret void
+;
+entry:
+ br label %for.body
+
+for.body:
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %gepc = getelementptr i32, ptr %c, i64 %indvars.iv
+ %data = load i32, ptr %gepc, align 8
+ %gepb = getelementptr i32, ptr %b, i64 %indvars.iv
+ %call = call i32 @baz(i32 %data, ptr %gepb) #3
+ %gepa = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
+ store i32 %call, ptr %gepa, align 8
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond = icmp eq i64 %indvars.iv.next, %n
+ br i1 %exitcond, label %for.cond.cleanup, label %for.body
+
+for.cond.cleanup:
+ ret void
+}
+
+define void @test_linear_bad_stride(ptr noalias %a, ptr readnone %b, i64 %n) #0 {
+; CHECK-LABEL: define void @test_linear_bad_stride
+; CHECK-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK: for.body:
+; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[GEPB:%.*]] = getelementptr i64, ptr [[B]], i64 [[INDVARS_IV]]
+; CHECK-NEXT: [[CALL:%.*]] = call i64 @foo(ptr [[GEPB]]) #[[ATTR4:[0-9]+]]
+; CHECK-NEXT: [[GEPA:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDVARS_IV]]
+; CHECK-NEXT: store i64 [[CALL]], ptr [[GEPA]], align 8
+; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]]
+; CHECK: for.cond.cleanup:
+; CHECK-NEXT: ret void
+;
+entry:
+ br label %for.body
+
+for.body:
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %gepb = getelementptr i64, ptr %b, i64 %indvars.iv
+ %call = call i64 @foo(ptr %gepb) #4
+ %gepa = getelementptr inbounds i64, ptr %a, i64 %indvars.iv
+ store i64 %call, ptr %gepa
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond = icmp eq i64 %indvars.iv.next, %n
+ br i1 %exitcond, label %for.cond.cleanup, label %for.body
+
+for.cond.cleanup:
+ ret void
+}
+
+define void @test_linear_wide_stride(ptr noalias %a, ptr readnone %b, i64 %n) #0 {
+; CHECK-LABEL: define void @test_linear_wide_stride
+; CHECK-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK: for.body:
+; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[DOUBLE:%.*]] = mul i64 [[INDVARS_IV]], 2
+; CHECK-NEXT: [[GEPB:%.*]] = getelementptr i64, ptr [[B]], i64 [[DOUBLE]]
+; CHECK-NEXT: [[CALL:%.*]] = call i64 @foo(ptr [[GEPB]]) #[[ATTR4]]
+; CHECK-NEXT: [[GEPA:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDVARS_IV]]
+; CHECK-NEXT: store i64 [[CALL]], ptr [[GEPA]], align 8
+; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]]
+; CHECK: for.cond.cleanup:
+; CHECK-NEXT: ret void
+;
+entry:
+ br label %for.body
+
+for.body:
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %double = mul i64 %indvars.iv, 2
+ %gepb = getelementptr i64, ptr %b, i64 %double
+ %call = call i64 @foo(ptr %gepb) #4
+ %gepa = getelementptr inbounds i64, ptr %a, i64 %indvars.iv
+ store i64 %call, ptr %gepa
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond = icmp eq i64 %indvars.iv.next, %n
+ br i1 %exitcond, label %for.cond.cleanup, label %for.body
+
+for.cond.cleanup:
+ ret void
+}
+
+define void @test_linear_mixed_types(ptr noalias %a, ptr readnone %b, ptr readonly %c, i64 %n) #0 {
+; CHECK-LABEL: define void @test_linear_mixed_types
+; CHECK-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], ptr readonly [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK: for.body:
+; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[GEPC:%.*]] = getelementptr i32, ptr [[C]], i64 [[INDVARS_IV]]
+; CHECK-NEXT: [[GEPB:%.*]] = getelementptr i64, ptr [[B]], i64 [[INDVARS_IV]]
+; CHECK-NEXT: [[CALL:%.*]] = call i32 @quux(ptr [[GEPC]], ptr [[GEPB]]) #[[ATTR5:[0-9]+]]
+; CHECK-NEXT: [[GEPA:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]]
+; CHECK-NEXT: store i32 [[CALL]], ptr [[GEPA]], align 8
+; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]]
+; CHECK: for.cond.cleanup:
+; CHECK-NEXT: ret void
+;
+entry:
+ br label %for.body
+
+for.body:
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %gepc = getelementptr i32, ptr %c, i64 %indvars.iv
+ %gepb = getelementptr i64, ptr %b, i64 %indvars.iv
+ %call = call i32 @quux(ptr %gepc, ptr %gepb) #5
+ %gepa = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
+ store i32 %call, ptr %gepa, align 8
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond = icmp eq i64 %indvars.iv.next, %n
+ br i1 %exitcond, label %for.cond.cleanup, label %for.body
+
+for.cond.cleanup:
+ ret void
+}
+
+define void @test_linear_non_ptr(ptr noalias %a, i64 %n) #0 {
+; CHECK-LABEL: define void @test_linear_non_ptr
+; CHECK-SAME: (ptr noalias [[A:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK: for.body:
+; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[LITTLE_IV:%.*]] = trunc i64 [[INDVARS_IV]] to i32
+; CHECK-NEXT: [[TREBLED:%.*]] = mul i32 [[LITTLE_IV]], 3
+; CHECK-NEXT: [[CALL:%.*]] = call i32 @bar(i32 [[TREBLED]]) #[[ATTR6:[0-9]+]]
+; CHECK-NEXT: [[GEPA:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]]
+; CHECK-NEXT: store i32 [[CALL]], ptr [[GEPA]], align 4
+; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]]
+; CHECK: for.cond.cleanup:
+; CHECK-NEXT: ret void
+;
+entry:
+ br label %for.body
+
+for.body:
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %little.iv = trunc i64 %indvars.iv to i32
+ %trebled = mul i32 %little.iv, 3
+ %call = call i32 @bar(i32 %trebled) #6
+ %gepa = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
+ store i32 %call, ptr %gepa
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond = icmp eq i64 %indvars.iv.next, %n
+ br i1 %exitcond, label %for.cond.cleanup, label %for.body
+
+for.cond.cleanup:
+ ret void
+}
+
+declare i64 @foo(ptr)
+declare i32 @baz(i32, ptr)
+declare i32 @quux(ptr, ptr)
+declare i32 @bar(i32)
+
+; vector variants of foo
+declare <vscale x 2 x i64> @foo_linear(ptr, <vscale x 2 x i1>)
+declare <vscale x 2 x i64> @foo_linear_nomask(ptr)
+declare <vscale x 2 x i64> @foo_linear_nomask_2x(ptr)
+declare <vscale x 4 x i32> @baz_vector_and_linear(<vscale x 4 x i32>, ptr)
+declare <vscale x 4 x i32> @quux_linear_mask(ptr, ptr, <vscale x 4 x i1>)
+declare <vscale x 4 x i32> @bar_linear(i32)
+
+attributes #0 = { "target-features"="+sve" }
+attributes #1 = { nounwind "vector-function-abi-variant"="_ZGVsNxl8_foo(foo_linear_nomask)" }
+attributes #2 = { nounwind "vector-function-abi-variant"="_ZGVsMxl8_foo(foo_linear)" }
+attributes #3 = { nounwind "vector-function-abi-variant"="_ZGVsNxvl4_baz(baz_vector_and_linear)" }
+attributes #4 = { nounwind "vector-function-abi-variant"="_ZGVsNxl16_foo(foo_linear_nomask_2x)" }
+attributes #5 = { nounwind "vector-function-abi-variant"="_ZGVsMxl4l8_quux(quux_linear_mask)" }
+attributes #6 = { nounwind "vector-function-abi-variant"="_ZGVsNxl3_bar(bar_linear)" }
>From 62323944c4a6447dab25145de7dd816a54e499c4 Mon Sep 17 00:00:00 2001
From: Graham Hunter <graham.hunter at arm.com>
Date: Fri, 1 Dec 2023 16:06:25 +0000
Subject: [PATCH 2/3] More run lines, neon mappings, negative stride test
---
.../AArch64/vector-call-linear-args.ll | 300 ++++++++----------
1 file changed, 139 insertions(+), 161 deletions(-)
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/vector-call-linear-args.ll b/llvm/test/Transforms/LoopVectorize/AArch64/vector-call-linear-args.ll
index ef6b8e1d83f38..ba9d57e1e4a16 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/vector-call-linear-args.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/vector-call-linear-args.ll
@@ -1,26 +1,24 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
-; RUN: opt < %s -passes=loop-vectorize,instsimplify -force-vector-interleave=1 -S | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --filter "call" --version 2
+; RUN: opt < %s -passes=loop-vectorize -force-vector-interleave=1 -S | FileCheck %s --check-prefixes=NEON
+; RUN: opt < %s -mattr=+sve -passes=loop-vectorize -force-vector-interleave=1 -S | FileCheck %s --check-prefixes=SVE_OR_NEON
+; RUN: opt < %s -mattr=+sve -passes=loop-vectorize -force-vector-interleave=1 -S -prefer-predicate-over-epilogue=predicate-dont-vectorize | FileCheck %s --check-prefixes=SVE_TF
target triple = "aarch64-unknown-linux-gnu"
; A call whose argument can remain a scalar because it's sequential and only the
; starting value is required.
-define void @test_linear(ptr noalias %a, ptr readnone %b, i64 %n) #0 {
-; CHECK-LABEL: define void @test_linear
-; CHECK-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
-; CHECK-NEXT: entry:
-; CHECK-NEXT: br label [[FOR_BODY:%.*]]
-; CHECK: for.body:
-; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT: [[GEPB:%.*]] = getelementptr i64, ptr [[B]], i64 [[INDVARS_IV]]
-; CHECK-NEXT: [[CALL:%.*]] = call i64 @foo(ptr [[GEPB]]) #[[ATTR1:[0-9]+]]
-; CHECK-NEXT: [[GEPA:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDVARS_IV]]
-; CHECK-NEXT: store i64 [[CALL]], ptr [[GEPA]], align 8
-; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
-; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
-; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]]
-; CHECK: for.cond.cleanup:
-; CHECK-NEXT: ret void
+define void @test_linear(ptr noalias %a, ptr readnone %b, i64 %n) {
+; NEON-LABEL: define void @test_linear
+; NEON-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) {
+; NEON: [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR0:[0-9]+]]
+;
+; SVE_OR_NEON-LABEL: define void @test_linear
+; SVE_OR_NEON-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
+; SVE_OR_NEON: [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR1:[0-9]+]]
+;
+; SVE_TF-LABEL: define void @test_linear
+; SVE_TF-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
+; SVE_TF: [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR1:[0-9]+]]
;
entry:
br label %for.body
@@ -28,9 +26,9 @@ entry:
for.body:
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
%gepb = getelementptr i64, ptr %b, i64 %indvars.iv
- %call = call i64 @foo(ptr %gepb) #1
+ %data = call i64 @foo(ptr %gepb) #0
%gepa = getelementptr inbounds i64, ptr %a, i64 %indvars.iv
- store i64 %call, ptr %gepa
+ store i64 %data, ptr %gepa
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%exitcond = icmp eq i64 %indvars.iv.next, %n
br i1 %exitcond, label %for.cond.cleanup, label %for.body
@@ -39,32 +37,30 @@ for.cond.cleanup:
ret void
}
-define void @test_linear_with_mask(ptr noalias %a, ptr readnone %b, i64 %n) #0 {
-; CHECK-LABEL: define void @test_linear_with_mask
-; CHECK-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: entry:
-; CHECK-NEXT: br label [[FOR_BODY:%.*]]
-; CHECK: for.body:
-; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT: [[GEPB:%.*]] = getelementptr i64, ptr [[B]], i64 [[INDVARS_IV]]
-; CHECK-NEXT: [[CALL:%.*]] = call i64 @foo(ptr [[GEPB]]) #[[ATTR2:[0-9]+]]
-; CHECK-NEXT: [[GEPA:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDVARS_IV]]
-; CHECK-NEXT: store i64 [[CALL]], ptr [[GEPA]], align 8
-; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
-; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
-; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]]
-; CHECK: for.cond.cleanup:
-; CHECK-NEXT: ret void
+define void @test_linear_with_vector(ptr noalias %a, ptr readnone %b, ptr readonly %c, i64 %n) {
+; NEON-LABEL: define void @test_linear_with_vector
+; NEON-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], ptr readonly [[C:%.*]], i64 [[N:%.*]]) {
+; NEON: [[DATA:%.*]] = call i32 @baz(i32 [[INPUT:%.*]], ptr [[GEPB:%.*]]) #[[ATTR1:[0-9]+]]
+;
+; SVE_OR_NEON-LABEL: define void @test_linear_with_vector
+; SVE_OR_NEON-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], ptr readonly [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
+; SVE_OR_NEON: [[DATA:%.*]] = call i32 @baz(i32 [[INPUT:%.*]], ptr [[GEPB:%.*]]) #[[ATTR2:[0-9]+]]
+;
+; SVE_TF-LABEL: define void @test_linear_with_vector
+; SVE_TF-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], ptr readonly [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
+; SVE_TF: [[DATA:%.*]] = call i32 @baz(i32 [[INPUT:%.*]], ptr [[GEPB:%.*]]) #[[ATTR2:[0-9]+]]
;
entry:
br label %for.body
for.body:
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
- %gepb = getelementptr i64, ptr %b, i64 %indvars.iv
- %call = call i64 @foo(ptr %gepb) #2
- %gepa = getelementptr inbounds i64, ptr %a, i64 %indvars.iv
- store i64 %call, ptr %gepa
+ %gepc = getelementptr i32, ptr %c, i64 %indvars.iv
+ %input = load i32, ptr %gepc, align 8
+ %gepb = getelementptr i32, ptr %b, i64 %indvars.iv
+ %data = call i32 @baz(i32 %input, ptr %gepb) #1
+ %gepa = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
+ store i32 %data, ptr %gepa, align 8
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%exitcond = icmp eq i64 %indvars.iv.next, %n
br i1 %exitcond, label %for.cond.cleanup, label %for.body
@@ -73,36 +69,28 @@ for.cond.cleanup:
ret void
}
-define void @test_linear_with_vector(ptr noalias %a, ptr readnone %b, ptr readonly %c, i64 %n) #0 {
-; CHECK-LABEL: define void @test_linear_with_vector
-; CHECK-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], ptr readonly [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: entry:
-; CHECK-NEXT: br label [[FOR_BODY:%.*]]
-; CHECK: for.body:
-; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT: [[GEPC:%.*]] = getelementptr i32, ptr [[C]], i64 [[INDVARS_IV]]
-; CHECK-NEXT: [[DATA:%.*]] = load i32, ptr [[GEPC]], align 8
-; CHECK-NEXT: [[GEPB:%.*]] = getelementptr i32, ptr [[B]], i64 [[INDVARS_IV]]
-; CHECK-NEXT: [[CALL:%.*]] = call i32 @baz(i32 [[DATA]], ptr [[GEPB]]) #[[ATTR3:[0-9]+]]
-; CHECK-NEXT: [[GEPA:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]]
-; CHECK-NEXT: store i32 [[CALL]], ptr [[GEPA]], align 8
-; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
-; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
-; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]]
-; CHECK: for.cond.cleanup:
-; CHECK-NEXT: ret void
+define void @test_linear_bad_stride(ptr noalias %a, ptr readnone %b, i64 %n) {
+; NEON-LABEL: define void @test_linear_bad_stride
+; NEON-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) {
+; NEON: [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR2:[0-9]+]]
+;
+; SVE_OR_NEON-LABEL: define void @test_linear_bad_stride
+; SVE_OR_NEON-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
+; SVE_OR_NEON: [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR3:[0-9]+]]
+;
+; SVE_TF-LABEL: define void @test_linear_bad_stride
+; SVE_TF-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
+; SVE_TF: [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR3:[0-9]+]]
;
entry:
br label %for.body
for.body:
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
- %gepc = getelementptr i32, ptr %c, i64 %indvars.iv
- %data = load i32, ptr %gepc, align 8
- %gepb = getelementptr i32, ptr %b, i64 %indvars.iv
- %call = call i32 @baz(i32 %data, ptr %gepb) #3
- %gepa = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
- store i32 %call, ptr %gepa, align 8
+ %gepb = getelementptr i64, ptr %b, i64 %indvars.iv
+ %data = call i64 @foo(ptr %gepb) #2
+ %gepa = getelementptr inbounds i64, ptr %a, i64 %indvars.iv
+ store i64 %data, ptr %gepa
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%exitcond = icmp eq i64 %indvars.iv.next, %n
br i1 %exitcond, label %for.cond.cleanup, label %for.body
@@ -111,32 +99,29 @@ for.cond.cleanup:
ret void
}
-define void @test_linear_bad_stride(ptr noalias %a, ptr readnone %b, i64 %n) #0 {
-; CHECK-LABEL: define void @test_linear_bad_stride
-; CHECK-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: entry:
-; CHECK-NEXT: br label [[FOR_BODY:%.*]]
-; CHECK: for.body:
-; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT: [[GEPB:%.*]] = getelementptr i64, ptr [[B]], i64 [[INDVARS_IV]]
-; CHECK-NEXT: [[CALL:%.*]] = call i64 @foo(ptr [[GEPB]]) #[[ATTR4:[0-9]+]]
-; CHECK-NEXT: [[GEPA:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDVARS_IV]]
-; CHECK-NEXT: store i64 [[CALL]], ptr [[GEPA]], align 8
-; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
-; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
-; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]]
-; CHECK: for.cond.cleanup:
-; CHECK-NEXT: ret void
+define void @test_linear_wide_stride(ptr noalias %a, ptr readnone %b, i64 %n) {
+; NEON-LABEL: define void @test_linear_wide_stride
+; NEON-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) {
+; NEON: [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR2]]
+;
+; SVE_OR_NEON-LABEL: define void @test_linear_wide_stride
+; SVE_OR_NEON-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
+; SVE_OR_NEON: [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR3]]
+;
+; SVE_TF-LABEL: define void @test_linear_wide_stride
+; SVE_TF-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
+; SVE_TF: [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR3]]
;
entry:
br label %for.body
for.body:
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
- %gepb = getelementptr i64, ptr %b, i64 %indvars.iv
- %call = call i64 @foo(ptr %gepb) #4
+ %double = mul i64 %indvars.iv, 2
+ %gepb = getelementptr i64, ptr %b, i64 %double
+ %data = call i64 @foo(ptr %gepb) #2
%gepa = getelementptr inbounds i64, ptr %a, i64 %indvars.iv
- store i64 %call, ptr %gepa
+ store i64 %data, ptr %gepa
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%exitcond = icmp eq i64 %indvars.iv.next, %n
br i1 %exitcond, label %for.cond.cleanup, label %for.body
@@ -145,34 +130,29 @@ for.cond.cleanup:
ret void
}
-define void @test_linear_wide_stride(ptr noalias %a, ptr readnone %b, i64 %n) #0 {
-; CHECK-LABEL: define void @test_linear_wide_stride
-; CHECK-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: entry:
-; CHECK-NEXT: br label [[FOR_BODY:%.*]]
-; CHECK: for.body:
-; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT: [[DOUBLE:%.*]] = mul i64 [[INDVARS_IV]], 2
-; CHECK-NEXT: [[GEPB:%.*]] = getelementptr i64, ptr [[B]], i64 [[DOUBLE]]
-; CHECK-NEXT: [[CALL:%.*]] = call i64 @foo(ptr [[GEPB]]) #[[ATTR4]]
-; CHECK-NEXT: [[GEPA:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDVARS_IV]]
-; CHECK-NEXT: store i64 [[CALL]], ptr [[GEPA]], align 8
-; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
-; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
-; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]]
-; CHECK: for.cond.cleanup:
-; CHECK-NEXT: ret void
+define void @test_linear_mixed_types(ptr noalias %a, ptr readnone %b, ptr readonly %c, i64 %n) {
+; NEON-LABEL: define void @test_linear_mixed_types
+; NEON-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], ptr readonly [[C:%.*]], i64 [[N:%.*]]) {
+; NEON: [[DATA:%.*]] = call i32 @quux(ptr [[GEPC:%.*]], ptr [[GEPB:%.*]]) #[[ATTR3:[0-9]+]]
+;
+; SVE_OR_NEON-LABEL: define void @test_linear_mixed_types
+; SVE_OR_NEON-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], ptr readonly [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
+; SVE_OR_NEON: [[DATA:%.*]] = call i32 @quux(ptr [[GEPC:%.*]], ptr [[GEPB:%.*]]) #[[ATTR4:[0-9]+]]
+;
+; SVE_TF-LABEL: define void @test_linear_mixed_types
+; SVE_TF-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], ptr readonly [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
+; SVE_TF: [[DATA:%.*]] = call i32 @quux(ptr [[GEPC:%.*]], ptr [[GEPB:%.*]]) #[[ATTR4:[0-9]+]]
;
entry:
br label %for.body
for.body:
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
- %double = mul i64 %indvars.iv, 2
- %gepb = getelementptr i64, ptr %b, i64 %double
- %call = call i64 @foo(ptr %gepb) #4
- %gepa = getelementptr inbounds i64, ptr %a, i64 %indvars.iv
- store i64 %call, ptr %gepa
+ %gepc = getelementptr i32, ptr %c, i64 %indvars.iv
+ %gepb = getelementptr i64, ptr %b, i64 %indvars.iv
+ %data = call i32 @quux(ptr %gepc, ptr %gepb) #3
+ %gepa = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
+ store i32 %data, ptr %gepa, align 8
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%exitcond = icmp eq i64 %indvars.iv.next, %n
br i1 %exitcond, label %for.cond.cleanup, label %for.body
@@ -181,34 +161,29 @@ for.cond.cleanup:
ret void
}
-define void @test_linear_mixed_types(ptr noalias %a, ptr readnone %b, ptr readonly %c, i64 %n) #0 {
-; CHECK-LABEL: define void @test_linear_mixed_types
-; CHECK-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], ptr readonly [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: entry:
-; CHECK-NEXT: br label [[FOR_BODY:%.*]]
-; CHECK: for.body:
-; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT: [[GEPC:%.*]] = getelementptr i32, ptr [[C]], i64 [[INDVARS_IV]]
-; CHECK-NEXT: [[GEPB:%.*]] = getelementptr i64, ptr [[B]], i64 [[INDVARS_IV]]
-; CHECK-NEXT: [[CALL:%.*]] = call i32 @quux(ptr [[GEPC]], ptr [[GEPB]]) #[[ATTR5:[0-9]+]]
-; CHECK-NEXT: [[GEPA:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]]
-; CHECK-NEXT: store i32 [[CALL]], ptr [[GEPA]], align 8
-; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
-; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
-; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]]
-; CHECK: for.cond.cleanup:
-; CHECK-NEXT: ret void
+define void @test_linear_non_ptr(ptr noalias %a, i64 %n) {
+; NEON-LABEL: define void @test_linear_non_ptr
+; NEON-SAME: (ptr noalias [[A:%.*]], i64 [[N:%.*]]) {
+; NEON: [[DATA:%.*]] = call i32 @bar(i32 [[TREBLED:%.*]]) #[[ATTR4:[0-9]+]]
+;
+; SVE_OR_NEON-LABEL: define void @test_linear_non_ptr
+; SVE_OR_NEON-SAME: (ptr noalias [[A:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
+; SVE_OR_NEON: [[DATA:%.*]] = call i32 @bar(i32 [[TREBLED:%.*]]) #[[ATTR5:[0-9]+]]
+;
+; SVE_TF-LABEL: define void @test_linear_non_ptr
+; SVE_TF-SAME: (ptr noalias [[A:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
+; SVE_TF: [[DATA:%.*]] = call i32 @bar(i32 [[TREBLED:%.*]]) #[[ATTR5:[0-9]+]]
;
entry:
br label %for.body
for.body:
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
- %gepc = getelementptr i32, ptr %c, i64 %indvars.iv
- %gepb = getelementptr i64, ptr %b, i64 %indvars.iv
- %call = call i32 @quux(ptr %gepc, ptr %gepb) #5
+ %little.iv = trunc i64 %indvars.iv to i32
+ %trebled = mul i32 %little.iv, 3
+ %data = call i32 @bar(i32 %trebled) #4
%gepa = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
- store i32 %call, ptr %gepa, align 8
+ store i32 %data, ptr %gepa
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%exitcond = icmp eq i64 %indvars.iv.next, %n
br i1 %exitcond, label %for.cond.cleanup, label %for.body
@@ -217,23 +192,18 @@ for.cond.cleanup:
ret void
}
-define void @test_linear_non_ptr(ptr noalias %a, i64 %n) #0 {
-; CHECK-LABEL: define void @test_linear_non_ptr
-; CHECK-SAME: (ptr noalias [[A:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: entry:
-; CHECK-NEXT: br label [[FOR_BODY:%.*]]
-; CHECK: for.body:
-; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT: [[LITTLE_IV:%.*]] = trunc i64 [[INDVARS_IV]] to i32
-; CHECK-NEXT: [[TREBLED:%.*]] = mul i32 [[LITTLE_IV]], 3
-; CHECK-NEXT: [[CALL:%.*]] = call i32 @bar(i32 [[TREBLED]]) #[[ATTR6:[0-9]+]]
-; CHECK-NEXT: [[GEPA:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]]
-; CHECK-NEXT: store i32 [[CALL]], ptr [[GEPA]], align 4
-; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
-; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
-; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]]
-; CHECK: for.cond.cleanup:
-; CHECK-NEXT: ret void
+define void @test_linear_non_ptr_neg_stride(ptr noalias %a, i64 %n) {
+; NEON-LABEL: define void @test_linear_non_ptr_neg_stride
+; NEON-SAME: (ptr noalias [[A:%.*]], i64 [[N:%.*]]) {
+; NEON: [[DATA:%.*]] = call i32 @bar(i32 [[NEGSTRIDE:%.*]]) #[[ATTR5:[0-9]+]]
+;
+; SVE_OR_NEON-LABEL: define void @test_linear_non_ptr_neg_stride
+; SVE_OR_NEON-SAME: (ptr noalias [[A:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
+; SVE_OR_NEON: [[DATA:%.*]] = call i32 @bar(i32 [[NEGSTRIDE:%.*]]) #[[ATTR6:[0-9]+]]
+;
+; SVE_TF-LABEL: define void @test_linear_non_ptr_neg_stride
+; SVE_TF-SAME: (ptr noalias [[A:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
+; SVE_TF: [[DATA:%.*]] = call i32 @bar(i32 [[NEGSTRIDE:%.*]]) #[[ATTR6:[0-9]+]]
;
entry:
br label %for.body
@@ -241,10 +211,10 @@ entry:
for.body:
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
%little.iv = trunc i64 %indvars.iv to i32
- %trebled = mul i32 %little.iv, 3
- %call = call i32 @bar(i32 %trebled) #6
+ %negstride = mul i32 %little.iv, -5
+ %data = call i32 @bar(i32 %negstride) #5
%gepa = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
- store i32 %call, ptr %gepa
+ store i32 %data, ptr %gepa
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%exitcond = icmp eq i64 %indvars.iv.next, %n
br i1 %exitcond, label %for.cond.cleanup, label %for.body
@@ -258,18 +228,26 @@ declare i32 @baz(i32, ptr)
declare i32 @quux(ptr, ptr)
declare i32 @bar(i32)
-; vector variants of foo
-declare <vscale x 2 x i64> @foo_linear(ptr, <vscale x 2 x i1>)
-declare <vscale x 2 x i64> @foo_linear_nomask(ptr)
-declare <vscale x 2 x i64> @foo_linear_nomask_2x(ptr)
-declare <vscale x 4 x i32> @baz_vector_and_linear(<vscale x 4 x i32>, ptr)
-declare <vscale x 4 x i32> @quux_linear_mask(ptr, ptr, <vscale x 4 x i1>)
-declare <vscale x 4 x i32> @bar_linear(i32)
-
-attributes #0 = { "target-features"="+sve" }
-attributes #1 = { nounwind "vector-function-abi-variant"="_ZGVsNxl8_foo(foo_linear_nomask)" }
-attributes #2 = { nounwind "vector-function-abi-variant"="_ZGVsMxl8_foo(foo_linear)" }
-attributes #3 = { nounwind "vector-function-abi-variant"="_ZGVsNxvl4_baz(baz_vector_and_linear)" }
-attributes #4 = { nounwind "vector-function-abi-variant"="_ZGVsNxl16_foo(foo_linear_nomask_2x)" }
-attributes #5 = { nounwind "vector-function-abi-variant"="_ZGVsMxl4l8_quux(quux_linear_mask)" }
-attributes #6 = { nounwind "vector-function-abi-variant"="_ZGVsNxl3_bar(bar_linear)" }
+; neon vector variants of foo
+declare <2 x i64> @neon_foo_linear(ptr)
+declare <2 x i64> @neon_foo_linear_2x(ptr)
+declare <4 x i32> @neon_baz_vector_and_linear(<4 x i32>, ptr)
+declare <4 x i32> @neon_quux_linear(ptr, ptr)
+declare <4 x i32> @neon_bar_linear(i32)
+declare <4 x i32> @neon_bar_neg_linear(i32)
+
+; scalable vector variants of foo
+declare <vscale x 2 x i64> @sve_foo_linear(ptr, <vscale x 2 x i1>)
+declare <vscale x 2 x i64> @sve_foo_linear_nomask(ptr)
+declare <vscale x 2 x i64> @sve_foo_linear_nomask_2x(ptr)
+declare <vscale x 4 x i32> @sve_baz_vector_and_linear(<vscale x 4 x i32>, ptr)
+declare <vscale x 4 x i32> @sve_quux_linear_mask(ptr, ptr, <vscale x 4 x i1>)
+declare <vscale x 4 x i32> @sve_bar_linear(i32)
+declare <vscale x 4 x i32> @sve_bar_neg_linear(i32)
+
+attributes #0 = { nounwind "vector-function-abi-variant"="_ZGVsNxl8_foo(sve_foo_linear_nomask),_ZGVsMxl8_foo(sve_foo_linear),_ZGVnN2l8_foo(neon_foo_linear)" }
+attributes #1 = { nounwind "vector-function-abi-variant"="_ZGVsNxvl4_baz(sve_baz_vector_and_linear),_ZGVnN4vl4_baz(neon_baz_vector_and_linear)" }
+attributes #2 = { nounwind "vector-function-abi-variant"="_ZGVsNxl16_foo(sve_foo_linear_nomask_2x),_ZGVnN2l16_foo(neon_foo_linear_2x)" }
+attributes #3 = { nounwind "vector-function-abi-variant"="_ZGVsMxl4l8_quux(sve_quux_linear_mask),_ZGVnN4l4l8_quux(neon_quux_linear)" }
+attributes #4 = { nounwind "vector-function-abi-variant"="_ZGVsNxl3_bar(sve_bar_linear),_ZGVnN4l3_bar(neon_bar_linear)" }
+attributes #5 = { nounwind "vector-function-abi-variant"="_ZGVsNxln5_bar(sve_bar_neg_linear),_ZGVnN4ln5_bar(neon_bar_neg_linear)" }
>From 6ca60eaa156477588e3a78f53e89833c5c4e4204 Mon Sep 17 00:00:00 2001
From: Graham Hunter <graham.hunter at arm.com>
Date: Tue, 5 Dec 2023 12:59:33 +0000
Subject: [PATCH 3/3] More precise test and variant names
---
.../AArch64/vector-call-linear-args.ll | 96 +++++++++----------
1 file changed, 48 insertions(+), 48 deletions(-)
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/vector-call-linear-args.ll b/llvm/test/Transforms/LoopVectorize/AArch64/vector-call-linear-args.ll
index ba9d57e1e4a16..cd133371f66ce 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/vector-call-linear-args.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/vector-call-linear-args.ll
@@ -7,16 +7,16 @@ target triple = "aarch64-unknown-linux-gnu"
; A call whose argument can remain a scalar because it's sequential and only the
; starting value is required.
-define void @test_linear(ptr noalias %a, ptr readnone %b, i64 %n) {
-; NEON-LABEL: define void @test_linear
+define void @test_linear8(ptr noalias %a, ptr readnone %b, i64 %n) {
+; NEON-LABEL: define void @test_linear8
; NEON-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) {
; NEON: [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR0:[0-9]+]]
;
-; SVE_OR_NEON-LABEL: define void @test_linear
+; SVE_OR_NEON-LABEL: define void @test_linear8
; SVE_OR_NEON-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
; SVE_OR_NEON: [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR1:[0-9]+]]
;
-; SVE_TF-LABEL: define void @test_linear
+; SVE_TF-LABEL: define void @test_linear8
; SVE_TF-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
; SVE_TF: [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR1:[0-9]+]]
;
@@ -37,16 +37,16 @@ for.cond.cleanup:
ret void
}
-define void @test_linear_with_vector(ptr noalias %a, ptr readnone %b, ptr readonly %c, i64 %n) {
-; NEON-LABEL: define void @test_linear_with_vector
+define void @test_vector_linear4(ptr noalias %a, ptr readnone %b, ptr readonly %c, i64 %n) {
+; NEON-LABEL: define void @test_vector_linear4
; NEON-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], ptr readonly [[C:%.*]], i64 [[N:%.*]]) {
; NEON: [[DATA:%.*]] = call i32 @baz(i32 [[INPUT:%.*]], ptr [[GEPB:%.*]]) #[[ATTR1:[0-9]+]]
;
-; SVE_OR_NEON-LABEL: define void @test_linear_with_vector
+; SVE_OR_NEON-LABEL: define void @test_vector_linear4
; SVE_OR_NEON-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], ptr readonly [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
; SVE_OR_NEON: [[DATA:%.*]] = call i32 @baz(i32 [[INPUT:%.*]], ptr [[GEPB:%.*]]) #[[ATTR2:[0-9]+]]
;
-; SVE_TF-LABEL: define void @test_linear_with_vector
+; SVE_TF-LABEL: define void @test_vector_linear4
; SVE_TF-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], ptr readonly [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
; SVE_TF: [[DATA:%.*]] = call i32 @baz(i32 [[INPUT:%.*]], ptr [[GEPB:%.*]]) #[[ATTR2:[0-9]+]]
;
@@ -69,16 +69,16 @@ for.cond.cleanup:
ret void
}
-define void @test_linear_bad_stride(ptr noalias %a, ptr readnone %b, i64 %n) {
-; NEON-LABEL: define void @test_linear_bad_stride
+define void @test_linear8_bad_stride(ptr noalias %a, ptr readnone %b, i64 %n) {
+; NEON-LABEL: define void @test_linear8_bad_stride
; NEON-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) {
; NEON: [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR2:[0-9]+]]
;
-; SVE_OR_NEON-LABEL: define void @test_linear_bad_stride
+; SVE_OR_NEON-LABEL: define void @test_linear8_bad_stride
; SVE_OR_NEON-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
; SVE_OR_NEON: [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR3:[0-9]+]]
;
-; SVE_TF-LABEL: define void @test_linear_bad_stride
+; SVE_TF-LABEL: define void @test_linear8_bad_stride
; SVE_TF-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
; SVE_TF: [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR3:[0-9]+]]
;
@@ -99,16 +99,16 @@ for.cond.cleanup:
ret void
}
-define void @test_linear_wide_stride(ptr noalias %a, ptr readnone %b, i64 %n) {
-; NEON-LABEL: define void @test_linear_wide_stride
+define void @test_linear16_wide_stride(ptr noalias %a, ptr readnone %b, i64 %n) {
+; NEON-LABEL: define void @test_linear16_wide_stride
; NEON-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) {
; NEON: [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR2]]
;
-; SVE_OR_NEON-LABEL: define void @test_linear_wide_stride
+; SVE_OR_NEON-LABEL: define void @test_linear16_wide_stride
; SVE_OR_NEON-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
; SVE_OR_NEON: [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR3]]
;
-; SVE_TF-LABEL: define void @test_linear_wide_stride
+; SVE_TF-LABEL: define void @test_linear16_wide_stride
; SVE_TF-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
; SVE_TF: [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR3]]
;
@@ -130,16 +130,16 @@ for.cond.cleanup:
ret void
}
-define void @test_linear_mixed_types(ptr noalias %a, ptr readnone %b, ptr readonly %c, i64 %n) {
-; NEON-LABEL: define void @test_linear_mixed_types
+define void @test_linear4_linear8(ptr noalias %a, ptr readnone %b, ptr readonly %c, i64 %n) {
+; NEON-LABEL: define void @test_linear4_linear8
; NEON-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], ptr readonly [[C:%.*]], i64 [[N:%.*]]) {
; NEON: [[DATA:%.*]] = call i32 @quux(ptr [[GEPC:%.*]], ptr [[GEPB:%.*]]) #[[ATTR3:[0-9]+]]
;
-; SVE_OR_NEON-LABEL: define void @test_linear_mixed_types
+; SVE_OR_NEON-LABEL: define void @test_linear4_linear8
; SVE_OR_NEON-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], ptr readonly [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
; SVE_OR_NEON: [[DATA:%.*]] = call i32 @quux(ptr [[GEPC:%.*]], ptr [[GEPB:%.*]]) #[[ATTR4:[0-9]+]]
;
-; SVE_TF-LABEL: define void @test_linear_mixed_types
+; SVE_TF-LABEL: define void @test_linear4_linear8
; SVE_TF-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], ptr readonly [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
; SVE_TF: [[DATA:%.*]] = call i32 @quux(ptr [[GEPC:%.*]], ptr [[GEPB:%.*]]) #[[ATTR4:[0-9]+]]
;
@@ -161,16 +161,16 @@ for.cond.cleanup:
ret void
}
-define void @test_linear_non_ptr(ptr noalias %a, i64 %n) {
-; NEON-LABEL: define void @test_linear_non_ptr
+define void @test_linear3_non_ptr(ptr noalias %a, i64 %n) {
+; NEON-LABEL: define void @test_linear3_non_ptr
; NEON-SAME: (ptr noalias [[A:%.*]], i64 [[N:%.*]]) {
; NEON: [[DATA:%.*]] = call i32 @bar(i32 [[TREBLED:%.*]]) #[[ATTR4:[0-9]+]]
;
-; SVE_OR_NEON-LABEL: define void @test_linear_non_ptr
+; SVE_OR_NEON-LABEL: define void @test_linear3_non_ptr
; SVE_OR_NEON-SAME: (ptr noalias [[A:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
; SVE_OR_NEON: [[DATA:%.*]] = call i32 @bar(i32 [[TREBLED:%.*]]) #[[ATTR5:[0-9]+]]
;
-; SVE_TF-LABEL: define void @test_linear_non_ptr
+; SVE_TF-LABEL: define void @test_linear3_non_ptr
; SVE_TF-SAME: (ptr noalias [[A:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
; SVE_TF: [[DATA:%.*]] = call i32 @bar(i32 [[TREBLED:%.*]]) #[[ATTR5:[0-9]+]]
;
@@ -192,16 +192,16 @@ for.cond.cleanup:
ret void
}
-define void @test_linear_non_ptr_neg_stride(ptr noalias %a, i64 %n) {
-; NEON-LABEL: define void @test_linear_non_ptr_neg_stride
+define void @test_linearn5_non_ptr_neg_stride(ptr noalias %a, i64 %n) {
+; NEON-LABEL: define void @test_linearn5_non_ptr_neg_stride
; NEON-SAME: (ptr noalias [[A:%.*]], i64 [[N:%.*]]) {
; NEON: [[DATA:%.*]] = call i32 @bar(i32 [[NEGSTRIDE:%.*]]) #[[ATTR5:[0-9]+]]
;
-; SVE_OR_NEON-LABEL: define void @test_linear_non_ptr_neg_stride
+; SVE_OR_NEON-LABEL: define void @test_linearn5_non_ptr_neg_stride
; SVE_OR_NEON-SAME: (ptr noalias [[A:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
; SVE_OR_NEON: [[DATA:%.*]] = call i32 @bar(i32 [[NEGSTRIDE:%.*]]) #[[ATTR6:[0-9]+]]
;
-; SVE_TF-LABEL: define void @test_linear_non_ptr_neg_stride
+; SVE_TF-LABEL: define void @test_linearn5_non_ptr_neg_stride
; SVE_TF-SAME: (ptr noalias [[A:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
; SVE_TF: [[DATA:%.*]] = call i32 @bar(i32 [[NEGSTRIDE:%.*]]) #[[ATTR6:[0-9]+]]
;
@@ -229,25 +229,25 @@ declare i32 @quux(ptr, ptr)
declare i32 @bar(i32)
; neon vector variants of foo
-declare <2 x i64> @neon_foo_linear(ptr)
-declare <2 x i64> @neon_foo_linear_2x(ptr)
-declare <4 x i32> @neon_baz_vector_and_linear(<4 x i32>, ptr)
-declare <4 x i32> @neon_quux_linear(ptr, ptr)
-declare <4 x i32> @neon_bar_linear(i32)
-declare <4 x i32> @neon_bar_neg_linear(i32)
+declare <2 x i64> @vec_foo_linear8_nomask_neon(ptr)
+declare <2 x i64> @vec_foo_linear16_nomask_neon(ptr)
+declare <4 x i32> @vec_baz_vector_linear4_nomask_neon(<4 x i32>, ptr)
+declare <4 x i32> @vec_quux_linear4_linear8_nomask_neon(ptr, ptr)
+declare <4 x i32> @vec_bar_linear3_nomask_neon(i32)
+declare <4 x i32> @vec_bar_linearn5_nomask_neon(i32)
; scalable vector variants of foo
-declare <vscale x 2 x i64> @sve_foo_linear(ptr, <vscale x 2 x i1>)
-declare <vscale x 2 x i64> @sve_foo_linear_nomask(ptr)
-declare <vscale x 2 x i64> @sve_foo_linear_nomask_2x(ptr)
-declare <vscale x 4 x i32> @sve_baz_vector_and_linear(<vscale x 4 x i32>, ptr)
-declare <vscale x 4 x i32> @sve_quux_linear_mask(ptr, ptr, <vscale x 4 x i1>)
-declare <vscale x 4 x i32> @sve_bar_linear(i32)
-declare <vscale x 4 x i32> @sve_bar_neg_linear(i32)
-
-attributes #0 = { nounwind "vector-function-abi-variant"="_ZGVsNxl8_foo(sve_foo_linear_nomask),_ZGVsMxl8_foo(sve_foo_linear),_ZGVnN2l8_foo(neon_foo_linear)" }
-attributes #1 = { nounwind "vector-function-abi-variant"="_ZGVsNxvl4_baz(sve_baz_vector_and_linear),_ZGVnN4vl4_baz(neon_baz_vector_and_linear)" }
-attributes #2 = { nounwind "vector-function-abi-variant"="_ZGVsNxl16_foo(sve_foo_linear_nomask_2x),_ZGVnN2l16_foo(neon_foo_linear_2x)" }
-attributes #3 = { nounwind "vector-function-abi-variant"="_ZGVsMxl4l8_quux(sve_quux_linear_mask),_ZGVnN4l4l8_quux(neon_quux_linear)" }
-attributes #4 = { nounwind "vector-function-abi-variant"="_ZGVsNxl3_bar(sve_bar_linear),_ZGVnN4l3_bar(neon_bar_linear)" }
-attributes #5 = { nounwind "vector-function-abi-variant"="_ZGVsNxln5_bar(sve_bar_neg_linear),_ZGVnN4ln5_bar(neon_bar_neg_linear)" }
+declare <vscale x 2 x i64> @vec_foo_linear8_mask_sve(ptr, <vscale x 2 x i1>)
+declare <vscale x 2 x i64> @vec_foo_linear8_nomask_sve(ptr)
+declare <vscale x 2 x i64> @vec_foo_linear16_nomask_sve(ptr)
+declare <vscale x 4 x i32> @vec_baz_vector_linear4_nomask_sve(<vscale x 4 x i32>, ptr)
+declare <vscale x 4 x i32> @vec_quux_linear4_linear8_mask_sve(ptr, ptr, <vscale x 4 x i1>)
+declare <vscale x 4 x i32> @vec_bar_linear3_nomask_sve(i32)
+declare <vscale x 4 x i32> @vec_bar_linearn5_nomask_sve(i32)
+
+attributes #0 = { nounwind "vector-function-abi-variant"="_ZGVsNxl8_foo(vec_foo_linear8_nomask_sve),_ZGVsMxl8_foo(vec_foo_linear8_mask_sve),_ZGVnN2l8_foo(vec_foo_linear8_nomask_neon)" }
+attributes #1 = { nounwind "vector-function-abi-variant"="_ZGVsNxvl4_baz(vec_baz_vector_linear4_nomask_sve),_ZGVnN4vl4_baz(vec_baz_vector_linear4_nomask_neon)" }
+attributes #2 = { nounwind "vector-function-abi-variant"="_ZGVsNxl16_foo(vec_foo_linear16_nomask_sve),_ZGVnN2l16_foo(vec_foo_linear16_nomask_neon)" }
+attributes #3 = { nounwind "vector-function-abi-variant"="_ZGVsMxl4l8_quux(vec_quux_linear4_linear8_mask_sve),_ZGVnN4l4l8_quux(vec_quux_linear4_linear8_nomask_neon)" }
+attributes #4 = { nounwind "vector-function-abi-variant"="_ZGVsNxl3_bar(vec_bar_linear3_nomask_sve),_ZGVnN4l3_bar(vec_bar_linear3_nomask_neon)" }
+attributes #5 = { nounwind "vector-function-abi-variant"="_ZGVsNxln5_bar(vec_bar_linearn5_nomask_sve),_ZGVnN4ln5_bar(vec_bar_linearn5_nomask_neon)" }
More information about the llvm-commits mailing list