[llvm] 984b46e - [SLP] Add test to check for GEP vectorization
Zain Jaffal via llvm-commits
llvm-commits at lists.llvm.org
Mon Mar 27 09:53:07 PDT 2023
Author: Zain Jaffal
Date: 2023-03-27T17:52:55+01:00
New Revision: 984b46e6cc2ad9291959827b99913deac90d698e
URL: https://github.com/llvm/llvm-project/commit/984b46e6cc2ad9291959827b99913deac90d698e
DIFF: https://github.com/llvm/llvm-project/commit/984b46e6cc2ad9291959827b99913deac90d698e.diff
LOG: [SLP] Add test to check for GEP vectorization
Add a test to check for GEP vectorization after the change from D144128, where GEP vectorization is dependent on the target hook `prefersVectorizedAddressing()`.
Reviewed By: fhahn
Differential Revision: https://reviews.llvm.org/D146540
Added:
llvm/test/Transforms/SLPVectorizer/AArch64/vector-getelementptr.ll
Modified:
Removed:
################################################################################
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/vector-getelementptr.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/vector-getelementptr.ll
new file mode 100644
index 0000000000000..14ce08cb7aebe
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/vector-getelementptr.ll
@@ -0,0 +1,77 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
+; RUN: opt -mtriple=arm64-apple-ios -S -passes=slp-vectorizer < %s | FileCheck %s
+; vectorization requires a vector GEP + extracts, but the cost is offset by being able to efficiently vectorize the rest of the tree
+
+define void @should_vectorize_gep(ptr %base1, ptr %base2, ptr %base_gep) {
+; CHECK-LABEL: define void @should_vectorize_gep
+; CHECK-SAME: (ptr [[BASE1:%.*]], ptr [[BASE2:%.*]], ptr [[BASE_GEP:%.*]]) {
+; CHECK-NEXT: bb:
+; CHECK-NEXT: [[LOAD1:%.*]] = load i32, ptr [[BASE1]], align 2
+; CHECK-NEXT: [[ZEXT1:%.*]] = zext i32 [[LOAD1]] to i64
+; CHECK-NEXT: [[LOAD2:%.*]] = load i32, ptr [[BASE2]], align 2
+; CHECK-NEXT: [[ZEXT2:%.*]] = zext i32 [[LOAD2]] to i64
+; CHECK-NEXT: [[SUB:%.*]] = sub i64 [[ZEXT1]], [[ZEXT2]]
+; CHECK-NEXT: [[GETELEMENTPTR_RES_1:%.*]] = getelementptr i32, ptr [[BASE_GEP]], i64 [[SUB]]
+; CHECK-NEXT: [[GETELEMENTPTR1:%.*]] = getelementptr i32, ptr [[BASE1]], i64 1
+; CHECK-NEXT: [[GETELEMENTPTR2:%.*]] = getelementptr i32, ptr [[BASE2]], i64 1
+; CHECK-NEXT: [[LOAD3:%.*]] = load i32, ptr [[GETELEMENTPTR1]], align 2
+; CHECK-NEXT: [[ZEXT3:%.*]] = zext i32 [[LOAD3]] to i64
+; CHECK-NEXT: [[LOAD4:%.*]] = load i32, ptr [[GETELEMENTPTR2]], align 2
+; CHECK-NEXT: [[ZEXT4:%.*]] = zext i32 [[LOAD4]] to i64
+; CHECK-NEXT: [[SUB2:%.*]] = sub i64 [[ZEXT3]], [[ZEXT4]]
+; CHECK-NEXT: [[GETELEMENTPTR_RES_2:%.*]] = getelementptr i32, ptr [[BASE_GEP]], i64 [[SUB2]]
+; CHECK-NEXT: [[GETELEMENTPTR3:%.*]] = getelementptr i32, ptr [[BASE1]], i64 2
+; CHECK-NEXT: [[GETELEMENTPTR4:%.*]] = getelementptr i32, ptr [[BASE2]], i64 2
+; CHECK-NEXT: [[LOAD5:%.*]] = load i32, ptr [[GETELEMENTPTR3]], align 2
+; CHECK-NEXT: [[ZEXT5:%.*]] = zext i32 [[LOAD5]] to i64
+; CHECK-NEXT: [[LOAD6:%.*]] = load i32, ptr [[GETELEMENTPTR4]], align 2
+; CHECK-NEXT: [[ZEXT6:%.*]] = zext i32 [[LOAD6]] to i64
+; CHECK-NEXT: [[SUB3:%.*]] = sub i64 [[ZEXT5]], [[ZEXT6]]
+; CHECK-NEXT: [[GETELEMENTPTR_RES_3:%.*]] = getelementptr i32, ptr [[BASE_GEP]], i64 [[SUB3]]
+; CHECK-NEXT: [[GETELEMENTPTR5:%.*]] = getelementptr i32, ptr [[BASE1]], i64 3
+; CHECK-NEXT: [[GETELEMENTPTR6:%.*]] = getelementptr i32, ptr [[BASE2]], i64 3
+; CHECK-NEXT: [[LOAD7:%.*]] = load i32, ptr [[GETELEMENTPTR5]], align 2
+; CHECK-NEXT: [[ZEXT7:%.*]] = zext i32 [[LOAD7]] to i64
+; CHECK-NEXT: [[LOAD8:%.*]] = load i32, ptr [[GETELEMENTPTR6]], align 2
+; CHECK-NEXT: [[ZEXT8:%.*]] = zext i32 [[LOAD8]] to i64
+; CHECK-NEXT: [[SUB4:%.*]] = sub i64 [[ZEXT7]], [[ZEXT8]]
+; CHECK-NEXT: [[GETELEMENTPTR_RES_4:%.*]] = getelementptr i32, ptr [[BASE_GEP]], i64 [[SUB4]]
+; CHECK-NEXT: call void @use_4(ptr [[GETELEMENTPTR_RES_1]], ptr [[GETELEMENTPTR_RES_2]], ptr [[GETELEMENTPTR_RES_3]], ptr [[GETELEMENTPTR_RES_4]])
+; CHECK-NEXT: ret void
+;
+bb:
+ %load1 = load i32, ptr %base1, align 2
+ %zext1 = zext i32 %load1 to i64
+ %load2 = load i32, ptr %base2, align 2
+ %zext2 = zext i32 %load2 to i64
+ %sub = sub i64 %zext1, %zext2
+ %getelementptr.res.1 = getelementptr i32, ptr %base_gep, i64 %sub
+ %getelementptr1 = getelementptr i32, ptr %base1, i64 1
+ %getelementptr2 = getelementptr i32, ptr %base2, i64 1
+ %load3 = load i32, ptr %getelementptr1, align 2
+ %zext3 = zext i32 %load3 to i64
+ %load4 = load i32, ptr %getelementptr2, align 2
+ %zext4 = zext i32 %load4 to i64
+ %sub2 = sub i64 %zext3, %zext4
+ %getelementptr.res.2 = getelementptr i32, ptr %base_gep, i64 %sub2
+ %getelementptr3 = getelementptr i32, ptr %base1, i64 2
+ %getelementptr4 = getelementptr i32, ptr %base2, i64 2
+ %load5 = load i32, ptr %getelementptr3, align 2
+ %zext5 = zext i32 %load5 to i64
+ %load6 = load i32, ptr %getelementptr4, align 2
+ %zext6 = zext i32 %load6 to i64
+ %sub3 = sub i64 %zext5, %zext6
+ %getelementptr.res.3 = getelementptr i32, ptr %base_gep, i64 %sub3
+ %getelementptr5 = getelementptr i32, ptr %base1, i64 3
+ %getelementptr6 = getelementptr i32, ptr %base2, i64 3
+ %load7 = load i32, ptr %getelementptr5, align 2
+ %zext7 = zext i32 %load7 to i64
+ %load8 = load i32, ptr %getelementptr6, align 2
+ %zext8 = zext i32 %load8 to i64
+ %sub4 = sub i64 %zext7, %zext8
+ %getelementptr.res.4 = getelementptr i32, ptr %base_gep, i64 %sub4
+ call void @use_4(ptr %getelementptr.res.1, ptr %getelementptr.res.2, ptr %getelementptr.res.3, ptr %getelementptr.res.4)
+ ret void
+}
+
+declare void @use_4(ptr, ptr, ptr, ptr)
More information about the llvm-commits
mailing list