[llvm] 4d7d454 - [SLP][AArch64] Add test to check for the vectorization of fshl
Zain Jaffal via llvm-commits
llvm-commits at lists.llvm.org
Tue Mar 28 09:46:46 PDT 2023
Author: Zain Jaffal
Date: 2023-03-28T17:46:33+01:00
New Revision: 4d7d4543344b3bb99d971f5393de2d538cfb391c
URL: https://github.com/llvm/llvm-project/commit/4d7d4543344b3bb99d971f5393de2d538cfb391c
DIFF: https://github.com/llvm/llvm-project/commit/4d7d4543344b3bb99d971f5393de2d538cfb391c.diff
LOG: [SLP][AArch64] Add test to check for the vectorization of fshl
Currently the cost for fshl is an overestimate, causing SLP to vectorize when it is not beneficial to do so.
Reviewed By: fhahn
Differential Revision: https://reviews.llvm.org/D147056
Added:
llvm/test/Transforms/SLPVectorizer/AArch64/fshl.ll
Modified:
Removed:
################################################################################
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/fshl.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/fshl.ll
new file mode 100644
index 0000000000000..2704a4439f0d5
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/fshl.ll
@@ -0,0 +1,42 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
+; RUN: opt -mtriple=arm64-apple-ios -S -passes=slp-vectorizer < %s | FileCheck %s
+
+; The fshl cost model is currently an overestimate, so SLP vectorizes the scalar llvm.fshl.i64 calls below (all with constant shift amounts) even though doing so is not beneficial; the CHECK lines record that current output.
+define i64 @fshl(i64 %or1, i64 %or2, i64 %or3 ) {
+; CHECK-LABEL: define i64 @fshl
+; CHECK-SAME: (i64 [[OR1:%.*]], i64 [[OR2:%.*]], i64 [[OR3:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i64> poison, i64 [[OR2]], i32 0
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i64> [[TMP0]], i64 [[OR3]], i32 1
+; CHECK-NEXT: [[TMP2:%.*]] = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> [[TMP1]], <2 x i64> zeroinitializer, <2 x i64> <i64 1, i64 2>)
+; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i64> <i64 poison, i64 0>, i64 [[OR1]], i32 0
+; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x i64> [[TMP1]], <2 x i64> <i64 poison, i64 0>, <2 x i32> <i32 0, i32 3>
+; CHECK-NEXT: [[TMP5:%.*]] = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> [[TMP3]], <2 x i64> [[TMP4]], <2 x i64> <i64 17, i64 21>)
+; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x i64> [[TMP3]], <2 x i64> <i64 0, i64 poison>, <2 x i32> <i32 2, i32 0>
+; CHECK-NEXT: [[TMP7:%.*]] = xor <2 x i64> [[TMP2]], [[TMP6]]
+; CHECK-NEXT: [[TMP8:%.*]] = add <2 x i64> [[TMP7]], [[TMP3]]
+; CHECK-NEXT: [[TMP9:%.*]] = xor <2 x i64> [[TMP5]], [[TMP8]]
+; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i64> [[TMP9]], i32 0
+; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x i64> [[TMP8]], i32 1
+; CHECK-NEXT: [[ADD3:%.*]] = or i64 [[TMP10]], [[TMP11]]
+; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x i64> [[TMP9]], i32 1
+; CHECK-NEXT: [[XOR5:%.*]] = xor i64 [[ADD3]], [[TMP12]]
+; CHECK-NEXT: ret i64 [[XOR5]]
+;
+entry:
+ %or4 = tail call i64 @llvm.fshl.i64(i64 %or2, i64 0, i64 1)
+ %xor1 = xor i64 %or4, 0
+ %or5 = tail call i64 @llvm.fshl.i64(i64 %or3, i64 0, i64 2)
+ %xor2 = xor i64 %or5, %or1
+ %add1 = add i64 %xor1, %or1
+ %add2 = add i64 0, %xor2
+ %or6 = tail call i64 @llvm.fshl.i64(i64 %or1, i64 %or2, i64 17)
+ %xor3 = xor i64 %or6, %add1
+ %or7 = tail call i64 @llvm.fshl.i64(i64 0, i64 0, i64 21)
+ %xor4 = xor i64 %or7, %add2
+ %add3 = or i64 %xor3, %add2
+ %xor5 = xor i64 %add3, %xor4
+ ret i64 %xor5
+}
+
+declare i64 @llvm.fshl.i64(i64, i64, i64)
More information about the llvm-commits mailing list