[llvm] 4d7d454 - [SLP][AArch64] Add test to check for the vectorization of fshl

Tue Mar 28 09:46:46 PDT 2023

Author: Zain Jaffal
Date: 2023-03-28T17:46:33+01:00
New Revision: 4d7d4543344b3bb99d971f5393de2d538cfb391c

URL: https://github.com/llvm/llvm-project/commit/4d7d4543344b3bb99d971f5393de2d538cfb391c
DIFF: https://github.com/llvm/llvm-project/commit/4d7d4543344b3bb99d971f5393de2d538cfb391c.diff

LOG: [SLP][AArch64] Add test to check for the vectorization of fshl

Currently the cost for fshl is an overestimate, causing SLP to vectorize when it is not beneficial to do so.

Reviewed By: fhahn

Differential Revision: https://reviews.llvm.org/D147056

Added: 
    llvm/test/Transforms/SLPVectorizer/AArch64/fshl.ll

Modified: 
    

Removed: 
    


################################################################################
diff  --git a/llvm/test/Transforms/SLPVectorizer/AArch64/fshl.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/fshl.ll
new file mode 100644
index 0000000000000..2704a4439f0d5

--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/fshl.ll
@@ -0,0 +1,42 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
+; RUN: opt -mtriple=arm64-apple-ios -S -passes=slp-vectorizer < %s | FileCheck %s
+
+; The fshl cost model is currently an overestimate, which causes this function to be vectorized even though it is not beneficial to do so.
+define i64 @fshl(i64 %or1, i64 %or2, i64 %or3  ) {
+; CHECK-LABEL: define i64 @fshl
+; CHECK-SAME: (i64 [[OR1:%.*]], i64 [[OR2:%.*]], i64 [[OR3:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x i64> poison, i64 [[OR2]], i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x i64> [[TMP0]], i64 [[OR3]], i32 1
+; CHECK-NEXT:    [[TMP2:%.*]] = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> [[TMP1]], <2 x i64> zeroinitializer, <2 x i64> <i64 1, i64 2>)
+; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x i64> <i64 poison, i64 0>, i64 [[OR1]], i32 0
+; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <2 x i64> [[TMP1]], <2 x i64> <i64 poison, i64 0>, <2 x i32> <i32 0, i32 3>
+; CHECK-NEXT:    [[TMP5:%.*]] = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> [[TMP3]], <2 x i64> [[TMP4]], <2 x i64> <i64 17, i64 21>)
+; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <2 x i64> [[TMP3]], <2 x i64> <i64 0, i64 poison>, <2 x i32> <i32 2, i32 0>
+; CHECK-NEXT:    [[TMP7:%.*]] = xor <2 x i64> [[TMP2]], [[TMP6]]
+; CHECK-NEXT:    [[TMP8:%.*]] = add <2 x i64> [[TMP7]], [[TMP3]]
+; CHECK-NEXT:    [[TMP9:%.*]] = xor <2 x i64> [[TMP5]], [[TMP8]]
+; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <2 x i64> [[TMP9]], i32 0
+; CHECK-NEXT:    [[TMP11:%.*]] = extractelement <2 x i64> [[TMP8]], i32 1
+; CHECK-NEXT:    [[ADD3:%.*]] = or i64 [[TMP10]], [[TMP11]]
+; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <2 x i64> [[TMP9]], i32 1
+; CHECK-NEXT:    [[XOR5:%.*]] = xor i64 [[ADD3]], [[TMP12]]
+; CHECK-NEXT:    ret i64 [[XOR5]]
+;
+entry:
+  %or4 = tail call i64 @llvm.fshl.i64(i64 %or2, i64 0, i64 1) ; scalar fshl #1 (shift 1); the checks above pair it with #2 into one <2 x i64> fshl
+  %xor1 = xor i64 %or4, 0 ; xor with constant 0
+  %or5 = tail call i64 @llvm.fshl.i64(i64 %or3, i64 0, i64 2) ; scalar fshl #2 (shift 2)
+  %xor2 = xor i64 %or5, %or1
+  %add1 = add i64 %xor1, %or1
+  %add2 = add i64 0, %xor2 ; add with constant 0
+  %or6 = tail call i64 @llvm.fshl.i64(i64 %or1, i64 %or2, i64 17) ; scalar fshl #3 (shift 17); the checks above pair it with #4 into one <2 x i64> fshl
+  %xor3 = xor i64 %or6, %add1
+  %or7 = tail call i64 @llvm.fshl.i64(i64 0, i64 0, i64 21) ; scalar fshl #4 (shift 21); all operands are constants
+  %xor4 = xor i64 %or7, %add2
+  %add3 = or i64 %xor3, %add2 ; note: despite the name %add3, this is an 'or'
+  %xor5 = xor i64 %add3, %xor4
+  ret i64 %xor5
+}
+
+declare i64 @llvm.fshl.i64(i64, i64, i64)