[PATCH] D147056: [SLP][AArch64] Add test to check for the vectorization of fshl

Zain Jaffal via Phabricator via llvm-commits llvm-commits at lists.llvm.org
Tue Mar 28 08:49:39 PDT 2023


zjaffal created this revision.
zjaffal added a reviewer: fhahn.
Herald added subscribers: vporpo, StephenFan, kristof.beyls.
Herald added a project: All.
zjaffal requested review of this revision.
Herald added subscribers: llvm-commits, pcwang-thead.
Herald added a project: LLVM.

Currently, the cost model overestimates the cost of fshl, causing SLP to vectorize when it is not beneficial to do so.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D147056

Files:
  llvm/test/Transforms/SLPVectorizer/AArch64/fshl-vectorize.ll


Index: llvm/test/Transforms/SLPVectorizer/AArch64/fshl-vectorize.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/SLPVectorizer/AArch64/fshl-vectorize.ll
@@ -0,0 +1,42 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
+; RUN: opt -mtriple=arm64-apple-ios -S -passes=slp-vectorizer < %s | FileCheck %s
+; The fshl cost model is an overestimate, causing SLP to vectorize this test even though it is not beneficial to do so.
+
+define i64 @fshl(i64 %or1, i64 %or2, i64 %or3  ) {
+; CHECK-LABEL: define i64 @fshl
+; CHECK-SAME: (i64 [[OR1:%.*]], i64 [[OR2:%.*]], i64 [[OR3:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x i64> poison, i64 [[OR2]], i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x i64> [[TMP0]], i64 [[OR3]], i32 1
+; CHECK-NEXT:    [[TMP2:%.*]] = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> [[TMP1]], <2 x i64> zeroinitializer, <2 x i64> <i64 1, i64 2>)
+; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x i64> <i64 poison, i64 0>, i64 [[OR1]], i32 0
+; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <2 x i64> [[TMP1]], <2 x i64> <i64 poison, i64 0>, <2 x i32> <i32 0, i32 3>
+; CHECK-NEXT:    [[TMP5:%.*]] = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> [[TMP3]], <2 x i64> [[TMP4]], <2 x i64> <i64 17, i64 21>)
+; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <2 x i64> [[TMP3]], <2 x i64> <i64 0, i64 poison>, <2 x i32> <i32 2, i32 0>
+; CHECK-NEXT:    [[TMP7:%.*]] = xor <2 x i64> [[TMP2]], [[TMP6]]
+; CHECK-NEXT:    [[TMP8:%.*]] = add <2 x i64> [[TMP7]], [[TMP3]]
+; CHECK-NEXT:    [[TMP9:%.*]] = xor <2 x i64> [[TMP5]], [[TMP8]]
+; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <2 x i64> [[TMP9]], i32 0
+; CHECK-NEXT:    [[TMP11:%.*]] = extractelement <2 x i64> [[TMP8]], i32 1
+; CHECK-NEXT:    [[ADD3:%.*]] = or i64 [[TMP10]], [[TMP11]]
+; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <2 x i64> [[TMP9]], i32 1
+; CHECK-NEXT:    [[XOR5:%.*]] = xor i64 [[ADD3]], [[TMP12]]
+; CHECK-NEXT:    ret i64 [[XOR5]]
+;
+entry:
+  %or4 = tail call i64 @llvm.fshl.i64(i64 %or2, i64 0, i64 1)
+  %xor1 = xor i64 %or4, 0
+  %or5 = tail call i64 @llvm.fshl.i64(i64 %or3, i64 0, i64 2)
+  %xor2 = xor i64 %or5, %or1
+  %add1 = add i64 %xor1, %or1
+  %add2 = add i64 0, %xor2
+  %or6 = tail call i64 @llvm.fshl.i64(i64 %or1, i64 %or2, i64 17)
+  %xor3 = xor i64 %or6, %add1
+  %or7 = tail call i64 @llvm.fshl.i64(i64 0, i64 0, i64 21)
+  %xor4 = xor i64 %or7, %add2
+  %add3 = or i64 %xor3, %add2
+  %xor5 = xor i64 %add3, %xor4
+  ret i64 %xor5
+}
+
+declare i64 @llvm.fshl.i64(i64, i64, i64)


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D147056.509037.patch
Type: text/x-patch
Size: 2643 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20230328/68fb904e/attachment.bin>


More information about the llvm-commits mailing list