[llvm] e165bc2 - [SLP][AArch64] Extend extracts-from-scalarizable-vector.ll test for cmp cost testing. NFC

Wed Jun 28 09:16:38 PDT 2023

Author: David Green
Date: 2023-06-28T17:16:34+01:00
New Revision: e165bc263132d3445f69a31e95a7b82336ec7a1b

URL: https://github.com/llvm/llvm-project/commit/e165bc263132d3445f69a31e95a7b82336ec7a1b
DIFF: https://github.com/llvm/llvm-project/commit/e165bc263132d3445f69a31e95a7b82336ec7a1b.diff

LOG: [SLP][AArch64] Extend extracts-from-scalarizable-vector.ll test for cmp cost testing. NFC

See D153507. The existing test is over-simplified; as written, it should have
been simplified prior to SLP vectorization. I have left it as-is to ensure the
crash it was protecting against doesn't arise again. A new test with valid
inputs is also added to show the incorrect costs of alt cmp vectorization.

Added: 
    

Modified: 
    llvm/test/Transforms/SLPVectorizer/AArch64/extracts-from-scalarizable-vector.ll

Removed: 
    


################################################################################
diff  --git a/llvm/test/Transforms/SLPVectorizer/AArch64/extracts-from-scalarizable-vector.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/extracts-from-scalarizable-vector.ll
index 1ace27f1b46ef..ac4219e4c7f3f 100644

--- a/llvm/test/Transforms/SLPVectorizer/AArch64/extracts-from-scalarizable-vector.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/extracts-from-scalarizable-vector.ll
@@ -1,8 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
 ; RUN: opt -S -passes=slp-vectorizer -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s
 
-define i1 @test() {
-; CHECK-LABEL: define i1 @test() {
+define i1 @degenerate() {
+; CHECK-LABEL: define i1 @degenerate() {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = freeze <4 x i1> zeroinitializer
 ; CHECK-NEXT:    [[TMP1:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> [[TMP0]])
@@ -20,3 +20,28 @@ entry:
   %or.cond30 = select i1 %or.cond29, i1 %cmp10, i1 false
   ret i1 %or.cond30
 }
+
+define i1 @with_inputs(<4 x fp128> %a) {
+; CHECK-LABEL: define i1 @with_inputs
+; CHECK-SAME: (<4 x fp128> [[A:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <4 x fp128> [[A]], <4 x fp128> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
+; CHECK-NEXT:    [[TMP1:%.*]] = fcmp olt <4 x fp128> [[TMP0]], zeroinitializer
+; CHECK-NEXT:    [[TMP2:%.*]] = fcmp ogt <4 x fp128> [[TMP0]], zeroinitializer
+; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <4 x i1> [[TMP1]], <4 x i1> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
+; CHECK-NEXT:    [[TMP4:%.*]] = freeze <4 x i1> [[TMP3]]
+; CHECK-NEXT:    [[TMP5:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> [[TMP4]])
+; CHECK-NEXT:    ret i1 [[TMP5]]
+;
+entry:
+  %0 = extractelement <4 x fp128> %a, i32 0
+  %cmp = fcmp ogt fp128 %0, 0xL00000000000000000000000000000000
+  %cmp3 = fcmp olt fp128 %0, 0xL00000000000000000000000000000000
+  %or.cond = and i1 %cmp, %cmp3
+  %1 = extractelement <4 x fp128> %a, i32 1
+  %cmp6 = fcmp ogt fp128 %1, 0xL00000000000000000000000000000000
+  %or.cond29 = select i1 %or.cond, i1 %cmp6, i1 false
+  %cmp10 = fcmp olt fp128 %1, 0xL00000000000000000000000000000000
+  %or.cond30 = select i1 %or.cond29, i1 %cmp10, i1 false
+  ret i1 %or.cond30
+}