[llvm] 162284b - [SLP][X86] Add SSE4 test coverage to minmax reduction tests

Wed Apr 12 09:41:42 PDT 2023

Author: Simon Pilgrim
Date: 2023-04-12T17:41:31+01:00
New Revision: 162284b2e1a970a01144d1d8e7f8d4fd1e03c5bf

URL: https://github.com/llvm/llvm-project/commit/162284b2e1a970a01144d1d8e7f8d4fd1e03c5bf
DIFF: https://github.com/llvm/llvm-project/commit/162284b2e1a970a01144d1d8e7f8d4fd1e03c5bf.diff

LOG: [SLP][X86] Add SSE4 test coverage to minmax reduction tests

Improve coverage for D148036

Added: 
    

Modified: 
    llvm/test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll
    llvm/test/Transforms/SLPVectorizer/X86/horizontal-smax.ll

Removed: 
    


################################################################################
diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll b/llvm/test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll
index a4489ce026bd..9c456df25f22 100644

--- a/llvm/test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll
@@ -1,5 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -mtriple=x86_64-unknown-linux -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,DEFAULT,SSE
+; RUN: opt < %s -mtriple=x86_64-unknown-linux -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,DEFAULT,SSE,SSE2
+; RUN: opt < %s -mtriple=x86_64-unknown-linux -mcpu=x86-64-v2 -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,DEFAULT,SSE,SSE4
 ; RUN: opt < %s -mtriple=x86_64-unknown-linux -mcpu=corei7-avx -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,DEFAULT,AVX
 ; RUN: opt < %s -mtriple=x86_64-unknown-linux -mcpu=core-avx2 -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,DEFAULT,AVX2
 ; RUN: opt < %s -mtriple=x86_64-unknown-linux -mcpu=skx -passes=slp-vectorizer -S -slp-threshold=-100 | FileCheck %s --check-prefixes=CHECK,THRESH
@@ -1112,18 +1113,23 @@ define i16 @smin_intrinsic_rdx_v8i16(ptr %p0) {
 }
 
 define i64 @umax_intrinsic_rdx_v4i64(ptr %p0) {
-; SSE-LABEL: @umax_intrinsic_rdx_v4i64(
-; SSE-NEXT:    [[P1:%.*]] = getelementptr inbounds i64, ptr [[P0:%.*]], i64 1
-; SSE-NEXT:    [[P2:%.*]] = getelementptr inbounds i64, ptr [[P0]], i64 2
-; SSE-NEXT:    [[P3:%.*]] = getelementptr inbounds i64, ptr [[P0]], i64 3
-; SSE-NEXT:    [[T0:%.*]] = load i64, ptr [[P0]], align 4
-; SSE-NEXT:    [[T1:%.*]] = load i64, ptr [[P1]], align 4
-; SSE-NEXT:    [[T2:%.*]] = load i64, ptr [[P2]], align 4
-; SSE-NEXT:    [[T3:%.*]] = load i64, ptr [[P3]], align 4
-; SSE-NEXT:    [[M10:%.*]] = tail call i64 @llvm.umax.i64(i64 [[T1]], i64 [[T0]])
-; SSE-NEXT:    [[M32:%.*]] = tail call i64 @llvm.umax.i64(i64 [[T3]], i64 [[T2]])
-; SSE-NEXT:    [[M:%.*]] = tail call i64 @llvm.umax.i64(i64 [[M32]], i64 [[M10]])
-; SSE-NEXT:    ret i64 [[M]]
+; SSE2-LABEL: @umax_intrinsic_rdx_v4i64(
+; SSE2-NEXT:    [[P1:%.*]] = getelementptr inbounds i64, ptr [[P0:%.*]], i64 1
+; SSE2-NEXT:    [[P2:%.*]] = getelementptr inbounds i64, ptr [[P0]], i64 2
+; SSE2-NEXT:    [[P3:%.*]] = getelementptr inbounds i64, ptr [[P0]], i64 3
+; SSE2-NEXT:    [[T0:%.*]] = load i64, ptr [[P0]], align 4
+; SSE2-NEXT:    [[T1:%.*]] = load i64, ptr [[P1]], align 4
+; SSE2-NEXT:    [[T2:%.*]] = load i64, ptr [[P2]], align 4
+; SSE2-NEXT:    [[T3:%.*]] = load i64, ptr [[P3]], align 4
+; SSE2-NEXT:    [[M10:%.*]] = tail call i64 @llvm.umax.i64(i64 [[T1]], i64 [[T0]])
+; SSE2-NEXT:    [[M32:%.*]] = tail call i64 @llvm.umax.i64(i64 [[T3]], i64 [[T2]])
+; SSE2-NEXT:    [[M:%.*]] = tail call i64 @llvm.umax.i64(i64 [[M32]], i64 [[M10]])
+; SSE2-NEXT:    ret i64 [[M]]
+;
+; SSE4-LABEL: @umax_intrinsic_rdx_v4i64(
+; SSE4-NEXT:    [[TMP1:%.*]] = load <4 x i64>, ptr [[P0:%.*]], align 4
+; SSE4-NEXT:    [[TMP2:%.*]] = call i64 @llvm.vector.reduce.umax.v4i64(<4 x i64> [[TMP1]])
+; SSE4-NEXT:    ret i64 [[TMP2]]
 ;
 ; AVX-LABEL: @umax_intrinsic_rdx_v4i64(
 ; AVX-NEXT:    [[P1:%.*]] = getelementptr inbounds i64, ptr [[P0:%.*]], i64 1
@@ -1246,3 +1252,5 @@ define void @PR49730() {
   %t14 = call i32 @llvm.umin.i32(i32 %t13, i32 93)
   ret void
 }
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; SSE: {{.*}}

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/horizontal-smax.ll b/llvm/test/Transforms/SLPVectorizer/X86/horizontal-smax.ll
index 88f584f2132f..a838621a71d4 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/horizontal-smax.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/horizontal-smax.ll
@@ -1,5 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -mtriple=x86_64-unknown-linux -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,SSE
+; RUN: opt < %s -mtriple=x86_64-unknown-linux -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,SSE,SSE2
+; RUN: opt < %s -mtriple=x86_64-unknown-linux -mcpu=x86-64-v2 -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,SSE,SSE4
 ; RUN: opt < %s -mtriple=x86_64-unknown-linux -mcpu=corei7-avx -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,AVX
 ; RUN: opt < %s -mtriple=x86_64-unknown-linux -mcpu=core-avx2 -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,AVX
 
@@ -102,3 +103,5 @@ define i32 @smax_v16i32(i32) {
 ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
 ; AVX: {{.*}}
 ; SSE: {{.*}}
+; SSE2: {{.*}}
+; SSE4: {{.*}}