[llvm] 162284b - [SLP][X86] Add SSE4 test coverage to minmax reduction tests
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Wed Apr 12 09:41:42 PDT 2023
Author: Simon Pilgrim
Date: 2023-04-12T17:41:31+01:00
New Revision: 162284b2e1a970a01144d1d8e7f8d4fd1e03c5bf
URL: https://github.com/llvm/llvm-project/commit/162284b2e1a970a01144d1d8e7f8d4fd1e03c5bf
DIFF: https://github.com/llvm/llvm-project/commit/162284b2e1a970a01144d1d8e7f8d4fd1e03c5bf.diff
LOG: [SLP][X86] Add SSE4 test coverage to minmax reduction tests
Improve coverage for D148036
Added:
Modified:
llvm/test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll
llvm/test/Transforms/SLPVectorizer/X86/horizontal-smax.ll
Removed:
################################################################################
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll b/llvm/test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll
index a4489ce026bd..9c456df25f22 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -mtriple=x86_64-unknown-linux -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,DEFAULT,SSE
+; RUN: opt < %s -mtriple=x86_64-unknown-linux -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,DEFAULT,SSE,SSE2
+; RUN: opt < %s -mtriple=x86_64-unknown-linux -mcpu=x86-64-v2 -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,DEFAULT,SSE,SSE4
; RUN: opt < %s -mtriple=x86_64-unknown-linux -mcpu=corei7-avx -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,DEFAULT,AVX
; RUN: opt < %s -mtriple=x86_64-unknown-linux -mcpu=core-avx2 -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,DEFAULT,AVX2
; RUN: opt < %s -mtriple=x86_64-unknown-linux -mcpu=skx -passes=slp-vectorizer -S -slp-threshold=-100 | FileCheck %s --check-prefixes=CHECK,THRESH
@@ -1112,18 +1113,23 @@ define i16 @smin_intrinsic_rdx_v8i16(ptr %p0) {
}
define i64 @umax_intrinsic_rdx_v4i64(ptr %p0) {
-; SSE-LABEL: @umax_intrinsic_rdx_v4i64(
-; SSE-NEXT: [[P1:%.*]] = getelementptr inbounds i64, ptr [[P0:%.*]], i64 1
-; SSE-NEXT: [[P2:%.*]] = getelementptr inbounds i64, ptr [[P0]], i64 2
-; SSE-NEXT: [[P3:%.*]] = getelementptr inbounds i64, ptr [[P0]], i64 3
-; SSE-NEXT: [[T0:%.*]] = load i64, ptr [[P0]], align 4
-; SSE-NEXT: [[T1:%.*]] = load i64, ptr [[P1]], align 4
-; SSE-NEXT: [[T2:%.*]] = load i64, ptr [[P2]], align 4
-; SSE-NEXT: [[T3:%.*]] = load i64, ptr [[P3]], align 4
-; SSE-NEXT: [[M10:%.*]] = tail call i64 @llvm.umax.i64(i64 [[T1]], i64 [[T0]])
-; SSE-NEXT: [[M32:%.*]] = tail call i64 @llvm.umax.i64(i64 [[T3]], i64 [[T2]])
-; SSE-NEXT: [[M:%.*]] = tail call i64 @llvm.umax.i64(i64 [[M32]], i64 [[M10]])
-; SSE-NEXT: ret i64 [[M]]
+; SSE2-LABEL: @umax_intrinsic_rdx_v4i64(
+; SSE2-NEXT: [[P1:%.*]] = getelementptr inbounds i64, ptr [[P0:%.*]], i64 1
+; SSE2-NEXT: [[P2:%.*]] = getelementptr inbounds i64, ptr [[P0]], i64 2
+; SSE2-NEXT: [[P3:%.*]] = getelementptr inbounds i64, ptr [[P0]], i64 3
+; SSE2-NEXT: [[T0:%.*]] = load i64, ptr [[P0]], align 4
+; SSE2-NEXT: [[T1:%.*]] = load i64, ptr [[P1]], align 4
+; SSE2-NEXT: [[T2:%.*]] = load i64, ptr [[P2]], align 4
+; SSE2-NEXT: [[T3:%.*]] = load i64, ptr [[P3]], align 4
+; SSE2-NEXT: [[M10:%.*]] = tail call i64 @llvm.umax.i64(i64 [[T1]], i64 [[T0]])
+; SSE2-NEXT: [[M32:%.*]] = tail call i64 @llvm.umax.i64(i64 [[T3]], i64 [[T2]])
+; SSE2-NEXT: [[M:%.*]] = tail call i64 @llvm.umax.i64(i64 [[M32]], i64 [[M10]])
+; SSE2-NEXT: ret i64 [[M]]
+;
+; SSE4-LABEL: @umax_intrinsic_rdx_v4i64(
+; SSE4-NEXT: [[TMP1:%.*]] = load <4 x i64>, ptr [[P0:%.*]], align 4
+; SSE4-NEXT: [[TMP2:%.*]] = call i64 @llvm.vector.reduce.umax.v4i64(<4 x i64> [[TMP1]])
+; SSE4-NEXT: ret i64 [[TMP2]]
;
; AVX-LABEL: @umax_intrinsic_rdx_v4i64(
; AVX-NEXT: [[P1:%.*]] = getelementptr inbounds i64, ptr [[P0:%.*]], i64 1
@@ -1246,3 +1252,5 @@ define void @PR49730() {
%t14 = call i32 @llvm.umin.i32(i32 %t13, i32 93)
ret void
}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; SSE: {{.*}}
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/horizontal-smax.ll b/llvm/test/Transforms/SLPVectorizer/X86/horizontal-smax.ll
index 88f584f2132f..a838621a71d4 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/horizontal-smax.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/horizontal-smax.ll
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -mtriple=x86_64-unknown-linux -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,SSE
+; RUN: opt < %s -mtriple=x86_64-unknown-linux -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,SSE,SSE2
+; RUN: opt < %s -mtriple=x86_64-unknown-linux -mcpu=x86-64-v2 -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,SSE,SSE4
; RUN: opt < %s -mtriple=x86_64-unknown-linux -mcpu=corei7-avx -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,AVX
; RUN: opt < %s -mtriple=x86_64-unknown-linux -mcpu=core-avx2 -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,AVX
@@ -102,3 +103,5 @@ define i32 @smax_v16i32(i32) {
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; AVX: {{.*}}
; SSE: {{.*}}
+; SSE2: {{.*}}
+; SSE4: {{.*}}
More information about the llvm-commits mailing list