[llvm] 4a42658 - [VectorCombine][X86] shuffle-of-cmps.ll - tweak shuf_fcmp_oeq_v4i32 shuffle to be not so cheap

Tue Jan 7 03:11:24 PST 2025

Author: Simon Pilgrim
Date: 2025-01-07T11:07:48Z
New Revision: 4a42658c1be47ea8cb6f26f7cb1d1aed258845fe

URL: https://github.com/llvm/llvm-project/commit/4a42658c1be47ea8cb6f26f7cb1d1aed258845fe
DIFF: https://github.com/llvm/llvm-project/commit/4a42658c1be47ea8cb6f26f7cb1d1aed258845fe.diff

LOG: [VectorCombine][X86] shuffle-of-cmps.ll - tweak shuf_fcmp_oeq_v4i32 shuffle to be not so cheap

An upcoming patch will recognise this as a cheap INSERTPS shuffle - alter the shuffle to ensure the 2 x FCMP is still cheaper on SSE4 targets

Added: 
    

Modified: 
    llvm/test/Transforms/VectorCombine/X86/shuffle-of-cmps.ll

Removed: 
    


################################################################################
diff  --git a/llvm/test/Transforms/VectorCombine/X86/shuffle-of-cmps.ll b/llvm/test/Transforms/VectorCombine/X86/shuffle-of-cmps.ll
index 95068ad1f2a432..f9108efa7ee793 100644

--- a/llvm/test/Transforms/VectorCombine/X86/shuffle-of-cmps.ll
+++ b/llvm/test/Transforms/VectorCombine/X86/shuffle-of-cmps.ll
@@ -1,8 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
 ; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=sse2 | FileCheck %s --check-prefixes=CHECK,SSE,SSE2
 ; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=sse4.2 | FileCheck %s --check-prefixes=CHECK,SSE,SSE4
-; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=avx2 | FileCheck %s --check-prefixes=CHECK,AVX,AVX2
-; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=avx512vl | FileCheck %s --check-prefixes=CHECK,AVX,AVX512
+; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=avx2 | FileCheck %s --check-prefixes=CHECK,AVX2
+; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=avx512vl | FileCheck %s --check-prefixes=CHECK,AVX512
 
 declare void @use(<4 x i1>)
 
@@ -105,8 +105,8 @@ define <4 x i32> @shuf_icmp_ugt_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z,
 define <4 x i32> @shuf_fcmp_oeq_v4i32(<4 x float> %x, <4 x float> %y, <4 x float> %z) {
 ; SSE2-LABEL: define <4 x i32> @shuf_fcmp_oeq_v4i32(
 ; SSE2-SAME: <4 x float> [[X:%.*]], <4 x float> [[Y:%.*]], <4 x float> [[Z:%.*]]) #[[ATTR0]] {
-; SSE2-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> [[X]], <4 x float> poison, <4 x i32> <i32 poison, i32 poison, i32 2, i32 0>
-; SSE2-NEXT:    [[TMP2:%.*]] = shufflevector <4 x float> [[Y]], <4 x float> [[Z]], <4 x i32> <i32 poison, i32 poison, i32 6, i32 0>
+; SSE2-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> [[X]], <4 x float> poison, <4 x i32> <i32 poison, i32 poison, i32 0, i32 0>
+; SSE2-NEXT:    [[TMP2:%.*]] = shufflevector <4 x float> [[Y]], <4 x float> [[Z]], <4 x i32> <i32 poison, i32 poison, i32 4, i32 0>
 ; SSE2-NEXT:    [[S:%.*]] = fcmp oeq <4 x float> [[TMP1]], [[TMP2]]
 ; SSE2-NEXT:    [[R:%.*]] = sext <4 x i1> [[S]] to <4 x i32>
 ; SSE2-NEXT:    ret <4 x i32> [[R]]
@@ -115,21 +115,29 @@ define <4 x i32> @shuf_fcmp_oeq_v4i32(<4 x float> %x, <4 x float> %y, <4 x float
 ; SSE4-SAME: <4 x float> [[X:%.*]], <4 x float> [[Y:%.*]], <4 x float> [[Z:%.*]]) #[[ATTR0]] {
 ; SSE4-NEXT:    [[B0:%.*]] = fcmp oeq <4 x float> [[X]], [[Y]]
 ; SSE4-NEXT:    [[B1:%.*]] = fcmp oeq <4 x float> [[X]], [[Z]]
-; SSE4-NEXT:    [[S:%.*]] = shufflevector <4 x i1> [[B0]], <4 x i1> [[B1]], <4 x i32> <i32 poison, i32 poison, i32 6, i32 0>
+; SSE4-NEXT:    [[S:%.*]] = shufflevector <4 x i1> [[B0]], <4 x i1> [[B1]], <4 x i32> <i32 poison, i32 poison, i32 4, i32 0>
 ; SSE4-NEXT:    [[R:%.*]] = sext <4 x i1> [[S]] to <4 x i32>
 ; SSE4-NEXT:    ret <4 x i32> [[R]]
 ;
-; AVX-LABEL: define <4 x i32> @shuf_fcmp_oeq_v4i32(
-; AVX-SAME: <4 x float> [[X:%.*]], <4 x float> [[Y:%.*]], <4 x float> [[Z:%.*]]) #[[ATTR0]] {
-; AVX-NEXT:    [[B0:%.*]] = fcmp oeq <4 x float> [[X]], [[Y]]
-; AVX-NEXT:    [[B1:%.*]] = fcmp oeq <4 x float> [[X]], [[Z]]
-; AVX-NEXT:    [[S:%.*]] = shufflevector <4 x i1> [[B0]], <4 x i1> [[B1]], <4 x i32> <i32 poison, i32 poison, i32 6, i32 0>
-; AVX-NEXT:    [[R:%.*]] = sext <4 x i1> [[S]] to <4 x i32>
-; AVX-NEXT:    ret <4 x i32> [[R]]
+; AVX2-LABEL: define <4 x i32> @shuf_fcmp_oeq_v4i32(
+; AVX2-SAME: <4 x float> [[X:%.*]], <4 x float> [[Y:%.*]], <4 x float> [[Z:%.*]]) #[[ATTR0]] {
+; AVX2-NEXT:    [[B0:%.*]] = fcmp oeq <4 x float> [[X]], [[Y]]
+; AVX2-NEXT:    [[B1:%.*]] = fcmp oeq <4 x float> [[X]], [[Z]]
+; AVX2-NEXT:    [[S:%.*]] = shufflevector <4 x i1> [[B0]], <4 x i1> [[B1]], <4 x i32> <i32 poison, i32 poison, i32 4, i32 0>
+; AVX2-NEXT:    [[R:%.*]] = sext <4 x i1> [[S]] to <4 x i32>
+; AVX2-NEXT:    ret <4 x i32> [[R]]
+;
+; AVX512-LABEL: define <4 x i32> @shuf_fcmp_oeq_v4i32(
+; AVX512-SAME: <4 x float> [[X:%.*]], <4 x float> [[Y:%.*]], <4 x float> [[Z:%.*]]) #[[ATTR0]] {
+; AVX512-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> [[X]], <4 x float> poison, <4 x i32> <i32 poison, i32 poison, i32 0, i32 0>
+; AVX512-NEXT:    [[TMP2:%.*]] = shufflevector <4 x float> [[Y]], <4 x float> [[Z]], <4 x i32> <i32 poison, i32 poison, i32 4, i32 0>
+; AVX512-NEXT:    [[S:%.*]] = fcmp oeq <4 x float> [[TMP1]], [[TMP2]]
+; AVX512-NEXT:    [[R:%.*]] = sext <4 x i1> [[S]] to <4 x i32>
+; AVX512-NEXT:    ret <4 x i32> [[R]]
 ;
   %b0 = fcmp oeq <4 x float> %x, %y
   %b1 = fcmp oeq <4 x float> %x, %z
-  %s = shufflevector <4 x i1> %b0, <4 x i1> %b1, <4 x i32> <i32 poison, i32 poison, i32 6, i32 0>
+  %s = shufflevector <4 x i1> %b0, <4 x i1> %b1, <4 x i32> <i32 poison, i32 poison, i32 4, i32 0>
   %r = sext <4 x i1> %s to <4 x i32>
   ret <4 x i32> %r
 }