[llvm] b4c6607 - [VectorCombine][X86] Add test showing foldShuffleOfShuffles folding shuffles that would be better separate

Tue Apr 23 03:17:36 PDT 2024

Author: Simon Pilgrim
Date: 2024-04-23T11:11:14+01:00
New Revision: b4c6607add8bba778b0a278ce7d069c057219eed

URL: https://github.com/llvm/llvm-project/commit/b4c6607add8bba778b0a278ce7d069c057219eed
DIFF: https://github.com/llvm/llvm-project/commit/b4c6607add8bba778b0a278ce7d069c057219eed.diff

LOG: [VectorCombine][X86] Add test showing foldShuffleOfShuffles folding shuffles that would be better separate

On AVX+ targets a broadcast load can be treated as free.

Added: 
    

Modified: 
    llvm/test/Transforms/VectorCombine/X86/shuffle-of-shuffles.ll

Removed: 
    


################################################################################
diff  --git a/llvm/test/Transforms/VectorCombine/X86/shuffle-of-shuffles.ll b/llvm/test/Transforms/VectorCombine/X86/shuffle-of-shuffles.ll
index 9079a4f693ab8b..21d9d1cee5d1fa 100644

--- a/llvm/test/Transforms/VectorCombine/X86/shuffle-of-shuffles.ll
+++ b/llvm/test/Transforms/VectorCombine/X86/shuffle-of-shuffles.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
-; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=sse2 | FileCheck %s
-; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=avx2 | FileCheck %s
+; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=sse2 | FileCheck %s --check-prefixes=CHECK,SSE
+; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=avx2 | FileCheck %s --check-prefixes=CHECK,AVX
 
 ; fold to identity
 
@@ -43,3 +43,28 @@ define <8 x i32> @concat_extract_subvectors_poison(<8 x i32> %x) {
   %concat = shufflevector <4 x i32> %lo, <4 x i32> %hi, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
   ret <8 x i32> %concat
 }
+
+define  <4 x double> @blend_broadcasts_v4f64(ptr %p0, ptr %p1)  { ; blend of two lane-0 splats; the sse2 run keeps all three shuffles, the avx2 run expects one merged shuffle
+; SSE-LABEL: define <4 x double> @blend_broadcasts_v4f64(
+; SSE-SAME: ptr [[P0:%.*]], ptr [[P1:%.*]]) #[[ATTR0]] {
+; SSE-NEXT:    [[LD0:%.*]] = load <4 x double>, ptr [[P0]], align 32
+; SSE-NEXT:    [[LD1:%.*]] = load <4 x double>, ptr [[P1]], align 32
+; SSE-NEXT:    [[BCST0:%.*]] = shufflevector <4 x double> [[LD0]], <4 x double> undef, <4 x i32> zeroinitializer
+; SSE-NEXT:    [[BCST1:%.*]] = shufflevector <4 x double> [[LD1]], <4 x double> undef, <4 x i32> zeroinitializer
+; SSE-NEXT:    [[BLEND:%.*]] = shufflevector <4 x double> [[BCST0]], <4 x double> [[BCST1]], <4 x i32> <i32 0, i32 5, i32 6, i32 3>
+; SSE-NEXT:    ret <4 x double> [[BLEND]]
+;
+; AVX-LABEL: define <4 x double> @blend_broadcasts_v4f64(
+; AVX-SAME: ptr [[P0:%.*]], ptr [[P1:%.*]]) #[[ATTR0]] {
+; AVX-NEXT:    [[LD0:%.*]] = load <4 x double>, ptr [[P0]], align 32
+; AVX-NEXT:    [[LD1:%.*]] = load <4 x double>, ptr [[P1]], align 32
+; AVX-NEXT:    [[BLEND:%.*]] = shufflevector <4 x double> [[LD0]], <4 x double> [[LD1]], <4 x i32> <i32 0, i32 4, i32 4, i32 0>
+; AVX-NEXT:    ret <4 x double> [[BLEND]]
+;
+  %ld0 = load <4 x double>, ptr %p0, align 32
+  %ld1 = load <4 x double>, ptr %p1, align 32
+  %bcst0 = shufflevector <4 x double> %ld0, <4 x double> undef, <4 x i32> zeroinitializer ; all-zero mask: every lane takes element 0 of %ld0
+  %bcst1 = shufflevector <4 x double> %ld1, <4 x double> undef, <4 x i32> zeroinitializer ; all-zero mask: every lane takes element 0 of %ld1
+  %blend = shufflevector <4 x double> %bcst0, <4 x double> %bcst1, <4 x i32> <i32 0, i32 5, i32 6, i32 3> ; lanes 0 and 3 from %bcst0, lanes 1 and 2 from %bcst1 (indices 4-7 select the second operand)
+  ret <4 x double> %blend
+}