[llvm] b4c6607 - [VectorCombine][X86] Add test showing foldShuffleOfShuffles folding shuffles that would be better separate
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Tue Apr 23 03:17:36 PDT 2024
Author: Simon Pilgrim
Date: 2024-04-23T11:11:14+01:00
New Revision: b4c6607add8bba778b0a278ce7d069c057219eed
URL: https://github.com/llvm/llvm-project/commit/b4c6607add8bba778b0a278ce7d069c057219eed
DIFF: https://github.com/llvm/llvm-project/commit/b4c6607add8bba778b0a278ce7d069c057219eed.diff
LOG: [VectorCombine][X86] Add test showing foldShuffleOfShuffles folding shuffles that would be better separate
On AVX+ targets a broadcast load can be treated as free.
Added:
Modified:
llvm/test/Transforms/VectorCombine/X86/shuffle-of-shuffles.ll
Removed:
################################################################################
diff --git a/llvm/test/Transforms/VectorCombine/X86/shuffle-of-shuffles.ll b/llvm/test/Transforms/VectorCombine/X86/shuffle-of-shuffles.ll
index 9079a4f693ab8b..21d9d1cee5d1fa 100644
--- a/llvm/test/Transforms/VectorCombine/X86/shuffle-of-shuffles.ll
+++ b/llvm/test/Transforms/VectorCombine/X86/shuffle-of-shuffles.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
-; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=sse2 | FileCheck %s
-; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=avx2 | FileCheck %s
+; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=sse2 | FileCheck %s --check-prefixes=CHECK,SSE
+; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=avx2 | FileCheck %s --check-prefixes=CHECK,AVX
; fold to identity
@@ -43,3 +43,28 @@ define <8 x i32> @concat_extract_subvectors_poison(<8 x i32> %x) {
%concat = shufflevector <4 x i32> %lo, <4 x i32> %hi, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
ret <8 x i32> %concat
}
+
+define <4 x double> @blend_broadcasts_v4f64(ptr %p0, ptr %p1) {
+; SSE-LABEL: define <4 x double> @blend_broadcasts_v4f64(
+; SSE-SAME: ptr [[P0:%.*]], ptr [[P1:%.*]]) #[[ATTR0]] {
+; SSE-NEXT: [[LD0:%.*]] = load <4 x double>, ptr [[P0]], align 32
+; SSE-NEXT: [[LD1:%.*]] = load <4 x double>, ptr [[P1]], align 32
+; SSE-NEXT: [[BCST0:%.*]] = shufflevector <4 x double> [[LD0]], <4 x double> undef, <4 x i32> zeroinitializer
+; SSE-NEXT: [[BCST1:%.*]] = shufflevector <4 x double> [[LD1]], <4 x double> undef, <4 x i32> zeroinitializer
+; SSE-NEXT: [[BLEND:%.*]] = shufflevector <4 x double> [[BCST0]], <4 x double> [[BCST1]], <4 x i32> <i32 0, i32 5, i32 6, i32 3>
+; SSE-NEXT: ret <4 x double> [[BLEND]]
+;
+; AVX-LABEL: define <4 x double> @blend_broadcasts_v4f64(
+; AVX-SAME: ptr [[P0:%.*]], ptr [[P1:%.*]]) #[[ATTR0]] {
+; AVX-NEXT: [[LD0:%.*]] = load <4 x double>, ptr [[P0]], align 32
+; AVX-NEXT: [[LD1:%.*]] = load <4 x double>, ptr [[P1]], align 32
+; AVX-NEXT: [[BLEND:%.*]] = shufflevector <4 x double> [[LD0]], <4 x double> [[LD1]], <4 x i32> <i32 0, i32 4, i32 4, i32 0>
+; AVX-NEXT: ret <4 x double> [[BLEND]]
+;
+ %ld0 = load <4 x double>, ptr %p0, align 32
+ %ld1 = load <4 x double>, ptr %p1, align 32
+ %bcst0 = shufflevector <4 x double> %ld0, <4 x double> undef, <4 x i32> zeroinitializer
+ %bcst1 = shufflevector <4 x double> %ld1, <4 x double> undef, <4 x i32> zeroinitializer
+ %blend = shufflevector <4 x double> %bcst0, <4 x double> %bcst1, <4 x i32> <i32 0, i32 5, i32 6, i32 3>
+ ret <4 x double> %blend
+}
More information about the llvm-commits mailing list