[llvm] [X86][AVX] Match v4f64 blend from shuffle of scalar values. (PR #135753)
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Tue Apr 15 06:40:27 PDT 2025
================
@@ -263,3 +263,389 @@ define <8 x i16> @blendw_to_blendd_fail_16(<8 x i16> %x, <8 x i16> %y, <8 x i16>
%shuffle = shufflevector <8 x i16> %x1, <8 x i16> %y, <8 x i32> <i32 8, i32 1, i32 10, i32 11, i32 4, i32 5, i32 14, i32 15>
ret <8 x i16> %shuffle
}
+
+define <4 x double> @blend_broadcasts_v4f64(ptr %p0, ptr %p1) {
+; X86-SSE41-LABEL: blend_broadcasts_v4f64:
+; X86-SSE41: # %bb.0:
+; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-SSE41-NEXT: movaps (%ecx), %xmm2
+; X86-SSE41-NEXT: movaps (%eax), %xmm1
+; X86-SSE41-NEXT: movaps %xmm2, %xmm0
+; X86-SSE41-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X86-SSE41-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; X86-SSE41-NEXT: retl
+;
+; X64-SSE41-LABEL: blend_broadcasts_v4f64:
+; X64-SSE41: # %bb.0:
+; X64-SSE41-NEXT: movaps (%rdi), %xmm2
+; X64-SSE41-NEXT: movaps (%rsi), %xmm1
+; X64-SSE41-NEXT: movaps %xmm2, %xmm0
+; X64-SSE41-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X64-SSE41-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; X64-SSE41-NEXT: retq
+;
+; X86-AVX-LABEL: blend_broadcasts_v4f64:
+; X86-AVX: # %bb.0:
+; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-AVX-NEXT: vbroadcastsd (%ecx), %ymm0
+; X86-AVX-NEXT: vbroadcastsd (%eax), %ymm1
+; X86-AVX-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5],ymm0[6,7]
+; X86-AVX-NEXT: retl
+;
+; X64-AVX-LABEL: blend_broadcasts_v4f64:
+; X64-AVX: # %bb.0:
+; X64-AVX-NEXT: vbroadcastsd (%rdi), %ymm0
+; X64-AVX-NEXT: vbroadcastsd (%rsi), %ymm1
+; X64-AVX-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5],ymm0[6,7]
+; X64-AVX-NEXT: retq
+;
+; X86-AVX2-LABEL: blend_broadcasts_v4f64:
+; X86-AVX2: # %bb.0:
+; X86-AVX2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-AVX2-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-AVX2-NEXT: vbroadcastsd (%ecx), %ymm0
+; X86-AVX2-NEXT: vbroadcastsd (%eax), %ymm1
+; X86-AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5],ymm0[6,7]
+; X86-AVX2-NEXT: retl
+;
+; X64-AVX2-LABEL: blend_broadcasts_v4f64:
+; X64-AVX2: # %bb.0:
+; X64-AVX2-NEXT: vbroadcastsd (%rdi), %ymm0
+; X64-AVX2-NEXT: vbroadcastsd (%rsi), %ymm1
+; X64-AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5],ymm0[6,7]
+; X64-AVX2-NEXT: retq
+;
+; X86-AVX512-LABEL: blend_broadcasts_v4f64:
+; X86-AVX512: # %bb.0:
+; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-AVX512-NEXT: vbroadcastsd (%ecx), %ymm0
+; X86-AVX512-NEXT: vbroadcastsd (%eax), %ymm1
+; X86-AVX512-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5],ymm0[6,7]
+; X86-AVX512-NEXT: retl
+;
+; X64-AVX512-LABEL: blend_broadcasts_v4f64:
+; X64-AVX512: # %bb.0:
+; X64-AVX512-NEXT: vbroadcastsd (%rdi), %ymm0
+; X64-AVX512-NEXT: vbroadcastsd (%rsi), %ymm1
+; X64-AVX512-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5],ymm0[6,7]
+; X64-AVX512-NEXT: retq
+ %ld0 = load <4 x double>, ptr %p0, align 32
+ %ld1 = load <4 x double>, ptr %p1, align 32
+ %bcst0 = shufflevector <4 x double> %ld0, <4 x double> poison, <4 x i32> zeroinitializer
+ %bcst1 = shufflevector <4 x double> %ld1, <4 x double> poison, <4 x i32> zeroinitializer
+ %blend = shufflevector <4 x double> %bcst0, <4 x double> %bcst1, <4 x i32> <i32 0, i32 5, i32 6, i32 3>
+ ret <4 x double> %blend
+}
----------------
RKSimon wrote:
These tests should be in vector-shuffle-256-v4.ll.
https://github.com/llvm/llvm-project/pull/135753
More information about the llvm-commits mailing list