[llvm] [X86][AVX] Match v4f64 blend from shuffle of scalar values. (PR #135753)
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Thu Apr 24 03:57:33 PDT 2025
================
@@ -2360,6 +2360,87 @@ define <4 x double> @unpckh_v4f64(<4 x double> %x, <4 x double> %y) {
ret <4 x double> %unpckh
}
+define <4 x double> @blend_broadcasts_v4f64(ptr %p0, ptr %p1) {
+; ALL-LABEL: blend_broadcasts_v4f64:
+; ALL: # %bb.0:
+; ALL-NEXT: vbroadcastsd (%rdi), %ymm0
+; ALL-NEXT: vbroadcastsd (%rsi), %ymm1
+; ALL-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5],ymm0[6,7]
+; ALL-NEXT: retq
+ %ld0 = load <4 x double>, ptr %p0, align 32
+ %ld1 = load <4 x double>, ptr %p1, align 32
+ %bcst0 = shufflevector <4 x double> %ld0, <4 x double> poison, <4 x i32> zeroinitializer
+ %bcst1 = shufflevector <4 x double> %ld1, <4 x double> poison, <4 x i32> zeroinitializer
+ %blend = shufflevector <4 x double> %bcst0, <4 x double> %bcst1, <4 x i32> <i32 0, i32 5, i32 6, i32 3>
+ ret <4 x double> %blend
+}
+
+define <4 x double> @blend_broadcasts_v2f64(ptr %p0, ptr %p1) {
+; AVX1OR2-LABEL: blend_broadcasts_v2f64:
+; AVX1OR2: # %bb.0:
+; AVX1OR2-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1]
+; AVX1OR2-NEXT: vbroadcastf128 {{.*#+}} ymm1 = mem[0,1,0,1]
+; AVX1OR2-NEXT: vblendps {{.*#+}} ymm2 = ymm1[0,1,2,3],ymm0[4,5,6,7]
+; AVX1OR2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
+; AVX1OR2-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm2[0],ymm0[2],ymm2[2]
+; AVX1OR2-NEXT: retq
+;
+; AVX512VL-LABEL: blend_broadcasts_v2f64:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vbroadcastf128 {{.*#+}} ymm1 = mem[0,1,0,1]
+; AVX512VL-NEXT: vbroadcastf128 {{.*#+}} ymm2 = mem[0,1,0,1]
+; AVX512VL-NEXT: vpmovsxbq {{.*#+}} ymm0 = [0,4,6,2]
+; AVX512VL-NEXT: vpermi2pd %ymm1, %ymm2, %ymm0
+; AVX512VL-NEXT: retq
+ %ld0 = load <2 x double>, ptr %p0, align 32
+ %ld1 = load <2 x double>, ptr %p1, align 32
+ %blend = shufflevector <2 x double> %ld0, <2 x double> %ld1, <4 x i32> <i32 0, i32 2, i32 2, i32 0>
+ ret <4 x double> %blend
+}
+
+define <4 x double> @blend_broadcasts_v1f64(ptr %p0, ptr %p1) {
+; ALL-LABEL: blend_broadcasts_v1f64:
+; ALL: # %bb.0:
+; ALL-NEXT: vbroadcastsd (%rsi), %ymm0
+; ALL-NEXT: vbroadcastsd (%rdi), %ymm1
+; ALL-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3,4,5],ymm1[6,7]
+; ALL-NEXT: retq
+ %ld0 = load <1 x double>, ptr %p0, align 32
+ %ld1 = load <1 x double>, ptr %p1, align 32
+ %blend = shufflevector <1 x double> %ld0, <1 x double> %ld1, <4 x i32> <i32 0, i32 1, i32 1, i32 0>
+ ret <4 x double> %blend
+}
+
+define <4 x double> @blend_broadcasts_v1f64_4x(ptr %p0, ptr %p1) {
+; ALL-LABEL: blend_broadcasts_v1f64_4x:
+; ALL: # %bb.0:
+; ALL-NEXT: vbroadcastsd (%rsi), %ymm0
+; ALL-NEXT: vbroadcastsd (%rdi), %ymm1
+; ALL-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3,4,5],ymm1[6,7]
+; ALL-NEXT: retq
+ %ld0 = load <1 x double>, ptr %p0, align 32
+ %ld1 = load <1 x double>, ptr %p1, align 32
+ %bcst0 = shufflevector <1 x double> %ld0, <1 x double> poison, <4 x i32> zeroinitializer
+ %bcst1 = shufflevector <1 x double> %ld1, <1 x double> poison, <4 x i32> zeroinitializer
+ %blend = shufflevector <4 x double> %bcst0, <4 x double> %bcst1, <4 x i32> <i32 0, i32 5, i32 6, i32 3>
+ ret <4 x double> %blend
+}
----------------
RKSimon wrote:
The <1 x double> cases are the only ones that actually test your patch, as they are the ones that do get converted to build vectors - you should probably remove the others.
I've added better test coverage in commit acc335bfa7b00ab19bf9832870aaf207f587b48b, which creates build_vector nodes more directly.
https://github.com/llvm/llvm-project/pull/135753
More information about the llvm-commits
mailing list