[llvm] [LLVM][SVE] Extend dup(extract_elt(v,i)) isel patterns to cover all combinations. (PR #115189)
Ricardo Jesus via llvm-commits
llvm-commits at lists.llvm.org
Thu Nov 7 07:57:53 PST 2024
================
@@ -56,19 +151,133 @@ define <vscale x 8 x half> @dup_extract_f16(<vscale x 8 x half> %data) {
ret <vscale x 8 x half> %.splat
}
-define <vscale x 4 x half> @dup_extract_f16_4(<vscale x 4 x half> %data) {
-; CHECK-LABEL: dup_extract_f16_4:
+define <vscale x 8 x half> @dup_extract_nxv8f16_nxv4f16(<vscale x 4 x half> %data) {
+; CHECK-LABEL: dup_extract_nxv8f16_nxv4f16:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z0.s, z0.s[1]
; CHECK-NEXT: ret
%1 = extractelement <vscale x 4 x half> %data, i16 1
+ %.splatinsert = insertelement <vscale x 8 x half> poison, half %1, i32 0
+ %.splat = shufflevector <vscale x 8 x half> %.splatinsert, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
+ ret <vscale x 8 x half> %.splat
+}
+
+define <vscale x 8 x half> @dup_extract_nxv8f16_nxv2f16(<vscale x 2 x half> %data) {
+; CHECK-LABEL: dup_extract_nxv8f16_nxv2f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov z0.d, z0.d[1]
+; CHECK-NEXT: ret
+ %1 = extractelement <vscale x 2 x half> %data, i16 1
+ %.splatinsert = insertelement <vscale x 8 x half> poison, half %1, i32 0
+ %.splat = shufflevector <vscale x 8 x half> %.splatinsert, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
+ ret <vscale x 8 x half> %.splat
+}
+
+define <vscale x 8 x half> @dup_extract_nxv8f16_v8f16(<8 x half> %data) {
+; CHECK-LABEL: dup_extract_nxv8f16_v8f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT: mov z0.h, z0.h[1]
+; CHECK-NEXT: ret
+ %1 = extractelement <8 x half> %data, i16 1
+ %.splatinsert = insertelement <vscale x 8 x half> poison, half %1, i32 0
+ %.splat = shufflevector <vscale x 8 x half> %.splatinsert, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
+ ret <vscale x 8 x half> %.splat
+}
+
+define <vscale x 8 x half> @dup_extract_nxv8f16_v4f16(<4 x half> %data) {
+; CHECK-LABEL: dup_extract_nxv8f16_v4f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT: mov z0.h, z0.h[1]
+; CHECK-NEXT: ret
+ %1 = extractelement <4 x half> %data, i16 1
+ %.splatinsert = insertelement <vscale x 8 x half> poison, half %1, i32 0
+ %.splat = shufflevector <vscale x 8 x half> %.splatinsert, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
+ ret <vscale x 8 x half> %.splat
+}
+
+define <vscale x 4 x half> @dup_extract_nxv4f16_nxv8f16(<vscale x 8 x half> %data) {
+; CHECK-LABEL: dup_extract_nxv4f16_nxv8f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov z0.h, z0.h[1]
+; CHECK-NEXT: ret
+ %1 = extractelement <vscale x 8 x half> %data, i16 1
+ %.splatinsert = insertelement <vscale x 4 x half> poison, half %1, i32 0
+ %.splat = shufflevector <vscale x 4 x half> %.splatinsert, <vscale x 4 x half> poison, <vscale x 4 x i32> zeroinitializer
+ ret <vscale x 4 x half> %.splat
+}
+
+define <vscale x 4 x half> @dup_extract_nxv4f16_nxv4f16(<vscale x 4 x half> %data) {
+; CHECK-LABEL: dup_extract_nxv4f16_nxv4f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov z0.s, z0.s[1]
+; CHECK-NEXT: ret
+ %1 = extractelement <vscale x 4 x half> %data, i16 1
+ %.splatinsert = insertelement <vscale x 4 x half> poison, half %1, i32 0
+ %.splat = shufflevector <vscale x 4 x half> %.splatinsert, <vscale x 4 x half> poison, <vscale x 4 x i32> zeroinitializer
+ ret <vscale x 4 x half> %.splat
+}
+
+define <vscale x 4 x half> @dup_extract_nxv4f16_nxv2f16(<vscale x 2 x half> %data) {
+; CHECK-LABEL: dup_extract_nxv4f16_nxv2f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov z0.d, z0.d[1]
----------------
rj-jesus wrote:
This doesn't seem right: `mov z0.d, z0.d[1]` will only give us two copies of the extracted element per 128-bit segment, but for a `<vscale x 4 x half>` result we expect four copies, right?
https://github.com/llvm/llvm-project/pull/115189
More information about the llvm-commits mailing list