[llvm] [LLVM][SVE] Extend dup(extract_elt(v,i)) isel patterns to cover more combinations. (PR #115189)

Ricardo Jesus via llvm-commits llvm-commits at lists.llvm.org
Fri Nov 8 07:14:22 PST 2024


================
@@ -56,19 +151,133 @@ define <vscale x 8 x half> @dup_extract_f16(<vscale x 8 x half> %data) {
   ret <vscale x 8 x half> %.splat
 }
 
-define <vscale x 4 x half> @dup_extract_f16_4(<vscale x 4 x half> %data) {
-; CHECK-LABEL: dup_extract_f16_4:
+define <vscale x 8 x half> @dup_extract_nxv8f16_nxv4f16(<vscale x 4 x half> %data) {
+; CHECK-LABEL: dup_extract_nxv8f16_nxv4f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z0.s, z0.s[1]
 ; CHECK-NEXT:    ret
   %1 = extractelement <vscale x 4 x half> %data, i16 1
+  %.splatinsert = insertelement <vscale x 8 x half> poison, half %1, i32 0
+  %.splat = shufflevector <vscale x 8 x half> %.splatinsert, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
+  ret <vscale x 8 x half> %.splat
+}
+
+define <vscale x 8 x half> @dup_extract_nxv8f16_nxv2f16(<vscale x 2 x half> %data) {
+; CHECK-LABEL: dup_extract_nxv8f16_nxv2f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z0.d, z0.d[1]
+; CHECK-NEXT:    ret
+  %1 = extractelement <vscale x 2 x half> %data, i16 1
+  %.splatinsert = insertelement <vscale x 8 x half> poison, half %1, i32 0
+  %.splat = shufflevector <vscale x 8 x half> %.splatinsert, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
+  ret <vscale x 8 x half> %.splat
+}
+
+define <vscale x 8 x half> @dup_extract_nxv8f16_v8f16(<8 x half> %data) {
+; CHECK-LABEL: dup_extract_nxv8f16_v8f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    mov z0.h, z0.h[1]
+; CHECK-NEXT:    ret
+  %1 = extractelement <8 x half> %data, i16 1
+  %.splatinsert = insertelement <vscale x 8 x half> poison, half %1, i32 0
+  %.splat = shufflevector <vscale x 8 x half> %.splatinsert, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
+  ret <vscale x 8 x half> %.splat
+}
+
+define <vscale x 8 x half> @dup_extract_nxv8f16_v4f16(<4 x half> %data) {
+; CHECK-LABEL: dup_extract_nxv8f16_v4f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    mov z0.h, z0.h[1]
+; CHECK-NEXT:    ret
+  %1 = extractelement <4 x half> %data, i16 1
+  %.splatinsert = insertelement <vscale x 8 x half> poison, half %1, i32 0
+  %.splat = shufflevector <vscale x 8 x half> %.splatinsert, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
+  ret <vscale x 8 x half> %.splat
+}
+
+define <vscale x 4 x half> @dup_extract_nxv4f16_nxv8f16(<vscale x 8 x half> %data) {
+; CHECK-LABEL: dup_extract_nxv4f16_nxv8f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z0.h, z0.h[1]
+; CHECK-NEXT:    ret
+  %1 = extractelement <vscale x 8 x half> %data, i16 1
+  %.splatinsert = insertelement <vscale x 4 x half> poison, half %1, i32 0
+  %.splat = shufflevector <vscale x 4 x half> %.splatinsert, <vscale x 4 x half> poison, <vscale x 4 x i32> zeroinitializer
+  ret <vscale x 4 x half> %.splat
+}
+
+define <vscale x 4 x half> @dup_extract_nxv4f16_nxv4f16(<vscale x 4 x half> %data) {
+; CHECK-LABEL: dup_extract_nxv4f16_nxv4f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z0.s, z0.s[1]
+; CHECK-NEXT:    ret
+  %1 = extractelement <vscale x 4 x half> %data, i16 1
+  %.splatinsert = insertelement <vscale x 4 x half> poison, half %1, i32 0
+  %.splat = shufflevector <vscale x 4 x half> %.splatinsert, <vscale x 4 x half> poison, <vscale x 4 x i32> zeroinitializer
+  ret <vscale x 4 x half> %.splat
+}
+
+define <vscale x 4 x half> @dup_extract_nxv4f16_nxv2f16(<vscale x 2 x half> %data) {
+; CHECK-LABEL: dup_extract_nxv4f16_nxv2f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z0.d, z0.d[1]
----------------
rj-jesus wrote:

> Scaling the index requires more tablegen/C++ because I think we'd need a set of ComplexPatterns, so I figured I'd do that under a separate PR and keep this one simple by just adding patterns, so that I'm not delaying your match patch.

That sounds good, thanks very much!

> The reason we do this is to ensure a predicate like `<vscale x N x i1>` works in the same way for all vectors of length `vscale x N` regardless of the element type.

Ah, I see! Cheers, that makes sense!

https://github.com/llvm/llvm-project/pull/115189


More information about the llvm-commits mailing list