[llvm] [LLVM][SVE] Extend dup(extract_elt(v,i)) isel patterns to cover more combinations. (PR #115189)

Paul Walker via llvm-commits llvm-commits at lists.llvm.org
Fri Nov 8 07:00:11 PST 2024


================
@@ -56,19 +151,133 @@ define <vscale x 8 x half> @dup_extract_f16(<vscale x 8 x half> %data) {
   ret <vscale x 8 x half> %.splat
 }
 
-define <vscale x 4 x half> @dup_extract_f16_4(<vscale x 4 x half> %data) {
-; CHECK-LABEL: dup_extract_f16_4:
+define <vscale x 8 x half> @dup_extract_nxv8f16_nxv4f16(<vscale x 4 x half> %data) {
+; CHECK-LABEL: dup_extract_nxv8f16_nxv4f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z0.s, z0.s[1]
 ; CHECK-NEXT:    ret
   %1 = extractelement <vscale x 4 x half> %data, i16 1
+  %.splatinsert = insertelement <vscale x 8 x half> poison, half %1, i32 0
+  %.splat = shufflevector <vscale x 8 x half> %.splatinsert, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
+  ret <vscale x 8 x half> %.splat
+}
+
+define <vscale x 8 x half> @dup_extract_nxv8f16_nxv2f16(<vscale x 2 x half> %data) {
+; CHECK-LABEL: dup_extract_nxv8f16_nxv2f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z0.d, z0.d[1]
+; CHECK-NEXT:    ret
+  %1 = extractelement <vscale x 2 x half> %data, i16 1
+  %.splatinsert = insertelement <vscale x 8 x half> poison, half %1, i32 0
+  %.splat = shufflevector <vscale x 8 x half> %.splatinsert, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
+  ret <vscale x 8 x half> %.splat
+}
+
+define <vscale x 8 x half> @dup_extract_nxv8f16_v8f16(<8 x half> %data) {
+; CHECK-LABEL: dup_extract_nxv8f16_v8f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    mov z0.h, z0.h[1]
+; CHECK-NEXT:    ret
+  %1 = extractelement <8 x half> %data, i16 1
+  %.splatinsert = insertelement <vscale x 8 x half> poison, half %1, i32 0
+  %.splat = shufflevector <vscale x 8 x half> %.splatinsert, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
+  ret <vscale x 8 x half> %.splat
+}
+
+define <vscale x 8 x half> @dup_extract_nxv8f16_v4f16(<4 x half> %data) {
+; CHECK-LABEL: dup_extract_nxv8f16_v4f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    mov z0.h, z0.h[1]
+; CHECK-NEXT:    ret
+  %1 = extractelement <4 x half> %data, i16 1
+  %.splatinsert = insertelement <vscale x 8 x half> poison, half %1, i32 0
+  %.splat = shufflevector <vscale x 8 x half> %.splatinsert, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
+  ret <vscale x 8 x half> %.splat
+}
+
+define <vscale x 4 x half> @dup_extract_nxv4f16_nxv8f16(<vscale x 8 x half> %data) {
+; CHECK-LABEL: dup_extract_nxv4f16_nxv8f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z0.h, z0.h[1]
+; CHECK-NEXT:    ret
+  %1 = extractelement <vscale x 8 x half> %data, i16 1
+  %.splatinsert = insertelement <vscale x 4 x half> poison, half %1, i32 0
+  %.splat = shufflevector <vscale x 4 x half> %.splatinsert, <vscale x 4 x half> poison, <vscale x 4 x i32> zeroinitializer
+  ret <vscale x 4 x half> %.splat
+}
+
+define <vscale x 4 x half> @dup_extract_nxv4f16_nxv4f16(<vscale x 4 x half> %data) {
+; CHECK-LABEL: dup_extract_nxv4f16_nxv4f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z0.s, z0.s[1]
+; CHECK-NEXT:    ret
+  %1 = extractelement <vscale x 4 x half> %data, i16 1
+  %.splatinsert = insertelement <vscale x 4 x half> poison, half %1, i32 0
+  %.splat = shufflevector <vscale x 4 x half> %.splatinsert, <vscale x 4 x half> poison, <vscale x 4 x i32> zeroinitializer
+  ret <vscale x 4 x half> %.splat
+}
+
+define <vscale x 4 x half> @dup_extract_nxv4f16_nxv2f16(<vscale x 2 x half> %data) {
+; CHECK-LABEL: dup_extract_nxv4f16_nxv2f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z0.d, z0.d[1]
----------------
paulwalker-arm wrote:

Unpacked fixed length vectors don't really exist for AArch64. The fixed length vector types are treated as packed vectors of different length.

Whereas for SVE we treat vectors that are smaller than `vscale x 128` bits as unpacked vectors, whereby their elements are evenly distributed across a `vscale x 128`-bit sized vector.  This is why for scalable vectors the index (or element type) must be scaled, but for fixed-length vectors they should be used as is.

NOTE: This is for in-reg only.  The memory format of every vector type is considered to be a packed vector of a different length.

https://github.com/llvm/llvm-project/pull/115189


More information about the llvm-commits mailing list