[llvm] [LLVM][SVE] Extend dup(extract_elt(v,i)) isel patterns to cover more combinations. (PR #115189)
    Paul Walker via llvm-commits 
    llvm-commits at lists.llvm.org
       
    Fri Nov  8 06:54:31 PST 2024
    
    
  
================
@@ -56,19 +151,133 @@ define <vscale x 8 x half> @dup_extract_f16(<vscale x 8 x half> %data) {
   ret <vscale x 8 x half> %.splat
 }
 
-define <vscale x 4 x half> @dup_extract_f16_4(<vscale x 4 x half> %data) {
-; CHECK-LABEL: dup_extract_f16_4:
+define <vscale x 8 x half> @dup_extract_nxv8f16_nxv4f16(<vscale x 4 x half> %data) {
+; CHECK-LABEL: dup_extract_nxv8f16_nxv4f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z0.s, z0.s[1]
 ; CHECK-NEXT:    ret
   %1 = extractelement <vscale x 4 x half> %data, i16 1
+  %.splatinsert = insertelement <vscale x 8 x half> poison, half %1, i32 0
+  %.splat = shufflevector <vscale x 8 x half> %.splatinsert, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
+  ret <vscale x 8 x half> %.splat
+}
+
+define <vscale x 8 x half> @dup_extract_nxv8f16_nxv2f16(<vscale x 2 x half> %data) {
+; CHECK-LABEL: dup_extract_nxv8f16_nxv2f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z0.d, z0.d[1]
+; CHECK-NEXT:    ret
+  %1 = extractelement <vscale x 2 x half> %data, i16 1
+  %.splatinsert = insertelement <vscale x 8 x half> poison, half %1, i32 0
+  %.splat = shufflevector <vscale x 8 x half> %.splatinsert, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
+  ret <vscale x 8 x half> %.splat
+}
+
+define <vscale x 8 x half> @dup_extract_nxv8f16_v8f16(<8 x half> %data) {
+; CHECK-LABEL: dup_extract_nxv8f16_v8f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    mov z0.h, z0.h[1]
+; CHECK-NEXT:    ret
+  %1 = extractelement <8 x half> %data, i16 1
+  %.splatinsert = insertelement <vscale x 8 x half> poison, half %1, i32 0
+  %.splat = shufflevector <vscale x 8 x half> %.splatinsert, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
+  ret <vscale x 8 x half> %.splat
+}
+
+define <vscale x 8 x half> @dup_extract_nxv8f16_v4f16(<4 x half> %data) {
+; CHECK-LABEL: dup_extract_nxv8f16_v4f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    mov z0.h, z0.h[1]
+; CHECK-NEXT:    ret
+  %1 = extractelement <4 x half> %data, i16 1
+  %.splatinsert = insertelement <vscale x 8 x half> poison, half %1, i32 0
+  %.splat = shufflevector <vscale x 8 x half> %.splatinsert, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
+  ret <vscale x 8 x half> %.splat
+}
+
+define <vscale x 4 x half> @dup_extract_nxv4f16_nxv8f16(<vscale x 8 x half> %data) {
+; CHECK-LABEL: dup_extract_nxv4f16_nxv8f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z0.h, z0.h[1]
+; CHECK-NEXT:    ret
+  %1 = extractelement <vscale x 8 x half> %data, i16 1
+  %.splatinsert = insertelement <vscale x 4 x half> poison, half %1, i32 0
+  %.splat = shufflevector <vscale x 4 x half> %.splatinsert, <vscale x 4 x half> poison, <vscale x 4 x i32> zeroinitializer
+  ret <vscale x 4 x half> %.splat
+}
+
+define <vscale x 4 x half> @dup_extract_nxv4f16_nxv4f16(<vscale x 4 x half> %data) {
+; CHECK-LABEL: dup_extract_nxv4f16_nxv4f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z0.s, z0.s[1]
+; CHECK-NEXT:    ret
+  %1 = extractelement <vscale x 4 x half> %data, i16 1
+  %.splatinsert = insertelement <vscale x 4 x half> poison, half %1, i32 0
+  %.splat = shufflevector <vscale x 4 x half> %.splatinsert, <vscale x 4 x half> poison, <vscale x 4 x i32> zeroinitializer
+  ret <vscale x 4 x half> %.splat
+}
+
+define <vscale x 4 x half> @dup_extract_nxv4f16_nxv2f16(<vscale x 2 x half> %data) {
+; CHECK-LABEL: dup_extract_nxv4f16_nxv2f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z0.d, z0.d[1]
----------------
paulwalker-arm wrote:
Scaling the index requires more TableGen/C++ work because I think we'd need a set of ComplexPatterns, so I figured I'd do that under a separate PR and keep this one simple by just adding patterns, so that I'm not delaying your match patch.
https://github.com/llvm/llvm-project/pull/115189
    
    
More information about the llvm-commits
mailing list