[llvm] [AArch64][SVE] Use INS when moving elements from bottom 128b of SVE type (PR #114034)

Paul Walker via llvm-commits llvm-commits at lists.llvm.org
Tue Oct 29 11:34:43 PDT 2024


================
@@ -7222,44 +7222,101 @@ def : Pat<(v2i64 (int_aarch64_neon_vcopy_lane
                    V128:$Vd, VectorIndexD:$idx, V128:$Vs, VectorIndexD:$idx2)
           )>;
 
-multiclass Neon_INS_elt_pattern<ValueType VT128, ValueType VT64,
-                                ValueType VTScal, Instruction INS> {
-  def : Pat<(VT128 (vector_insert V128:$src,
-                        (VTScal (vector_extract (VT128 V128:$Rn), (i64 imm:$Immn))),
+// Insert an extracted vector element into a 128-bit Neon vector
+multiclass Neon_INS_elt_pattern_v128<ValueType VT128, ValueType VT64, ValueType VTSVE,
+                                     ValueType VTScal, Operand ExIdxTy, Instruction INS> {
+  // Extracting from the lower 128-bits of an SVE vector
+  def : Pat<(VT128 (vector_insert VT128:$Rn,
+                      (VTScal (vector_extract VTSVE:$Rm, (i64 ExIdxTy:$Immn))),
+                      (i64 imm:$Immd))),
+            (INS VT128:$Rn, imm:$Immd, (VT128 (EXTRACT_SUBREG VTSVE:$Rm, zsub)), ExIdxTy:$Immn)>;
+
+  // Extracting from another Neon vector
+  def : Pat<(VT128 (vector_insert V128:$Rn,
+                        (VTScal (vector_extract (VT128 V128:$Rm), (i64 imm:$Immn))),
                         (i64 imm:$Immd))),
-            (INS V128:$src, imm:$Immd, V128:$Rn, imm:$Immn)>;
-
-  def : Pat<(VT128 (vector_insert V128:$src,
-                        (VTScal (vector_extract (VT64 V64:$Rn), (i64 imm:$Immn))),
-                        (i64 imm:$Immd))),
-            (INS V128:$src, imm:$Immd,
-                 (SUBREG_TO_REG (i64 0), V64:$Rn, dsub), imm:$Immn)>;
+            (INS V128:$Rn, imm:$Immd, V128:$Rm, imm:$Immn)>;
+
+  def : Pat<(VT128 (vector_insert V128:$Rn,
+                      (VTScal (vector_extract (VT64 V64:$Rm), (i64 imm:$Immn))),
+                      (i64 imm:$Immd))),
+            (INS V128:$Rn, imm:$Immd,
+                (SUBREG_TO_REG (i64 0), V64:$Rm, dsub), imm:$Immn)>;
+}
+
+// Insert an extracted vector element into a 64-bit Neon vector
+multiclass Neon_INS_elt_pattern_v64<ValueType VT128, ValueType VT64, ValueType VTSVE,
+                                     ValueType VTScal, Operand ExIdxTy, Instruction INS> {
+  // Extracting from the lower 128-bits of an SVE vector
+  def : Pat<(VT64 (vector_insert VT64:$Rn,
+                      (VTScal (vector_extract VTSVE:$Rm, (i64 ExIdxTy:$Immn))),
+                      (i64 imm:$Immd))),
+            (EXTRACT_SUBREG (INS (SUBREG_TO_REG (i64 0), VT64:$Rn, dsub), imm:$Immd,
+                                 (VT128 (EXTRACT_SUBREG VTSVE:$Rm, zsub)), ExIdxTy:$Immn),
+                            dsub)>;
 
-  def : Pat<(VT64 (vector_insert V64:$src,
-                        (VTScal (vector_extract (VT128 V128:$Rn), (i64 imm:$Immn))),
+  // Extracting from another Neon vector
+  def : Pat<(VT64 (vector_insert V64:$Rn,
+                        (VTScal (vector_extract (VT128 V128:$Rm), (i64 imm:$Immn))),
                         (i64 imm:$Immd))),
-            (EXTRACT_SUBREG (INS (SUBREG_TO_REG (i64 0), V64:$src, dsub),
-                                 imm:$Immd, V128:$Rn, imm:$Immn),
+            (EXTRACT_SUBREG (INS (SUBREG_TO_REG (i64 0), V64:$Rn, dsub),
+                                 imm:$Immd, V128:$Rm, imm:$Immn),
                             dsub)>;
 
-  def : Pat<(VT64 (vector_insert V64:$src,
-                        (VTScal (vector_extract (VT64 V64:$Rn), (i64 imm:$Immn))),
+  def : Pat<(VT64 (vector_insert V64:$Rn,
+                        (VTScal (vector_extract (VT64 V64:$Rm), (i64 imm:$Immn))),
                         (i64 imm:$Immd))),
             (EXTRACT_SUBREG
-                (INS (SUBREG_TO_REG (i64 0), V64:$src, dsub), imm:$Immd,
-                     (SUBREG_TO_REG (i64 0), V64:$Rn, dsub), imm:$Immn),
+                (INS (SUBREG_TO_REG (i64 0), V64:$Rn, dsub), imm:$Immd,
+                     (SUBREG_TO_REG (i64 0), V64:$Rm, dsub), imm:$Immn),
+                dsub)>;
+}
+
+// Special case for <1 x double/i64> - insertion may be vector_from_scalar or
+// (vector_insert (vec) 0).
+multiclass Neon_INS_elt_pattern_v64d<ValueType VT128, ValueType VT64, ValueType VTSVE,
+                                     ValueType VTScal> {
+  // Extracting from the lower 128-bits of an SVE vector
+  def : Pat<(VT64 (vec_ins_or_scal_vec
+                      (VTScal (vector_extract VTSVE:$Rm, VectorIndexD:$Immn)))),
+            (EXTRACT_SUBREG
+                (INSvi64lane (IMPLICIT_DEF), 0, (VT128 (EXTRACT_SUBREG VTSVE:$Rm, zsub)),
+                              VectorIndexD:$Immn),
                 dsub)>;
+
+  def : Pat<(VT64 (vec_ins_or_scal_vec
+                        (VTScal (vector_extract (VT128 V128:$Rm), (i64 imm:$Immn))))),
+            (EXTRACT_SUBREG
+                (INSvi64lane (IMPLICIT_DEF), (i64 0), V128:$Rm, imm:$Immn),
+                dsub)>;
+
+  // Extracting from another NEON vector
+  def : Pat<(VT64 (vec_ins_or_scal_vec
+                      (VTScal (vector_extract (VT64 V64:$Rm), (i64 imm:$Immn))))),
+            (EXTRACT_SUBREG
+                (INSvi64lane (IMPLICIT_DEF), (i64 0),
+                             (SUBREG_TO_REG (i64 0), V64:$Rm, dsub), imm:$Immn),
+                dsub)>;
+}
+
+multiclass Neon_INS_elt_pattern<ValueType VT128, ValueType VT64, ValueType SVESrcVT,
+                                ValueType VTScal, Operand ExIdxTy, Instruction INS> {
+  defm : Neon_INS_elt_pattern_v64<VT128, VT64, SVESrcVT, VTScal, ExIdxTy, INS>;
+  defm : Neon_INS_elt_pattern_v128<VT128, VT64, SVESrcVT, VTScal, ExIdxTy, INS>;
 }
 
-defm : Neon_INS_elt_pattern<v8f16, v4f16, f16, INSvi16lane>;
-defm : Neon_INS_elt_pattern<v8bf16, v4bf16, bf16, INSvi16lane>;
-defm : Neon_INS_elt_pattern<v4f32, v2f32, f32, INSvi32lane>;
-defm : Neon_INS_elt_pattern<v2f64, v1f64, f64, INSvi64lane>;
+defm : Neon_INS_elt_pattern<v4f32,  v2f32,  nxv4f32,  f32,  VectorIndexS, INSvi32lane>;
----------------
paulwalker-arm wrote:

This first `v4f32` `defm` line should be removed because the same instantiation is duplicated further down in the patch.

https://github.com/llvm/llvm-project/pull/114034


More information about the llvm-commits mailing list