[llvm] [AArch64][SVE] Use INS when moving elements from bottom 128b of SVE type (PR #114034)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Oct 31 02:53:15 PDT 2024
https://github.com/SpencerAbson updated https://github.com/llvm/llvm-project/pull/114034
>From 5fce5b41d1f0b2b901b41720deac52014d1c5f98 Mon Sep 17 00:00:00 2001
From: Spencer Abson <Spencer.Abson at arm.com>
Date: Tue, 29 Oct 2024 00:00:28 +0000
Subject: [PATCH 1/4] [AArch64][SVE] Use INS when moving elements from bottom
128b of SVE type
Moving an element from a scalable vector to a fixed-length vector should use
INS (element) when we know the extracted element lies within the bottom 128
bits of the scalable vector. This avoids emitting unnecessary UMOV/FMOV trips
through general-purpose or scalar floating-point registers.
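For example (a minimal sketch distilled from the tests added below):

  define <16 x i8> @test(<16 x i8> %a, <vscale x 16 x i8> %b) {
    %c = extractelement <vscale x 16 x i8> %b, i32 15
    %d = insertelement <16 x i8> %a, i8 %c, i32 15
    ret <16 x i8> %d
  }

previously lowered through a GPR:

  umov w8, v1.b[15]
  mov  v0.b[15], w8

and with this patch selects a single INS (element):

  mov  v0.b[15], v1.b[15]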
---
.../lib/Target/AArch64/AArch64SVEInstrInfo.td | 50 ++
.../CodeGen/AArch64/neon-insert-sve-elt.ll | 469 ++++++++++++++++++
.../AArch64/sve-extract-fixed-vector.ll | 47 +-
.../AArch64/sve-fixed-length-shuffles.ll | 98 ++--
4 files changed, 578 insertions(+), 86 deletions(-)
create mode 100644 llvm/test/CodeGen/AArch64/neon-insert-sve-elt.ll
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index 2b69903b133fe3..f678ce1058bafd 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -3367,6 +3367,47 @@ let Predicates = [HasSVEorSME] in {
(UMOVvi64 (v2i64 (EXTRACT_SUBREG ZPR:$vec, zsub)), VectorIndexD:$index)>;
} // End HasNEON
+ // Use INS (element) when moving an element from the bottom 128-bits of an SVE type to a NEON vector.
+ multiclass Neon_ins_sve_elt_pattern<ValueType NeonTy, ValueType NeonQTy, ValueType SVETy, ValueType ScalTy,
+ Operand IdxTy, Operand NarrowIdxTy, Instruction INS> {
+ // Insert into 128-bit NEON type from lowest 128-bits of SVE type
+ def : Pat<(NeonQTy (vector_insert V128:$src,
+ (ScalTy (vector_extract SVETy:$Rn, IdxTy:$idx_extract)),
+ (IdxTy:$idx_insert))),
+ (INS V128:$src, IdxTy:$idx_insert,
+ (NeonQTy (EXTRACT_SUBREG SVETy:$Rn, zsub)), IdxTy:$idx_extract)>;
+
+ // Insert into 64-bit NEON type from lowest 128-bits of SVE type
+ def : Pat<(NeonTy (vector_insert V64:$src,
+ (ScalTy (vector_extract SVETy:$Rn, IdxTy:$idx_extract)),
+ (NarrowIdxTy:$idx_insert))),
+ (EXTRACT_SUBREG
+ (INS
+ (INSERT_SUBREG (NeonQTy (IMPLICIT_DEF)), V64:$src, dsub), NarrowIdxTy:$idx_insert,
+ (NeonQTy (EXTRACT_SUBREG SVETy:$Rn, zsub)), IdxTy:$idx_extract),
+ dsub)>;
+ }
+
+ // Inserting into <1 x double/i64> will just create a new vector from the scalar value.
+ multiclass Neon_ins_64b_sve_elt_pattern<ValueType NeonTy, ValueType NeonQTy, ValueType SVETy,
+ ValueType ScalTy> {
+ // Insert into 128-bit NEON type from lowest 128-bits of SVE type
+ def : Pat<(NeonQTy (vector_insert V128:$src,
+ (ScalTy (vector_extract SVETy:$Rn, VectorIndexD:$idx_extract)),
+ (VectorIndexD:$idx_insert))),
+ (INSvi64lane
+ V128:$src, VectorIndexD:$idx_insert, (NeonQTy (EXTRACT_SUBREG SVETy:$Rn, zsub)),
+ VectorIndexD:$idx_extract)>;
+
+ // Insert into 64-bit NEON type from lowest 128-bits of SVE type
+ def : Pat<(NeonTy (scalar_to_vector
+ (ScalTy (vector_extract SVETy:$Rn, VectorIndexD:$idx_extract)))),
+ (EXTRACT_SUBREG
+ (INSvi64lane (IMPLICIT_DEF), 0, (NeonQTy (EXTRACT_SUBREG SVETy:$Rn, zsub)),
+ VectorIndexD:$idx_extract),
+ dsub)>;
+ }
+
let Predicates = [HasNEON] in {
def : Pat<(sext_inreg (vector_extract nxv16i8:$vec, VectorIndexB:$index), i8),
(SMOVvi8to32 (v16i8 (EXTRACT_SUBREG ZPR:$vec, zsub)), VectorIndexB:$index)>;
@@ -3380,6 +3421,15 @@ let Predicates = [HasSVEorSME] in {
def : Pat<(sext (i32 (vector_extract nxv4i32:$vec, VectorIndexS:$index))),
(SMOVvi32to64 (v4i32 (EXTRACT_SUBREG ZPR:$vec, zsub)), VectorIndexS:$index)>;
+
+ defm : Neon_ins_sve_elt_pattern<v8i8, v16i8, nxv16i8, i32, VectorIndexB, VectorIndexH, INSvi8lane>;
+ defm : Neon_ins_sve_elt_pattern<v4f16, v8f16, nxv8f16, f16, VectorIndexH, VectorIndexS, INSvi16lane>;
+ defm : Neon_ins_sve_elt_pattern<v4bf16, v8bf16, nxv8bf16, bf16, VectorIndexH, VectorIndexS, INSvi16lane>;
+ defm : Neon_ins_sve_elt_pattern<v4i16, v8i16, nxv8i16, i32, VectorIndexH, VectorIndexS, INSvi16lane>;
+ defm : Neon_ins_sve_elt_pattern<v2f32, v4f32, nxv4f32, f32, VectorIndexS, VectorIndexD, INSvi32lane>;
+ defm : Neon_ins_sve_elt_pattern<v2i32, v4i32, nxv4i32, i32, VectorIndexS, VectorIndexD, INSvi32lane>;
+ defm : Neon_ins_64b_sve_elt_pattern<v1f64, v2f64, nxv2f64, f64>;
+ defm : Neon_ins_64b_sve_elt_pattern<v1i64, v2i64, nxv2i64, i64>;
} // End HasNEON
// Extract first element from vector.
diff --git a/llvm/test/CodeGen/AArch64/neon-insert-sve-elt.ll b/llvm/test/CodeGen/AArch64/neon-insert-sve-elt.ll
new file mode 100644
index 00000000000000..0f4eec4fdfda1b
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/neon-insert-sve-elt.ll
@@ -0,0 +1,469 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve,+neon < %s | FileCheck %s
+
+; Inserting an element from the bottom 128-bits of an SVE type into a NEON vector should use INS (element) to
+; avoid pointless FMOV trips.
+
+; --------- extraction from nxv16i8
+
+define <8 x i8> @test_lane0_nxv16i8(<8 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: test_lane0_nxv16i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: mov v0.b[0], v1.b[0]
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT: ret
+ %c = extractelement <vscale x 16 x i8> %b, i32 0
+ %d = insertelement <8 x i8> %a, i8 %c, i32 0
+ ret <8 x i8> %d
+}
+
+define <8 x i8> @test_lane15_nxv16i8(<8 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: test_lane15_nxv16i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: mov v0.b[7], v1.b[15]
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT: ret
+ %c = extractelement <vscale x 16 x i8> %b, i32 15
+ %d = insertelement <8 x i8> %a, i8 %c, i32 7
+ ret <8 x i8> %d
+}
+
+define <16 x i8> @test_q_lane0_nxv16i8(<16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: test_q_lane0_nxv16i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov v0.b[0], v1.b[0]
+; CHECK-NEXT: ret
+ %c = extractelement <vscale x 16 x i8> %b, i32 0
+ %d = insertelement <16 x i8> %a, i8 %c, i32 0
+ ret <16 x i8> %d
+}
+
+define <16 x i8> @test_q_lane15_nxv16i8(<16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: test_q_lane15_nxv16i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov v0.b[15], v1.b[15]
+; CHECK-NEXT: ret
+ %c = extractelement <vscale x 16 x i8> %b, i32 15
+ %d = insertelement <16 x i8> %a, i8 %c, i32 15
+ ret <16 x i8> %d
+}
+
+; (negative test) Extracted element is not within Vn
+define <16 x i8> @test_q_lane16_nxv16i8(<16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: test_q_lane16_nxv16i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov z1.b, z1.b[16]
+; CHECK-NEXT: fmov w8, s1
+; CHECK-NEXT: mov v0.b[15], w8
+; CHECK-NEXT: ret
+ %c = extractelement <vscale x 16 x i8> %b, i32 16
+ %d = insertelement <16 x i8> %a, i8 %c, i32 15
+ ret <16 x i8> %d
+}
+
+; --------- extraction from nxv8f16
+
+define <4 x half> @test_lane0_nxv8f16(<4 x half> %a, <vscale x 8 x half> %b) {
+; CHECK-LABEL: test_lane0_nxv8f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: mov v0.h[0], v1.h[0]
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT: ret
+ %c = extractelement <vscale x 8 x half> %b, i32 0
+ %d = insertelement <4 x half> %a, half %c, i32 0
+ ret <4 x half> %d
+}
+
+define <4 x half> @test_lane7_nxv8f16(<4 x half> %a, <vscale x 8 x half> %b) {
+; CHECK-LABEL: test_lane7_nxv8f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: mov v0.h[3], v1.h[7]
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT: ret
+ %c = extractelement <vscale x 8 x half> %b, i32 7
+ %d = insertelement <4 x half> %a, half %c, i32 3
+ ret <4 x half> %d
+}
+
+define <8 x half> @test_q_lane0_nxv8f16(<8 x half> %a, <vscale x 8 x half> %b) {
+; CHECK-LABEL: test_q_lane0_nxv8f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov v0.h[0], v1.h[0]
+; CHECK-NEXT: ret
+ %c = extractelement <vscale x 8 x half> %b, i32 0
+ %d = insertelement <8 x half> %a, half %c, i32 0
+ ret <8 x half> %d
+}
+
+define <8 x half> @test_q_lane7_nxv8f16(<8 x half> %a, <vscale x 8 x half> %b) {
+; CHECK-LABEL: test_q_lane7_nxv8f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov v0.h[7], v1.h[7]
+; CHECK-NEXT: ret
+ %c = extractelement <vscale x 8 x half> %b, i32 7
+ %d = insertelement <8 x half> %a, half %c, i32 7
+ ret <8 x half> %d
+}
+
+; (negative test) Extracted element is not within Vn
+define <8 x half> @test_q_lane8_nxv8f16(<8 x half> %a, <vscale x 8 x half> %b) {
+; CHECK-LABEL: test_q_lane8_nxv8f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov z1.h, z1.h[8]
+; CHECK-NEXT: mov v0.h[7], v1.h[0]
+; CHECK-NEXT: ret
+ %c = extractelement <vscale x 8 x half> %b, i32 8
+ %d = insertelement <8 x half> %a, half %c, i32 7
+ ret <8 x half> %d
+}
+
+; --------- extraction from nxv8bf16
+
+define <4 x bfloat> @test_lane0_nxv8bf16(<4 x bfloat> %a, <vscale x 8 x bfloat> %b) {
+; CHECK-LABEL: test_lane0_nxv8bf16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: mov v0.h[0], v1.h[0]
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT: ret
+ %c = extractelement <vscale x 8 x bfloat> %b, i32 0
+ %d = insertelement <4 x bfloat> %a, bfloat %c, i32 0
+ ret <4 x bfloat> %d
+}
+
+define <4 x bfloat> @test_lane7_nxv8bf16(<4 x bfloat> %a, <vscale x 8 x bfloat> %b) {
+; CHECK-LABEL: test_lane7_nxv8bf16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: mov v0.h[3], v1.h[7]
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT: ret
+ %c = extractelement <vscale x 8 x bfloat> %b, i32 7
+ %d = insertelement <4 x bfloat> %a, bfloat %c, i32 3
+ ret <4 x bfloat> %d
+}
+
+define <8 x bfloat> @test_q_lane0_nxv8bf16(<8 x bfloat> %a, <vscale x 8 x bfloat> %b) {
+; CHECK-LABEL: test_q_lane0_nxv8bf16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov v0.h[0], v1.h[0]
+; CHECK-NEXT: ret
+ %c = extractelement <vscale x 8 x bfloat> %b, i32 0
+ %d = insertelement <8 x bfloat> %a, bfloat %c, i32 0
+ ret <8 x bfloat> %d
+}
+
+define <8 x bfloat> @test_q_lane7_nxv8bf16(<8 x bfloat> %a, <vscale x 8 x bfloat> %b) {
+; CHECK-LABEL: test_q_lane7_nxv8bf16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov v0.h[7], v1.h[7]
+; CHECK-NEXT: ret
+ %c = extractelement <vscale x 8 x bfloat> %b, i32 7
+ %d = insertelement <8 x bfloat> %a, bfloat %c, i32 7
+ ret <8 x bfloat> %d
+}
+
+; (negative test) Extracted element is not within Vn
+define <8 x bfloat> @test_q_lane8_nxv8bf16(<8 x bfloat> %a, <vscale x 8 x bfloat> %b) {
+; CHECK-LABEL: test_q_lane8_nxv8bf16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov z1.h, z1.h[8]
+; CHECK-NEXT: mov v0.h[7], v1.h[0]
+; CHECK-NEXT: ret
+ %c = extractelement <vscale x 8 x bfloat> %b, i32 8
+ %d = insertelement <8 x bfloat> %a, bfloat %c, i32 7
+ ret <8 x bfloat> %d
+}
+
+; --------- extraction from nxv8i16
+
+define <4 x i16> @test_lane0_nxv8i16(<4 x i16> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: test_lane0_nxv8i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: mov v0.h[0], v1.h[0]
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT: ret
+ %c = extractelement <vscale x 8 x i16> %b, i32 0
+ %d = insertelement <4 x i16> %a, i16 %c, i32 0
+ ret <4 x i16> %d
+}
+
+define <4 x i16> @test_lane7_nxv8i16(<4 x i16> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: test_lane7_nxv8i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: mov v0.h[3], v1.h[7]
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT: ret
+ %c = extractelement <vscale x 8 x i16> %b, i32 7
+ %d = insertelement <4 x i16> %a, i16 %c, i32 3
+ ret <4 x i16> %d
+}
+
+define <8 x i16> @test_q_lane0_nxv8i16(<8 x i16> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: test_q_lane0_nxv8i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov v0.h[0], v1.h[0]
+; CHECK-NEXT: ret
+ %c = extractelement <vscale x 8 x i16> %b, i32 0
+ %d = insertelement <8 x i16> %a, i16 %c, i32 0
+ ret <8 x i16> %d
+}
+
+define <8 x i16> @test_q_lane7_nxv8i16(<8 x i16> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: test_q_lane7_nxv8i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov v0.h[7], v1.h[7]
+; CHECK-NEXT: ret
+ %c = extractelement <vscale x 8 x i16> %b, i32 7
+ %d = insertelement <8 x i16> %a, i16 %c, i32 7
+ ret <8 x i16> %d
+}
+
+; (negative test) Extracted element is not within Vn
+define <8 x i16> @test_q_lane8_nxv8i16(<8 x i16> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: test_q_lane8_nxv8i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov z1.h, z1.h[8]
+; CHECK-NEXT: fmov w8, s1
+; CHECK-NEXT: mov v0.h[7], w8
+; CHECK-NEXT: ret
+ %c = extractelement <vscale x 8 x i16> %b, i32 8
+ %d = insertelement <8 x i16> %a, i16 %c, i32 7
+ ret <8 x i16> %d
+}
+
+; --------- extraction from nxv4f32
+
+define <2 x float> @test_lane0_nxv4f32(<2 x float> %a, <vscale x 4 x float> %b) {
+; CHECK-LABEL: test_lane0_nxv4f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: mov v0.s[0], v1.s[0]
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT: ret
+ %c = extractelement <vscale x 4 x float> %b, i32 0
+ %d = insertelement <2 x float> %a, float %c, i32 0
+ ret <2 x float> %d
+}
+
+define <2 x float> @test_lane3_nxv4f32(<2 x float> %a, <vscale x 4 x float> %b) {
+; CHECK-LABEL: test_lane3_nxv4f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: mov v0.s[1], v1.s[3]
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT: ret
+ %c = extractelement <vscale x 4 x float> %b, i32 3
+ %d = insertelement <2 x float> %a, float %c, i32 1
+ ret <2 x float> %d
+}
+
+define <4 x float> @test_q_lane0_nxv4f32(<4 x float> %a, <vscale x 4 x float> %b) {
+; CHECK-LABEL: test_q_lane0_nxv4f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov v0.s[0], v1.s[0]
+; CHECK-NEXT: ret
+ %c = extractelement <vscale x 4 x float> %b, i32 0
+ %d = insertelement <4 x float> %a, float %c, i32 0
+ ret <4 x float> %d
+}
+
+define <4 x float> @test_q_lane3_nxv4f32(<4 x float> %a, <vscale x 4 x float> %b) {
+; CHECK-LABEL: test_q_lane3_nxv4f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov v0.s[3], v1.s[3]
+; CHECK-NEXT: ret
+ %c = extractelement <vscale x 4 x float> %b, i32 3
+ %d = insertelement <4 x float> %a, float %c, i32 3
+ ret <4 x float> %d
+}
+
+; (negative test) Extracted element is not within Vn
+define <4 x float> @test_q_lane4_nxv4f32(<4 x float> %a, <vscale x 4 x float> %b) {
+; CHECK-LABEL: test_q_lane4_nxv4f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov z1.s, z1.s[4]
+; CHECK-NEXT: mov v0.s[3], v1.s[0]
+; CHECK-NEXT: ret
+ %c = extractelement <vscale x 4 x float> %b, i32 4
+ %d = insertelement <4 x float> %a, float %c, i32 3
+ ret <4 x float> %d
+}
+
+; --------- extraction from nxv4i32
+
+define <2 x i32> @test_lane0_nxv4i32(<2 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: test_lane0_nxv4i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: mov v0.s[0], v1.s[0]
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT: ret
+ %c = extractelement <vscale x 4 x i32> %b, i32 0
+ %d = insertelement <2 x i32> %a, i32 %c, i32 0
+ ret <2 x i32> %d
+}
+
+define <2 x i32> @test_lane3_nxv4i32(<2 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: test_lane3_nxv4i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: mov v0.s[1], v1.s[3]
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT: ret
+ %c = extractelement <vscale x 4 x i32> %b, i32 3
+ %d = insertelement <2 x i32> %a, i32 %c, i32 1
+ ret <2 x i32> %d
+}
+
+define <4 x i32> @test_q_lane0_nxv4i32(<4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: test_q_lane0_nxv4i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov v0.s[0], v1.s[0]
+; CHECK-NEXT: ret
+ %c = extractelement <vscale x 4 x i32> %b, i32 0
+ %d = insertelement <4 x i32> %a, i32 %c, i32 0
+ ret <4 x i32> %d
+}
+
+define <4 x i32> @test_q_lane3_nxv4i32(<4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: test_q_lane3_nxv4i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov v0.s[3], v1.s[3]
+; CHECK-NEXT: ret
+ %c = extractelement <vscale x 4 x i32> %b, i32 3
+ %d = insertelement <4 x i32> %a, i32 %c, i32 3
+ ret <4 x i32> %d
+}
+
+; (negative test) Extracted element is not within Vn
+define <4 x i32> @test_q_lane4_nxv4i32(<4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: test_q_lane4_nxv4i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov z1.s, z1.s[4]
+; CHECK-NEXT: fmov w8, s1
+; CHECK-NEXT: mov v0.s[3], w8
+; CHECK-NEXT: ret
+ %c = extractelement <vscale x 4 x i32> %b, i32 4
+ %d = insertelement <4 x i32> %a, i32 %c, i32 3
+ ret <4 x i32> %d
+}
+
+; --------- extraction from nxv2f64
+
+define <1 x double> @test_lane0_nxv2f64(<1 x double> %a, <vscale x 2 x double> %b) {
+; CHECK-LABEL: test_lane0_nxv2f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov v0.d[0], v1.d[0]
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT: ret
+ %c = extractelement <vscale x 2 x double> %b, i32 0
+ %d = insertelement <1 x double> %a, double %c, i32 0
+ ret <1 x double> %d
+}
+
+define <1 x double> @test_lane1_nxv2f64(<1 x double> %a, <vscale x 2 x double> %b) {
+; CHECK-LABEL: test_lane1_nxv2f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov v0.d[0], v1.d[1]
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT: ret
+ %c = extractelement <vscale x 2 x double> %b, i32 1
+ %d = insertelement <1 x double> %a, double %c, i32 0
+ ret <1 x double> %d
+}
+
+define <2 x double> @test_q_lane0_nxv2f64(<2 x double> %a, <vscale x 2 x double> %b) {
+; CHECK-LABEL: test_q_lane0_nxv2f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov v0.d[0], v1.d[0]
+; CHECK-NEXT: ret
+ %c = extractelement <vscale x 2 x double> %b, i32 0
+ %d = insertelement <2 x double> %a, double %c, i32 0
+ ret <2 x double> %d
+}
+
+define <2 x double> @test_q_lane1_nxv2f64(<2 x double> %a, <vscale x 2 x double> %b) {
+; CHECK-LABEL: test_q_lane1_nxv2f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov v0.d[1], v1.d[1]
+; CHECK-NEXT: ret
+ %c = extractelement <vscale x 2 x double> %b, i32 1
+ %d = insertelement <2 x double> %a, double %c, i32 1
+ ret <2 x double> %d
+}
+
+; (negative test) Extracted element is not within Vn
+define <2 x double> @test_q_lane2_nxv2f64(<2 x double> %a, <vscale x 2 x double> %b) {
+; CHECK-LABEL: test_q_lane2_nxv2f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov z1.d, z1.d[2]
+; CHECK-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-NEXT: ret
+ %c = extractelement <vscale x 2 x double> %b, i32 2
+ %d = insertelement <2 x double> %a, double %c, i32 1
+ ret <2 x double> %d
+}
+
+; --------- extraction from nxv2i64
+
+define <1 x i64> @test_lane0_nxv2i64(<1 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: test_lane0_nxv2i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov v0.d[0], v1.d[0]
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT: ret
+ %c = extractelement <vscale x 2 x i64> %b, i32 0
+ %d = insertelement <1 x i64> %a, i64 %c, i32 0
+ ret <1 x i64> %d
+}
+
+define <1 x i64> @test_lane1_nxv2i64(<1 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: test_lane1_nxv2i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov v0.d[0], v1.d[1]
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT: ret
+ %c = extractelement <vscale x 2 x i64> %b, i32 1
+ %d = insertelement <1 x i64> %a, i64 %c, i32 0
+ ret <1 x i64> %d
+}
+
+define <2 x i64> @test_q_lane0_nxv2i64(<2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: test_q_lane0_nxv2i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov v0.d[0], v1.d[0]
+; CHECK-NEXT: ret
+ %c = extractelement <vscale x 2 x i64> %b, i32 0
+ %d = insertelement <2 x i64> %a, i64 %c, i32 0
+ ret <2 x i64> %d
+}
+
+define <2 x i64> @test_q_lane1_nxv2i64(<2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: test_q_lane1_nxv2i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov v0.d[1], v1.d[1]
+; CHECK-NEXT: ret
+ %c = extractelement <vscale x 2 x i64> %b, i32 1
+ %d = insertelement <2 x i64> %a, i64 %c, i32 1
+ ret <2 x i64> %d
+}
+
+; (negative test) Extracted element is not within Vn
+define <2 x i64> @test_q_lane2_nxv2i64(<2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: test_q_lane2_nxv2i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov z1.d, z1.d[2]
+; CHECK-NEXT: fmov x8, d1
+; CHECK-NEXT: mov v0.d[1], x8
+; CHECK-NEXT: ret
+ %c = extractelement <vscale x 2 x i64> %b, i32 2
+ %d = insertelement <2 x i64> %a, i64 %c, i32 1
+ ret <2 x i64> %d
+}
diff --git a/llvm/test/CodeGen/AArch64/sve-extract-fixed-vector.ll b/llvm/test/CodeGen/AArch64/sve-extract-fixed-vector.ll
index 678afc4dea3092..518e3573b5edd3 100644
--- a/llvm/test/CodeGen/AArch64/sve-extract-fixed-vector.ll
+++ b/llvm/test/CodeGen/AArch64/sve-extract-fixed-vector.ll
@@ -290,41 +290,28 @@ define <8 x i1> @extract_v8i1_nxv8i1(<vscale x 8 x i1> %inmask) {
ret <8 x i1> %mask
}
+; TODO: Apply better reasoning when lowering extract_subvector from the bottom 128-bits
+; of an SVE type.
define <16 x i1> @extract_v16i1_nxv16i1(<vscale x 16 x i1> %inmask) {
; CHECK-LABEL: extract_v16i1_nxv16i1:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z1.b, p0/z, #1 // =0x1
-; CHECK-NEXT: umov w8, v1.b[1]
; CHECK-NEXT: mov v0.16b, v1.16b
-; CHECK-NEXT: umov w9, v1.b[2]
-; CHECK-NEXT: mov v0.b[1], w8
-; CHECK-NEXT: umov w8, v1.b[3]
-; CHECK-NEXT: mov v0.b[2], w9
-; CHECK-NEXT: umov w9, v1.b[4]
-; CHECK-NEXT: mov v0.b[3], w8
-; CHECK-NEXT: umov w8, v1.b[5]
-; CHECK-NEXT: mov v0.b[4], w9
-; CHECK-NEXT: umov w9, v1.b[6]
-; CHECK-NEXT: mov v0.b[5], w8
-; CHECK-NEXT: umov w8, v1.b[7]
-; CHECK-NEXT: mov v0.b[6], w9
-; CHECK-NEXT: umov w9, v1.b[8]
-; CHECK-NEXT: mov v0.b[7], w8
-; CHECK-NEXT: umov w8, v1.b[9]
-; CHECK-NEXT: mov v0.b[8], w9
-; CHECK-NEXT: umov w9, v1.b[10]
-; CHECK-NEXT: mov v0.b[9], w8
-; CHECK-NEXT: umov w8, v1.b[11]
-; CHECK-NEXT: mov v0.b[10], w9
-; CHECK-NEXT: umov w9, v1.b[12]
-; CHECK-NEXT: mov v0.b[11], w8
-; CHECK-NEXT: umov w8, v1.b[13]
-; CHECK-NEXT: mov v0.b[12], w9
-; CHECK-NEXT: umov w9, v1.b[14]
-; CHECK-NEXT: mov v0.b[13], w8
-; CHECK-NEXT: umov w8, v1.b[15]
-; CHECK-NEXT: mov v0.b[14], w9
-; CHECK-NEXT: mov v0.b[15], w8
+; CHECK-NEXT: mov v0.b[1], v1.b[1]
+; CHECK-NEXT: mov v0.b[2], v1.b[2]
+; CHECK-NEXT: mov v0.b[3], v1.b[3]
+; CHECK-NEXT: mov v0.b[4], v1.b[4]
+; CHECK-NEXT: mov v0.b[5], v1.b[5]
+; CHECK-NEXT: mov v0.b[6], v1.b[6]
+; CHECK-NEXT: mov v0.b[7], v1.b[7]
+; CHECK-NEXT: mov v0.b[8], v1.b[8]
+; CHECK-NEXT: mov v0.b[9], v1.b[9]
+; CHECK-NEXT: mov v0.b[10], v1.b[10]
+; CHECK-NEXT: mov v0.b[11], v1.b[11]
+; CHECK-NEXT: mov v0.b[12], v1.b[12]
+; CHECK-NEXT: mov v0.b[13], v1.b[13]
+; CHECK-NEXT: mov v0.b[14], v1.b[14]
+; CHECK-NEXT: mov v0.b[15], v1.b[15]
; CHECK-NEXT: ret
%mask = call <16 x i1> @llvm.vector.extract.v16i1.nxv16i1(<vscale x 16 x i1> %inmask, i64 0)
ret <16 x i1> %mask
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-shuffles.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-shuffles.ll
index fb169491b0c909..749a1866e7192a 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-shuffles.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-shuffles.ll
@@ -30,78 +30,64 @@ define void @crash_when_lowering_extract_shuffle(ptr %dst, i1 %cond) vscale_rang
; CHECK-NEXT: // %bb.1: // %vector.body
; CHECK-NEXT: mov z0.b, #0 // =0x0
; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: mov x9, #8 // =0x8
+; CHECK-NEXT: mov x10, #24 // =0x18
; CHECK-NEXT: umov w8, v0.b[8]
-; CHECK-NEXT: umov w9, v0.b[9]
-; CHECK-NEXT: umov w10, v0.b[1]
; CHECK-NEXT: mov v1.16b, v0.16b
-; CHECK-NEXT: umov w11, v0.b[15]
+; CHECK-NEXT: mov v1.b[1], v0.b[1]
; CHECK-NEXT: fmov s2, w8
-; CHECK-NEXT: umov w8, v0.b[10]
-; CHECK-NEXT: mov v1.b[1], w10
-; CHECK-NEXT: umov w10, v0.b[11]
-; CHECK-NEXT: mov v2.b[1], w9
-; CHECK-NEXT: umov w9, v0.b[2]
-; CHECK-NEXT: mov v2.b[2], w8
-; CHECK-NEXT: umov w8, v0.b[3]
-; CHECK-NEXT: mov v1.b[2], w9
-; CHECK-NEXT: umov w9, v0.b[12]
-; CHECK-NEXT: mov v2.b[3], w10
-; CHECK-NEXT: umov w10, v0.b[4]
-; CHECK-NEXT: mov v1.b[3], w8
-; CHECK-NEXT: umov w8, v0.b[13]
-; CHECK-NEXT: mov v2.b[4], w9
-; CHECK-NEXT: umov w9, v0.b[5]
-; CHECK-NEXT: mov v1.b[4], w10
-; CHECK-NEXT: umov w10, v0.b[14]
-; CHECK-NEXT: mov v2.b[5], w8
-; CHECK-NEXT: umov w8, v0.b[6]
-; CHECK-NEXT: mov v1.b[5], w9
-; CHECK-NEXT: umov w9, v0.b[7]
+; CHECK-NEXT: mov x8, #16 // =0x10
+; CHECK-NEXT: mov v2.b[1], v0.b[9]
+; CHECK-NEXT: mov v1.b[2], v0.b[2]
+; CHECK-NEXT: mov v2.b[2], v0.b[10]
+; CHECK-NEXT: mov v1.b[3], v0.b[3]
+; CHECK-NEXT: mov v2.b[3], v0.b[11]
+; CHECK-NEXT: mov v1.b[4], v0.b[4]
+; CHECK-NEXT: mov v2.b[4], v0.b[12]
+; CHECK-NEXT: mov v1.b[5], v0.b[5]
+; CHECK-NEXT: mov v2.b[5], v0.b[13]
+; CHECK-NEXT: mov v1.b[6], v0.b[6]
+; CHECK-NEXT: mov v2.b[6], v0.b[14]
+; CHECK-NEXT: mov v1.b[7], v0.b[7]
+; CHECK-NEXT: mov v2.b[7], v0.b[15]
; CHECK-NEXT: ext z0.b, z0.b, z0.b, #16
-; CHECK-NEXT: mov v2.b[6], w10
-; CHECK-NEXT: mov v1.b[6], w8
+; CHECK-NEXT: uunpklo z1.h, z1.b
; CHECK-NEXT: ext v3.16b, v0.16b, v0.16b, #8
; CHECK-NEXT: uunpklo z0.h, z0.b
-; CHECK-NEXT: mov x8, #16 // =0x10
-; CHECK-NEXT: mov x10, #8 // =0x8
-; CHECK-NEXT: mov v2.b[7], w11
-; CHECK-NEXT: mov v1.b[7], w9
+; CHECK-NEXT: uunpklo z2.h, z2.b
+; CHECK-NEXT: uunpklo z1.s, z1.h
; CHECK-NEXT: uunpklo z3.h, z3.b
; CHECK-NEXT: uunpklo z0.s, z0.h
-; CHECK-NEXT: mov x9, #24 // =0x18
-; CHECK-NEXT: uunpklo z2.h, z2.b
-; CHECK-NEXT: uunpklo z1.h, z1.b
-; CHECK-NEXT: uunpklo z3.s, z3.h
-; CHECK-NEXT: lsl z0.s, z0.s, #31
; CHECK-NEXT: uunpklo z2.s, z2.h
-; CHECK-NEXT: uunpklo z1.s, z1.h
-; CHECK-NEXT: lsl z3.s, z3.s, #31
-; CHECK-NEXT: asr z0.s, z0.s, #31
-; CHECK-NEXT: asr z3.s, z3.s, #31
-; CHECK-NEXT: lsl z2.s, z2.s, #31
; CHECK-NEXT: lsl z1.s, z1.s, #31
-; CHECK-NEXT: and z0.s, z0.s, #0x1
-; CHECK-NEXT: and z3.s, z3.s, #0x1
-; CHECK-NEXT: asr z2.s, z2.s, #31
+; CHECK-NEXT: uunpklo z3.s, z3.h
+; CHECK-NEXT: lsl z0.s, z0.s, #31
; CHECK-NEXT: asr z1.s, z1.s, #31
-; CHECK-NEXT: cmpne p1.s, p0/z, z0.s, #0
-; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
-; CHECK-NEXT: cmpne p2.s, p0/z, z3.s, #0
-; CHECK-NEXT: ld1w { z3.s }, p0/z, [x0, x9, lsl #2]
-; CHECK-NEXT: and z2.s, z2.s, #0x1
+; CHECK-NEXT: lsl z2.s, z2.s, #31
+; CHECK-NEXT: asr z0.s, z0.s, #31
; CHECK-NEXT: and z1.s, z1.s, #0x1
-; CHECK-NEXT: mov z0.s, p1/m, #0 // =0x0
-; CHECK-NEXT: cmpne p3.s, p0/z, z2.s, #0
+; CHECK-NEXT: lsl z3.s, z3.s, #31
+; CHECK-NEXT: asr z2.s, z2.s, #31
+; CHECK-NEXT: and z0.s, z0.s, #0x1
; CHECK-NEXT: cmpne p4.s, p0/z, z1.s, #0
-; CHECK-NEXT: ld1w { z2.s }, p0/z, [x0, x10, lsl #2]
; CHECK-NEXT: ld1w { z1.s }, p0/z, [x0]
-; CHECK-NEXT: mov z3.s, p2/m, #0 // =0x0
-; CHECK-NEXT: st1w { z0.s }, p0, [x0, x8, lsl #2]
-; CHECK-NEXT: mov z2.s, p3/m, #0 // =0x0
+; CHECK-NEXT: asr z3.s, z3.s, #31
+; CHECK-NEXT: and z2.s, z2.s, #0x1
+; CHECK-NEXT: cmpne p1.s, p0/z, z0.s, #0
+; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
+; CHECK-NEXT: and z3.s, z3.s, #0x1
+; CHECK-NEXT: cmpne p2.s, p0/z, z2.s, #0
+; CHECK-NEXT: ld1w { z2.s }, p0/z, [x0, x9, lsl #2]
; CHECK-NEXT: mov z1.s, p4/m, #0 // =0x0
-; CHECK-NEXT: st1w { z3.s }, p0, [x0, x9, lsl #2]
-; CHECK-NEXT: st1w { z2.s }, p0, [x0, x10, lsl #2]
+; CHECK-NEXT: cmpne p3.s, p0/z, z3.s, #0
+; CHECK-NEXT: ld1w { z3.s }, p0/z, [x0, x10, lsl #2]
+; CHECK-NEXT: mov z0.s, p1/m, #0 // =0x0
+; CHECK-NEXT: mov z2.s, p2/m, #0 // =0x0
; CHECK-NEXT: st1w { z1.s }, p0, [x0]
+; CHECK-NEXT: st1w { z0.s }, p0, [x0, x8, lsl #2]
+; CHECK-NEXT: mov z3.s, p3/m, #0 // =0x0
+; CHECK-NEXT: st1w { z2.s }, p0, [x0, x9, lsl #2]
+; CHECK-NEXT: st1w { z3.s }, p0, [x0, x10, lsl #2]
; CHECK-NEXT: .LBB1_2: // %exit
; CHECK-NEXT: ret
%broadcast.splat = shufflevector <32 x i1> zeroinitializer, <32 x i1> zeroinitializer, <32 x i32> zeroinitializer
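A note on the negative tests above: the new patterns are bounded by the NEON
lane-index operand types (VectorIndexB/H/S/D), so an extract whose lane lies
beyond the bottom 128 bits does not match, and codegen falls back to an SVE
lane move plus a transfer through a GPR, e.g. for lane 16 of nxv16i8:

  mov  z1.b, z1.b[16]
  fmov w8, s1
  mov  v0.b[15], w8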
>From 2d6e6dff068e2f69c51ebd6abad8bcf8f4144b85 Mon Sep 17 00:00:00 2001
From: Spencer Abson <Spencer.Abson at arm.com>
Date: Tue, 29 Oct 2024 16:51:10 +0000
Subject: [PATCH 2/4] Merge new patterns into existing Neon_INS_elt_patterns
work
---
llvm/lib/Target/AArch64/AArch64InstrInfo.td | 111 +++++++++++++-----
.../lib/Target/AArch64/AArch64SVEInstrInfo.td | 50 --------
2 files changed, 84 insertions(+), 77 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 6194de2d56b630..e79e002bc601e2 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -7222,44 +7222,101 @@ def : Pat<(v2i64 (int_aarch64_neon_vcopy_lane
V128:$Vd, VectorIndexD:$idx, V128:$Vs, VectorIndexD:$idx2)
)>;
-multiclass Neon_INS_elt_pattern<ValueType VT128, ValueType VT64,
- ValueType VTScal, Instruction INS> {
- def : Pat<(VT128 (vector_insert V128:$src,
- (VTScal (vector_extract (VT128 V128:$Rn), (i64 imm:$Immn))),
+// Insert an extracted vector element into a 128-bit Neon vector
+multiclass Neon_INS_elt_pattern_v128<ValueType VT128, ValueType VT64, ValueType VTSVE,
+ ValueType VTScal, Operand ExIdxTy, Instruction INS> {
+ // Extracting from the lower 128-bits of an SVE vector
+ def : Pat<(VT128 (vector_insert VT128:$Rn,
+ (VTScal (vector_extract VTSVE:$Rm, (i64 ExIdxTy:$Immn))),
+ (i64 imm:$Immd))),
+ (INS VT128:$Rn, imm:$Immd, (VT128 (EXTRACT_SUBREG VTSVE:$Rm, zsub)), ExIdxTy:$Immn)>;
+
+ // Extracting from another Neon vector
+ def : Pat<(VT128 (vector_insert V128:$Rn,
+ (VTScal (vector_extract (VT128 V128:$Rm), (i64 imm:$Immn))),
(i64 imm:$Immd))),
- (INS V128:$src, imm:$Immd, V128:$Rn, imm:$Immn)>;
-
- def : Pat<(VT128 (vector_insert V128:$src,
- (VTScal (vector_extract (VT64 V64:$Rn), (i64 imm:$Immn))),
- (i64 imm:$Immd))),
- (INS V128:$src, imm:$Immd,
- (SUBREG_TO_REG (i64 0), V64:$Rn, dsub), imm:$Immn)>;
+ (INS V128:$Rn, imm:$Immd, V128:$Rm, imm:$Immn)>;
+
+ def : Pat<(VT128 (vector_insert V128:$Rn,
+ (VTScal (vector_extract (VT64 V64:$Rm), (i64 imm:$Immn))),
+ (i64 imm:$Immd))),
+ (INS V128:$Rn, imm:$Immd,
+ (SUBREG_TO_REG (i64 0), V64:$Rm, dsub), imm:$Immn)>;
+}
+
+// Insert an extracted vector element into a 64-bit Neon vector
+multiclass Neon_INS_elt_pattern_v64<ValueType VT128, ValueType VT64, ValueType VTSVE,
+ ValueType VTScal, Operand ExIdxTy, Instruction INS> {
+ // Extracting from the lower 128-bits of an SVE vector
+ def : Pat<(VT64 (vector_insert VT64:$Rn,
+ (VTScal (vector_extract VTSVE:$Rm, (i64 ExIdxTy:$Immn))),
+ (i64 imm:$Immd))),
+ (EXTRACT_SUBREG (INS (SUBREG_TO_REG (i64 0), VT64:$Rn, dsub), imm:$Immd,
+ (VT128 (EXTRACT_SUBREG VTSVE:$Rm, zsub)), ExIdxTy:$Immn),
+ dsub)>;
- def : Pat<(VT64 (vector_insert V64:$src,
- (VTScal (vector_extract (VT128 V128:$Rn), (i64 imm:$Immn))),
+ // Extracting from another Neon vector
+ def : Pat<(VT64 (vector_insert V64:$Rn,
+ (VTScal (vector_extract (VT128 V128:$Rm), (i64 imm:$Immn))),
(i64 imm:$Immd))),
- (EXTRACT_SUBREG (INS (SUBREG_TO_REG (i64 0), V64:$src, dsub),
- imm:$Immd, V128:$Rn, imm:$Immn),
+ (EXTRACT_SUBREG (INS (SUBREG_TO_REG (i64 0), V64:$Rn, dsub),
+ imm:$Immd, V128:$Rm, imm:$Immn),
dsub)>;
- def : Pat<(VT64 (vector_insert V64:$src,
- (VTScal (vector_extract (VT64 V64:$Rn), (i64 imm:$Immn))),
+ def : Pat<(VT64 (vector_insert V64:$Rn,
+ (VTScal (vector_extract (VT64 V64:$Rm), (i64 imm:$Immn))),
(i64 imm:$Immd))),
(EXTRACT_SUBREG
- (INS (SUBREG_TO_REG (i64 0), V64:$src, dsub), imm:$Immd,
- (SUBREG_TO_REG (i64 0), V64:$Rn, dsub), imm:$Immn),
+ (INS (SUBREG_TO_REG (i64 0), V64:$Rn, dsub), imm:$Immd,
+ (SUBREG_TO_REG (i64 0), V64:$Rm, dsub), imm:$Immn),
+ dsub)>;
+}
+
+// Special case for <1 x double/i64> - insertion may be vector_from_scalar or
+// (vector_insert (vec) 0).
+multiclass Neon_INS_elt_pattern_v64d<ValueType VT128, ValueType VT64, ValueType VTSVE,
+ ValueType VTScal> {
+ // Extracting from the lower 128-bits of an SVE vector
+ def : Pat<(VT64 (vec_ins_or_scal_vec
+ (VTScal (vector_extract VTSVE:$Rm, VectorIndexD:$Immn)))),
+ (EXTRACT_SUBREG
+ (INSvi64lane (IMPLICIT_DEF), 0, (VT128 (EXTRACT_SUBREG VTSVE:$Rm, zsub)),
+ VectorIndexD:$Immn),
dsub)>;
+
+ def : Pat<(VT64 (vec_ins_or_scal_vec
+ (VTScal (vector_extract (VT128 V128:$Rm), (i64 imm:$Immn))))),
+ (EXTRACT_SUBREG
+ (INSvi64lane (IMPLICIT_DEF), (i64 0), V128:$Rm, imm:$Immn),
+ dsub)>;
+
+ // Extracting from another NEON vector
+ def : Pat<(VT64 (vec_ins_or_scal_vec
+ (VTScal (vector_extract (VT64 V64:$Rm), (i64 imm:$Immn))))),
+ (EXTRACT_SUBREG
+ (INSvi64lane (IMPLICIT_DEF), (i64 0),
+ (SUBREG_TO_REG (i64 0), V64:$Rm, dsub), imm:$Immn),
+ dsub)>;
+}
+
+multiclass Neon_INS_elt_pattern<ValueType VT128, ValueType VT64, ValueType SVESrcVT,
+ ValueType VTScal, Operand ExIdxTy, Instruction INS> {
+ defm : Neon_INS_elt_pattern_v64<VT128, VT64, SVESrcVT, VTScal, ExIdxTy, INS>;
+ defm : Neon_INS_elt_pattern_v128<VT128, VT64, SVESrcVT, VTScal, ExIdxTy, INS>;
}
-defm : Neon_INS_elt_pattern<v8f16, v4f16, f16, INSvi16lane>;
-defm : Neon_INS_elt_pattern<v8bf16, v4bf16, bf16, INSvi16lane>;
-defm : Neon_INS_elt_pattern<v4f32, v2f32, f32, INSvi32lane>;
-defm : Neon_INS_elt_pattern<v2f64, v1f64, f64, INSvi64lane>;
+defm : Neon_INS_elt_pattern<v4f32, v2f32, nxv4f32, f32, VectorIndexS, INSvi32lane>;
+defm : Neon_INS_elt_pattern<v8f16, v4f16, nxv8f16, f16, VectorIndexH, INSvi16lane>;
+defm : Neon_INS_elt_pattern<v8bf16, v4bf16, nxv8bf16, bf16, VectorIndexH, INSvi16lane>;
+defm : Neon_INS_elt_pattern<v4f32, v2f32, nxv4f32, f32, VectorIndexS, INSvi32lane>;
+defm : Neon_INS_elt_pattern<v16i8, v8i8, nxv16i8, i32, VectorIndexB, INSvi8lane>;
+defm : Neon_INS_elt_pattern<v8i16, v4i16, nxv8i16, i32, VectorIndexH, INSvi16lane>;
+defm : Neon_INS_elt_pattern<v4i32, v2i32, nxv4i32, i32, VectorIndexS, INSvi32lane>;
-defm : Neon_INS_elt_pattern<v16i8, v8i8, i32, INSvi8lane>;
-defm : Neon_INS_elt_pattern<v8i16, v4i16, i32, INSvi16lane>;
-defm : Neon_INS_elt_pattern<v4i32, v2i32, i32, INSvi32lane>;
-defm : Neon_INS_elt_pattern<v2i64, v1i64, i64, INSvi64lane>;
+defm : Neon_INS_elt_pattern_v128<v2f64, v1f64, nxv2f64, f64, VectorIndexD, INSvi64lane>;
+defm : Neon_INS_elt_pattern_v64d<v2f64, v1f64, nxv2f64, f64>;
+defm : Neon_INS_elt_pattern_v128<v2i64, v1i64, nxv2i64, i64, VectorIndexD, INSvi64lane>;
+defm : Neon_INS_elt_pattern_v64d<v2i64, v1i64, nxv2i64, i64>;
// Insert from bitcast
// vector_insert(bitcast(f32 src), n, lane) -> INSvi32lane(src, lane, INSERT_SUBREG(-, n), 0)
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index f678ce1058bafd..2b69903b133fe3 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -3367,47 +3367,6 @@ let Predicates = [HasSVEorSME] in {
(UMOVvi64 (v2i64 (EXTRACT_SUBREG ZPR:$vec, zsub)), VectorIndexD:$index)>;
} // End HasNEON
- // Use INS (element) when moving an element from the bottom 128-bits of an SVE type to a NEON vector.
- multiclass Neon_ins_sve_elt_pattern<ValueType NeonTy, ValueType NeonQTy, ValueType SVETy, ValueType ScalTy,
- Operand IdxTy, Operand NarrowIdxTy, Instruction INS> {
- // Insert into 128-bit NEON type from lowest 128-bits of SVE type
- def : Pat<(NeonQTy (vector_insert V128:$src,
- (ScalTy (vector_extract SVETy:$Rn, IdxTy:$idx_extract)),
- (IdxTy:$idx_insert))),
- (INS V128:$src, IdxTy:$idx_insert,
- (NeonQTy (EXTRACT_SUBREG SVETy:$Rn, zsub)), IdxTy:$idx_extract)>;
-
- // Insert into 64-bit NEON type from lowest 128-bits of SVE type
- def : Pat<(NeonTy (vector_insert V64:$src,
- (ScalTy (vector_extract SVETy:$Rn, IdxTy:$idx_extract)),
- (NarrowIdxTy:$idx_insert))),
- (EXTRACT_SUBREG
- (INS
- (INSERT_SUBREG (NeonQTy (IMPLICIT_DEF)), V64:$src, dsub), NarrowIdxTy:$idx_insert,
- (NeonQTy (EXTRACT_SUBREG SVETy:$Rn, zsub)), IdxTy:$idx_extract),
- dsub)>;
- }
-
- // Inserting into <1 x double/i64> will just create a new vector from the scalar value.
- multiclass Neon_ins_64b_sve_elt_pattern<ValueType NeonTy, ValueType NeonQTy, ValueType SVETy,
- ValueType ScalTy> {
- // Insert into 128-bit NEON type from lowest 128-bits of SVE type
- def : Pat<(NeonQTy (vector_insert V128:$src,
- (ScalTy (vector_extract SVETy:$Rn, VectorIndexD:$idx_extract)),
- (VectorIndexD:$idx_insert))),
- (INSvi64lane
- V128:$src, VectorIndexD:$idx_insert, (NeonQTy (EXTRACT_SUBREG SVETy:$Rn, zsub)),
- VectorIndexD:$idx_extract)>;
-
- // Insert into 64-bit NEON type from lowest 128-bits of SVE type
- def : Pat<(NeonTy (scalar_to_vector
- (ScalTy (vector_extract SVETy:$Rn, VectorIndexD:$idx_extract)))),
- (EXTRACT_SUBREG
- (INSvi64lane (IMPLICIT_DEF), 0, (NeonQTy (EXTRACT_SUBREG SVETy:$Rn, zsub)),
- VectorIndexD:$idx_extract),
- dsub)>;
- }
-
let Predicates = [HasNEON] in {
def : Pat<(sext_inreg (vector_extract nxv16i8:$vec, VectorIndexB:$index), i8),
(SMOVvi8to32 (v16i8 (EXTRACT_SUBREG ZPR:$vec, zsub)), VectorIndexB:$index)>;
@@ -3421,15 +3380,6 @@ let Predicates = [HasSVEorSME] in {
def : Pat<(sext (i32 (vector_extract nxv4i32:$vec, VectorIndexS:$index))),
(SMOVvi32to64 (v4i32 (EXTRACT_SUBREG ZPR:$vec, zsub)), VectorIndexS:$index)>;
-
- defm : Neon_ins_sve_elt_pattern<v8i8, v16i8, nxv16i8, i32, VectorIndexB, VectorIndexH, INSvi8lane>;
- defm : Neon_ins_sve_elt_pattern<v4f16, v8f16, nxv8f16, f16, VectorIndexH, VectorIndexS, INSvi16lane>;
- defm : Neon_ins_sve_elt_pattern<v4bf16, v8bf16, nxv8bf16, bf16, VectorIndexH, VectorIndexS, INSvi16lane>;
- defm : Neon_ins_sve_elt_pattern<v4i16, v8i16, nxv8i16, i32, VectorIndexH, VectorIndexS, INSvi16lane>;
- defm : Neon_ins_sve_elt_pattern<v2f32, v4f32, nxv4f32, f32, VectorIndexS, VectorIndexD, INSvi32lane>;
- defm : Neon_ins_sve_elt_pattern<v2i32, v4i32, nxv4i32, i32, VectorIndexS, VectorIndexD, INSvi32lane>;
- defm : Neon_ins_64b_sve_elt_pattern<v1f64, v2f64, nxv2f64, f64>;
- defm : Neon_ins_64b_sve_elt_pattern<v1i64, v2i64, nxv2i64, i64>;
} // End HasNEON
// Extract first element from vector.
>From c357f658dd42d49f1f3fa3abc4acb8fcc9cffc8f Mon Sep 17 00:00:00 2001
From: Spencer Abson <Spencer.Abson at arm.com>
Date: Wed, 30 Oct 2024 10:55:29 +0000
Subject: [PATCH 3/4] simplify implementation of new patterns
---
llvm/lib/Target/AArch64/AArch64InstrInfo.td | 111 ++++++------------
.../lib/Target/AArch64/AArch64SVEInstrInfo.td | 15 +++
2 files changed, 49 insertions(+), 77 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index e79e002bc601e2..78e40d9946e747 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -7222,101 +7222,58 @@ def : Pat<(v2i64 (int_aarch64_neon_vcopy_lane
V128:$Vd, VectorIndexD:$idx, V128:$Vs, VectorIndexD:$idx2)
)>;
-// Insert an extracted vector element into a 128-bit Neon vector
-multiclass Neon_INS_elt_pattern_v128<ValueType VT128, ValueType VT64, ValueType VTSVE,
- ValueType VTScal, Operand ExIdxTy, Instruction INS> {
- // Extracting from the lower 128-bits of an SVE vector
+// Move elements between vectors
+multiclass Neon_INS_elt_pattern<ValueType VT128, ValueType VT64, ValueType VTSVE,
+ ValueType VTScal, Operand SVEIdxTy, Instruction INS> {
+ // Extracting from the lowest 128-bits of an SVE vector
def : Pat<(VT128 (vector_insert VT128:$Rn,
- (VTScal (vector_extract VTSVE:$Rm, (i64 ExIdxTy:$Immn))),
+ (VTScal (vector_extract VTSVE:$Rm, (i64 SVEIdxTy:$Immn))),
(i64 imm:$Immd))),
- (INS VT128:$Rn, imm:$Immd, (VT128 (EXTRACT_SUBREG VTSVE:$Rm, zsub)), ExIdxTy:$Immn)>;
+ (INS VT128:$Rn, imm:$Immd, (VT128 (EXTRACT_SUBREG VTSVE:$Rm, zsub)), SVEIdxTy:$Immn)>;
- // Extracting from another Neon vector
- def : Pat<(VT128 (vector_insert V128:$Rn,
- (VTScal (vector_extract (VT128 V128:$Rm), (i64 imm:$Immn))),
- (i64 imm:$Immd))),
- (INS V128:$Rn, imm:$Immd, V128:$Rm, imm:$Immn)>;
-
- def : Pat<(VT128 (vector_insert V128:$Rn,
- (VTScal (vector_extract (VT64 V64:$Rm), (i64 imm:$Immn))),
- (i64 imm:$Immd))),
- (INS V128:$Rn, imm:$Immd,
- (SUBREG_TO_REG (i64 0), V64:$Rm, dsub), imm:$Immn)>;
-}
-
-// Insert an extracted vector element into a 64-bit Neon vector
-multiclass Neon_INS_elt_pattern_v64<ValueType VT128, ValueType VT64, ValueType VTSVE,
- ValueType VTScal, Operand ExIdxTy, Instruction INS> {
- // Extracting from the lower 128-bits of an SVE vector
def : Pat<(VT64 (vector_insert VT64:$Rn,
- (VTScal (vector_extract VTSVE:$Rm, (i64 ExIdxTy:$Immn))),
+ (VTScal (vector_extract VTSVE:$Rm, (i64 SVEIdxTy:$Immn))),
(i64 imm:$Immd))),
- (EXTRACT_SUBREG (INS (SUBREG_TO_REG (i64 0), VT64:$Rn, dsub), imm:$Immd,
- (VT128 (EXTRACT_SUBREG VTSVE:$Rm, zsub)), ExIdxTy:$Immn),
- dsub)>;
-
- // Extracting from another Neon vector
- def : Pat<(VT64 (vector_insert V64:$Rn,
- (VTScal (vector_extract (VT128 V128:$Rm), (i64 imm:$Immn))),
+ (EXTRACT_SUBREG
+ (INS (SUBREG_TO_REG (i64 0), VT64:$Rn, dsub), imm:$Immd,
+ (VT128 (EXTRACT_SUBREG VTSVE:$Rm, zsub)), SVEIdxTy:$Immn),
+ dsub)>;
+ // Extracting from another NEON vector
+ def : Pat<(VT128 (vector_insert V128:$src,
+ (VTScal (vector_extract (VT128 V128:$Rn), (i64 imm:$Immn))),
(i64 imm:$Immd))),
- (EXTRACT_SUBREG (INS (SUBREG_TO_REG (i64 0), V64:$Rn, dsub),
- imm:$Immd, V128:$Rm, imm:$Immn),
- dsub)>;
+ (INS V128:$src, imm:$Immd, V128:$Rn, imm:$Immn)>;
- def : Pat<(VT64 (vector_insert V64:$Rn,
- (VTScal (vector_extract (VT64 V64:$Rm), (i64 imm:$Immn))),
+ def : Pat<(VT128 (vector_insert V128:$src,
+ (VTScal (vector_extract (VT64 V64:$Rn), (i64 imm:$Immn))),
(i64 imm:$Immd))),
- (EXTRACT_SUBREG
- (INS (SUBREG_TO_REG (i64 0), V64:$Rn, dsub), imm:$Immd,
- (SUBREG_TO_REG (i64 0), V64:$Rm, dsub), imm:$Immn),
- dsub)>;
-}
-
-// Special case for <1 x double/i64> - insertion may be vector_from_scalar or
-// (vector_insert (vec) 0).
-multiclass Neon_INS_elt_pattern_v64d<ValueType VT128, ValueType VT64, ValueType VTSVE,
- ValueType VTScal> {
- // Extracting from the lower 128-bits of an SVE vector
- def : Pat<(VT64 (vec_ins_or_scal_vec
- (VTScal (vector_extract VTSVE:$Rm, VectorIndexD:$Immn)))),
- (EXTRACT_SUBREG
- (INSvi64lane (IMPLICIT_DEF), 0, (VT128 (EXTRACT_SUBREG VTSVE:$Rm, zsub)),
- VectorIndexD:$Immn),
- dsub)>;
+ (INS V128:$src, imm:$Immd,
+ (SUBREG_TO_REG (i64 0), V64:$Rn, dsub), imm:$Immn)>;
- def : Pat<(VT64 (vec_ins_or_scal_vec
- (VTScal (vector_extract (VT128 V128:$Rm), (i64 imm:$Immn))))),
- (EXTRACT_SUBREG
- (INSvi64lane (IMPLICIT_DEF), (i64 0), V128:$Rm, imm:$Immn),
- dsub)>;
+ def : Pat<(VT64 (vector_insert V64:$src,
+ (VTScal (vector_extract (VT128 V128:$Rn), (i64 imm:$Immn))),
+ (i64 imm:$Immd))),
+ (EXTRACT_SUBREG (INS (SUBREG_TO_REG (i64 0), V64:$src, dsub),
+ imm:$Immd, V128:$Rn, imm:$Immn),
+ dsub)>;
- // Extracting from another NEON vector
- def : Pat<(VT64 (vec_ins_or_scal_vec
- (VTScal (vector_extract (VT64 V64:$Rm), (i64 imm:$Immn))))),
+ def : Pat<(VT64 (vector_insert V64:$src,
+ (VTScal (vector_extract (VT64 V64:$Rn), (i64 imm:$Immn))),
+ (i64 imm:$Immd))),
(EXTRACT_SUBREG
- (INSvi64lane (IMPLICIT_DEF), (i64 0),
- (SUBREG_TO_REG (i64 0), V64:$Rm, dsub), imm:$Immn),
+ (INS (SUBREG_TO_REG (i64 0), V64:$src, dsub), imm:$Immd,
+ (SUBREG_TO_REG (i64 0), V64:$Rn, dsub), imm:$Immn),
dsub)>;
}
-multiclass Neon_INS_elt_pattern<ValueType VT128, ValueType VT64, ValueType SVESrcVT,
- ValueType VTScal, Operand ExIdxTy, Instruction INS> {
- defm : Neon_INS_elt_pattern_v64<VT128, VT64, SVESrcVT, VTScal, ExIdxTy, INS>;
- defm : Neon_INS_elt_pattern_v128<VT128, VT64, SVESrcVT, VTScal, ExIdxTy, INS>;
-}
-
-defm : Neon_INS_elt_pattern<v4f32, v2f32, nxv4f32, f32, VectorIndexS, INSvi32lane>;
+defm : Neon_INS_elt_pattern<v8i16, v4i16, nxv8i16, i32, VectorIndexH, INSvi16lane>;
defm : Neon_INS_elt_pattern<v8f16, v4f16, nxv8f16, f16, VectorIndexH, INSvi16lane>;
defm : Neon_INS_elt_pattern<v8bf16, v4bf16, nxv8bf16, bf16, VectorIndexH, INSvi16lane>;
-defm : Neon_INS_elt_pattern<v4f32, v2f32, nxv4f32, f32, VectorIndexS, INSvi32lane>;
defm : Neon_INS_elt_pattern<v16i8, v8i8, nxv16i8, i32, VectorIndexB, INSvi8lane>;
-defm : Neon_INS_elt_pattern<v8i16, v4i16, nxv8i16, i32, VectorIndexH, INSvi16lane>;
+defm : Neon_INS_elt_pattern<v4f32, v2f32, nxv4f32, f32, VectorIndexS, INSvi32lane>;
defm : Neon_INS_elt_pattern<v4i32, v2i32, nxv4i32, i32, VectorIndexS, INSvi32lane>;
-
-defm : Neon_INS_elt_pattern_v128<v2f64, v1f64, nxv2f64, f64, VectorIndexD, INSvi64lane>;
-defm : Neon_INS_elt_pattern_v64d<v2f64, v1f64, nxv2f64, f64>;
-defm : Neon_INS_elt_pattern_v128<v2i64, v1i64, nxv2i64, i64, VectorIndexD, INSvi64lane>;
-defm : Neon_INS_elt_pattern_v64d<v2i64, v1i64, nxv2i64, i64>;
+defm : Neon_INS_elt_pattern<v2f64, v1f64, nxv2f64, f64, VectorIndexD, INSvi64lane>;
+defm : Neon_INS_elt_pattern<v2i64, v1i64, nxv2i64, i64, VectorIndexD, INSvi64lane>;
// Insert from bitcast
// vector_insert(bitcast(f32 src), n, lane) -> INSvi32lane(src, lane, INSERT_SUBREG(-, n), 0)
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index 2b69903b133fe3..e68544361ff2ed 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -3365,6 +3365,21 @@ let Predicates = [HasSVEorSME] in {
(UMOVvi32 (v4i32 (EXTRACT_SUBREG ZPR:$vec, zsub)), VectorIndexS:$index)>;
def : Pat<(i64 (vector_extract nxv2i64:$vec, VectorIndexD:$index)),
(UMOVvi64 (v2i64 (EXTRACT_SUBREG ZPR:$vec, zsub)), VectorIndexD:$index)>;
+
+ // Move element from the bottom 128-bits of a scalable vector to a single-element vector.
+ // Alternative case where insertelement is just scalar_to_vector rather than vector_insert.
+ def : Pat<(v1f64 (scalar_to_vector
+ (f64 (vector_extract nxv2f64:$vec, VectorIndexD:$index)))),
+ (EXTRACT_SUBREG
+ (INSvi64lane (IMPLICIT_DEF), (i64 0),
+ (EXTRACT_SUBREG nxv2f64:$vec, zsub), VectorIndexD:$index),
+ dsub)>;
+ def : Pat<(v1i64 (scalar_to_vector
+ (i64 (vector_extract nxv2i64:$vec, VectorIndexD:$index)))),
+ (EXTRACT_SUBREG
+ (INSvi64lane (IMPLICIT_DEF), (i64 0),
+ (EXTRACT_SUBREG nxv2i64:$vec, zsub), VectorIndexD:$index),
+ dsub)>;
} // End HasNEON
let Predicates = [HasNEON] in {
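A sketch of the case the two patterns above cover (mirroring
test_lane1_nxv2f64 in the new test file): insertion into a single-element
vector reaches instruction selection as scalar_to_vector rather than
vector_insert, so

  %c = extractelement <vscale x 2 x double> %b, i32 1
  %d = insertelement <1 x double> %a, double %c, i32 0

now selects to a single lane move:

  mov v0.d[0], v1.d[1]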
>From 4f236b0ef6ee11106a01a2985b0ec14e44d6e56d Mon Sep 17 00:00:00 2001
From: Spencer Abson <Spencer.Abson at arm.com>
Date: Thu, 31 Oct 2024 09:51:53 +0000
Subject: [PATCH 4/4] Restore ordering of pattern defs
---
llvm/lib/Target/AArch64/AArch64InstrInfo.td | 7 ++++---
1 file changed, 4 insertions(+), 3 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 78e40d9946e747..7765fa6ed63e96 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -7266,13 +7266,14 @@ multiclass Neon_INS_elt_pattern<ValueType VT128, ValueType VT64, ValueType VTSVE
dsub)>;
}
-defm : Neon_INS_elt_pattern<v8i16, v4i16, nxv8i16, i32, VectorIndexH, INSvi16lane>;
defm : Neon_INS_elt_pattern<v8f16, v4f16, nxv8f16, f16, VectorIndexH, INSvi16lane>;
defm : Neon_INS_elt_pattern<v8bf16, v4bf16, nxv8bf16, bf16, VectorIndexH, INSvi16lane>;
-defm : Neon_INS_elt_pattern<v16i8, v8i8, nxv16i8, i32, VectorIndexB, INSvi8lane>;
defm : Neon_INS_elt_pattern<v4f32, v2f32, nxv4f32, f32, VectorIndexS, INSvi32lane>;
-defm : Neon_INS_elt_pattern<v4i32, v2i32, nxv4i32, i32, VectorIndexS, INSvi32lane>;
defm : Neon_INS_elt_pattern<v2f64, v1f64, nxv2f64, f64, VectorIndexD, INSvi64lane>;
+
+defm : Neon_INS_elt_pattern<v16i8, v8i8, nxv16i8, i32, VectorIndexB, INSvi8lane>;
+defm : Neon_INS_elt_pattern<v8i16, v4i16, nxv8i16, i32, VectorIndexH, INSvi16lane>;
+defm : Neon_INS_elt_pattern<v4i32, v2i32, nxv4i32, i32, VectorIndexS, INSvi32lane>;
defm : Neon_INS_elt_pattern<v2i64, v1i64, nxv2i64, i64, VectorIndexD, INSvi64lane>;
// Insert from bitcast