[llvm] [AArch64] Add tablegen patterns for i8 and i16 vector insert/extract pairs (PR #136091)

via llvm-commits llvm-commits at lists.llvm.org
Wed Apr 16 23:57:29 PDT 2025


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-aarch64

Author: David Green (davemgreen)

<details>
<summary>Changes</summary>

i8 and i16 vector extracts/inserts have to go via an i32 to make sure the types are legal. This patch adds patterns for an extract from an i8/i16 vector that is inserted into an i16/i32 vector. This avoids the round trip via a GPR, which can limit performance.

---

Patch is 58.09 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/136091.diff


9 Files Affected:

- (modified) llvm/include/llvm/Target/TargetSelectionDAG.td (+5-2) 
- (modified) llvm/lib/Target/AArch64/AArch64InstrInfo.td (+35) 
- (modified) llvm/test/CodeGen/AArch64/arm64-extract-insert-varidx.ll (+6-10) 
- (modified) llvm/test/CodeGen/AArch64/bitcast-extend.ll (+12-20) 
- (modified) llvm/test/CodeGen/AArch64/fix-shuffle-vector-be-rev.ll (+12-20) 
- (modified) llvm/test/CodeGen/AArch64/itofp.ll (+267-379) 
- (modified) llvm/test/CodeGen/AArch64/neon-bitcast.ll (+6-10) 
- (modified) llvm/test/CodeGen/AArch64/shuffle-extend.ll (+30-56) 
- (modified) llvm/test/CodeGen/AArch64/vector-fcvt.ll (+94-142) 


``````````diff
diff --git a/llvm/include/llvm/Target/TargetSelectionDAG.td b/llvm/include/llvm/Target/TargetSelectionDAG.td
index 9c241b6c4df0f..30f580faee290 100644
--- a/llvm/include/llvm/Target/TargetSelectionDAG.td
+++ b/llvm/include/llvm/Target/TargetSelectionDAG.td
@@ -818,8 +818,11 @@ def step_vector : SDNode<"ISD::STEP_VECTOR", SDTypeProfile<1, 1,
 def scalar_to_vector : SDNode<"ISD::SCALAR_TO_VECTOR", SDTypeProfile<1, 1, []>,
                               []>;
 
-// vector_extract/vector_insert are deprecated. extractelt/insertelt
-// are preferred.
+// vector_extract/vector_insert are similar to extractelt/insertelt but allow
+// types that require promotion (a 16i8 extract where i8 is not a legal type so
+// uses i32 for example). extractelt/insertelt are preferred where the element
+// type and the extracted types match due to the extra type checking they
+// perform.
 def vector_extract : SDNode<"ISD::EXTRACT_VECTOR_ELT",
     SDTypeProfile<1, 2, [SDTCisPtrTy<2>]>, []>;
 def vector_insert : SDNode<"ISD::INSERT_VECTOR_ELT",
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index a7a01ed785afa..33fe71e4913b5 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -7307,6 +7307,41 @@ def : Pat<(v2i32 (vector_insert v2i32:$src, (i32 (bitconvert (f32 FPR32:$Sn))),
 def : Pat<(v2i64 (vector_insert v2i64:$src, (i64 (bitconvert (f64 FPR64:$Sn))), (i64 imm:$Immd))),
           (INSvi64lane V128:$src, imm:$Immd, (INSERT_SUBREG (IMPLICIT_DEF), FPR64:$Sn, dsub), 0)>;
 
+// Patterns for i8/i16 -> v2i32/v4i16 lane moves via insert and extract that go via i32.
+multiclass Neon_INS_elt_ext_pattern<ValueType VT128, ValueType VT64, ValueType OutVT,
+                                    Instruction INS, SDNodeXForm VecIndexMult> {
+  // VT64->OutVT
+  def : Pat<(OutVT (vector_insert (OutVT V64:$src),
+                                  (i32 (vector_extract (VT64 V64:$Rn), (i64 imm:$Immn))),
+                                  (i64 imm:$Immd))),
+            (EXTRACT_SUBREG
+              (INS (INSERT_SUBREG (VT128 (IMPLICIT_DEF)), V64:$src, dsub), (VecIndexMult imm:$Immd),
+                   (INSERT_SUBREG (VT128 (IMPLICIT_DEF)), V64:$Rn, dsub), imm:$Immn),
+              dsub)>;
+  def : Pat<(OutVT (scalar_to_vector (i32 (vector_extract (VT64 V64:$Rn), (i64 imm:$Immn))))),
+            (EXTRACT_SUBREG
+              (INS (IMPLICIT_DEF), 0,
+                   (INSERT_SUBREG (VT128 (IMPLICIT_DEF)), V64:$Rn, dsub), imm:$Immn),
+              dsub)>;
+
+  // VT128->OutVT
+  def : Pat<(OutVT (vector_insert (OutVT V64:$src),
+                                  (i32 (vector_extract (VT128 V128:$Rn), (i64 imm:$Immn))),
+                                  (i64 imm:$Immd))),
+            (EXTRACT_SUBREG
+              (INS (SUBREG_TO_REG (i64 0), V64:$src, dsub), (VecIndexMult imm:$Immd),
+                   V128:$Rn, imm:$Immn),
+              dsub)>;
+  def : Pat<(OutVT (scalar_to_vector (i32 (vector_extract (VT128 V128:$Rn), (i64 imm:$Immn))))),
+            (EXTRACT_SUBREG
+              (INS (IMPLICIT_DEF), 0, V128:$Rn, imm:$Immn),
+              dsub)>;
+}
+
+defm : Neon_INS_elt_ext_pattern<v16i8, v8i8, v4i16, INSvi8lane, VecIndex_x2>;
+defm : Neon_INS_elt_ext_pattern<v16i8, v8i8, v2i32, INSvi8lane, VecIndex_x4>;
+defm : Neon_INS_elt_ext_pattern<v8i16, v4i16, v2i32, INSvi16lane, VecIndex_x2>;
+
 // bitcast of an extract
 // f32 bitcast(vector_extract(v4i32 src, lane)) -> EXTRACT_SUBREG(INSvi32lane(-, 0, src, lane))
 def : Pat<(f32 (bitconvert (i32 (vector_extract v4i32:$src, imm:$Immd)))),
diff --git a/llvm/test/CodeGen/AArch64/arm64-extract-insert-varidx.ll b/llvm/test/CodeGen/AArch64/arm64-extract-insert-varidx.ll
index 7a4cdd52db904..fccb1fb675768 100644
--- a/llvm/test/CodeGen/AArch64/arm64-extract-insert-varidx.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-extract-insert-varidx.ll
@@ -11,14 +11,11 @@ define <4 x i8> @test_varidx_extract_v8s8(<8 x i8> %x, i32 %idx) {
 ; CHECK-SDAG-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-SDAG-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SDAG-NEXT:    str d0, [sp, #8]
-; CHECK-SDAG-NEXT:    umov w9, v0.b[1]
 ; CHECK-SDAG-NEXT:    bfxil x8, x0, #0, #3
 ; CHECK-SDAG-NEXT:    ld1 { v1.b }[0], [x8]
-; CHECK-SDAG-NEXT:    umov w8, v0.b[2]
-; CHECK-SDAG-NEXT:    mov v1.h[1], w9
-; CHECK-SDAG-NEXT:    umov w9, v0.b[3]
-; CHECK-SDAG-NEXT:    mov v1.h[2], w8
-; CHECK-SDAG-NEXT:    mov v1.h[3], w9
+; CHECK-SDAG-NEXT:    mov v1.b[2], v0.b[1]
+; CHECK-SDAG-NEXT:    mov v1.b[4], v0.b[2]
+; CHECK-SDAG-NEXT:    mov v1.b[6], v0.b[3]
 ; CHECK-SDAG-NEXT:    fmov d0, d1
 ; CHECK-SDAG-NEXT:    add sp, sp, #16
 ; CHECK-SDAG-NEXT:    ret
@@ -168,11 +165,10 @@ define <2 x i16> @test_varidx_extract_v4s16(<4 x i16> %x, i32 %idx) {
 ; CHECK-SDAG-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-SDAG-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SDAG-NEXT:    str d0, [sp, #8]
-; CHECK-SDAG-NEXT:    umov w9, v0.h[1]
 ; CHECK-SDAG-NEXT:    bfi x8, x0, #1, #2
-; CHECK-SDAG-NEXT:    ld1 { v0.h }[0], [x8]
-; CHECK-SDAG-NEXT:    mov v0.s[1], w9
-; CHECK-SDAG-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-SDAG-NEXT:    ld1 { v1.h }[0], [x8]
+; CHECK-SDAG-NEXT:    mov v1.h[2], v0.h[1]
+; CHECK-SDAG-NEXT:    fmov d0, d1
 ; CHECK-SDAG-NEXT:    add sp, sp, #16
 ; CHECK-SDAG-NEXT:    ret
 ;
diff --git a/llvm/test/CodeGen/AArch64/bitcast-extend.ll b/llvm/test/CodeGen/AArch64/bitcast-extend.ll
index 85daa3ca6623e..33238ccf86a39 100644
--- a/llvm/test/CodeGen/AArch64/bitcast-extend.ll
+++ b/llvm/test/CodeGen/AArch64/bitcast-extend.ll
@@ -70,16 +70,12 @@ define <4 x i64> @z_i32_v4i64(i32 %x) {
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    fmov s0, w0
 ; CHECK-SD-NEXT:    movi v1.2d, #0x000000000000ff
-; CHECK-SD-NEXT:    umov w8, v0.b[2]
-; CHECK-SD-NEXT:    umov w9, v0.b[0]
-; CHECK-SD-NEXT:    umov w10, v0.b[3]
-; CHECK-SD-NEXT:    umov w11, v0.b[1]
-; CHECK-SD-NEXT:    fmov s0, w9
-; CHECK-SD-NEXT:    fmov s2, w8
-; CHECK-SD-NEXT:    mov v0.s[1], w11
-; CHECK-SD-NEXT:    mov v2.s[1], w10
-; CHECK-SD-NEXT:    ushll v0.2d, v0.2s, #0
-; CHECK-SD-NEXT:    ushll v2.2d, v2.2s, #0
+; CHECK-SD-NEXT:    mov v2.b[0], v0.b[0]
+; CHECK-SD-NEXT:    mov v3.b[0], v0.b[2]
+; CHECK-SD-NEXT:    mov v2.b[4], v0.b[1]
+; CHECK-SD-NEXT:    mov v3.b[4], v0.b[3]
+; CHECK-SD-NEXT:    ushll v0.2d, v2.2s, #0
+; CHECK-SD-NEXT:    ushll v2.2d, v3.2s, #0
 ; CHECK-SD-NEXT:    and v0.16b, v0.16b, v1.16b
 ; CHECK-SD-NEXT:    and v1.16b, v2.16b, v1.16b
 ; CHECK-SD-NEXT:    ret
@@ -176,16 +172,12 @@ define <4 x i64> @s_i32_v4i64(i32 %x) {
 ; CHECK-SD-LABEL: s_i32_v4i64:
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    fmov s0, w0
-; CHECK-SD-NEXT:    umov w8, v0.b[2]
-; CHECK-SD-NEXT:    umov w9, v0.b[0]
-; CHECK-SD-NEXT:    umov w10, v0.b[3]
-; CHECK-SD-NEXT:    umov w11, v0.b[1]
-; CHECK-SD-NEXT:    fmov s0, w9
-; CHECK-SD-NEXT:    fmov s1, w8
-; CHECK-SD-NEXT:    mov v0.s[1], w11
-; CHECK-SD-NEXT:    mov v1.s[1], w10
-; CHECK-SD-NEXT:    ushll v0.2d, v0.2s, #0
-; CHECK-SD-NEXT:    ushll v1.2d, v1.2s, #0
+; CHECK-SD-NEXT:    mov v1.b[0], v0.b[0]
+; CHECK-SD-NEXT:    mov v2.b[0], v0.b[2]
+; CHECK-SD-NEXT:    mov v1.b[4], v0.b[1]
+; CHECK-SD-NEXT:    mov v2.b[4], v0.b[3]
+; CHECK-SD-NEXT:    ushll v0.2d, v1.2s, #0
+; CHECK-SD-NEXT:    ushll v1.2d, v2.2s, #0
 ; CHECK-SD-NEXT:    shl v0.2d, v0.2d, #56
 ; CHECK-SD-NEXT:    shl v1.2d, v1.2d, #56
 ; CHECK-SD-NEXT:    sshr v0.2d, v0.2d, #56
diff --git a/llvm/test/CodeGen/AArch64/fix-shuffle-vector-be-rev.ll b/llvm/test/CodeGen/AArch64/fix-shuffle-vector-be-rev.ll
index 8b74de1c127dd..e90b6cb7f809b 100644
--- a/llvm/test/CodeGen/AArch64/fix-shuffle-vector-be-rev.ll
+++ b/llvm/test/CodeGen/AArch64/fix-shuffle-vector-be-rev.ll
@@ -5,16 +5,12 @@
 define <4 x i16> @test_reconstructshuffle(<16 x i8> %a, <16 x i8> %b) nounwind {
 ; CHECKLE-LABEL: test_reconstructshuffle:
 ; CHECKLE:       // %bb.0:
-; CHECKLE-NEXT:    umov w8, v0.b[3]
-; CHECKLE-NEXT:    umov w9, v0.b[2]
-; CHECKLE-NEXT:    fmov s2, w8
-; CHECKLE-NEXT:    umov w8, v0.b[1]
-; CHECKLE-NEXT:    mov v2.h[1], w9
-; CHECKLE-NEXT:    mov v2.h[2], w8
-; CHECKLE-NEXT:    umov w8, v0.b[0]
-; CHECKLE-NEXT:    ext v0.16b, v1.16b, v1.16b, #8
-; CHECKLE-NEXT:    mov v2.h[3], w8
-; CHECKLE-NEXT:    zip2 v0.8b, v0.8b, v0.8b
+; CHECKLE-NEXT:    mov v2.b[0], v0.b[3]
+; CHECKLE-NEXT:    ext v1.16b, v1.16b, v1.16b, #8
+; CHECKLE-NEXT:    mov v2.b[2], v0.b[2]
+; CHECKLE-NEXT:    mov v2.b[4], v0.b[1]
+; CHECKLE-NEXT:    mov v2.b[6], v0.b[0]
+; CHECKLE-NEXT:    zip2 v0.8b, v1.8b, v0.8b
 ; CHECKLE-NEXT:    add v0.4h, v2.4h, v0.4h
 ; CHECKLE-NEXT:    bic v0.4h, #255, lsl #8
 ; CHECKLE-NEXT:    ret
@@ -25,16 +21,12 @@ define <4 x i16> @test_reconstructshuffle(<16 x i8> %a, <16 x i8> %b) nounwind {
 ; CHECKBE-NEXT:    rev64 v1.16b, v1.16b
 ; CHECKBE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
 ; CHECKBE-NEXT:    ext v1.16b, v1.16b, v1.16b, #8
-; CHECKBE-NEXT:    umov w8, v0.b[3]
-; CHECKBE-NEXT:    umov w9, v0.b[2]
-; CHECKBE-NEXT:    fmov s2, w8
-; CHECKBE-NEXT:    umov w8, v0.b[1]
-; CHECKBE-NEXT:    mov v2.h[1], w9
-; CHECKBE-NEXT:    mov v2.h[2], w8
-; CHECKBE-NEXT:    umov w8, v0.b[0]
-; CHECKBE-NEXT:    ext v0.16b, v1.16b, v1.16b, #8
-; CHECKBE-NEXT:    mov v2.h[3], w8
-; CHECKBE-NEXT:    zip2 v0.8b, v0.8b, v0.8b
+; CHECKBE-NEXT:    mov v2.b[0], v0.b[3]
+; CHECKBE-NEXT:    ext v1.16b, v1.16b, v1.16b, #8
+; CHECKBE-NEXT:    mov v2.b[2], v0.b[2]
+; CHECKBE-NEXT:    mov v2.b[4], v0.b[1]
+; CHECKBE-NEXT:    mov v2.b[6], v0.b[0]
+; CHECKBE-NEXT:    zip2 v0.8b, v1.8b, v0.8b
 ; CHECKBE-NEXT:    add v0.4h, v2.4h, v0.4h
 ; CHECKBE-NEXT:    bic v0.4h, #255, lsl #8
 ; CHECKBE-NEXT:    rev64 v0.4h, v0.4h
diff --git a/llvm/test/CodeGen/AArch64/itofp.ll b/llvm/test/CodeGen/AArch64/itofp.ll
index 07957c117868d..fb2bdb4d63f47 100644
--- a/llvm/test/CodeGen/AArch64/itofp.ll
+++ b/llvm/test/CodeGen/AArch64/itofp.ll
@@ -3443,26 +3443,18 @@ define <8 x double> @stofp_v8i8_v8f64(<8 x i8> %a) {
 ; CHECK-SD-LABEL: stofp_v8i8_v8f64:
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT:    umov w8, v0.b[0]
-; CHECK-SD-NEXT:    umov w9, v0.b[2]
-; CHECK-SD-NEXT:    umov w11, v0.b[4]
-; CHECK-SD-NEXT:    umov w12, v0.b[6]
-; CHECK-SD-NEXT:    umov w10, v0.b[1]
-; CHECK-SD-NEXT:    umov w13, v0.b[3]
-; CHECK-SD-NEXT:    umov w14, v0.b[5]
-; CHECK-SD-NEXT:    umov w15, v0.b[7]
-; CHECK-SD-NEXT:    fmov s0, w8
-; CHECK-SD-NEXT:    fmov s1, w9
-; CHECK-SD-NEXT:    fmov s2, w11
-; CHECK-SD-NEXT:    fmov s3, w12
-; CHECK-SD-NEXT:    mov v0.s[1], w10
-; CHECK-SD-NEXT:    mov v1.s[1], w13
-; CHECK-SD-NEXT:    mov v2.s[1], w14
-; CHECK-SD-NEXT:    mov v3.s[1], w15
-; CHECK-SD-NEXT:    shl v0.2s, v0.2s, #24
-; CHECK-SD-NEXT:    shl v1.2s, v1.2s, #24
-; CHECK-SD-NEXT:    shl v2.2s, v2.2s, #24
-; CHECK-SD-NEXT:    shl v3.2s, v3.2s, #24
+; CHECK-SD-NEXT:    mov v1.b[0], v0.b[0]
+; CHECK-SD-NEXT:    mov v2.b[0], v0.b[2]
+; CHECK-SD-NEXT:    mov v3.b[0], v0.b[4]
+; CHECK-SD-NEXT:    mov v4.b[0], v0.b[6]
+; CHECK-SD-NEXT:    mov v1.b[4], v0.b[1]
+; CHECK-SD-NEXT:    mov v2.b[4], v0.b[3]
+; CHECK-SD-NEXT:    mov v3.b[4], v0.b[5]
+; CHECK-SD-NEXT:    mov v4.b[4], v0.b[7]
+; CHECK-SD-NEXT:    shl v0.2s, v1.2s, #24
+; CHECK-SD-NEXT:    shl v1.2s, v2.2s, #24
+; CHECK-SD-NEXT:    shl v2.2s, v3.2s, #24
+; CHECK-SD-NEXT:    shl v3.2s, v4.2s, #24
 ; CHECK-SD-NEXT:    sshr v0.2s, v0.2s, #24
 ; CHECK-SD-NEXT:    sshr v1.2s, v1.2s, #24
 ; CHECK-SD-NEXT:    sshr v2.2s, v2.2s, #24
@@ -3500,27 +3492,19 @@ define <8 x double> @utofp_v8i8_v8f64(<8 x i8> %a) {
 ; CHECK-SD-LABEL: utofp_v8i8_v8f64:
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT:    umov w8, v0.b[0]
-; CHECK-SD-NEXT:    umov w9, v0.b[2]
-; CHECK-SD-NEXT:    umov w11, v0.b[4]
-; CHECK-SD-NEXT:    umov w12, v0.b[6]
-; CHECK-SD-NEXT:    umov w10, v0.b[1]
-; CHECK-SD-NEXT:    umov w13, v0.b[3]
-; CHECK-SD-NEXT:    umov w14, v0.b[5]
-; CHECK-SD-NEXT:    umov w15, v0.b[7]
+; CHECK-SD-NEXT:    mov v2.b[0], v0.b[0]
+; CHECK-SD-NEXT:    mov v3.b[0], v0.b[2]
+; CHECK-SD-NEXT:    mov v4.b[0], v0.b[4]
+; CHECK-SD-NEXT:    mov v5.b[0], v0.b[6]
 ; CHECK-SD-NEXT:    movi d1, #0x0000ff000000ff
-; CHECK-SD-NEXT:    fmov s0, w8
-; CHECK-SD-NEXT:    fmov s2, w9
-; CHECK-SD-NEXT:    fmov s3, w11
-; CHECK-SD-NEXT:    fmov s4, w12
-; CHECK-SD-NEXT:    mov v0.s[1], w10
-; CHECK-SD-NEXT:    mov v2.s[1], w13
-; CHECK-SD-NEXT:    mov v3.s[1], w14
-; CHECK-SD-NEXT:    mov v4.s[1], w15
-; CHECK-SD-NEXT:    and v0.8b, v0.8b, v1.8b
-; CHECK-SD-NEXT:    and v2.8b, v2.8b, v1.8b
-; CHECK-SD-NEXT:    and v3.8b, v3.8b, v1.8b
-; CHECK-SD-NEXT:    and v1.8b, v4.8b, v1.8b
+; CHECK-SD-NEXT:    mov v2.b[4], v0.b[1]
+; CHECK-SD-NEXT:    mov v3.b[4], v0.b[3]
+; CHECK-SD-NEXT:    mov v4.b[4], v0.b[5]
+; CHECK-SD-NEXT:    mov v5.b[4], v0.b[7]
+; CHECK-SD-NEXT:    and v0.8b, v2.8b, v1.8b
+; CHECK-SD-NEXT:    and v2.8b, v3.8b, v1.8b
+; CHECK-SD-NEXT:    and v3.8b, v4.8b, v1.8b
+; CHECK-SD-NEXT:    and v1.8b, v5.8b, v1.8b
 ; CHECK-SD-NEXT:    ushll v0.2d, v0.2s, #0
 ; CHECK-SD-NEXT:    ushll v2.2d, v2.2s, #0
 ; CHECK-SD-NEXT:    ushll v3.2d, v3.2s, #0
@@ -3553,68 +3537,52 @@ entry:
 define <16 x double> @stofp_v16i8_v16f64(<16 x i8> %a) {
 ; CHECK-SD-LABEL: stofp_v16i8_v16f64:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    umov w8, v0.b[0]
 ; CHECK-SD-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-SD-NEXT:    umov w9, v0.b[1]
-; CHECK-SD-NEXT:    umov w10, v0.b[2]
-; CHECK-SD-NEXT:    umov w12, v0.b[4]
-; CHECK-SD-NEXT:    umov w14, v0.b[6]
-; CHECK-SD-NEXT:    umov w11, v0.b[3]
-; CHECK-SD-NEXT:    umov w13, v0.b[5]
-; CHECK-SD-NEXT:    fmov s2, w8
-; CHECK-SD-NEXT:    umov w15, v1.b[0]
-; CHECK-SD-NEXT:    umov w17, v1.b[2]
-; CHECK-SD-NEXT:    umov w0, v1.b[4]
-; CHECK-SD-NEXT:    umov w16, v1.b[1]
-; CHECK-SD-NEXT:    umov w18, v1.b[3]
-; CHECK-SD-NEXT:    umov w8, v0.b[7]
-; CHECK-SD-NEXT:    fmov s0, w10
-; CHECK-SD-NEXT:    umov w10, v1.b[5]
-; CHECK-SD-NEXT:    mov v2.s[1], w9
-; CHECK-SD-NEXT:    umov w9, v1.b[6]
-; CHECK-SD-NEXT:    fmov s3, w12
-; CHECK-SD-NEXT:    umov w12, v1.b[7]
-; CHECK-SD-NEXT:    fmov s1, w14
-; CHECK-SD-NEXT:    fmov s4, w15
-; CHECK-SD-NEXT:    fmov s5, w17
-; CHECK-SD-NEXT:    fmov s6, w0
-; CHECK-SD-NEXT:    mov v0.s[1], w11
-; CHECK-SD-NEXT:    mov v3.s[1], w13
-; CHECK-SD-NEXT:    fmov s7, w9
-; CHECK-SD-NEXT:    mov v1.s[1], w8
-; CHECK-SD-NEXT:    mov v4.s[1], w16
-; CHECK-SD-NEXT:    mov v5.s[1], w18
-; CHECK-SD-NEXT:    mov v6.s[1], w10
-; CHECK-SD-NEXT:    shl v2.2s, v2.2s, #24
-; CHECK-SD-NEXT:    shl v0.2s, v0.2s, #24
-; CHECK-SD-NEXT:    mov v7.s[1], w12
-; CHECK-SD-NEXT:    shl v3.2s, v3.2s, #24
-; CHECK-SD-NEXT:    shl v1.2s, v1.2s, #24
-; CHECK-SD-NEXT:    shl v4.2s, v4.2s, #24
-; CHECK-SD-NEXT:    sshr v2.2s, v2.2s, #24
-; CHECK-SD-NEXT:    shl v5.2s, v5.2s, #24
-; CHECK-SD-NEXT:    shl v6.2s, v6.2s, #24
+; CHECK-SD-NEXT:    mov v2.b[0], v0.b[0]
+; CHECK-SD-NEXT:    mov v3.b[0], v0.b[2]
+; CHECK-SD-NEXT:    mov v4.b[0], v0.b[4]
+; CHECK-SD-NEXT:    mov v5.b[0], v0.b[6]
+; CHECK-SD-NEXT:    mov v6.b[0], v1.b[0]
+; CHECK-SD-NEXT:    mov v7.b[0], v1.b[2]
+; CHECK-SD-NEXT:    mov v16.b[0], v1.b[4]
+; CHECK-SD-NEXT:    mov v17.b[0], v1.b[6]
+; CHECK-SD-NEXT:    mov v2.b[4], v0.b[1]
+; CHECK-SD-NEXT:    mov v3.b[4], v0.b[3]
+; CHECK-SD-NEXT:    mov v4.b[4], v0.b[5]
+; CHECK-SD-NEXT:    mov v5.b[4], v0.b[7]
+; CHECK-SD-NEXT:    mov v6.b[4], v1.b[1]
+; CHECK-SD-NEXT:    mov v7.b[4], v1.b[3]
+; CHECK-SD-NEXT:    mov v16.b[4], v1.b[5]
+; CHECK-SD-NEXT:    mov v17.b[4], v1.b[7]
+; CHECK-SD-NEXT:    shl v0.2s, v2.2s, #24
+; CHECK-SD-NEXT:    shl v1.2s, v3.2s, #24
+; CHECK-SD-NEXT:    shl v2.2s, v4.2s, #24
+; CHECK-SD-NEXT:    shl v3.2s, v5.2s, #24
+; CHECK-SD-NEXT:    shl v4.2s, v6.2s, #24
+; CHECK-SD-NEXT:    shl v5.2s, v7.2s, #24
+; CHECK-SD-NEXT:    shl v6.2s, v16.2s, #24
+; CHECK-SD-NEXT:    shl v7.2s, v17.2s, #24
 ; CHECK-SD-NEXT:    sshr v0.2s, v0.2s, #24
+; CHECK-SD-NEXT:    sshr v1.2s, v1.2s, #24
+; CHECK-SD-NEXT:    sshr v2.2s, v2.2s, #24
 ; CHECK-SD-NEXT:    sshr v3.2s, v3.2s, #24
-; CHECK-SD-NEXT:    shl v7.2s, v7.2s, #24
 ; CHECK-SD-NEXT:    sshr v4.2s, v4.2s, #24
-; CHECK-SD-NEXT:    sshr v1.2s, v1.2s, #24
 ; CHECK-SD-NEXT:    sshr v5.2s, v5.2s, #24
 ; CHECK-SD-NEXT:    sshr v6.2s, v6.2s, #24
+; CHECK-SD-NEXT:    sshr v7.2s, v7.2s, #24
+; CHECK-SD-NEXT:    sshll v0.2d, v0.2s, #0
+; CHECK-SD-NEXT:    sshll v1.2d, v1.2s, #0
 ; CHECK-SD-NEXT:    sshll v2.2d, v2.2s, #0
-; CHECK-SD-NEXT:    sshll v16.2d, v0.2s, #0
 ; CHECK-SD-NEXT:    sshll v3.2d, v3.2s, #0
-; CHECK-SD-NEXT:    sshr v7.2s, v7.2s, #24
 ; CHECK-SD-NEXT:    sshll v4.2d, v4.2s, #0
-; CHECK-SD-NEXT:    sshll v17.2d, v1.2s, #0
 ; CHECK-SD-NEXT:    sshll v5.2d, v5.2s, #0
 ; CHECK-SD-NEXT:    sshll v6.2d, v6.2s, #0
-; CHECK-SD-NEXT:    scvtf v0.2d, v2.2d
-; CHECK-SD-NEXT:    scvtf v1.2d, v16.2d
-; CHECK-SD-NEXT:    scvtf v2.2d, v3.2d
 ; CHECK-SD-NEXT:    sshll v7.2d, v7.2s, #0
+; CHECK-SD-NEXT:    scvtf v0.2d, v0.2d
+; CHECK-SD-NEXT:    scvtf v1.2d, v1.2d
+; CHECK-SD-NEXT:    scvtf v2.2d, v2.2d
+; CHECK-SD-NEXT:    scvtf v3.2d, v3.2d
 ; CHECK-SD-NEXT:    scvtf v4.2d, v4.2d
-; CHECK-SD-NEXT:    scvtf v3.2d, v17.2d
 ; CHECK-SD-NEXT:    scvtf v5.2d, v5.2d
 ; CHECK-SD-NEXT:    scvtf v6.2d, v6.2d
 ; CHECK-SD-NEXT:    scvtf v7.2d, v7.2d
@@ -3654,63 +3622,47 @@ define <16 x double> @utofp_v16i8_v16f64(<16 x i8> %a) {
 ; CHECK-SD-LABEL: utofp_v16i8_v16f64:
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    ext v2.16b, v0.16b, v0.16b, #8
-; CHECK-SD-NEXT:    umov w8, v0.b[0]
-; CHECK-SD-NEXT:    umov w10, v0.b[2]
-; CHECK-SD-NEXT:    umov w9, v0.b[1]
-; CHECK-SD-NEXT:    umov w12, v0.b[4]
-; CHECK-SD-NEXT:    umov w11, v0.b[3]
-; CHECK-SD-NEXT:    umov w13, v0.b[5]
-; CHECK-SD-NEXT:    umov w18, v0.b[6]
+; CHECK-SD-NEXT:    mov v3.b[0], v0.b[0]
+; CHECK-SD-NEXT:    mov v4.b[0], v0.b[2]
+; CHECK-SD-NEXT:    mov v5.b[0], v0.b[4]
+; CHECK-SD-NEXT:    mov v6.b[0], v0.b[6]
 ; CHECK-SD-NEXT:    movi d1, #0x0000ff000000ff
-; CHECK-SD-NEXT:    umov w14, v2.b[0]
-; CHECK-SD-NEXT:    umov w16, v2.b[2]
-; CHECK-SD-NEXT:    umov w0, v2.b[4]
-; CHECK-SD-NEXT:    fmov s3, w8
-; CHECK-SD-NEXT:    umov w8, v0.b[7]
-; CHECK-SD-NEXT:    fmov s0, w10
-; CHECK-SD-NEXT:    umov w10, v2.b[6]
-; CHECK-SD-NEXT:    umov w15, v2.b[1]
-; CHECK-SD-NEXT:    umov w17, v2.b[3]
-; CHECK-SD-NEXT:    fmov s4, w12
-; CHECK-SD-NEXT:    umov w12, v2.b[5]
-; CHECK-SD-NEXT:    fmov s7, w18
-; CHECK-SD-NEXT:    mov v3.s[1], w9
-; CHECK-SD-NEXT:    umov w9, v2.b[7]
-; CHECK-SD-NEXT:    fmov s2, w14
-; CHECK-SD-NEXT:    fmov s5, w16
-; CHECK-SD-NEXT:    fmov s6, w0
-; CHECK-SD-NEXT:    mov v0.s[1], w11
-; CHECK-SD-NEXT:    fmov s16, w10
-; CHECK-SD-NEXT:    mov v4.s[1], w13
-; CHECK-SD-NEXT:    mov v7.s[1], w8
-; CHECK-SD-NEXT:    mov v2.s[1], w15
-; CHECK-SD-NEXT:    mov v5.s[1], w17
-; CHECK-SD-NEXT:    mov v6.s[1], w12
-; CHECK-SD-NEXT:    and v3.8b, v3.8b, v1.8b
-; CHECK-SD-NEXT:    mov v16.s[1], w9
-; CHECK-SD-NEXT:    and v0.8b, v0.8b, v1.8b
-; CHECK-SD-NEXT:    and v4.8b, v4.8b, v1.8b
-; CHECK-SD-NEXT:    and v7.8b, v7.8b, v1.8b
-; CHECK-SD-NEXT:    and v2.8b, v2.8b, v1.8b
-; CHECK-SD-NEXT:    ushll v3.2d, v3.2s, #0
-; CHECK-SD-NEXT:    and v5.8b, v5.8b, v1.8b
-; CHECK-SD-NEXT:    and v6.8b, v6.8b, v1.8b
-; CHECK-SD-NEXT:    and v1.8b, v16.8b, v1.8b
-; CHECK-SD-NEXT:    ushll v16.2d, v0.2s, #0
-; CHECK-SD-NEXT:    ushll v17.2d, v4.2s, #0
+; CHECK-SD-NEXT:    mov v7.b[0], v2.b[0]
+; CHECK-SD-NEXT:    mov v16.b[0], v2.b[2]
+; CHECK-SD-NEXT:    mov v17.b[0], v2.b[4]
+; CHECK-SD-NEXT:    mov v18.b[0], v2.b[6]
+; CHECK-SD-NEXT:    mov v3.b[4], v0.b[1]
+; CHECK-SD-NEXT:    mov v4.b[4], v0.b[3]
+; CHECK-SD-NEXT:    mov v5.b[4], v0.b[5]
+; CHECK-SD-NEXT:    mov v6.b[4], v0.b[7]
+; CHECK-SD-NEXT:    mov v7.b[4], v2.b[1]
+; CHECK-SD-NEXT:    mov v16.b[4], v2.b[3]
+; CHECK-SD-NEXT:    mov v17.b[4], v2.b[5]
+; CHECK-SD-NEXT:    mov v18.b[4], v2.b[7]
+; CHECK-SD-NEXT:    and v0.8b, v3.8b, v1.8b
+; CHECK-SD-NEXT:    and v2.8b, v4.8b, v1.8b
+; CHECK-SD-NEXT:    and v3.8b, v5.8b, v1.8b
+; CHECK-SD-NEXT:    and v4.8b, v6.8b, v1.8b
+; CHECK-SD-NEXT:    and v5.8b, v7.8b, v1.8b
+; CHECK-SD-NEXT:    and v6.8b, v16.8b, v1.8b
+; CHECK-SD-NEXT:    and v7.8b, v17.8b, v1.8b
+; CHECK-SD-NEXT:    and v1.8b, v18.8b, v1.8b
+; CHECK-SD-NEXT:    ushll v0.2d, v0.2s, #0
 ; CHECK-SD-N...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/136091


More information about the llvm-commits mailing list