[llvm] [AArch64] Add tablegen patterns for i8 and i16 vector insert/extract pairs (PR #136091)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Apr 16 23:57:29 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-aarch64
Author: David Green (davemgreen)
<details>
<summary>Changes</summary>
An i8 or i16 vector extract/insert has to go via an i32 to make sure the types are legal. This patch adds patterns for an extract from an i8/i16 vector that is inserted into an i16/i32 vector. This avoids the round trip via a GPR, which can limit performance.
---
Patch is 58.09 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/136091.diff
9 Files Affected:
- (modified) llvm/include/llvm/Target/TargetSelectionDAG.td (+5-2)
- (modified) llvm/lib/Target/AArch64/AArch64InstrInfo.td (+35)
- (modified) llvm/test/CodeGen/AArch64/arm64-extract-insert-varidx.ll (+6-10)
- (modified) llvm/test/CodeGen/AArch64/bitcast-extend.ll (+12-20)
- (modified) llvm/test/CodeGen/AArch64/fix-shuffle-vector-be-rev.ll (+12-20)
- (modified) llvm/test/CodeGen/AArch64/itofp.ll (+267-379)
- (modified) llvm/test/CodeGen/AArch64/neon-bitcast.ll (+6-10)
- (modified) llvm/test/CodeGen/AArch64/shuffle-extend.ll (+30-56)
- (modified) llvm/test/CodeGen/AArch64/vector-fcvt.ll (+94-142)
``````````diff
diff --git a/llvm/include/llvm/Target/TargetSelectionDAG.td b/llvm/include/llvm/Target/TargetSelectionDAG.td
index 9c241b6c4df0f..30f580faee290 100644
--- a/llvm/include/llvm/Target/TargetSelectionDAG.td
+++ b/llvm/include/llvm/Target/TargetSelectionDAG.td
@@ -818,8 +818,11 @@ def step_vector : SDNode<"ISD::STEP_VECTOR", SDTypeProfile<1, 1,
def scalar_to_vector : SDNode<"ISD::SCALAR_TO_VECTOR", SDTypeProfile<1, 1, []>,
[]>;
-// vector_extract/vector_insert are deprecated. extractelt/insertelt
-// are preferred.
+// vector_extract/vector_insert are similar to extractelt/insertelt but allow
+// types that require promotion (a 16i8 extract where i8 is not a legal type so
+// uses i32 for example). extractelt/insertelt are preferred where the element
+// type and the extracted types match due to the extra type checking they
+// perform.
def vector_extract : SDNode<"ISD::EXTRACT_VECTOR_ELT",
SDTypeProfile<1, 2, [SDTCisPtrTy<2>]>, []>;
def vector_insert : SDNode<"ISD::INSERT_VECTOR_ELT",
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index a7a01ed785afa..33fe71e4913b5 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -7307,6 +7307,41 @@ def : Pat<(v2i32 (vector_insert v2i32:$src, (i32 (bitconvert (f32 FPR32:$Sn))),
def : Pat<(v2i64 (vector_insert v2i64:$src, (i64 (bitconvert (f64 FPR64:$Sn))), (i64 imm:$Immd))),
(INSvi64lane V128:$src, imm:$Immd, (INSERT_SUBREG (IMPLICIT_DEF), FPR64:$Sn, dsub), 0)>;
+// Patterns for i8/i16 -> v2i32/v4i16 lane moves via insert and extract that go via i32.
+multiclass Neon_INS_elt_ext_pattern<ValueType VT128, ValueType VT64, ValueType OutVT,
+ Instruction INS, SDNodeXForm VecIndexMult> {
+ // VT64->OutVT
+ def : Pat<(OutVT (vector_insert (OutVT V64:$src),
+ (i32 (vector_extract (VT64 V64:$Rn), (i64 imm:$Immn))),
+ (i64 imm:$Immd))),
+ (EXTRACT_SUBREG
+ (INS (INSERT_SUBREG (VT128 (IMPLICIT_DEF)), V64:$src, dsub), (VecIndexMult imm:$Immd),
+ (INSERT_SUBREG (VT128 (IMPLICIT_DEF)), V64:$Rn, dsub), imm:$Immn),
+ dsub)>;
+ def : Pat<(OutVT (scalar_to_vector (i32 (vector_extract (VT64 V64:$Rn), (i64 imm:$Immn))))),
+ (EXTRACT_SUBREG
+ (INS (IMPLICIT_DEF), 0,
+ (INSERT_SUBREG (VT128 (IMPLICIT_DEF)), V64:$Rn, dsub), imm:$Immn),
+ dsub)>;
+
+ // VT128->OutVT
+ def : Pat<(OutVT (vector_insert (OutVT V64:$src),
+ (i32 (vector_extract (VT128 V128:$Rn), (i64 imm:$Immn))),
+ (i64 imm:$Immd))),
+ (EXTRACT_SUBREG
+ (INS (SUBREG_TO_REG (i64 0), V64:$src, dsub), (VecIndexMult imm:$Immd),
+ V128:$Rn, imm:$Immn),
+ dsub)>;
+ def : Pat<(OutVT (scalar_to_vector (i32 (vector_extract (VT128 V128:$Rn), (i64 imm:$Immn))))),
+ (EXTRACT_SUBREG
+ (INS (IMPLICIT_DEF), 0, V128:$Rn, imm:$Immn),
+ dsub)>;
+}
+
+defm : Neon_INS_elt_ext_pattern<v16i8, v8i8, v4i16, INSvi8lane, VecIndex_x2>;
+defm : Neon_INS_elt_ext_pattern<v16i8, v8i8, v2i32, INSvi8lane, VecIndex_x4>;
+defm : Neon_INS_elt_ext_pattern<v8i16, v4i16, v2i32, INSvi16lane, VecIndex_x2>;
+
// bitcast of an extract
// f32 bitcast(vector_extract(v4i32 src, lane)) -> EXTRACT_SUBREG(INSvi32lane(-, 0, src, lane))
def : Pat<(f32 (bitconvert (i32 (vector_extract v4i32:$src, imm:$Immd)))),
diff --git a/llvm/test/CodeGen/AArch64/arm64-extract-insert-varidx.ll b/llvm/test/CodeGen/AArch64/arm64-extract-insert-varidx.ll
index 7a4cdd52db904..fccb1fb675768 100644
--- a/llvm/test/CodeGen/AArch64/arm64-extract-insert-varidx.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-extract-insert-varidx.ll
@@ -11,14 +11,11 @@ define <4 x i8> @test_varidx_extract_v8s8(<8 x i8> %x, i32 %idx) {
; CHECK-SDAG-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-SDAG-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-SDAG-NEXT: str d0, [sp, #8]
-; CHECK-SDAG-NEXT: umov w9, v0.b[1]
; CHECK-SDAG-NEXT: bfxil x8, x0, #0, #3
; CHECK-SDAG-NEXT: ld1 { v1.b }[0], [x8]
-; CHECK-SDAG-NEXT: umov w8, v0.b[2]
-; CHECK-SDAG-NEXT: mov v1.h[1], w9
-; CHECK-SDAG-NEXT: umov w9, v0.b[3]
-; CHECK-SDAG-NEXT: mov v1.h[2], w8
-; CHECK-SDAG-NEXT: mov v1.h[3], w9
+; CHECK-SDAG-NEXT: mov v1.b[2], v0.b[1]
+; CHECK-SDAG-NEXT: mov v1.b[4], v0.b[2]
+; CHECK-SDAG-NEXT: mov v1.b[6], v0.b[3]
; CHECK-SDAG-NEXT: fmov d0, d1
; CHECK-SDAG-NEXT: add sp, sp, #16
; CHECK-SDAG-NEXT: ret
@@ -168,11 +165,10 @@ define <2 x i16> @test_varidx_extract_v4s16(<4 x i16> %x, i32 %idx) {
; CHECK-SDAG-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-SDAG-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-SDAG-NEXT: str d0, [sp, #8]
-; CHECK-SDAG-NEXT: umov w9, v0.h[1]
; CHECK-SDAG-NEXT: bfi x8, x0, #1, #2
-; CHECK-SDAG-NEXT: ld1 { v0.h }[0], [x8]
-; CHECK-SDAG-NEXT: mov v0.s[1], w9
-; CHECK-SDAG-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-SDAG-NEXT: ld1 { v1.h }[0], [x8]
+; CHECK-SDAG-NEXT: mov v1.h[2], v0.h[1]
+; CHECK-SDAG-NEXT: fmov d0, d1
; CHECK-SDAG-NEXT: add sp, sp, #16
; CHECK-SDAG-NEXT: ret
;
diff --git a/llvm/test/CodeGen/AArch64/bitcast-extend.ll b/llvm/test/CodeGen/AArch64/bitcast-extend.ll
index 85daa3ca6623e..33238ccf86a39 100644
--- a/llvm/test/CodeGen/AArch64/bitcast-extend.ll
+++ b/llvm/test/CodeGen/AArch64/bitcast-extend.ll
@@ -70,16 +70,12 @@ define <4 x i64> @z_i32_v4i64(i32 %x) {
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: fmov s0, w0
; CHECK-SD-NEXT: movi v1.2d, #0x000000000000ff
-; CHECK-SD-NEXT: umov w8, v0.b[2]
-; CHECK-SD-NEXT: umov w9, v0.b[0]
-; CHECK-SD-NEXT: umov w10, v0.b[3]
-; CHECK-SD-NEXT: umov w11, v0.b[1]
-; CHECK-SD-NEXT: fmov s0, w9
-; CHECK-SD-NEXT: fmov s2, w8
-; CHECK-SD-NEXT: mov v0.s[1], w11
-; CHECK-SD-NEXT: mov v2.s[1], w10
-; CHECK-SD-NEXT: ushll v0.2d, v0.2s, #0
-; CHECK-SD-NEXT: ushll v2.2d, v2.2s, #0
+; CHECK-SD-NEXT: mov v2.b[0], v0.b[0]
+; CHECK-SD-NEXT: mov v3.b[0], v0.b[2]
+; CHECK-SD-NEXT: mov v2.b[4], v0.b[1]
+; CHECK-SD-NEXT: mov v3.b[4], v0.b[3]
+; CHECK-SD-NEXT: ushll v0.2d, v2.2s, #0
+; CHECK-SD-NEXT: ushll v2.2d, v3.2s, #0
; CHECK-SD-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-SD-NEXT: and v1.16b, v2.16b, v1.16b
; CHECK-SD-NEXT: ret
@@ -176,16 +172,12 @@ define <4 x i64> @s_i32_v4i64(i32 %x) {
; CHECK-SD-LABEL: s_i32_v4i64:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: fmov s0, w0
-; CHECK-SD-NEXT: umov w8, v0.b[2]
-; CHECK-SD-NEXT: umov w9, v0.b[0]
-; CHECK-SD-NEXT: umov w10, v0.b[3]
-; CHECK-SD-NEXT: umov w11, v0.b[1]
-; CHECK-SD-NEXT: fmov s0, w9
-; CHECK-SD-NEXT: fmov s1, w8
-; CHECK-SD-NEXT: mov v0.s[1], w11
-; CHECK-SD-NEXT: mov v1.s[1], w10
-; CHECK-SD-NEXT: ushll v0.2d, v0.2s, #0
-; CHECK-SD-NEXT: ushll v1.2d, v1.2s, #0
+; CHECK-SD-NEXT: mov v1.b[0], v0.b[0]
+; CHECK-SD-NEXT: mov v2.b[0], v0.b[2]
+; CHECK-SD-NEXT: mov v1.b[4], v0.b[1]
+; CHECK-SD-NEXT: mov v2.b[4], v0.b[3]
+; CHECK-SD-NEXT: ushll v0.2d, v1.2s, #0
+; CHECK-SD-NEXT: ushll v1.2d, v2.2s, #0
; CHECK-SD-NEXT: shl v0.2d, v0.2d, #56
; CHECK-SD-NEXT: shl v1.2d, v1.2d, #56
; CHECK-SD-NEXT: sshr v0.2d, v0.2d, #56
diff --git a/llvm/test/CodeGen/AArch64/fix-shuffle-vector-be-rev.ll b/llvm/test/CodeGen/AArch64/fix-shuffle-vector-be-rev.ll
index 8b74de1c127dd..e90b6cb7f809b 100644
--- a/llvm/test/CodeGen/AArch64/fix-shuffle-vector-be-rev.ll
+++ b/llvm/test/CodeGen/AArch64/fix-shuffle-vector-be-rev.ll
@@ -5,16 +5,12 @@
define <4 x i16> @test_reconstructshuffle(<16 x i8> %a, <16 x i8> %b) nounwind {
; CHECKLE-LABEL: test_reconstructshuffle:
; CHECKLE: // %bb.0:
-; CHECKLE-NEXT: umov w8, v0.b[3]
-; CHECKLE-NEXT: umov w9, v0.b[2]
-; CHECKLE-NEXT: fmov s2, w8
-; CHECKLE-NEXT: umov w8, v0.b[1]
-; CHECKLE-NEXT: mov v2.h[1], w9
-; CHECKLE-NEXT: mov v2.h[2], w8
-; CHECKLE-NEXT: umov w8, v0.b[0]
-; CHECKLE-NEXT: ext v0.16b, v1.16b, v1.16b, #8
-; CHECKLE-NEXT: mov v2.h[3], w8
-; CHECKLE-NEXT: zip2 v0.8b, v0.8b, v0.8b
+; CHECKLE-NEXT: mov v2.b[0], v0.b[3]
+; CHECKLE-NEXT: ext v1.16b, v1.16b, v1.16b, #8
+; CHECKLE-NEXT: mov v2.b[2], v0.b[2]
+; CHECKLE-NEXT: mov v2.b[4], v0.b[1]
+; CHECKLE-NEXT: mov v2.b[6], v0.b[0]
+; CHECKLE-NEXT: zip2 v0.8b, v1.8b, v0.8b
; CHECKLE-NEXT: add v0.4h, v2.4h, v0.4h
; CHECKLE-NEXT: bic v0.4h, #255, lsl #8
; CHECKLE-NEXT: ret
@@ -25,16 +21,12 @@ define <4 x i16> @test_reconstructshuffle(<16 x i8> %a, <16 x i8> %b) nounwind {
; CHECKBE-NEXT: rev64 v1.16b, v1.16b
; CHECKBE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
; CHECKBE-NEXT: ext v1.16b, v1.16b, v1.16b, #8
-; CHECKBE-NEXT: umov w8, v0.b[3]
-; CHECKBE-NEXT: umov w9, v0.b[2]
-; CHECKBE-NEXT: fmov s2, w8
-; CHECKBE-NEXT: umov w8, v0.b[1]
-; CHECKBE-NEXT: mov v2.h[1], w9
-; CHECKBE-NEXT: mov v2.h[2], w8
-; CHECKBE-NEXT: umov w8, v0.b[0]
-; CHECKBE-NEXT: ext v0.16b, v1.16b, v1.16b, #8
-; CHECKBE-NEXT: mov v2.h[3], w8
-; CHECKBE-NEXT: zip2 v0.8b, v0.8b, v0.8b
+; CHECKBE-NEXT: mov v2.b[0], v0.b[3]
+; CHECKBE-NEXT: ext v1.16b, v1.16b, v1.16b, #8
+; CHECKBE-NEXT: mov v2.b[2], v0.b[2]
+; CHECKBE-NEXT: mov v2.b[4], v0.b[1]
+; CHECKBE-NEXT: mov v2.b[6], v0.b[0]
+; CHECKBE-NEXT: zip2 v0.8b, v1.8b, v0.8b
; CHECKBE-NEXT: add v0.4h, v2.4h, v0.4h
; CHECKBE-NEXT: bic v0.4h, #255, lsl #8
; CHECKBE-NEXT: rev64 v0.4h, v0.4h
diff --git a/llvm/test/CodeGen/AArch64/itofp.ll b/llvm/test/CodeGen/AArch64/itofp.ll
index 07957c117868d..fb2bdb4d63f47 100644
--- a/llvm/test/CodeGen/AArch64/itofp.ll
+++ b/llvm/test/CodeGen/AArch64/itofp.ll
@@ -3443,26 +3443,18 @@ define <8 x double> @stofp_v8i8_v8f64(<8 x i8> %a) {
; CHECK-SD-LABEL: stofp_v8i8_v8f64:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT: umov w8, v0.b[0]
-; CHECK-SD-NEXT: umov w9, v0.b[2]
-; CHECK-SD-NEXT: umov w11, v0.b[4]
-; CHECK-SD-NEXT: umov w12, v0.b[6]
-; CHECK-SD-NEXT: umov w10, v0.b[1]
-; CHECK-SD-NEXT: umov w13, v0.b[3]
-; CHECK-SD-NEXT: umov w14, v0.b[5]
-; CHECK-SD-NEXT: umov w15, v0.b[7]
-; CHECK-SD-NEXT: fmov s0, w8
-; CHECK-SD-NEXT: fmov s1, w9
-; CHECK-SD-NEXT: fmov s2, w11
-; CHECK-SD-NEXT: fmov s3, w12
-; CHECK-SD-NEXT: mov v0.s[1], w10
-; CHECK-SD-NEXT: mov v1.s[1], w13
-; CHECK-SD-NEXT: mov v2.s[1], w14
-; CHECK-SD-NEXT: mov v3.s[1], w15
-; CHECK-SD-NEXT: shl v0.2s, v0.2s, #24
-; CHECK-SD-NEXT: shl v1.2s, v1.2s, #24
-; CHECK-SD-NEXT: shl v2.2s, v2.2s, #24
-; CHECK-SD-NEXT: shl v3.2s, v3.2s, #24
+; CHECK-SD-NEXT: mov v1.b[0], v0.b[0]
+; CHECK-SD-NEXT: mov v2.b[0], v0.b[2]
+; CHECK-SD-NEXT: mov v3.b[0], v0.b[4]
+; CHECK-SD-NEXT: mov v4.b[0], v0.b[6]
+; CHECK-SD-NEXT: mov v1.b[4], v0.b[1]
+; CHECK-SD-NEXT: mov v2.b[4], v0.b[3]
+; CHECK-SD-NEXT: mov v3.b[4], v0.b[5]
+; CHECK-SD-NEXT: mov v4.b[4], v0.b[7]
+; CHECK-SD-NEXT: shl v0.2s, v1.2s, #24
+; CHECK-SD-NEXT: shl v1.2s, v2.2s, #24
+; CHECK-SD-NEXT: shl v2.2s, v3.2s, #24
+; CHECK-SD-NEXT: shl v3.2s, v4.2s, #24
; CHECK-SD-NEXT: sshr v0.2s, v0.2s, #24
; CHECK-SD-NEXT: sshr v1.2s, v1.2s, #24
; CHECK-SD-NEXT: sshr v2.2s, v2.2s, #24
@@ -3500,27 +3492,19 @@ define <8 x double> @utofp_v8i8_v8f64(<8 x i8> %a) {
; CHECK-SD-LABEL: utofp_v8i8_v8f64:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT: umov w8, v0.b[0]
-; CHECK-SD-NEXT: umov w9, v0.b[2]
-; CHECK-SD-NEXT: umov w11, v0.b[4]
-; CHECK-SD-NEXT: umov w12, v0.b[6]
-; CHECK-SD-NEXT: umov w10, v0.b[1]
-; CHECK-SD-NEXT: umov w13, v0.b[3]
-; CHECK-SD-NEXT: umov w14, v0.b[5]
-; CHECK-SD-NEXT: umov w15, v0.b[7]
+; CHECK-SD-NEXT: mov v2.b[0], v0.b[0]
+; CHECK-SD-NEXT: mov v3.b[0], v0.b[2]
+; CHECK-SD-NEXT: mov v4.b[0], v0.b[4]
+; CHECK-SD-NEXT: mov v5.b[0], v0.b[6]
; CHECK-SD-NEXT: movi d1, #0x0000ff000000ff
-; CHECK-SD-NEXT: fmov s0, w8
-; CHECK-SD-NEXT: fmov s2, w9
-; CHECK-SD-NEXT: fmov s3, w11
-; CHECK-SD-NEXT: fmov s4, w12
-; CHECK-SD-NEXT: mov v0.s[1], w10
-; CHECK-SD-NEXT: mov v2.s[1], w13
-; CHECK-SD-NEXT: mov v3.s[1], w14
-; CHECK-SD-NEXT: mov v4.s[1], w15
-; CHECK-SD-NEXT: and v0.8b, v0.8b, v1.8b
-; CHECK-SD-NEXT: and v2.8b, v2.8b, v1.8b
-; CHECK-SD-NEXT: and v3.8b, v3.8b, v1.8b
-; CHECK-SD-NEXT: and v1.8b, v4.8b, v1.8b
+; CHECK-SD-NEXT: mov v2.b[4], v0.b[1]
+; CHECK-SD-NEXT: mov v3.b[4], v0.b[3]
+; CHECK-SD-NEXT: mov v4.b[4], v0.b[5]
+; CHECK-SD-NEXT: mov v5.b[4], v0.b[7]
+; CHECK-SD-NEXT: and v0.8b, v2.8b, v1.8b
+; CHECK-SD-NEXT: and v2.8b, v3.8b, v1.8b
+; CHECK-SD-NEXT: and v3.8b, v4.8b, v1.8b
+; CHECK-SD-NEXT: and v1.8b, v5.8b, v1.8b
; CHECK-SD-NEXT: ushll v0.2d, v0.2s, #0
; CHECK-SD-NEXT: ushll v2.2d, v2.2s, #0
; CHECK-SD-NEXT: ushll v3.2d, v3.2s, #0
@@ -3553,68 +3537,52 @@ entry:
define <16 x double> @stofp_v16i8_v16f64(<16 x i8> %a) {
; CHECK-SD-LABEL: stofp_v16i8_v16f64:
; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: umov w8, v0.b[0]
; CHECK-SD-NEXT: ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-SD-NEXT: umov w9, v0.b[1]
-; CHECK-SD-NEXT: umov w10, v0.b[2]
-; CHECK-SD-NEXT: umov w12, v0.b[4]
-; CHECK-SD-NEXT: umov w14, v0.b[6]
-; CHECK-SD-NEXT: umov w11, v0.b[3]
-; CHECK-SD-NEXT: umov w13, v0.b[5]
-; CHECK-SD-NEXT: fmov s2, w8
-; CHECK-SD-NEXT: umov w15, v1.b[0]
-; CHECK-SD-NEXT: umov w17, v1.b[2]
-; CHECK-SD-NEXT: umov w0, v1.b[4]
-; CHECK-SD-NEXT: umov w16, v1.b[1]
-; CHECK-SD-NEXT: umov w18, v1.b[3]
-; CHECK-SD-NEXT: umov w8, v0.b[7]
-; CHECK-SD-NEXT: fmov s0, w10
-; CHECK-SD-NEXT: umov w10, v1.b[5]
-; CHECK-SD-NEXT: mov v2.s[1], w9
-; CHECK-SD-NEXT: umov w9, v1.b[6]
-; CHECK-SD-NEXT: fmov s3, w12
-; CHECK-SD-NEXT: umov w12, v1.b[7]
-; CHECK-SD-NEXT: fmov s1, w14
-; CHECK-SD-NEXT: fmov s4, w15
-; CHECK-SD-NEXT: fmov s5, w17
-; CHECK-SD-NEXT: fmov s6, w0
-; CHECK-SD-NEXT: mov v0.s[1], w11
-; CHECK-SD-NEXT: mov v3.s[1], w13
-; CHECK-SD-NEXT: fmov s7, w9
-; CHECK-SD-NEXT: mov v1.s[1], w8
-; CHECK-SD-NEXT: mov v4.s[1], w16
-; CHECK-SD-NEXT: mov v5.s[1], w18
-; CHECK-SD-NEXT: mov v6.s[1], w10
-; CHECK-SD-NEXT: shl v2.2s, v2.2s, #24
-; CHECK-SD-NEXT: shl v0.2s, v0.2s, #24
-; CHECK-SD-NEXT: mov v7.s[1], w12
-; CHECK-SD-NEXT: shl v3.2s, v3.2s, #24
-; CHECK-SD-NEXT: shl v1.2s, v1.2s, #24
-; CHECK-SD-NEXT: shl v4.2s, v4.2s, #24
-; CHECK-SD-NEXT: sshr v2.2s, v2.2s, #24
-; CHECK-SD-NEXT: shl v5.2s, v5.2s, #24
-; CHECK-SD-NEXT: shl v6.2s, v6.2s, #24
+; CHECK-SD-NEXT: mov v2.b[0], v0.b[0]
+; CHECK-SD-NEXT: mov v3.b[0], v0.b[2]
+; CHECK-SD-NEXT: mov v4.b[0], v0.b[4]
+; CHECK-SD-NEXT: mov v5.b[0], v0.b[6]
+; CHECK-SD-NEXT: mov v6.b[0], v1.b[0]
+; CHECK-SD-NEXT: mov v7.b[0], v1.b[2]
+; CHECK-SD-NEXT: mov v16.b[0], v1.b[4]
+; CHECK-SD-NEXT: mov v17.b[0], v1.b[6]
+; CHECK-SD-NEXT: mov v2.b[4], v0.b[1]
+; CHECK-SD-NEXT: mov v3.b[4], v0.b[3]
+; CHECK-SD-NEXT: mov v4.b[4], v0.b[5]
+; CHECK-SD-NEXT: mov v5.b[4], v0.b[7]
+; CHECK-SD-NEXT: mov v6.b[4], v1.b[1]
+; CHECK-SD-NEXT: mov v7.b[4], v1.b[3]
+; CHECK-SD-NEXT: mov v16.b[4], v1.b[5]
+; CHECK-SD-NEXT: mov v17.b[4], v1.b[7]
+; CHECK-SD-NEXT: shl v0.2s, v2.2s, #24
+; CHECK-SD-NEXT: shl v1.2s, v3.2s, #24
+; CHECK-SD-NEXT: shl v2.2s, v4.2s, #24
+; CHECK-SD-NEXT: shl v3.2s, v5.2s, #24
+; CHECK-SD-NEXT: shl v4.2s, v6.2s, #24
+; CHECK-SD-NEXT: shl v5.2s, v7.2s, #24
+; CHECK-SD-NEXT: shl v6.2s, v16.2s, #24
+; CHECK-SD-NEXT: shl v7.2s, v17.2s, #24
; CHECK-SD-NEXT: sshr v0.2s, v0.2s, #24
+; CHECK-SD-NEXT: sshr v1.2s, v1.2s, #24
+; CHECK-SD-NEXT: sshr v2.2s, v2.2s, #24
; CHECK-SD-NEXT: sshr v3.2s, v3.2s, #24
-; CHECK-SD-NEXT: shl v7.2s, v7.2s, #24
; CHECK-SD-NEXT: sshr v4.2s, v4.2s, #24
-; CHECK-SD-NEXT: sshr v1.2s, v1.2s, #24
; CHECK-SD-NEXT: sshr v5.2s, v5.2s, #24
; CHECK-SD-NEXT: sshr v6.2s, v6.2s, #24
+; CHECK-SD-NEXT: sshr v7.2s, v7.2s, #24
+; CHECK-SD-NEXT: sshll v0.2d, v0.2s, #0
+; CHECK-SD-NEXT: sshll v1.2d, v1.2s, #0
; CHECK-SD-NEXT: sshll v2.2d, v2.2s, #0
-; CHECK-SD-NEXT: sshll v16.2d, v0.2s, #0
; CHECK-SD-NEXT: sshll v3.2d, v3.2s, #0
-; CHECK-SD-NEXT: sshr v7.2s, v7.2s, #24
; CHECK-SD-NEXT: sshll v4.2d, v4.2s, #0
-; CHECK-SD-NEXT: sshll v17.2d, v1.2s, #0
; CHECK-SD-NEXT: sshll v5.2d, v5.2s, #0
; CHECK-SD-NEXT: sshll v6.2d, v6.2s, #0
-; CHECK-SD-NEXT: scvtf v0.2d, v2.2d
-; CHECK-SD-NEXT: scvtf v1.2d, v16.2d
-; CHECK-SD-NEXT: scvtf v2.2d, v3.2d
; CHECK-SD-NEXT: sshll v7.2d, v7.2s, #0
+; CHECK-SD-NEXT: scvtf v0.2d, v0.2d
+; CHECK-SD-NEXT: scvtf v1.2d, v1.2d
+; CHECK-SD-NEXT: scvtf v2.2d, v2.2d
+; CHECK-SD-NEXT: scvtf v3.2d, v3.2d
; CHECK-SD-NEXT: scvtf v4.2d, v4.2d
-; CHECK-SD-NEXT: scvtf v3.2d, v17.2d
; CHECK-SD-NEXT: scvtf v5.2d, v5.2d
; CHECK-SD-NEXT: scvtf v6.2d, v6.2d
; CHECK-SD-NEXT: scvtf v7.2d, v7.2d
@@ -3654,63 +3622,47 @@ define <16 x double> @utofp_v16i8_v16f64(<16 x i8> %a) {
; CHECK-SD-LABEL: utofp_v16i8_v16f64:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: ext v2.16b, v0.16b, v0.16b, #8
-; CHECK-SD-NEXT: umov w8, v0.b[0]
-; CHECK-SD-NEXT: umov w10, v0.b[2]
-; CHECK-SD-NEXT: umov w9, v0.b[1]
-; CHECK-SD-NEXT: umov w12, v0.b[4]
-; CHECK-SD-NEXT: umov w11, v0.b[3]
-; CHECK-SD-NEXT: umov w13, v0.b[5]
-; CHECK-SD-NEXT: umov w18, v0.b[6]
+; CHECK-SD-NEXT: mov v3.b[0], v0.b[0]
+; CHECK-SD-NEXT: mov v4.b[0], v0.b[2]
+; CHECK-SD-NEXT: mov v5.b[0], v0.b[4]
+; CHECK-SD-NEXT: mov v6.b[0], v0.b[6]
; CHECK-SD-NEXT: movi d1, #0x0000ff000000ff
-; CHECK-SD-NEXT: umov w14, v2.b[0]
-; CHECK-SD-NEXT: umov w16, v2.b[2]
-; CHECK-SD-NEXT: umov w0, v2.b[4]
-; CHECK-SD-NEXT: fmov s3, w8
-; CHECK-SD-NEXT: umov w8, v0.b[7]
-; CHECK-SD-NEXT: fmov s0, w10
-; CHECK-SD-NEXT: umov w10, v2.b[6]
-; CHECK-SD-NEXT: umov w15, v2.b[1]
-; CHECK-SD-NEXT: umov w17, v2.b[3]
-; CHECK-SD-NEXT: fmov s4, w12
-; CHECK-SD-NEXT: umov w12, v2.b[5]
-; CHECK-SD-NEXT: fmov s7, w18
-; CHECK-SD-NEXT: mov v3.s[1], w9
-; CHECK-SD-NEXT: umov w9, v2.b[7]
-; CHECK-SD-NEXT: fmov s2, w14
-; CHECK-SD-NEXT: fmov s5, w16
-; CHECK-SD-NEXT: fmov s6, w0
-; CHECK-SD-NEXT: mov v0.s[1], w11
-; CHECK-SD-NEXT: fmov s16, w10
-; CHECK-SD-NEXT: mov v4.s[1], w13
-; CHECK-SD-NEXT: mov v7.s[1], w8
-; CHECK-SD-NEXT: mov v2.s[1], w15
-; CHECK-SD-NEXT: mov v5.s[1], w17
-; CHECK-SD-NEXT: mov v6.s[1], w12
-; CHECK-SD-NEXT: and v3.8b, v3.8b, v1.8b
-; CHECK-SD-NEXT: mov v16.s[1], w9
-; CHECK-SD-NEXT: and v0.8b, v0.8b, v1.8b
-; CHECK-SD-NEXT: and v4.8b, v4.8b, v1.8b
-; CHECK-SD-NEXT: and v7.8b, v7.8b, v1.8b
-; CHECK-SD-NEXT: and v2.8b, v2.8b, v1.8b
-; CHECK-SD-NEXT: ushll v3.2d, v3.2s, #0
-; CHECK-SD-NEXT: and v5.8b, v5.8b, v1.8b
-; CHECK-SD-NEXT: and v6.8b, v6.8b, v1.8b
-; CHECK-SD-NEXT: and v1.8b, v16.8b, v1.8b
-; CHECK-SD-NEXT: ushll v16.2d, v0.2s, #0
-; CHECK-SD-NEXT: ushll v17.2d, v4.2s, #0
+; CHECK-SD-NEXT: mov v7.b[0], v2.b[0]
+; CHECK-SD-NEXT: mov v16.b[0], v2.b[2]
+; CHECK-SD-NEXT: mov v17.b[0], v2.b[4]
+; CHECK-SD-NEXT: mov v18.b[0], v2.b[6]
+; CHECK-SD-NEXT: mov v3.b[4], v0.b[1]
+; CHECK-SD-NEXT: mov v4.b[4], v0.b[3]
+; CHECK-SD-NEXT: mov v5.b[4], v0.b[5]
+; CHECK-SD-NEXT: mov v6.b[4], v0.b[7]
+; CHECK-SD-NEXT: mov v7.b[4], v2.b[1]
+; CHECK-SD-NEXT: mov v16.b[4], v2.b[3]
+; CHECK-SD-NEXT: mov v17.b[4], v2.b[5]
+; CHECK-SD-NEXT: mov v18.b[4], v2.b[7]
+; CHECK-SD-NEXT: and v0.8b, v3.8b, v1.8b
+; CHECK-SD-NEXT: and v2.8b, v4.8b, v1.8b
+; CHECK-SD-NEXT: and v3.8b, v5.8b, v1.8b
+; CHECK-SD-NEXT: and v4.8b, v6.8b, v1.8b
+; CHECK-SD-NEXT: and v5.8b, v7.8b, v1.8b
+; CHECK-SD-NEXT: and v6.8b, v16.8b, v1.8b
+; CHECK-SD-NEXT: and v7.8b, v17.8b, v1.8b
+; CHECK-SD-NEXT: and v1.8b, v18.8b, v1.8b
+; CHECK-SD-NEXT: ushll v0.2d, v0.2s, #0
; CHECK-SD-N...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/136091
More information about the llvm-commits
mailing list