[llvm] [AArch64] Prefer using DUP instead of INS where possible (PR #138549)
via llvm-commits
llvm-commits at lists.llvm.org
Mon May 5 09:10:59 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-aarch64
Author: Csanád Hajdú (Il-Capitano)
<details>
<summary>Changes</summary>
Replace all instances of `INS(IMPLICIT_DEF, 0, v, idx)` with `DUP(v, idx)` in instruction selection.
`INS` (e.g. `mov v0.s[0], v1.s[1]`) has a value dependency on its output register, which becomes a false dependency when we're inserting into an `IMPLICIT_DEF` register. We can break this false dependency by using `DUP` (e.g. `mov s0, v1.s[1]`) instead.
---
Patch is 31.17 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/138549.diff
12 Files Affected:
- (modified) llvm/lib/Target/AArch64/AArch64InstrInfo.td (+28-12)
- (modified) llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td (+2-8)
- (modified) llvm/test/CodeGen/AArch64/arm64-arith-saturating.ll (+2-2)
- (modified) llvm/test/CodeGen/AArch64/bitcast-extend.ll (+4-4)
- (modified) llvm/test/CodeGen/AArch64/fix-shuffle-vector-be-rev.ll (+2-2)
- (modified) llvm/test/CodeGen/AArch64/fp16-vector-shuffle.ll (+4-6)
- (modified) llvm/test/CodeGen/AArch64/itofp.ll (+56-56)
- (modified) llvm/test/CodeGen/AArch64/neon-bitcast.ll (+2-2)
- (modified) llvm/test/CodeGen/AArch64/neon-insert-sve-elt.ll (+4-8)
- (modified) llvm/test/CodeGen/AArch64/neon-insextbitcast.ll (+51-6)
- (modified) llvm/test/CodeGen/AArch64/shuffle-extend.ll (+10-10)
- (modified) llvm/test/CodeGen/AArch64/vector-fcvt.ll (+24-24)
``````````diff
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 3962c7eba5833..18d13676bb26d 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -7349,7 +7349,8 @@ def : Pat<(v2i64 (vector_insert v2i64:$src, (i64 (bitconvert (f64 FPR64:$Sn))),
// Patterns for i8/i16 -> v2i32/v4i16 lane moves via insert and extract that go via i32.
multiclass Neon_INS_elt_ext_pattern<ValueType VT128, ValueType VT64, ValueType OutVT,
- Instruction INS, SDNodeXForm VecIndexMult> {
+ Instruction INS, Instruction DUP, SubRegIndex DUPSub,
+ SDNodeXForm VecIndexMult> {
// VT64->OutVT
def : Pat<(OutVT (vector_insert (OutVT V64:$src),
(i32 (vector_extract (VT64 V64:$Rn), (i64 imm:$Immn))),
@@ -7360,8 +7361,10 @@ multiclass Neon_INS_elt_ext_pattern<ValueType VT128, ValueType VT64, ValueType O
dsub)>;
def : Pat<(OutVT (scalar_to_vector (i32 (vector_extract (VT64 V64:$Rn), (i64 imm:$Immn))))),
(EXTRACT_SUBREG
- (INS (IMPLICIT_DEF), 0,
- (INSERT_SUBREG (VT128 (IMPLICIT_DEF)), V64:$Rn, dsub), imm:$Immn),
+ (VT128 (SUBREG_TO_REG
+ (i64 0),
+ (DUP (INSERT_SUBREG (VT128 (IMPLICIT_DEF)), V64:$Rn, dsub), imm:$Immn),
+ DUPSub)),
dsub)>;
// VT128->OutVT
@@ -7374,25 +7377,38 @@ multiclass Neon_INS_elt_ext_pattern<ValueType VT128, ValueType VT64, ValueType O
dsub)>;
def : Pat<(OutVT (scalar_to_vector (i32 (vector_extract (VT128 V128:$Rn), (i64 imm:$Immn))))),
(EXTRACT_SUBREG
- (INS (IMPLICIT_DEF), 0, V128:$Rn, imm:$Immn),
+ (VT128 (SUBREG_TO_REG
+ (i64 0),
+ (DUP V128:$Rn, imm:$Immn),
+ DUPSub)),
dsub)>;
}
-defm : Neon_INS_elt_ext_pattern<v16i8, v8i8, v4i16, INSvi8lane, VecIndex_x2>;
-defm : Neon_INS_elt_ext_pattern<v16i8, v8i8, v2i32, INSvi8lane, VecIndex_x4>;
-defm : Neon_INS_elt_ext_pattern<v8i16, v4i16, v2i32, INSvi16lane, VecIndex_x2>;
+defm : Neon_INS_elt_ext_pattern<v16i8, v8i8, v4i16, INSvi8lane, DUPi8, bsub, VecIndex_x2>;
+defm : Neon_INS_elt_ext_pattern<v16i8, v8i8, v2i32, INSvi8lane, DUPi8, bsub, VecIndex_x4>;
+defm : Neon_INS_elt_ext_pattern<v8i16, v4i16, v2i32, INSvi16lane, DUPi16, hsub, VecIndex_x2>;
// bitcast of an extract
-// f32 bitcast(vector_extract(v4i32 src, lane)) -> EXTRACT_SUBREG(INSvi32lane(-, 0, src, lane))
-def : Pat<(f32 (bitconvert (i32 (vector_extract v4i32:$src, imm:$Immd)))),
- (EXTRACT_SUBREG (INSvi32lane (IMPLICIT_DEF), 0, V128:$src, imm:$Immd), ssub)>;
+// f32 bitcast(vector_extract(v4i32 src, 0)) -> EXTRACT_SUBREG(src)
+def : Pat<(f32 (bitconvert (i32 (vector_extract v16i8:$src, (i64 0))))),
+ (EXTRACT_SUBREG V128:$src, bsub)>;
+def : Pat<(f32 (bitconvert (i32 (vector_extract v8i16:$src, (i64 0))))),
+ (EXTRACT_SUBREG V128:$src, hsub)>;
def : Pat<(f32 (bitconvert (i32 (vector_extract v4i32:$src, (i64 0))))),
(EXTRACT_SUBREG V128:$src, ssub)>;
-def : Pat<(f64 (bitconvert (i64 (vector_extract v2i64:$src, imm:$Immd)))),
- (EXTRACT_SUBREG (INSvi64lane (IMPLICIT_DEF), 0, V128:$src, imm:$Immd), dsub)>;
def : Pat<(f64 (bitconvert (i64 (vector_extract v2i64:$src, (i64 0))))),
(EXTRACT_SUBREG V128:$src, dsub)>;
+// f32 bitcast(vector_extract(v4i32 src, lane)) -> DUPi32(src, lane)
+def : Pat<(f32 (bitconvert (i32 (vector_extract v16i8:$src, imm:$Immd)))),
+ (EXTRACT_SUBREG (v16i8 (SUBREG_TO_REG (i64 0), (DUPi8 V128:$src, imm:$Immd), bsub)), ssub)>;
+def : Pat<(f32 (bitconvert (i32 (vector_extract v8i16:$src, imm:$Immd)))),
+ (EXTRACT_SUBREG (v8i16 (SUBREG_TO_REG (i64 0), (DUPi16 V128:$src, imm:$Immd), hsub)), ssub)>;
+def : Pat<(f32 (bitconvert (i32 (vector_extract v4i32:$src, imm:$Immd)))),
+ (DUPi32 V128:$src, imm:$Immd)>;
+def : Pat<(f64 (bitconvert (i64 (vector_extract v2i64:$src, imm:$Immd)))),
+ (DUPi64 V128:$src, imm:$Immd)>;
+
// Floating point vector extractions are codegen'd as either a sequence of
// subregister extractions, or a MOV (aka DUP here) if
// the lane number is anything other than zero.
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index bd394671881e8..6be6e1a4bdf97 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -3453,16 +3453,10 @@ let Predicates = [HasSVE_or_SME] in {
// Alternative case where insertelement is just scalar_to_vector rather than vector_insert.
def : Pat<(v1f64 (scalar_to_vector
(f64 (vector_extract nxv2f64:$vec, VectorIndexD:$index)))),
- (EXTRACT_SUBREG
- (INSvi64lane (IMPLICIT_DEF), (i64 0),
- (EXTRACT_SUBREG nxv2f64:$vec, zsub), VectorIndexD:$index),
- dsub)>;
+ (DUPi64 (EXTRACT_SUBREG nxv2f64:$vec, zsub), VectorIndexD:$index)>;
def : Pat<(v1i64 (scalar_to_vector
(i64 (vector_extract nxv2i64:$vec, VectorIndexD:$index)))),
- (EXTRACT_SUBREG
- (INSvi64lane (IMPLICIT_DEF), (i64 0),
- (EXTRACT_SUBREG nxv2i64:$vec, zsub), VectorIndexD:$index),
- dsub)>;
+ (DUPi64 (EXTRACT_SUBREG nxv2i64:$vec, zsub), VectorIndexD:$index)>;
} // End HasNEON
let Predicates = [HasNEON] in {
diff --git a/llvm/test/CodeGen/AArch64/arm64-arith-saturating.ll b/llvm/test/CodeGen/AArch64/arm64-arith-saturating.ll
index e2d530ab421ef..07c4dbcf41096 100644
--- a/llvm/test/CodeGen/AArch64/arm64-arith-saturating.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-arith-saturating.ll
@@ -193,7 +193,7 @@ define i32 @vqmovnd_u(<2 x i64> %b) nounwind readnone {
define i32 @uqxtn_ext(<4 x i32> noundef %a, <4 x i32> noundef %b, i32 %c, float %d, <2 x i64> %e) {
; CHECK-LABEL: uqxtn_ext:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: mov v0.d[0], v3.d[1]
+; CHECK-NEXT: mov d0, v3.d[1]
; CHECK-NEXT: uqxtn s0, d0
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
@@ -219,7 +219,7 @@ entry:
define <4 x i32> @sqxtun_insext(<4 x i32> noundef %a, <2 x i64> %e) {
; CHECK-LABEL: sqxtun_insext:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: mov v1.d[0], v1.d[1]
+; CHECK-NEXT: mov d1, v1.d[1]
; CHECK-NEXT: sqxtun s1, d1
; CHECK-NEXT: mov v0.s[3], v1.s[0]
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/bitcast-extend.ll b/llvm/test/CodeGen/AArch64/bitcast-extend.ll
index 33238ccf86a39..3133d0efb4b9b 100644
--- a/llvm/test/CodeGen/AArch64/bitcast-extend.ll
+++ b/llvm/test/CodeGen/AArch64/bitcast-extend.ll
@@ -70,8 +70,8 @@ define <4 x i64> @z_i32_v4i64(i32 %x) {
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: fmov s0, w0
; CHECK-SD-NEXT: movi v1.2d, #0x000000000000ff
-; CHECK-SD-NEXT: mov v2.b[0], v0.b[0]
-; CHECK-SD-NEXT: mov v3.b[0], v0.b[2]
+; CHECK-SD-NEXT: mov b2, v0.b[0]
+; CHECK-SD-NEXT: mov b3, v0.b[2]
; CHECK-SD-NEXT: mov v2.b[4], v0.b[1]
; CHECK-SD-NEXT: mov v3.b[4], v0.b[3]
; CHECK-SD-NEXT: ushll v0.2d, v2.2s, #0
@@ -172,8 +172,8 @@ define <4 x i64> @s_i32_v4i64(i32 %x) {
; CHECK-SD-LABEL: s_i32_v4i64:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: fmov s0, w0
-; CHECK-SD-NEXT: mov v1.b[0], v0.b[0]
-; CHECK-SD-NEXT: mov v2.b[0], v0.b[2]
+; CHECK-SD-NEXT: mov b1, v0.b[0]
+; CHECK-SD-NEXT: mov b2, v0.b[2]
; CHECK-SD-NEXT: mov v1.b[4], v0.b[1]
; CHECK-SD-NEXT: mov v2.b[4], v0.b[3]
; CHECK-SD-NEXT: ushll v0.2d, v1.2s, #0
diff --git a/llvm/test/CodeGen/AArch64/fix-shuffle-vector-be-rev.ll b/llvm/test/CodeGen/AArch64/fix-shuffle-vector-be-rev.ll
index e90b6cb7f809b..65da95e0163f4 100644
--- a/llvm/test/CodeGen/AArch64/fix-shuffle-vector-be-rev.ll
+++ b/llvm/test/CodeGen/AArch64/fix-shuffle-vector-be-rev.ll
@@ -5,7 +5,7 @@
define <4 x i16> @test_reconstructshuffle(<16 x i8> %a, <16 x i8> %b) nounwind {
; CHECKLE-LABEL: test_reconstructshuffle:
; CHECKLE: // %bb.0:
-; CHECKLE-NEXT: mov v2.b[0], v0.b[3]
+; CHECKLE-NEXT: mov b2, v0.b[3]
; CHECKLE-NEXT: ext v1.16b, v1.16b, v1.16b, #8
; CHECKLE-NEXT: mov v2.b[2], v0.b[2]
; CHECKLE-NEXT: mov v2.b[4], v0.b[1]
@@ -21,7 +21,7 @@ define <4 x i16> @test_reconstructshuffle(<16 x i8> %a, <16 x i8> %b) nounwind {
; CHECKBE-NEXT: rev64 v1.16b, v1.16b
; CHECKBE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
; CHECKBE-NEXT: ext v1.16b, v1.16b, v1.16b, #8
-; CHECKBE-NEXT: mov v2.b[0], v0.b[3]
+; CHECKBE-NEXT: mov b2, v0.b[3]
; CHECKBE-NEXT: ext v1.16b, v1.16b, v1.16b, #8
; CHECKBE-NEXT: mov v2.b[2], v0.b[2]
; CHECKBE-NEXT: mov v2.b[4], v0.b[1]
diff --git a/llvm/test/CodeGen/AArch64/fp16-vector-shuffle.ll b/llvm/test/CodeGen/AArch64/fp16-vector-shuffle.ll
index 97c3a4937cda7..05422d3cc6051 100644
--- a/llvm/test/CodeGen/AArch64/fp16-vector-shuffle.ll
+++ b/llvm/test/CodeGen/AArch64/fp16-vector-shuffle.ll
@@ -347,9 +347,8 @@ define half @get_lane_64(<4 x half> %a) #0 {
; CHECK-LABEL: get_lane_64:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: umov w8, v0.h[2]
-; CHECK-NEXT: fmov s0, w8
-; CHECK-NEXT: // kill: def $h0 killed $h0 killed $s0
+; CHECK-NEXT: mov h0, v0.h[2]
+; CHECK-NEXT: // kill: def $h0 killed $h0 killed $q0
; CHECK-NEXT: ret
entry:
%0 = bitcast <4 x half> %a to <4 x i16>
@@ -362,9 +361,8 @@ entry:
define half @get_lane_128(<8 x half> %a) #0 {
; CHECK-LABEL: get_lane_128:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: umov w8, v0.h[2]
-; CHECK-NEXT: fmov s0, w8
-; CHECK-NEXT: // kill: def $h0 killed $h0 killed $s0
+; CHECK-NEXT: mov h0, v0.h[2]
+; CHECK-NEXT: // kill: def $h0 killed $h0 killed $q0
; CHECK-NEXT: ret
entry:
%0 = bitcast <8 x half> %a to <8 x i16>
diff --git a/llvm/test/CodeGen/AArch64/itofp.ll b/llvm/test/CodeGen/AArch64/itofp.ll
index fb2bdb4d63f47..34858940370e9 100644
--- a/llvm/test/CodeGen/AArch64/itofp.ll
+++ b/llvm/test/CodeGen/AArch64/itofp.ll
@@ -3443,10 +3443,10 @@ define <8 x double> @stofp_v8i8_v8f64(<8 x i8> %a) {
; CHECK-SD-LABEL: stofp_v8i8_v8f64:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT: mov v1.b[0], v0.b[0]
-; CHECK-SD-NEXT: mov v2.b[0], v0.b[2]
-; CHECK-SD-NEXT: mov v3.b[0], v0.b[4]
-; CHECK-SD-NEXT: mov v4.b[0], v0.b[6]
+; CHECK-SD-NEXT: mov b1, v0.b[0]
+; CHECK-SD-NEXT: mov b2, v0.b[2]
+; CHECK-SD-NEXT: mov b3, v0.b[4]
+; CHECK-SD-NEXT: mov b4, v0.b[6]
; CHECK-SD-NEXT: mov v1.b[4], v0.b[1]
; CHECK-SD-NEXT: mov v2.b[4], v0.b[3]
; CHECK-SD-NEXT: mov v3.b[4], v0.b[5]
@@ -3492,10 +3492,10 @@ define <8 x double> @utofp_v8i8_v8f64(<8 x i8> %a) {
; CHECK-SD-LABEL: utofp_v8i8_v8f64:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT: mov v2.b[0], v0.b[0]
-; CHECK-SD-NEXT: mov v3.b[0], v0.b[2]
-; CHECK-SD-NEXT: mov v4.b[0], v0.b[4]
-; CHECK-SD-NEXT: mov v5.b[0], v0.b[6]
+; CHECK-SD-NEXT: mov b2, v0.b[0]
+; CHECK-SD-NEXT: mov b3, v0.b[2]
+; CHECK-SD-NEXT: mov b4, v0.b[4]
+; CHECK-SD-NEXT: mov b5, v0.b[6]
; CHECK-SD-NEXT: movi d1, #0x0000ff000000ff
; CHECK-SD-NEXT: mov v2.b[4], v0.b[1]
; CHECK-SD-NEXT: mov v3.b[4], v0.b[3]
@@ -3538,14 +3538,14 @@ define <16 x double> @stofp_v16i8_v16f64(<16 x i8> %a) {
; CHECK-SD-LABEL: stofp_v16i8_v16f64:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-SD-NEXT: mov v2.b[0], v0.b[0]
-; CHECK-SD-NEXT: mov v3.b[0], v0.b[2]
-; CHECK-SD-NEXT: mov v4.b[0], v0.b[4]
-; CHECK-SD-NEXT: mov v5.b[0], v0.b[6]
-; CHECK-SD-NEXT: mov v6.b[0], v1.b[0]
-; CHECK-SD-NEXT: mov v7.b[0], v1.b[2]
-; CHECK-SD-NEXT: mov v16.b[0], v1.b[4]
-; CHECK-SD-NEXT: mov v17.b[0], v1.b[6]
+; CHECK-SD-NEXT: mov b2, v0.b[0]
+; CHECK-SD-NEXT: mov b3, v0.b[2]
+; CHECK-SD-NEXT: mov b4, v0.b[4]
+; CHECK-SD-NEXT: mov b5, v0.b[6]
+; CHECK-SD-NEXT: mov b6, v1.b[0]
+; CHECK-SD-NEXT: mov b7, v1.b[2]
+; CHECK-SD-NEXT: mov b16, v1.b[4]
+; CHECK-SD-NEXT: mov b17, v1.b[6]
; CHECK-SD-NEXT: mov v2.b[4], v0.b[1]
; CHECK-SD-NEXT: mov v3.b[4], v0.b[3]
; CHECK-SD-NEXT: mov v4.b[4], v0.b[5]
@@ -3622,15 +3622,15 @@ define <16 x double> @utofp_v16i8_v16f64(<16 x i8> %a) {
; CHECK-SD-LABEL: utofp_v16i8_v16f64:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: ext v2.16b, v0.16b, v0.16b, #8
-; CHECK-SD-NEXT: mov v3.b[0], v0.b[0]
-; CHECK-SD-NEXT: mov v4.b[0], v0.b[2]
-; CHECK-SD-NEXT: mov v5.b[0], v0.b[4]
-; CHECK-SD-NEXT: mov v6.b[0], v0.b[6]
+; CHECK-SD-NEXT: mov b3, v0.b[0]
+; CHECK-SD-NEXT: mov b4, v0.b[2]
+; CHECK-SD-NEXT: mov b5, v0.b[4]
+; CHECK-SD-NEXT: mov b6, v0.b[6]
; CHECK-SD-NEXT: movi d1, #0x0000ff000000ff
-; CHECK-SD-NEXT: mov v7.b[0], v2.b[0]
-; CHECK-SD-NEXT: mov v16.b[0], v2.b[2]
-; CHECK-SD-NEXT: mov v17.b[0], v2.b[4]
-; CHECK-SD-NEXT: mov v18.b[0], v2.b[6]
+; CHECK-SD-NEXT: mov b7, v2.b[0]
+; CHECK-SD-NEXT: mov b16, v2.b[2]
+; CHECK-SD-NEXT: mov b17, v2.b[4]
+; CHECK-SD-NEXT: mov b18, v2.b[6]
; CHECK-SD-NEXT: mov v3.b[4], v0.b[1]
; CHECK-SD-NEXT: mov v4.b[4], v0.b[3]
; CHECK-SD-NEXT: mov v5.b[4], v0.b[5]
@@ -3699,18 +3699,18 @@ define <32 x double> @stofp_v32i8_v32f64(<32 x i8> %a) {
; CHECK-SD-LABEL: stofp_v32i8_v32f64:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: ext v3.16b, v0.16b, v0.16b, #8
-; CHECK-SD-NEXT: mov v5.b[0], v1.b[6]
-; CHECK-SD-NEXT: mov v17.b[0], v1.b[4]
-; CHECK-SD-NEXT: mov v20.b[0], v1.b[2]
-; CHECK-SD-NEXT: mov v21.b[0], v1.b[0]
-; CHECK-SD-NEXT: mov v18.b[0], v0.b[0]
-; CHECK-SD-NEXT: mov v19.b[0], v0.b[6]
-; CHECK-SD-NEXT: mov v22.b[0], v0.b[4]
+; CHECK-SD-NEXT: mov b5, v1.b[6]
+; CHECK-SD-NEXT: mov b17, v1.b[4]
+; CHECK-SD-NEXT: mov b20, v1.b[2]
+; CHECK-SD-NEXT: mov b21, v1.b[0]
+; CHECK-SD-NEXT: mov b18, v0.b[0]
+; CHECK-SD-NEXT: mov b19, v0.b[6]
+; CHECK-SD-NEXT: mov b22, v0.b[4]
; CHECK-SD-NEXT: ext v16.16b, v1.16b, v1.16b, #8
-; CHECK-SD-NEXT: mov v2.b[0], v3.b[0]
-; CHECK-SD-NEXT: mov v4.b[0], v3.b[2]
-; CHECK-SD-NEXT: mov v6.b[0], v3.b[4]
-; CHECK-SD-NEXT: mov v7.b[0], v3.b[6]
+; CHECK-SD-NEXT: mov b2, v3.b[0]
+; CHECK-SD-NEXT: mov b4, v3.b[2]
+; CHECK-SD-NEXT: mov b6, v3.b[4]
+; CHECK-SD-NEXT: mov b7, v3.b[6]
; CHECK-SD-NEXT: mov v5.b[4], v1.b[7]
; CHECK-SD-NEXT: mov v17.b[4], v1.b[5]
; CHECK-SD-NEXT: mov v20.b[4], v1.b[3]
@@ -3718,16 +3718,16 @@ define <32 x double> @stofp_v32i8_v32f64(<32 x i8> %a) {
; CHECK-SD-NEXT: mov v19.b[4], v0.b[7]
; CHECK-SD-NEXT: mov v22.b[4], v0.b[5]
; CHECK-SD-NEXT: mov v18.b[4], v0.b[1]
-; CHECK-SD-NEXT: mov v23.b[0], v16.b[0]
+; CHECK-SD-NEXT: mov b23, v16.b[0]
; CHECK-SD-NEXT: mov v2.b[4], v3.b[1]
; CHECK-SD-NEXT: mov v4.b[4], v3.b[3]
; CHECK-SD-NEXT: mov v6.b[4], v3.b[5]
; CHECK-SD-NEXT: mov v7.b[4], v3.b[7]
-; CHECK-SD-NEXT: mov v3.b[0], v0.b[2]
+; CHECK-SD-NEXT: mov b3, v0.b[2]
; CHECK-SD-NEXT: shl v5.2s, v5.2s, #24
; CHECK-SD-NEXT: shl v17.2s, v17.2s, #24
; CHECK-SD-NEXT: shl v20.2s, v20.2s, #24
-; CHECK-SD-NEXT: mov v24.b[0], v16.b[4]
+; CHECK-SD-NEXT: mov b24, v16.b[4]
; CHECK-SD-NEXT: mov v23.b[4], v16.b[1]
; CHECK-SD-NEXT: shl v18.2s, v18.2s, #24
; CHECK-SD-NEXT: shl v19.2s, v19.2s, #24
@@ -3739,10 +3739,10 @@ define <32 x double> @stofp_v32i8_v32f64(<32 x i8> %a) {
; CHECK-SD-NEXT: shl v0.2s, v21.2s, #24
; CHECK-SD-NEXT: shl v4.2s, v6.2s, #24
; CHECK-SD-NEXT: shl v6.2s, v7.2s, #24
-; CHECK-SD-NEXT: mov v7.b[0], v16.b[2]
+; CHECK-SD-NEXT: mov b7, v16.b[2]
; CHECK-SD-NEXT: sshll v5.2d, v5.2s, #0
; CHECK-SD-NEXT: sshr v20.2s, v20.2s, #24
-; CHECK-SD-NEXT: mov v21.b[0], v16.b[6]
+; CHECK-SD-NEXT: mov b21, v16.b[6]
; CHECK-SD-NEXT: sshll v17.2d, v17.2s, #0
; CHECK-SD-NEXT: sshr v0.2s, v0.2s, #24
; CHECK-SD-NEXT: shl v22.2s, v22.2s, #24
@@ -3869,25 +3869,25 @@ entry:
define <32 x double> @utofp_v32i8_v32f64(<32 x i8> %a) {
; CHECK-SD-LABEL: utofp_v32i8_v32f64:
; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: mov v6.b[0], v1.b[6]
-; CHECK-SD-NEXT: mov v7.b[0], v1.b[4]
+; CHECK-SD-NEXT: mov b6, v1.b[6]
+; CHECK-SD-NEXT: mov b7, v1.b[4]
; CHECK-SD-NEXT: ext v3.16b, v1.16b, v1.16b, #8
-; CHECK-SD-NEXT: mov v16.b[0], v1.b[2]
-; CHECK-SD-NEXT: mov v17.b[0], v1.b[0]
-; CHECK-SD-NEXT: mov v19.b[0], v0.b[6]
-; CHECK-SD-NEXT: mov v20.b[0], v0.b[4]
+; CHECK-SD-NEXT: mov b16, v1.b[2]
+; CHECK-SD-NEXT: mov b17, v1.b[0]
+; CHECK-SD-NEXT: mov b19, v0.b[6]
+; CHECK-SD-NEXT: mov b20, v0.b[4]
; CHECK-SD-NEXT: movi d5, #0x0000ff000000ff
-; CHECK-SD-NEXT: mov v24.b[0], v0.b[2]
-; CHECK-SD-NEXT: mov v25.b[0], v0.b[0]
+; CHECK-SD-NEXT: mov b24, v0.b[2]
+; CHECK-SD-NEXT: mov b25, v0.b[0]
; CHECK-SD-NEXT: ext v2.16b, v0.16b, v0.16b, #8
; CHECK-SD-NEXT: mov v6.b[4], v1.b[7]
; CHECK-SD-NEXT: mov v7.b[4], v1.b[5]
-; CHECK-SD-NEXT: mov v18.b[0], v3.b[0]
-; CHECK-SD-NEXT: mov v21.b[0], v3.b[2]
-; CHECK-SD-NEXT: mov v23.b[0], v3.b[4]
+; CHECK-SD-NEXT: mov b18, v3.b[0]
+; CHECK-SD-NEXT: mov b21, v3.b[2]
+; CHECK-SD-NEXT: mov b23, v3.b[4]
; CHECK-SD-NEXT: mov v16.b[4], v1.b[3]
; CHECK-SD-NEXT: mov v17.b[4], v1.b[1]
-; CHECK-SD-NEXT: mov v1.b[0], v3.b[6]
+; CHECK-SD-NEXT: mov b1, v3.b[6]
; CHECK-SD-NEXT: mov v19.b[4], v0.b[7]
; CHECK-SD-NEXT: mov v20.b[4], v0.b[5]
; CHECK-SD-NEXT: mov v24.b[4], v0.b[3]
@@ -3905,15 +3905,15 @@ define <32 x double> @utofp_v32i8_v32f64(<32 x i8> %a) {
; CHECK-SD-NEXT: ushll v7.2d, v7.2s, #0
; CHECK-SD-NEXT: and v20.8b, v20.8b, v5.8b
; CHECK-SD-NEXT: ushll v16.2d, v16.2s, #0
-; CHECK-SD-NEXT: mov v4.b[0], v2.b[0]
-; CHECK-SD-NEXT: mov v22.b[0], v2.b[2]
+; CHECK-SD-NEXT: mov b4, v2.b[0]
+; CHECK-SD-NEXT: mov b22, v2.b[2]
; CHECK-SD-NEXT: ushll v17.2d, v17.2s, #0
; CHECK-SD-NEXT: ushll v0.2d, v3.2s, #0
-; CHECK-SD-NEXT: mov v19.b[0], v2.b[4]
+; CHECK-SD-NEXT: mov b19, v2.b[4]
; CHECK-SD-NEXT: ucvtf v6.2d, v6.2d
; CHECK-SD-NEXT: ucvtf v3.2d, v7.2d
; CHECK-SD-NEXT: ushll v20.2d, v20.2s, #0
-; CHECK-SD-NEXT: mov v7.b[0], v2.b[6]
+; CHECK-SD-NEXT: mov b7, v2.b[6]
; CHECK-SD-NEXT: ucvtf v16.2d, v16.2d
; CHECK-SD-NEXT: and v24.8b, v24.8b, v5.8b
; CHECK-SD-NEXT: ucvtf v17.2d, v17.2d
diff --git a/llvm/test/CodeGen/AArch64/neon-bitcast.ll b/llvm/test/CodeGen/AArch64/neon-bitcast.ll
index c039da26b7c15..c6aa8701e1721 100644
--- a/llvm/test/CodeGen/AArch64/neon-bitcast.ll
+++ b/llvm/test/CodeGen/AArch64/neon-bitcast.ll
@@ -555,7 +555,7 @@ define <2 x i8> @bitcast_i16_to_v2i8(i16 %word) {
; CHECK-LE-LABEL: bitcast_i16_to_v2i8:
; CHECK-LE: // %bb.0:
; CHECK-LE-NEXT: fmov s1, w0
-; CHECK-LE-NEXT: mov v0.b[0], v1.b[0]
+; CHECK-LE-NEXT: mov b0, v1.b[0]
; CHECK-LE-NEXT: mov v0.b[4], v1.b[1]
; CHECK-LE-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-LE-NEXT: ret
@@ -564,7 +564,7 @@ define <2 x i8> @bitcast_i16_to_v2i8(i16 %word) {
; CHECK-BE: // %bb.0:
; CHECK-BE-NEXT: fmov s0, w0
; CHECK-BE-NEXT: rev16 v0.16b, v0.16b
-; CHECK-BE-NEXT: mov v1.b[0], v0.b[0]
+; CHECK-BE-NEXT: mov b1, v0.b[0]
; CHECK-BE-NEXT: mov v1.b[4], v0.b[1]
; CHECK-BE-NEXT: rev64 v0.2s, v1.2s
; CHECK-BE-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/neon-insert-sve-elt.ll b/llvm/test/CodeGen/AArch64/neon-insert-sve-elt.ll
index 0f4eec4fdfda1..bfdf794c1c27a 100644
--- a/llvm/test/CodeGen/AArch64/neon-insert-sve-elt.ll
+++ b/llvm/test/CodeGen/AArch64/neon-insert-sve-elt.ll
@@ -360,8 +360,7 @@ define <4 x i32> @test_q_lane4_nxv4i32(<4 x i32> %a, <vscale x 4 x i32> %b) {
define <1 x double> @test_lane0_nxv2f64(<1 x double> %a, <vscale x 2 x double> %b) {
; CHECK-LABEL: test_lane0_nxv2f64:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov v0.d[0], v1.d[0]
-; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT: mov d0, v1.d[0]
; CHECK-NEXT: ret
%c = extractelement <vscale ...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/138549
More information about the llvm-commits mailing list