[llvm] [AArch64][GlobalISel] Ensure we have a insert-subreg v4i32 GPR pattern (PR #142724)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Jun 3 23:48:32 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-aarch64
Author: David Green (davemgreen)
<details>
<summary>Changes</summary>
This is the GISel equivalent of scalar_to_vector, making sure that when we insert into undef we use an fmov that avoids the artificial dependency on the previous register. This adds v2i32 and v2i64 patterns too for similar reasons.
---
Patch is 408.72 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/142724.diff
46 Files Affected:
- (modified) llvm/lib/Target/AArch64/AArch64InstrInfo.td (+7)
- (modified) llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector-widen-crash.ll (+1-1)
- (modified) llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-build-vector-to-dup.mir (+4-7)
- (modified) llvm/test/CodeGen/AArch64/aarch64-bif-gen.ll (+1-2)
- (modified) llvm/test/CodeGen/AArch64/aarch64-bit-gen.ll (+1-2)
- (modified) llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll (+1-1)
- (modified) llvm/test/CodeGen/AArch64/aarch64-smull.ll (+56-56)
- (modified) llvm/test/CodeGen/AArch64/abs.ll (+1-2)
- (modified) llvm/test/CodeGen/AArch64/arm64-dup.ll (+18-39)
- (modified) llvm/test/CodeGen/AArch64/arm64-fp128.ll (+7-7)
- (modified) llvm/test/CodeGen/AArch64/arm64-neon-copy.ll (+12-28)
- (modified) llvm/test/CodeGen/AArch64/arm64-neon-mul-div-cte.ll (+6-6)
- (modified) llvm/test/CodeGen/AArch64/arm64-neon-v8.1a.ll (+18-38)
- (modified) llvm/test/CodeGen/AArch64/bitcast-extend.ll (+18-18)
- (modified) llvm/test/CodeGen/AArch64/bitcast.ll (+18-29)
- (modified) llvm/test/CodeGen/AArch64/bswap.ll (+1-2)
- (modified) llvm/test/CodeGen/AArch64/concat-vector.ll (+9-19)
- (modified) llvm/test/CodeGen/AArch64/ctlz.ll (+11-11)
- (modified) llvm/test/CodeGen/AArch64/cttz.ll (+9-9)
- (modified) llvm/test/CodeGen/AArch64/fcmp.ll (+104-104)
- (modified) llvm/test/CodeGen/AArch64/fcopysign.ll (+2-2)
- (modified) llvm/test/CodeGen/AArch64/fptoi.ll (+10-10)
- (modified) llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll (+13-24)
- (modified) llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll (+13-24)
- (modified) llvm/test/CodeGen/AArch64/freeze.ll (+2-3)
- (modified) llvm/test/CodeGen/AArch64/fsh.ll (+337-337)
- (modified) llvm/test/CodeGen/AArch64/icmp.ll (+8-8)
- (modified) llvm/test/CodeGen/AArch64/insertextract.ll (+7-15)
- (modified) llvm/test/CodeGen/AArch64/itofp.ll (+4-4)
- (modified) llvm/test/CodeGen/AArch64/mul.ll (+17-17)
- (modified) llvm/test/CodeGen/AArch64/neon-bitwise-instructions.ll (+2-2)
- (modified) llvm/test/CodeGen/AArch64/neon-compare-instructions.ll (+1-1)
- (modified) llvm/test/CodeGen/AArch64/neon-dotreduce.ll (+2496-2272)
- (modified) llvm/test/CodeGen/AArch64/phi.ll (+15-14)
- (modified) llvm/test/CodeGen/AArch64/popcount.ll (+8-14)
- (modified) llvm/test/CodeGen/AArch64/ptradd.ll (+16-25)
- (modified) llvm/test/CodeGen/AArch64/rem.ll (+314-306)
- (modified) llvm/test/CodeGen/AArch64/select_cc.ll (+1-1)
- (modified) llvm/test/CodeGen/AArch64/sext.ll (+42-42)
- (modified) llvm/test/CodeGen/AArch64/shift.ll (+3-6)
- (modified) llvm/test/CodeGen/AArch64/shufflevector.ll (+8-14)
- (modified) llvm/test/CodeGen/AArch64/trunc.ll (+8-14)
- (modified) llvm/test/CodeGen/AArch64/vec-combine-compare-to-bitmask.ll (+26-26)
- (modified) llvm/test/CodeGen/AArch64/vecreduce-add.ll (+148-114)
- (modified) llvm/test/CodeGen/AArch64/xtn.ll (+26-53)
- (modified) llvm/test/CodeGen/AArch64/zext.ll (+43-43)
``````````diff
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 727831896737d..3cea0b58c4439 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -7269,6 +7269,13 @@ def : Pat<(v4i16 (vec_ins_or_scal_vec GPR32:$Rn)),
(SUBREG_TO_REG (i32 0),
(f32 (COPY_TO_REGCLASS GPR32:$Rn, FPR32)), ssub)>;
+def : Pat<(v2i32 (vec_ins_or_scal_vec GPR32:$Rn)),
+ (INSERT_SUBREG (v2i32 (IMPLICIT_DEF)), GPR32:$Rn, ssub)>;
+def : Pat<(v4i32 (vec_ins_or_scal_vec GPR32:$Rn)),
+ (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), GPR32:$Rn, ssub)>;
+def : Pat<(v2i64 (vec_ins_or_scal_vec GPR64:$Rn)),
+ (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), GPR64:$Rn, dsub)>;
+
def : Pat<(v4f16 (vec_ins_or_scal_vec (f16 FPR16:$Rn))),
(INSERT_SUBREG (v4f16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>;
def : Pat<(v8f16 (vec_ins_or_scal_vec (f16 FPR16:$Rn))),
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector-widen-crash.ll b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector-widen-crash.ll
index be80886ed3efe..ba53cb57c2ef2 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector-widen-crash.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector-widen-crash.ll
@@ -11,7 +11,7 @@ define i32 @bar() {
; CHECK-NEXT: movi.2d v0, #0000000000000000
; CHECK-NEXT: umov.b w8, v0[0]
; CHECK-NEXT: umov.b w9, v0[1]
-; CHECK-NEXT: mov.s v1[0], w8
+; CHECK-NEXT: fmov s1, w8
; CHECK-NEXT: umov.b w8, v0[2]
; CHECK-NEXT: mov.s v1[1], w9
; CHECK-NEXT: umov.b w9, v0[3]
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-build-vector-to-dup.mir b/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-build-vector-to-dup.mir
index 0115531dfb09a..22d1ccc056eb4 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-build-vector-to-dup.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-build-vector-to-dup.mir
@@ -57,15 +57,12 @@ body: |
; SELECT-NEXT: %r:gpr32 = COPY $w0
; SELECT-NEXT: %q:gpr32 = COPY $w1
; SELECT-NEXT: [[DEF:%[0-9]+]]:fpr64 = IMPLICIT_DEF
+ ; SELECT-NEXT: [[INSERT_SUBREG:%[0-9]+]]:fpr64 = INSERT_SUBREG [[DEF]], %r, %subreg.ssub
; SELECT-NEXT: [[DEF1:%[0-9]+]]:fpr128 = IMPLICIT_DEF
- ; SELECT-NEXT: [[INSERT_SUBREG:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF1]], [[DEF]], %subreg.dsub
- ; SELECT-NEXT: [[INSvi32gpr:%[0-9]+]]:fpr128 = INSvi32gpr [[INSERT_SUBREG]], 0, %r
+ ; SELECT-NEXT: [[INSERT_SUBREG1:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF1]], [[INSERT_SUBREG]], %subreg.dsub
+ ; SELECT-NEXT: [[INSvi32gpr:%[0-9]+]]:fpr128 = INSvi32gpr [[INSERT_SUBREG1]], 1, %q
; SELECT-NEXT: [[COPY:%[0-9]+]]:fpr64 = COPY [[INSvi32gpr]].dsub
- ; SELECT-NEXT: [[DEF2:%[0-9]+]]:fpr128 = IMPLICIT_DEF
- ; SELECT-NEXT: [[INSERT_SUBREG1:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF2]], [[COPY]], %subreg.dsub
- ; SELECT-NEXT: [[INSvi32gpr1:%[0-9]+]]:fpr128 = INSvi32gpr [[INSERT_SUBREG1]], 1, %q
- ; SELECT-NEXT: [[COPY1:%[0-9]+]]:fpr64 = COPY [[INSvi32gpr1]].dsub
- ; SELECT-NEXT: $d0 = COPY [[COPY1]]
+ ; SELECT-NEXT: $d0 = COPY [[COPY]]
; SELECT-NEXT: RET_ReallyLR implicit $d0
%r:_(s32) = COPY $w0
%q:_(s32) = COPY $w1
diff --git a/llvm/test/CodeGen/AArch64/aarch64-bif-gen.ll b/llvm/test/CodeGen/AArch64/aarch64-bif-gen.ll
index 9734ab35bd6b2..7f922c0047553 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-bif-gen.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-bif-gen.ll
@@ -76,8 +76,7 @@ define <1 x i32> @test_bitf_v1i32(<1 x i32> %A, <1 x i32> %B, <1 x i32> %C) {
; CHECK-GI-NEXT: bic w9, w9, w8
; CHECK-GI-NEXT: and w8, w8, w10
; CHECK-GI-NEXT: orr w8, w9, w8
-; CHECK-GI-NEXT: mov v0.s[0], w8
-; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NEXT: fmov s0, w8
; CHECK-GI-NEXT: ret
%neg = xor <1 x i32> %C, <i32 -1>
%and = and <1 x i32> %neg, %B
diff --git a/llvm/test/CodeGen/AArch64/aarch64-bit-gen.ll b/llvm/test/CodeGen/AArch64/aarch64-bit-gen.ll
index 45ad4b07ff66f..b8eb8269d605c 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-bit-gen.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-bit-gen.ll
@@ -76,8 +76,7 @@ define <1 x i32> @test_bit_v1i32(<1 x i32> %A, <1 x i32> %B, <1 x i32> %C) {
; CHECK-GI-NEXT: and w9, w8, w9
; CHECK-GI-NEXT: bic w8, w10, w8
; CHECK-GI-NEXT: orr w8, w9, w8
-; CHECK-GI-NEXT: mov v0.s[0], w8
-; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NEXT: fmov s0, w8
; CHECK-GI-NEXT: ret
%and = and <1 x i32> %C, %B
%neg = xor <1 x i32> %C, <i32 -1>
diff --git a/llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll b/llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll
index 418113a4e4e09..8655bb1292ef7 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll
@@ -204,7 +204,7 @@ define void @matrix_mul_double_shuffle(i32 %N, ptr nocapture %C, ptr nocapture r
; CHECK-GI-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-GI-NEXT: ldrh w9, [x2], #16
; CHECK-GI-NEXT: subs x8, x8, #8
-; CHECK-GI-NEXT: mov v2.s[0], w9
+; CHECK-GI-NEXT: fmov s2, w9
; CHECK-GI-NEXT: mov w9, w0
; CHECK-GI-NEXT: add w0, w0, #8
; CHECK-GI-NEXT: lsl x9, x9, #2
diff --git a/llvm/test/CodeGen/AArch64/aarch64-smull.ll b/llvm/test/CodeGen/AArch64/aarch64-smull.ll
index d6fd4c4110a12..2f23a32c36a9f 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-smull.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-smull.ll
@@ -2282,14 +2282,14 @@ define <2 x i64> @asr(<2 x i64> %a, <2 x i64> %b) {
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: sshr v0.2d, v0.2d, #32
; CHECK-GI-NEXT: sshr v1.2d, v1.2d, #32
-; CHECK-GI-NEXT: fmov x8, d0
-; CHECK-GI-NEXT: fmov x9, d1
-; CHECK-GI-NEXT: mov x10, v0.d[1]
-; CHECK-GI-NEXT: mov x11, v1.d[1]
+; CHECK-GI-NEXT: fmov x10, d0
+; CHECK-GI-NEXT: fmov x11, d1
+; CHECK-GI-NEXT: mov x8, v0.d[1]
+; CHECK-GI-NEXT: mov x9, v1.d[1]
+; CHECK-GI-NEXT: mul x10, x10, x11
; CHECK-GI-NEXT: mul x8, x8, x9
-; CHECK-GI-NEXT: mul x9, x10, x11
-; CHECK-GI-NEXT: mov v0.d[0], x8
-; CHECK-GI-NEXT: mov v0.d[1], x9
+; CHECK-GI-NEXT: fmov d0, x10
+; CHECK-GI-NEXT: mov v0.d[1], x8
; CHECK-GI-NEXT: ret
%x = ashr <2 x i64> %a, <i64 32, i64 32>
%y = ashr <2 x i64> %b, <i64 32, i64 32>
@@ -2317,14 +2317,14 @@ define <2 x i64> @asr_const(<2 x i64> %a, <2 x i64> %b) {
; CHECK-GI-NEXT: adrp x8, .LCPI81_0
; CHECK-GI-NEXT: sshr v0.2d, v0.2d, #32
; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI81_0]
-; CHECK-GI-NEXT: fmov x8, d0
-; CHECK-GI-NEXT: fmov x9, d1
-; CHECK-GI-NEXT: mov x10, v0.d[1]
-; CHECK-GI-NEXT: mov x11, v1.d[1]
+; CHECK-GI-NEXT: fmov x10, d0
+; CHECK-GI-NEXT: fmov x11, d1
+; CHECK-GI-NEXT: mov x8, v0.d[1]
+; CHECK-GI-NEXT: mov x9, v1.d[1]
+; CHECK-GI-NEXT: mul x10, x10, x11
; CHECK-GI-NEXT: mul x8, x8, x9
-; CHECK-GI-NEXT: mul x9, x10, x11
-; CHECK-GI-NEXT: mov v0.d[0], x8
-; CHECK-GI-NEXT: mov v0.d[1], x9
+; CHECK-GI-NEXT: fmov d0, x10
+; CHECK-GI-NEXT: mov v0.d[1], x8
; CHECK-GI-NEXT: ret
%x = ashr <2 x i64> %a, <i64 32, i64 32>
%z = mul nsw <2 x i64> %x, <i64 31, i64 31>
@@ -2799,14 +2799,14 @@ define <2 x i64> @sdistribute_v2i32(<2 x i32> %src1, <2 x i32> %src2, <2 x i32>
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: sshll v2.2d, v2.2s, #0
; CHECK-GI-NEXT: saddl v0.2d, v0.2s, v1.2s
-; CHECK-GI-NEXT: fmov x8, d0
-; CHECK-GI-NEXT: fmov x9, d2
-; CHECK-GI-NEXT: mov x10, v0.d[1]
-; CHECK-GI-NEXT: mov x11, v2.d[1]
+; CHECK-GI-NEXT: fmov x10, d0
+; CHECK-GI-NEXT: fmov x11, d2
+; CHECK-GI-NEXT: mov x8, v0.d[1]
+; CHECK-GI-NEXT: mov x9, v2.d[1]
+; CHECK-GI-NEXT: mul x10, x10, x11
; CHECK-GI-NEXT: mul x8, x8, x9
-; CHECK-GI-NEXT: mul x9, x10, x11
-; CHECK-GI-NEXT: mov v0.d[0], x8
-; CHECK-GI-NEXT: mov v0.d[1], x9
+; CHECK-GI-NEXT: fmov d0, x10
+; CHECK-GI-NEXT: mov v0.d[1], x8
; CHECK-GI-NEXT: ret
entry:
%4 = sext <2 x i32> %src1 to <2 x i64>
@@ -2838,14 +2838,14 @@ define <2 x i64> @sdistribute_const1_v2i32(<2 x i32> %src1, <2 x i32> %mul) {
; CHECK-GI-NEXT: sshll v1.2d, v1.2s, #0
; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI101_0]
; CHECK-GI-NEXT: saddw v0.2d, v2.2d, v0.2s
-; CHECK-GI-NEXT: fmov x9, d1
-; CHECK-GI-NEXT: mov x11, v1.d[1]
-; CHECK-GI-NEXT: fmov x8, d0
-; CHECK-GI-NEXT: mov x10, v0.d[1]
+; CHECK-GI-NEXT: fmov x11, d1
+; CHECK-GI-NEXT: mov x9, v1.d[1]
+; CHECK-GI-NEXT: fmov x10, d0
+; CHECK-GI-NEXT: mov x8, v0.d[1]
+; CHECK-GI-NEXT: mul x10, x10, x11
; CHECK-GI-NEXT: mul x8, x8, x9
-; CHECK-GI-NEXT: mul x9, x10, x11
-; CHECK-GI-NEXT: mov v0.d[0], x8
-; CHECK-GI-NEXT: mov v0.d[1], x9
+; CHECK-GI-NEXT: fmov d0, x10
+; CHECK-GI-NEXT: mov v0.d[1], x8
; CHECK-GI-NEXT: ret
entry:
%4 = sext <2 x i32> %src1 to <2 x i64>
@@ -2875,14 +2875,14 @@ define <2 x i64> @sdistribute_const2_v2i32(<2 x i32> %src1, <2 x i32> %src2) {
; CHECK-GI-NEXT: adrp x8, .LCPI102_0
; CHECK-GI-NEXT: saddl v0.2d, v0.2s, v1.2s
; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI102_0]
-; CHECK-GI-NEXT: fmov x8, d0
-; CHECK-GI-NEXT: fmov x9, d1
-; CHECK-GI-NEXT: mov x10, v0.d[1]
-; CHECK-GI-NEXT: mov x11, v1.d[1]
+; CHECK-GI-NEXT: fmov x10, d0
+; CHECK-GI-NEXT: fmov x11, d1
+; CHECK-GI-NEXT: mov x8, v0.d[1]
+; CHECK-GI-NEXT: mov x9, v1.d[1]
+; CHECK-GI-NEXT: mul x10, x10, x11
; CHECK-GI-NEXT: mul x8, x8, x9
-; CHECK-GI-NEXT: mul x9, x10, x11
-; CHECK-GI-NEXT: mov v0.d[0], x8
-; CHECK-GI-NEXT: mov v0.d[1], x9
+; CHECK-GI-NEXT: fmov d0, x10
+; CHECK-GI-NEXT: mov v0.d[1], x8
; CHECK-GI-NEXT: ret
entry:
%4 = sext <2 x i32> %src1 to <2 x i64>
@@ -2909,14 +2909,14 @@ define <2 x i64> @udistribute_v2i32(<2 x i32> %src1, <2 x i32> %src2, <2 x i32>
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: ushll v2.2d, v2.2s, #0
; CHECK-GI-NEXT: uaddl v0.2d, v0.2s, v1.2s
-; CHECK-GI-NEXT: fmov x8, d0
-; CHECK-GI-NEXT: fmov x9, d2
-; CHECK-GI-NEXT: mov x10, v0.d[1]
-; CHECK-GI-NEXT: mov x11, v2.d[1]
+; CHECK-GI-NEXT: fmov x10, d0
+; CHECK-GI-NEXT: fmov x11, d2
+; CHECK-GI-NEXT: mov x8, v0.d[1]
+; CHECK-GI-NEXT: mov x9, v2.d[1]
+; CHECK-GI-NEXT: mul x10, x10, x11
; CHECK-GI-NEXT: mul x8, x8, x9
-; CHECK-GI-NEXT: mul x9, x10, x11
-; CHECK-GI-NEXT: mov v0.d[0], x8
-; CHECK-GI-NEXT: mov v0.d[1], x9
+; CHECK-GI-NEXT: fmov d0, x10
+; CHECK-GI-NEXT: mov v0.d[1], x8
; CHECK-GI-NEXT: ret
entry:
%4 = zext <2 x i32> %src1 to <2 x i64>
@@ -2948,14 +2948,14 @@ define <2 x i64> @udistribute_const1_v2i32(<2 x i32> %src1, <2 x i32> %mul) {
; CHECK-GI-NEXT: ushll v1.2d, v1.2s, #0
; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI104_0]
; CHECK-GI-NEXT: uaddw v0.2d, v2.2d, v0.2s
-; CHECK-GI-NEXT: fmov x9, d1
-; CHECK-GI-NEXT: mov x11, v1.d[1]
-; CHECK-GI-NEXT: fmov x8, d0
-; CHECK-GI-NEXT: mov x10, v0.d[1]
+; CHECK-GI-NEXT: fmov x11, d1
+; CHECK-GI-NEXT: mov x9, v1.d[1]
+; CHECK-GI-NEXT: fmov x10, d0
+; CHECK-GI-NEXT: mov x8, v0.d[1]
+; CHECK-GI-NEXT: mul x10, x10, x11
; CHECK-GI-NEXT: mul x8, x8, x9
-; CHECK-GI-NEXT: mul x9, x10, x11
-; CHECK-GI-NEXT: mov v0.d[0], x8
-; CHECK-GI-NEXT: mov v0.d[1], x9
+; CHECK-GI-NEXT: fmov d0, x10
+; CHECK-GI-NEXT: mov v0.d[1], x8
; CHECK-GI-NEXT: ret
entry:
%4 = zext <2 x i32> %src1 to <2 x i64>
@@ -2985,14 +2985,14 @@ define <2 x i64> @udistribute_const2_v2i32(<2 x i32> %src1, <2 x i32> %src2) {
; CHECK-GI-NEXT: adrp x8, .LCPI105_0
; CHECK-GI-NEXT: uaddl v0.2d, v0.2s, v1.2s
; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI105_0]
-; CHECK-GI-NEXT: fmov x8, d0
-; CHECK-GI-NEXT: fmov x9, d1
-; CHECK-GI-NEXT: mov x10, v0.d[1]
-; CHECK-GI-NEXT: mov x11, v1.d[1]
+; CHECK-GI-NEXT: fmov x10, d0
+; CHECK-GI-NEXT: fmov x11, d1
+; CHECK-GI-NEXT: mov x8, v0.d[1]
+; CHECK-GI-NEXT: mov x9, v1.d[1]
+; CHECK-GI-NEXT: mul x10, x10, x11
; CHECK-GI-NEXT: mul x8, x8, x9
-; CHECK-GI-NEXT: mul x9, x10, x11
-; CHECK-GI-NEXT: mov v0.d[0], x8
-; CHECK-GI-NEXT: mov v0.d[1], x9
+; CHECK-GI-NEXT: fmov d0, x10
+; CHECK-GI-NEXT: mov v0.d[1], x8
; CHECK-GI-NEXT: ret
entry:
%4 = zext <2 x i32> %src1 to <2 x i64>
diff --git a/llvm/test/CodeGen/AArch64/abs.ll b/llvm/test/CodeGen/AArch64/abs.ll
index d501d9ed24547..0f56d25a47b2a 100644
--- a/llvm/test/CodeGen/AArch64/abs.ll
+++ b/llvm/test/CodeGen/AArch64/abs.ll
@@ -247,8 +247,7 @@ define <1 x i32> @abs_v1i32(<1 x i32> %a){
; CHECK-GI-NEXT: fmov w9, s0
; CHECK-GI-NEXT: cmp w8, #0
; CHECK-GI-NEXT: cneg w8, w9, le
-; CHECK-GI-NEXT: mov v0.s[0], w8
-; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NEXT: fmov s0, w8
; CHECK-GI-NEXT: ret
entry:
%res = call <1 x i32> @llvm.abs.v1i32(<1 x i32> %a, i1 0)
diff --git a/llvm/test/CodeGen/AArch64/arm64-dup.ll b/llvm/test/CodeGen/AArch64/arm64-dup.ll
index 4c28ea7592202..c279cf0f241d2 100644
--- a/llvm/test/CodeGen/AArch64/arm64-dup.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-dup.ll
@@ -334,40 +334,25 @@ entry:
}
define <2 x i32> @f(i32 %a, i32 %b) nounwind readnone {
-; CHECK-SD-LABEL: f:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: fmov s0, w0
-; CHECK-SD-NEXT: mov.s v0[1], w1
-; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: f:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: mov.s v0[0], w0
-; CHECK-GI-NEXT: mov.s v0[1], w1
-; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: f:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fmov s0, w0
+; CHECK-NEXT: mov.s v0[1], w1
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT: ret
%vecinit = insertelement <2 x i32> undef, i32 %a, i32 0
%vecinit1 = insertelement <2 x i32> %vecinit, i32 %b, i32 1
ret <2 x i32> %vecinit1
}
define <4 x i32> @g(i32 %a, i32 %b) nounwind readnone {
-; CHECK-SD-LABEL: g:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: fmov s0, w0
-; CHECK-SD-NEXT: mov.s v0[1], w1
-; CHECK-SD-NEXT: mov.s v0[2], w1
-; CHECK-SD-NEXT: mov.s v0[3], w0
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: g:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: mov.s v0[0], w0
-; CHECK-GI-NEXT: mov.s v0[1], w1
-; CHECK-GI-NEXT: mov.s v0[2], w1
-; CHECK-GI-NEXT: mov.s v0[3], w0
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: g:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fmov s0, w0
+; CHECK-NEXT: mov.s v0[1], w1
+; CHECK-NEXT: mov.s v0[2], w1
+; CHECK-NEXT: mov.s v0[3], w0
+; CHECK-NEXT: ret
%vecinit = insertelement <4 x i32> undef, i32 %a, i32 0
%vecinit1 = insertelement <4 x i32> %vecinit, i32 %b, i32 1
%vecinit2 = insertelement <4 x i32> %vecinit1, i32 %b, i32 2
@@ -376,17 +361,11 @@ define <4 x i32> @g(i32 %a, i32 %b) nounwind readnone {
}
define <2 x i64> @h(i64 %a, i64 %b) nounwind readnone {
-; CHECK-SD-LABEL: h:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: fmov d0, x0
-; CHECK-SD-NEXT: mov.d v0[1], x1
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: h:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: mov.d v0[0], x0
-; CHECK-GI-NEXT: mov.d v0[1], x1
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: h:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: mov.d v0[1], x1
+; CHECK-NEXT: ret
%vecinit = insertelement <2 x i64> undef, i64 %a, i32 0
%vecinit1 = insertelement <2 x i64> %vecinit, i64 %b, i32 1
ret <2 x i64> %vecinit1
diff --git a/llvm/test/CodeGen/AArch64/arm64-fp128.ll b/llvm/test/CodeGen/AArch64/arm64-fp128.ll
index 7eb26096ed156..a75f6419d5a5a 100644
--- a/llvm/test/CodeGen/AArch64/arm64-fp128.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-fp128.ll
@@ -618,7 +618,7 @@ define <2 x i32> @vec_fptosi_32(<2 x fp128> %val) {
; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload
; CHECK-GI-NEXT: mov w19, w0
; CHECK-GI-NEXT: bl __fixtfsi
-; CHECK-GI-NEXT: mov v0.s[0], w19
+; CHECK-GI-NEXT: fmov s0, w19
; CHECK-GI-NEXT: ldp x30, x19, [sp, #16] // 16-byte Folded Reload
; CHECK-GI-NEXT: mov v0.s[1], w0
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
@@ -661,7 +661,7 @@ define <2 x i64> @vec_fptosi_64(<2 x fp128> %val) {
; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload
; CHECK-GI-NEXT: mov x19, x0
; CHECK-GI-NEXT: bl __fixtfdi
-; CHECK-GI-NEXT: mov v0.d[0], x19
+; CHECK-GI-NEXT: fmov d0, x19
; CHECK-GI-NEXT: ldp x30, x19, [sp, #16] // 16-byte Folded Reload
; CHECK-GI-NEXT: mov v0.d[1], x0
; CHECK-GI-NEXT: add sp, sp, #32
@@ -702,7 +702,7 @@ define <2 x i32> @vec_fptoui_32(<2 x fp128> %val) {
; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload
; CHECK-GI-NEXT: mov w19, w0
; CHECK-GI-NEXT: bl __fixunstfsi
-; CHECK-GI-NEXT: mov v0.s[0], w19
+; CHECK-GI-NEXT: fmov s0, w19
; CHECK-GI-NEXT: ldp x30, x19, [sp, #16] // 16-byte Folded Reload
; CHECK-GI-NEXT: mov v0.s[1], w0
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
@@ -745,7 +745,7 @@ define <2 x i64> @vec_fptoui_64(<2 x fp128> %val) {
; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload
; CHECK-GI-NEXT: mov x19, x0
; CHECK-GI-NEXT: bl __fixunstfdi
-; CHECK-GI-NEXT: mov v0.d[0], x19
+; CHECK-GI-NEXT: fmov d0, x19
; CHECK-GI-NEXT: ldp x30, x19, [sp, #16] // 16-byte Folded Reload
; CHECK-GI-NEXT: mov v0.d[1], x0
; CHECK-GI-NEXT: add sp, sp, #32
@@ -977,7 +977,7 @@ define <2 x i1> @vec_setcc1(<2 x fp128> %lhs, <2 x fp128> %rhs) {
; CHECK-GI-NEXT: cmp w0, #0
; CHECK-GI-NEXT: cset w19, le
; CHECK-GI-NEXT: bl __letf2
-; CHECK-GI-NEXT: mov v0.s[0], w19
+; CHECK-GI-NEXT: fmov s0, w19
; CHECK-GI-NEXT: cmp w0, #0
; CHECK-GI-NEXT: cset w8, le
; CHECK-GI-NEXT: ldp x30, x19, [sp, #32] // 16-byte Folded Reload
@@ -1032,7 +1032,7 @@ define <2 x i1> @vec_setcc2(<2 x fp128> %lhs, <2 x fp128> %rhs) {
; CHECK-GI-NEXT: cmp w0, #0
; CHECK-GI-NEXT: cset w19, gt
; CHECK-GI-NEXT: bl __letf2
-; CHECK-GI-NEXT: mov v0.s[0], w19
+; CHECK-GI-NEXT: fmov s0, w19
; CHECK-GI-NEXT: cmp w0, #0
; CHECK-GI-NEXT: cset w8, gt
; CHECK-GI-NEXT: ldp x30, x19, [sp, #32] // 16-byte Folded Reload
@@ -1109,7 +1109,7 @@ define <2 x i1> @vec_setcc3(<2 x fp128> %lhs, <2 x fp128> %rhs) {
; CHECK-GI-NEXT: cmp w0, #0
; CHECK-GI-NEXT: cset w20, eq
; CHECK-GI-NEXT: bl __unordtf2
-; CHECK-GI-NEXT: mov v0.s[0], w19
+; CHECK-GI-NEXT: fmov s0, w19
; CHECK-GI-NEXT: cmp w0, #0
; CHECK-GI-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload
; CHECK-GI-NEXT: cset w8, ne
diff --git a/llvm/test/CodeGen/AArch64/arm64-neon-copy.ll b/llvm/test/CodeGen/AArch64/arm64-neon-copy.ll
index 51f1351a5edf4..ddd8a72618b1e 100644
--- a/llvm/test/CodeGen/AArch64/arm64-neon-copy.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-neon-copy.ll
@@ -1198,44 +1198,28 @@ define <8 x i16> @scalar_to_vector.v8i16(i16 %a) {
}
define <2 x i32> @scalar_to_vector.v2i32(i32 %a) {
-; CHECK-SD-LABEL: scalar_to_vector.v2i32:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: fmov s0, w0
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: scalar_to_vector.v2i32:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: mov v0.s[0], w0
-; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: scalar_to_vector.v2i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fmov s0, w0
+; CHECK-NEXT: ret
%b = insertelement <2 x i32> undef, i32 %a, i32 0
ret <2 x i32> %b
}
define <4 x i32> @scalar_to_vector.v4i32(i32 %a) {
-; CHECK-SD-LABEL: scalar_to_vector.v4i32:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: fmov s0, w0
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: scalar_to_vector.v4i32:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: mov v0.s[0], w0
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: scalar_to_vector.v4i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fmov s0, w0
+; CHECK-NEXT: ret
%b = insertelement <4 x i32> undef, i32 %a, i32 0
ret <4 x i32> %b
}
define <2 x i64> @scalar_to_vector.v2i64(i64 %a) {
-; CHECK-SD-LABEL: scalar_to_vector.v2i64:
-; CHECK-SD: ...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/142724
More information about the llvm-commits
mailing list