[llvm-branch-commits] [llvm] [AArch64] Fold zero-high vector inserts in MI peephole optimisation (PR #182835)
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Mon Feb 23 03:53:54 PST 2026
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-aarch64
Author: Amina Chabane (Amichaxx)
<details>
<summary>Changes</summary>
Summary
This patch follows on from #<!-- -->178227.
The previous ISel fold lowers the 64-bit case to:
fmov d0, x0
fmov d0, d0
which is not ideal; the redundant second move could be folded away, leaving a single fmov d0, x0.
A redundant copy comes from the INSERT_SUBREG/INSvi64lane.
This peephole detects <2 x i64> vectors whose upper lane is zeroed and whose low lane is produced by FMOVXDr/FMOVDr, and then removes the redundant copy.
Existing tests have been updated accordingly, and new MIR tests have been added.
---
Full diff: https://github.com/llvm/llvm-project/pull/182835.diff
8 Files Affected:
- (modified) llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp (+47-4)
- (modified) llvm/test/CodeGen/AArch64/aarch64-addv.ll (-3)
- (modified) llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll (+7-8)
- (modified) llvm/test/CodeGen/AArch64/bitcast-extend.ll (-1)
- (modified) llvm/test/CodeGen/AArch64/ctpop.ll (-1)
- (modified) llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll (+24-24)
- (modified) llvm/test/CodeGen/AArch64/neon-lowhalf128-optimisation.ll (-1)
- (modified) llvm/test/CodeGen/AArch64/peephole-insvigpr.mir (+51)
``````````diff
diff --git a/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp b/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp
index 21ff921da9b8a..398273babe1b1 100644
--- a/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp
+++ b/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp
@@ -687,14 +687,57 @@ bool AArch64MIPeepholeOpt::visitINSviGPR(MachineInstr &MI, unsigned Opc) {
}
// All instructions that set a FPR64 will implicitly zero the top bits of the
-// register.
+// register. When the def is expressed as a COPY from a GPR, turn it into an
+// explicit FMOV so it cannot be elided later in further passes.
static bool is64bitDefwithZeroHigh64bit(MachineInstr *MI,
- MachineRegisterInfo *MRI) {
+ MachineRegisterInfo *MRI,
+ const AArch64InstrInfo *TII) {
if (!MI->getOperand(0).isReg() || !MI->getOperand(0).isDef())
return false;
const TargetRegisterClass *RC = MRI->getRegClass(MI->getOperand(0).getReg());
if (RC != &AArch64::FPR64RegClass)
return false;
+ if (MI->getOpcode() == TargetOpcode::COPY) {
+ MachineOperand &SrcOp = MI->getOperand(1);
+ if (!SrcOp.isReg())
+ return false;
+ if (SrcOp.getSubReg())
+ return false;
+ Register SrcReg = SrcOp.getReg();
+ auto IsGPR64Like = [&]() -> bool {
+ if (SrcReg.isVirtual())
+ return AArch64::GPR64allRegClass.hasSubClassEq(
+ MRI->getRegClass(SrcReg));
+ return AArch64::GPR64allRegClass.contains(SrcReg);
+ };
+ if (!IsGPR64Like())
+ return false;
+ assert(TII && "Expected InstrInfo when materializing COPYs");
+ // FMOVXDr insists on strict GPR64 operands, so fix up the COPY source.
+ MachineOperand &SrcMO = MI->getOperand(1);
+ bool SrcKill = SrcMO.isKill();
+ if (SrcReg.isVirtual()) {
+ if (MRI->getRegClass(SrcReg) != &AArch64::GPR64RegClass) {
+ // Pass the value through a temporary GPR64 vreg to satisfy the
+ // verifier.
+ Register NewSrc = MRI->createVirtualRegister(&AArch64::GPR64RegClass);
+ BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
+ TII->get(TargetOpcode::COPY), NewSrc)
+ .addReg(SrcReg, getKillRegState(SrcKill));
+ SrcReg = NewSrc;
+ SrcKill = true;
+ }
+ } else if (!AArch64::GPR64RegClass.contains(SrcReg)) {
+ return false;
+ }
+ SrcMO.setReg(SrcReg);
+ SrcMO.setSubReg(0);
+ SrcMO.setIsKill(SrcKill);
+ // Replace the COPY with an explicit FMOV so the zeroing behaviour stays
+ // visible.
+ MI->setDesc(TII->get(AArch64::FMOVXDr));
+ return true;
+ }
return MI->getOpcode() > TargetOpcode::GENERIC_OP_END;
}
@@ -710,7 +753,7 @@ bool AArch64MIPeepholeOpt::visitINSvi64lane(MachineInstr &MI) {
if (Low64MI->getOpcode() != AArch64::INSERT_SUBREG)
return false;
Low64MI = MRI->getUniqueVRegDef(Low64MI->getOperand(2).getReg());
- if (!Low64MI || !is64bitDefwithZeroHigh64bit(Low64MI, MRI))
+ if (!Low64MI || !is64bitDefwithZeroHigh64bit(Low64MI, MRI, TII))
return false;
// Check there is `mov 0` MI for high 64-bits.
@@ -751,7 +794,7 @@ bool AArch64MIPeepholeOpt::visitINSvi64lane(MachineInstr &MI) {
bool AArch64MIPeepholeOpt::visitFMOVDr(MachineInstr &MI) {
// An FMOVDr sets the high 64-bits to zero implicitly, similar to ORR for GPR.
MachineInstr *Low64MI = MRI->getUniqueVRegDef(MI.getOperand(1).getReg());
- if (!Low64MI || !is64bitDefwithZeroHigh64bit(Low64MI, MRI))
+ if (!Low64MI || !is64bitDefwithZeroHigh64bit(Low64MI, MRI, TII))
return false;
// Let's remove MIs for high 64-bits.
diff --git a/llvm/test/CodeGen/AArch64/aarch64-addv.ll b/llvm/test/CodeGen/AArch64/aarch64-addv.ll
index d8aeeff79b936..de68a79824eb3 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-addv.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-addv.ll
@@ -545,7 +545,6 @@ define i8 @addv_zero_lanes_v16i8(ptr %arr) {
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldrb w8, [x0]
; CHECK-SD-NEXT: fmov d0, x8
-; CHECK-SD-NEXT: fmov d0, d0
; CHECK-SD-NEXT: addv b0, v0.16b
; CHECK-SD-NEXT: fmov w0, s0
; CHECK-SD-NEXT: ret
@@ -570,7 +569,6 @@ define i16 @addv_zero_lanes_v8i16(ptr %arr) {
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldrh w8, [x0]
; CHECK-SD-NEXT: fmov d0, x8
-; CHECK-SD-NEXT: fmov d0, d0
; CHECK-SD-NEXT: addv h0, v0.8h
; CHECK-SD-NEXT: fmov w0, s0
; CHECK-SD-NEXT: ret
@@ -595,7 +593,6 @@ define i32 @addv_zero_lanes_v4i32(ptr %arr) {
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr w8, [x0]
; CHECK-SD-NEXT: fmov d0, x8
-; CHECK-SD-NEXT: fmov d0, d0
; CHECK-SD-NEXT: addv s0, v0.4s
; CHECK-SD-NEXT: fmov w0, s0
; CHECK-SD-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll b/llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll
index fa982ce27c7d0..ff2d5c68af531 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll
@@ -823,14 +823,13 @@ define i64 @red_mla_dup_ext_u8_s8_s64(ptr noalias noundef readonly captures(none
; CHECK-SD-NEXT: cbz x11, .LBB6_13
; CHECK-SD-NEXT: .LBB6_10: // %vec.epilog.ph
; CHECK-SD-NEXT: mov w11, w1
-; CHECK-SD-NEXT: fmov d0, x8
-; CHECK-SD-NEXT: movi v1.2d, #0000000000000000
-; CHECK-SD-NEXT: sxtb x8, w11
+; CHECK-SD-NEXT: movi v0.2d, #0000000000000000
; CHECK-SD-NEXT: movi v3.2d, #0x000000000000ff
+; CHECK-SD-NEXT: sxtb x11, w11
+; CHECK-SD-NEXT: fmov d2, x8
+; CHECK-SD-NEXT: dup v1.2s, w11
; CHECK-SD-NEXT: mov x11, x10
; CHECK-SD-NEXT: and x10, x9, #0xfffffffc
-; CHECK-SD-NEXT: fmov d0, d0
-; CHECK-SD-NEXT: dup v2.2s, w8
; CHECK-SD-NEXT: sub x8, x11, x10
; CHECK-SD-NEXT: add x11, x0, x11
; CHECK-SD-NEXT: .LBB6_11: // %vec.epilog.vector.body
@@ -845,11 +844,11 @@ define i64 @red_mla_dup_ext_u8_s8_s64(ptr noalias noundef readonly captures(none
; CHECK-SD-NEXT: and v4.16b, v4.16b, v3.16b
; CHECK-SD-NEXT: xtn v5.2s, v5.2d
; CHECK-SD-NEXT: xtn v4.2s, v4.2d
-; CHECK-SD-NEXT: smlal v1.2d, v2.2s, v4.2s
-; CHECK-SD-NEXT: smlal v0.2d, v2.2s, v5.2s
+; CHECK-SD-NEXT: smlal v0.2d, v1.2s, v4.2s
+; CHECK-SD-NEXT: smlal v2.2d, v1.2s, v5.2s
; CHECK-SD-NEXT: b.ne .LBB6_11
; CHECK-SD-NEXT: // %bb.12: // %vec.epilog.middle.block
-; CHECK-SD-NEXT: add v0.2d, v0.2d, v1.2d
+; CHECK-SD-NEXT: add v0.2d, v2.2d, v0.2d
; CHECK-SD-NEXT: cmp x10, x9
; CHECK-SD-NEXT: addp d0, v0.2d
; CHECK-SD-NEXT: fmov x8, d0
diff --git a/llvm/test/CodeGen/AArch64/bitcast-extend.ll b/llvm/test/CodeGen/AArch64/bitcast-extend.ll
index 2bd91a8dc9a7d..b981c1701725a 100644
--- a/llvm/test/CodeGen/AArch64/bitcast-extend.ll
+++ b/llvm/test/CodeGen/AArch64/bitcast-extend.ll
@@ -341,7 +341,6 @@ define <16 x i8> @load_zext_v16i8(ptr %p) {
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ldr w8, [x0]
; CHECK-SD-NEXT: fmov d0, x8
-; CHECK-SD-NEXT: fmov d0, d0
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: load_zext_v16i8:
diff --git a/llvm/test/CodeGen/AArch64/ctpop.ll b/llvm/test/CodeGen/AArch64/ctpop.ll
index df817afb12368..84984c23f129e 100644
--- a/llvm/test/CodeGen/AArch64/ctpop.ll
+++ b/llvm/test/CodeGen/AArch64/ctpop.ll
@@ -602,7 +602,6 @@ define i128 @i128_mask(i128 %x) {
; CHECK-SD-NEXT: and x8, x0, #0xff
; CHECK-SD-NEXT: mov x1, xzr
; CHECK-SD-NEXT: fmov d0, x8
-; CHECK-SD-NEXT: fmov d0, d0
; CHECK-SD-NEXT: cnt v0.16b, v0.16b
; CHECK-SD-NEXT: addv b0, v0.16b
; CHECK-SD-NEXT: fmov x0, d0
diff --git a/llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll b/llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll
index fd7c869fe2f92..137a7feb1a85c 100644
--- a/llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll
+++ b/llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll
@@ -829,9 +829,9 @@ define <2 x i64> @utest_f64i64(<2 x double> %x) {
; CHECK-CVT-SD-NEXT: ldr x30, [sp, #16] // 8-byte Reload
; CHECK-CVT-SD-NEXT: csel x8, x0, xzr, eq
; CHECK-CVT-SD-NEXT: cmp x20, #0
-; CHECK-CVT-SD-NEXT: csel x9, x19, xzr, eq
; CHECK-CVT-SD-NEXT: fmov d0, x8
-; CHECK-CVT-SD-NEXT: fmov d1, x9
+; CHECK-CVT-SD-NEXT: csel x8, x19, xzr, eq
+; CHECK-CVT-SD-NEXT: fmov d1, x8
; CHECK-CVT-SD-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload
; CHECK-CVT-SD-NEXT: mov v0.d[1], v1.d[0]
; CHECK-CVT-SD-NEXT: add sp, sp, #48
@@ -858,9 +858,9 @@ define <2 x i64> @utest_f64i64(<2 x double> %x) {
; CHECK-FP16-SD-NEXT: ldr x30, [sp, #16] // 8-byte Reload
; CHECK-FP16-SD-NEXT: csel x8, x0, xzr, eq
; CHECK-FP16-SD-NEXT: cmp x20, #0
-; CHECK-FP16-SD-NEXT: csel x9, x19, xzr, eq
; CHECK-FP16-SD-NEXT: fmov d0, x8
-; CHECK-FP16-SD-NEXT: fmov d1, x9
+; CHECK-FP16-SD-NEXT: csel x8, x19, xzr, eq
+; CHECK-FP16-SD-NEXT: fmov d1, x8
; CHECK-FP16-SD-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload
; CHECK-FP16-SD-NEXT: mov v0.d[1], v1.d[0]
; CHECK-FP16-SD-NEXT: add sp, sp, #48
@@ -1296,9 +1296,9 @@ define <2 x i64> @utest_f32i64(<2 x float> %x) {
; CHECK-CVT-SD-NEXT: ldr x30, [sp, #16] // 8-byte Reload
; CHECK-CVT-SD-NEXT: csel x8, x0, xzr, eq
; CHECK-CVT-SD-NEXT: cmp x20, #0
-; CHECK-CVT-SD-NEXT: csel x9, x19, xzr, eq
; CHECK-CVT-SD-NEXT: fmov d0, x8
-; CHECK-CVT-SD-NEXT: fmov d1, x9
+; CHECK-CVT-SD-NEXT: csel x8, x19, xzr, eq
+; CHECK-CVT-SD-NEXT: fmov d1, x8
; CHECK-CVT-SD-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload
; CHECK-CVT-SD-NEXT: mov v0.d[1], v1.d[0]
; CHECK-CVT-SD-NEXT: add sp, sp, #48
@@ -1326,9 +1326,9 @@ define <2 x i64> @utest_f32i64(<2 x float> %x) {
; CHECK-FP16-SD-NEXT: ldr x30, [sp, #16] // 8-byte Reload
; CHECK-FP16-SD-NEXT: csel x8, x0, xzr, eq
; CHECK-FP16-SD-NEXT: cmp x20, #0
-; CHECK-FP16-SD-NEXT: csel x9, x19, xzr, eq
; CHECK-FP16-SD-NEXT: fmov d0, x8
-; CHECK-FP16-SD-NEXT: fmov d1, x9
+; CHECK-FP16-SD-NEXT: csel x8, x19, xzr, eq
+; CHECK-FP16-SD-NEXT: fmov d1, x8
; CHECK-FP16-SD-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload
; CHECK-FP16-SD-NEXT: mov v0.d[1], v1.d[0]
; CHECK-FP16-SD-NEXT: add sp, sp, #48
@@ -1748,9 +1748,9 @@ define <2 x i64> @utest_f16i64(<2 x half> %x) {
; CHECK-CVT-SD-NEXT: ldr x30, [sp, #16] // 8-byte Reload
; CHECK-CVT-SD-NEXT: csel x8, x0, xzr, eq
; CHECK-CVT-SD-NEXT: cmp x20, #0
-; CHECK-CVT-SD-NEXT: csel x9, x19, xzr, eq
; CHECK-CVT-SD-NEXT: fmov d0, x8
-; CHECK-CVT-SD-NEXT: fmov d1, x9
+; CHECK-CVT-SD-NEXT: csel x8, x19, xzr, eq
+; CHECK-CVT-SD-NEXT: fmov d1, x8
; CHECK-CVT-SD-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload
; CHECK-CVT-SD-NEXT: mov v0.d[1], v1.d[0]
; CHECK-CVT-SD-NEXT: add sp, sp, #48
@@ -1778,9 +1778,9 @@ define <2 x i64> @utest_f16i64(<2 x half> %x) {
; CHECK-FP16-SD-NEXT: ldr x30, [sp, #16] // 8-byte Reload
; CHECK-FP16-SD-NEXT: csel x8, x0, xzr, eq
; CHECK-FP16-SD-NEXT: cmp x20, #0
-; CHECK-FP16-SD-NEXT: csel x9, x19, xzr, eq
; CHECK-FP16-SD-NEXT: fmov d0, x8
-; CHECK-FP16-SD-NEXT: fmov d1, x9
+; CHECK-FP16-SD-NEXT: csel x8, x19, xzr, eq
+; CHECK-FP16-SD-NEXT: fmov d1, x8
; CHECK-FP16-SD-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload
; CHECK-FP16-SD-NEXT: mov v0.d[1], v1.d[0]
; CHECK-FP16-SD-NEXT: add sp, sp, #48
@@ -2774,9 +2774,9 @@ define <2 x i64> @utest_f64i64_mm(<2 x double> %x) {
; CHECK-CVT-SD-NEXT: ldr x30, [sp, #16] // 8-byte Reload
; CHECK-CVT-SD-NEXT: csel x8, x0, xzr, eq
; CHECK-CVT-SD-NEXT: cmp x20, #0
-; CHECK-CVT-SD-NEXT: csel x9, x19, xzr, eq
; CHECK-CVT-SD-NEXT: fmov d0, x8
-; CHECK-CVT-SD-NEXT: fmov d1, x9
+; CHECK-CVT-SD-NEXT: csel x8, x19, xzr, eq
+; CHECK-CVT-SD-NEXT: fmov d1, x8
; CHECK-CVT-SD-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload
; CHECK-CVT-SD-NEXT: mov v0.d[1], v1.d[0]
; CHECK-CVT-SD-NEXT: add sp, sp, #48
@@ -2803,9 +2803,9 @@ define <2 x i64> @utest_f64i64_mm(<2 x double> %x) {
; CHECK-FP16-SD-NEXT: ldr x30, [sp, #16] // 8-byte Reload
; CHECK-FP16-SD-NEXT: csel x8, x0, xzr, eq
; CHECK-FP16-SD-NEXT: cmp x20, #0
-; CHECK-FP16-SD-NEXT: csel x9, x19, xzr, eq
; CHECK-FP16-SD-NEXT: fmov d0, x8
-; CHECK-FP16-SD-NEXT: fmov d1, x9
+; CHECK-FP16-SD-NEXT: csel x8, x19, xzr, eq
+; CHECK-FP16-SD-NEXT: fmov d1, x8
; CHECK-FP16-SD-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload
; CHECK-FP16-SD-NEXT: mov v0.d[1], v1.d[0]
; CHECK-FP16-SD-NEXT: add sp, sp, #48
@@ -3232,9 +3232,9 @@ define <2 x i64> @utest_f32i64_mm(<2 x float> %x) {
; CHECK-CVT-SD-NEXT: ldr x30, [sp, #16] // 8-byte Reload
; CHECK-CVT-SD-NEXT: csel x8, x0, xzr, eq
; CHECK-CVT-SD-NEXT: cmp x20, #0
-; CHECK-CVT-SD-NEXT: csel x9, x19, xzr, eq
; CHECK-CVT-SD-NEXT: fmov d0, x8
-; CHECK-CVT-SD-NEXT: fmov d1, x9
+; CHECK-CVT-SD-NEXT: csel x8, x19, xzr, eq
+; CHECK-CVT-SD-NEXT: fmov d1, x8
; CHECK-CVT-SD-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload
; CHECK-CVT-SD-NEXT: mov v0.d[1], v1.d[0]
; CHECK-CVT-SD-NEXT: add sp, sp, #48
@@ -3262,9 +3262,9 @@ define <2 x i64> @utest_f32i64_mm(<2 x float> %x) {
; CHECK-FP16-SD-NEXT: ldr x30, [sp, #16] // 8-byte Reload
; CHECK-FP16-SD-NEXT: csel x8, x0, xzr, eq
; CHECK-FP16-SD-NEXT: cmp x20, #0
-; CHECK-FP16-SD-NEXT: csel x9, x19, xzr, eq
; CHECK-FP16-SD-NEXT: fmov d0, x8
-; CHECK-FP16-SD-NEXT: fmov d1, x9
+; CHECK-FP16-SD-NEXT: csel x8, x19, xzr, eq
+; CHECK-FP16-SD-NEXT: fmov d1, x8
; CHECK-FP16-SD-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload
; CHECK-FP16-SD-NEXT: mov v0.d[1], v1.d[0]
; CHECK-FP16-SD-NEXT: add sp, sp, #48
@@ -3675,9 +3675,9 @@ define <2 x i64> @utest_f16i64_mm(<2 x half> %x) {
; CHECK-CVT-SD-NEXT: ldr x30, [sp, #16] // 8-byte Reload
; CHECK-CVT-SD-NEXT: csel x8, x0, xzr, eq
; CHECK-CVT-SD-NEXT: cmp x20, #0
-; CHECK-CVT-SD-NEXT: csel x9, x19, xzr, eq
; CHECK-CVT-SD-NEXT: fmov d0, x8
-; CHECK-CVT-SD-NEXT: fmov d1, x9
+; CHECK-CVT-SD-NEXT: csel x8, x19, xzr, eq
+; CHECK-CVT-SD-NEXT: fmov d1, x8
; CHECK-CVT-SD-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload
; CHECK-CVT-SD-NEXT: mov v0.d[1], v1.d[0]
; CHECK-CVT-SD-NEXT: add sp, sp, #48
@@ -3705,9 +3705,9 @@ define <2 x i64> @utest_f16i64_mm(<2 x half> %x) {
; CHECK-FP16-SD-NEXT: ldr x30, [sp, #16] // 8-byte Reload
; CHECK-FP16-SD-NEXT: csel x8, x0, xzr, eq
; CHECK-FP16-SD-NEXT: cmp x20, #0
-; CHECK-FP16-SD-NEXT: csel x9, x19, xzr, eq
; CHECK-FP16-SD-NEXT: fmov d0, x8
-; CHECK-FP16-SD-NEXT: fmov d1, x9
+; CHECK-FP16-SD-NEXT: csel x8, x19, xzr, eq
+; CHECK-FP16-SD-NEXT: fmov d1, x8
; CHECK-FP16-SD-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload
; CHECK-FP16-SD-NEXT: mov v0.d[1], v1.d[0]
; CHECK-FP16-SD-NEXT: add sp, sp, #48
diff --git a/llvm/test/CodeGen/AArch64/neon-lowhalf128-optimisation.ll b/llvm/test/CodeGen/AArch64/neon-lowhalf128-optimisation.ll
index 4e30813187fec..38be2992c8211 100644
--- a/llvm/test/CodeGen/AArch64/neon-lowhalf128-optimisation.ll
+++ b/llvm/test/CodeGen/AArch64/neon-lowhalf128-optimisation.ll
@@ -5,7 +5,6 @@ define <2 x i64> @low_vector_splat_v2i64_from_i64(i64 %0){
; CHECK-LABEL: low_vector_splat_v2i64_from_i64:
; CHECK: // %bb.0:
; CHECK-NEXT: fmov d0, x0
-; CHECK-NEXT: fmov d0, d0
; CHECK-NEXT: ret
%2 = insertelement <1 x i64> poison, i64 %0, i64 0
%3 = shufflevector <1 x i64> %2, <1 x i64> zeroinitializer, <2 x i32> <i32 0, i32 1>
diff --git a/llvm/test/CodeGen/AArch64/peephole-insvigpr.mir b/llvm/test/CodeGen/AArch64/peephole-insvigpr.mir
index aef01e42ed7cc..a68eda11d5ca1 100644
--- a/llvm/test/CodeGen/AArch64/peephole-insvigpr.mir
+++ b/llvm/test/CodeGen/AArch64/peephole-insvigpr.mir
@@ -41,6 +41,11 @@
ret void
}
+ define void @insert_vec_from_gpr64_zero_high(i64 %v, ptr %dst) {
+ entry:
+ ret void
+ }
+
attributes #0 = { nocallback nofree nosync nounwind willreturn memory(none) }
...
@@ -521,4 +526,50 @@ body: |
STRSui killed %16, %0, 0 :: (store (s32) into %ir.hist)
RET_ReallyLR
+---
+name: insert_vec_from_gpr64_zero_high
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: gpr64common, preferred-register: '' }
+ - { id: 1, class: gpr64common, preferred-register: '' }
+ - { id: 2, class: fpr64, preferred-register: '' }
+ - { id: 3, class: fpr128, preferred-register: '' }
+ - { id: 4, class: fpr128, preferred-register: '' }
+ - { id: 5, class: fpr64, preferred-register: '' }
+ - { id: 6, class: fpr128, preferred-register: '' }
+ - { id: 7, class: fpr128, preferred-register: '' }
+ - { id: 8, class: fpr128, preferred-register: '' }
+liveins:
+ - { reg: '$x0', virtual-reg: '%0' }
+ - { reg: '$x1', virtual-reg: '%1' }
+body: |
+ bb.0.entry:
+ liveins: $x0, $x1
+
+ ; CHECK-LABEL: name: insert_vec_from_gpr64_zero_high
+ ; CHECK: liveins: $x0, $x1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[PTR:%[0-9]+]]:gpr64common = COPY $x0
+ ; CHECK-NEXT: [[VAL:%[0-9]+]]:gpr64common = COPY $x1
+ ; CHECK-NEXT: [[GPR:%[0-9]+]]:gpr64 = COPY [[VAL]]
+ ; CHECK-NEXT: [[FMOV:%[0-9]+]]:fpr64 = FMOVXDr killed [[GPR]]
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:fpr128 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[INSERT_LOW:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF]], [[FMOV]], %subreg.dsub
+ ; CHECK-NEXT: [[MOVID:%[0-9]+]]:fpr64 = MOVID 0
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:fpr128 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[INSERT_ZERO:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF1]], killed [[MOVID]], %subreg.dsub
+ ; CHECK-NEXT: STRQui killed [[INSERT_LOW]], [[PTR]], 0 :: (store (s128) into %ir.dst, align 8)
+ ; CHECK-NEXT: RET_ReallyLR
+ %0:gpr64common = COPY $x0
+ %1:gpr64common = COPY $x1
+ %2:fpr64 = COPY %1
+ %4:fpr128 = IMPLICIT_DEF
+ %3:fpr128 = INSERT_SUBREG %4, %2, %subreg.dsub
+ %5:fpr64 = MOVID 0
+ %7:fpr128 = IMPLICIT_DEF
+ %6:fpr128 = INSERT_SUBREG %7, killed %5, %subreg.dsub
+ %8:fpr128 = INSvi64lane %3, 1, killed %6, 0
+ STRQui killed %8, %0, 0 :: (store (s128) into %ir.dst, align 8)
+ RET_ReallyLR
+
...
``````````
</details>
https://github.com/llvm/llvm-project/pull/182835
More information about the llvm-branch-commits
mailing list