[llvm] [MachineLICM] Hoist copies of constant physical register (PR #93285)
Pengcheng Wang via llvm-commits
llvm-commits at lists.llvm.org
Fri May 24 02:51:09 PDT 2024
https://github.com/wangpc-pp created https://github.com/llvm/llvm-project/pull/93285
Previously, we just checked whether the source is a virtual register, and
this prevented some potential hoists.
We can see some improvements in AArch64/RISCV tests.
>From 939d6316075fdf2952701c9730620cdf56c783fb Mon Sep 17 00:00:00 2001
From: Wang Pengcheng <wangpengcheng.pp at bytedance.com>
Date: Fri, 24 May 2024 17:07:46 +0800
Subject: [PATCH 1/2] [RISCV] Add tests for MachineLICM
---
.../RISCV/machinelicm-constant-phys-reg.ll | 41 +++++++++++++++++++
1 file changed, 41 insertions(+)
create mode 100644 llvm/test/CodeGen/RISCV/machinelicm-constant-phys-reg.ll
diff --git a/llvm/test/CodeGen/RISCV/machinelicm-constant-phys-reg.ll b/llvm/test/CodeGen/RISCV/machinelicm-constant-phys-reg.ll
new file mode 100644
index 0000000000000..2b243307309cb
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/machinelicm-constant-phys-reg.ll
@@ -0,0 +1,41 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -O3 < %s -mtriple=riscv64 -mattr=+v | FileCheck %s
+
+declare i32 @llvm.vector.reduce.add.nxv2i32(<vscale x 2 x i32>)
+
+define i32 @test(ptr %a, i64 %n) {
+; CHECK-LABEL: test:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: li a3, 0
+; CHECK-NEXT: vsetvli a2, zero, e32, m1, ta, ma
+; CHECK-NEXT: .LBB0_1: # %loop
+; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: vl1re32.v v8, (a0)
+; CHECK-NEXT: mv a2, a3
+; CHECK-NEXT: vmv.s.x v9, zero
+; CHECK-NEXT: vredsum.vs v8, v8, v9
+; CHECK-NEXT: vmv.x.s a3, v8
+; CHECK-NEXT: addw a3, a3, a3
+; CHECK-NEXT: addi a1, a1, -1
+; CHECK-NEXT: addi a0, a0, 8
+; CHECK-NEXT: bnez a1, .LBB0_1
+; CHECK-NEXT: # %bb.2: # %exit
+; CHECK-NEXT: mv a0, a2
+; CHECK-NEXT: ret
+entry:
+ br label %loop
+
+loop:
+ %indvar = phi i64 [ 0, %entry ], [ %indvar.inc, %loop ]
+ %sum = phi i32 [ 0, %entry ], [ %sum.inc, %loop ]
+ %idx = getelementptr inbounds ptr, ptr %a, i64 %indvar
+ %data = load <vscale x 2 x i32>, ptr %idx
+ %reduce = tail call i32 @llvm.vector.reduce.add.nxv2i32(<vscale x 2 x i32> %data)
+ %sum.inc = add i32 %reduce, %reduce
+ %indvar.inc = add i64 %indvar, 1
+ %cmp = icmp eq i64 %indvar.inc, %n
+ br i1 %cmp, label %exit, label %loop
+
+exit:
+ ret i32 %sum
+}
>From dd72a111470accca0a3437c5f5752aac340a8ced Mon Sep 17 00:00:00 2001
From: Wang Pengcheng <wangpengcheng.pp at bytedance.com>
Date: Fri, 24 May 2024 17:39:04 +0800
Subject: [PATCH 2/2] [MachineLICM] Hoist copies of constant physical register
Previously, we just checked whether the source is a virtual register, and
this prevented some potential hoists.
We can see some improvements in AArch64/RISCV tests.
---
llvm/lib/CodeGen/MachineLICM.cpp | 5 +-
.../AArch64/atomicrmw-uinc-udec-wrap.ll | 10 +-
.../AArch64/dag-combine-concat-vectors.ll | 66 ++++----
.../machine-sink-cache-invalidation.ll | 10 +-
.../AArch64/ragreedy-local-interval-cost.ll | 148 +++++++++---------
llvm/test/CodeGen/AMDGPU/vgpr-liverange-ir.ll | 16 +-
llvm/test/CodeGen/AVR/shift.ll | 6 +-
.../RISCV/machinelicm-constant-phys-reg.ll | 8 +-
.../RISCV/rvv/65704-illegal-instruction.ll | 21 +--
.../RISCV/rvv/fold-scalar-load-crash.ll | 36 ++---
llvm/test/CodeGen/RISCV/vlenb.ll | 5 +-
11 files changed, 169 insertions(+), 162 deletions(-)
diff --git a/llvm/lib/CodeGen/MachineLICM.cpp b/llvm/lib/CodeGen/MachineLICM.cpp
index 727a98c41bce4..9a390f616fae1 100644
--- a/llvm/lib/CodeGen/MachineLICM.cpp
+++ b/llvm/lib/CodeGen/MachineLICM.cpp
@@ -1269,8 +1269,9 @@ bool MachineLICMBase::IsProfitableToHoist(MachineInstr &MI,
Register DefReg = MI.getOperand(0).getReg();
if (DefReg.isVirtual() &&
all_of(MI.uses(),
- [](const MachineOperand &UseOp) {
- return !UseOp.isReg() || UseOp.getReg().isVirtual();
+ [&](const MachineOperand &UseOp) {
+ return !UseOp.isReg() || UseOp.getReg().isVirtual() ||
+ MRI->isConstantPhysReg(UseOp.getReg());
}) &&
IsLoopInvariantInst(MI, CurLoop) &&
any_of(MRI->use_nodbg_instructions(DefReg),
diff --git a/llvm/test/CodeGen/AArch64/atomicrmw-uinc-udec-wrap.ll b/llvm/test/CodeGen/AArch64/atomicrmw-uinc-udec-wrap.ll
index 5f293e5c7ea34..66fea3535b1ec 100644
--- a/llvm/test/CodeGen/AArch64/atomicrmw-uinc-udec-wrap.ll
+++ b/llvm/test/CodeGen/AArch64/atomicrmw-uinc-udec-wrap.ll
@@ -55,15 +55,15 @@ define i32 @atomicrmw_uinc_wrap_i32(ptr %ptr, i32 %val) {
define i64 @atomicrmw_uinc_wrap_i64(ptr %ptr, i64 %val) {
; CHECK-LABEL: atomicrmw_uinc_wrap_i64:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov x8, x0
; CHECK-NEXT: .LBB3_1: // %atomicrmw.start
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: ldaxr x0, [x8]
-; CHECK-NEXT: cmp x0, x1
-; CHECK-NEXT: csinc x9, xzr, x0, hs
-; CHECK-NEXT: stlxr w10, x9, [x8]
+; CHECK-NEXT: ldaxr x8, [x0]
+; CHECK-NEXT: cmp x8, x1
+; CHECK-NEXT: csinc x9, xzr, x8, hs
+; CHECK-NEXT: stlxr w10, x9, [x0]
; CHECK-NEXT: cbnz w10, .LBB3_1
; CHECK-NEXT: // %bb.2: // %atomicrmw.end
+; CHECK-NEXT: mov x0, x8
; CHECK-NEXT: ret
%result = atomicrmw uinc_wrap ptr %ptr, i64 %val seq_cst
ret i64 %result
diff --git a/llvm/test/CodeGen/AArch64/dag-combine-concat-vectors.ll b/llvm/test/CodeGen/AArch64/dag-combine-concat-vectors.ll
index 83c7f73800af1..dfe0e83649e20 100644
--- a/llvm/test/CodeGen/AArch64/dag-combine-concat-vectors.ll
+++ b/llvm/test/CodeGen/AArch64/dag-combine-concat-vectors.ll
@@ -8,57 +8,57 @@ declare void @llvm.masked.scatter.nxv16i8.nxv16p0(<vscale x 16 x i8>, <vscale x
define fastcc i8 @allocno_reload_assign() {
; CHECK-LABEL: allocno_reload_assign:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov z0.b, #0 // =0x0
-; CHECK-NEXT: mov z16.d, #0 // =0x0
+; CHECK-NEXT: fmov d0, xzr
; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: ptrue p1.b
+; CHECK-NEXT: mov z16.d, #0 // =0x0
+; CHECK-NEXT: cmpeq p0.d, p0/z, z0.d, #0
+; CHECK-NEXT: uzp1 p0.s, p0.s, p0.s
+; CHECK-NEXT: uzp1 p0.h, p0.h, p0.h
+; CHECK-NEXT: uzp1 p0.b, p0.b, p0.b
+; CHECK-NEXT: mov z0.b, p0/z, #1 // =0x1
+; CHECK-NEXT: ptrue p0.b
+; CHECK-NEXT: fmov w8, s0
+; CHECK-NEXT: mov z0.b, #0 // =0x0
+; CHECK-NEXT: sbfx x8, x8, #0, #1
; CHECK-NEXT: uunpklo z1.h, z0.b
; CHECK-NEXT: uunpkhi z0.h, z0.b
+; CHECK-NEXT: whilelo p1.b, xzr, x8
+; CHECK-NEXT: not p0.b, p0/z, p1.b
; CHECK-NEXT: uunpklo z2.s, z1.h
; CHECK-NEXT: uunpkhi z3.s, z1.h
; CHECK-NEXT: uunpklo z5.s, z0.h
; CHECK-NEXT: uunpkhi z7.s, z0.h
+; CHECK-NEXT: punpklo p1.h, p0.b
+; CHECK-NEXT: punpkhi p0.h, p0.b
+; CHECK-NEXT: punpklo p2.h, p1.b
; CHECK-NEXT: uunpklo z0.d, z2.s
; CHECK-NEXT: uunpkhi z1.d, z2.s
+; CHECK-NEXT: punpkhi p3.h, p1.b
; CHECK-NEXT: uunpklo z2.d, z3.s
; CHECK-NEXT: uunpkhi z3.d, z3.s
+; CHECK-NEXT: punpklo p5.h, p0.b
; CHECK-NEXT: uunpklo z4.d, z5.s
; CHECK-NEXT: uunpkhi z5.d, z5.s
+; CHECK-NEXT: punpkhi p7.h, p0.b
; CHECK-NEXT: uunpklo z6.d, z7.s
; CHECK-NEXT: uunpkhi z7.d, z7.s
-; CHECK-NEXT: .LBB0_1: // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: fmov d17, xzr
-; CHECK-NEXT: cmpeq p2.d, p0/z, z17.d, #0
-; CHECK-NEXT: uzp1 p2.s, p2.s, p0.s
-; CHECK-NEXT: uzp1 p2.h, p2.h, p0.h
-; CHECK-NEXT: uzp1 p2.b, p2.b, p0.b
-; CHECK-NEXT: mov z17.b, p2/z, #1 // =0x1
-; CHECK-NEXT: fmov w8, s17
-; CHECK-NEXT: sbfx x8, x8, #0, #1
-; CHECK-NEXT: whilelo p2.b, xzr, x8
-; CHECK-NEXT: not p2.b, p1/z, p2.b
-; CHECK-NEXT: punpklo p3.h, p2.b
-; CHECK-NEXT: punpkhi p2.h, p2.b
-; CHECK-NEXT: punpklo p4.h, p3.b
-; CHECK-NEXT: punpkhi p3.h, p3.b
-; CHECK-NEXT: punpklo p5.h, p4.b
-; CHECK-NEXT: punpkhi p4.h, p4.b
-; CHECK-NEXT: st1b { z0.d }, p5, [z16.d]
-; CHECK-NEXT: st1b { z1.d }, p4, [z16.d]
-; CHECK-NEXT: punpklo p4.h, p3.b
+; CHECK-NEXT: punpklo p0.h, p2.b
+; CHECK-NEXT: punpkhi p1.h, p2.b
+; CHECK-NEXT: punpklo p2.h, p3.b
; CHECK-NEXT: punpkhi p3.h, p3.b
-; CHECK-NEXT: st1b { z2.d }, p4, [z16.d]
+; CHECK-NEXT: punpklo p4.h, p5.b
+; CHECK-NEXT: punpkhi p5.h, p5.b
+; CHECK-NEXT: punpklo p6.h, p7.b
+; CHECK-NEXT: punpkhi p7.h, p7.b
+; CHECK-NEXT: .LBB0_1: // =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: st1b { z0.d }, p0, [z16.d]
+; CHECK-NEXT: st1b { z1.d }, p1, [z16.d]
+; CHECK-NEXT: st1b { z2.d }, p2, [z16.d]
; CHECK-NEXT: st1b { z3.d }, p3, [z16.d]
-; CHECK-NEXT: punpklo p3.h, p2.b
-; CHECK-NEXT: punpkhi p2.h, p2.b
-; CHECK-NEXT: punpklo p4.h, p3.b
-; CHECK-NEXT: punpkhi p3.h, p3.b
; CHECK-NEXT: st1b { z4.d }, p4, [z16.d]
-; CHECK-NEXT: st1b { z5.d }, p3, [z16.d]
-; CHECK-NEXT: punpklo p3.h, p2.b
-; CHECK-NEXT: punpkhi p2.h, p2.b
-; CHECK-NEXT: st1b { z6.d }, p3, [z16.d]
-; CHECK-NEXT: st1b { z7.d }, p2, [z16.d]
+; CHECK-NEXT: st1b { z5.d }, p5, [z16.d]
+; CHECK-NEXT: st1b { z6.d }, p6, [z16.d]
+; CHECK-NEXT: st1b { z7.d }, p7, [z16.d]
; CHECK-NEXT: b .LBB0_1
br label %1
diff --git a/llvm/test/CodeGen/AArch64/machine-sink-cache-invalidation.ll b/llvm/test/CodeGen/AArch64/machine-sink-cache-invalidation.ll
index 6effc63ecc13c..fe3715341a25b 100644
--- a/llvm/test/CodeGen/AArch64/machine-sink-cache-invalidation.ll
+++ b/llvm/test/CodeGen/AArch64/machine-sink-cache-invalidation.ll
@@ -9,20 +9,20 @@ target triple = "arm64-apple-macosx13.5.0"
define i32 @nsis_BZ2_bzDecompress(ptr %pos.i, i1 %cmp661.not3117.i, i1 %exitcond.not.i) {
; CHECK-LABEL: nsis_BZ2_bzDecompress:
; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov x8, xzr
; CHECK-NEXT: b .LBB0_2
; CHECK-NEXT: .LBB0_1: // %while.end671.i
; CHECK-NEXT: // in Loop: Header=BB0_2 Depth=1
-; CHECK-NEXT: strb w8, [x0]
+; CHECK-NEXT: strb w9, [x0]
; CHECK-NEXT: tbnz w2, #0, .LBB0_4
; CHECK-NEXT: .LBB0_2: // %for.body653.i
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: ldrb w8, [x0]
+; CHECK-NEXT: ldrb w9, [x0]
; CHECK-NEXT: tbnz w1, #0, .LBB0_1
; CHECK-NEXT: // %bb.3: // %while.body663.i
; CHECK-NEXT: // in Loop: Header=BB0_2 Depth=1
-; CHECK-NEXT: mov x9, xzr
-; CHECK-NEXT: ldrb w9, [x9]
-; CHECK-NEXT: strb wzr, [x0, x9]
+; CHECK-NEXT: ldrb w10, [x8]
+; CHECK-NEXT: strb wzr, [x0, x10]
; CHECK-NEXT: b .LBB0_1
; CHECK-NEXT: .LBB0_4: // %for.end677.i
; CHECK-NEXT: mov w0, wzr
diff --git a/llvm/test/CodeGen/AArch64/ragreedy-local-interval-cost.ll b/llvm/test/CodeGen/AArch64/ragreedy-local-interval-cost.ll
index 866b27b81d885..c91de8f3a0a47 100644
--- a/llvm/test/CodeGen/AArch64/ragreedy-local-interval-cost.ll
+++ b/llvm/test/CodeGen/AArch64/ragreedy-local-interval-cost.ll
@@ -8,36 +8,39 @@
define dso_local void @run_test() local_unnamed_addr uwtable {
; CHECK-LABEL: run_test:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: sub sp, sp, #192
-; CHECK-NEXT: .cfi_def_cfa_offset 192
+; CHECK-NEXT: sub sp, sp, #208
+; CHECK-NEXT: .cfi_def_cfa_offset 208
; CHECK-NEXT: stp d15, d14, [sp, #96] // 16-byte Folded Spill
; CHECK-NEXT: stp d13, d12, [sp, #112] // 16-byte Folded Spill
; CHECK-NEXT: stp d11, d10, [sp, #128] // 16-byte Folded Spill
; CHECK-NEXT: stp d9, d8, [sp, #144] // 16-byte Folded Spill
-; CHECK-NEXT: stp x22, x21, [sp, #160] // 16-byte Folded Spill
-; CHECK-NEXT: stp x20, x19, [sp, #176] // 16-byte Folded Spill
+; CHECK-NEXT: str x23, [sp, #160] // 8-byte Folded Spill
+; CHECK-NEXT: stp x22, x21, [sp, #176] // 16-byte Folded Spill
+; CHECK-NEXT: stp x20, x19, [sp, #192] // 16-byte Folded Spill
; CHECK-NEXT: .cfi_offset w19, -8
; CHECK-NEXT: .cfi_offset w20, -16
; CHECK-NEXT: .cfi_offset w21, -24
; CHECK-NEXT: .cfi_offset w22, -32
-; CHECK-NEXT: .cfi_offset b8, -40
-; CHECK-NEXT: .cfi_offset b9, -48
-; CHECK-NEXT: .cfi_offset b10, -56
-; CHECK-NEXT: .cfi_offset b11, -64
-; CHECK-NEXT: .cfi_offset b12, -72
-; CHECK-NEXT: .cfi_offset b13, -80
-; CHECK-NEXT: .cfi_offset b14, -88
-; CHECK-NEXT: .cfi_offset b15, -96
+; CHECK-NEXT: .cfi_offset w23, -48
+; CHECK-NEXT: .cfi_offset b8, -56
+; CHECK-NEXT: .cfi_offset b9, -64
+; CHECK-NEXT: .cfi_offset b10, -72
+; CHECK-NEXT: .cfi_offset b11, -80
+; CHECK-NEXT: .cfi_offset b12, -88
+; CHECK-NEXT: .cfi_offset b13, -96
+; CHECK-NEXT: .cfi_offset b14, -104
+; CHECK-NEXT: .cfi_offset b15, -112
; CHECK-NEXT: movi v2.2d, #0000000000000000
; CHECK-NEXT: // implicit-def: $q1
; CHECK-NEXT: mov x8, xzr
-; CHECK-NEXT: mov x9, xzr
-; CHECK-NEXT: adrp x10, B+48
-; CHECK-NEXT: add x10, x10, :lo12:B+48
-; CHECK-NEXT: adrp x11, A
-; CHECK-NEXT: add x11, x11, :lo12:A
+; CHECK-NEXT: adrp x9, B+48
+; CHECK-NEXT: add x9, x9, :lo12:B+48
+; CHECK-NEXT: adrp x10, A
+; CHECK-NEXT: add x10, x10, :lo12:A
+; CHECK-NEXT: mov x11, xzr
; CHECK-NEXT: // kill: killed $q1
; CHECK-NEXT: // implicit-def: $q1
+; CHECK-NEXT: mov x12, xzr
; CHECK-NEXT: // implicit-def: $q0
; CHECK-NEXT: // implicit-def: $q3
; CHECK-NEXT: // implicit-def: $q4
@@ -69,103 +72,102 @@ define dso_local void @run_test() local_unnamed_addr uwtable {
; CHECK-NEXT: // kill: killed $q1
; CHECK-NEXT: .LBB0_1: // %for.cond1.preheader
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: str q14, [sp, #32] // 16-byte Folded Spill
-; CHECK-NEXT: ldr q14, [x8]
-; CHECK-NEXT: mov x12, xzr
-; CHECK-NEXT: ldr x14, [x12]
; CHECK-NEXT: stp q29, q15, [sp] // 32-byte Folded Spill
-; CHECK-NEXT: add x19, x11, x8
-; CHECK-NEXT: fmov x15, d14
-; CHECK-NEXT: mov x16, v14.d[1]
-; CHECK-NEXT: ldr q15, [x12]
-; CHECK-NEXT: ldr q14, [x10], #64
+; CHECK-NEXT: ldr q15, [x8]
+; CHECK-NEXT: ldr x15, [x8]
+; CHECK-NEXT: str q14, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT: add x20, x10, x11
; CHECK-NEXT: mov v8.16b, v28.16b
-; CHECK-NEXT: fmov x13, d15
-; CHECK-NEXT: mov x18, v15.d[1]
+; CHECK-NEXT: fmov x2, d15
+; CHECK-NEXT: mov x17, v15.d[1]
+; CHECK-NEXT: ldr q14, [x8]
; CHECK-NEXT: mov v28.16b, v24.16b
-; CHECK-NEXT: mul x17, x15, x14
-; CHECK-NEXT: mov x12, v14.d[1]
-; CHECK-NEXT: fmov x4, d14
; CHECK-NEXT: mov v24.16b, v20.16b
; CHECK-NEXT: mov v20.16b, v17.16b
+; CHECK-NEXT: fmov x13, d14
+; CHECK-NEXT: mov x16, v14.d[1]
; CHECK-NEXT: mov v17.16b, v5.16b
-; CHECK-NEXT: mul x1, x16, x14
+; CHECK-NEXT: mul x3, x2, x15
+; CHECK-NEXT: ldr q14, [x9], #64
; CHECK-NEXT: ldr q5, [sp, #64] // 16-byte Folded Reload
-; CHECK-NEXT: ldr x5, [x8]
-; CHECK-NEXT: ldr x19, [x19, #128]
+; CHECK-NEXT: ldr x6, [x8]
+; CHECK-NEXT: ldr x20, [x20, #128]
+; CHECK-NEXT: mul x1, x17, x15
+; CHECK-NEXT: mov x14, v14.d[1]
+; CHECK-NEXT: fmov x5, d14
; CHECK-NEXT: mov v29.16b, v21.16b
; CHECK-NEXT: mov v21.16b, v0.16b
-; CHECK-NEXT: mul x0, x13, x14
; CHECK-NEXT: mov v25.16b, v6.16b
+; CHECK-NEXT: mul x18, x13, x15
; CHECK-NEXT: mov v6.16b, v2.16b
-; CHECK-NEXT: fmov d15, x17
; CHECK-NEXT: mov v26.16b, v22.16b
+; CHECK-NEXT: fmov d15, x3
; CHECK-NEXT: mov v22.16b, v18.16b
-; CHECK-NEXT: mul x2, x18, x14
; CHECK-NEXT: mov v18.16b, v7.16b
+; CHECK-NEXT: mul x0, x16, x15
; CHECK-NEXT: mov v7.16b, v3.16b
; CHECK-NEXT: mov v16.16b, v4.16b
-; CHECK-NEXT: add x8, x8, #8
-; CHECK-NEXT: add x9, x9, #1
+; CHECK-NEXT: add x11, x11, #8
+; CHECK-NEXT: add x12, x12, #1
; CHECK-NEXT: mov v15.d[1], x1
-; CHECK-NEXT: mul x3, x12, x14
-; CHECK-NEXT: cmp x8, #64
-; CHECK-NEXT: fmov d14, x0
-; CHECK-NEXT: mul x14, x4, x14
+; CHECK-NEXT: mul x4, x14, x15
+; CHECK-NEXT: cmp x11, #64
+; CHECK-NEXT: fmov d14, x18
+; CHECK-NEXT: mul x15, x5, x15
; CHECK-NEXT: add v5.2d, v5.2d, v15.2d
-; CHECK-NEXT: mul x20, x15, x5
-; CHECK-NEXT: mov v14.d[1], x2
-; CHECK-NEXT: mul x15, x15, x19
-; CHECK-NEXT: fmov d0, x14
+; CHECK-NEXT: mul x21, x2, x6
+; CHECK-NEXT: mov v14.d[1], x0
+; CHECK-NEXT: mul x2, x2, x20
+; CHECK-NEXT: fmov d0, x15
; CHECK-NEXT: str q5, [sp, #64] // 16-byte Folded Spill
; CHECK-NEXT: ldr q5, [sp, #48] // 16-byte Folded Reload
-; CHECK-NEXT: mul x21, x13, x19
+; CHECK-NEXT: mul x22, x13, x20
; CHECK-NEXT: add v5.2d, v5.2d, v14.2d
-; CHECK-NEXT: fmov d3, x20
-; CHECK-NEXT: mul x7, x16, x5
-; CHECK-NEXT: mov v0.d[1], x3
-; CHECK-NEXT: fmov d1, x15
-; CHECK-NEXT: mul x16, x16, x19
+; CHECK-NEXT: fmov d3, x21
+; CHECK-NEXT: mul x19, x17, x6
+; CHECK-NEXT: mov v0.d[1], x4
+; CHECK-NEXT: fmov d1, x2
+; CHECK-NEXT: mul x17, x17, x20
; CHECK-NEXT: str q5, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT: add v5.2d, v13.2d, v14.2d
-; CHECK-NEXT: fmov d2, x21
+; CHECK-NEXT: fmov d2, x22
; CHECK-NEXT: ldr q13, [sp, #80] // 16-byte Folded Reload
-; CHECK-NEXT: mul x6, x18, x5
+; CHECK-NEXT: mul x7, x16, x6
; CHECK-NEXT: ldp q15, q14, [sp, #16] // 32-byte Folded Reload
-; CHECK-NEXT: mov v3.d[1], x7
+; CHECK-NEXT: mov v3.d[1], x19
; CHECK-NEXT: add v13.2d, v13.2d, v0.2d
-; CHECK-NEXT: mul x18, x18, x19
-; CHECK-NEXT: mov v1.d[1], x16
-; CHECK-NEXT: mul x22, x4, x19
+; CHECK-NEXT: mul x16, x16, x20
+; CHECK-NEXT: mov v1.d[1], x17
+; CHECK-NEXT: mul x23, x5, x20
; CHECK-NEXT: str q13, [sp, #80] // 16-byte Folded Spill
; CHECK-NEXT: mov v13.16b, v5.16b
; CHECK-NEXT: mov v5.16b, v17.16b
; CHECK-NEXT: mov v17.16b, v20.16b
; CHECK-NEXT: mov v20.16b, v24.16b
-; CHECK-NEXT: mul x13, x13, x5
+; CHECK-NEXT: mul x13, x13, x6
; CHECK-NEXT: mov v24.16b, v28.16b
; CHECK-NEXT: add v11.2d, v11.2d, v3.2d
-; CHECK-NEXT: mov v2.d[1], x18
+; CHECK-NEXT: mov v2.d[1], x16
; CHECK-NEXT: add v15.2d, v15.2d, v1.2d
; CHECK-NEXT: add v27.2d, v27.2d, v3.2d
-; CHECK-NEXT: mul x17, x12, x19
+; CHECK-NEXT: mul x18, x14, x20
; CHECK-NEXT: add v23.2d, v23.2d, v3.2d
; CHECK-NEXT: add v19.2d, v19.2d, v3.2d
-; CHECK-NEXT: fmov d4, x22
+; CHECK-NEXT: fmov d4, x23
; CHECK-NEXT: add v10.2d, v10.2d, v3.2d
-; CHECK-NEXT: mul x14, x4, x5
+; CHECK-NEXT: mul x15, x5, x6
; CHECK-NEXT: fmov d0, x13
; CHECK-NEXT: add v14.2d, v14.2d, v2.2d
; CHECK-NEXT: add v2.2d, v6.2d, v3.2d
-; CHECK-NEXT: mul x12, x12, x5
+; CHECK-NEXT: mul x14, x14, x6
; CHECK-NEXT: mov v3.16b, v7.16b
; CHECK-NEXT: mov v7.16b, v18.16b
-; CHECK-NEXT: mov v4.d[1], x17
+; CHECK-NEXT: mov v4.d[1], x18
; CHECK-NEXT: mov v18.16b, v22.16b
-; CHECK-NEXT: mov v0.d[1], x6
-; CHECK-NEXT: fmov d1, x14
+; CHECK-NEXT: mov v0.d[1], x7
+; CHECK-NEXT: fmov d1, x15
; CHECK-NEXT: add v28.2d, v8.2d, v4.2d
-; CHECK-NEXT: mov v1.d[1], x12
+; CHECK-NEXT: mov v1.d[1], x14
; CHECK-NEXT: add v31.2d, v31.2d, v0.2d
; CHECK-NEXT: add v30.2d, v30.2d, v0.2d
; CHECK-NEXT: add v12.2d, v12.2d, v0.2d
@@ -192,11 +194,12 @@ define dso_local void @run_test() local_unnamed_addr uwtable {
; CHECK-NEXT: adrp x8, C
; CHECK-NEXT: add x8, x8, :lo12:C
; CHECK-NEXT: stp q11, q30, [x8, #80]
-; CHECK-NEXT: ldp x20, x19, [sp, #176] // 16-byte Folded Reload
+; CHECK-NEXT: ldp x20, x19, [sp, #192] // 16-byte Folded Reload
; CHECK-NEXT: str q1, [x8]
; CHECK-NEXT: ldr q1, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT: ldr x23, [sp, #160] // 8-byte Folded Reload
; CHECK-NEXT: stp q15, q14, [x8, #144]
-; CHECK-NEXT: ldp x22, x21, [sp, #160] // 16-byte Folded Reload
+; CHECK-NEXT: ldp x22, x21, [sp, #176] // 16-byte Folded Reload
; CHECK-NEXT: stp q1, q13, [x8, #16]
; CHECK-NEXT: ldr q1, [sp, #80] // 16-byte Folded Reload
; CHECK-NEXT: stp q28, q12, [x8, #176]
@@ -216,12 +219,13 @@ define dso_local void @run_test() local_unnamed_addr uwtable {
; CHECK-NEXT: stp q5, q4, [x8, #432]
; CHECK-NEXT: stp q2, q3, [x8, #464]
; CHECK-NEXT: str q0, [x8, #496]
-; CHECK-NEXT: add sp, sp, #192
+; CHECK-NEXT: add sp, sp, #208
; CHECK-NEXT: .cfi_def_cfa_offset 0
; CHECK-NEXT: .cfi_restore w19
; CHECK-NEXT: .cfi_restore w20
; CHECK-NEXT: .cfi_restore w21
; CHECK-NEXT: .cfi_restore w22
+; CHECK-NEXT: .cfi_restore w23
; CHECK-NEXT: .cfi_restore b8
; CHECK-NEXT: .cfi_restore b9
; CHECK-NEXT: .cfi_restore b10
diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-liverange-ir.ll b/llvm/test/CodeGen/AMDGPU/vgpr-liverange-ir.ll
index bfc249e9081d2..340f0cdd5d5d0 100644
--- a/llvm/test/CodeGen/AMDGPU/vgpr-liverange-ir.ll
+++ b/llvm/test/CodeGen/AMDGPU/vgpr-liverange-ir.ll
@@ -245,6 +245,7 @@ define amdgpu_ps float @loop(i32 %z, float %v, i32 inreg %bound, ptr %extern_fun
; SI-NEXT: {{ $}}
; SI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[PHI2]], %subreg.sub0, killed [[PHI3]], %subreg.sub1
; SI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_MOV_B32 $exec_lo
+ ; SI-NEXT: [[COPY6:%[0-9]+]]:sgpr_128 = COPY $sgpr100_sgpr101_sgpr102_sgpr103
; SI-NEXT: {{ $}}
; SI-NEXT: bb.3:
; SI-NEXT: successors: %bb.4(0x80000000)
@@ -261,8 +262,7 @@ define amdgpu_ps float @loop(i32 %z, float %v, i32 inreg %bound, ptr %extern_fun
; SI-NEXT: successors: %bb.3(0x40000000), %bb.5(0x40000000)
; SI-NEXT: {{ $}}
; SI-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
- ; SI-NEXT: [[COPY6:%[0-9]+]]:sgpr_128 = COPY $sgpr100_sgpr101_sgpr102_sgpr103
- ; SI-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY killed [[COPY6]]
+ ; SI-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY6]]
; SI-NEXT: $vgpr0 = COPY killed [[PHI5]]
; SI-NEXT: dead $sgpr30_sgpr31 = SI_CALL killed [[REG_SEQUENCE1]], 0, csr_amdgpu_si_gfx, implicit killed $sgpr0_sgpr1_sgpr2_sgpr3, implicit killed $vgpr0, implicit-def $vgpr0
; SI-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
@@ -282,6 +282,7 @@ define amdgpu_ps float @loop(i32 %z, float %v, i32 inreg %bound, ptr %extern_fun
; SI-NEXT: {{ $}}
; SI-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[COPY1]], %subreg.sub0, killed [[COPY]], %subreg.sub1
; SI-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32_xm0_xexec = S_MOV_B32 $exec_lo
+ ; SI-NEXT: [[COPY9:%[0-9]+]]:sgpr_128 = COPY $sgpr100_sgpr101_sgpr102_sgpr103
; SI-NEXT: {{ $}}
; SI-NEXT: bb.7:
; SI-NEXT: successors: %bb.8(0x80000000)
@@ -298,8 +299,7 @@ define amdgpu_ps float @loop(i32 %z, float %v, i32 inreg %bound, ptr %extern_fun
; SI-NEXT: successors: %bb.7(0x40000000), %bb.9(0x40000000)
; SI-NEXT: {{ $}}
; SI-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
- ; SI-NEXT: [[COPY9:%[0-9]+]]:sgpr_128 = COPY $sgpr100_sgpr101_sgpr102_sgpr103
- ; SI-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY killed [[COPY9]]
+ ; SI-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY9]]
; SI-NEXT: $vgpr0 = COPY killed [[PHI7]]
; SI-NEXT: dead $sgpr30_sgpr31 = SI_CALL killed [[REG_SEQUENCE3]], 0, csr_amdgpu_si_gfx, implicit killed $sgpr0_sgpr1_sgpr2_sgpr3, implicit killed $vgpr0, implicit-def $vgpr0
; SI-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
@@ -367,6 +367,7 @@ define amdgpu_ps float @loop_with_use(i32 %z, float %v, i32 inreg %bound, ptr %e
; SI-NEXT: {{ $}}
; SI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[PHI1]], %subreg.sub0, killed [[PHI2]], %subreg.sub1
; SI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_MOV_B32 $exec_lo
+ ; SI-NEXT: [[COPY6:%[0-9]+]]:sgpr_128 = COPY $sgpr100_sgpr101_sgpr102_sgpr103
; SI-NEXT: {{ $}}
; SI-NEXT: bb.3:
; SI-NEXT: successors: %bb.4(0x80000000)
@@ -382,8 +383,7 @@ define amdgpu_ps float @loop_with_use(i32 %z, float %v, i32 inreg %bound, ptr %e
; SI-NEXT: successors: %bb.3(0x40000000), %bb.5(0x40000000)
; SI-NEXT: {{ $}}
; SI-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
- ; SI-NEXT: [[COPY6:%[0-9]+]]:sgpr_128 = COPY $sgpr100_sgpr101_sgpr102_sgpr103
- ; SI-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY killed [[COPY6]]
+ ; SI-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY6]]
; SI-NEXT: $vgpr0 = COPY [[COPY4]]
; SI-NEXT: dead $sgpr30_sgpr31 = SI_CALL killed [[REG_SEQUENCE1]], 0, csr_amdgpu_si_gfx, implicit killed $sgpr0_sgpr1_sgpr2_sgpr3, implicit killed $vgpr0, implicit-def $vgpr0
; SI-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
@@ -403,6 +403,7 @@ define amdgpu_ps float @loop_with_use(i32 %z, float %v, i32 inreg %bound, ptr %e
; SI-NEXT: {{ $}}
; SI-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[COPY1]], %subreg.sub0, killed [[COPY]], %subreg.sub1
; SI-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32_xm0_xexec = S_MOV_B32 $exec_lo
+ ; SI-NEXT: [[COPY9:%[0-9]+]]:sgpr_128 = COPY $sgpr100_sgpr101_sgpr102_sgpr103
; SI-NEXT: {{ $}}
; SI-NEXT: bb.7:
; SI-NEXT: successors: %bb.8(0x80000000)
@@ -418,8 +419,7 @@ define amdgpu_ps float @loop_with_use(i32 %z, float %v, i32 inreg %bound, ptr %e
; SI-NEXT: successors: %bb.7(0x40000000), %bb.9(0x40000000)
; SI-NEXT: {{ $}}
; SI-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
- ; SI-NEXT: [[COPY9:%[0-9]+]]:sgpr_128 = COPY $sgpr100_sgpr101_sgpr102_sgpr103
- ; SI-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY killed [[COPY9]]
+ ; SI-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY9]]
; SI-NEXT: $vgpr0 = COPY [[COPY4]]
; SI-NEXT: dead $sgpr30_sgpr31 = SI_CALL killed [[REG_SEQUENCE3]], 0, csr_amdgpu_si_gfx, implicit killed $sgpr0_sgpr1_sgpr2_sgpr3, implicit killed $vgpr0, implicit-def $vgpr0
; SI-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
diff --git a/llvm/test/CodeGen/AVR/shift.ll b/llvm/test/CodeGen/AVR/shift.ll
index c0abc77c9b14a..55ea509a8a5b6 100644
--- a/llvm/test/CodeGen/AVR/shift.ll
+++ b/llvm/test/CodeGen/AVR/shift.ll
@@ -60,13 +60,13 @@ define i64 @shift_i64_i64(i64 %a, i64 %b) {
; CHECK-NEXT: breq .LBB3_3
; CHECK-NEXT: ; %bb.1: ; %shift.loop.preheader
; CHECK-NEXT: mov r27, r1
-; CHECK-NEXT: mov r16, r1
-; CHECK-NEXT: mov r17, r1
+; CHECK-NEXT: mov r16, r27
+; CHECK-NEXT: mov r17, r27
; CHECK-NEXT: .LBB3_2: ; %shift.loop
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
; CHECK-NEXT: mov r31, r21
; CHECK-NEXT: lsl r31
-; CHECK-NEXT: mov r26, r1
+; CHECK-NEXT: mov r26, r27
; CHECK-NEXT: rol r26
; CHECK-NEXT: lsl r22
; CHECK-NEXT: rol r23
diff --git a/llvm/test/CodeGen/RISCV/machinelicm-constant-phys-reg.ll b/llvm/test/CodeGen/RISCV/machinelicm-constant-phys-reg.ll
index 2b243307309cb..e30bdfb939471 100644
--- a/llvm/test/CodeGen/RISCV/machinelicm-constant-phys-reg.ll
+++ b/llvm/test/CodeGen/RISCV/machinelicm-constant-phys-reg.ll
@@ -8,13 +8,13 @@ define i32 @test(ptr %a, i64 %n) {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: li a3, 0
; CHECK-NEXT: vsetvli a2, zero, e32, m1, ta, ma
+; CHECK-NEXT: vmv.s.x v8, zero
; CHECK-NEXT: .LBB0_1: # %loop
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: vl1re32.v v8, (a0)
+; CHECK-NEXT: vl1re32.v v9, (a0)
; CHECK-NEXT: mv a2, a3
-; CHECK-NEXT: vmv.s.x v9, zero
-; CHECK-NEXT: vredsum.vs v8, v8, v9
-; CHECK-NEXT: vmv.x.s a3, v8
+; CHECK-NEXT: vredsum.vs v9, v9, v8
+; CHECK-NEXT: vmv.x.s a3, v9
; CHECK-NEXT: addw a3, a3, a3
; CHECK-NEXT: addi a1, a1, -1
; CHECK-NEXT: addi a0, a0, 8
diff --git a/llvm/test/CodeGen/RISCV/rvv/65704-illegal-instruction.ll b/llvm/test/CodeGen/RISCV/rvv/65704-illegal-instruction.ll
index 42d6dac5b07fa..5ced89c17c420 100644
--- a/llvm/test/CodeGen/RISCV/rvv/65704-illegal-instruction.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/65704-illegal-instruction.ll
@@ -15,27 +15,30 @@ define void @foo(<vscale x 8 x i8> %0) {
; CHECK-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
; CHECK-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
; CHECK-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s2, 0(sp) # 8-byte Folded Spill
; CHECK-NEXT: .cfi_offset ra, -8
; CHECK-NEXT: .cfi_offset s0, -16
; CHECK-NEXT: .cfi_offset s1, -24
+; CHECK-NEXT: .cfi_offset s2, -32
+; CHECK-NEXT: li s0, 0
; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma
; CHECK-NEXT: vmv.v.i v9, 0
; CHECK-NEXT: vsetivli zero, 0, e8, m1, tu, ma
; CHECK-NEXT: vslideup.vi v9, v10, 0
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT: vmv.x.s s0, v9
+; CHECK-NEXT: vmv.x.s s1, v9
; CHECK-NEXT: vsetvli zero, zero, e8, m1, tu, ma
; CHECK-NEXT: vslideup.vi v8, v9, 0
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT: vmv.x.s s1, v8
+; CHECK-NEXT: vmv.x.s s2, v8
; CHECK-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: li a1, 0
-; CHECK-NEXT: mv a0, s0
-; CHECK-NEXT: mv a2, s1
-; CHECK-NEXT: li a3, 0
-; CHECK-NEXT: li a4, 0
-; CHECK-NEXT: li a5, 0
-; CHECK-NEXT: jalr a1
+; CHECK-NEXT: mv a0, s1
+; CHECK-NEXT: mv a1, s0
+; CHECK-NEXT: mv a2, s2
+; CHECK-NEXT: mv a3, s0
+; CHECK-NEXT: mv a4, s0
+; CHECK-NEXT: mv a5, s0
+; CHECK-NEXT: jalr s0
; CHECK-NEXT: j .LBB0_1
%2 = tail call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v16i8(<vscale x 8 x i8> undef, <16 x i8> undef, i64 0)
%3 = tail call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v16i8(<vscale x 8 x i8> undef, <16 x i8> poison, i64 0)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fold-scalar-load-crash.ll b/llvm/test/CodeGen/RISCV/rvv/fold-scalar-load-crash.ll
index c8bed2de754b2..477f7dfc0e763 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fold-scalar-load-crash.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fold-scalar-load-crash.ll
@@ -12,21 +12,19 @@ define i32 @test(i32 %size, ptr %add.ptr, i64 %const) {
; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; RV32-NEXT: vmv.v.x v8, a3
; RV32-NEXT: addi a1, a2, 1
-; RV32-NEXT: .LBB0_1: # %for.body
-; RV32-NEXT: # =>This Inner Loop Header: Depth=1
; RV32-NEXT: vmv.s.x v9, zero
-; RV32-NEXT: vmv1r.v v10, v8
; RV32-NEXT: vsetvli zero, a1, e8, mf2, tu, ma
-; RV32-NEXT: vslideup.vx v10, v9, a2
+; RV32-NEXT: vslideup.vx v8, v9, a2
; RV32-NEXT: vsetivli zero, 8, e8, mf2, tu, ma
-; RV32-NEXT: vmv.s.x v10, a0
+; RV32-NEXT: vmv.s.x v8, a0
; RV32-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
-; RV32-NEXT: vmseq.vi v9, v10, 0
-; RV32-NEXT: vmv.x.s a3, v9
-; RV32-NEXT: andi a3, a3, 255
-; RV32-NEXT: bnez a3, .LBB0_1
+; RV32-NEXT: vmseq.vi v8, v8, 0
+; RV32-NEXT: vmv.x.s a0, v8
+; RV32-NEXT: andi a0, a0, 255
+; RV32-NEXT: .LBB0_1: # %for.body
+; RV32-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32-NEXT: bnez a0, .LBB0_1
; RV32-NEXT: # %bb.2: # %if.then381
-; RV32-NEXT: li a0, 0
; RV32-NEXT: ret
;
; RV64-LABEL: test:
@@ -37,21 +35,19 @@ define i32 @test(i32 %size, ptr %add.ptr, i64 %const) {
; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; RV64-NEXT: vmv.v.x v8, a3
; RV64-NEXT: addi a1, a2, 1
-; RV64-NEXT: .LBB0_1: # %for.body
-; RV64-NEXT: # =>This Inner Loop Header: Depth=1
; RV64-NEXT: vmv.s.x v9, zero
-; RV64-NEXT: vmv1r.v v10, v8
; RV64-NEXT: vsetvli zero, a1, e8, mf2, tu, ma
-; RV64-NEXT: vslideup.vx v10, v9, a2
+; RV64-NEXT: vslideup.vx v8, v9, a2
; RV64-NEXT: vsetivli zero, 8, e8, mf2, tu, ma
-; RV64-NEXT: vmv.s.x v10, a0
+; RV64-NEXT: vmv.s.x v8, a0
; RV64-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
-; RV64-NEXT: vmseq.vi v9, v10, 0
-; RV64-NEXT: vmv.x.s a3, v9
-; RV64-NEXT: andi a3, a3, 255
-; RV64-NEXT: bnez a3, .LBB0_1
+; RV64-NEXT: vmseq.vi v8, v8, 0
+; RV64-NEXT: vmv.x.s a0, v8
+; RV64-NEXT: andi a0, a0, 255
+; RV64-NEXT: .LBB0_1: # %for.body
+; RV64-NEXT: # =>This Inner Loop Header: Depth=1
+; RV64-NEXT: bnez a0, .LBB0_1
; RV64-NEXT: # %bb.2: # %if.then381
-; RV64-NEXT: li a0, 0
; RV64-NEXT: ret
entry:
br label %for.body
diff --git a/llvm/test/CodeGen/RISCV/vlenb.ll b/llvm/test/CodeGen/RISCV/vlenb.ll
index 1d6c1b5d1acbd..26d4f99c3b979 100644
--- a/llvm/test/CodeGen/RISCV/vlenb.ll
+++ b/llvm/test/CodeGen/RISCV/vlenb.ll
@@ -71,10 +71,13 @@ define void @machine_licm() {
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; CHECK-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
; CHECK-NEXT: .cfi_offset ra, -4
+; CHECK-NEXT: .cfi_offset s0, -8
+; CHECK-NEXT: csrr s0, vlenb
; CHECK-NEXT: .LBB4_1: # %loop
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: mv a0, s0
; CHECK-NEXT: call use
; CHECK-NEXT: j .LBB4_1
entry:
More information about the llvm-commits
mailing list