[llvm] [RISCV] Remove support for pre-RA vsetvli insertion (PR #110796)
Luke Lau via llvm-commits
llvm-commits at lists.llvm.org
Mon Oct 28 04:31:10 PDT 2024
https://github.com/lukel97 updated https://github.com/llvm/llvm-project/pull/110796
>From 1321450148841be7ceca80df4ecfc007cb175603 Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Wed, 2 Oct 2024 15:38:40 +0800
Subject: [PATCH] [RISCV] Remove support for pre-RA vsetvli insertion
Now that LLVM 19.1.1 has been out for a while with post-vector-RA vsetvli insertion enabled by default, this patch removes the -riscv-vsetvl-after-rvv-regalloc flag, which restored the old pre-RA behaviour, so that only one configuration remains going forward.
That flag was mainly meant as a fallback in case users ran into issues, but I haven't seen anything reported so far.
---
llvm/lib/Target/RISCV/RISCVInstrInfo.cpp | 11 +-
llvm/lib/Target/RISCV/RISCVTargetMachine.cpp | 20 +-
llvm/test/CodeGen/RISCV/rvv/remat.ll | 468 +++++--------------
3 files changed, 133 insertions(+), 366 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index 3989a966edfd33..a3963fadf3e417 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -179,17 +179,10 @@ bool RISCVInstrInfo::isReallyTriviallyReMaterializable(
case RISCV::VMV_S_X:
case RISCV::VFMV_S_F:
case RISCV::VID_V:
- if (MI.getOperand(1).isUndef() &&
- /* After RISCVInsertVSETVLI most pseudos will have implicit uses on vl
- and vtype. Make sure we only rematerialize before RISCVInsertVSETVLI
- i.e. -riscv-vsetvl-after-rvv-regalloc=true */
- !MI.hasRegisterImplicitUseOperand(RISCV::VTYPE))
- return true;
- break;
+ return MI.getOperand(1).isUndef();
default:
- break;
+ return TargetInstrInfo::isReallyTriviallyReMaterializable(MI);
}
- return TargetInstrInfo::isReallyTriviallyReMaterializable(MI);
}
static bool forwardCopyWillClobberTuple(unsigned DstReg, unsigned SrcReg,
diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
index 089dc6c529193d..72d74d2d79b1d5 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
@@ -99,11 +99,6 @@ static cl::opt<bool> EnableMISchedLoadStoreClustering(
cl::desc("Enable load and store clustering in the machine scheduler"),
cl::init(true));
-static cl::opt<bool> EnableVSETVLIAfterRVVRegAlloc(
- "riscv-vsetvl-after-rvv-regalloc", cl::Hidden,
- cl::desc("Insert vsetvls after vector register allocation"),
- cl::init(true));
-
static cl::opt<bool>
EnableVLOptimizer("riscv-enable-vl-optimizer",
cl::desc("Enable the RISC-V VL Optimizer pass"),
@@ -413,8 +408,7 @@ FunctionPass *RISCVPassConfig::createRVVRegAllocPass(bool Optimized) {
bool RISCVPassConfig::addRegAssignAndRewriteFast() {
addPass(createRVVRegAllocPass(false));
- if (EnableVSETVLIAfterRVVRegAlloc)
- addPass(createRISCVInsertVSETVLIPass());
+ addPass(createRISCVInsertVSETVLIPass());
if (TM->getOptLevel() != CodeGenOptLevel::None &&
EnableRISCVDeadRegisterElimination)
addPass(createRISCVDeadRegisterDefinitionsPass());
@@ -424,8 +418,7 @@ bool RISCVPassConfig::addRegAssignAndRewriteFast() {
bool RISCVPassConfig::addRegAssignAndRewriteOptimized() {
addPass(createRVVRegAllocPass(true));
addPass(createVirtRegRewriter(false));
- if (EnableVSETVLIAfterRVVRegAlloc)
- addPass(createRISCVInsertVSETVLIPass());
+ addPass(createRISCVInsertVSETVLIPass());
if (TM->getOptLevel() != CodeGenOptLevel::None &&
EnableRISCVDeadRegisterElimination)
addPass(createRISCVDeadRegisterDefinitionsPass());
@@ -575,15 +568,6 @@ void RISCVPassConfig::addPreRegAlloc() {
addPass(createRISCVInsertReadWriteCSRPass());
addPass(createRISCVInsertWriteVXRMPass());
addPass(createRISCVLandingPadSetupPass());
-
- // Run RISCVInsertVSETVLI after PHI elimination. On O1 and above do it after
- // register coalescing so needVSETVLIPHI doesn't need to look through COPYs.
- if (!EnableVSETVLIAfterRVVRegAlloc) {
- if (TM->getOptLevel() == CodeGenOptLevel::None)
- insertPass(&PHIEliminationID, &RISCVInsertVSETVLIID);
- else
- insertPass(&RegisterCoalescerID, &RISCVInsertVSETVLIID);
- }
}
void RISCVPassConfig::addFastRegAlloc() {
diff --git a/llvm/test/CodeGen/RISCV/rvv/remat.ll b/llvm/test/CodeGen/RISCV/rvv/remat.ll
index 4f58ccb5188d31..64c59769546fb7 100644
--- a/llvm/test/CodeGen/RISCV/rvv/remat.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/remat.ll
@@ -1,53 +1,23 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc < %s -mtriple=riscv64 -mattr=+v -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,POSTRA
-; RUN: llc < %s -mtriple=riscv64 -mattr=+v -riscv-vsetvl-after-rvv-regalloc=false -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,PRERA
+; RUN: llc < %s -mtriple=riscv64 -mattr=+v -verify-machineinstrs | FileCheck %s
define void @vid(ptr %p) {
-; POSTRA-LABEL: vid:
-; POSTRA: # %bb.0:
-; POSTRA-NEXT: vsetvli a1, zero, e64, m8, ta, ma
-; POSTRA-NEXT: vid.v v8
-; POSTRA-NEXT: vs8r.v v8, (a0)
-; POSTRA-NEXT: vl8re64.v v16, (a0)
-; POSTRA-NEXT: vl8re64.v v24, (a0)
-; POSTRA-NEXT: vl8re64.v v0, (a0)
-; POSTRA-NEXT: vl8re64.v v8, (a0)
-; POSTRA-NEXT: vs8r.v v8, (a0)
-; POSTRA-NEXT: vs8r.v v0, (a0)
-; POSTRA-NEXT: vs8r.v v24, (a0)
-; POSTRA-NEXT: vs8r.v v16, (a0)
-; POSTRA-NEXT: vid.v v8
-; POSTRA-NEXT: vs8r.v v8, (a0)
-; POSTRA-NEXT: ret
-;
-; PRERA-LABEL: vid:
-; PRERA: # %bb.0:
-; PRERA-NEXT: addi sp, sp, -16
-; PRERA-NEXT: .cfi_def_cfa_offset 16
-; PRERA-NEXT: csrr a1, vlenb
-; PRERA-NEXT: slli a1, a1, 3
-; PRERA-NEXT: sub sp, sp, a1
-; PRERA-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
-; PRERA-NEXT: vsetvli a1, zero, e64, m8, ta, ma
-; PRERA-NEXT: vid.v v8
-; PRERA-NEXT: vs8r.v v8, (a0)
-; PRERA-NEXT: vl8re64.v v16, (a0)
-; PRERA-NEXT: addi a1, sp, 16
-; PRERA-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
-; PRERA-NEXT: vl8re64.v v24, (a0)
-; PRERA-NEXT: vl8re64.v v0, (a0)
-; PRERA-NEXT: vl8re64.v v16, (a0)
-; PRERA-NEXT: vs8r.v v16, (a0)
-; PRERA-NEXT: vs8r.v v0, (a0)
-; PRERA-NEXT: vs8r.v v24, (a0)
-; PRERA-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
-; PRERA-NEXT: vs8r.v v16, (a0)
-; PRERA-NEXT: vs8r.v v8, (a0)
-; PRERA-NEXT: csrr a0, vlenb
-; PRERA-NEXT: slli a0, a0, 3
-; PRERA-NEXT: add sp, sp, a0
-; PRERA-NEXT: addi sp, sp, 16
-; PRERA-NEXT: ret
+; CHECK-LABEL: vid:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e64, m8, ta, ma
+; CHECK-NEXT: vid.v v8
+; CHECK-NEXT: vs8r.v v8, (a0)
+; CHECK-NEXT: vl8re64.v v16, (a0)
+; CHECK-NEXT: vl8re64.v v24, (a0)
+; CHECK-NEXT: vl8re64.v v0, (a0)
+; CHECK-NEXT: vl8re64.v v8, (a0)
+; CHECK-NEXT: vs8r.v v8, (a0)
+; CHECK-NEXT: vs8r.v v0, (a0)
+; CHECK-NEXT: vs8r.v v24, (a0)
+; CHECK-NEXT: vs8r.v v16, (a0)
+; CHECK-NEXT: vid.v v8
+; CHECK-NEXT: vs8r.v v8, (a0)
+; CHECK-NEXT: ret
%vid = call <vscale x 8 x i64> @llvm.riscv.vid.nxv8i64(<vscale x 8 x i64> poison, i64 -1)
store volatile <vscale x 8 x i64> %vid, ptr %p
@@ -111,51 +81,22 @@ define void @vid_passthru(ptr %p, <vscale x 8 x i64> %v) {
}
define void @vmv.v.i(ptr %p) {
-; POSTRA-LABEL: vmv.v.i:
-; POSTRA: # %bb.0:
-; POSTRA-NEXT: vsetvli a1, zero, e64, m8, ta, ma
-; POSTRA-NEXT: vmv.v.i v8, 1
-; POSTRA-NEXT: vs8r.v v8, (a0)
-; POSTRA-NEXT: vl8re64.v v16, (a0)
-; POSTRA-NEXT: vl8re64.v v24, (a0)
-; POSTRA-NEXT: vl8re64.v v0, (a0)
-; POSTRA-NEXT: vl8re64.v v8, (a0)
-; POSTRA-NEXT: vs8r.v v8, (a0)
-; POSTRA-NEXT: vs8r.v v0, (a0)
-; POSTRA-NEXT: vs8r.v v24, (a0)
-; POSTRA-NEXT: vs8r.v v16, (a0)
-; POSTRA-NEXT: vmv.v.i v8, 1
-; POSTRA-NEXT: vs8r.v v8, (a0)
-; POSTRA-NEXT: ret
-;
-; PRERA-LABEL: vmv.v.i:
-; PRERA: # %bb.0:
-; PRERA-NEXT: addi sp, sp, -16
-; PRERA-NEXT: .cfi_def_cfa_offset 16
-; PRERA-NEXT: csrr a1, vlenb
-; PRERA-NEXT: slli a1, a1, 3
-; PRERA-NEXT: sub sp, sp, a1
-; PRERA-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
-; PRERA-NEXT: vsetvli a1, zero, e64, m8, ta, ma
-; PRERA-NEXT: vmv.v.i v8, 1
-; PRERA-NEXT: vs8r.v v8, (a0)
-; PRERA-NEXT: vl8re64.v v16, (a0)
-; PRERA-NEXT: addi a1, sp, 16
-; PRERA-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
-; PRERA-NEXT: vl8re64.v v24, (a0)
-; PRERA-NEXT: vl8re64.v v0, (a0)
-; PRERA-NEXT: vl8re64.v v16, (a0)
-; PRERA-NEXT: vs8r.v v16, (a0)
-; PRERA-NEXT: vs8r.v v0, (a0)
-; PRERA-NEXT: vs8r.v v24, (a0)
-; PRERA-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
-; PRERA-NEXT: vs8r.v v16, (a0)
-; PRERA-NEXT: vs8r.v v8, (a0)
-; PRERA-NEXT: csrr a0, vlenb
-; PRERA-NEXT: slli a0, a0, 3
-; PRERA-NEXT: add sp, sp, a0
-; PRERA-NEXT: addi sp, sp, 16
-; PRERA-NEXT: ret
+; CHECK-LABEL: vmv.v.i:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e64, m8, ta, ma
+; CHECK-NEXT: vmv.v.i v8, 1
+; CHECK-NEXT: vs8r.v v8, (a0)
+; CHECK-NEXT: vl8re64.v v16, (a0)
+; CHECK-NEXT: vl8re64.v v24, (a0)
+; CHECK-NEXT: vl8re64.v v0, (a0)
+; CHECK-NEXT: vl8re64.v v8, (a0)
+; CHECK-NEXT: vs8r.v v8, (a0)
+; CHECK-NEXT: vs8r.v v0, (a0)
+; CHECK-NEXT: vs8r.v v24, (a0)
+; CHECK-NEXT: vs8r.v v16, (a0)
+; CHECK-NEXT: vmv.v.i v8, 1
+; CHECK-NEXT: vs8r.v v8, (a0)
+; CHECK-NEXT: ret
%vmv.v.i = call <vscale x 8 x i64> @llvm.riscv.vmv.v.x.nxv8i64(<vscale x 8 x i64> poison, i64 1, i64 -1)
store volatile <vscale x 8 x i64> %vmv.v.i, ptr %p
@@ -172,66 +113,35 @@ define void @vmv.v.i(ptr %p) {
ret void
}
-; The live range of %x needs extended down to the use of vmv.v.x at the end of
-; the block.
define void @vmv.v.x_needs_extended(ptr %p, i64 %x) {
-; POSTRA-LABEL: vmv.v.x_needs_extended:
-; POSTRA: # %bb.0:
-; POSTRA-NEXT: addi sp, sp, -16
-; POSTRA-NEXT: .cfi_def_cfa_offset 16
-; POSTRA-NEXT: csrr a2, vlenb
-; POSTRA-NEXT: slli a2, a2, 3
-; POSTRA-NEXT: sub sp, sp, a2
-; POSTRA-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
-; POSTRA-NEXT: vsetvli a2, zero, e64, m8, ta, ma
-; POSTRA-NEXT: vmv.v.x v8, a1
-; POSTRA-NEXT: addi a1, sp, 16
-; POSTRA-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
-; POSTRA-NEXT: vs8r.v v8, (a0)
-; POSTRA-NEXT: vl8re64.v v16, (a0)
-; POSTRA-NEXT: vl8re64.v v24, (a0)
-; POSTRA-NEXT: vl8re64.v v0, (a0)
-; POSTRA-NEXT: vl8re64.v v8, (a0)
-; POSTRA-NEXT: vs8r.v v8, (a0)
-; POSTRA-NEXT: vs8r.v v0, (a0)
-; POSTRA-NEXT: vs8r.v v24, (a0)
-; POSTRA-NEXT: vs8r.v v16, (a0)
-; POSTRA-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
-; POSTRA-NEXT: vs8r.v v8, (a0)
-; POSTRA-NEXT: csrr a0, vlenb
-; POSTRA-NEXT: slli a0, a0, 3
-; POSTRA-NEXT: add sp, sp, a0
-; POSTRA-NEXT: addi sp, sp, 16
-; POSTRA-NEXT: ret
-;
-; PRERA-LABEL: vmv.v.x_needs_extended:
-; PRERA: # %bb.0:
-; PRERA-NEXT: addi sp, sp, -16
-; PRERA-NEXT: .cfi_def_cfa_offset 16
-; PRERA-NEXT: csrr a2, vlenb
-; PRERA-NEXT: slli a2, a2, 3
-; PRERA-NEXT: sub sp, sp, a2
-; PRERA-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
-; PRERA-NEXT: vsetvli a2, zero, e64, m8, ta, ma
-; PRERA-NEXT: vmv.v.x v8, a1
-; PRERA-NEXT: vs8r.v v8, (a0)
-; PRERA-NEXT: vl8re64.v v16, (a0)
-; PRERA-NEXT: addi a1, sp, 16
-; PRERA-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
-; PRERA-NEXT: vl8re64.v v24, (a0)
-; PRERA-NEXT: vl8re64.v v0, (a0)
-; PRERA-NEXT: vl8re64.v v16, (a0)
-; PRERA-NEXT: vs8r.v v16, (a0)
-; PRERA-NEXT: vs8r.v v0, (a0)
-; PRERA-NEXT: vs8r.v v24, (a0)
-; PRERA-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
-; PRERA-NEXT: vs8r.v v16, (a0)
-; PRERA-NEXT: vs8r.v v8, (a0)
-; PRERA-NEXT: csrr a0, vlenb
-; PRERA-NEXT: slli a0, a0, 3
-; PRERA-NEXT: add sp, sp, a0
-; PRERA-NEXT: addi sp, sp, 16
-; PRERA-NEXT: ret
+; CHECK-LABEL: vmv.v.x_needs_extended:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: vsetvli a2, zero, e64, m8, ta, ma
+; CHECK-NEXT: vmv.v.x v8, a1
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: vs8r.v v8, (a0)
+; CHECK-NEXT: vl8re64.v v16, (a0)
+; CHECK-NEXT: vl8re64.v v24, (a0)
+; CHECK-NEXT: vl8re64.v v0, (a0)
+; CHECK-NEXT: vl8re64.v v8, (a0)
+; CHECK-NEXT: vs8r.v v8, (a0)
+; CHECK-NEXT: vs8r.v v0, (a0)
+; CHECK-NEXT: vs8r.v v24, (a0)
+; CHECK-NEXT: vs8r.v v16, (a0)
+; CHECK-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
+; CHECK-NEXT: vs8r.v v8, (a0)
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: ret
%vmv.v.x = call <vscale x 8 x i64> @llvm.riscv.vmv.v.x.nxv8i64(<vscale x 8 x i64> poison, i64 %x, i64 -1)
store volatile <vscale x 8 x i64> %vmv.v.x, ptr %p
@@ -249,53 +159,23 @@ define void @vmv.v.x_needs_extended(ptr %p, i64 %x) {
}
define void @vmv.v.x_live(ptr %p, i64 %x) {
-; POSTRA-LABEL: vmv.v.x_live:
-; POSTRA: # %bb.0:
-; POSTRA-NEXT: vsetvli a2, zero, e64, m8, ta, ma
-; POSTRA-NEXT: vmv.v.x v8, a1
-; POSTRA-NEXT: vs8r.v v8, (a0)
-; POSTRA-NEXT: vl8re64.v v16, (a0)
-; POSTRA-NEXT: vl8re64.v v24, (a0)
-; POSTRA-NEXT: vl8re64.v v0, (a0)
-; POSTRA-NEXT: vl8re64.v v8, (a0)
-; POSTRA-NEXT: vs8r.v v8, (a0)
-; POSTRA-NEXT: vs8r.v v0, (a0)
-; POSTRA-NEXT: vs8r.v v24, (a0)
-; POSTRA-NEXT: vs8r.v v16, (a0)
-; POSTRA-NEXT: vmv.v.x v8, a1
-; POSTRA-NEXT: vs8r.v v8, (a0)
-; POSTRA-NEXT: sd a1, 0(a0)
-; POSTRA-NEXT: ret
-;
-; PRERA-LABEL: vmv.v.x_live:
-; PRERA: # %bb.0:
-; PRERA-NEXT: addi sp, sp, -16
-; PRERA-NEXT: .cfi_def_cfa_offset 16
-; PRERA-NEXT: csrr a2, vlenb
-; PRERA-NEXT: slli a2, a2, 3
-; PRERA-NEXT: sub sp, sp, a2
-; PRERA-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
-; PRERA-NEXT: vsetvli a2, zero, e64, m8, ta, ma
-; PRERA-NEXT: vmv.v.x v8, a1
-; PRERA-NEXT: vs8r.v v8, (a0)
-; PRERA-NEXT: vl8re64.v v16, (a0)
-; PRERA-NEXT: addi a2, sp, 16
-; PRERA-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
-; PRERA-NEXT: vl8re64.v v24, (a0)
-; PRERA-NEXT: vl8re64.v v0, (a0)
-; PRERA-NEXT: vl8re64.v v16, (a0)
-; PRERA-NEXT: vs8r.v v16, (a0)
-; PRERA-NEXT: vs8r.v v0, (a0)
-; PRERA-NEXT: vs8r.v v24, (a0)
-; PRERA-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload
-; PRERA-NEXT: vs8r.v v16, (a0)
-; PRERA-NEXT: vs8r.v v8, (a0)
-; PRERA-NEXT: sd a1, 0(a0)
-; PRERA-NEXT: csrr a0, vlenb
-; PRERA-NEXT: slli a0, a0, 3
-; PRERA-NEXT: add sp, sp, a0
-; PRERA-NEXT: addi sp, sp, 16
-; PRERA-NEXT: ret
+; CHECK-LABEL: vmv.v.x_live:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a2, zero, e64, m8, ta, ma
+; CHECK-NEXT: vmv.v.x v8, a1
+; CHECK-NEXT: vs8r.v v8, (a0)
+; CHECK-NEXT: vl8re64.v v16, (a0)
+; CHECK-NEXT: vl8re64.v v24, (a0)
+; CHECK-NEXT: vl8re64.v v0, (a0)
+; CHECK-NEXT: vl8re64.v v8, (a0)
+; CHECK-NEXT: vs8r.v v8, (a0)
+; CHECK-NEXT: vs8r.v v0, (a0)
+; CHECK-NEXT: vs8r.v v24, (a0)
+; CHECK-NEXT: vs8r.v v16, (a0)
+; CHECK-NEXT: vmv.v.x v8, a1
+; CHECK-NEXT: vs8r.v v8, (a0)
+; CHECK-NEXT: sd a1, 0(a0)
+; CHECK-NEXT: ret
%vmv.v.x = call <vscale x 8 x i64> @llvm.riscv.vmv.v.x.nxv8i64(<vscale x 8 x i64> poison, i64 %x, i64 -1)
store volatile <vscale x 8 x i64> %vmv.v.x, ptr %p
@@ -314,53 +194,23 @@ define void @vmv.v.x_live(ptr %p, i64 %x) {
}
define void @vfmv.v.f(ptr %p, double %x) {
-; POSTRA-LABEL: vfmv.v.f:
-; POSTRA: # %bb.0:
-; POSTRA-NEXT: vsetvli a1, zero, e64, m8, ta, ma
-; POSTRA-NEXT: vfmv.v.f v8, fa0
-; POSTRA-NEXT: vs8r.v v8, (a0)
-; POSTRA-NEXT: vl8re64.v v16, (a0)
-; POSTRA-NEXT: vl8re64.v v24, (a0)
-; POSTRA-NEXT: vl8re64.v v0, (a0)
-; POSTRA-NEXT: vl8re64.v v8, (a0)
-; POSTRA-NEXT: vs8r.v v8, (a0)
-; POSTRA-NEXT: vs8r.v v0, (a0)
-; POSTRA-NEXT: vs8r.v v24, (a0)
-; POSTRA-NEXT: vs8r.v v16, (a0)
-; POSTRA-NEXT: vfmv.v.f v8, fa0
-; POSTRA-NEXT: vs8r.v v8, (a0)
-; POSTRA-NEXT: fsd fa0, 0(a0)
-; POSTRA-NEXT: ret
-;
-; PRERA-LABEL: vfmv.v.f:
-; PRERA: # %bb.0:
-; PRERA-NEXT: addi sp, sp, -16
-; PRERA-NEXT: .cfi_def_cfa_offset 16
-; PRERA-NEXT: csrr a1, vlenb
-; PRERA-NEXT: slli a1, a1, 3
-; PRERA-NEXT: sub sp, sp, a1
-; PRERA-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
-; PRERA-NEXT: vsetvli a1, zero, e64, m8, ta, ma
-; PRERA-NEXT: vfmv.v.f v8, fa0
-; PRERA-NEXT: vs8r.v v8, (a0)
-; PRERA-NEXT: vl8re64.v v16, (a0)
-; PRERA-NEXT: addi a1, sp, 16
-; PRERA-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
-; PRERA-NEXT: vl8re64.v v24, (a0)
-; PRERA-NEXT: vl8re64.v v0, (a0)
-; PRERA-NEXT: vl8re64.v v16, (a0)
-; PRERA-NEXT: vs8r.v v16, (a0)
-; PRERA-NEXT: vs8r.v v0, (a0)
-; PRERA-NEXT: vs8r.v v24, (a0)
-; PRERA-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
-; PRERA-NEXT: vs8r.v v16, (a0)
-; PRERA-NEXT: vs8r.v v8, (a0)
-; PRERA-NEXT: fsd fa0, 0(a0)
-; PRERA-NEXT: csrr a0, vlenb
-; PRERA-NEXT: slli a0, a0, 3
-; PRERA-NEXT: add sp, sp, a0
-; PRERA-NEXT: addi sp, sp, 16
-; PRERA-NEXT: ret
+; CHECK-LABEL: vfmv.v.f:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e64, m8, ta, ma
+; CHECK-NEXT: vfmv.v.f v8, fa0
+; CHECK-NEXT: vs8r.v v8, (a0)
+; CHECK-NEXT: vl8re64.v v16, (a0)
+; CHECK-NEXT: vl8re64.v v24, (a0)
+; CHECK-NEXT: vl8re64.v v0, (a0)
+; CHECK-NEXT: vl8re64.v v8, (a0)
+; CHECK-NEXT: vs8r.v v8, (a0)
+; CHECK-NEXT: vs8r.v v0, (a0)
+; CHECK-NEXT: vs8r.v v24, (a0)
+; CHECK-NEXT: vs8r.v v16, (a0)
+; CHECK-NEXT: vfmv.v.f v8, fa0
+; CHECK-NEXT: vs8r.v v8, (a0)
+; CHECK-NEXT: fsd fa0, 0(a0)
+; CHECK-NEXT: ret
%vfmv.v.f = call <vscale x 8 x double> @llvm.riscv.vfmv.v.f.nxv8f64(<vscale x 8 x double> poison, double %x, i64 -1)
store volatile <vscale x 8 x double> %vfmv.v.f, ptr %p
@@ -379,53 +229,23 @@ define void @vfmv.v.f(ptr %p, double %x) {
}
define void @vmv.s.x(ptr %p, i64 %x) {
-; POSTRA-LABEL: vmv.s.x:
-; POSTRA: # %bb.0:
-; POSTRA-NEXT: vsetvli a2, zero, e64, m1, ta, ma
-; POSTRA-NEXT: vmv.s.x v8, a1
-; POSTRA-NEXT: vs8r.v v8, (a0)
-; POSTRA-NEXT: vl8re64.v v16, (a0)
-; POSTRA-NEXT: vl8re64.v v24, (a0)
-; POSTRA-NEXT: vl8re64.v v0, (a0)
-; POSTRA-NEXT: vl8re64.v v8, (a0)
-; POSTRA-NEXT: vs8r.v v8, (a0)
-; POSTRA-NEXT: vs8r.v v0, (a0)
-; POSTRA-NEXT: vs8r.v v24, (a0)
-; POSTRA-NEXT: vs8r.v v16, (a0)
-; POSTRA-NEXT: vmv.s.x v8, a1
-; POSTRA-NEXT: vs8r.v v8, (a0)
-; POSTRA-NEXT: sd a1, 0(a0)
-; POSTRA-NEXT: ret
-;
-; PRERA-LABEL: vmv.s.x:
-; PRERA: # %bb.0:
-; PRERA-NEXT: addi sp, sp, -16
-; PRERA-NEXT: .cfi_def_cfa_offset 16
-; PRERA-NEXT: csrr a2, vlenb
-; PRERA-NEXT: slli a2, a2, 3
-; PRERA-NEXT: sub sp, sp, a2
-; PRERA-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
-; PRERA-NEXT: vsetvli a2, zero, e64, m1, ta, ma
-; PRERA-NEXT: vmv.s.x v8, a1
-; PRERA-NEXT: vs8r.v v8, (a0)
-; PRERA-NEXT: vl8re64.v v16, (a0)
-; PRERA-NEXT: addi a2, sp, 16
-; PRERA-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
-; PRERA-NEXT: vl8re64.v v24, (a0)
-; PRERA-NEXT: vl8re64.v v0, (a0)
-; PRERA-NEXT: vl8re64.v v16, (a0)
-; PRERA-NEXT: vs8r.v v16, (a0)
-; PRERA-NEXT: vs8r.v v0, (a0)
-; PRERA-NEXT: vs8r.v v24, (a0)
-; PRERA-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload
-; PRERA-NEXT: vs8r.v v16, (a0)
-; PRERA-NEXT: vs8r.v v8, (a0)
-; PRERA-NEXT: sd a1, 0(a0)
-; PRERA-NEXT: csrr a0, vlenb
-; PRERA-NEXT: slli a0, a0, 3
-; PRERA-NEXT: add sp, sp, a0
-; PRERA-NEXT: addi sp, sp, 16
-; PRERA-NEXT: ret
+; CHECK-LABEL: vmv.s.x:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a2, zero, e64, m1, ta, ma
+; CHECK-NEXT: vmv.s.x v8, a1
+; CHECK-NEXT: vs8r.v v8, (a0)
+; CHECK-NEXT: vl8re64.v v16, (a0)
+; CHECK-NEXT: vl8re64.v v24, (a0)
+; CHECK-NEXT: vl8re64.v v0, (a0)
+; CHECK-NEXT: vl8re64.v v8, (a0)
+; CHECK-NEXT: vs8r.v v8, (a0)
+; CHECK-NEXT: vs8r.v v0, (a0)
+; CHECK-NEXT: vs8r.v v24, (a0)
+; CHECK-NEXT: vs8r.v v16, (a0)
+; CHECK-NEXT: vmv.s.x v8, a1
+; CHECK-NEXT: vs8r.v v8, (a0)
+; CHECK-NEXT: sd a1, 0(a0)
+; CHECK-NEXT: ret
%vmv.s.x = call <vscale x 8 x i64> @llvm.riscv.vmv.s.x.nxv8i64(<vscale x 8 x i64> poison, i64 %x, i64 -1)
store volatile <vscale x 8 x i64> %vmv.s.x, ptr %p
@@ -444,53 +264,23 @@ define void @vmv.s.x(ptr %p, i64 %x) {
}
define void @vfmv.s.f(ptr %p, double %x) {
-; POSTRA-LABEL: vfmv.s.f:
-; POSTRA: # %bb.0:
-; POSTRA-NEXT: vsetvli a1, zero, e64, m1, ta, ma
-; POSTRA-NEXT: vfmv.s.f v8, fa0
-; POSTRA-NEXT: vs8r.v v8, (a0)
-; POSTRA-NEXT: vl8re64.v v16, (a0)
-; POSTRA-NEXT: vl8re64.v v24, (a0)
-; POSTRA-NEXT: vl8re64.v v0, (a0)
-; POSTRA-NEXT: vl8re64.v v8, (a0)
-; POSTRA-NEXT: vs8r.v v8, (a0)
-; POSTRA-NEXT: vs8r.v v0, (a0)
-; POSTRA-NEXT: vs8r.v v24, (a0)
-; POSTRA-NEXT: vs8r.v v16, (a0)
-; POSTRA-NEXT: vfmv.s.f v8, fa0
-; POSTRA-NEXT: vs8r.v v8, (a0)
-; POSTRA-NEXT: fsd fa0, 0(a0)
-; POSTRA-NEXT: ret
-;
-; PRERA-LABEL: vfmv.s.f:
-; PRERA: # %bb.0:
-; PRERA-NEXT: addi sp, sp, -16
-; PRERA-NEXT: .cfi_def_cfa_offset 16
-; PRERA-NEXT: csrr a1, vlenb
-; PRERA-NEXT: slli a1, a1, 3
-; PRERA-NEXT: sub sp, sp, a1
-; PRERA-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
-; PRERA-NEXT: vsetvli a1, zero, e64, m1, ta, ma
-; PRERA-NEXT: vfmv.s.f v8, fa0
-; PRERA-NEXT: vs8r.v v8, (a0)
-; PRERA-NEXT: vl8re64.v v16, (a0)
-; PRERA-NEXT: addi a1, sp, 16
-; PRERA-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
-; PRERA-NEXT: vl8re64.v v24, (a0)
-; PRERA-NEXT: vl8re64.v v0, (a0)
-; PRERA-NEXT: vl8re64.v v16, (a0)
-; PRERA-NEXT: vs8r.v v16, (a0)
-; PRERA-NEXT: vs8r.v v0, (a0)
-; PRERA-NEXT: vs8r.v v24, (a0)
-; PRERA-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
-; PRERA-NEXT: vs8r.v v16, (a0)
-; PRERA-NEXT: vs8r.v v8, (a0)
-; PRERA-NEXT: fsd fa0, 0(a0)
-; PRERA-NEXT: csrr a0, vlenb
-; PRERA-NEXT: slli a0, a0, 3
-; PRERA-NEXT: add sp, sp, a0
-; PRERA-NEXT: addi sp, sp, 16
-; PRERA-NEXT: ret
+; CHECK-LABEL: vfmv.s.f:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e64, m1, ta, ma
+; CHECK-NEXT: vfmv.s.f v8, fa0
+; CHECK-NEXT: vs8r.v v8, (a0)
+; CHECK-NEXT: vl8re64.v v16, (a0)
+; CHECK-NEXT: vl8re64.v v24, (a0)
+; CHECK-NEXT: vl8re64.v v0, (a0)
+; CHECK-NEXT: vl8re64.v v8, (a0)
+; CHECK-NEXT: vs8r.v v8, (a0)
+; CHECK-NEXT: vs8r.v v0, (a0)
+; CHECK-NEXT: vs8r.v v24, (a0)
+; CHECK-NEXT: vs8r.v v16, (a0)
+; CHECK-NEXT: vfmv.s.f v8, fa0
+; CHECK-NEXT: vs8r.v v8, (a0)
+; CHECK-NEXT: fsd fa0, 0(a0)
+; CHECK-NEXT: ret
%vfmv.s.f = call <vscale x 8 x double> @llvm.riscv.vfmv.s.f.nxv8f64(<vscale x 8 x double> poison, double %x, i64 -1)
store volatile <vscale x 8 x double> %vfmv.s.f, ptr %p
More information about the llvm-commits mailing list