[llvm-branch-commits] [llvm] [RISCV] Use larger copies when register tuples are aligned (PR #84455)
Wang Pengcheng via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Thu Mar 14 01:45:36 PDT 2024
https://github.com/wangpc-pp updated https://github.com/llvm/llvm-project/pull/84455
>From 35d0ea085b43a67c092e6263e6ec9d34e66e1453 Mon Sep 17 00:00:00 2001
From: Wang Pengcheng <wangpengcheng.pp at bytedance.com>
Date: Tue, 12 Mar 2024 17:31:47 +0800
Subject: [PATCH 1/2] Reduce copies
Created using spr 1.3.4
---
llvm/lib/Target/RISCV/RISCVInstrInfo.cpp | 89 +++++-----
llvm/test/CodeGen/RISCV/rvv/vmv-copy.mir | 30 +---
llvm/test/CodeGen/RISCV/rvv/zvlsseg-copy.mir | 175 +++++++------------
3 files changed, 106 insertions(+), 188 deletions(-)
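
At its core, the first patch replaces the fixed per-LMUL copy loop with a greedy walk over the tuple: at each step it emits the widest whole-register move (VMV8R_V, VMV4R_V, VMV2R_V, or VMV1R_V) for which both register encodings are suitably aligned and which still fits in the registers that remain. A minimal standalone sketch of that selection rule, mirroring the GetCopyInfo lambda in the diff below (the function name and the main() cases are illustrative, not part of the patch):

#include <cassert>
#include <cstdio>

// Returns 8, 4, 2, or 1: the number of vector registers a single
// whole-register move can cover for this (src, dst) pair at offset I
// of NumRegs registers total.
static unsigned largestCopyWidth(unsigned SrcEnc, unsigned DstEnc,
                                 unsigned I, unsigned NumRegs) {
  if (!(SrcEnc & 0b111) && !(DstEnc & 0b111) && I + 8 <= NumRegs)
    return 8; // both encodings 8-aligned -> VMV8R_V
  if (!(SrcEnc & 0b11) && !(DstEnc & 0b11) && I + 4 <= NumRegs)
    return 4; // both encodings 4-aligned -> VMV4R_V
  if (!(SrcEnc & 0b1) && !(DstEnc & 0b1) && I + 2 <= NumRegs)
    return 2; // both encodings 2-aligned -> VMV2R_V
  return 1;   // fall back to VMV1R_V
}

int main() {
  // $v2_v3 = COPY $v4_v5: both encodings even, so one VMV2R_V suffices.
  assert(largestCopyWidth(4, 2, 0, 2) == 2);
  // $v3_v4 = COPY $v4_v5: odd destination encoding, so VMV1R_V pieces.
  assert(largestCopyWidth(4, 3, 0, 2) == 1);
  std::puts("ok");
}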
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index 7895e87702c711..9fe5666d6a81f4 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -302,58 +302,38 @@ void RISCVInstrInfo::copyPhysRegVector(MachineBasicBlock &MBB,
RISCVII::VLMUL LMul, unsigned NF) const {
const TargetRegisterInfo *TRI = STI.getRegisterInfo();
- int I = 0, End = NF, Incr = 1;
unsigned SrcEncoding = TRI->getEncodingValue(SrcReg);
unsigned DstEncoding = TRI->getEncodingValue(DstReg);
unsigned LMulVal;
bool Fractional;
std::tie(LMulVal, Fractional) = RISCVVType::decodeVLMUL(LMul);
assert(!Fractional && "It is impossible to be fractional LMUL here.");
- if (forwardCopyWillClobberTuple(DstEncoding, SrcEncoding, NF * LMulVal)) {
- I = NF - 1;
- End = -1;
- Incr = -1;
- }
+ unsigned NumRegs = NF * LMulVal;
+ bool ReversedCopy =
+ forwardCopyWillClobberTuple(DstEncoding, SrcEncoding, NumRegs);
- for (; I != End; I += Incr) {
+ unsigned I = 0;
+ while (I != NumRegs) {
auto GetCopyInfo =
- [](RISCVII::VLMUL LMul,unsigned NF) -> std::tuple<unsigned, unsigned, unsigned, unsigned> {
- unsigned Opc;
- unsigned SubRegIdx;
- unsigned VVOpc, VIOpc;
- switch (LMul) {
- default:
- llvm_unreachable("Impossible LMUL for vector register copy.");
- case RISCVII::LMUL_1:
- Opc = RISCV::VMV1R_V;
- SubRegIdx = RISCV::sub_vrm1_0;
- VVOpc = RISCV::PseudoVMV_V_V_M1;
- VIOpc = RISCV::PseudoVMV_V_I_M1;
- break;
- case RISCVII::LMUL_2:
- Opc = RISCV::VMV2R_V;
- SubRegIdx = RISCV::sub_vrm2_0;
- VVOpc = RISCV::PseudoVMV_V_V_M2;
- VIOpc = RISCV::PseudoVMV_V_I_M2;
- break;
- case RISCVII::LMUL_4:
- Opc = RISCV::VMV4R_V;
- SubRegIdx = RISCV::sub_vrm4_0;
- VVOpc = RISCV::PseudoVMV_V_V_M4;
- VIOpc = RISCV::PseudoVMV_V_I_M4;
- break;
- case RISCVII::LMUL_8:
- assert(NF == 1);
- Opc = RISCV::VMV8R_V;
- SubRegIdx = RISCV::sub_vrm1_0; // There is no sub_vrm8_0.
- VVOpc = RISCV::PseudoVMV_V_V_M8;
- VIOpc = RISCV::PseudoVMV_V_I_M8;
- break;
- }
- return {SubRegIdx, Opc, VVOpc, VIOpc};
+ [&](unsigned SrcReg,
+ unsigned DstReg) -> std::tuple<int, const TargetRegisterClass &,
+ unsigned, unsigned, unsigned> {
+ unsigned SrcEncoding = TRI->getEncodingValue(SrcReg);
+ unsigned DstEncoding = TRI->getEncodingValue(DstReg);
+ if (!(SrcEncoding & 0b111) && !(DstEncoding & 0b111) && I + 8 <= NumRegs)
+ return {8, RISCV::VRM8RegClass, RISCV::VMV8R_V, RISCV::PseudoVMV_V_V_M8,
+ RISCV::PseudoVMV_V_I_M8};
+ if (!(SrcEncoding & 0b11) && !(DstEncoding & 0b11) && I + 4 <= NumRegs)
+ return {4, RISCV::VRM4RegClass, RISCV::VMV4R_V, RISCV::PseudoVMV_V_V_M4,
+ RISCV::PseudoVMV_V_I_M4};
+ if (!(SrcEncoding & 0b1) && !(DstEncoding & 0b1) && I + 2 <= NumRegs)
+ return {2, RISCV::VRM2RegClass, RISCV::VMV2R_V, RISCV::PseudoVMV_V_V_M2,
+ RISCV::PseudoVMV_V_I_M2};
+ return {1, RISCV::VRRegClass, RISCV::VMV1R_V, RISCV::PseudoVMV_V_V_M1,
+ RISCV::PseudoVMV_V_I_M1};
};
- auto [SubRegIdx, Opc, VVOpc, VIOpc] = GetCopyInfo(LMul, NF);
+ auto [NumCopied, RegClass, Opc, VVOpc, VIOpc] = GetCopyInfo(SrcReg, DstReg);
MachineBasicBlock::const_iterator DefMBBI;
if (isConvertibleToVMV_V_V(STI, MBB, MBBI, DefMBBI, LMul)) {
@@ -364,6 +344,20 @@ void RISCVInstrInfo::copyPhysRegVector(MachineBasicBlock &MBB,
}
}
+ for (MCPhysReg Reg : RegClass.getRegisters()) {
+ if (TRI->getEncodingValue(Reg) == TRI->getEncodingValue(SrcReg)) {
+ SrcReg = Reg;
+ break;
+ }
+ }
+
+ for (MCPhysReg Reg : RegClass.getRegisters()) {
+ if (TRI->getEncodingValue(Reg) == TRI->getEncodingValue(DstReg)) {
+ DstReg = Reg;
+ break;
+ }
+ }
+
auto EmitCopy = [&](MCRegister SrcReg, MCRegister DstReg, unsigned Opcode) {
auto MIB = BuildMI(MBB, MBBI, DL, get(Opcode), DstReg);
bool UseVMV_V_I = RISCV::getRVVMCOpcode(Opcode) == RISCV::VMV_V_I;
@@ -385,13 +379,10 @@ void RISCVInstrInfo::copyPhysRegVector(MachineBasicBlock &MBB,
}
};
- if (NF == 1) {
- EmitCopy(SrcReg, DstReg, Opc);
- return;
- }
-
- EmitCopy(TRI->getSubReg(SrcReg, SubRegIdx + I),
- TRI->getSubReg(DstReg, SubRegIdx + I), Opc);
+ EmitCopy(SrcReg, DstReg, Opc);
+ SrcReg = SrcReg.id() + (ReversedCopy ? -NumCopied : NumCopied);
+ DstReg = DstReg.id() + (ReversedCopy ? -NumCopied : NumCopied);
+ I += NumCopied;
}
}
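
The direction of the emitted sequence still hinges on forwardCopyWillClobberTuple: an ascending copy would read registers it has already overwritten whenever the destination tuple starts inside the source tuple, in which case the loop steps downward instead. A sketch of the semantics that check needs (the body below illustrates the expected behavior and is not text copied from the LLVM sources):

#include <cassert>

// An ascending copy clobbers its own source exactly when the destination
// tuple begins strictly inside the source tuple.
static bool forwardCopyWillClobberTuple(unsigned DstEnc, unsigned SrcEnc,
                                        unsigned NumRegs) {
  return DstEnc > SrcEnc && DstEnc - SrcEnc < NumRegs;
}

int main() {
  assert(forwardCopyWillClobberTuple(5, 4, 2));  // $v5_v6 = COPY $v4_v5
  assert(!forwardCopyWillClobberTuple(2, 4, 2)); // $v2_v3 = COPY $v4_v5
}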
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmv-copy.mir b/llvm/test/CodeGen/RISCV/rvv/vmv-copy.mir
index 4fa29e174602d0..dd569129db4d72 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmv-copy.mir
+++ b/llvm/test/CodeGen/RISCV/rvv/vmv-copy.mir
@@ -8,7 +8,6 @@ tracksRegLiveness: true
body: |
bb.0:
liveins: $x14, $x16
- ; 82 = e32,m4
; CHECK-LABEL: name: copy_different_lmul
; CHECK: liveins: $x14, $x16
; CHECK-NEXT: {{ $}}
@@ -25,7 +24,6 @@ tracksRegLiveness: true
body: |
bb.0:
liveins: $x14, $x16
- ; 82 = e32,m4
; CHECK-LABEL: name: copy_convert_to_vmv_v_v
; CHECK: liveins: $x14, $x16
; CHECK-NEXT: {{ $}}
@@ -42,7 +40,6 @@ tracksRegLiveness: true
body: |
bb.0:
liveins: $x14
- ; 82 = e32,m4
; CHECK-LABEL: name: copy_convert_to_vmv_v_i
; CHECK: liveins: $x14
; CHECK-NEXT: {{ $}}
@@ -59,7 +56,6 @@ tracksRegLiveness: true
body: |
bb.0:
liveins: $x14, $x16
- ; 82 = e32,m4
; CHECK-LABEL: name: copy_from_whole_load_store
; CHECK: liveins: $x14, $x16
; CHECK-NEXT: {{ $}}
@@ -76,7 +72,6 @@ tracksRegLiveness: true
body: |
bb.0:
liveins: $x14, $x16
- ; 82 = e32,m4
; CHECK-LABEL: name: copy_with_vleff
; CHECK: liveins: $x14, $x16
; CHECK-NEXT: {{ $}}
@@ -95,8 +90,6 @@ tracksRegLiveness: true
body: |
bb.0:
liveins: $x14, $x16, $x17, $x18
- ; 82 = e32,m4
- ; 73 = e16,m2
; CHECK-LABEL: name: copy_with_vsetvl_x0_x0_1
; CHECK: liveins: $x14, $x16, $x17, $x18
; CHECK-NEXT: {{ $}}
@@ -121,8 +114,6 @@ tracksRegLiveness: true
body: |
bb.0:
liveins: $x14, $x16, $x17, $x18
- ; 82 = e32,m4
- ; 73 = e16,m2
; CHECK-LABEL: name: copy_with_vsetvl_x0_x0_2
; CHECK: liveins: $x14, $x16, $x17, $x18
; CHECK-NEXT: {{ $}}
@@ -147,8 +138,6 @@ tracksRegLiveness: true
body: |
bb.0:
liveins: $x14, $x16, $x17, $x18
- ; 82 = e32,m4
- ; 73 = e16,m2
; CHECK-LABEL: name: copy_with_vsetvl_x0_x0_3
; CHECK: liveins: $x14, $x16, $x17, $x18
; CHECK-NEXT: {{ $}}
@@ -169,7 +158,6 @@ tracksRegLiveness: true
body: |
bb.0:
liveins: $x16, $x17
- ; 73 = e16,m2
; CHECK-LABEL: name: copy_subregister
; CHECK: liveins: $x16, $x17
; CHECK-NEXT: {{ $}}
@@ -191,8 +179,6 @@ tracksRegLiveness: true
body: |
bb.0:
liveins: $x14, $x16
- ; 82 = e32,m4
- ; 74 = e16,m4
; CHECK-LABEL: name: copy_with_different_vlmax
; CHECK: liveins: $x14, $x16
; CHECK-NEXT: {{ $}}
@@ -231,7 +217,6 @@ tracksRegLiveness: true
body: |
bb.0:
liveins: $x14, $x16
- ; 80 = e32,m1
; CHECK-LABEL: name: copy_zvlsseg_reg
; CHECK: liveins: $x14, $x16
; CHECK-NEXT: {{ $}}
@@ -248,14 +233,12 @@ tracksRegLiveness: true
body: |
bb.0:
liveins: $x14, $x16
- ; 80 = e32,m1
; CHECK-LABEL: name: copy_zvlsseg_reg_2
; CHECK: liveins: $x14, $x16
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: $x15 = PseudoVSETVLI $x14, 80 /* e32, m1, ta, mu */, implicit-def $vl, implicit-def $vtype
; CHECK-NEXT: $v8_v9 = PseudoVLSEG2E32_V_M1 undef $v8_v9, killed $x16, $noreg, 5 /* e32 */, 0 /* tu, mu */, implicit $vl, implicit $vtype
- ; CHECK-NEXT: $v10 = PseudoVMV_V_V_M1 undef $v10, $v8, $noreg, 5 /* e32 */, 0 /* tu, mu */, implicit $vl, implicit $vtype
- ; CHECK-NEXT: $v11 = PseudoVMV_V_V_M1 undef $v11, $v9, $noreg, 5 /* e32 */, 0 /* tu, mu */, implicit $vl, implicit $vtype
+ ; CHECK-NEXT: $v10m2 = PseudoVMV_V_V_M2 undef $v10m2, $v8m2, $noreg, 5 /* e32 */, 0 /* tu, mu */, implicit $vl, implicit $vtype
$x15 = PseudoVSETVLI $x14, 80, implicit-def $vl, implicit-def $vtype
$v8_v9 = PseudoVLSEG2E32_V_M1 undef $v8_v9, killed $x16, $noreg, 5, 0, implicit $vl, implicit $vtype
$v10_v11 = COPY $v8_v9
@@ -266,7 +249,6 @@ tracksRegLiveness: true
body: |
bb.0:
liveins: $x14, $x16
- ; 87 = e32,mf2
; CHECK-LABEL: name: copy_fractional_lmul
; CHECK: liveins: $x14, $x16
; CHECK-NEXT: {{ $}}
@@ -283,7 +265,6 @@ tracksRegLiveness: true
body: |
bb.0:
liveins: $x12, $x14, $x16
- ; 80 = e32,m1
; CHECK-LABEL: name: copy_implicit_def
; CHECK: liveins: $x12, $x14, $x16
; CHECK-NEXT: {{ $}}
@@ -291,14 +272,7 @@ body: |
; CHECK-NEXT: $v8_v9_v10_v11_v12_v13_v14_v15 = PseudoVLSEG8E32_V_M1 undef $v8_v9_v10_v11_v12_v13_v14_v15, killed $x12, $noreg, 5 /* e32 */, 0 /* tu, mu */, implicit $vl, implicit $vtype
; CHECK-NEXT: $x0 = PseudoVSETIVLI 10, 80 /* e32, m1, ta, mu */, implicit-def $vl, implicit-def $vtype
; CHECK-NEXT: $v15 = PseudoVLE32_V_M1 undef $v15, killed $x16, $noreg, 5 /* e32 */, 0 /* tu, mu */, implicit $vl, implicit $vtype, implicit killed $v8_v9_v10_v11_v12_v13_v14_v15, implicit-def $v8_v9_v10_v11_v12_v13_v14_v15
- ; CHECK-NEXT: $v24 = VMV1R_V killed $v8
- ; CHECK-NEXT: $v25 = VMV1R_V killed $v9
- ; CHECK-NEXT: $v26 = VMV1R_V killed $v10
- ; CHECK-NEXT: $v27 = VMV1R_V killed $v11
- ; CHECK-NEXT: $v28 = VMV1R_V killed $v12
- ; CHECK-NEXT: $v29 = VMV1R_V killed $v13
- ; CHECK-NEXT: $v30 = VMV1R_V killed $v14
- ; CHECK-NEXT: $v31 = VMV1R_V killed $v15
+ ; CHECK-NEXT: $v24m8 = VMV8R_V killed $v8m8
$x0 = PseudoVSETVLI $x14, 80, implicit-def $vl, implicit-def $vtype
$v8_v9_v10_v11_v12_v13_v14_v15 = PseudoVLSEG8E32_V_M1 undef $v8_v9_v10_v11_v12_v13_v14_v15, killed $x12, $noreg, 5, 0, implicit $vl, implicit $vtype
$x0 = PseudoVSETIVLI 10, 80, implicit-def $vl, implicit-def $vtype
diff --git a/llvm/test/CodeGen/RISCV/rvv/zvlsseg-copy.mir b/llvm/test/CodeGen/RISCV/rvv/zvlsseg-copy.mir
index 85bb54471ed3c8..afd8b27a3b90b9 100644
--- a/llvm/test/CodeGen/RISCV/rvv/zvlsseg-copy.mir
+++ b/llvm/test/CodeGen/RISCV/rvv/zvlsseg-copy.mir
@@ -7,30 +7,24 @@ name: copy_zvlsseg_N2
body: |
bb.0:
; CHECK-LABEL: name: copy_zvlsseg_N2
- ; CHECK: $v2 = VMV1R_V $v4
- ; CHECK-NEXT: $v3 = VMV1R_V $v5
+ ; CHECK: $v2m2 = VMV2R_V $v4m2
; CHECK-NEXT: $v3 = VMV1R_V $v4
; CHECK-NEXT: $v4 = VMV1R_V $v5
- ; CHECK-NEXT: $v6 = VMV1R_V $v5
; CHECK-NEXT: $v5 = VMV1R_V $v4
- ; CHECK-NEXT: $v6 = VMV1R_V $v4
- ; CHECK-NEXT: $v7 = VMV1R_V $v5
- ; CHECK-NEXT: $v0m2 = VMV2R_V $v4m2
- ; CHECK-NEXT: $v2m2 = VMV2R_V $v6m2
+ ; CHECK-NEXT: $v4 = VMV1R_V $v3
+ ; CHECK-NEXT: $v6m2 = VMV2R_V $v4m2
+ ; CHECK-NEXT: $v0m4 = VMV4R_V $v4m4
; CHECK-NEXT: $v2m2 = VMV2R_V $v4m2
; CHECK-NEXT: $v4m2 = VMV2R_V $v6m2
- ; CHECK-NEXT: $v8m2 = VMV2R_V $v6m2
; CHECK-NEXT: $v6m2 = VMV2R_V $v4m2
- ; CHECK-NEXT: $v8m2 = VMV2R_V $v4m2
- ; CHECK-NEXT: $v10m2 = VMV2R_V $v6m2
- ; CHECK-NEXT: $v0m4 = VMV4R_V $v8m4
- ; CHECK-NEXT: $v4m4 = VMV4R_V $v12m4
+ ; CHECK-NEXT: $v4m2 = VMV2R_V $v0m2
+ ; CHECK-NEXT: $v8m4 = VMV4R_V $v4m4
+ ; CHECK-NEXT: $v0m8 = VMV8R_V $v8m8
; CHECK-NEXT: $v4m4 = VMV4R_V $v8m4
; CHECK-NEXT: $v8m4 = VMV4R_V $v12m4
- ; CHECK-NEXT: $v16m4 = VMV4R_V $v12m4
; CHECK-NEXT: $v12m4 = VMV4R_V $v8m4
- ; CHECK-NEXT: $v16m4 = VMV4R_V $v8m4
- ; CHECK-NEXT: $v20m4 = VMV4R_V $v12m4
+ ; CHECK-NEXT: $v8m4 = VMV4R_V $v4m4
+ ; CHECK-NEXT: $v16m8 = VMV8R_V $v8m8
$v2_v3 = COPY $v4_v5
$v3_v4 = COPY $v4_v5
$v5_v6 = COPY $v4_v5
@@ -55,29 +49,23 @@ body: |
; CHECK-NEXT: $v3 = VMV1R_V $v6
; CHECK-NEXT: $v4 = VMV1R_V $v7
; CHECK-NEXT: $v3 = VMV1R_V $v5
- ; CHECK-NEXT: $v4 = VMV1R_V $v6
- ; CHECK-NEXT: $v5 = VMV1R_V $v7
+ ; CHECK-NEXT: $v4m2 = VMV2R_V $v6m2
; CHECK-NEXT: $v4 = VMV1R_V $v5
; CHECK-NEXT: $v5 = VMV1R_V $v6
; CHECK-NEXT: $v6 = VMV1R_V $v7
- ; CHECK-NEXT: $v9 = VMV1R_V $v7
- ; CHECK-NEXT: $v8 = VMV1R_V $v6
; CHECK-NEXT: $v7 = VMV1R_V $v5
+ ; CHECK-NEXT: $v6m2 = VMV2R_V $v4m2
; CHECK-NEXT: $v9 = VMV1R_V $v5
- ; CHECK-NEXT: $v10 = VMV1R_V $v6
- ; CHECK-NEXT: $v11 = VMV1R_V $v7
+ ; CHECK-NEXT: $v10m2 = VMV2R_V $v6m2
; CHECK-NEXT: $v0m2 = VMV2R_V $v6m2
- ; CHECK-NEXT: $v2m2 = VMV2R_V $v8m2
- ; CHECK-NEXT: $v4m2 = VMV2R_V $v10m2
+ ; CHECK-NEXT: $v0m4 = VMV4R_V $v8m4
; CHECK-NEXT: $v2m2 = VMV2R_V $v6m2
- ; CHECK-NEXT: $v4m2 = VMV2R_V $v8m2
- ; CHECK-NEXT: $v6m2 = VMV2R_V $v10m2
- ; CHECK-NEXT: $v14m2 = VMV2R_V $v10m2
- ; CHECK-NEXT: $v12m2 = VMV2R_V $v8m2
+ ; CHECK-NEXT: $v4m4 = VMV4R_V $v8m4
; CHECK-NEXT: $v10m2 = VMV2R_V $v6m2
+ ; CHECK-NEXT: $v8m4 = VMV4R_V $v4m4
; CHECK-NEXT: $v12m2 = VMV2R_V $v6m2
; CHECK-NEXT: $v14m2 = VMV2R_V $v8m2
- ; CHECK-NEXT: $v16m2 = VMV2R_V $v10m2
+ ; CHECK-NEXT: $v16m2 = VMV2R_V $v8m2
$v2_v3_v4 = COPY $v5_v6_v7
$v3_v4_v5 = COPY $v5_v6_v7
$v4_v5_v6 = COPY $v5_v6_v7
@@ -94,37 +82,30 @@ name: copy_zvlsseg_N4
body: |
bb.0:
; CHECK-LABEL: name: copy_zvlsseg_N4
- ; CHECK: $v6 = VMV1R_V $v10
- ; CHECK-NEXT: $v7 = VMV1R_V $v11
- ; CHECK-NEXT: $v8 = VMV1R_V $v12
- ; CHECK-NEXT: $v9 = VMV1R_V $v13
+ ; CHECK: $v6m2 = VMV2R_V $v10m2
+ ; CHECK-NEXT: $v8m2 = VMV2R_V $v12m2
; CHECK-NEXT: $v7 = VMV1R_V $v10
; CHECK-NEXT: $v8 = VMV1R_V $v11
; CHECK-NEXT: $v9 = VMV1R_V $v12
; CHECK-NEXT: $v10 = VMV1R_V $v13
- ; CHECK-NEXT: $v16 = VMV1R_V $v13
- ; CHECK-NEXT: $v15 = VMV1R_V $v12
- ; CHECK-NEXT: $v14 = VMV1R_V $v11
; CHECK-NEXT: $v13 = VMV1R_V $v10
- ; CHECK-NEXT: $v14 = VMV1R_V $v10
- ; CHECK-NEXT: $v15 = VMV1R_V $v11
- ; CHECK-NEXT: $v16 = VMV1R_V $v12
- ; CHECK-NEXT: $v17 = VMV1R_V $v13
+ ; CHECK-NEXT: $v12 = VMV1R_V $v9
+ ; CHECK-NEXT: $v11 = VMV1R_V $v8
+ ; CHECK-NEXT: $v10 = VMV1R_V $v7
+ ; CHECK-NEXT: $v14m2 = VMV2R_V $v10m2
+ ; CHECK-NEXT: $v16m2 = VMV2R_V $v12m2
; CHECK-NEXT: $v2m2 = VMV2R_V $v10m2
- ; CHECK-NEXT: $v4m2 = VMV2R_V $v12m2
- ; CHECK-NEXT: $v6m2 = VMV2R_V $v14m2
+ ; CHECK-NEXT: $v4m4 = VMV4R_V $v12m4
; CHECK-NEXT: $v8m2 = VMV2R_V $v16m2
; CHECK-NEXT: $v4m2 = VMV2R_V $v10m2
; CHECK-NEXT: $v6m2 = VMV2R_V $v12m2
; CHECK-NEXT: $v8m2 = VMV2R_V $v14m2
- ; CHECK-NEXT: $v10m2 = VMV2R_V $v16m2
- ; CHECK-NEXT: $v22m2 = VMV2R_V $v16m2
- ; CHECK-NEXT: $v20m2 = VMV2R_V $v14m2
- ; CHECK-NEXT: $v18m2 = VMV2R_V $v12m2
+ ; CHECK-NEXT: $v8m2 = VMV2R_V $v16m2
; CHECK-NEXT: $v16m2 = VMV2R_V $v10m2
+ ; CHECK-NEXT: $v12m4 = VMV4R_V $v8m4
+ ; CHECK-NEXT: $v8m2 = VMV2R_V $v4m2
; CHECK-NEXT: $v18m2 = VMV2R_V $v10m2
- ; CHECK-NEXT: $v20m2 = VMV2R_V $v12m2
- ; CHECK-NEXT: $v22m2 = VMV2R_V $v14m2
+ ; CHECK-NEXT: $v20m4 = VMV4R_V $v12m4
; CHECK-NEXT: $v24m2 = VMV2R_V $v16m2
$v6_v7_v8_v9 = COPY $v10_v11_v12_v13
$v7_v8_v9_v10 = COPY $v10_v11_v12_v13
@@ -146,16 +127,12 @@ body: |
; CHECK-NEXT: $v7 = VMV1R_V $v12
; CHECK-NEXT: $v8 = VMV1R_V $v13
; CHECK-NEXT: $v9 = VMV1R_V $v14
- ; CHECK-NEXT: $v6 = VMV1R_V $v10
- ; CHECK-NEXT: $v7 = VMV1R_V $v11
- ; CHECK-NEXT: $v8 = VMV1R_V $v12
- ; CHECK-NEXT: $v9 = VMV1R_V $v13
- ; CHECK-NEXT: $v10 = VMV1R_V $v14
- ; CHECK-NEXT: $v18 = VMV1R_V $v14
- ; CHECK-NEXT: $v17 = VMV1R_V $v13
- ; CHECK-NEXT: $v16 = VMV1R_V $v12
- ; CHECK-NEXT: $v15 = VMV1R_V $v11
- ; CHECK-NEXT: $v14 = VMV1R_V $v10
+ ; CHECK-NEXT: $v6m2 = VMV2R_V $v10m2
+ ; CHECK-NEXT: $v8m2 = VMV2R_V $v12m2
+ ; CHECK-NEXT: $v8 = VMV1R_V $v14
+ ; CHECK-NEXT: $v14m2 = VMV2R_V $v10m2
+ ; CHECK-NEXT: $v12m2 = VMV2R_V $v8m2
+ ; CHECK-NEXT: $v8 = VMV1R_V $v4
; CHECK-NEXT: $v15 = VMV1R_V $v10
; CHECK-NEXT: $v16 = VMV1R_V $v11
; CHECK-NEXT: $v17 = VMV1R_V $v12
@@ -171,30 +148,23 @@ name: copy_zvlsseg_N6
body: |
bb.0:
; CHECK-LABEL: name: copy_zvlsseg_N6
- ; CHECK: $v4 = VMV1R_V $v10
- ; CHECK-NEXT: $v5 = VMV1R_V $v11
- ; CHECK-NEXT: $v6 = VMV1R_V $v12
- ; CHECK-NEXT: $v7 = VMV1R_V $v13
- ; CHECK-NEXT: $v8 = VMV1R_V $v14
- ; CHECK-NEXT: $v9 = VMV1R_V $v15
+ ; CHECK: $v4m2 = VMV2R_V $v10m2
+ ; CHECK-NEXT: $v6m2 = VMV2R_V $v12m2
+ ; CHECK-NEXT: $v8m2 = VMV2R_V $v14m2
; CHECK-NEXT: $v5 = VMV1R_V $v10
; CHECK-NEXT: $v6 = VMV1R_V $v11
; CHECK-NEXT: $v7 = VMV1R_V $v12
; CHECK-NEXT: $v8 = VMV1R_V $v13
; CHECK-NEXT: $v9 = VMV1R_V $v14
; CHECK-NEXT: $v10 = VMV1R_V $v15
- ; CHECK-NEXT: $v20 = VMV1R_V $v15
- ; CHECK-NEXT: $v19 = VMV1R_V $v14
- ; CHECK-NEXT: $v18 = VMV1R_V $v13
- ; CHECK-NEXT: $v17 = VMV1R_V $v12
- ; CHECK-NEXT: $v16 = VMV1R_V $v11
; CHECK-NEXT: $v15 = VMV1R_V $v10
- ; CHECK-NEXT: $v16 = VMV1R_V $v10
- ; CHECK-NEXT: $v17 = VMV1R_V $v11
- ; CHECK-NEXT: $v18 = VMV1R_V $v12
- ; CHECK-NEXT: $v19 = VMV1R_V $v13
- ; CHECK-NEXT: $v20 = VMV1R_V $v14
- ; CHECK-NEXT: $v21 = VMV1R_V $v15
+ ; CHECK-NEXT: $v14 = VMV1R_V $v9
+ ; CHECK-NEXT: $v13 = VMV1R_V $v8
+ ; CHECK-NEXT: $v12 = VMV1R_V $v7
+ ; CHECK-NEXT: $v11 = VMV1R_V $v6
+ ; CHECK-NEXT: $v10 = VMV1R_V $v5
+ ; CHECK-NEXT: $v16m2 = VMV2R_V $v10m2
+ ; CHECK-NEXT: $v16m4 = VMV4R_V $v12m4
$v4_v5_v6_v7_v8_v9 = COPY $v10_v11_v12_v13_v14_v15
$v5_v6_v7_v8_v9_v10 = COPY $v10_v11_v12_v13_v14_v15
$v15_v16_v17_v18_v19_v20 = COPY $v10_v11_v12_v13_v14_v15
@@ -212,20 +182,13 @@ body: |
; CHECK-NEXT: $v7 = VMV1R_V $v14
; CHECK-NEXT: $v8 = VMV1R_V $v15
; CHECK-NEXT: $v9 = VMV1R_V $v16
- ; CHECK-NEXT: $v4 = VMV1R_V $v10
- ; CHECK-NEXT: $v5 = VMV1R_V $v11
- ; CHECK-NEXT: $v6 = VMV1R_V $v12
- ; CHECK-NEXT: $v7 = VMV1R_V $v13
- ; CHECK-NEXT: $v8 = VMV1R_V $v14
- ; CHECK-NEXT: $v9 = VMV1R_V $v15
- ; CHECK-NEXT: $v10 = VMV1R_V $v16
- ; CHECK-NEXT: $v22 = VMV1R_V $v16
- ; CHECK-NEXT: $v21 = VMV1R_V $v15
- ; CHECK-NEXT: $v20 = VMV1R_V $v14
- ; CHECK-NEXT: $v19 = VMV1R_V $v13
- ; CHECK-NEXT: $v18 = VMV1R_V $v12
- ; CHECK-NEXT: $v17 = VMV1R_V $v11
- ; CHECK-NEXT: $v16 = VMV1R_V $v10
+ ; CHECK-NEXT: $v4m2 = VMV2R_V $v10m2
+ ; CHECK-NEXT: $v6m2 = VMV2R_V $v12m2
+ ; CHECK-NEXT: $v8m2 = VMV2R_V $v14m2
+ ; CHECK-NEXT: $v8 = VMV1R_V $v16
+ ; CHECK-NEXT: $v16m2 = VMV2R_V $v10m2
+ ; CHECK-NEXT: $v12m4 = VMV4R_V $v8m4
+ ; CHECK-NEXT: $v8 = VMV1R_V $v4
; CHECK-NEXT: $v17 = VMV1R_V $v10
; CHECK-NEXT: $v18 = VMV1R_V $v11
; CHECK-NEXT: $v19 = VMV1R_V $v12
@@ -243,14 +206,9 @@ name: copy_zvlsseg_N8
body: |
bb.0:
; CHECK-LABEL: name: copy_zvlsseg_N8
- ; CHECK: $v2 = VMV1R_V $v10
- ; CHECK-NEXT: $v3 = VMV1R_V $v11
- ; CHECK-NEXT: $v4 = VMV1R_V $v12
- ; CHECK-NEXT: $v5 = VMV1R_V $v13
- ; CHECK-NEXT: $v6 = VMV1R_V $v14
- ; CHECK-NEXT: $v7 = VMV1R_V $v15
- ; CHECK-NEXT: $v8 = VMV1R_V $v16
- ; CHECK-NEXT: $v9 = VMV1R_V $v17
+ ; CHECK: $v2m2 = VMV2R_V $v10m2
+ ; CHECK-NEXT: $v4m4 = VMV4R_V $v12m4
+ ; CHECK-NEXT: $v8m2 = VMV2R_V $v16m2
; CHECK-NEXT: $v3 = VMV1R_V $v10
; CHECK-NEXT: $v4 = VMV1R_V $v11
; CHECK-NEXT: $v5 = VMV1R_V $v12
@@ -259,22 +217,17 @@ body: |
; CHECK-NEXT: $v8 = VMV1R_V $v15
; CHECK-NEXT: $v9 = VMV1R_V $v16
; CHECK-NEXT: $v10 = VMV1R_V $v17
- ; CHECK-NEXT: $v24 = VMV1R_V $v17
- ; CHECK-NEXT: $v23 = VMV1R_V $v16
- ; CHECK-NEXT: $v22 = VMV1R_V $v15
- ; CHECK-NEXT: $v21 = VMV1R_V $v14
- ; CHECK-NEXT: $v20 = VMV1R_V $v13
- ; CHECK-NEXT: $v19 = VMV1R_V $v12
- ; CHECK-NEXT: $v18 = VMV1R_V $v11
; CHECK-NEXT: $v17 = VMV1R_V $v10
- ; CHECK-NEXT: $v18 = VMV1R_V $v10
- ; CHECK-NEXT: $v19 = VMV1R_V $v11
- ; CHECK-NEXT: $v20 = VMV1R_V $v12
- ; CHECK-NEXT: $v21 = VMV1R_V $v13
- ; CHECK-NEXT: $v22 = VMV1R_V $v14
- ; CHECK-NEXT: $v23 = VMV1R_V $v15
- ; CHECK-NEXT: $v24 = VMV1R_V $v16
- ; CHECK-NEXT: $v25 = VMV1R_V $v17
+ ; CHECK-NEXT: $v16 = VMV1R_V $v9
+ ; CHECK-NEXT: $v15 = VMV1R_V $v8
+ ; CHECK-NEXT: $v14 = VMV1R_V $v7
+ ; CHECK-NEXT: $v13 = VMV1R_V $v6
+ ; CHECK-NEXT: $v12 = VMV1R_V $v5
+ ; CHECK-NEXT: $v11 = VMV1R_V $v4
+ ; CHECK-NEXT: $v10 = VMV1R_V $v3
+ ; CHECK-NEXT: $v18m2 = VMV2R_V $v10m2
+ ; CHECK-NEXT: $v20m4 = VMV4R_V $v12m4
+ ; CHECK-NEXT: $v24m2 = VMV2R_V $v16m2
$v2_v3_v4_v5_v6_v7_v8_v9 = COPY $v10_v11_v12_v13_v14_v15_v16_v17
$v3_v4_v5_v6_v7_v8_v9_v10 = COPY $v10_v11_v12_v13_v14_v15_v16_v17
$v17_v18_v19_v20_v21_v22_v23_v24 = COPY $v10_v11_v12_v13_v14_v15_v16_v17
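
The second patch below also tidies the renaming step: once a wider class is chosen, the base $vN register has to be rewritten to the grouped register that shares its encoding ($v8 becomes $v8m2 in VRM2, and so on), which the patch expresses with llvm::find_if over RegClass.getRegisters(). A toy stand-in for that lookup (the Reg struct and the register list here are illustrative; the real code compares TRI->getEncodingValue results):

#include <algorithm>
#include <cassert>
#include <string>
#include <vector>

struct Reg { std::string Name; unsigned Encoding; };

// Find the register in the wider class whose encoding matches.
static Reg renameToClass(const std::vector<Reg> &Class, unsigned Encoding) {
  auto It = std::find_if(Class.begin(), Class.end(),
                         [&](const Reg &R) { return R.Encoding == Encoding; });
  assert(It != Class.end() && "encoding must exist in the wider class");
  return *It;
}

int main() {
  // Even-encoded VRM2 registers: $v0m2, $v2m2, ..., $v30m2.
  std::vector<Reg> VRM2;
  for (unsigned E = 0; E < 32; E += 2)
    VRM2.push_back({"$v" + std::to_string(E) + "m2", E});
  assert(renameToClass(VRM2, 8).Name == "$v8m2"); // $v8 viewed as $v8m2
}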
>From de09e8b8d26c835e551879c058cda3f8130ae053 Mon Sep 17 00:00:00 2001
From: Wang Pengcheng <wangpengcheng.pp at bytedance.com>
Date: Thu, 14 Mar 2024 16:45:26 +0800
Subject: [PATCH 2/2] Address comments and fix wrong LMUL passed to
isConvertibleToVMV_V_V
Created using spr 1.3.4
---
llvm/lib/Target/RISCV/RISCVInstrInfo.cpp | 104 +++++++++++------------
llvm/test/CodeGen/RISCV/rvv/vmv-copy.mir | 12 ++-
2 files changed, 58 insertions(+), 58 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index 9fe5666d6a81f4..3dd0d0c456dd5f 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -313,73 +313,65 @@ void RISCVInstrInfo::copyPhysRegVector(MachineBasicBlock &MBB,
forwardCopyWillClobberTuple(DstEncoding, SrcEncoding, NumRegs);
unsigned I = 0;
+ auto GetCopyInfo = [&](MCRegister SrcReg, MCRegister DstReg)
+ -> std::tuple<RISCVII::VLMUL, const TargetRegisterClass &, unsigned,
+ unsigned, unsigned> {
+ unsigned SrcEncoding = TRI->getEncodingValue(SrcReg);
+ unsigned DstEncoding = TRI->getEncodingValue(DstReg);
+ if (!(SrcEncoding & 0b111) && !(DstEncoding & 0b111) && I + 8 <= NumRegs)
+ return {RISCVII::LMUL_8, RISCV::VRM8RegClass, RISCV::VMV8R_V,
+ RISCV::PseudoVMV_V_V_M8, RISCV::PseudoVMV_V_I_M8};
+ if (!(SrcEncoding & 0b11) && !(DstEncoding & 0b11) && I + 4 <= NumRegs)
+ return {RISCVII::LMUL_4, RISCV::VRM4RegClass, RISCV::VMV4R_V,
+ RISCV::PseudoVMV_V_V_M4, RISCV::PseudoVMV_V_I_M4};
+ if (!(SrcEncoding & 0b1) && !(DstEncoding & 0b1) && I + 2 <= NumRegs)
+ return {RISCVII::LMUL_2, RISCV::VRM2RegClass, RISCV::VMV2R_V,
+ RISCV::PseudoVMV_V_V_M2, RISCV::PseudoVMV_V_I_M2};
+ return {RISCVII::LMUL_1, RISCV::VRRegClass, RISCV::VMV1R_V,
+ RISCV::PseudoVMV_V_V_M1, RISCV::PseudoVMV_V_I_M1};
+ };
while (I != NumRegs) {
- auto GetCopyInfo =
- [&](unsigned SrcReg,
- unsigned DstReg) -> std::tuple<int, const TargetRegisterClass &,
- unsigned, unsigned, unsigned> {
- unsigned SrcEncoding = TRI->getEncodingValue(SrcReg);
- unsigned DstEncoding = TRI->getEncodingValue(DstReg);
- if (!(SrcEncoding & 0b111) && !(DstEncoding & 0b111) && I + 8 <= NumRegs)
- return {8, RISCV::VRM8RegClass, RISCV::VMV8R_V, RISCV::PseudoVMV_V_V_M8,
- RISCV::PseudoVMV_V_I_M8};
- if (!(SrcEncoding & 0b11) && !(DstEncoding & 0b11) && I + 4 <= NumRegs)
- return {4, RISCV::VRM4RegClass, RISCV::VMV4R_V, RISCV::PseudoVMV_V_V_M4,
- RISCV::PseudoVMV_V_I_M4};
- if (!(SrcEncoding & 0b1) && !(DstEncoding & 0b1) && I + 2 <= NumRegs)
- return {2, RISCV::VRM2RegClass, RISCV::VMV2R_V, RISCV::PseudoVMV_V_V_M2,
- RISCV::PseudoVMV_V_I_M2};
- return {1, RISCV::VRRegClass, RISCV::VMV1R_V, RISCV::PseudoVMV_V_V_M1,
- RISCV::PseudoVMV_V_I_M1};
- };
-
- auto [NumCopied, RegClass, Opc, VVOpc, VIOpc] = GetCopyInfo(SrcReg, DstReg);
+ auto [LMul, RegClass, Opc, VVOpc, VIOpc] = GetCopyInfo(SrcReg, DstReg);
+ unsigned NumCopied = 1 << LMul;
MachineBasicBlock::const_iterator DefMBBI;
if (isConvertibleToVMV_V_V(STI, MBB, MBBI, DefMBBI, LMul)) {
Opc = VVOpc;
-
- if (DefMBBI->getOpcode() == VIOpc) {
+ if (DefMBBI->getOpcode() == VIOpc)
Opc = VIOpc;
- }
}
- for (MCPhysReg Reg : RegClass.getRegisters()) {
- if (TRI->getEncodingValue(Reg) == TRI->getEncodingValue(SrcReg)) {
- SrcReg = Reg;
- break;
- }
- }
+ ArrayRef<MCPhysReg> Regs = RegClass.getRegisters();
+ const auto *FoundSrcReg = llvm::find_if(Regs, [&](MCPhysReg Reg) {
+ return TRI->getEncodingValue(Reg) == TRI->getEncodingValue(SrcReg);
+ });
+ assert(FoundSrcReg != Regs.end());
+ SrcReg = *FoundSrcReg;
- for (MCPhysReg Reg : RegClass.getRegisters()) {
- if (TRI->getEncodingValue(Reg) == TRI->getEncodingValue(DstReg)) {
- DstReg = Reg;
- break;
- }
+ const auto *FoundDstReg = llvm::find_if(Regs, [&](MCPhysReg Reg) {
+ return TRI->getEncodingValue(Reg) == TRI->getEncodingValue(DstReg);
+ });
+ assert(FoundDstReg != Regs.end());
+ DstReg = *FoundDstReg;
+
+ auto MIB = BuildMI(MBB, MBBI, DL, get(Opc), DstReg);
+ bool UseVMV_V_I = RISCV::getRVVMCOpcode(Opc) == RISCV::VMV_V_I;
+ bool UseVMV = UseVMV_V_I || RISCV::getRVVMCOpcode(Opc) == RISCV::VMV_V_V;
+ if (UseVMV)
+ MIB.addReg(DstReg, RegState::Undef);
+ if (UseVMV_V_I)
+ MIB = MIB.add(DefMBBI->getOperand(2));
+ else
+ MIB = MIB.addReg(SrcReg, getKillRegState(KillSrc));
+ if (UseVMV) {
+ const MCInstrDesc &Desc = DefMBBI->getDesc();
+ MIB.add(DefMBBI->getOperand(RISCVII::getVLOpNum(Desc))); // AVL
+ MIB.add(DefMBBI->getOperand(RISCVII::getSEWOpNum(Desc))); // SEW
+ MIB.addImm(0); // tu, mu
+ MIB.addReg(RISCV::VL, RegState::Implicit);
+ MIB.addReg(RISCV::VTYPE, RegState::Implicit);
}
- auto EmitCopy = [&](MCRegister SrcReg, MCRegister DstReg, unsigned Opcode) {
- auto MIB = BuildMI(MBB, MBBI, DL, get(Opcode), DstReg);
- bool UseVMV_V_I = RISCV::getRVVMCOpcode(Opcode) == RISCV::VMV_V_I;
- bool UseVMV =
- UseVMV_V_I || RISCV::getRVVMCOpcode(Opcode) == RISCV::VMV_V_V;
- if (UseVMV)
- MIB.addReg(DstReg, RegState::Undef);
- if (UseVMV_V_I)
- MIB = MIB.add(DefMBBI->getOperand(2));
- else
- MIB = MIB.addReg(SrcReg, getKillRegState(KillSrc));
- if (UseVMV) {
- const MCInstrDesc &Desc = DefMBBI->getDesc();
- MIB.add(DefMBBI->getOperand(RISCVII::getVLOpNum(Desc))); // AVL
- MIB.add(DefMBBI->getOperand(RISCVII::getSEWOpNum(Desc))); // SEW
- MIB.addImm(0); // tu, mu
- MIB.addReg(RISCV::VL, RegState::Implicit);
- MIB.addReg(RISCV::VTYPE, RegState::Implicit);
- }
- };
-
- EmitCopy(SrcReg, DstReg, Opc);
SrcReg = SrcReg.id() + (ReversedCopy ? -NumCopied : NumCopied);
DstReg = DstReg.id() + (ReversedCopy ? -NumCopied : NumCopied);
I += NumCopied;
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmv-copy.mir b/llvm/test/CodeGen/RISCV/rvv/vmv-copy.mir
index dd569129db4d72..6449dd0bfc67c1 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmv-copy.mir
+++ b/llvm/test/CodeGen/RISCV/rvv/vmv-copy.mir
@@ -238,7 +238,8 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: $x15 = PseudoVSETVLI $x14, 80 /* e32, m1, ta, mu */, implicit-def $vl, implicit-def $vtype
; CHECK-NEXT: $v8_v9 = PseudoVLSEG2E32_V_M1 undef $v8_v9, killed $x16, $noreg, 5 /* e32 */, 0 /* tu, mu */, implicit $vl, implicit $vtype
- ; CHECK-NEXT: $v10m2 = PseudoVMV_V_V_M2 undef $v10m2, $v8m2, $noreg, 5 /* e32 */, 0 /* tu, mu */, implicit $vl, implicit $vtype
+ ; CHECK-NEXT: $v10 = PseudoVMV_V_V_M1 undef $v10, $v8, $noreg, 5 /* e32 */, 0 /* tu, mu */, implicit $vl, implicit $vtype
+ ; CHECK-NEXT: $v11 = PseudoVMV_V_V_M1 undef $v11, $v9, $noreg, 5 /* e32 */, 0 /* tu, mu */, implicit $vl, implicit $vtype
$x15 = PseudoVSETVLI $x14, 80, implicit-def $vl, implicit-def $vtype
$v8_v9 = PseudoVLSEG2E32_V_M1 undef $v8_v9, killed $x16, $noreg, 5, 0, implicit $vl, implicit $vtype
$v10_v11 = COPY $v8_v9
@@ -272,7 +273,14 @@ body: |
; CHECK-NEXT: $v8_v9_v10_v11_v12_v13_v14_v15 = PseudoVLSEG8E32_V_M1 undef $v8_v9_v10_v11_v12_v13_v14_v15, killed $x12, $noreg, 5 /* e32 */, 0 /* tu, mu */, implicit $vl, implicit $vtype
; CHECK-NEXT: $x0 = PseudoVSETIVLI 10, 80 /* e32, m1, ta, mu */, implicit-def $vl, implicit-def $vtype
; CHECK-NEXT: $v15 = PseudoVLE32_V_M1 undef $v15, killed $x16, $noreg, 5 /* e32 */, 0 /* tu, mu */, implicit $vl, implicit $vtype, implicit killed $v8_v9_v10_v11_v12_v13_v14_v15, implicit-def $v8_v9_v10_v11_v12_v13_v14_v15
- ; CHECK-NEXT: $v24m8 = VMV8R_V killed $v8m8
+ ; CHECK-NEXT: $v24 = VMV1R_V killed $v8
+ ; CHECK-NEXT: $v25 = VMV1R_V killed $v9
+ ; CHECK-NEXT: $v26 = VMV1R_V killed $v10
+ ; CHECK-NEXT: $v27 = VMV1R_V killed $v11
+ ; CHECK-NEXT: $v28 = VMV1R_V killed $v12
+ ; CHECK-NEXT: $v29 = VMV1R_V killed $v13
+ ; CHECK-NEXT: $v30 = VMV1R_V killed $v14
+ ; CHECK-NEXT: $v31 = VMV1R_V killed $v15
$x0 = PseudoVSETVLI $x14, 80, implicit-def $vl, implicit-def $vtype
$v8_v9_v10_v11_v12_v13_v14_v15 = PseudoVLSEG8E32_V_M1 undef $v8_v9_v10_v11_v12_v13_v14_v15, killed $x12, $noreg, 5, 0, implicit $vl, implicit $vtype
$x0 = PseudoVSETIVLI 10, 80, implicit-def $vl, implicit-def $vtype
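
Taken together, the loop reproduces the widths the updated tests expect. A short simulation of the non-overlapping case (forward direction only; register numbers stand in for encodings, and the width rule repeats the sketch near the top of this mail):

#include <cstdio>

static unsigned width(unsigned S, unsigned D, unsigned I, unsigned N) {
  if (!(S & 7) && !(D & 7) && I + 8 <= N) return 8;
  if (!(S & 3) && !(D & 3) && I + 4 <= N) return 4;
  if (!(S & 1) && !(D & 1) && I + 2 <= N) return 2;
  return 1;
}

int main() {
  // $v2_v3_v4_v5_v6_v7_v8_v9 = COPY $v10_v11_v12_v13_v14_v15_v16_v17:
  // prints widths 2, 4, 2 -- matching VMV2R_V, VMV4R_V, VMV2R_V in the
  // copy_zvlsseg_N8 test above.
  unsigned Src = 10, Dst = 2, NumRegs = 8;
  for (unsigned I = 0; I != NumRegs;) {
    unsigned W = width(Src, Dst, I, NumRegs);
    std::printf("copy %u reg(s): v%u <- v%u\n", W, Dst, Src);
    Src += W; Dst += W; I += W;
  }
}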