[llvm] [RISCV] Slightly improve expanded multiply emulation in getVLENFactoredAmount. (PR #84113)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Mar 5 20:21:42 PST 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-risc-v
Author: Craig Topper (topperc)
<details>
<summary>Changes</summary>
Instead of initializing the accumulator to 0, initialize it on first
assignment with a mv from the register that holds VLENB << ShiftAmount.
Fix a missing kill flag on the final Add.
I have no real interest in this case, just an easy optimization I noticed.
Stacked on #<!-- -->84110. I should probably figure out how to use spr.
---
Full diff: https://github.com/llvm/llvm-project/pull/84113.diff
4 Files Affected:
- (modified) llvm/lib/Target/RISCV/RISCVInstrInfo.cpp (+22-15)
- (modified) llvm/test/CodeGen/RISCV/rvv/allocate-lmul-2-4-8.ll (+3-6)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll (+8-16)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-concat.ll (+2-4)
``````````diff
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index 2abe015c9f9cdc..dc1585686b6447 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -3059,11 +3059,11 @@ void RISCVInstrInfo::getVLENFactoredAmount(MachineFunction &MF,
"Reserve the stack by the multiple of one vector size.");
MachineRegisterInfo &MRI = MF.getRegInfo();
- int64_t NumOfVReg = Amount / 8;
+ assert(isInt<32>(Amount / 8) &&
+ "Expect the number of vector registers within 32-bits.");
+ uint32_t NumOfVReg = Amount / 8;
BuildMI(MBB, II, DL, get(RISCV::PseudoReadVLENB), DestReg).setMIFlag(Flag);
- assert(isInt<32>(NumOfVReg) &&
- "Expect the number of vector registers within 32-bits.");
if (llvm::has_single_bit<uint32_t>(NumOfVReg)) {
uint32_t ShiftAmount = Log2_32(NumOfVReg);
if (ShiftAmount == 0)
@@ -3130,30 +3130,37 @@ void RISCVInstrInfo::getVLENFactoredAmount(MachineFunction &MF,
.addReg(N, RegState::Kill)
.setMIFlag(Flag);
} else {
- Register Acc = MRI.createVirtualRegister(&RISCV::GPRRegClass);
- BuildMI(MBB, II, DL, get(RISCV::ADDI), Acc)
- .addReg(RISCV::X0)
- .addImm(0)
- .setMIFlag(Flag);
+ Register Acc;
uint32_t PrevShiftAmount = 0;
for (uint32_t ShiftAmount = 0; NumOfVReg >> ShiftAmount; ShiftAmount++) {
- if (NumOfVReg & (1LL << ShiftAmount)) {
+ if (NumOfVReg & (1U << ShiftAmount)) {
if (ShiftAmount)
BuildMI(MBB, II, DL, get(RISCV::SLLI), DestReg)
.addReg(DestReg, RegState::Kill)
.addImm(ShiftAmount - PrevShiftAmount)
.setMIFlag(Flag);
- if (NumOfVReg >> (ShiftAmount + 1))
- BuildMI(MBB, II, DL, get(RISCV::ADD), Acc)
- .addReg(Acc, RegState::Kill)
- .addReg(DestReg)
- .setMIFlag(Flag);
+ if (NumOfVReg >> (ShiftAmount + 1)) {
+ // If we don't have an accmulator yet, create it and copy DestReg.
+ if (!Acc) {
+ Acc = MRI.createVirtualRegister(&RISCV::GPRRegClass);
+ BuildMI(MBB, II, DL, get(RISCV::ADDI), Acc)
+ .addReg(DestReg)
+ .addImm(0)
+ .setMIFlag(Flag);
+ } else {
+ BuildMI(MBB, II, DL, get(RISCV::ADD), Acc)
+ .addReg(Acc, RegState::Kill)
+ .addReg(DestReg)
+ .setMIFlag(Flag);
+ }
+ }
PrevShiftAmount = ShiftAmount;
}
}
+ assert(Acc && "Expected valid accumulator");
BuildMI(MBB, II, DL, get(RISCV::ADD), DestReg)
.addReg(DestReg, RegState::Kill)
- .addReg(Acc)
+ .addReg(Acc, RegState::Kill)
.setMIFlag(Flag);
}
}
diff --git a/llvm/test/CodeGen/RISCV/rvv/allocate-lmul-2-4-8.ll b/llvm/test/CodeGen/RISCV/rvv/allocate-lmul-2-4-8.ll
index 78bec6c68c3f6e..466ab085b266b4 100644
--- a/llvm/test/CodeGen/RISCV/rvv/allocate-lmul-2-4-8.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/allocate-lmul-2-4-8.ll
@@ -253,9 +253,8 @@ define void @lmul4_and_2_x2_1() nounwind {
; NOMUL-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
; NOMUL-NEXT: addi s0, sp, 48
; NOMUL-NEXT: csrr a0, vlenb
-; NOMUL-NEXT: li a1, 0
; NOMUL-NEXT: slli a0, a0, 2
-; NOMUL-NEXT: add a1, a1, a0
+; NOMUL-NEXT: mv a1, a0
; NOMUL-NEXT: slli a0, a0, 1
; NOMUL-NEXT: add a0, a0, a1
; NOMUL-NEXT: sub sp, sp, a0
@@ -455,9 +454,8 @@ define void @lmul_8_x5() nounwind {
; NOMUL-NEXT: sd s0, 64(sp) # 8-byte Folded Spill
; NOMUL-NEXT: addi s0, sp, 80
; NOMUL-NEXT: csrr a0, vlenb
-; NOMUL-NEXT: li a1, 0
; NOMUL-NEXT: slli a0, a0, 3
-; NOMUL-NEXT: add a1, a1, a0
+; NOMUL-NEXT: mv a1, a0
; NOMUL-NEXT: slli a0, a0, 2
; NOMUL-NEXT: add a0, a0, a1
; NOMUL-NEXT: sub sp, sp, a0
@@ -517,9 +515,8 @@ define void @lmul_8_x9() nounwind {
; NOMUL-NEXT: sd s0, 64(sp) # 8-byte Folded Spill
; NOMUL-NEXT: addi s0, sp, 80
; NOMUL-NEXT: csrr a0, vlenb
-; NOMUL-NEXT: li a1, 0
; NOMUL-NEXT: slli a0, a0, 3
-; NOMUL-NEXT: add a1, a1, a0
+; NOMUL-NEXT: mv a1, a0
; NOMUL-NEXT: slli a0, a0, 3
; NOMUL-NEXT: add a0, a0, a1
; NOMUL-NEXT: sub sp, sp, a0
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll
index 855e280164a25c..68740eec56e4c4 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll
@@ -2133,9 +2133,8 @@ define float @vreduce_fminimum_v128f32(ptr %x) {
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: li a2, 0
; CHECK-NEXT: slli a1, a1, 3
-; CHECK-NEXT: add a2, a2, a1
+; CHECK-NEXT: mv a2, a1
; CHECK-NEXT: slli a1, a1, 1
; CHECK-NEXT: add a1, a1, a2
; CHECK-NEXT: sub sp, sp, a1
@@ -2256,9 +2255,8 @@ define float @vreduce_fminimum_v128f32(ptr %x) {
; CHECK-NEXT: vfmin.vv v8, v11, v8
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: li a1, 0
; CHECK-NEXT: slli a0, a0, 3
-; CHECK-NEXT: add a1, a1, a0
+; CHECK-NEXT: mv a1, a0
; CHECK-NEXT: slli a0, a0, 1
; CHECK-NEXT: add a0, a0, a1
; CHECK-NEXT: add sp, sp, a0
@@ -2739,9 +2737,8 @@ define double @vreduce_fminimum_v64f64(ptr %x) {
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: li a2, 0
; CHECK-NEXT: slli a1, a1, 3
-; CHECK-NEXT: add a2, a2, a1
+; CHECK-NEXT: mv a2, a1
; CHECK-NEXT: slli a1, a1, 1
; CHECK-NEXT: add a1, a1, a2
; CHECK-NEXT: sub sp, sp, a1
@@ -2852,9 +2849,8 @@ define double @vreduce_fminimum_v64f64(ptr %x) {
; CHECK-NEXT: vfmin.vv v8, v11, v8
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: li a1, 0
; CHECK-NEXT: slli a0, a0, 3
-; CHECK-NEXT: add a1, a1, a0
+; CHECK-NEXT: mv a1, a0
; CHECK-NEXT: slli a0, a0, 1
; CHECK-NEXT: add a0, a0, a1
; CHECK-NEXT: add sp, sp, a0
@@ -3461,9 +3457,8 @@ define float @vreduce_fmaximum_v128f32(ptr %x) {
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: li a2, 0
; CHECK-NEXT: slli a1, a1, 3
-; CHECK-NEXT: add a2, a2, a1
+; CHECK-NEXT: mv a2, a1
; CHECK-NEXT: slli a1, a1, 1
; CHECK-NEXT: add a1, a1, a2
; CHECK-NEXT: sub sp, sp, a1
@@ -3584,9 +3579,8 @@ define float @vreduce_fmaximum_v128f32(ptr %x) {
; CHECK-NEXT: vfmax.vv v8, v11, v8
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: li a1, 0
; CHECK-NEXT: slli a0, a0, 3
-; CHECK-NEXT: add a1, a1, a0
+; CHECK-NEXT: mv a1, a0
; CHECK-NEXT: slli a0, a0, 1
; CHECK-NEXT: add a0, a0, a1
; CHECK-NEXT: add sp, sp, a0
@@ -4067,9 +4061,8 @@ define double @vreduce_fmaximum_v64f64(ptr %x) {
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: li a2, 0
; CHECK-NEXT: slli a1, a1, 3
-; CHECK-NEXT: add a2, a2, a1
+; CHECK-NEXT: mv a2, a1
; CHECK-NEXT: slli a1, a1, 1
; CHECK-NEXT: add a1, a1, a2
; CHECK-NEXT: sub sp, sp, a1
@@ -4180,9 +4173,8 @@ define double @vreduce_fmaximum_v64f64(ptr %x) {
; CHECK-NEXT: vfmax.vv v8, v11, v8
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: li a1, 0
; CHECK-NEXT: slli a0, a0, 3
-; CHECK-NEXT: add a1, a1, a0
+; CHECK-NEXT: mv a1, a0
; CHECK-NEXT: slli a0, a0, 1
; CHECK-NEXT: add a0, a0, a1
; CHECK-NEXT: add sp, sp, a0
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-concat.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-concat.ll
index 6a7ec6dc5bd7df..e5bef20fd9e24d 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-concat.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-concat.ll
@@ -193,9 +193,8 @@ define <32 x i32> @concat_8xv4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x
; VLA-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb
; VLA-NEXT: vmv1r.v v16, v15
; VLA-NEXT: csrr a0, vlenb
-; VLA-NEXT: li a1, 0
; VLA-NEXT: slli a0, a0, 3
-; VLA-NEXT: add a1, a1, a0
+; VLA-NEXT: mv a1, a0
; VLA-NEXT: slli a0, a0, 1
; VLA-NEXT: add a0, a0, a1
; VLA-NEXT: add a0, sp, a0
@@ -245,9 +244,8 @@ define <32 x i32> @concat_8xv4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x
; VLA-NEXT: li a0, 32
; VLA-NEXT: vsetvli zero, a0, e32, m8, ta, ma
; VLA-NEXT: csrr a0, vlenb
-; VLA-NEXT: li a1, 0
; VLA-NEXT: slli a0, a0, 3
-; VLA-NEXT: add a1, a1, a0
+; VLA-NEXT: mv a1, a0
; VLA-NEXT: slli a0, a0, 1
; VLA-NEXT: add a0, a0, a1
; VLA-NEXT: add a0, sp, a0
``````````
</details>
https://github.com/llvm/llvm-project/pull/84113
More information about the llvm-commits
mailing list