[llvm] 9cce9a1 - [RISCV] Make use of SHXADD instructions in RVV spill/reload code.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Tue May 3 19:41:48 PDT 2022
Author: Craig Topper
Date: 2022-05-03T19:35:21-07:00
New Revision: 9cce9a126cca662316e650f9111fc715dc3e0351
URL: https://github.com/llvm/llvm-project/commit/9cce9a126cca662316e650f9111fc715dc3e0351
DIFF: https://github.com/llvm/llvm-project/commit/9cce9a126cca662316e650f9111fc715dc3e0351.diff
LOG: [RISCV] Make use of SHXADD instructions in RVV spill/reload code.
We can use SH1ADD, SH2ADD, SH3ADD to multiply by 3, 5, and 9 respectively.
We could extend this to 3, 5, or 9 multiplied by a power of 2 by also
emitting a SLLI.
Reviewed By: reames
Differential Revision: https://reviews.llvm.org/D124824
Added:
Modified:
llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
llvm/test/CodeGen/RISCV/rvv/allocate-lmul-2-4-8.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index 67e7038ed2ecc..bed78da8ca937 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -1777,6 +1777,21 @@ Register RISCVInstrInfo::getVLENFactoredAmount(MachineFunction &MF,
.addReg(VL, RegState::Kill)
.addImm(ShiftAmount)
.setMIFlag(Flag);
+ } else if ((NumOfVReg == 3 || NumOfVReg == 5 || NumOfVReg == 9) &&
+ STI.hasStdExtZba()) {
+ // We can use Zba SHXADD instructions for multiply in some cases.
+ // TODO: Generalize to SHXADD+SLLI.
+ unsigned Opc;
+ switch (NumOfVReg) {
+ default: llvm_unreachable("Unexpected number of vregs");
+ case 3: Opc = RISCV::SH1ADD; break;
+ case 5: Opc = RISCV::SH2ADD; break;
+ case 9: Opc = RISCV::SH3ADD; break;
+ }
+ BuildMI(MBB, II, DL, get(Opc), VL)
+ .addReg(VL, RegState::Kill)
+ .addReg(VL)
+ .setMIFlag(Flag);
} else if (isPowerOf2_32(NumOfVReg - 1)) {
Register ScaledRegister = MRI.createVirtualRegister(&RISCV::GPRRegClass);
uint32_t ShiftAmount = Log2_32(NumOfVReg - 1);
diff --git a/llvm/test/CodeGen/RISCV/rvv/allocate-lmul-2-4-8.ll b/llvm/test/CodeGen/RISCV/rvv/allocate-lmul-2-4-8.ll
index ae64fbc93e237..327f28781a9e4 100644
--- a/llvm/test/CodeGen/RISCV/rvv/allocate-lmul-2-4-8.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/allocate-lmul-2-4-8.ll
@@ -1,6 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv64 -mattr=+m,+v -verify-machineinstrs < %s \
-; RUN: | FileCheck %s
+; RUN: | FileCheck %s --check-prefixes=CHECK,NOZBA
+; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+zba -verify-machineinstrs < %s \
+; RUN: | FileCheck %s --check-prefixes=CHECK,ZBA
define void @lmul1() nounwind {
; CHECK-LABEL: lmul1:
@@ -69,17 +71,27 @@ define void @lmul8() nounwind {
}
define void @lmul1_and_2() nounwind {
-; CHECK-LABEL: lmul1_and_2:
-; CHECK: # %bb.0:
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a1, a0, 1
-; CHECK-NEXT: add a0, a1, a0
-; CHECK-NEXT: sub sp, sp, a0
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a1, a0, 1
-; CHECK-NEXT: add a0, a1, a0
-; CHECK-NEXT: add sp, sp, a0
-; CHECK-NEXT: ret
+; NOZBA-LABEL: lmul1_and_2:
+; NOZBA: # %bb.0:
+; NOZBA-NEXT: csrr a0, vlenb
+; NOZBA-NEXT: slli a1, a0, 1
+; NOZBA-NEXT: add a0, a1, a0
+; NOZBA-NEXT: sub sp, sp, a0
+; NOZBA-NEXT: csrr a0, vlenb
+; NOZBA-NEXT: slli a1, a0, 1
+; NOZBA-NEXT: add a0, a1, a0
+; NOZBA-NEXT: add sp, sp, a0
+; NOZBA-NEXT: ret
+;
+; ZBA-LABEL: lmul1_and_2:
+; ZBA: # %bb.0:
+; ZBA-NEXT: csrr a0, vlenb
+; ZBA-NEXT: sh1add a0, a0, a0
+; ZBA-NEXT: sub sp, sp, a0
+; ZBA-NEXT: csrr a0, vlenb
+; ZBA-NEXT: sh1add a0, a0, a0
+; ZBA-NEXT: add sp, sp, a0
+; ZBA-NEXT: ret
%v1 = alloca <vscale x 1 x i64>
%v2 = alloca <vscale x 2 x i64>
ret void
@@ -108,61 +120,103 @@ define void @lmul2_and_4() nounwind {
}
define void @lmul1_and_4() nounwind {
-; CHECK-LABEL: lmul1_and_4:
-; CHECK: # %bb.0:
-; CHECK-NEXT: addi sp, sp, -32
-; CHECK-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; CHECK-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; CHECK-NEXT: addi s0, sp, 32
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a1, a0, 2
-; CHECK-NEXT: add a0, a1, a0
-; CHECK-NEXT: sub sp, sp, a0
-; CHECK-NEXT: andi sp, sp, -32
-; CHECK-NEXT: addi sp, s0, -32
-; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; CHECK-NEXT: addi sp, sp, 32
-; CHECK-NEXT: ret
+; NOZBA-LABEL: lmul1_and_4:
+; NOZBA: # %bb.0:
+; NOZBA-NEXT: addi sp, sp, -32
+; NOZBA-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; NOZBA-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; NOZBA-NEXT: addi s0, sp, 32
+; NOZBA-NEXT: csrr a0, vlenb
+; NOZBA-NEXT: slli a1, a0, 2
+; NOZBA-NEXT: add a0, a1, a0
+; NOZBA-NEXT: sub sp, sp, a0
+; NOZBA-NEXT: andi sp, sp, -32
+; NOZBA-NEXT: addi sp, s0, -32
+; NOZBA-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; NOZBA-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; NOZBA-NEXT: addi sp, sp, 32
+; NOZBA-NEXT: ret
+;
+; ZBA-LABEL: lmul1_and_4:
+; ZBA: # %bb.0:
+; ZBA-NEXT: addi sp, sp, -32
+; ZBA-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; ZBA-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; ZBA-NEXT: addi s0, sp, 32
+; ZBA-NEXT: csrr a0, vlenb
+; ZBA-NEXT: sh2add a0, a0, a0
+; ZBA-NEXT: sub sp, sp, a0
+; ZBA-NEXT: andi sp, sp, -32
+; ZBA-NEXT: addi sp, s0, -32
+; ZBA-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; ZBA-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; ZBA-NEXT: addi sp, sp, 32
+; ZBA-NEXT: ret
%v1 = alloca <vscale x 1 x i64>
%v2 = alloca <vscale x 4 x i64>
ret void
}
define void @lmul2_and_1() nounwind {
-; CHECK-LABEL: lmul2_and_1:
-; CHECK: # %bb.0:
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a1, a0, 1
-; CHECK-NEXT: add a0, a1, a0
-; CHECK-NEXT: sub sp, sp, a0
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a1, a0, 1
-; CHECK-NEXT: add a0, a1, a0
-; CHECK-NEXT: add sp, sp, a0
-; CHECK-NEXT: ret
+; NOZBA-LABEL: lmul2_and_1:
+; NOZBA: # %bb.0:
+; NOZBA-NEXT: csrr a0, vlenb
+; NOZBA-NEXT: slli a1, a0, 1
+; NOZBA-NEXT: add a0, a1, a0
+; NOZBA-NEXT: sub sp, sp, a0
+; NOZBA-NEXT: csrr a0, vlenb
+; NOZBA-NEXT: slli a1, a0, 1
+; NOZBA-NEXT: add a0, a1, a0
+; NOZBA-NEXT: add sp, sp, a0
+; NOZBA-NEXT: ret
+;
+; ZBA-LABEL: lmul2_and_1:
+; ZBA: # %bb.0:
+; ZBA-NEXT: csrr a0, vlenb
+; ZBA-NEXT: sh1add a0, a0, a0
+; ZBA-NEXT: sub sp, sp, a0
+; ZBA-NEXT: csrr a0, vlenb
+; ZBA-NEXT: sh1add a0, a0, a0
+; ZBA-NEXT: add sp, sp, a0
+; ZBA-NEXT: ret
%v1 = alloca <vscale x 2 x i64>
%v2 = alloca <vscale x 1 x i64>
ret void
}
define void @lmul4_and_1() nounwind {
-; CHECK-LABEL: lmul4_and_1:
-; CHECK: # %bb.0:
-; CHECK-NEXT: addi sp, sp, -32
-; CHECK-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; CHECK-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; CHECK-NEXT: addi s0, sp, 32
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a1, a0, 2
-; CHECK-NEXT: add a0, a1, a0
-; CHECK-NEXT: sub sp, sp, a0
-; CHECK-NEXT: andi sp, sp, -32
-; CHECK-NEXT: addi sp, s0, -32
-; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; CHECK-NEXT: addi sp, sp, 32
-; CHECK-NEXT: ret
+; NOZBA-LABEL: lmul4_and_1:
+; NOZBA: # %bb.0:
+; NOZBA-NEXT: addi sp, sp, -32
+; NOZBA-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; NOZBA-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; NOZBA-NEXT: addi s0, sp, 32
+; NOZBA-NEXT: csrr a0, vlenb
+; NOZBA-NEXT: slli a1, a0, 2
+; NOZBA-NEXT: add a0, a1, a0
+; NOZBA-NEXT: sub sp, sp, a0
+; NOZBA-NEXT: andi sp, sp, -32
+; NOZBA-NEXT: addi sp, s0, -32
+; NOZBA-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; NOZBA-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; NOZBA-NEXT: addi sp, sp, 32
+; NOZBA-NEXT: ret
+;
+; ZBA-LABEL: lmul4_and_1:
+; ZBA: # %bb.0:
+; ZBA-NEXT: addi sp, sp, -32
+; ZBA-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; ZBA-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; ZBA-NEXT: addi s0, sp, 32
+; ZBA-NEXT: csrr a0, vlenb
+; ZBA-NEXT: sh2add a0, a0, a0
+; ZBA-NEXT: sub sp, sp, a0
+; ZBA-NEXT: andi sp, sp, -32
+; ZBA-NEXT: addi sp, s0, -32
+; ZBA-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; ZBA-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; ZBA-NEXT: addi sp, sp, 32
+; ZBA-NEXT: ret
%v1 = alloca <vscale x 4 x i64>
%v2 = alloca <vscale x 1 x i64>
ret void
@@ -240,21 +294,35 @@ define void @lmul4_and_2_x2_1() nounwind {
define void @gpr_and_lmul1_and_2() nounwind {
-; CHECK-LABEL: gpr_and_lmul1_and_2:
-; CHECK: # %bb.0:
-; CHECK-NEXT: addi sp, sp, -16
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a1, a0, 1
-; CHECK-NEXT: add a0, a1, a0
-; CHECK-NEXT: sub sp, sp, a0
-; CHECK-NEXT: li a0, 3
-; CHECK-NEXT: sd a0, 8(sp)
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a1, a0, 1
-; CHECK-NEXT: add a0, a1, a0
-; CHECK-NEXT: add sp, sp, a0
-; CHECK-NEXT: addi sp, sp, 16
-; CHECK-NEXT: ret
+; NOZBA-LABEL: gpr_and_lmul1_and_2:
+; NOZBA: # %bb.0:
+; NOZBA-NEXT: addi sp, sp, -16
+; NOZBA-NEXT: csrr a0, vlenb
+; NOZBA-NEXT: slli a1, a0, 1
+; NOZBA-NEXT: add a0, a1, a0
+; NOZBA-NEXT: sub sp, sp, a0
+; NOZBA-NEXT: li a0, 3
+; NOZBA-NEXT: sd a0, 8(sp)
+; NOZBA-NEXT: csrr a0, vlenb
+; NOZBA-NEXT: slli a1, a0, 1
+; NOZBA-NEXT: add a0, a1, a0
+; NOZBA-NEXT: add sp, sp, a0
+; NOZBA-NEXT: addi sp, sp, 16
+; NOZBA-NEXT: ret
+;
+; ZBA-LABEL: gpr_and_lmul1_and_2:
+; ZBA: # %bb.0:
+; ZBA-NEXT: addi sp, sp, -16
+; ZBA-NEXT: csrr a0, vlenb
+; ZBA-NEXT: sh1add a0, a0, a0
+; ZBA-NEXT: sub sp, sp, a0
+; ZBA-NEXT: li a0, 3
+; ZBA-NEXT: sd a0, 8(sp)
+; ZBA-NEXT: csrr a0, vlenb
+; ZBA-NEXT: sh1add a0, a0, a0
+; ZBA-NEXT: add sp, sp, a0
+; ZBA-NEXT: addi sp, sp, 16
+; ZBA-NEXT: ret
%x1 = alloca i64
%v1 = alloca <vscale x 1 x i64>
%v2 = alloca <vscale x 2 x i64>
@@ -263,24 +331,42 @@ define void @gpr_and_lmul1_and_2() nounwind {
}
define void @gpr_and_lmul1_and_4() nounwind {
-; CHECK-LABEL: gpr_and_lmul1_and_4:
-; CHECK: # %bb.0:
-; CHECK-NEXT: addi sp, sp, -32
-; CHECK-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; CHECK-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; CHECK-NEXT: addi s0, sp, 32
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a1, a0, 2
-; CHECK-NEXT: add a0, a1, a0
-; CHECK-NEXT: sub sp, sp, a0
-; CHECK-NEXT: andi sp, sp, -32
-; CHECK-NEXT: li a0, 3
-; CHECK-NEXT: sd a0, 8(sp)
-; CHECK-NEXT: addi sp, s0, -32
-; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; CHECK-NEXT: addi sp, sp, 32
-; CHECK-NEXT: ret
+; NOZBA-LABEL: gpr_and_lmul1_and_4:
+; NOZBA: # %bb.0:
+; NOZBA-NEXT: addi sp, sp, -32
+; NOZBA-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; NOZBA-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; NOZBA-NEXT: addi s0, sp, 32
+; NOZBA-NEXT: csrr a0, vlenb
+; NOZBA-NEXT: slli a1, a0, 2
+; NOZBA-NEXT: add a0, a1, a0
+; NOZBA-NEXT: sub sp, sp, a0
+; NOZBA-NEXT: andi sp, sp, -32
+; NOZBA-NEXT: li a0, 3
+; NOZBA-NEXT: sd a0, 8(sp)
+; NOZBA-NEXT: addi sp, s0, -32
+; NOZBA-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; NOZBA-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; NOZBA-NEXT: addi sp, sp, 32
+; NOZBA-NEXT: ret
+;
+; ZBA-LABEL: gpr_and_lmul1_and_4:
+; ZBA: # %bb.0:
+; ZBA-NEXT: addi sp, sp, -32
+; ZBA-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; ZBA-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; ZBA-NEXT: addi s0, sp, 32
+; ZBA-NEXT: csrr a0, vlenb
+; ZBA-NEXT: sh2add a0, a0, a0
+; ZBA-NEXT: sub sp, sp, a0
+; ZBA-NEXT: andi sp, sp, -32
+; ZBA-NEXT: li a0, 3
+; ZBA-NEXT: sd a0, 8(sp)
+; ZBA-NEXT: addi sp, s0, -32
+; ZBA-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; ZBA-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; ZBA-NEXT: addi sp, sp, 32
+; ZBA-NEXT: ret
%x1 = alloca i64
%v1 = alloca <vscale x 1 x i64>
%v2 = alloca <vscale x 4 x i64>
More information about the llvm-commits
mailing list