[llvm-branch-commits] [llvm] a4fb217 - [RISCV] Extend use of SHXADD instructions in RVV spill/reload code.
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Tue Jul 12 01:24:05 PDT 2022
Author: jacquesguan
Date: 2022-07-12T16:15:19+08:00
New Revision: a4fb2175089cb26f3e608b9ad838b06aee915ebc
URL: https://github.com/llvm/llvm-project/commit/a4fb2175089cb26f3e608b9ad838b06aee915ebc
DIFF: https://github.com/llvm/llvm-project/commit/a4fb2175089cb26f3e608b9ad838b06aee915ebc.diff
LOG: [RISCV] Extend use of SHXADD instructions in RVV spill/reload code.
This patch extends D124824. It uses SHXADD+SLLI to emit 3, 5, or 9 multiplied by a power of 2.
Differential Revision: https://reviews.llvm.org/D129179
Added:
Modified:
llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
llvm/test/CodeGen/RISCV/rvv/allocate-lmul-2-4-8.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index 685604ad9a59b..7d75834af7ebb 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -1799,17 +1799,30 @@ Register RISCVInstrInfo::getVLENFactoredAmount(MachineFunction &MF,
.addReg(VL, RegState::Kill)
.addImm(ShiftAmount)
.setMIFlag(Flag);
- } else if ((NumOfVReg == 3 || NumOfVReg == 5 || NumOfVReg == 9) &&
- STI.hasStdExtZba()) {
- // We can use Zba SHXADD instructions for multiply in some cases.
- // TODO: Generalize to SHXADD+SLLI.
+ } else if (STI.hasStdExtZba() &&
+ ((NumOfVReg % 3 == 0 && isPowerOf2_64(NumOfVReg / 3)) ||
+ (NumOfVReg % 5 == 0 && isPowerOf2_64(NumOfVReg / 5)) ||
+ (NumOfVReg % 9 == 0 && isPowerOf2_64(NumOfVReg / 9)))) {
+ // We can use Zba SHXADD+SLLI instructions for multiply in some cases.
unsigned Opc;
- switch (NumOfVReg) {
- default: llvm_unreachable("Unexpected number of vregs");
- case 3: Opc = RISCV::SH1ADD; break;
- case 5: Opc = RISCV::SH2ADD; break;
- case 9: Opc = RISCV::SH3ADD; break;
+ uint32_t ShiftAmount;
+ if (NumOfVReg % 9 == 0) {
+ Opc = RISCV::SH3ADD;
+ ShiftAmount = Log2_64(NumOfVReg / 9);
+ } else if (NumOfVReg % 5 == 0) {
+ Opc = RISCV::SH2ADD;
+ ShiftAmount = Log2_64(NumOfVReg / 5);
+ } else if (NumOfVReg % 3 == 0) {
+ Opc = RISCV::SH1ADD;
+ ShiftAmount = Log2_64(NumOfVReg / 3);
+ } else {
+ llvm_unreachable("Unexpected number of vregs");
}
+ if (ShiftAmount)
+ BuildMI(MBB, II, DL, get(RISCV::SLLI), VL)
+ .addReg(VL, RegState::Kill)
+ .addImm(ShiftAmount)
+ .setMIFlag(Flag);
BuildMI(MBB, II, DL, get(Opc), VL)
.addReg(VL, RegState::Kill)
.addReg(VL)
diff --git a/llvm/test/CodeGen/RISCV/rvv/allocate-lmul-2-4-8.ll b/llvm/test/CodeGen/RISCV/rvv/allocate-lmul-2-4-8.ll
index 7b5e0138410a5..0f2fbae8cb7f2 100644
--- a/llvm/test/CodeGen/RISCV/rvv/allocate-lmul-2-4-8.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/allocate-lmul-2-4-8.ll
@@ -1,8 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv64 -mattr=+m,+v -verify-machineinstrs < %s \
-; RUN: | FileCheck %s
+; RUN: | FileCheck %s --check-prefixes=CHECK,NOZBA
; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+zba -verify-machineinstrs < %s \
-; RUN: | FileCheck %s
+; RUN: | FileCheck %s --check-prefixes=CHECK,ZBA
define void @lmul1() nounwind {
; CHECK-LABEL: lmul1:
@@ -210,22 +210,39 @@ define void @lmul4_and_2_x2_0() nounwind {
}
define void @lmul4_and_2_x2_1() nounwind {
-; CHECK-LABEL: lmul4_and_2_x2_1:
-; CHECK: # %bb.0:
-; CHECK-NEXT: addi sp, sp, -48
-; CHECK-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; CHECK-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; CHECK-NEXT: addi s0, sp, 48
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: li a1, 12
-; CHECK-NEXT: mul a0, a0, a1
-; CHECK-NEXT: sub sp, sp, a0
-; CHECK-NEXT: andi sp, sp, -32
-; CHECK-NEXT: addi sp, s0, -48
-; CHECK-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; CHECK-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; CHECK-NEXT: addi sp, sp, 48
-; CHECK-NEXT: ret
+; NOZBA-LABEL: lmul4_and_2_x2_1:
+; NOZBA: # %bb.0:
+; NOZBA-NEXT: addi sp, sp, -48
+; NOZBA-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; NOZBA-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; NOZBA-NEXT: addi s0, sp, 48
+; NOZBA-NEXT: csrr a0, vlenb
+; NOZBA-NEXT: li a1, 12
+; NOZBA-NEXT: mul a0, a0, a1
+; NOZBA-NEXT: sub sp, sp, a0
+; NOZBA-NEXT: andi sp, sp, -32
+; NOZBA-NEXT: addi sp, s0, -48
+; NOZBA-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; NOZBA-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; NOZBA-NEXT: addi sp, sp, 48
+; NOZBA-NEXT: ret
+;
+; ZBA-LABEL: lmul4_and_2_x2_1:
+; ZBA: # %bb.0:
+; ZBA-NEXT: addi sp, sp, -48
+; ZBA-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; ZBA-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; ZBA-NEXT: addi s0, sp, 48
+; ZBA-NEXT: csrr a0, vlenb
+; ZBA-NEXT: slli a0, a0, 2
+; ZBA-NEXT: sh1add a0, a0, a0
+; ZBA-NEXT: sub sp, sp, a0
+; ZBA-NEXT: andi sp, sp, -32
+; ZBA-NEXT: addi sp, s0, -48
+; ZBA-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; ZBA-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; ZBA-NEXT: addi sp, sp, 48
+; ZBA-NEXT: ret
%v1 = alloca <vscale x 4 x i64>
%v3 = alloca <vscale x 4 x i64>
%v2 = alloca <vscale x 2 x i64>
@@ -375,22 +392,39 @@ define void @masks() nounwind {
}
define void @lmul_8_x5() nounwind {
-; CHECK-LABEL: lmul_8_x5:
-; CHECK: # %bb.0:
-; CHECK-NEXT: addi sp, sp, -80
-; CHECK-NEXT: sd ra, 72(sp) # 8-byte Folded Spill
-; CHECK-NEXT: sd s0, 64(sp) # 8-byte Folded Spill
-; CHECK-NEXT: addi s0, sp, 80
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: li a1, 40
-; CHECK-NEXT: mul a0, a0, a1
-; CHECK-NEXT: sub sp, sp, a0
-; CHECK-NEXT: andi sp, sp, -64
-; CHECK-NEXT: addi sp, s0, -80
-; CHECK-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
-; CHECK-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
-; CHECK-NEXT: addi sp, sp, 80
-; CHECK-NEXT: ret
+; NOZBA-LABEL: lmul_8_x5:
+; NOZBA: # %bb.0:
+; NOZBA-NEXT: addi sp, sp, -80
+; NOZBA-NEXT: sd ra, 72(sp) # 8-byte Folded Spill
+; NOZBA-NEXT: sd s0, 64(sp) # 8-byte Folded Spill
+; NOZBA-NEXT: addi s0, sp, 80
+; NOZBA-NEXT: csrr a0, vlenb
+; NOZBA-NEXT: li a1, 40
+; NOZBA-NEXT: mul a0, a0, a1
+; NOZBA-NEXT: sub sp, sp, a0
+; NOZBA-NEXT: andi sp, sp, -64
+; NOZBA-NEXT: addi sp, s0, -80
+; NOZBA-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
+; NOZBA-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
+; NOZBA-NEXT: addi sp, sp, 80
+; NOZBA-NEXT: ret
+;
+; ZBA-LABEL: lmul_8_x5:
+; ZBA: # %bb.0:
+; ZBA-NEXT: addi sp, sp, -80
+; ZBA-NEXT: sd ra, 72(sp) # 8-byte Folded Spill
+; ZBA-NEXT: sd s0, 64(sp) # 8-byte Folded Spill
+; ZBA-NEXT: addi s0, sp, 80
+; ZBA-NEXT: csrr a0, vlenb
+; ZBA-NEXT: slli a0, a0, 3
+; ZBA-NEXT: sh2add a0, a0, a0
+; ZBA-NEXT: sub sp, sp, a0
+; ZBA-NEXT: andi sp, sp, -64
+; ZBA-NEXT: addi sp, s0, -80
+; ZBA-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
+; ZBA-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
+; ZBA-NEXT: addi sp, sp, 80
+; ZBA-NEXT: ret
%v1 = alloca <vscale x 8 x i64>
%v2 = alloca <vscale x 8 x i64>
%v3 = alloca <vscale x 8 x i64>
@@ -400,22 +434,39 @@ define void @lmul_8_x5() nounwind {
}
define void @lmul_8_x9() nounwind {
-; CHECK-LABEL: lmul_8_x9:
-; CHECK: # %bb.0:
-; CHECK-NEXT: addi sp, sp, -80
-; CHECK-NEXT: sd ra, 72(sp) # 8-byte Folded Spill
-; CHECK-NEXT: sd s0, 64(sp) # 8-byte Folded Spill
-; CHECK-NEXT: addi s0, sp, 80
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: li a1, 72
-; CHECK-NEXT: mul a0, a0, a1
-; CHECK-NEXT: sub sp, sp, a0
-; CHECK-NEXT: andi sp, sp, -64
-; CHECK-NEXT: addi sp, s0, -80
-; CHECK-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
-; CHECK-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
-; CHECK-NEXT: addi sp, sp, 80
-; CHECK-NEXT: ret
+; NOZBA-LABEL: lmul_8_x9:
+; NOZBA: # %bb.0:
+; NOZBA-NEXT: addi sp, sp, -80
+; NOZBA-NEXT: sd ra, 72(sp) # 8-byte Folded Spill
+; NOZBA-NEXT: sd s0, 64(sp) # 8-byte Folded Spill
+; NOZBA-NEXT: addi s0, sp, 80
+; NOZBA-NEXT: csrr a0, vlenb
+; NOZBA-NEXT: li a1, 72
+; NOZBA-NEXT: mul a0, a0, a1
+; NOZBA-NEXT: sub sp, sp, a0
+; NOZBA-NEXT: andi sp, sp, -64
+; NOZBA-NEXT: addi sp, s0, -80
+; NOZBA-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
+; NOZBA-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
+; NOZBA-NEXT: addi sp, sp, 80
+; NOZBA-NEXT: ret
+;
+; ZBA-LABEL: lmul_8_x9:
+; ZBA: # %bb.0:
+; ZBA-NEXT: addi sp, sp, -80
+; ZBA-NEXT: sd ra, 72(sp) # 8-byte Folded Spill
+; ZBA-NEXT: sd s0, 64(sp) # 8-byte Folded Spill
+; ZBA-NEXT: addi s0, sp, 80
+; ZBA-NEXT: csrr a0, vlenb
+; ZBA-NEXT: slli a0, a0, 3
+; ZBA-NEXT: sh3add a0, a0, a0
+; ZBA-NEXT: sub sp, sp, a0
+; ZBA-NEXT: andi sp, sp, -64
+; ZBA-NEXT: addi sp, s0, -80
+; ZBA-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
+; ZBA-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
+; ZBA-NEXT: addi sp, sp, 80
+; ZBA-NEXT: ret
%v1 = alloca <vscale x 8 x i64>
%v2 = alloca <vscale x 8 x i64>
%v3 = alloca <vscale x 8 x i64>
More information about the llvm-branch-commits
mailing list