[llvm] f590963 - [RISCV] Implement RISCVTTIImpl::getPreferredAddressingMode for HasVendorXCVmem (#120533)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Dec 30 18:56:31 PST 2024
Author: Philipp van Kempen
Date: 2024-12-31T10:56:28+08:00
New Revision: f590963db836ccbf7c547a3dea9dc719f24444d1
URL: https://github.com/llvm/llvm-project/commit/f590963db836ccbf7c547a3dea9dc719f24444d1
DIFF: https://github.com/llvm/llvm-project/commit/f590963db836ccbf7c547a3dea9dc719f24444d1.diff
LOG: [RISCV] Implement RISCVTTIImpl::getPreferredAddressingMode for HasVendorXCVmem (#120533)
For a simple matmult kernel this heuristic reduces the length of the
critical basic block from 15 to 12 instructions, resulting in a 20%
speedup.
**Without heuristic:**
```
13688: 001b838b cv.lb t2, (s7), 0x1
1368c: 09cdbcab cv.lb s9, t3(s11)
13690: 089db62b cv.lb a2, s1(s11)
13694: 092dbdab cv.lb s11, s2(s11)
13698: 001d028b cv.lb t0, (s10), 0x1
1369c: 00f282b3 add t0, t0, a5
136a0: 9072b52b cv.mac a0, t0, t2
136a4: 9192bfab cv.mac t6, t0, s9
136a8: 90c2beab cv.mac t4, t0, a2
136ac: 91b2bf2b cv.mac t5, t0, s11
136b0: fffc0c13 addi s8, s8, -0x1
136b4: 018e0633 add a2, t3, s8
136b8: 91b2b0ab cv.mac ra, t0, s11
136bc: 000b8d93 mv s11, s7
136c0: fc0614e3 bnez a2, 0x13688 <muriscv_nn_vec_mat_mult_t_s8+0x2f0>
#instrs = 15
```
**With heuristic:**
```
7bc0: 001c860b cv.lb a2, (s9), 0x1
7bc4: 001e0d0b cv.lb s10, (t3), 0x1
7bc8: 001e808b cv.lb ra, (t4), 0x1
7bcc: 0015038b cv.lb t2, (a0), 0x1
7bd0: 001c028b cv.lb t0, (s8), 0x1
7bd4: 00f282b3 add t0, t0, a5
7bd8: 90c2bfab cv.mac t6, t0, a2
7bdc: 91a2b92b cv.mac s2, t0, s10
7be0: 9012b5ab cv.mac a1, t0, ra
7be4: 9072b9ab cv.mac s3, t0, t2
7be8: 9072b72b cv.mac a4, t0, t2
7bec: fc851ae3 bne a0, s0, 0x7bc0 <muriscv_nn_vec_mat_mult_t_s8+0x338>
#instrs = 12
improvement = 1 - 12/15 = 0.2 = 20%
```
Added:
llvm/test/CodeGen/RISCV/xcvmem-heuristic.ll
Modified:
llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
Removed:
################################################################################
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index 49192bd6380223..2f9beb0b3983c5 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -2329,6 +2329,15 @@ unsigned RISCVTTIImpl::getMaximumVF(unsigned ElemWidth, unsigned Opcode) const {
return std::max<unsigned>(1U, RegWidth.getFixedValue() / ElemWidth);
}
+TTI::AddressingModeKind
+RISCVTTIImpl::getPreferredAddressingMode(const Loop *L,
+ ScalarEvolution *SE) const {
+ if (ST->hasVendorXCVmem() && !ST->is64Bit())
+ return TTI::AMK_PostIndexed;
+
+ return BasicTTIImplBase::getPreferredAddressingMode(L, SE);
+}
+
bool RISCVTTIImpl::isLSRCostLess(const TargetTransformInfo::LSRCost &C1,
const TargetTransformInfo::LSRCost &C2) {
// RISC-V specific here are "instruction number 1st priority".
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
index bd90bfed6e2c95..9b364391f0fa47 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
@@ -388,6 +388,9 @@ class RISCVTTIImpl : public BasicTTIImplBase<RISCVTTIImpl> {
llvm_unreachable("unknown register class");
}
+ TTI::AddressingModeKind getPreferredAddressingMode(const Loop *L,
+ ScalarEvolution *SE) const;
+
unsigned getRegisterClassForType(bool Vector, Type *Ty = nullptr) const {
if (Vector)
return RISCVRegisterClass::VRRC;
diff --git a/llvm/test/CodeGen/RISCV/xcvmem-heuristic.ll b/llvm/test/CodeGen/RISCV/xcvmem-heuristic.ll
new file mode 100644
index 00000000000000..c8832bf49dd6a2
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/xcvmem-heuristic.ll
@@ -0,0 +1,34 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -O3 -mtriple=riscv32 -mattr=+m,+xcvmem -verify-machineinstrs < %s \
+; RUN: | FileCheck %s --check-prefixes=CHECK
+
+define i32 @test_heuristic(ptr %b, i32 %e, i1 %0) {
+; CHECK-LABEL: test_heuristic:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: add a3, a0, a1
+; CHECK-NEXT: andi a2, a2, 1
+; CHECK-NEXT: .LBB0_1: # %loop
+; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: cv.lbu a1, (a3), 1
+; CHECK-NEXT: addi a0, a0, 1
+; CHECK-NEXT: beqz a2, .LBB0_1
+; CHECK-NEXT: # %bb.2: # %exit
+; CHECK-NEXT: mv a0, a1
+; CHECK-NEXT: ret
+entry:
+ %1 = getelementptr i8, ptr %b, i32 %e
+ br label %loop
+
+loop: ; preds = %loop, %entry
+ %2 = phi ptr [ %b, %entry ], [ %7, %loop ]
+ %3 = phi ptr [ %1, %entry ], [ %8, %loop ]
+ %4 = load i8, ptr %2, align 1
+ %5 = load i8, ptr %3, align 1
+ %6 = zext i8 %5 to i32
+ %7 = getelementptr i8, ptr %2, i32 1
+ %8 = getelementptr i8, ptr %3, i32 1
+ br i1 %0, label %exit, label %loop
+
+exit: ; preds = %loop
+ ret i32 %6
+}
More information about the llvm-commits
mailing list