[llvm] [WIP][RISCV] Tune flag for fast vrgather.vv (PR #124664)
Petr Penzin via llvm-commits
llvm-commits at lists.llvm.org
Mon Jan 27 16:53:40 PST 2025
https://github.com/ppenzin created https://github.com/llvm/llvm-project/pull/124664
WIP, _for review_
Add a tune knob that models the cost of vrgather.vv as N*log2(N) instead of the default quadratic N*N, where N is the LMUL cost.
>From f048b525d82757f5acadb68385a5c52680606396 Mon Sep 17 00:00:00 2001
From: Petr Penzin <ppenzin at tenstorrent.com>
Date: Mon, 27 Jan 2025 18:47:59 -0600
Subject: [PATCH] [RISCV] Tune flag for fast vrgather.vv
Add tune knob for N*Log2(N) vrgather.vv cost.
---
llvm/lib/Target/RISCV/RISCVFeatures.td | 4 ++++
llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 5 +++++
llvm/lib/Target/RISCV/RISCVProcessors.td | 1 +
llvm/test/CodeGen/RISCV/features-info.ll | 1 +
4 files changed, 11 insertions(+)
diff --git a/llvm/lib/Target/RISCV/RISCVFeatures.td b/llvm/lib/Target/RISCV/RISCVFeatures.td
index 4119dd77804f1a..966c56185b3fd9 100644
--- a/llvm/lib/Target/RISCV/RISCVFeatures.td
+++ b/llvm/lib/Target/RISCV/RISCVFeatures.td
@@ -1365,6 +1365,10 @@ def FeatureUnalignedVectorMem
"true", "Has reasonably performant unaligned vector "
"loads and stores">;
+def TuneFastVRGather
+ : SubtargetFeature<"fast-vrgather", "HasFastVRGather",
+ "true", "Has vrgather.vv with LMUL*log2(LMUL) latency">;
+
def TunePostRAScheduler : SubtargetFeature<"use-postra-scheduler",
"UsePostRAScheduler", "true", "Schedule again after register allocation">;
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 5e5bc0819a10cc..e49c1e7ce9edbc 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -2848,6 +2848,11 @@ InstructionCost RISCVTargetLowering::getLMULCost(MVT VT) const {
/// is generally quadratic in the number of vreg implied by LMUL. Note that
/// operand (index and possibly mask) are handled separately.
InstructionCost RISCVTargetLowering::getVRGatherVVCost(MVT VT) const {
+  InstructionCost LMULCost = getLMULCost(VT);
+  if (Subtarget.hasFastVRGather() && LMULCost.isValid()) {
+    unsigned Log = Log2_64(*LMULCost.getValue());
+    return LMULCost * (Log ? Log : 1); // Log2_64(1) == 0; never cost as free.
+  }
return getLMULCost(VT) * getLMULCost(VT);
}
diff --git a/llvm/lib/Target/RISCV/RISCVProcessors.td b/llvm/lib/Target/RISCV/RISCVProcessors.td
index 6dfed7ddeb9f63..4f6c9a0229d51b 100644
--- a/llvm/lib/Target/RISCV/RISCVProcessors.td
+++ b/llvm/lib/Target/RISCV/RISCVProcessors.td
@@ -490,6 +490,7 @@ def TENSTORRENT_ASCALON_D8 : RISCVProcessorModel<"tt-ascalon-d8",
FeatureUnalignedScalarMem,
FeatureUnalignedVectorMem]),
[TuneNoDefaultUnroll,
+ TuneFastVRGather,
TuneOptimizedZeroStrideLoad,
TunePostRAScheduler]>;
diff --git a/llvm/test/CodeGen/RISCV/features-info.ll b/llvm/test/CodeGen/RISCV/features-info.ll
index 70fbda47a14a14..dab9bf92cef17d 100644
--- a/llvm/test/CodeGen/RISCV/features-info.ll
+++ b/llvm/test/CodeGen/RISCV/features-info.ll
@@ -31,6 +31,7 @@
; CHECK: experimental-zvbc32e - 'Zvbc32e' (Vector Carryless Multiplication with 32-bits elements).
; CHECK: experimental-zvkgs - 'Zvkgs' (Vector-Scalar GCM instructions for Cryptography).
; CHECK: f - 'F' (Single-Precision Floating-Point).
+; CHECK: fast-vrgather - Has vrgather.vv with LMUL*log2(LMUL) latency
; CHECK: forced-atomics - Assume that lock-free native-width atomics are available.
; CHECK: h - 'H' (Hypervisor).
; CHECK: i - 'I' (Base Integer Instruction Set).
More information about the llvm-commits
mailing list