[llvm] [RISCV] Enable LoopDataPrefetch pass (PR #66201)
Wang Pengcheng via llvm-commits
llvm-commits at lists.llvm.org
Wed Sep 13 05:21:02 PDT 2023
https://github.com/wangpc-pp updated https://github.com/llvm/llvm-project/pull/66201:
>From b87f1417c4e02cda58994da9501d87900f35e55d Mon Sep 17 00:00:00 2001
From: wangpc <wangpengcheng.pp at bytedance.com>
Date: Wed, 13 Sep 2023 18:25:00 +0800
Subject: [PATCH 1/2] [RISCV] Add searchable table for tune information
There is a lot of information that can be used for tuning, like
alignments, cache line size, etc. But we can't make all of it a
`SubtargetFeature` because some of it does not have enumerable
values, for example, `PrefetchDistance` used by `LoopDataPrefetch`.
In this patch, a searchable table `RISCVTuneInfoTable` is added,
in which each entry contains the CPU name and all tune information
defined in `RISCVTuneInfo`. Each field of `RISCVTuneInfo` should
have a default value and processor definitions can override the
default value via `let` statements.
When initializing `RISCVSubtarget`, we use `TuneCPU` as the
key to search the tune info table. So, if the tune CPU is not
specified, the specified `CPU` is used as the key instead, which
I think is the expected behavior.
This patch almost undoes 61ab106, in which I added tune features
of preferred function/loop alignments. More tune information can
be added in the future.
---
llvm/lib/Target/RISCV/RISCVFeatures.td | 9 ---------
llvm/lib/Target/RISCV/RISCVProcessors.td | 20 ++++++++++++++++++--
llvm/lib/Target/RISCV/RISCVSubtarget.cpp | 7 +++++++
llvm/lib/Target/RISCV/RISCVSubtarget.h | 23 +++++++++++++++++++----
llvm/test/CodeGen/RISCV/align-loops.ll | 2 --
llvm/test/CodeGen/RISCV/align.ll | 4 ----
6 files changed, 44 insertions(+), 21 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVFeatures.td b/llvm/lib/Target/RISCV/RISCVFeatures.td
index 6381263b37613b3..367f0fbbe44801b 100644
--- a/llvm/lib/Target/RISCV/RISCVFeatures.td
+++ b/llvm/lib/Target/RISCV/RISCVFeatures.td
@@ -950,12 +950,3 @@ def FeatureTaggedGlobals : SubtargetFeature<"tagged-globals",
"AllowTaggedGlobals",
"true", "Use an instruction sequence for taking the address of a global "
"that allows a memory tag in the upper address bits">;
-
-foreach align = [2, 4, 8, 16, 32, 64] in {
- def TunePrefFunctionAlignment # align :
- SubtargetFeature<"pref-func-align-" # align, "PrefFunctionAlignment",
- "Align(" # align # ")", "Set preferred function alignment to " # align # " bytes">;
- def TunePrefLoopAlignment # align :
- SubtargetFeature<"pref-loop-align-" # align, "PrefLoopAlignment",
- "Align(" # align # ")", "Set preferred loop alignment to " # align # " bytes">;
-}
diff --git a/llvm/lib/Target/RISCV/RISCVProcessors.td b/llvm/lib/Target/RISCV/RISCVProcessors.td
index 01291001cd7ca24..11443811fe068f0 100644
--- a/llvm/lib/Target/RISCV/RISCVProcessors.td
+++ b/llvm/lib/Target/RISCV/RISCVProcessors.td
@@ -10,12 +10,28 @@
// RISC-V processors supported.
//===----------------------------------------------------------------------===//
+class RISCVTuneInfo {
+ bits<8> PrefFunctionAlignment = 1;
+ bits<8> PrefLoopAlignment = 1;
+}
+
+def RISCVTuneInfoTable : GenericTable {
+ let FilterClass = "RISCVTuneInfo";
+ let CppTypeName = "RISCVTuneInfo";
+ let Fields = ["Name", "PrefFunctionAlignment", "PrefLoopAlignment"];
+}
+
+def getRISCVTuneInfo : SearchIndex {
+ let Table = RISCVTuneInfoTable;
+ let Key = ["Name"];
+}
+
class RISCVProcessorModel<string n,
SchedMachineModel m,
list<SubtargetFeature> f,
list<SubtargetFeature> tunef = [],
string default_march = "">
- : ProcessorModel<n, m, f, tunef> {
+ : ProcessorModel<n, m, f, tunef>, RISCVTuneInfo {
string DefaultMarch = default_march;
}
@@ -23,7 +39,7 @@ class RISCVTuneProcessorModel<string n,
SchedMachineModel m,
list<SubtargetFeature> tunef = [],
list<SubtargetFeature> f = []>
- : ProcessorModel<n, m, f,tunef>;
+ : ProcessorModel<n, m, f,tunef>, RISCVTuneInfo;
def GENERIC_RV32 : RISCVProcessorModel<"generic-rv32",
NoSchedModel,
diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.cpp b/llvm/lib/Target/RISCV/RISCVSubtarget.cpp
index aa0275830e2a87a..572aa676edbbef4 100644
--- a/llvm/lib/Target/RISCV/RISCVSubtarget.cpp
+++ b/llvm/lib/Target/RISCV/RISCVSubtarget.cpp
@@ -29,6 +29,12 @@ using namespace llvm;
#define GET_SUBTARGETINFO_CTOR
#include "RISCVGenSubtargetInfo.inc"
+namespace llvm::RISCVTuneInfoTable {
+
+#define GET_RISCVTuneInfoTable_IMPL
+#include "RISCVGenSearchableTables.inc"
+} // namespace llvm::RISCVTuneInfoTable
+
static cl::opt<bool> EnableSubRegLiveness("riscv-enable-subreg-liveness",
cl::init(true), cl::Hidden);
@@ -66,6 +72,7 @@ RISCVSubtarget::initializeSubtargetDependencies(const Triple &TT, StringRef CPU,
TuneCPU = CPU;
ParseSubtargetFeatures(CPU, TuneCPU, FS);
+ TuneInfo = RISCVTuneInfoTable::getRISCVTuneInfo(TuneCPU);
TargetABI = RISCVABI::computeTargetABI(TT, getFeatureBits(), ABIName);
RISCVFeatures::validate(TT, getFeatureBits());
return *this;
diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.h b/llvm/lib/Target/RISCV/RISCVSubtarget.h
index cf64dbc21bd8a8b..027d32d54160793 100644
--- a/llvm/lib/Target/RISCV/RISCVSubtarget.h
+++ b/llvm/lib/Target/RISCV/RISCVSubtarget.h
@@ -32,6 +32,18 @@
namespace llvm {
class StringRef;
+namespace RISCVTuneInfoTable {
+
+struct RISCVTuneInfo {
+ const char *Name;
+ uint8_t PrefFunctionAlignment;
+ uint8_t PrefLoopAlignment;
+};
+
+#define GET_RISCVTuneInfoTable_DECL
+#include "RISCVGenSearchableTables.inc"
+} // namespace RISCVTuneInfoTable
+
class RISCVSubtarget : public RISCVGenSubtargetInfo {
public:
enum RISCVProcFamilyEnum : uint8_t {
@@ -54,8 +66,7 @@ class RISCVSubtarget : public RISCVGenSubtargetInfo {
uint8_t MaxInterleaveFactor = 2;
RISCVABI::ABI TargetABI = RISCVABI::ABI_Unknown;
std::bitset<RISCV::NUM_TARGET_REGS> UserReservedRegister;
- Align PrefFunctionAlignment;
- Align PrefLoopAlignment;
+ const RISCVTuneInfoTable::RISCVTuneInfo *TuneInfo;
RISCVFrameLowering FrameLowering;
RISCVInstrInfo InstrInfo;
@@ -96,8 +107,12 @@ class RISCVSubtarget : public RISCVGenSubtargetInfo {
}
bool enableMachineScheduler() const override { return true; }
- Align getPrefFunctionAlignment() const { return PrefFunctionAlignment; }
- Align getPrefLoopAlignment() const { return PrefLoopAlignment; }
+ Align getPrefFunctionAlignment() const {
+ return Align(TuneInfo->PrefFunctionAlignment);
+ }
+ Align getPrefLoopAlignment() const {
+ return Align(TuneInfo->PrefLoopAlignment);
+ }
/// Returns RISC-V processor family.
/// Avoid this function! CPU specifics should be kept local to this class
diff --git a/llvm/test/CodeGen/RISCV/align-loops.ll b/llvm/test/CodeGen/RISCV/align-loops.ll
index 5ef78c74d03532b..efa03992b6277f6 100644
--- a/llvm/test/CodeGen/RISCV/align-loops.ll
+++ b/llvm/test/CodeGen/RISCV/align-loops.ll
@@ -1,8 +1,6 @@
; RUN: llc < %s -mtriple=riscv64 | FileCheck %s
; RUN: llc < %s -mtriple=riscv64 -align-loops=16 | FileCheck %s -check-prefix=ALIGN_16
; RUN: llc < %s -mtriple=riscv64 -align-loops=32 | FileCheck %s -check-prefix=ALIGN_32
-; RUN: llc < %s -mtriple=riscv64 -mattr=+pref-loop-align-16 | FileCheck %s -check-prefix=ALIGN_16
-; RUN: llc < %s -mtriple=riscv64 -mattr=+pref-loop-align-32 | FileCheck %s -check-prefix=ALIGN_32
declare void @foo()
diff --git a/llvm/test/CodeGen/RISCV/align.ll b/llvm/test/CodeGen/RISCV/align.ll
index 1fb4585f8422aa4..5807fc14efc292d 100644
--- a/llvm/test/CodeGen/RISCV/align.ll
+++ b/llvm/test/CodeGen/RISCV/align.ll
@@ -2,8 +2,6 @@
; RUN: | FileCheck %s -check-prefix=RV32I
; RUN: llc -mtriple=riscv32 -mattr=+c -verify-machineinstrs < %s \
; RUN: | FileCheck %s -check-prefix=RV32C
-; RUN: llc -mtriple=riscv32 -mattr=+pref-func-align-32 -verify-machineinstrs < %s \
-; RUN: | FileCheck %s -check-prefix=ALIGN-32
; RUN: llc -filetype=obj -mtriple=riscv32 < %s -o %t
; RUN: llvm-readelf -S %t | FileCheck %s --check-prefixes=SEC,SEC-I
; RUN: llc -filetype=obj -mtriple=riscv32 -mattr=+c < %s -o %t
@@ -18,8 +16,6 @@ define void @foo() {
;RV32I: foo:
;RV32C: .p2align 1
;RV32C: foo:
-;ALIGN-32: .p2align 5
-;ALIGN-32: foo:
entry:
ret void
}
>From 6b8030f567287c64ebf552fb811f74ffb38bd591 Mon Sep 17 00:00:00 2001
From: wangpc <wangpengcheng.pp at bytedance.com>
Date: Fri, 8 Sep 2023 17:59:24 +0800
Subject: [PATCH 2/2] [RISCV] Enable LoopDataPrefetch pass
So that we can benefit from data prefetching when the `Zicbop`
extension is supported.
Tune information for data prefetching is added to `RISCVTuneInfo`.
This PR is stacked on #66193.
---
llvm/lib/Target/RISCV/RISCVProcessors.td | 10 ++++-
llvm/lib/Target/RISCV/RISCVSubtarget.h | 22 ++++++++++
llvm/lib/Target/RISCV/RISCVTargetMachine.cpp | 9 ++++
llvm/test/CodeGen/RISCV/O3-pipeline.ll | 10 ++++-
.../LoopDataPrefetch/RISCV/basic.ll | 44 +++++++++++++++++++
.../LoopDataPrefetch/RISCV/lit.local.cfg | 2 +
6 files changed, 94 insertions(+), 3 deletions(-)
create mode 100644 llvm/test/Transforms/LoopDataPrefetch/RISCV/basic.ll
create mode 100644 llvm/test/Transforms/LoopDataPrefetch/RISCV/lit.local.cfg
diff --git a/llvm/lib/Target/RISCV/RISCVProcessors.td b/llvm/lib/Target/RISCV/RISCVProcessors.td
index 11443811fe068f0..52a9720df37604b 100644
--- a/llvm/lib/Target/RISCV/RISCVProcessors.td
+++ b/llvm/lib/Target/RISCV/RISCVProcessors.td
@@ -13,12 +13,20 @@
class RISCVTuneInfo {
bits<8> PrefFunctionAlignment = 1;
bits<8> PrefLoopAlignment = 1;
+
+ // Information needed by LoopDataPrefetch.
+ bits<16> CacheLineSize = 0;
+ bits<16> PrefetchDistance = 0;
+ bits<16> MinPrefetchStride = 1;
+ bits<16> MaxPrefetchIterationsAhead = 65535;
}
def RISCVTuneInfoTable : GenericTable {
let FilterClass = "RISCVTuneInfo";
let CppTypeName = "RISCVTuneInfo";
- let Fields = ["Name", "PrefFunctionAlignment", "PrefLoopAlignment"];
+ let Fields = ["Name", "PrefFunctionAlignment", "PrefLoopAlignment",
+ "CacheLineSize", "PrefetchDistance",
+ "MinPrefetchStride", "MaxPrefetchIterationsAhead"];
}
def getRISCVTuneInfo : SearchIndex {
diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.h b/llvm/lib/Target/RISCV/RISCVSubtarget.h
index 027d32d54160793..3783c4a94e9ca98 100644
--- a/llvm/lib/Target/RISCV/RISCVSubtarget.h
+++ b/llvm/lib/Target/RISCV/RISCVSubtarget.h
@@ -38,6 +38,12 @@ struct RISCVTuneInfo {
const char *Name;
uint8_t PrefFunctionAlignment;
uint8_t PrefLoopAlignment;
+
+ // Information needed by LoopDataPrefetch.
+ uint16_t CacheLineSize;
+ uint16_t PrefetchDistance;
+ uint16_t MinPrefetchStride;
+ uint16_t MaxPrefetchIterationsAhead;
};
#define GET_RISCVTuneInfoTable_DECL
@@ -242,6 +248,22 @@ class RISCVSubtarget : public RISCVGenSubtargetInfo {
&Mutations) const override;
bool useAA() const override;
+
+ unsigned getCacheLineSize() const override {
+ return TuneInfo->CacheLineSize;
+ };
+ unsigned getPrefetchDistance() const override {
+ return TuneInfo->PrefetchDistance;
+ };
+ unsigned getMinPrefetchStride(unsigned NumMemAccesses,
+ unsigned NumStridedMemAccesses,
+ unsigned NumPrefetches,
+ bool HasCall) const override {
+ return TuneInfo->MinPrefetchStride;
+ };
+ unsigned getMaxPrefetchIterationsAhead() const override {
+ return TuneInfo->MaxPrefetchIterationsAhead;
+ };
};
} // End llvm namespace
diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
index d4fd66c9b360c4e..3a64f4cb5214ce0 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
@@ -34,6 +34,7 @@
#include "llvm/Support/FormattedStream.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Transforms/IPO.h"
+#include "llvm/Transforms/Scalar.h"
#include <optional>
using namespace llvm;
@@ -71,6 +72,11 @@ static cl::opt<bool> EnableRISCVCopyPropagation(
cl::desc("Enable the copy propagation with RISC-V copy instr"),
cl::init(true), cl::Hidden);
+static cl::opt<bool>
+ EnableLoopDataPrefetch("riscv-enable-loop-data-prefetch", cl::Hidden,
+ cl::desc("Enable the loop data prefetch pass"),
+ cl::init(true));
+
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeRISCVTarget() {
RegisterTargetMachine<RISCVTargetMachine> X(getTheRISCV32Target());
RegisterTargetMachine<RISCVTargetMachine> Y(getTheRISCV64Target());
@@ -285,6 +291,9 @@ void RISCVPassConfig::addIRPasses() {
addPass(createAtomicExpandPass());
if (getOptLevel() != CodeGenOpt::None) {
+ if (EnableLoopDataPrefetch)
+ addPass(createLoopDataPrefetchPass());
+
addPass(createRISCVGatherScatterLoweringPass());
addPass(createInterleavedAccessPass());
addPass(createRISCVCodeGenPreparePass());
diff --git a/llvm/test/CodeGen/RISCV/O3-pipeline.ll b/llvm/test/CodeGen/RISCV/O3-pipeline.ll
index a9636cdf8bb17f3..efd4b7b2e8135eb 100644
--- a/llvm/test/CodeGen/RISCV/O3-pipeline.ll
+++ b/llvm/test/CodeGen/RISCV/O3-pipeline.ll
@@ -12,10 +12,10 @@
; CHECK-NEXT: Target Pass Configuration
; CHECK-NEXT: Machine Module Information
; CHECK-NEXT: Target Transform Information
-; CHECK-NEXT: Type-Based Alias Analysis
-; CHECK-NEXT: Scoped NoAlias Alias Analysis
; CHECK-NEXT: Assumption Cache Tracker
; CHECK-NEXT: Profile summary info
+; CHECK-NEXT: Type-Based Alias Analysis
+; CHECK-NEXT: Scoped NoAlias Alias Analysis
; CHECK-NEXT: Create Garbage Collector Module Metadata
; CHECK-NEXT: Machine Branch Probability Analysis
; CHECK-NEXT: Default Regalloc Eviction Advisor
@@ -28,6 +28,12 @@
; CHECK-NEXT: Expand Atomic instructions
; CHECK-NEXT: Dominator Tree Construction
; CHECK-NEXT: Natural Loop Information
+; CHECK-NEXT: Canonicalize natural loops
+; CHECK-NEXT: Lazy Branch Probability Analysis
+; CHECK-NEXT: Lazy Block Frequency Analysis
+; CHECK-NEXT: Optimization Remark Emitter
+; CHECK-NEXT: Scalar Evolution Analysis
+; CHECK-NEXT: Loop Data Prefetch
; CHECK-NEXT: RISC-V gather/scatter lowering
; CHECK-NEXT: Interleaved Access Pass
; CHECK-NEXT: RISC-V CodeGenPrepare
diff --git a/llvm/test/Transforms/LoopDataPrefetch/RISCV/basic.ll b/llvm/test/Transforms/LoopDataPrefetch/RISCV/basic.ll
new file mode 100644
index 000000000000000..0f8c59fd27a93c3
--- /dev/null
+++ b/llvm/test/Transforms/LoopDataPrefetch/RISCV/basic.ll
@@ -0,0 +1,44 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3
+; RUN: opt -mtriple=riscv64 -riscv-enable-loop-data-prefetch \
+; RUN: -cache-line-size=64 -prefetch-distance=64 \
+; RUN: -passes=loop-data-prefetch -S < %s | FileCheck %s
+
+define void @foo(ptr nocapture %a, ptr nocapture readonly %b) {
+; CHECK-LABEL: define void @foo(
+; CHECK-SAME: ptr nocapture [[A:%.*]], ptr nocapture readonly [[B:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK: for.body:
+; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[TMP0:%.*]] = shl nuw nsw i64 [[INDVARS_IV]], 3
+; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[TMP0]], 64
+; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP1]]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[B]], i64 [[INDVARS_IV]]
+; CHECK-NEXT: call void @llvm.prefetch.p0(ptr [[SCEVGEP]], i32 0, i32 3, i32 1)
+; CHECK-NEXT: [[TMP2:%.*]] = load double, ptr [[ARRAYIDX]], align 8
+; CHECK-NEXT: [[ADD:%.*]] = fadd double [[TMP2]], 1.000000e+00
+; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds double, ptr [[A]], i64 [[INDVARS_IV]]
+; CHECK-NEXT: store double [[ADD]], ptr [[ARRAYIDX2]], align 8
+; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 1600
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
+; CHECK: for.end:
+; CHECK-NEXT: ret void
+;
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %arrayidx = getelementptr inbounds double, ptr %b, i64 %indvars.iv
+ %0 = load double, ptr %arrayidx, align 8
+ %add = fadd double %0, 1.000000e+00
+ %arrayidx2 = getelementptr inbounds double, ptr %a, i64 %indvars.iv
+ store double %add, ptr %arrayidx2, align 8
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond = icmp eq i64 %indvars.iv.next, 1600
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body
+ ret void
+}
diff --git a/llvm/test/Transforms/LoopDataPrefetch/RISCV/lit.local.cfg b/llvm/test/Transforms/LoopDataPrefetch/RISCV/lit.local.cfg
new file mode 100644
index 000000000000000..17351748513d988
--- /dev/null
+++ b/llvm/test/Transforms/LoopDataPrefetch/RISCV/lit.local.cfg
@@ -0,0 +1,2 @@
+if not "RISCV" in config.root.targets:
+ config.unsupported = True
More information about the llvm-commits
mailing list