[llvm] 27b6080 - [RISCV] Increase default tail duplication threshold to 6 at -O3 (#98873)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Jul 31 21:24:29 PDT 2024
Author: Pengcheng Wang
Date: 2024-08-01T12:24:25+08:00
New Revision: 27b608055f8e86e2decea519e6dc1ab6aff4824e
URL: https://github.com/llvm/llvm-project/commit/27b608055f8e86e2decea519e6dc1ab6aff4824e
DIFF: https://github.com/llvm/llvm-project/commit/27b608055f8e86e2decea519e6dc1ab6aff4824e.diff
LOG: [RISCV] Increase default tail duplication threshold to 6 at -O3 (#98873)
This is just like AArch64.
Changing the threshold to 6 will increase code size, but it will
also reduce the number of unconditional branches. CPUs with wide
fetch/issue units can benefit from it.
The value 6 may be debatable; alternatively, we could set it to `SchedModel.IssueWidth`.
Added:
llvm/test/CodeGen/RISCV/riscv-tail-dup-size.ll
Modified:
llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
llvm/lib/Target/RISCV/RISCVInstrInfo.h
llvm/lib/Target/RISCV/RISCVProcessors.td
llvm/lib/Target/RISCV/RISCVSubtarget.h
Removed:
################################################################################
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index 0620c3fc12adc..9dd79027d7a16 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -3763,6 +3763,12 @@ RISCVInstrInfo::getSerializableMachineMemOperandTargetFlags() const {
return ArrayRef(TargetFlags);
}
+unsigned RISCVInstrInfo::getTailDuplicateSize(CodeGenOptLevel OptLevel) const {
+ return OptLevel >= CodeGenOptLevel::Aggressive
+ ? STI.getTailDupAggressiveThreshold()
+ : 2;
+}
+
// Returns true if this is the sext.w pattern, addiw rd, rs1, 0.
bool RISCV::isSEXT_W(const MachineInstr &MI) {
return MI.getOpcode() == RISCV::ADDIW && MI.getOperand(1).isReg() &&
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.h b/llvm/lib/Target/RISCV/RISCVInstrInfo.h
index 025e12d81e60d..1612f56a8b506 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.h
@@ -288,6 +288,8 @@ class RISCVInstrInfo : public RISCVGenInstrInfo {
ArrayRef<std::pair<MachineMemOperand::Flags, const char *>>
getSerializableMachineMemOperandTargetFlags() const override;
+ unsigned getTailDuplicateSize(CodeGenOptLevel OptLevel) const override;
+
unsigned getUndefInitOpcode(unsigned RegClassID) const override {
switch (RegClassID) {
case RISCV::VRRegClassID:
diff --git a/llvm/lib/Target/RISCV/RISCVProcessors.td b/llvm/lib/Target/RISCV/RISCVProcessors.td
index 1729bc0282f51..25b24980e0bf6 100644
--- a/llvm/lib/Target/RISCV/RISCVProcessors.td
+++ b/llvm/lib/Target/RISCV/RISCVProcessors.td
@@ -21,6 +21,9 @@ class RISCVTuneInfo {
bits<32> MaxPrefetchIterationsAhead = -1;
bits<32> MinimumJumpTableEntries = 5;
+
+ // Tail duplication threshold at -O3.
+ bits<32> TailDupAggressiveThreshold = 6;
}
def RISCVTuneInfoTable : GenericTable {
@@ -29,7 +32,7 @@ def RISCVTuneInfoTable : GenericTable {
let Fields = ["Name", "PrefFunctionAlignment", "PrefLoopAlignment",
"CacheLineSize", "PrefetchDistance",
"MinPrefetchStride", "MaxPrefetchIterationsAhead",
- "MinimumJumpTableEntries"];
+ "MinimumJumpTableEntries", "TailDupAggressiveThreshold"];
}
def getRISCVTuneInfo : SearchIndex {
diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.h b/llvm/lib/Target/RISCV/RISCVSubtarget.h
index d38952e5196f0..ea54ff1df0b7c 100644
--- a/llvm/lib/Target/RISCV/RISCVSubtarget.h
+++ b/llvm/lib/Target/RISCV/RISCVSubtarget.h
@@ -50,6 +50,9 @@ struct RISCVTuneInfo {
unsigned MaxPrefetchIterationsAhead;
unsigned MinimumJumpTableEntries;
+
+ // Tail duplication threshold at -O3.
+ unsigned TailDupAggressiveThreshold;
};
#define GET_RISCVTuneInfoTable_DECL
@@ -300,6 +303,10 @@ class RISCVSubtarget : public RISCVGenSubtargetInfo {
unsigned getMinimumJumpTableEntries() const;
+ unsigned getTailDupAggressiveThreshold() const {
+ return TuneInfo->TailDupAggressiveThreshold;
+ }
+
bool supportsInitUndef() const override { return hasVInstructions(); }
};
} // End llvm namespace
diff --git a/llvm/test/CodeGen/RISCV/riscv-tail-dup-size.ll b/llvm/test/CodeGen/RISCV/riscv-tail-dup-size.ll
new file mode 100644
index 0000000000000..0508016736004
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/riscv-tail-dup-size.ll
@@ -0,0 +1,79 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv64 -mattr=+m -O2 < %s | FileCheck %s --check-prefix=CHECK-O2
+; RUN: llc -mtriple=riscv64 -mattr=+m -O3 < %s | FileCheck %s --check-prefix=CHECK-O3
+
+; RUN: llc -mtriple=riscv64 -mattr=+m -tail-dup-size=2 < %s | FileCheck %s --check-prefix=CHECK-O2
+; RUN: llc -mtriple=riscv64 -mattr=+m -tail-dup-placement-threshold=2 < %s | FileCheck %s --check-prefix=CHECK-O2
+; RUN: llc -mtriple=riscv64 -mattr=+m -tail-dup-placement-threshold=6 < %s | FileCheck %s --check-prefix=CHECK-O3
+
+@a = external global i32
+@b = external global i32
+@c = external global i32
+
+declare i32 @foo(i32)
+
+define i32 @test(i32 %n) {
+; CHECK-O2-LABEL: test:
+; CHECK-O2: # %bb.0: # %entry
+; CHECK-O2-NEXT: sext.w a1, a0
+; CHECK-O2-NEXT: blez a1, .LBB0_2
+; CHECK-O2-NEXT: # %bb.1: # %if.then
+; CHECK-O2-NEXT: lui a1, %hi(a)
+; CHECK-O2-NEXT: lw a1, %lo(a)(a1)
+; CHECK-O2-NEXT: mul a0, a1, a0
+; CHECK-O2-NEXT: j .LBB0_3
+; CHECK-O2-NEXT: .LBB0_2: # %if.else
+; CHECK-O2-NEXT: lui a1, %hi(b)
+; CHECK-O2-NEXT: lw a1, %lo(b)(a1)
+; CHECK-O2-NEXT: divw a0, a1, a0
+; CHECK-O2-NEXT: .LBB0_3: # %if.end
+; CHECK-O2-NEXT: lui a1, %hi(c)
+; CHECK-O2-NEXT: lw a1, %lo(c)(a1)
+; CHECK-O2-NEXT: addi a0, a0, -1
+; CHECK-O2-NEXT: mulw a0, a0, a1
+; CHECK-O2-NEXT: tail foo
+;
+; CHECK-O3-LABEL: test:
+; CHECK-O3: # %bb.0: # %entry
+; CHECK-O3-NEXT: sext.w a1, a0
+; CHECK-O3-NEXT: blez a1, .LBB0_2
+; CHECK-O3-NEXT: # %bb.1: # %if.then
+; CHECK-O3-NEXT: lui a1, %hi(a)
+; CHECK-O3-NEXT: lw a1, %lo(a)(a1)
+; CHECK-O3-NEXT: mul a0, a1, a0
+; CHECK-O3-NEXT: lui a1, %hi(c)
+; CHECK-O3-NEXT: lw a1, %lo(c)(a1)
+; CHECK-O3-NEXT: addi a0, a0, -1
+; CHECK-O3-NEXT: mulw a0, a0, a1
+; CHECK-O3-NEXT: tail foo
+; CHECK-O3-NEXT: .LBB0_2: # %if.else
+; CHECK-O3-NEXT: lui a1, %hi(b)
+; CHECK-O3-NEXT: lw a1, %lo(b)(a1)
+; CHECK-O3-NEXT: divw a0, a1, a0
+; CHECK-O3-NEXT: lui a1, %hi(c)
+; CHECK-O3-NEXT: lw a1, %lo(c)(a1)
+; CHECK-O3-NEXT: addi a0, a0, -1
+; CHECK-O3-NEXT: mulw a0, a0, a1
+; CHECK-O3-NEXT: tail foo
+entry:
+ %cmp = icmp sgt i32 %n, 0
+ br i1 %cmp, label %if.then, label %if.else
+
+if.then:
+ %va = load i32, ptr @a
+ %mul = mul nsw i32 %va, %n
+ br label %if.end
+
+if.else:
+ %vb = load i32, ptr @b
+ %div = sdiv i32 %vb, %n
+ br label %if.end
+
+if.end:
+ %phi = phi i32 [ %mul, %if.then ], [ %div, %if.else ]
+ %vc = load i32, ptr @c
+ %add = add nsw i32 %phi, -1
+ %arg = mul i32 %add, %vc
+ %ret = tail call i32 @foo(i32 %arg)
+ ret i32 %ret
+}
More information about the llvm-commits
mailing list