[llvm] [RISCV] Increase default tail duplication threshold to 6 at -O3 (PR #98873)
Pengcheng Wang via llvm-commits
llvm-commits at lists.llvm.org
Wed Jul 17 21:07:08 PDT 2024
https://github.com/wangpc-pp updated https://github.com/llvm/llvm-project/pull/98873
>From b1ae6dca49c9736a2b0446d0d49750b32a8223e8 Mon Sep 17 00:00:00 2001
From: Wang Pengcheng <wangpengcheng.pp at bytedance.com>
Date: Mon, 15 Jul 2024 15:00:05 +0800
Subject: [PATCH 1/3] [RISCV] Precommit test for tail duplication
---
.../test/CodeGen/RISCV/riscv-tail-dup-size.ll | 76 +++++++++++++++++++
1 file changed, 76 insertions(+)
create mode 100644 llvm/test/CodeGen/RISCV/riscv-tail-dup-size.ll
diff --git a/llvm/test/CodeGen/RISCV/riscv-tail-dup-size.ll b/llvm/test/CodeGen/RISCV/riscv-tail-dup-size.ll
new file mode 100644
index 0000000000000..ae52773d71463
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/riscv-tail-dup-size.ll
@@ -0,0 +1,76 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv64 -mattr=+m -O2 < %s | FileCheck %s --check-prefix=CHECK-O2
+; RUN: llc -mtriple=riscv64 -mattr=+m -O3 < %s | FileCheck %s --check-prefix=CHECK-O3
+
+; RUN: llc -mtriple=riscv64 -mattr=+m -tail-dup-size=4 < %s | FileCheck %s --check-prefix=CHECK-O2
+; RUN: llc -mtriple=riscv64 -mattr=+m -tail-dup-placement-threshold=4 < %s | FileCheck %s --check-prefix=CHECK-O2
+; RUN: llc -mtriple=riscv64 -mattr=+m -tail-dup-placement-threshold=4 < %s | FileCheck %s --check-prefix=CHECK-O3
+
+@a = external dso_local local_unnamed_addr global i32
+@b = external dso_local local_unnamed_addr global i32
+@c = external dso_local local_unnamed_addr global i32
+
+declare i32 @foo(i32)
+
+define dso_local i32 @test(i32 %n) {
+; CHECK-O2-LABEL: test:
+; CHECK-O2: # %bb.0: # %entry
+; CHECK-O2-NEXT: sext.w a1, a0
+; CHECK-O2-NEXT: blez a1, .LBB0_2
+; CHECK-O2-NEXT: # %bb.1: # %if.then
+; CHECK-O2-NEXT: lui a1, %hi(a)
+; CHECK-O2-NEXT: lw a1, %lo(a)(a1)
+; CHECK-O2-NEXT: mul a0, a1, a0
+; CHECK-O2-NEXT: j .LBB0_3
+; CHECK-O2-NEXT: .LBB0_2: # %if.else
+; CHECK-O2-NEXT: lui a1, %hi(b)
+; CHECK-O2-NEXT: lw a1, %lo(b)(a1)
+; CHECK-O2-NEXT: divw a0, a1, a0
+; CHECK-O2-NEXT: .LBB0_3: # %if.end
+; CHECK-O2-NEXT: lui a1, %hi(c)
+; CHECK-O2-NEXT: lw a1, %lo(c)(a1)
+; CHECK-O2-NEXT: addi a0, a0, -1
+; CHECK-O2-NEXT: mulw a0, a0, a1
+; CHECK-O2-NEXT: tail foo
+;
+; CHECK-O3-LABEL: test:
+; CHECK-O3: # %bb.0: # %entry
+; CHECK-O3-NEXT: sext.w a1, a0
+; CHECK-O3-NEXT: blez a1, .LBB0_2
+; CHECK-O3-NEXT: # %bb.1: # %if.then
+; CHECK-O3-NEXT: lui a1, %hi(a)
+; CHECK-O3-NEXT: lw a1, %lo(a)(a1)
+; CHECK-O3-NEXT: mul a0, a1, a0
+; CHECK-O3-NEXT: j .LBB0_3
+; CHECK-O3-NEXT: .LBB0_2: # %if.else
+; CHECK-O3-NEXT: lui a1, %hi(b)
+; CHECK-O3-NEXT: lw a1, %lo(b)(a1)
+; CHECK-O3-NEXT: divw a0, a1, a0
+; CHECK-O3-NEXT: .LBB0_3: # %if.end
+; CHECK-O3-NEXT: lui a1, %hi(c)
+; CHECK-O3-NEXT: lw a1, %lo(c)(a1)
+; CHECK-O3-NEXT: addi a0, a0, -1
+; CHECK-O3-NEXT: mulw a0, a0, a1
+; CHECK-O3-NEXT: tail foo
+entry:
+ %cmp = icmp sgt i32 %n, 0
+ br i1 %cmp, label %if.then, label %if.else
+
+if.then:
+ %va = load i32, ptr @a
+ %mul = mul nsw i32 %va, %n
+ br label %if.end
+
+if.else:
+ %vb = load i32, ptr @b
+ %div = sdiv i32 %vb, %n
+ br label %if.end
+
+if.end:
+ %phi = phi i32 [ %mul, %if.then ], [ %div, %if.else ]
+ %vc = load i32, ptr @c
+ %add = add nsw i32 %phi, -1
+ %arg = mul i32 %add, %vc
+ %ret = tail call i32 @foo(i32 %arg)
+ ret i32 %ret
+}
>From 3c0c50cd99d6bb7ca2d050a64543f55f865677b3 Mon Sep 17 00:00:00 2001
From: Wang Pengcheng <wangpengcheng.pp at bytedance.com>
Date: Mon, 15 Jul 2024 15:10:46 +0800
Subject: [PATCH 2/3] [RISCV] Increase default tail duplication threshold to 6 at -O3
This matches what the AArch64 target does.
Raising the threshold to 6 will increase code size, but it will also
reduce the number of direct branches. CPUs with wide fetch/issue units
can benefit from it.
The value 6 is debatable; alternatively, we could set it to
`SchedModel.IssueWidth`.
---
llvm/lib/Target/RISCV/RISCVInstrInfo.cpp | 5 +++++
llvm/lib/Target/RISCV/RISCVInstrInfo.h | 2 ++
llvm/test/CodeGen/RISCV/riscv-tail-dup-size.ll | 9 ++++++---
3 files changed, 13 insertions(+), 3 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index 5e1b5284751f4..5f63558739003 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -3763,6 +3763,11 @@ RISCVInstrInfo::getSerializableMachineMemOperandTargetFlags() const {
return ArrayRef(TargetFlags);
}
+unsigned int
+RISCVInstrInfo::getTailDuplicateSize(CodeGenOptLevel OptLevel) const {
+ return OptLevel >= CodeGenOptLevel::Aggressive ? 6 : 2;
+}
+
// Returns true if this is the sext.w pattern, addiw rd, rs1, 0.
bool RISCV::isSEXT_W(const MachineInstr &MI) {
return MI.getOpcode() == RISCV::ADDIW && MI.getOperand(1).isReg() &&
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.h b/llvm/lib/Target/RISCV/RISCVInstrInfo.h
index f0c0953a3e56a..c4c8a18bda6a2 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.h
@@ -286,6 +286,8 @@ class RISCVInstrInfo : public RISCVGenInstrInfo {
ArrayRef<std::pair<MachineMemOperand::Flags, const char *>>
getSerializableMachineMemOperandTargetFlags() const override;
+ unsigned int getTailDuplicateSize(CodeGenOptLevel OptLevel) const override;
+
unsigned getUndefInitOpcode(unsigned RegClassID) const override {
switch (RegClassID) {
case RISCV::VRRegClassID:
diff --git a/llvm/test/CodeGen/RISCV/riscv-tail-dup-size.ll b/llvm/test/CodeGen/RISCV/riscv-tail-dup-size.ll
index ae52773d71463..84373ce80843f 100644
--- a/llvm/test/CodeGen/RISCV/riscv-tail-dup-size.ll
+++ b/llvm/test/CodeGen/RISCV/riscv-tail-dup-size.ll
@@ -4,7 +4,7 @@
; RUN: llc -mtriple=riscv64 -mattr=+m -tail-dup-size=4 < %s | FileCheck %s --check-prefix=CHECK-O2
; RUN: llc -mtriple=riscv64 -mattr=+m -tail-dup-placement-threshold=4 < %s | FileCheck %s --check-prefix=CHECK-O2
-; RUN: llc -mtriple=riscv64 -mattr=+m -tail-dup-placement-threshold=4 < %s | FileCheck %s --check-prefix=CHECK-O3
+; RUN: llc -mtriple=riscv64 -mattr=+m -tail-dup-placement-threshold=6 < %s | FileCheck %s --check-prefix=CHECK-O3
@a = external dso_local local_unnamed_addr global i32
@b = external dso_local local_unnamed_addr global i32
@@ -41,12 +41,15 @@ define dso_local i32 @test(i32 %n) {
; CHECK-O3-NEXT: lui a1, %hi(a)
; CHECK-O3-NEXT: lw a1, %lo(a)(a1)
; CHECK-O3-NEXT: mul a0, a1, a0
-; CHECK-O3-NEXT: j .LBB0_3
+; CHECK-O3-NEXT: lui a1, %hi(c)
+; CHECK-O3-NEXT: lw a1, %lo(c)(a1)
+; CHECK-O3-NEXT: addi a0, a0, -1
+; CHECK-O3-NEXT: mulw a0, a0, a1
+; CHECK-O3-NEXT: tail foo
; CHECK-O3-NEXT: .LBB0_2: # %if.else
; CHECK-O3-NEXT: lui a1, %hi(b)
; CHECK-O3-NEXT: lw a1, %lo(b)(a1)
; CHECK-O3-NEXT: divw a0, a1, a0
-; CHECK-O3-NEXT: .LBB0_3: # %if.end
; CHECK-O3-NEXT: lui a1, %hi(c)
; CHECK-O3-NEXT: lw a1, %lo(c)(a1)
; CHECK-O3-NEXT: addi a0, a0, -1
>From 2e27036a4f2c2ed2a284cd4275da796d517e3454 Mon Sep 17 00:00:00 2001
From: Wang Pengcheng <wangpengcheng.pp at bytedance.com>
Date: Thu, 18 Jul 2024 12:06:54 +0800
Subject: [PATCH 3/3] Remove dso_local/local_unnamed_addr
---
llvm/test/CodeGen/RISCV/riscv-tail-dup-size.ll | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/llvm/test/CodeGen/RISCV/riscv-tail-dup-size.ll b/llvm/test/CodeGen/RISCV/riscv-tail-dup-size.ll
index 84373ce80843f..ff0ff6e0dd3bd 100644
--- a/llvm/test/CodeGen/RISCV/riscv-tail-dup-size.ll
+++ b/llvm/test/CodeGen/RISCV/riscv-tail-dup-size.ll
@@ -6,13 +6,13 @@
; RUN: llc -mtriple=riscv64 -mattr=+m -tail-dup-placement-threshold=4 < %s | FileCheck %s --check-prefix=CHECK-O2
; RUN: llc -mtriple=riscv64 -mattr=+m -tail-dup-placement-threshold=6 < %s | FileCheck %s --check-prefix=CHECK-O3
-@a = external dso_local local_unnamed_addr global i32
-@b = external dso_local local_unnamed_addr global i32
-@c = external dso_local local_unnamed_addr global i32
+@a = external global i32
+@b = external global i32
+@c = external global i32
declare i32 @foo(i32)
-define dso_local i32 @test(i32 %n) {
+define i32 @test(i32 %n) {
; CHECK-O2-LABEL: test:
; CHECK-O2: # %bb.0: # %entry
; CHECK-O2-NEXT: sext.w a1, a0
More information about the llvm-commits
mailing list