[llvm] [RISCV] Increase default tail duplication threshold to 6 at -O3 (PR #98873)

Pengcheng Wang via llvm-commits llvm-commits at lists.llvm.org
Wed Jul 17 21:07:08 PDT 2024


https://github.com/wangpc-pp updated https://github.com/llvm/llvm-project/pull/98873

>From b1ae6dca49c9736a2b0446d0d49750b32a8223e8 Mon Sep 17 00:00:00 2001
From: Wang Pengcheng <wangpengcheng.pp at bytedance.com>
Date: Mon, 15 Jul 2024 15:00:05 +0800
Subject: [PATCH 1/3] [RISCV] Precommit test for tail duplication

---
 .../test/CodeGen/RISCV/riscv-tail-dup-size.ll | 76 +++++++++++++++++++
 1 file changed, 76 insertions(+)
 create mode 100644 llvm/test/CodeGen/RISCV/riscv-tail-dup-size.ll

diff --git a/llvm/test/CodeGen/RISCV/riscv-tail-dup-size.ll b/llvm/test/CodeGen/RISCV/riscv-tail-dup-size.ll
new file mode 100644
index 0000000000000..ae52773d71463
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/riscv-tail-dup-size.ll
@@ -0,0 +1,76 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv64 -mattr=+m -O2 < %s | FileCheck %s --check-prefix=CHECK-O2
+; RUN: llc -mtriple=riscv64 -mattr=+m -O3 < %s | FileCheck %s --check-prefix=CHECK-O3
+
+; RUN: llc -mtriple=riscv64 -mattr=+m -tail-dup-size=4 < %s | FileCheck %s --check-prefix=CHECK-O2
+; RUN: llc -mtriple=riscv64 -mattr=+m -tail-dup-placement-threshold=4 < %s | FileCheck %s --check-prefix=CHECK-O2
+; RUN: llc -mtriple=riscv64 -mattr=+m -tail-dup-placement-threshold=4 < %s | FileCheck %s --check-prefix=CHECK-O3
+
+@a = external dso_local local_unnamed_addr global i32
+@b = external dso_local local_unnamed_addr global i32
+@c = external dso_local local_unnamed_addr global i32
+
+declare i32 @foo(i32)
+
+define dso_local i32 @test(i32 %n) {
+; CHECK-O2-LABEL: test:
+; CHECK-O2:       # %bb.0: # %entry
+; CHECK-O2-NEXT:    sext.w a1, a0
+; CHECK-O2-NEXT:    blez a1, .LBB0_2
+; CHECK-O2-NEXT:  # %bb.1: # %if.then
+; CHECK-O2-NEXT:    lui a1, %hi(a)
+; CHECK-O2-NEXT:    lw a1, %lo(a)(a1)
+; CHECK-O2-NEXT:    mul a0, a1, a0
+; CHECK-O2-NEXT:    j .LBB0_3
+; CHECK-O2-NEXT:  .LBB0_2: # %if.else
+; CHECK-O2-NEXT:    lui a1, %hi(b)
+; CHECK-O2-NEXT:    lw a1, %lo(b)(a1)
+; CHECK-O2-NEXT:    divw a0, a1, a0
+; CHECK-O2-NEXT:  .LBB0_3: # %if.end
+; CHECK-O2-NEXT:    lui a1, %hi(c)
+; CHECK-O2-NEXT:    lw a1, %lo(c)(a1)
+; CHECK-O2-NEXT:    addi a0, a0, -1
+; CHECK-O2-NEXT:    mulw a0, a0, a1
+; CHECK-O2-NEXT:    tail foo
+;
+; CHECK-O3-LABEL: test:
+; CHECK-O3:       # %bb.0: # %entry
+; CHECK-O3-NEXT:    sext.w a1, a0
+; CHECK-O3-NEXT:    blez a1, .LBB0_2
+; CHECK-O3-NEXT:  # %bb.1: # %if.then
+; CHECK-O3-NEXT:    lui a1, %hi(a)
+; CHECK-O3-NEXT:    lw a1, %lo(a)(a1)
+; CHECK-O3-NEXT:    mul a0, a1, a0
+; CHECK-O3-NEXT:    j .LBB0_3
+; CHECK-O3-NEXT:  .LBB0_2: # %if.else
+; CHECK-O3-NEXT:    lui a1, %hi(b)
+; CHECK-O3-NEXT:    lw a1, %lo(b)(a1)
+; CHECK-O3-NEXT:    divw a0, a1, a0
+; CHECK-O3-NEXT:  .LBB0_3: # %if.end
+; CHECK-O3-NEXT:    lui a1, %hi(c)
+; CHECK-O3-NEXT:    lw a1, %lo(c)(a1)
+; CHECK-O3-NEXT:    addi a0, a0, -1
+; CHECK-O3-NEXT:    mulw a0, a0, a1
+; CHECK-O3-NEXT:    tail foo
+entry:
+  %cmp = icmp sgt i32 %n, 0
+  br i1 %cmp, label %if.then, label %if.else
+
+if.then:
+  %va = load i32, ptr @a
+  %mul = mul nsw i32 %va, %n
+  br label %if.end
+
+if.else:
+  %vb = load i32, ptr @b
+  %div = sdiv i32 %vb, %n
+  br label %if.end
+
+if.end:
+  %phi = phi i32 [ %mul, %if.then ], [ %div, %if.else ]
+  %vc = load i32, ptr @c
+  %add = add nsw i32 %phi, -1
+  %arg = mul i32 %add, %vc
+  %ret = tail call i32 @foo(i32 %arg)
+  ret i32 %ret
+}

>From 3c0c50cd99d6bb7ca2d050a64543f55f865677b3 Mon Sep 17 00:00:00 2001
From: Wang Pengcheng <wangpengcheng.pp at bytedance.com>
Date: Mon, 15 Jul 2024 15:10:46 +0800
Subject: [PATCH 2/3] [RISCV] Increase default tail duplication threshold to 6
 at -O3

This is just like AArch64.

Raising the threshold to 6 will increase code size, but it will
also reduce the number of direct branches. CPUs with wide
fetch/issue units can benefit from it.

The value 6 may be debatable; alternatively, we could set it to `SchedModel.IssueWidth`.
---
 llvm/lib/Target/RISCV/RISCVInstrInfo.cpp       | 5 +++++
 llvm/lib/Target/RISCV/RISCVInstrInfo.h         | 2 ++
 llvm/test/CodeGen/RISCV/riscv-tail-dup-size.ll | 9 ++++++---
 3 files changed, 13 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index 5e1b5284751f4..5f63558739003 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -3763,6 +3763,11 @@ RISCVInstrInfo::getSerializableMachineMemOperandTargetFlags() const {
   return ArrayRef(TargetFlags);
 }
 
+unsigned int
+RISCVInstrInfo::getTailDuplicateSize(CodeGenOptLevel OptLevel) const {
+  return OptLevel >= CodeGenOptLevel::Aggressive ? 6 : 2;
+}
+
 // Returns true if this is the sext.w pattern, addiw rd, rs1, 0.
 bool RISCV::isSEXT_W(const MachineInstr &MI) {
   return MI.getOpcode() == RISCV::ADDIW && MI.getOperand(1).isReg() &&
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.h b/llvm/lib/Target/RISCV/RISCVInstrInfo.h
index f0c0953a3e56a..c4c8a18bda6a2 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.h
@@ -286,6 +286,8 @@ class RISCVInstrInfo : public RISCVGenInstrInfo {
   ArrayRef<std::pair<MachineMemOperand::Flags, const char *>>
   getSerializableMachineMemOperandTargetFlags() const override;
 
+  unsigned int getTailDuplicateSize(CodeGenOptLevel OptLevel) const override;
+
   unsigned getUndefInitOpcode(unsigned RegClassID) const override {
     switch (RegClassID) {
     case RISCV::VRRegClassID:
diff --git a/llvm/test/CodeGen/RISCV/riscv-tail-dup-size.ll b/llvm/test/CodeGen/RISCV/riscv-tail-dup-size.ll
index ae52773d71463..84373ce80843f 100644
--- a/llvm/test/CodeGen/RISCV/riscv-tail-dup-size.ll
+++ b/llvm/test/CodeGen/RISCV/riscv-tail-dup-size.ll
@@ -4,7 +4,7 @@
 
 ; RUN: llc -mtriple=riscv64 -mattr=+m -tail-dup-size=4 < %s | FileCheck %s --check-prefix=CHECK-O2
 ; RUN: llc -mtriple=riscv64 -mattr=+m -tail-dup-placement-threshold=4 < %s | FileCheck %s --check-prefix=CHECK-O2
-; RUN: llc -mtriple=riscv64 -mattr=+m -tail-dup-placement-threshold=4 < %s | FileCheck %s --check-prefix=CHECK-O3
+; RUN: llc -mtriple=riscv64 -mattr=+m -tail-dup-placement-threshold=6 < %s | FileCheck %s --check-prefix=CHECK-O3
 
 @a = external dso_local local_unnamed_addr global i32
 @b = external dso_local local_unnamed_addr global i32
@@ -41,12 +41,15 @@ define dso_local i32 @test(i32 %n) {
 ; CHECK-O3-NEXT:    lui a1, %hi(a)
 ; CHECK-O3-NEXT:    lw a1, %lo(a)(a1)
 ; CHECK-O3-NEXT:    mul a0, a1, a0
-; CHECK-O3-NEXT:    j .LBB0_3
+; CHECK-O3-NEXT:    lui a1, %hi(c)
+; CHECK-O3-NEXT:    lw a1, %lo(c)(a1)
+; CHECK-O3-NEXT:    addi a0, a0, -1
+; CHECK-O3-NEXT:    mulw a0, a0, a1
+; CHECK-O3-NEXT:    tail foo
 ; CHECK-O3-NEXT:  .LBB0_2: # %if.else
 ; CHECK-O3-NEXT:    lui a1, %hi(b)
 ; CHECK-O3-NEXT:    lw a1, %lo(b)(a1)
 ; CHECK-O3-NEXT:    divw a0, a1, a0
-; CHECK-O3-NEXT:  .LBB0_3: # %if.end
 ; CHECK-O3-NEXT:    lui a1, %hi(c)
 ; CHECK-O3-NEXT:    lw a1, %lo(c)(a1)
 ; CHECK-O3-NEXT:    addi a0, a0, -1

>From 2e27036a4f2c2ed2a284cd4275da796d517e3454 Mon Sep 17 00:00:00 2001
From: Wang Pengcheng <wangpengcheng.pp at bytedance.com>
Date: Thu, 18 Jul 2024 12:06:54 +0800
Subject: [PATCH 3/3] Remove dso_local/local_unnamed_addr

---
 llvm/test/CodeGen/RISCV/riscv-tail-dup-size.ll | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/llvm/test/CodeGen/RISCV/riscv-tail-dup-size.ll b/llvm/test/CodeGen/RISCV/riscv-tail-dup-size.ll
index 84373ce80843f..ff0ff6e0dd3bd 100644
--- a/llvm/test/CodeGen/RISCV/riscv-tail-dup-size.ll
+++ b/llvm/test/CodeGen/RISCV/riscv-tail-dup-size.ll
@@ -6,13 +6,13 @@
 ; RUN: llc -mtriple=riscv64 -mattr=+m -tail-dup-placement-threshold=4 < %s | FileCheck %s --check-prefix=CHECK-O2
 ; RUN: llc -mtriple=riscv64 -mattr=+m -tail-dup-placement-threshold=6 < %s | FileCheck %s --check-prefix=CHECK-O3
 
-@a = external dso_local local_unnamed_addr global i32
-@b = external dso_local local_unnamed_addr global i32
-@c = external dso_local local_unnamed_addr global i32
+@a = external global i32
+@b = external global i32
+@c = external global i32
 
 declare i32 @foo(i32)
 
-define dso_local i32 @test(i32 %n) {
+define i32 @test(i32 %n) {
 ; CHECK-O2-LABEL: test:
 ; CHECK-O2:       # %bb.0: # %entry
 ; CHECK-O2-NEXT:    sext.w a1, a0



More information about the llvm-commits mailing list