[llvm] [RISCV] Increase default tail duplication threshold to 6 at -O3 (PR #98873)

Pengcheng Wang via llvm-commits llvm-commits at lists.llvm.org
Wed Jul 31 21:22:36 PDT 2024


https://github.com/wangpc-pp updated https://github.com/llvm/llvm-project/pull/98873

>From e8c44383ee9d9170f2b90bfc0f7a60ccb75a93f6 Mon Sep 17 00:00:00 2001
From: Wang Pengcheng <wangpengcheng.pp at bytedance.com>
Date: Mon, 15 Jul 2024 15:00:05 +0800
Subject: [PATCH 1/5] [RISCV] Precommit test for tail duplication

---
 .../test/CodeGen/RISCV/riscv-tail-dup-size.ll | 76 +++++++++++++++++++
 1 file changed, 76 insertions(+)
 create mode 100644 llvm/test/CodeGen/RISCV/riscv-tail-dup-size.ll

diff --git a/llvm/test/CodeGen/RISCV/riscv-tail-dup-size.ll b/llvm/test/CodeGen/RISCV/riscv-tail-dup-size.ll
new file mode 100644
index 0000000000000..ae52773d71463
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/riscv-tail-dup-size.ll
@@ -0,0 +1,76 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv64 -mattr=+m -O2 < %s | FileCheck %s --check-prefix=CHECK-O2
+; RUN: llc -mtriple=riscv64 -mattr=+m -O3 < %s | FileCheck %s --check-prefix=CHECK-O3
+
+; RUN: llc -mtriple=riscv64 -mattr=+m -tail-dup-size=4 < %s | FileCheck %s --check-prefix=CHECK-O2
+; RUN: llc -mtriple=riscv64 -mattr=+m -tail-dup-placement-threshold=4 < %s | FileCheck %s --check-prefix=CHECK-O2
+; RUN: llc -mtriple=riscv64 -mattr=+m -tail-dup-placement-threshold=4 < %s | FileCheck %s --check-prefix=CHECK-O3
+
+@a = external dso_local local_unnamed_addr global i32
+@b = external dso_local local_unnamed_addr global i32
+@c = external dso_local local_unnamed_addr global i32
+
+declare i32 @foo(i32)
+
+define dso_local i32 @test(i32 %n) {
+; CHECK-O2-LABEL: test:
+; CHECK-O2:       # %bb.0: # %entry
+; CHECK-O2-NEXT:    sext.w a1, a0
+; CHECK-O2-NEXT:    blez a1, .LBB0_2
+; CHECK-O2-NEXT:  # %bb.1: # %if.then
+; CHECK-O2-NEXT:    lui a1, %hi(a)
+; CHECK-O2-NEXT:    lw a1, %lo(a)(a1)
+; CHECK-O2-NEXT:    mul a0, a1, a0
+; CHECK-O2-NEXT:    j .LBB0_3
+; CHECK-O2-NEXT:  .LBB0_2: # %if.else
+; CHECK-O2-NEXT:    lui a1, %hi(b)
+; CHECK-O2-NEXT:    lw a1, %lo(b)(a1)
+; CHECK-O2-NEXT:    divw a0, a1, a0
+; CHECK-O2-NEXT:  .LBB0_3: # %if.end
+; CHECK-O2-NEXT:    lui a1, %hi(c)
+; CHECK-O2-NEXT:    lw a1, %lo(c)(a1)
+; CHECK-O2-NEXT:    addi a0, a0, -1
+; CHECK-O2-NEXT:    mulw a0, a0, a1
+; CHECK-O2-NEXT:    tail foo
+;
+; CHECK-O3-LABEL: test:
+; CHECK-O3:       # %bb.0: # %entry
+; CHECK-O3-NEXT:    sext.w a1, a0
+; CHECK-O3-NEXT:    blez a1, .LBB0_2
+; CHECK-O3-NEXT:  # %bb.1: # %if.then
+; CHECK-O3-NEXT:    lui a1, %hi(a)
+; CHECK-O3-NEXT:    lw a1, %lo(a)(a1)
+; CHECK-O3-NEXT:    mul a0, a1, a0
+; CHECK-O3-NEXT:    j .LBB0_3
+; CHECK-O3-NEXT:  .LBB0_2: # %if.else
+; CHECK-O3-NEXT:    lui a1, %hi(b)
+; CHECK-O3-NEXT:    lw a1, %lo(b)(a1)
+; CHECK-O3-NEXT:    divw a0, a1, a0
+; CHECK-O3-NEXT:  .LBB0_3: # %if.end
+; CHECK-O3-NEXT:    lui a1, %hi(c)
+; CHECK-O3-NEXT:    lw a1, %lo(c)(a1)
+; CHECK-O3-NEXT:    addi a0, a0, -1
+; CHECK-O3-NEXT:    mulw a0, a0, a1
+; CHECK-O3-NEXT:    tail foo
+entry:
+  %cmp = icmp sgt i32 %n, 0
+  br i1 %cmp, label %if.then, label %if.else
+
+if.then:
+  %va = load i32, ptr @a
+  %mul = mul nsw i32 %va, %n
+  br label %if.end
+
+if.else:
+  %vb = load i32, ptr @b
+  %div = sdiv i32 %vb, %n
+  br label %if.end
+
+if.end:
+  %phi = phi i32 [ %mul, %if.then ], [ %div, %if.else ]
+  %vc = load i32, ptr @c
+  %add = add nsw i32 %phi, -1
+  %arg = mul i32 %add, %vc
+  %ret = tail call i32 @foo(i32 %arg)
+  ret i32 %ret
+}

>From 34cdb61fb35f4b4c6a54fb929b6eff5a64aab39f Mon Sep 17 00:00:00 2001
From: Wang Pengcheng <wangpengcheng.pp at bytedance.com>
Date: Mon, 15 Jul 2024 15:10:46 +0800
Subject: [PATCH 2/5] [RISCV] Increase default tail duplication threshold to 6
 at -O3

This is just like AArch64.

Changing the threshold to 6 will increase the code size, but will
also decrease direct branches. CPUs with wide fetch/issue units
can benefit from it.

The value 6 may be debatable; we could set it to `SchedModel.IssueWidth` instead.
---
 llvm/lib/Target/RISCV/RISCVInstrInfo.cpp       | 5 +++++
 llvm/lib/Target/RISCV/RISCVInstrInfo.h         | 2 ++
 llvm/test/CodeGen/RISCV/riscv-tail-dup-size.ll | 9 ++++++---
 3 files changed, 13 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index 0620c3fc12adc..b59401576420c 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -3763,6 +3763,11 @@ RISCVInstrInfo::getSerializableMachineMemOperandTargetFlags() const {
   return ArrayRef(TargetFlags);
 }
 
+unsigned int
+RISCVInstrInfo::getTailDuplicateSize(CodeGenOptLevel OptLevel) const {
+  return OptLevel >= CodeGenOptLevel::Aggressive ? 6 : 2;
+}
+
 // Returns true if this is the sext.w pattern, addiw rd, rs1, 0.
 bool RISCV::isSEXT_W(const MachineInstr &MI) {
   return MI.getOpcode() == RISCV::ADDIW && MI.getOperand(1).isReg() &&
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.h b/llvm/lib/Target/RISCV/RISCVInstrInfo.h
index 025e12d81e60d..06ec0cff95912 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.h
@@ -288,6 +288,8 @@ class RISCVInstrInfo : public RISCVGenInstrInfo {
   ArrayRef<std::pair<MachineMemOperand::Flags, const char *>>
   getSerializableMachineMemOperandTargetFlags() const override;
 
+  unsigned int getTailDuplicateSize(CodeGenOptLevel OptLevel) const override;
+
   unsigned getUndefInitOpcode(unsigned RegClassID) const override {
     switch (RegClassID) {
     case RISCV::VRRegClassID:
diff --git a/llvm/test/CodeGen/RISCV/riscv-tail-dup-size.ll b/llvm/test/CodeGen/RISCV/riscv-tail-dup-size.ll
index ae52773d71463..84373ce80843f 100644
--- a/llvm/test/CodeGen/RISCV/riscv-tail-dup-size.ll
+++ b/llvm/test/CodeGen/RISCV/riscv-tail-dup-size.ll
@@ -4,7 +4,7 @@
 
 ; RUN: llc -mtriple=riscv64 -mattr=+m -tail-dup-size=4 < %s | FileCheck %s --check-prefix=CHECK-O2
 ; RUN: llc -mtriple=riscv64 -mattr=+m -tail-dup-placement-threshold=4 < %s | FileCheck %s --check-prefix=CHECK-O2
-; RUN: llc -mtriple=riscv64 -mattr=+m -tail-dup-placement-threshold=4 < %s | FileCheck %s --check-prefix=CHECK-O3
+; RUN: llc -mtriple=riscv64 -mattr=+m -tail-dup-placement-threshold=6 < %s | FileCheck %s --check-prefix=CHECK-O3
 
 @a = external dso_local local_unnamed_addr global i32
 @b = external dso_local local_unnamed_addr global i32
@@ -41,12 +41,15 @@ define dso_local i32 @test(i32 %n) {
 ; CHECK-O3-NEXT:    lui a1, %hi(a)
 ; CHECK-O3-NEXT:    lw a1, %lo(a)(a1)
 ; CHECK-O3-NEXT:    mul a0, a1, a0
-; CHECK-O3-NEXT:    j .LBB0_3
+; CHECK-O3-NEXT:    lui a1, %hi(c)
+; CHECK-O3-NEXT:    lw a1, %lo(c)(a1)
+; CHECK-O3-NEXT:    addi a0, a0, -1
+; CHECK-O3-NEXT:    mulw a0, a0, a1
+; CHECK-O3-NEXT:    tail foo
 ; CHECK-O3-NEXT:  .LBB0_2: # %if.else
 ; CHECK-O3-NEXT:    lui a1, %hi(b)
 ; CHECK-O3-NEXT:    lw a1, %lo(b)(a1)
 ; CHECK-O3-NEXT:    divw a0, a1, a0
-; CHECK-O3-NEXT:  .LBB0_3: # %if.end
 ; CHECK-O3-NEXT:    lui a1, %hi(c)
 ; CHECK-O3-NEXT:    lw a1, %lo(c)(a1)
 ; CHECK-O3-NEXT:    addi a0, a0, -1

>From 13796fcb5ae2fe65ad6f290ef4d41c801415559b Mon Sep 17 00:00:00 2001
From: Wang Pengcheng <wangpengcheng.pp at bytedance.com>
Date: Thu, 18 Jul 2024 12:06:54 +0800
Subject: [PATCH 3/5] Remove dso_local/local_unnamed_addr

---
 llvm/test/CodeGen/RISCV/riscv-tail-dup-size.ll | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/llvm/test/CodeGen/RISCV/riscv-tail-dup-size.ll b/llvm/test/CodeGen/RISCV/riscv-tail-dup-size.ll
index 84373ce80843f..ff0ff6e0dd3bd 100644
--- a/llvm/test/CodeGen/RISCV/riscv-tail-dup-size.ll
+++ b/llvm/test/CodeGen/RISCV/riscv-tail-dup-size.ll
@@ -6,13 +6,13 @@
 ; RUN: llc -mtriple=riscv64 -mattr=+m -tail-dup-placement-threshold=4 < %s | FileCheck %s --check-prefix=CHECK-O2
 ; RUN: llc -mtriple=riscv64 -mattr=+m -tail-dup-placement-threshold=6 < %s | FileCheck %s --check-prefix=CHECK-O3
 
-@a = external dso_local local_unnamed_addr global i32
-@b = external dso_local local_unnamed_addr global i32
-@c = external dso_local local_unnamed_addr global i32
+@a = external global i32
+@b = external global i32
+@c = external global i32
 
 declare i32 @foo(i32)
 
-define dso_local i32 @test(i32 %n) {
+define i32 @test(i32 %n) {
 ; CHECK-O2-LABEL: test:
 ; CHECK-O2:       # %bb.0: # %entry
 ; CHECK-O2-NEXT:    sext.w a1, a0

>From 8c63d85347ff45be150d02feda1aa4029cdd60b5 Mon Sep 17 00:00:00 2001
From: Wang Pengcheng <wangpengcheng.pp at bytedance.com>
Date: Thu, 18 Jul 2024 12:11:24 +0800
Subject: [PATCH 4/5] The default threshold at -O2 should be 2

---
 llvm/test/CodeGen/RISCV/riscv-tail-dup-size.ll | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/llvm/test/CodeGen/RISCV/riscv-tail-dup-size.ll b/llvm/test/CodeGen/RISCV/riscv-tail-dup-size.ll
index ff0ff6e0dd3bd..0508016736004 100644
--- a/llvm/test/CodeGen/RISCV/riscv-tail-dup-size.ll
+++ b/llvm/test/CodeGen/RISCV/riscv-tail-dup-size.ll
@@ -2,8 +2,8 @@
 ; RUN: llc -mtriple=riscv64 -mattr=+m -O2 < %s | FileCheck %s --check-prefix=CHECK-O2
 ; RUN: llc -mtriple=riscv64 -mattr=+m -O3 < %s | FileCheck %s --check-prefix=CHECK-O3
 
-; RUN: llc -mtriple=riscv64 -mattr=+m -tail-dup-size=4 < %s | FileCheck %s --check-prefix=CHECK-O2
-; RUN: llc -mtriple=riscv64 -mattr=+m -tail-dup-placement-threshold=4 < %s | FileCheck %s --check-prefix=CHECK-O2
+; RUN: llc -mtriple=riscv64 -mattr=+m -tail-dup-size=2 < %s | FileCheck %s --check-prefix=CHECK-O2
+; RUN: llc -mtriple=riscv64 -mattr=+m -tail-dup-placement-threshold=2 < %s | FileCheck %s --check-prefix=CHECK-O2
 ; RUN: llc -mtriple=riscv64 -mattr=+m -tail-dup-placement-threshold=6 < %s | FileCheck %s --check-prefix=CHECK-O3
 
 @a = external global i32

>From f7e99ad8b6eb54e9bc59c3de390cdf0326cf4ea2 Mon Sep 17 00:00:00 2001
From: Wang Pengcheng <wangpengcheng.pp at bytedance.com>
Date: Thu, 18 Jul 2024 12:26:16 +0800
Subject: [PATCH 5/5] Add TailDupAggressiveThreshold to RISCVTuneInfo

---
 llvm/lib/Target/RISCV/RISCVInstrInfo.cpp | 7 ++++---
 llvm/lib/Target/RISCV/RISCVInstrInfo.h   | 2 +-
 llvm/lib/Target/RISCV/RISCVProcessors.td | 5 ++++-
 llvm/lib/Target/RISCV/RISCVSubtarget.h   | 7 +++++++
 4 files changed, 16 insertions(+), 5 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index b59401576420c..9dd79027d7a16 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -3763,9 +3763,10 @@ RISCVInstrInfo::getSerializableMachineMemOperandTargetFlags() const {
   return ArrayRef(TargetFlags);
 }
 
-unsigned int
-RISCVInstrInfo::getTailDuplicateSize(CodeGenOptLevel OptLevel) const {
-  return OptLevel >= CodeGenOptLevel::Aggressive ? 6 : 2;
+unsigned RISCVInstrInfo::getTailDuplicateSize(CodeGenOptLevel OptLevel) const {
+  return OptLevel >= CodeGenOptLevel::Aggressive
+             ? STI.getTailDupAggressiveThreshold()
+             : 2;
 }
 
 // Returns true if this is the sext.w pattern, addiw rd, rs1, 0.
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.h b/llvm/lib/Target/RISCV/RISCVInstrInfo.h
index 06ec0cff95912..1612f56a8b506 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.h
@@ -288,7 +288,7 @@ class RISCVInstrInfo : public RISCVGenInstrInfo {
   ArrayRef<std::pair<MachineMemOperand::Flags, const char *>>
   getSerializableMachineMemOperandTargetFlags() const override;
 
-  unsigned int getTailDuplicateSize(CodeGenOptLevel OptLevel) const override;
+  unsigned getTailDuplicateSize(CodeGenOptLevel OptLevel) const override;
 
   unsigned getUndefInitOpcode(unsigned RegClassID) const override {
     switch (RegClassID) {
diff --git a/llvm/lib/Target/RISCV/RISCVProcessors.td b/llvm/lib/Target/RISCV/RISCVProcessors.td
index 1729bc0282f51..25b24980e0bf6 100644
--- a/llvm/lib/Target/RISCV/RISCVProcessors.td
+++ b/llvm/lib/Target/RISCV/RISCVProcessors.td
@@ -21,6 +21,9 @@ class RISCVTuneInfo {
   bits<32> MaxPrefetchIterationsAhead = -1;
 
   bits<32> MinimumJumpTableEntries = 5;
+
+  // Tail duplication threshold at -O3.
+  bits<32> TailDupAggressiveThreshold = 6;
 }
 
 def RISCVTuneInfoTable : GenericTable {
@@ -29,7 +32,7 @@ def RISCVTuneInfoTable : GenericTable {
   let Fields = ["Name", "PrefFunctionAlignment", "PrefLoopAlignment",
                 "CacheLineSize", "PrefetchDistance",
                 "MinPrefetchStride", "MaxPrefetchIterationsAhead",
-                "MinimumJumpTableEntries"];
+                "MinimumJumpTableEntries", "TailDupAggressiveThreshold"];
 }
 
 def getRISCVTuneInfo : SearchIndex {
diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.h b/llvm/lib/Target/RISCV/RISCVSubtarget.h
index d38952e5196f0..ea54ff1df0b7c 100644
--- a/llvm/lib/Target/RISCV/RISCVSubtarget.h
+++ b/llvm/lib/Target/RISCV/RISCVSubtarget.h
@@ -50,6 +50,9 @@ struct RISCVTuneInfo {
   unsigned MaxPrefetchIterationsAhead;
 
   unsigned MinimumJumpTableEntries;
+
+  // Tail duplication threshold at -O3.
+  unsigned TailDupAggressiveThreshold;
 };
 
 #define GET_RISCVTuneInfoTable_DECL
@@ -300,6 +303,10 @@ class RISCVSubtarget : public RISCVGenSubtargetInfo {
 
   unsigned getMinimumJumpTableEntries() const;
 
+  unsigned getTailDupAggressiveThreshold() const {
+    return TuneInfo->TailDupAggressiveThreshold;
+  }
+
   bool supportsInitUndef() const override { return hasVInstructions(); }
 };
 } // End llvm namespace



More information about the llvm-commits mailing list