[llvm] [RISCV] Increase default tail duplication threshold to 6 at -O3 (PR #98873)

via llvm-commits llvm-commits at lists.llvm.org
Mon Jul 15 01:41:31 PDT 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-risc-v

Author: Pengcheng Wang (wangpc-pp)

<details>
<summary>Changes</summary>

This is just like AArch64.

Changing the threshold to 6 will increase the code size, but will
also decrease direct branches. CPUs with wide fetch/issue units
can benefit from it.

The value 6 may be debatable, we can set it to `SchedModel.IssueWidth`.


---
Full diff: https://github.com/llvm/llvm-project/pull/98873.diff


3 Files Affected:

- (modified) llvm/lib/Target/RISCV/RISCVInstrInfo.cpp (+5) 
- (modified) llvm/lib/Target/RISCV/RISCVInstrInfo.h (+2) 
- (added) llvm/test/CodeGen/RISCV/riscv-tail-dup-size.ll (+79) 


``````````diff
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index 5e1b5284751f4..5f63558739003 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -3763,6 +3763,11 @@ RISCVInstrInfo::getSerializableMachineMemOperandTargetFlags() const {
   return ArrayRef(TargetFlags);
 }
 
+unsigned int
+RISCVInstrInfo::getTailDuplicateSize(CodeGenOptLevel OptLevel) const {
+  return OptLevel >= CodeGenOptLevel::Aggressive ? 6 : 2;
+}
+
 // Returns true if this is the sext.w pattern, addiw rd, rs1, 0.
 bool RISCV::isSEXT_W(const MachineInstr &MI) {
   return MI.getOpcode() == RISCV::ADDIW && MI.getOperand(1).isReg() &&
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.h b/llvm/lib/Target/RISCV/RISCVInstrInfo.h
index f0c0953a3e56a..c4c8a18bda6a2 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.h
@@ -286,6 +286,8 @@ class RISCVInstrInfo : public RISCVGenInstrInfo {
   ArrayRef<std::pair<MachineMemOperand::Flags, const char *>>
   getSerializableMachineMemOperandTargetFlags() const override;
 
+  unsigned int getTailDuplicateSize(CodeGenOptLevel OptLevel) const override;
+
   unsigned getUndefInitOpcode(unsigned RegClassID) const override {
     switch (RegClassID) {
     case RISCV::VRRegClassID:
diff --git a/llvm/test/CodeGen/RISCV/riscv-tail-dup-size.ll b/llvm/test/CodeGen/RISCV/riscv-tail-dup-size.ll
new file mode 100644
index 0000000000000..84373ce80843f
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/riscv-tail-dup-size.ll
@@ -0,0 +1,79 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv64 -mattr=+m -O2 < %s | FileCheck %s --check-prefix=CHECK-O2
+; RUN: llc -mtriple=riscv64 -mattr=+m -O3 < %s | FileCheck %s --check-prefix=CHECK-O3
+
+; RUN: llc -mtriple=riscv64 -mattr=+m -tail-dup-size=4 < %s | FileCheck %s --check-prefix=CHECK-O2
+; RUN: llc -mtriple=riscv64 -mattr=+m -tail-dup-placement-threshold=4 < %s | FileCheck %s --check-prefix=CHECK-O2
+; RUN: llc -mtriple=riscv64 -mattr=+m -tail-dup-placement-threshold=6 < %s | FileCheck %s --check-prefix=CHECK-O3
+
+ at a = external dso_local local_unnamed_addr global i32
+ at b = external dso_local local_unnamed_addr global i32
+ at c = external dso_local local_unnamed_addr global i32
+
+declare i32 @foo(i32)
+
+define dso_local i32 @test(i32 %n) {
+; CHECK-O2-LABEL: test:
+; CHECK-O2:       # %bb.0: # %entry
+; CHECK-O2-NEXT:    sext.w a1, a0
+; CHECK-O2-NEXT:    blez a1, .LBB0_2
+; CHECK-O2-NEXT:  # %bb.1: # %if.then
+; CHECK-O2-NEXT:    lui a1, %hi(a)
+; CHECK-O2-NEXT:    lw a1, %lo(a)(a1)
+; CHECK-O2-NEXT:    mul a0, a1, a0
+; CHECK-O2-NEXT:    j .LBB0_3
+; CHECK-O2-NEXT:  .LBB0_2: # %if.else
+; CHECK-O2-NEXT:    lui a1, %hi(b)
+; CHECK-O2-NEXT:    lw a1, %lo(b)(a1)
+; CHECK-O2-NEXT:    divw a0, a1, a0
+; CHECK-O2-NEXT:  .LBB0_3: # %if.end
+; CHECK-O2-NEXT:    lui a1, %hi(c)
+; CHECK-O2-NEXT:    lw a1, %lo(c)(a1)
+; CHECK-O2-NEXT:    addi a0, a0, -1
+; CHECK-O2-NEXT:    mulw a0, a0, a1
+; CHECK-O2-NEXT:    tail foo
+;
+; CHECK-O3-LABEL: test:
+; CHECK-O3:       # %bb.0: # %entry
+; CHECK-O3-NEXT:    sext.w a1, a0
+; CHECK-O3-NEXT:    blez a1, .LBB0_2
+; CHECK-O3-NEXT:  # %bb.1: # %if.then
+; CHECK-O3-NEXT:    lui a1, %hi(a)
+; CHECK-O3-NEXT:    lw a1, %lo(a)(a1)
+; CHECK-O3-NEXT:    mul a0, a1, a0
+; CHECK-O3-NEXT:    lui a1, %hi(c)
+; CHECK-O3-NEXT:    lw a1, %lo(c)(a1)
+; CHECK-O3-NEXT:    addi a0, a0, -1
+; CHECK-O3-NEXT:    mulw a0, a0, a1
+; CHECK-O3-NEXT:    tail foo
+; CHECK-O3-NEXT:  .LBB0_2: # %if.else
+; CHECK-O3-NEXT:    lui a1, %hi(b)
+; CHECK-O3-NEXT:    lw a1, %lo(b)(a1)
+; CHECK-O3-NEXT:    divw a0, a1, a0
+; CHECK-O3-NEXT:    lui a1, %hi(c)
+; CHECK-O3-NEXT:    lw a1, %lo(c)(a1)
+; CHECK-O3-NEXT:    addi a0, a0, -1
+; CHECK-O3-NEXT:    mulw a0, a0, a1
+; CHECK-O3-NEXT:    tail foo
+entry:
+  %cmp = icmp sgt i32 %n, 0
+  br i1 %cmp, label %if.then, label %if.else
+
+if.then:
+  %va = load i32, ptr @a
+  %mul = mul nsw i32 %va, %n
+  br label %if.end
+
+if.else:
+  %vb = load i32, ptr @b
+  %div = sdiv i32 %vb, %n
+  br label %if.end
+
+if.end:
+  %phi = phi i32 [ %mul, %if.then ], [ %div, %if.else ]
+  %vc = load i32, ptr @c
+  %add = add nsw i32 %phi, -1
+  %arg = mul i32 %add, %vc
+  %ret = tail call i32 @foo(i32 %arg)
+  ret i32 %ret
+}

``````````

</details>


https://github.com/llvm/llvm-project/pull/98873


More information about the llvm-commits mailing list