[llvm] cd88044 - [CodeGen][AArch64] Add TargetInstrInfo hook to modify the TailDuplicateSize default threshold

Mon Feb 8 05:28:12 PST 2021

Author: Nicholas Guy
Date: 2021-02-08T13:28:00Z
New Revision: cd880442ae66561e45257f2440321a0a671acae7

URL: https://github.com/llvm/llvm-project/commit/cd880442ae66561e45257f2440321a0a671acae7
DIFF: https://github.com/llvm/llvm-project/commit/cd880442ae66561e45257f2440321a0a671acae7.diff

LOG: [CodeGen][AArch64] Add TargetInstrInfo hook to modify the TailDuplicateSize default threshold

Different targets might handle branch performance differently, so this patch allows
targets to specify the default TailDuplicateSize threshold. This threshold defines how
small a block must be for it to still be tail-duplicated, generating straight-line code
instead of a branch.
This patch also provides override values of that threshold for the AArch64 target.

Differential Revision: https://reviews.llvm.org/D95631

Added: 
    llvm/test/CodeGen/AArch64/aarch64-tail-dup-size.ll

Modified: 
    llvm/include/llvm/CodeGen/TargetInstrInfo.h
    llvm/lib/CodeGen/MachineBlockPlacement.cpp
    llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
    llvm/lib/Target/AArch64/AArch64InstrInfo.h

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/CodeGen/TargetInstrInfo.h b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
index 2e1806aa8bfc..8db4e2c5b787 100644

--- a/llvm/include/llvm/CodeGen/TargetInstrInfo.h
+++ b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
@@ -1937,6 +1937,13 @@ class TargetInstrInfo : public MCInstrInfo {
     return Formatter.get();
   }
 
+  /// Returns the target-specific default value for tail duplication.
+  /// This value will be used if the tail-dup-placement-threshold argument is
+  /// not provided.
+  virtual unsigned getTailDuplicateSize(CodeGenOpt::Level OptLevel) const {
+    return OptLevel >= CodeGenOpt::Aggressive ? 4 : 2;
+  }
+
 private:
   mutable std::unique_ptr<MIRFormatter> Formatter;
   unsigned CallFrameSetupOpcode, CallFrameDestroyOpcode;

diff  --git a/llvm/lib/CodeGen/MachineBlockPlacement.cpp b/llvm/lib/CodeGen/MachineBlockPlacement.cpp
index 048baa460e49..2e7584876b54 100644
--- a/llvm/lib/CodeGen/MachineBlockPlacement.cpp
+++ b/llvm/lib/CodeGen/MachineBlockPlacement.cpp
@@ -3337,6 +3337,13 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) {
       TailDupSize = TailDupPlacementAggressiveThreshold;
   }
 
+  // If there's no threshold provided through options, query the target
+  // information for a threshold instead.
+  if (TailDupPlacementThreshold.getNumOccurrences() == 0 &&
+      (PassConfig->getOptLevel() < CodeGenOpt::Aggressive ||
+       TailDupPlacementAggressiveThreshold.getNumOccurrences() == 0))
+    TailDupSize = TII->getTailDuplicateSize(PassConfig->getOptLevel());
+
   if (allowTailDupPlacement()) {
     MPDT = &getAnalysis<MachinePostDominatorTree>();
     bool OptForSize = MF.getFunction().hasOptSize() ||

diff  --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index 6b38e216a854..0e60795caf47 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -7183,6 +7183,11 @@ bool AArch64InstrInfo::isWhileOpcode(unsigned Opc) const {
   return get(Opc).TSFlags & AArch64::InstrFlagIsWhile;
 }
 
+unsigned int
+AArch64InstrInfo::getTailDuplicateSize(CodeGenOpt::Level OptLevel) const {
+  return OptLevel >= CodeGenOpt::Aggressive ? 6 : 2;
+}
+
 unsigned llvm::getBLRCallOpcode(const MachineFunction &MF) {
   if (MF.getSubtarget<AArch64Subtarget>().hardenSlsBlr())
     return AArch64::BLRNoIP;

diff  --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.h b/llvm/lib/Target/AArch64/AArch64InstrInfo.h
index 7434987e0617..3406bc175132 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.h
@@ -299,6 +299,8 @@ class AArch64InstrInfo final : public AArch64GenInstrInfo {
   Optional<ParamLoadedValue> describeLoadedValue(const MachineInstr &MI,
                                                  Register Reg) const override;
 
+  unsigned int getTailDuplicateSize(CodeGenOpt::Level OptLevel) const override;
+
   static void decomposeStackOffsetForFrameOffsets(const StackOffset &Offset,
                                                   int64_t &NumBytes,
                                                   int64_t &NumPredicateVectors,

diff  --git a/llvm/test/CodeGen/AArch64/aarch64-tail-dup-size.ll b/llvm/test/CodeGen/AArch64/aarch64-tail-dup-size.ll
new file mode 100644
index 000000000000..256ea1d0cf6a
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/aarch64-tail-dup-size.ll
@@ -0,0 +1,77 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64-none-linux -O2 < %s | FileCheck %s --check-prefix=CHECK-O2
+; RUN: llc -mtriple=aarch64-none-linux -O3 < %s | FileCheck %s --check-prefix=CHECK-O3
+
+; RUN: llc -mtriple=aarch64-none-linux -tail-dup-size=4 < %s | FileCheck %s --check-prefix=CHECK-O2
+; RUN: llc -mtriple=aarch64-none-linux -tail-dup-placement-threshold=4 < %s | FileCheck %s --check-prefix=CHECK-O2
+; RUN: llc -mtriple=aarch64-none-linux -tail-dup-placement-threshold=6 < %s | FileCheck %s --check-prefix=CHECK-O3
+
+%a = type { %a*, i32, %b }
+%b = type { %c }
+%c = type { i32, i32, [31 x i8] }
+
+@global_ptr = dso_local local_unnamed_addr global %a* null, align 8
+@global_int = dso_local local_unnamed_addr global i32 0, align 4
+
+define dso_local void @testcase(%a** nocapture %arg){
+; CHECK-O2-LABEL: testcase:
+; CHECK-O2:       // %bb.0: // %entry
+; CHECK-O2-NEXT:    adrp x8, global_ptr
+; CHECK-O2-NEXT:    ldr x9, [x8, :lo12:global_ptr]
+; CHECK-O2-NEXT:    cbz x9, .LBB0_2
+; CHECK-O2-NEXT:  // %bb.1: // %if.then
+; CHECK-O2-NEXT:    ldr x9, [x9]
+; CHECK-O2-NEXT:    str x9, [x0]
+; CHECK-O2-NEXT:    ldr x8, [x8, :lo12:global_ptr]
+; CHECK-O2-NEXT:    b .LBB0_3
+; CHECK-O2-NEXT:  .LBB0_2:
+; CHECK-O2-NEXT:    mov x8, xzr
+; CHECK-O2-NEXT:  .LBB0_3: // %if.end
+; CHECK-O2-NEXT:    adrp x9, global_int
+; CHECK-O2-NEXT:    ldr w1, [x9, :lo12:global_int]
+; CHECK-O2-NEXT:    add x2, x8, #16 // =16
+; CHECK-O2-NEXT:    mov w0, #10
+; CHECK-O2-NEXT:    b externalfunc
+;
+; CHECK-O3-LABEL: testcase:
+; CHECK-O3:       // %bb.0: // %entry
+; CHECK-O3-NEXT:    adrp x8, global_ptr
+; CHECK-O3-NEXT:    ldr x9, [x8, :lo12:global_ptr]
+; CHECK-O3-NEXT:    cbz x9, .LBB0_2
+; CHECK-O3-NEXT:  // %bb.1: // %if.then
+; CHECK-O3-NEXT:    ldr x9, [x9]
+; CHECK-O3-NEXT:    str x9, [x0]
+; CHECK-O3-NEXT:    ldr x8, [x8, :lo12:global_ptr]
+; CHECK-O3-NEXT:    adrp x9, global_int
+; CHECK-O3-NEXT:    ldr w1, [x9, :lo12:global_int]
+; CHECK-O3-NEXT:    add x2, x8, #16 // =16
+; CHECK-O3-NEXT:    mov w0, #10
+; CHECK-O3-NEXT:    b externalfunc
+; CHECK-O3-NEXT:  .LBB0_2:
+; CHECK-O3-NEXT:    mov x8, xzr
+; CHECK-O3-NEXT:    adrp x9, global_int
+; CHECK-O3-NEXT:    ldr w1, [x9, :lo12:global_int]
+; CHECK-O3-NEXT:    add x2, x8, #16 // =16
+; CHECK-O3-NEXT:    mov w0, #10
+; CHECK-O3-NEXT:    b externalfunc
+entry:
+  %0 = load %a*, %a** @global_ptr, align 8
+  %cmp.not = icmp eq %a* %0, null
+  br i1 %cmp.not, label %if.end, label %if.then
+
+if.then:                                          ; preds = %entry
+  %1 = getelementptr inbounds %a, %a* %0, i64 0, i32 0
+  %2 = load %a*, %a** %1, align 8
+  store %a* %2, %a** %arg, align 8
+  %.pre = load %a*, %a** @global_ptr, align 8
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %entry
+  %3 = phi %a* [ %.pre, %if.then ], [ null, %entry ]
+  %4 = load i32, i32* @global_int, align 4
+  %5 = getelementptr inbounds %a, %a* %3, i64 0, i32 2, i32 0, i32 1
+  tail call void @externalfunc(i32 10, i32 %4, i32* nonnull %5)
+  ret void
+}
+
+declare dso_local void @externalfunc(i32, i32, i32*)