[llvm] 35ea326 - [AMDGPU] Try to avoid inserting duplicate s_inst_prefetch
Carl Ritson via llvm-commits
llvm-commits at lists.llvm.org
Thu Apr 14 00:21:35 PDT 2022
Author: Carl Ritson
Date: 2022-04-14T16:06:24+09:00
New Revision: 35ea326047ef1220f26dc69593db9842a7dfeec1
URL: https://github.com/llvm/llvm-project/commit/35ea326047ef1220f26dc69593db9842a7dfeec1
DIFF: https://github.com/llvm/llvm-project/commit/35ea326047ef1220f26dc69593db9842a7dfeec1.diff
LOG: [AMDGPU] Try to avoid inserting duplicate s_inst_prefetch
Check for an existing s_inst_prefetch instruction at each insertion point
when configuring prefetches during loop alignment, and skip inserting a
new one if an s_inst_prefetch is already there.
Reviewed By: rampitec, foad
Differential Revision: https://reviews.llvm.org/D123569
Added:
Modified:
llvm/lib/Target/AMDGPU/SIISelLowering.cpp
llvm/test/CodeGen/AMDGPU/no-dup-inst-prefetch.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index bb65557ae082e..0c344df3f018a 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -12312,13 +12312,17 @@ Align SITargetLowering::getPrefLoopAlignment(MachineLoop *ML) const {
MachineBasicBlock *Exit = ML->getExitBlock();
if (Pre && Exit) {
- BuildMI(*Pre, Pre->getFirstTerminator(), DebugLoc(),
- TII->get(AMDGPU::S_INST_PREFETCH))
- .addImm(1); // prefetch 2 lines behind PC
-
- BuildMI(*Exit, Exit->getFirstNonDebugInstr(), DebugLoc(),
- TII->get(AMDGPU::S_INST_PREFETCH))
- .addImm(2); // prefetch 1 line behind PC
+ auto PreTerm = Pre->getFirstTerminator();
+ if (PreTerm == Pre->begin() ||
+ std::prev(PreTerm)->getOpcode() != AMDGPU::S_INST_PREFETCH)
+ BuildMI(*Pre, PreTerm, DebugLoc(), TII->get(AMDGPU::S_INST_PREFETCH))
+ .addImm(1); // prefetch 2 lines behind PC
+
+ auto ExitHead = Exit->getFirstNonDebugInstr();
+ if (ExitHead == Exit->end() ||
+ ExitHead->getOpcode() != AMDGPU::S_INST_PREFETCH)
+ BuildMI(*Exit, ExitHead, DebugLoc(), TII->get(AMDGPU::S_INST_PREFETCH))
+ .addImm(2); // prefetch 1 line behind PC
}
return CacheLineAlign;
diff --git a/llvm/test/CodeGen/AMDGPU/no-dup-inst-prefetch.ll b/llvm/test/CodeGen/AMDGPU/no-dup-inst-prefetch.ll
index cbdb00ea83ab6..26d0c050c8842 100644
--- a/llvm/test/CodeGen/AMDGPU/no-dup-inst-prefetch.ll
+++ b/llvm/test/CodeGen/AMDGPU/no-dup-inst-prefetch.ll
@@ -11,8 +11,6 @@ define amdgpu_cs void @_amdgpu_cs_main(float %0, i32 %1) {
; GFX10-NEXT: s_mov_b32 s1, 0
; GFX10-NEXT: ; implicit-def: $sgpr2
; GFX10-NEXT: s_inst_prefetch 0x1
-; GFX10-NEXT: s_inst_prefetch 0x1
-; GFX10-NEXT: s_inst_prefetch 0x1
; GFX10-NEXT: s_branch .LBB0_2
; GFX10-NEXT: .p2align 6
; GFX10-NEXT: .LBB0_1: ; %Flow
@@ -50,8 +48,6 @@ define amdgpu_cs void @_amdgpu_cs_main(float %0, i32 %1) {
; GFX10-NEXT: s_branch .LBB0_1
; GFX10-NEXT: .LBB0_4: ; %loop0_merge
; GFX10-NEXT: s_inst_prefetch 0x2
-; GFX10-NEXT: s_inst_prefetch 0x2
-; GFX10-NEXT: s_inst_prefetch 0x2
; GFX10-NEXT: s_endpgm
branch1_true:
br label %2
More information about the llvm-commits mailing list