[llvm] 691ff46 - [AMDGPU] Skip CFIInstructions in SIInsertWaitcnts
Scott Linder via llvm-commits
llvm-commits at lists.llvm.org
Wed Jun 17 09:41:28 PDT 2020
Author: Scott Linder
Date: 2020-06-17T12:41:03-04:00
New Revision: 691ff4682f8c263082750d43e03ea378841acc8d
URL: https://github.com/llvm/llvm-project/commit/691ff4682f8c263082750d43e03ea378841acc8d
DIFF: https://github.com/llvm/llvm-project/commit/691ff4682f8c263082750d43e03ea378841acc8d.diff
LOG: [AMDGPU] Skip CFIInstructions in SIInsertWaitcnts
Summary:
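CFI instructions emitted during prologue/epilogue insertion (PEI) at the
beginning of the prologue need to cover any waitcnts inserted on function
entry, so insert those waitcnts after any leading meta instructions.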
Reviewers: arsenm, t-tye, RamNalamothu
Reviewed By: arsenm
Subscribers: arsenm, kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, hiraditya, kerbowa, llvm-commits
Tags: #llvm, #debug-info
Differential Revision: https://reviews.llvm.org/D76881
Added:
llvm/test/CodeGen/AMDGPU/waitcnt-skip-meta.mir
Modified:
llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
llvm/test/CodeGen/AMDGPU/split-arg-dbg-value.ll
llvm/test/CodeGen/AMDGPU/vgpr-descriptor-waterfall-loop-idom-update.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
index 67c7ff1fcda4..2a157eb20ab4 100644
--- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
@@ -1632,13 +1632,15 @@ bool SIInsertWaitcnts::runOnMachineFunction(MachineFunction &MF) {
// TODO: Could insert earlier and schedule more liberally with operations
// that only use caller preserved registers.
MachineBasicBlock &EntryBB = MF.front();
+ MachineBasicBlock::iterator I = EntryBB.begin();
+ for (MachineBasicBlock::iterator E = EntryBB.end();
+ I != E && (I->isPHI() || I->isMetaInstruction()); ++I)
+ ;
+ BuildMI(EntryBB, I, DebugLoc(), TII->get(AMDGPU::S_WAITCNT)).addImm(0);
if (ST->hasVscnt())
- BuildMI(EntryBB, EntryBB.getFirstNonPHI(), DebugLoc(),
- TII->get(AMDGPU::S_WAITCNT_VSCNT))
- .addReg(AMDGPU::SGPR_NULL, RegState::Undef)
- .addImm(0);
- BuildMI(EntryBB, EntryBB.getFirstNonPHI(), DebugLoc(), TII->get(AMDGPU::S_WAITCNT))
- .addImm(0);
+ BuildMI(EntryBB, I, DebugLoc(), TII->get(AMDGPU::S_WAITCNT_VSCNT))
+ .addReg(AMDGPU::SGPR_NULL, RegState::Undef)
+ .addImm(0);
Modified = true;
}
diff --git a/llvm/test/CodeGen/AMDGPU/split-arg-dbg-value.ll b/llvm/test/CodeGen/AMDGPU/split-arg-dbg-value.ll
index 06a4d90dfeb3..85ea50cb4cbf 100644
--- a/llvm/test/CodeGen/AMDGPU/split-arg-dbg-value.ll
+++ b/llvm/test/CodeGen/AMDGPU/split-arg-dbg-value.ll
@@ -7,12 +7,12 @@ define hidden <4 x float> @split_v4f32_arg(<4 x float> returned %arg) local_unna
; GCN-NEXT: .file 0
; GCN-NEXT: .loc 0 3 0 ; /tmp/dbg.cl:3:0
; GCN-NEXT: ; %bb.0:
-; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT: .Ltmp0:
; GCN-NEXT: ;DEBUG_VALUE: split_v4f32_arg:arg <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 96 32] $vgpr3
; GCN-NEXT: ;DEBUG_VALUE: split_v4f32_arg:arg <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 64 32] $vgpr2
; GCN-NEXT: ;DEBUG_VALUE: split_v4f32_arg:arg <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 32 32] $vgpr1
; GCN-NEXT: ;DEBUG_VALUE: split_v4f32_arg:arg <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 0 32] $vgpr0
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: .Ltmp0:
; GCN-NEXT: .loc 0 4 5 prologue_end ; /tmp/dbg.cl:4:5
; GCN-NEXT: s_setpc_b64 s[30:31]
; GCN-NEXT: .Ltmp1:
@@ -25,14 +25,14 @@ define hidden <4 x float> @split_v4f32_multi_arg(<4 x float> %arg0, <2 x float>
; GCN: .Lfunc_begin1:
; GCN-NEXT: .loc 0 7 0 ; /tmp/dbg.cl:7:0
; GCN-NEXT: ; %bb.0:
-; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT: .Ltmp2:
; GCN-NEXT: ;DEBUG_VALUE: split_v4f32_multi_arg:arg1 <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 32 32] $vgpr5
; GCN-NEXT: ;DEBUG_VALUE: split_v4f32_multi_arg:arg1 <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 0 32] $vgpr4
; GCN-NEXT: ;DEBUG_VALUE: split_v4f32_multi_arg:arg0 <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 96 32] $vgpr3
; GCN-NEXT: ;DEBUG_VALUE: split_v4f32_multi_arg:arg0 <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 64 32] $vgpr2
; GCN-NEXT: ;DEBUG_VALUE: split_v4f32_multi_arg:arg0 <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 32 32] $vgpr1
; GCN-NEXT: ;DEBUG_VALUE: split_v4f32_multi_arg:arg0 <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 0 32] $vgpr0
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: .Ltmp2:
; GCN-NEXT: .loc 0 8 17 prologue_end ; /tmp/dbg.cl:8:17
; GCN-NEXT: v_add_f32_e32 v0, v4, v0
; GCN-NEXT: .Ltmp3:
@@ -57,10 +57,10 @@ define hidden <4 x half> @split_v4f16_arg(<4 x half> returned %arg) local_unname
; GCN: .Lfunc_begin2:
; GCN-NEXT: .loc 0 11 0 is_stmt 1 ; /tmp/dbg.cl:11:0
; GCN-NEXT: ; %bb.0:
-; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT: .Ltmp8:
; GCN-NEXT: ;DEBUG_VALUE: split_v4f16_arg:arg <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 32 32] $vgpr1
; GCN-NEXT: ;DEBUG_VALUE: split_v4f16_arg:arg <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 0 32] $vgpr0
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: .Ltmp8:
; GCN-NEXT: .loc 0 12 5 prologue_end ; /tmp/dbg.cl:12:5
; GCN-NEXT: s_setpc_b64 s[30:31]
; GCN-NEXT: .Ltmp9:
@@ -73,10 +73,10 @@ define hidden double @split_f64_arg(double returned %arg) local_unnamed_addr #0
; GCN: .Lfunc_begin3:
; GCN-NEXT: .loc 0 15 0 ; /tmp/dbg.cl:15:0
; GCN-NEXT: ; %bb.0:
-; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT: .Ltmp10:
; GCN-NEXT: ;DEBUG_VALUE: split_f64_arg:arg <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 32 32] $vgpr1
; GCN-NEXT: ;DEBUG_VALUE: split_f64_arg:arg <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 0 32] $vgpr0
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: .Ltmp10:
; GCN-NEXT: .loc 0 16 5 prologue_end ; /tmp/dbg.cl:16:5
; GCN-NEXT: s_setpc_b64 s[30:31]
; GCN-NEXT: .Ltmp11:
@@ -89,12 +89,12 @@ define hidden <2 x double> @split_v2f64_arg(<2 x double> returned %arg) local_un
; GCN: .Lfunc_begin4:
; GCN-NEXT: .loc 0 19 0 ; /tmp/dbg.cl:19:0
; GCN-NEXT: ; %bb.0:
-; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT: .Ltmp12:
; GCN-NEXT: ;DEBUG_VALUE: split_v2f64_arg:arg <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 96 32] $vgpr3
; GCN-NEXT: ;DEBUG_VALUE: split_v2f64_arg:arg <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 64 32] $vgpr2
; GCN-NEXT: ;DEBUG_VALUE: split_v2f64_arg:arg <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 32 32] $vgpr1
; GCN-NEXT: ;DEBUG_VALUE: split_v2f64_arg:arg <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 0 32] $vgpr0
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: .Ltmp12:
; GCN-NEXT: .loc 0 20 5 prologue_end ; /tmp/dbg.cl:20:5
; GCN-NEXT: s_setpc_b64 s[30:31]
; GCN-NEXT: .Ltmp13:
@@ -107,10 +107,10 @@ define hidden i64 @split_i64_arg(i64 returned %arg) local_unnamed_addr #0 !dbg !
; GCN: .Lfunc_begin5:
; GCN-NEXT: .loc 0 23 0 ; /tmp/dbg.cl:23:0
; GCN-NEXT: ; %bb.0:
-; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT: .Ltmp14:
; GCN-NEXT: ;DEBUG_VALUE: split_i64_arg:arg <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 32 32] $vgpr1
; GCN-NEXT: ;DEBUG_VALUE: split_i64_arg:arg <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 0 32] $vgpr0
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: .Ltmp14:
; GCN-NEXT: .loc 0 24 5 prologue_end ; /tmp/dbg.cl:24:5
; GCN-NEXT: s_setpc_b64 s[30:31]
; GCN-NEXT: .Ltmp15:
@@ -123,10 +123,10 @@ define hidden i8 addrspace(1)* @split_ptr_arg(i8 addrspace(1)* readnone returned
; GCN: .Lfunc_begin6:
; GCN-NEXT: .loc 0 27 0 ; /tmp/dbg.cl:27:0
; GCN-NEXT: ; %bb.0:
-; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT: .Ltmp16:
; GCN-NEXT: ;DEBUG_VALUE: split_ptr_arg:arg <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 32 32] $vgpr1
; GCN-NEXT: ;DEBUG_VALUE: split_ptr_arg:arg <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 0 32] $vgpr0
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: .Ltmp16:
; GCN-NEXT: .loc 0 28 5 prologue_end ; /tmp/dbg.cl:28:5
; GCN-NEXT: s_setpc_b64 s[30:31]
; GCN-NEXT: .Ltmp17:
diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-descriptor-waterfall-loop-idom-update.ll b/llvm/test/CodeGen/AMDGPU/vgpr-descriptor-waterfall-loop-idom-update.ll
index f323cf7d76f2..384cb1b4699d 100644
--- a/llvm/test/CodeGen/AMDGPU/vgpr-descriptor-waterfall-loop-idom-update.ll
+++ b/llvm/test/CodeGen/AMDGPU/vgpr-descriptor-waterfall-loop-idom-update.ll
@@ -1,12 +1,12 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -march=amdgcn -mcpu=gfx1010 | FileCheck %s --check-prefix=GCN
-define void @vgpr_descriptor_waterfall_loop_idom_update(<4 x i32>* %arg) {
+define void @vgpr_descriptor_waterfall_loop_idom_update(<4 x i32>* %arg) #0 {
; GCN-LABEL: vgpr_descriptor_waterfall_loop_idom_update:
; GCN: ; %bb.0: ; %entry
+; GCN-NEXT: ; implicit-def: $vcc_hi
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: s_waitcnt_vscnt null, 0x0
-; GCN-NEXT: ; implicit-def: $vcc_hi
; GCN-NEXT: BB0_1: ; %bb0
; GCN-NEXT: ; =>This Loop Header: Depth=1
; GCN-NEXT: ; Child Loop BB0_2 Depth 2
diff --git a/llvm/test/CodeGen/AMDGPU/waitcnt-skip-meta.mir b/llvm/test/CodeGen/AMDGPU/waitcnt-skip-meta.mir
new file mode 100644
index 000000000000..00a2b78e903f
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/waitcnt-skip-meta.mir
@@ -0,0 +1,96 @@
+# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass si-insert-waitcnts %s -o - | FileCheck %s
+
+# Ensure we insert waitcnts after any meta instructions at the start of
+# non-kernel functions. Without this, the inserted waitcnts can affect e.g. the
+# PC ranges covered by CFI and debug values.
+
+---
+# CHECK-LABEL: name: skip_implicit_def{{$}}
+# CHECK: IMPLICIT_DEF
+# CHECK: S_WAITCNT
+name: skip_implicit_def
+machineFunctionInfo:
+body: |
+ bb.0:
+ $sgpr0 = IMPLICIT_DEF
+...
+---
+# CHECK-LABEL: name: skip_kill{{$}}
+# CHECK: KILL
+# CHECK: S_WAITCNT
+name: skip_kill
+machineFunctionInfo:
+body: |
+ bb.0:
+ KILL $sgpr0
+...
+---
+# CHECK-LABEL: name: skip_cfi{{$}}
+# CHECK: CFI_INSTRUCTION
+# CHECK: S_WAITCNT
+name: skip_cfi
+machineFunctionInfo:
+body: |
+ bb.0:
+ CFI_INSTRUCTION undefined $sgpr0
+...
+---
+# CHECK-LABEL: name: skip_eh_label{{$}}
+# CHECK: EH_LABEL
+# CHECK: S_WAITCNT
+name: skip_eh_label
+machineFunctionInfo:
+body: |
+ bb.0:
+ EH_LABEL 0
+...
+---
+# CHECK-LABEL: name: skip_gc_label{{$}}
+# CHECK: GC_LABEL
+# CHECK: S_WAITCNT
+name: skip_gc_label
+machineFunctionInfo:
+body: |
+ bb.0:
+ GC_LABEL 0
+...
+---
+# CHECK-LABEL: name: skip_dbg_value{{$}}
+# CHECK: DBG_VALUE
+# CHECK: S_WAITCNT
+name: skip_dbg_value
+machineFunctionInfo:
+body: |
+ bb.0:
+ DBG_VALUE 0
+...
+---
+# CHECK-LABEL: name: skip_dbg_label{{$}}
+# CHECK: DBG_LABEL
+# CHECK: S_WAITCNT
+name: skip_dbg_label
+machineFunctionInfo:
+body: |
+ bb.0:
+ DBG_LABEL 0
+...
+---
+# CHECK-LABEL: name: skip_lifetime_start{{$}}
+# CHECK: LIFETIME_START
+# CHECK: S_WAITCNT
+name: skip_lifetime_start
+machineFunctionInfo:
+body: |
+ bb.0:
+ LIFETIME_START 0
+...
+---
+# CHECK-LABEL: name: skip_lifetime_end{{$}}
+# CHECK: LIFETIME_END
+# CHECK: S_WAITCNT
+name: skip_lifetime_end
+machineFunctionInfo:
+body: |
+ bb.0:
+ LIFETIME_END 0
+...
More information about the llvm-commits
mailing list