[llvm] 691ff46 - [AMDGPU] Skip CFIInstructions in SIInsertWaitcnts

Scott Linder via llvm-commits llvm-commits at lists.llvm.org
Wed Jun 17 09:41:28 PDT 2020


Author: Scott Linder
Date: 2020-06-17T12:41:03-04:00
New Revision: 691ff4682f8c263082750d43e03ea378841acc8d

URL: https://github.com/llvm/llvm-project/commit/691ff4682f8c263082750d43e03ea378841acc8d
DIFF: https://github.com/llvm/llvm-project/commit/691ff4682f8c263082750d43e03ea378841acc8d.diff

LOG: [AMDGPU] Skip CFIInstructions in SIInsertWaitcnts

Summary:
CFI emitted during PEI at the beginning of the prologue needs to apply
to any inserted waitcnts on function entry.

Reviewers: arsenm, t-tye, RamNalamothu

Reviewed By: arsenm

Subscribers: arsenm, kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, hiraditya, kerbowa, llvm-commits

Tags: #llvm, #debug-info

Differential Revision: https://reviews.llvm.org/D76881

Added: 
    llvm/test/CodeGen/AMDGPU/waitcnt-skip-meta.mir

Modified: 
    llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
    llvm/test/CodeGen/AMDGPU/split-arg-dbg-value.ll
    llvm/test/CodeGen/AMDGPU/vgpr-descriptor-waterfall-loop-idom-update.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
index 67c7ff1fcda4..2a157eb20ab4 100644
--- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
@@ -1632,13 +1632,15 @@ bool SIInsertWaitcnts::runOnMachineFunction(MachineFunction &MF) {
     // TODO: Could insert earlier and schedule more liberally with operations
     // that only use caller preserved registers.
     MachineBasicBlock &EntryBB = MF.front();
+    MachineBasicBlock::iterator I = EntryBB.begin();
+    for (MachineBasicBlock::iterator E = EntryBB.end();
+         I != E && (I->isPHI() || I->isMetaInstruction()); ++I)
+      ;
+    BuildMI(EntryBB, I, DebugLoc(), TII->get(AMDGPU::S_WAITCNT)).addImm(0);
     if (ST->hasVscnt())
-      BuildMI(EntryBB, EntryBB.getFirstNonPHI(), DebugLoc(),
-              TII->get(AMDGPU::S_WAITCNT_VSCNT))
-      .addReg(AMDGPU::SGPR_NULL, RegState::Undef)
-      .addImm(0);
-    BuildMI(EntryBB, EntryBB.getFirstNonPHI(), DebugLoc(), TII->get(AMDGPU::S_WAITCNT))
-      .addImm(0);
+      BuildMI(EntryBB, I, DebugLoc(), TII->get(AMDGPU::S_WAITCNT_VSCNT))
+          .addReg(AMDGPU::SGPR_NULL, RegState::Undef)
+          .addImm(0);
 
     Modified = true;
   }

diff  --git a/llvm/test/CodeGen/AMDGPU/split-arg-dbg-value.ll b/llvm/test/CodeGen/AMDGPU/split-arg-dbg-value.ll
index 06a4d90dfeb3..85ea50cb4cbf 100644
--- a/llvm/test/CodeGen/AMDGPU/split-arg-dbg-value.ll
+++ b/llvm/test/CodeGen/AMDGPU/split-arg-dbg-value.ll
@@ -7,12 +7,12 @@ define hidden <4 x float> @split_v4f32_arg(<4 x float> returned %arg) local_unna
 ; GCN-NEXT:    .file 0
 ; GCN-NEXT:    .loc 0 3 0 ; /tmp/dbg.cl:3:0
 ; GCN-NEXT:  ; %bb.0:
-; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:  .Ltmp0:
 ; GCN-NEXT:    ;DEBUG_VALUE: split_v4f32_arg:arg <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 96 32] $vgpr3
 ; GCN-NEXT:    ;DEBUG_VALUE: split_v4f32_arg:arg <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 64 32] $vgpr2
 ; GCN-NEXT:    ;DEBUG_VALUE: split_v4f32_arg:arg <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 32 32] $vgpr1
 ; GCN-NEXT:    ;DEBUG_VALUE: split_v4f32_arg:arg <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 0 32] $vgpr0
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:  .Ltmp0:
 ; GCN-NEXT:    .loc 0 4 5 prologue_end ; /tmp/dbg.cl:4:5
 ; GCN-NEXT:    s_setpc_b64 s[30:31]
 ; GCN-NEXT:  .Ltmp1:
@@ -25,14 +25,14 @@ define hidden <4 x float> @split_v4f32_multi_arg(<4 x float> %arg0, <2 x float>
 ; GCN:       .Lfunc_begin1:
 ; GCN-NEXT:    .loc 0 7 0 ; /tmp/dbg.cl:7:0
 ; GCN-NEXT:  ; %bb.0:
-; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:  .Ltmp2:
 ; GCN-NEXT:    ;DEBUG_VALUE: split_v4f32_multi_arg:arg1 <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 32 32] $vgpr5
 ; GCN-NEXT:    ;DEBUG_VALUE: split_v4f32_multi_arg:arg1 <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 0 32] $vgpr4
 ; GCN-NEXT:    ;DEBUG_VALUE: split_v4f32_multi_arg:arg0 <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 96 32] $vgpr3
 ; GCN-NEXT:    ;DEBUG_VALUE: split_v4f32_multi_arg:arg0 <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 64 32] $vgpr2
 ; GCN-NEXT:    ;DEBUG_VALUE: split_v4f32_multi_arg:arg0 <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 32 32] $vgpr1
 ; GCN-NEXT:    ;DEBUG_VALUE: split_v4f32_multi_arg:arg0 <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 0 32] $vgpr0
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:  .Ltmp2:
 ; GCN-NEXT:    .loc 0 8 17 prologue_end ; /tmp/dbg.cl:8:17
 ; GCN-NEXT:    v_add_f32_e32 v0, v4, v0
 ; GCN-NEXT:  .Ltmp3:
@@ -57,10 +57,10 @@ define hidden <4 x half> @split_v4f16_arg(<4 x half> returned %arg) local_unname
 ; GCN:       .Lfunc_begin2:
 ; GCN-NEXT:    .loc 0 11 0 is_stmt 1 ; /tmp/dbg.cl:11:0
 ; GCN-NEXT:  ; %bb.0:
-; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:  .Ltmp8:
 ; GCN-NEXT:    ;DEBUG_VALUE: split_v4f16_arg:arg <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 32 32] $vgpr1
 ; GCN-NEXT:    ;DEBUG_VALUE: split_v4f16_arg:arg <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 0 32] $vgpr0
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:  .Ltmp8:
 ; GCN-NEXT:    .loc 0 12 5 prologue_end ; /tmp/dbg.cl:12:5
 ; GCN-NEXT:    s_setpc_b64 s[30:31]
 ; GCN-NEXT:  .Ltmp9:
@@ -73,10 +73,10 @@ define hidden double @split_f64_arg(double returned %arg) local_unnamed_addr #0
 ; GCN:       .Lfunc_begin3:
 ; GCN-NEXT:    .loc 0 15 0 ; /tmp/dbg.cl:15:0
 ; GCN-NEXT:  ; %bb.0:
-; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:  .Ltmp10:
 ; GCN-NEXT:    ;DEBUG_VALUE: split_f64_arg:arg <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 32 32] $vgpr1
 ; GCN-NEXT:    ;DEBUG_VALUE: split_f64_arg:arg <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 0 32] $vgpr0
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:  .Ltmp10:
 ; GCN-NEXT:    .loc 0 16 5 prologue_end ; /tmp/dbg.cl:16:5
 ; GCN-NEXT:    s_setpc_b64 s[30:31]
 ; GCN-NEXT:  .Ltmp11:
@@ -89,12 +89,12 @@ define hidden <2 x double> @split_v2f64_arg(<2 x double> returned %arg) local_un
 ; GCN:       .Lfunc_begin4:
 ; GCN-NEXT:    .loc 0 19 0 ; /tmp/dbg.cl:19:0
 ; GCN-NEXT:  ; %bb.0:
-; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:  .Ltmp12:
 ; GCN-NEXT:    ;DEBUG_VALUE: split_v2f64_arg:arg <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 96 32] $vgpr3
 ; GCN-NEXT:    ;DEBUG_VALUE: split_v2f64_arg:arg <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 64 32] $vgpr2
 ; GCN-NEXT:    ;DEBUG_VALUE: split_v2f64_arg:arg <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 32 32] $vgpr1
 ; GCN-NEXT:    ;DEBUG_VALUE: split_v2f64_arg:arg <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 0 32] $vgpr0
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:  .Ltmp12:
 ; GCN-NEXT:    .loc 0 20 5 prologue_end ; /tmp/dbg.cl:20:5
 ; GCN-NEXT:    s_setpc_b64 s[30:31]
 ; GCN-NEXT:  .Ltmp13:
@@ -107,10 +107,10 @@ define hidden i64 @split_i64_arg(i64 returned %arg) local_unnamed_addr #0 !dbg !
 ; GCN:       .Lfunc_begin5:
 ; GCN-NEXT:    .loc 0 23 0 ; /tmp/dbg.cl:23:0
 ; GCN-NEXT:  ; %bb.0:
-; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:  .Ltmp14:
 ; GCN-NEXT:    ;DEBUG_VALUE: split_i64_arg:arg <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 32 32] $vgpr1
 ; GCN-NEXT:    ;DEBUG_VALUE: split_i64_arg:arg <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 0 32] $vgpr0
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:  .Ltmp14:
 ; GCN-NEXT:    .loc 0 24 5 prologue_end ; /tmp/dbg.cl:24:5
 ; GCN-NEXT:    s_setpc_b64 s[30:31]
 ; GCN-NEXT:  .Ltmp15:
@@ -123,10 +123,10 @@ define hidden i8 addrspace(1)* @split_ptr_arg(i8 addrspace(1)* readnone returned
 ; GCN:       .Lfunc_begin6:
 ; GCN-NEXT:    .loc 0 27 0 ; /tmp/dbg.cl:27:0
 ; GCN-NEXT:  ; %bb.0:
-; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:  .Ltmp16:
 ; GCN-NEXT:    ;DEBUG_VALUE: split_ptr_arg:arg <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 32 32] $vgpr1
 ; GCN-NEXT:    ;DEBUG_VALUE: split_ptr_arg:arg <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef, DW_OP_LLVM_fragment 0 32] $vgpr0
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:  .Ltmp16:
 ; GCN-NEXT:    .loc 0 28 5 prologue_end ; /tmp/dbg.cl:28:5
 ; GCN-NEXT:    s_setpc_b64 s[30:31]
 ; GCN-NEXT:  .Ltmp17:

diff  --git a/llvm/test/CodeGen/AMDGPU/vgpr-descriptor-waterfall-loop-idom-update.ll b/llvm/test/CodeGen/AMDGPU/vgpr-descriptor-waterfall-loop-idom-update.ll
index f323cf7d76f2..384cb1b4699d 100644
--- a/llvm/test/CodeGen/AMDGPU/vgpr-descriptor-waterfall-loop-idom-update.ll
+++ b/llvm/test/CodeGen/AMDGPU/vgpr-descriptor-waterfall-loop-idom-update.ll
@@ -1,12 +1,12 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -march=amdgcn -mcpu=gfx1010 | FileCheck %s --check-prefix=GCN
 
-define void @vgpr_descriptor_waterfall_loop_idom_update(<4 x i32>* %arg) {
+define void @vgpr_descriptor_waterfall_loop_idom_update(<4 x i32>* %arg) #0 {
 ; GCN-LABEL: vgpr_descriptor_waterfall_loop_idom_update:
 ; GCN:       ; %bb.0: ; %entry
+; GCN-NEXT:    ; implicit-def: $vcc_hi
 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GCN-NEXT:    s_waitcnt_vscnt null, 0x0
-; GCN-NEXT:    ; implicit-def: $vcc_hi
 ; GCN-NEXT:  BB0_1: ; %bb0
 ; GCN-NEXT:    ; =>This Loop Header: Depth=1
 ; GCN-NEXT:    ; Child Loop BB0_2 Depth 2

diff  --git a/llvm/test/CodeGen/AMDGPU/waitcnt-skip-meta.mir b/llvm/test/CodeGen/AMDGPU/waitcnt-skip-meta.mir
new file mode 100644
index 000000000000..00a2b78e903f
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/waitcnt-skip-meta.mir
@@ -0,0 +1,96 @@
+# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass si-insert-waitcnts  %s -o - | FileCheck %s
+
+# Ensure we insert waitcnts after any meta instructions at the start of
+# non-kernel functions. Without this, the inserted waitcnts can affect e.g. the
+# PC ranges covered by CFI and debug values.
+
+---
+# CHECK-LABEL: name: skip_implicit_def{{$}}
+# CHECK: IMPLICIT_DEF
+# CHECK: S_WAITCNT
+name: skip_implicit_def
+machineFunctionInfo:
+body: |
+  bb.0:
+    $sgpr0 = IMPLICIT_DEF
+...
+---
+# CHECK-LABEL: name: skip_kill{{$}}
+# CHECK: KILL
+# CHECK: S_WAITCNT
+name: skip_kill
+machineFunctionInfo:
+body: |
+  bb.0:
+    KILL $sgpr0
+...
+---
+# CHECK-LABEL: name: skip_cfi{{$}}
+# CHECK: CFI_INSTRUCTION
+# CHECK: S_WAITCNT
+name: skip_cfi
+machineFunctionInfo:
+body: |
+  bb.0:
+    CFI_INSTRUCTION undefined $sgpr0
+...
+---
+# CHECK-LABEL: name: skip_eh_label{{$}}
+# CHECK: EH_LABEL
+# CHECK: S_WAITCNT
+name: skip_eh_label
+machineFunctionInfo:
+body: |
+  bb.0:
+    EH_LABEL 0
+...
+---
+# CHECK-LABEL: name: skip_gc_label{{$}}
+# CHECK: GC_LABEL
+# CHECK: S_WAITCNT
+name: skip_gc_label
+machineFunctionInfo:
+body: |
+  bb.0:
+    GC_LABEL 0
+...
+---
+# CHECK-LABEL: name: skip_dbg_value{{$}}
+# CHECK: DBG_VALUE
+# CHECK: S_WAITCNT
+name: skip_dbg_value
+machineFunctionInfo:
+body: |
+  bb.0:
+    DBG_VALUE 0
+...
+---
+# CHECK-LABEL: name: skip_dbg_label{{$}}
+# CHECK: DBG_LABEL
+# CHECK: S_WAITCNT
+name: skip_dbg_label
+machineFunctionInfo:
+body: |
+  bb.0:
+    DBG_LABEL 0
+...
+---
+# CHECK-LABEL: name: skip_lifetime_start{{$}}
+# CHECK: LIFETIME_START
+# CHECK: S_WAITCNT
+name: skip_lifetime_start
+machineFunctionInfo:
+body: |
+  bb.0:
+    LIFETIME_START 0
+...
+---
+# CHECK-LABEL: name: skip_lifetime_end{{$}}
+# CHECK: LIFETIME_END
+# CHECK: S_WAITCNT
+name: skip_lifetime_end
+machineFunctionInfo:
+body: |
+  bb.0:
+    LIFETIME_END 0
+...


        


More information about the llvm-commits mailing list