[llvm] [AMDGPU] Add flag to force emit s_nop (PR #117839)

Jeffrey Byrnes via llvm-commits llvm-commits at lists.llvm.org
Mon Dec 2 11:31:43 PST 2024


================
@@ -0,0 +1,185 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 --run-pass=post-RA-hazard-rec %s -o - | FileCheck -check-prefixes=GCN %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 --amdgpu-snop-padding=0 --run-pass=post-RA-hazard-rec %s -o - | FileCheck -check-prefixes=GCN %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 --amdgpu-snop-padding=1 --run-pass=post-RA-hazard-rec %s -o - | FileCheck -check-prefixes=GCN-NOP1 %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 --amdgpu-snop-padding=20 --run-pass=post-RA-hazard-rec %s -o - | FileCheck -check-prefixes=GCN-NOP20 %s
+
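+# Do not insert s_nop between terminators: bb.0 holds only S_CBRANCH_SCC1 and
+# S_BRANCH and gets no padding, even when the S_ADD_U32 in bb.1 and bb.2 does.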
+
+---
+name: multiple-terminators
+body:             |
+  ; GCN-LABEL: name: multiple-terminators
+  ; GCN: bb.0:
+  ; GCN-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; GCN-NEXT:   liveins: $sgpr0, $sgpr1
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT:   S_CBRANCH_SCC1 %bb.1, implicit $scc
+  ; GCN-NEXT:   S_BRANCH %bb.2, implicit $scc
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT: bb.1:
+  ; GCN-NEXT:   successors: %bb.2(0x80000000)
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT:   $sgpr0 = S_ADD_U32 $sgpr0, 4, implicit-def $scc
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT: bb.2:
+  ; GCN-NEXT:   $sgpr1 = S_ADD_U32 $sgpr1, 2, implicit-def $scc
+  ;
+  ; GCN-NOP1-LABEL: name: multiple-terminators
+  ; GCN-NOP1: bb.0:
+  ; GCN-NOP1-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; GCN-NOP1-NEXT:   liveins: $sgpr0, $sgpr1
+  ; GCN-NOP1-NEXT: {{  $}}
+  ; GCN-NOP1-NEXT:   S_CBRANCH_SCC1 %bb.1, implicit $scc
+  ; GCN-NOP1-NEXT:   S_BRANCH %bb.2, implicit $scc
+  ; GCN-NOP1-NEXT: {{  $}}
+  ; GCN-NOP1-NEXT: bb.1:
+  ; GCN-NOP1-NEXT:   successors: %bb.2(0x80000000)
+  ; GCN-NOP1-NEXT: {{  $}}
+  ; GCN-NOP1-NEXT:   S_NOP 0
+  ; GCN-NOP1-NEXT:   $sgpr0 = S_ADD_U32 $sgpr0, 4, implicit-def $scc
+  ; GCN-NOP1-NEXT: {{  $}}
+  ; GCN-NOP1-NEXT: bb.2:
+  ; GCN-NOP1-NEXT:   S_NOP 0
+  ; GCN-NOP1-NEXT:   $sgpr1 = S_ADD_U32 $sgpr1, 2, implicit-def $scc
+  ;
+  ; GCN-NOP20-LABEL: name: multiple-terminators
+  ; GCN-NOP20: bb.0:
+  ; GCN-NOP20-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; GCN-NOP20-NEXT:   liveins: $sgpr0, $sgpr1
+  ; GCN-NOP20-NEXT: {{  $}}
+  ; GCN-NOP20-NEXT:   S_CBRANCH_SCC1 %bb.1, implicit $scc
+  ; GCN-NOP20-NEXT:   S_BRANCH %bb.2, implicit $scc
+  ; GCN-NOP20-NEXT: {{  $}}
+  ; GCN-NOP20-NEXT: bb.1:
+  ; GCN-NOP20-NEXT:   successors: %bb.2(0x80000000)
+  ; GCN-NOP20-NEXT: {{  $}}
+  ; GCN-NOP20-NEXT:   S_NOP 7
+  ; GCN-NOP20-NEXT:   S_NOP 7
+  ; GCN-NOP20-NEXT:   S_NOP 3
+  ; GCN-NOP20-NEXT:   $sgpr0 = S_ADD_U32 $sgpr0, 4, implicit-def $scc
+  ; GCN-NOP20-NEXT: {{  $}}
+  ; GCN-NOP20-NEXT: bb.2:
+  ; GCN-NOP20-NEXT:   S_NOP 7
+  ; GCN-NOP20-NEXT:   S_NOP 7
+  ; GCN-NOP20-NEXT:   S_NOP 3
+  ; GCN-NOP20-NEXT:   $sgpr1 = S_ADD_U32 $sgpr1, 2, implicit-def $scc
+  bb.0:
+    liveins: $sgpr0, $sgpr1
+
+    S_CBRANCH_SCC1 %bb.1, implicit $scc
+    S_BRANCH %bb.2, implicit $scc
+  bb.1:
+    $sgpr0 = S_ADD_U32 $sgpr0, 4, implicit-def $scc
+  bb.2:
+    $sgpr1 = S_ADD_U32 $sgpr1, 2, implicit-def $scc
+...
+
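+# Insert s_nop inside bundles: when padding is enabled, it is emitted before
+# the BUNDLE and before each instruction inside the bundle.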
+
+---
+name:            bundle
+body:            |
+  bb.0:
+    ; GCN-LABEL: name: bundle
+    ; GCN: $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr0_sgpr1, implicit $exec
+    ; GCN-NEXT: BUNDLE implicit-def $sgpr0_sgpr1 {
+    ; GCN-NEXT:   $sgpr0_sgpr1 = S_GETPC_B64
+    ; GCN-NEXT:   $sgpr0 = S_ADD_U32 $sgpr0, 4, implicit-def $scc
+    ; GCN-NEXT:   $sgpr1 = S_ADDC_U32 $sgpr1, 2, implicit-def $scc, implicit $scc
+    ; GCN-NEXT: }
+    ; GCN-NEXT: S_ENDPGM 0
+    ;
+    ; GCN-NOP1-LABEL: name: bundle
+    ; GCN-NOP1: S_NOP 0
+    ; GCN-NOP1-NEXT: $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr0_sgpr1, implicit $exec
+    ; GCN-NOP1-NEXT: S_NOP 0
+    ; GCN-NOP1-NEXT: BUNDLE implicit-def $sgpr0_sgpr1 {
+    ; GCN-NOP1-NEXT:   S_NOP 0
+    ; GCN-NOP1-NEXT:   $sgpr0_sgpr1 = S_GETPC_B64
+    ; GCN-NOP1-NEXT:   S_NOP 0
+    ; GCN-NOP1-NEXT:   $sgpr0 = S_ADD_U32 $sgpr0, 4, implicit-def $scc
+    ; GCN-NOP1-NEXT:   S_NOP 0
+    ; GCN-NOP1-NEXT:   $sgpr1 = S_ADDC_U32 $sgpr1, 2, implicit-def $scc, implicit $scc
+    ; GCN-NOP1-NEXT: }
+    ; GCN-NOP1-NEXT: S_ENDPGM 0
+    ;
+    ; GCN-NOP20-LABEL: name: bundle
+    ; GCN-NOP20: S_NOP 7
+    ; GCN-NOP20-NEXT: S_NOP 7
+    ; GCN-NOP20-NEXT: S_NOP 3
+    ; GCN-NOP20-NEXT: $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr0_sgpr1, implicit $exec
+    ; GCN-NOP20-NEXT: S_NOP 7
+    ; GCN-NOP20-NEXT: S_NOP 7
+    ; GCN-NOP20-NEXT: S_NOP 3
+    ; GCN-NOP20-NEXT: BUNDLE implicit-def $sgpr0_sgpr1 {
+    ; GCN-NOP20-NEXT:   S_NOP 7
+    ; GCN-NOP20-NEXT:   S_NOP 7
+    ; GCN-NOP20-NEXT:   S_NOP 3
+    ; GCN-NOP20-NEXT:   $sgpr0_sgpr1 = S_GETPC_B64
+    ; GCN-NOP20-NEXT:   S_NOP 7
+    ; GCN-NOP20-NEXT:   S_NOP 7
+    ; GCN-NOP20-NEXT:   S_NOP 3
+    ; GCN-NOP20-NEXT:   $sgpr0 = S_ADD_U32 $sgpr0, 4, implicit-def $scc
+    ; GCN-NOP20-NEXT:   S_NOP 7
+    ; GCN-NOP20-NEXT:   S_NOP 7
+    ; GCN-NOP20-NEXT:   S_NOP 3
+    ; GCN-NOP20-NEXT:   $sgpr1 = S_ADDC_U32 $sgpr1, 2, implicit-def $scc, implicit $scc
+    ; GCN-NOP20-NEXT: }
+    ; GCN-NOP20-NEXT: S_ENDPGM 0
+    $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr0_sgpr1, implicit $exec
+    BUNDLE implicit-def $sgpr0_sgpr1 {
+      $sgpr0_sgpr1 = S_GETPC_B64
+      $sgpr0 = S_ADD_U32 $sgpr0, 4, implicit-def $scc
+      $sgpr1 = S_ADDC_U32 $sgpr1, 2, implicit-def $scc, implicit $scc
+    }
+    S_ENDPGM 0
+...
+
+
+---
+name:            standard
+body:            |
+  bb.0:
+    ; GCN-LABEL: name: standard
+    ; GCN: $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr0_sgpr1, implicit $exec
+    ; GCN-NEXT: $sgpr0_sgpr1 = S_GETPC_B64
+    ; GCN-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, 4, implicit-def $scc
+    ; GCN-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 2, implicit-def $scc, implicit $scc
+    ; GCN-NEXT: S_ENDPGM 0
+    ;
+    ; GCN-NOP1-LABEL: name: standard
+    ; GCN-NOP1: S_NOP 0
+    ; GCN-NOP1-NEXT: $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr0_sgpr1, implicit $exec
+    ; GCN-NOP1-NEXT: S_NOP 0
----------------
jrbyrnes wrote:

Ah, makes sense. Thanks for the explanation.

https://github.com/llvm/llvm-project/pull/117839


More information about the llvm-commits mailing list