[llvm] [AMDGPU] Correctly insert s_nops for implicit read of SDWA (PR #100276)

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Wed Jul 24 11:39:33 PDT 2024


================
@@ -0,0 +1,54 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+# RUN: llc -mtriple=amdgcn -mcpu=gfx942 -run-pass post-RA-hazard-rec -o - %s | FileCheck -check-prefix=HAZARD %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx90a -run-pass post-RA-hazard-rec -o - %s | FileCheck -check-prefix=NOHAZARD %s
+
+---
+name:            hazard_vcmpx_sdwa_permlane16
+body:            |
+  bb.0:
+  liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr5, $vgpr6
+    ; HAZARD-LABEL: name: hazard_vcmpx_sdwa_permlane16
+    ; HAZARD: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr5, $vgpr6
+    ; HAZARD-NEXT: {{  $}}
+    ; HAZARD-NEXT: renamable $vgpr7 = COPY $vgpr6, implicit $exec
+    ; HAZARD-NEXT: renamable $vgpr6 = COPY $vgpr5, implicit $exec
+    ; HAZARD-NEXT: renamable $vgpr4 = GLOBAL_LOAD_DWORD renamable $vgpr0_vgpr1, 0, 0, implicit $exec
+    ; HAZARD-NEXT: renamable $vgpr5 = GLOBAL_LOAD_DWORD renamable $vgpr2_vgpr3, 0, 0, implicit $exec
+    ; HAZARD-NEXT: KILL killed renamable $vgpr2, renamable $vgpr3
+    ; HAZARD-NEXT: KILL killed renamable $vgpr0, renamable $vgpr1
+    ; HAZARD-NEXT: renamable $vgpr0 = V_ADD_U16_sdwa 0, $vgpr4, 0, $vgpr5, 0, 1, 0, 3, 3, implicit $exec
+    ; HAZARD-NEXT: renamable $vgpr1 = V_ADD_U16_sdwa 0, $vgpr4, 0, $vgpr5, 0, 6, 0, 5, 5, implicit $exec
+    ; HAZARD-NEXT: renamable $vgpr0 = V_OR_B32_sdwa 0, killed $vgpr1, 0, killed $vgpr0, 0, 5, 0, 0, 6, implicit $exec
+    ; HAZARD-NEXT: S_NOP 0
+    ; HAZARD-NEXT: renamable $vgpr0 = V_ADD_U16_sdwa 0, killed $vgpr4, 0, killed $vgpr5, 0, 1, 2, 6, 1, implicit $exec, implicit killed $vgpr0(tied-def 0)
+    ; HAZARD-NEXT: GLOBAL_STORE_DWORD killed renamable $vgpr6_vgpr7, killed renamable $vgpr0, 0, 0, implicit $exec
+    ; HAZARD-NEXT: S_ENDPGM 0
+    ;
+    ; NOHAZARD-LABEL: name: hazard_vcmpx_sdwa_permlane16
+    ; NOHAZARD: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr5, $vgpr6
+    ; NOHAZARD-NEXT: {{  $}}
+    ; NOHAZARD-NEXT: renamable $vgpr7 = COPY $vgpr6, implicit $exec
+    ; NOHAZARD-NEXT: renamable $vgpr6 = COPY $vgpr5, implicit $exec
+    ; NOHAZARD-NEXT: renamable $vgpr4 = GLOBAL_LOAD_DWORD renamable $vgpr0_vgpr1, 0, 0, implicit $exec
+    ; NOHAZARD-NEXT: renamable $vgpr5 = GLOBAL_LOAD_DWORD renamable $vgpr2_vgpr3, 0, 0, implicit $exec
+    ; NOHAZARD-NEXT: KILL killed renamable $vgpr2, renamable $vgpr3
+    ; NOHAZARD-NEXT: KILL killed renamable $vgpr0, renamable $vgpr1
+    ; NOHAZARD-NEXT: renamable $vgpr0 = V_ADD_U16_sdwa 0, $vgpr4, 0, $vgpr5, 0, 1, 0, 3, 3, implicit $exec
+    ; NOHAZARD-NEXT: renamable $vgpr1 = V_ADD_U16_sdwa 0, $vgpr4, 0, $vgpr5, 0, 6, 0, 5, 5, implicit $exec
+    ; NOHAZARD-NEXT: renamable $vgpr0 = V_OR_B32_sdwa 0, killed $vgpr1, 0, killed $vgpr0, 0, 5, 0, 0, 6, implicit $exec
+    ; NOHAZARD-NEXT: renamable $vgpr0 = V_ADD_U16_sdwa 0, killed $vgpr4, 0, killed $vgpr5, 0, 1, 2, 6, 1, implicit $exec, implicit killed $vgpr0(tied-def 0)
+    ; NOHAZARD-NEXT: GLOBAL_STORE_DWORD killed renamable $vgpr6_vgpr7, killed renamable $vgpr0, 0, 0, implicit $exec
+    ; NOHAZARD-NEXT: S_ENDPGM 0
+  renamable $vgpr7 = COPY $vgpr6, implicit $exec
+  renamable $vgpr6 = COPY $vgpr5, implicit $exec
+  renamable $vgpr4 = GLOBAL_LOAD_DWORD renamable $vgpr0_vgpr1, 0, 0, implicit $exec
+  renamable $vgpr5 = GLOBAL_LOAD_DWORD renamable $vgpr2_vgpr3, 0, 0, implicit $exec
+  KILL killed renamable $vgpr2, renamable $vgpr3
+  KILL killed renamable $vgpr0, renamable $vgpr1
+  renamable $vgpr0 = V_ADD_U16_sdwa 0, $vgpr4, 0, $vgpr5, 0, 1, 0, 3, 3, implicit $exec
+  renamable $vgpr1 = V_ADD_U16_sdwa 0, $vgpr4, 0, $vgpr5, 0, 6, 0, 5, 5, implicit $exec
+  renamable $vgpr0 = V_OR_B32_sdwa 0, killed $vgpr1, 0, killed $vgpr0, 0, 5, 0, 0, 6, implicit $exec
+  renamable $vgpr0 = V_ADD_U16_sdwa 0, killed $vgpr4, 0, killed $vgpr5, 0, 1, 2, 6, 1, implicit $exec, implicit killed $vgpr0(tied-def 0)
+  GLOBAL_STORE_DWORD killed renamable $vgpr6_vgpr7, killed renamable $vgpr0, 0, 0, implicit $exec
+  S_ENDPGM 0
+...
----------------
arsenm wrote:

This should also be tested with an inline asm user.

https://github.com/llvm/llvm-project/pull/100276


More information about the llvm-commits mailing list