[llvm] [AMDGPU] Correctly insert s_nops for implicit read of SDWA (PR #100276)
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Wed Jul 24 11:39:33 PDT 2024
================
@@ -0,0 +1,54 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+# RUN: llc -mtriple=amdgcn -mcpu=gfx942 -run-pass post-RA-hazard-rec -o - %s | FileCheck -check-prefix=HAZARD %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx90a -run-pass post-RA-hazard-rec -o - %s | FileCheck -check-prefix=NOHAZARD %s
+
+---
+name: hazard_vcmpx_sdwa_permlane16
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr5, $vgpr6
+ ; HAZARD-LABEL: name: hazard_vcmpx_sdwa_permlane16
+ ; HAZARD: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr5, $vgpr6
+ ; HAZARD-NEXT: {{ $}}
+ ; HAZARD-NEXT: renamable $vgpr7 = COPY $vgpr6, implicit $exec
+ ; HAZARD-NEXT: renamable $vgpr6 = COPY $vgpr5, implicit $exec
+ ; HAZARD-NEXT: renamable $vgpr4 = GLOBAL_LOAD_DWORD renamable $vgpr0_vgpr1, 0, 0, implicit $exec
+ ; HAZARD-NEXT: renamable $vgpr5 = GLOBAL_LOAD_DWORD renamable $vgpr2_vgpr3, 0, 0, implicit $exec
+ ; HAZARD-NEXT: KILL killed renamable $vgpr2, renamable $vgpr3
+ ; HAZARD-NEXT: KILL killed renamable $vgpr0, renamable $vgpr1
+ ; HAZARD-NEXT: renamable $vgpr0 = V_ADD_U16_sdwa 0, $vgpr4, 0, $vgpr5, 0, 1, 0, 3, 3, implicit $exec
+ ; HAZARD-NEXT: renamable $vgpr1 = V_ADD_U16_sdwa 0, $vgpr4, 0, $vgpr5, 0, 6, 0, 5, 5, implicit $exec
+ ; HAZARD-NEXT: renamable $vgpr0 = V_OR_B32_sdwa 0, killed $vgpr1, 0, killed $vgpr0, 0, 5, 0, 0, 6, implicit $exec
+ ; HAZARD-NEXT: S_NOP 0
+ ; HAZARD-NEXT: renamable $vgpr0 = V_ADD_U16_sdwa 0, killed $vgpr4, 0, killed $vgpr5, 0, 1, 2, 6, 1, implicit $exec, implicit killed $vgpr0(tied-def 0)
+ ; HAZARD-NEXT: GLOBAL_STORE_DWORD killed renamable $vgpr6_vgpr7, killed renamable $vgpr0, 0, 0, implicit $exec
+ ; HAZARD-NEXT: S_ENDPGM 0
+ ;
+ ; NOHAZARD-LABEL: name: hazard_vcmpx_sdwa_permlane16
+ ; NOHAZARD: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr5, $vgpr6
+ ; NOHAZARD-NEXT: {{ $}}
+ ; NOHAZARD-NEXT: renamable $vgpr7 = COPY $vgpr6, implicit $exec
+ ; NOHAZARD-NEXT: renamable $vgpr6 = COPY $vgpr5, implicit $exec
+ ; NOHAZARD-NEXT: renamable $vgpr4 = GLOBAL_LOAD_DWORD renamable $vgpr0_vgpr1, 0, 0, implicit $exec
+ ; NOHAZARD-NEXT: renamable $vgpr5 = GLOBAL_LOAD_DWORD renamable $vgpr2_vgpr3, 0, 0, implicit $exec
+ ; NOHAZARD-NEXT: KILL killed renamable $vgpr2, renamable $vgpr3
+ ; NOHAZARD-NEXT: KILL killed renamable $vgpr0, renamable $vgpr1
+ ; NOHAZARD-NEXT: renamable $vgpr0 = V_ADD_U16_sdwa 0, $vgpr4, 0, $vgpr5, 0, 1, 0, 3, 3, implicit $exec
+ ; NOHAZARD-NEXT: renamable $vgpr1 = V_ADD_U16_sdwa 0, $vgpr4, 0, $vgpr5, 0, 6, 0, 5, 5, implicit $exec
+ ; NOHAZARD-NEXT: renamable $vgpr0 = V_OR_B32_sdwa 0, killed $vgpr1, 0, killed $vgpr0, 0, 5, 0, 0, 6, implicit $exec
+ ; NOHAZARD-NEXT: renamable $vgpr0 = V_ADD_U16_sdwa 0, killed $vgpr4, 0, killed $vgpr5, 0, 1, 2, 6, 1, implicit $exec, implicit killed $vgpr0(tied-def 0)
+ ; NOHAZARD-NEXT: GLOBAL_STORE_DWORD killed renamable $vgpr6_vgpr7, killed renamable $vgpr0, 0, 0, implicit $exec
+ ; NOHAZARD-NEXT: S_ENDPGM 0
+ renamable $vgpr7 = COPY $vgpr6, implicit $exec
+ renamable $vgpr6 = COPY $vgpr5, implicit $exec
+ renamable $vgpr4 = GLOBAL_LOAD_DWORD renamable $vgpr0_vgpr1, 0, 0, implicit $exec
+ renamable $vgpr5 = GLOBAL_LOAD_DWORD renamable $vgpr2_vgpr3, 0, 0, implicit $exec
+ KILL killed renamable $vgpr2, renamable $vgpr3
+ KILL killed renamable $vgpr0, renamable $vgpr1
+ renamable $vgpr0 = V_ADD_U16_sdwa 0, $vgpr4, 0, $vgpr5, 0, 1, 0, 3, 3, implicit $exec
+ renamable $vgpr1 = V_ADD_U16_sdwa 0, $vgpr4, 0, $vgpr5, 0, 6, 0, 5, 5, implicit $exec
+ renamable $vgpr0 = V_OR_B32_sdwa 0, killed $vgpr1, 0, killed $vgpr0, 0, 5, 0, 0, 6, implicit $exec
+ renamable $vgpr0 = V_ADD_U16_sdwa 0, killed $vgpr4, 0, killed $vgpr5, 0, 1, 2, 6, 1, implicit $exec, implicit killed $vgpr0(tied-def 0)
+ GLOBAL_STORE_DWORD killed renamable $vgpr6_vgpr7, killed renamable $vgpr0, 0, 0, implicit $exec
+ S_ENDPGM 0
+...
----------------
arsenm wrote:
This should also be tested with an inline asm user.
https://github.com/llvm/llvm-project/pull/100276
More information about the llvm-commits mailing list