[llvm] [AMDGPU] Account for existing SDWA selections (PR #123221)

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Tue Feb 11 23:04:09 PST 2025


================
@@ -1,56 +1,124 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
-# RUN: llc -mtriple=amdgcn -mcpu=gfx1030 -run-pass=si-peephole-sdwa -o - %s | FileCheck -check-prefix=CHECK %s
-
-# Currently the conversions in si-peephole-sdwa are disabled on preexisting sdwa instructions.
-# If they are reenabled, the code matches this pattern instead of the corresponding pattern
-# for V_LSHLREV_B32_sdwa further below:
-# [[V_LSHLREV_B32_sdwa:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_sdwa 0, %{{[0-9]+}}, 0, undef [[GLOBAL_LOAD_DWORD_SADDR]], 0, 6, 0, 6, 5, implicit $exec
-
-# TODO Implement a fix for the incorrect sdwa selection
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1030 -run-pass=si-peephole-sdwa -o - %s | FileCheck %s
 
 ---
 name:            sdwa_opsel_hazard
 body:             |
   ; CHECK-LABEL: name: sdwa_opsel_hazard
   ; CHECK: bb.0:
-  ; CHECK-NEXT:   successors: %bb.2(0x80000000)
+  ; CHECK-NEXT:   successors: %bb.7(0x40000000), %bb.8(0x40000000)
+  ; CHECK-NEXT:   liveins: $vgpr0, $sgpr4_sgpr5, $sgpr6
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
   ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:sreg_64_xexec_xnull = IMPLICIT_DEF
   ; CHECK-NEXT:   [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
   ; CHECK-NEXT:   [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR killed [[DEF1]], [[DEF2]], 0, 0, implicit $exec
-  ; CHECK-NEXT:   S_BRANCH %bb.2
+  ; CHECK-NEXT:   [[SI_IF:%[0-9]+]]:sreg_32 = SI_IF undef [[DEF]], %bb.8, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+  ; CHECK-NEXT:   S_BRANCH %bb.7
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.1:
-  ; CHECK-NEXT:   [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 undef %5, 255, implicit $exec
-  ; CHECK-NEXT:   [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2, implicit $exec
-  ; CHECK-NEXT:   [[V_LSHLREV_B32_sdwa:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_sdwa 0, [[V_MOV_B32_e32_]], 0, undef %5, 0, 6, 0, 6, 0, implicit $exec
-  ; CHECK-NEXT:   S_ENDPGM 0
+  ; CHECK-NEXT:   successors: %bb.2(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 255, implicit $exec
+  ; CHECK-NEXT:   [[V_AND_B32_sdwa:%[0-9]+]]:vgpr_32 = V_AND_B32_sdwa 0, undef [[GLOBAL_LOAD_DWORD_SADDR]], 0, [[V_MOV_B32_e32_]], 0, 6, 0, 5, 6, implicit $exec
+  ; CHECK-NEXT:   [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2, implicit $exec
+  ; CHECK-NEXT:   [[V_LSHLREV_B32_sdwa:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_sdwa 0, [[V_MOV_B32_e32_1]], 0, undef [[GLOBAL_LOAD_DWORD_SADDR]], 0, 6, 0, 6, 2, implicit $exec
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.2:
-  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
+  ; CHECK-NEXT:   successors: %bb.3(0x40000000), %bb.4(0x40000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[SI_IF1:%[0-9]+]]:sreg_32 = SI_IF killed undef %9, %bb.4, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+  ; CHECK-NEXT:   S_BRANCH %bb.3
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.3:
+  ; CHECK-NEXT:   successors: %bb.4(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.4:
+  ; CHECK-NEXT:   successors: %bb.5(0x40000000), %bb.6(0x40000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[SI_IF2:%[0-9]+]]:sreg_32 = SI_IF killed undef [[SI_IF1]], %bb.6, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+  ; CHECK-NEXT:   S_BRANCH %bb.5
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.5:
+  ; CHECK-NEXT:   successors: %bb.6(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.6:
+  ; CHECK-NEXT:   successors: %bb.9(0x40000000), %bb.10(0x40000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[SI_IF3:%[0-9]+]]:sreg_32 = SI_IF undef [[DEF]], %bb.10, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+  ; CHECK-NEXT:   S_BRANCH %bb.9
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.7:
+  ; CHECK-NEXT:   successors: %bb.8(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.8:
+  ; CHECK-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 16, undef [[GLOBAL_LOAD_DWORD_SADDR]], implicit $exec
+  ; CHECK-NEXT:   [[SI_IF4:%[0-9]+]]:sreg_32 = SI_IF killed undef [[SI_IF]], %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
   ; CHECK-NEXT:   S_BRANCH %bb.1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.9:
+  ; CHECK-NEXT:   successors: %bb.10(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.10:
+  ; CHECK-NEXT:   S_ENDPGM 0
   bb.0:
-    successors: %bb.2(0x40000000)
+    successors: %bb.7(0x40000000), %bb.8(0x40000000)
+    liveins: $vgpr0, $sgpr4_sgpr5, $sgpr6
+
     %0:sreg_32 = IMPLICIT_DEF
     %1:sreg_64_xexec_xnull = IMPLICIT_DEF
     %2:vgpr_32 = IMPLICIT_DEF
     %3:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR killed %1, %2, 0, 0, implicit $exec
-    S_BRANCH %bb.2
+    %4:sreg_32 = SI_IF undef %0, %bb.8, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+    S_BRANCH %bb.7
 
   bb.1:
+    successors: %bb.2(0x80000000)
+
     %5:vgpr_32 = V_AND_B32_e64 undef %6, 255, implicit $exec
     %7:vgpr_32 = V_LSHLREV_B32_e64 2, killed undef %5, implicit $exec
-    S_ENDPGM 0
 
   bb.2:
-    successors: %bb.1(0x40000000)
+    successors: %bb.3(0x40000000), %bb.4(0x40000000)
 
-    %6:vgpr_32 = V_LSHRREV_B32_e64 16, undef %3, implicit $exec
+    %8:sreg_32 = SI_IF killed undef %9, %bb.4, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+    S_BRANCH %bb.3
+
+  bb.3:
+    successors: %bb.4(0x80000000)
+
+  bb.4:
+    successors: %bb.5(0x40000000), %bb.6(0x40000000)
+
+    %10:sreg_32 = SI_IF killed undef %8, %bb.6, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+    S_BRANCH %bb.5
+
+  bb.5:
+    successors: %bb.6(0x80000000)
 
+  bb.6:
+    successors: %bb.9(0x40000000), %bb.10(0x40000000)
+
+    %11:sreg_32 = SI_IF undef %0, %bb.10, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+    S_BRANCH %bb.9
+
+  bb.7:
+    successors: %bb.8(0x80000000)
+
+  bb.8:
+    successors: %bb.1(0x40000000), %bb.2(0x40000000)
+
+    %6:vgpr_32 = V_LSHRREV_B32_e64 16, undef %3, implicit $exec
+    %9:sreg_32 = SI_IF killed undef %4, %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
     S_BRANCH %bb.1
 
+  bb.9:
+    successors: %bb.10(0x80000000)
+
+  bb.10:
+    S_ENDPGM 0
+
 ...
----------------
arsenm wrote:

Can we get dedicated tests for all of the mergeable combinations? If you directly write the MIR with pre-folded SDWA instructions, you shouldn't need so much MIR. IR tests would be nice, but are more difficult to write.

https://github.com/llvm/llvm-project/pull/123221


More information about the llvm-commits mailing list