[llvm] [AMDGPU] Account for existing SDWA selections (PR #123221)
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Tue Feb 11 23:04:09 PST 2025
================
@@ -1,56 +1,124 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
-# RUN: llc -mtriple=amdgcn -mcpu=gfx1030 -run-pass=si-peephole-sdwa -o - %s | FileCheck -check-prefix=CHECK %s
-
-# Currently the conversions in si-peephole-sdwa are disabled on preexisting sdwa instructions.
-# If they are reenabled, the code matches this pattern instead of the corresponding pattern
-# for V_LSHLREV_B32_sdwa further below:
-# [[V_LSHLREV_B32_sdwa:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_sdwa 0, %{{[0-9]+}}, 0, undef [[GLOBAL_LOAD_DWORD_SADDR]], 0, 6, 0, 6, 5, implicit $exec
-
-# TODO Implement a fix for the incorrect sdwa selection
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1030 -run-pass=si-peephole-sdwa -o - %s | FileCheck %s
---
name: sdwa_opsel_hazard
body: |
; CHECK-LABEL: name: sdwa_opsel_hazard
; CHECK: bb.0:
- ; CHECK-NEXT: successors: %bb.2(0x80000000)
+ ; CHECK-NEXT: successors: %bb.7(0x40000000), %bb.8(0x40000000)
+ ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5, $sgpr6
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF1:%[0-9]+]]:sreg_64_xexec_xnull = IMPLICIT_DEF
; CHECK-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR killed [[DEF1]], [[DEF2]], 0, 0, implicit $exec
- ; CHECK-NEXT: S_BRANCH %bb.2
+ ; CHECK-NEXT: [[SI_IF:%[0-9]+]]:sreg_32 = SI_IF undef [[DEF]], %bb.8, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ ; CHECK-NEXT: S_BRANCH %bb.7
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
- ; CHECK-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 undef %5, 255, implicit $exec
- ; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2, implicit $exec
- ; CHECK-NEXT: [[V_LSHLREV_B32_sdwa:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_sdwa 0, [[V_MOV_B32_e32_]], 0, undef %5, 0, 6, 0, 6, 0, implicit $exec
- ; CHECK-NEXT: S_ENDPGM 0
+ ; CHECK-NEXT: successors: %bb.2(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 255, implicit $exec
+ ; CHECK-NEXT: [[V_AND_B32_sdwa:%[0-9]+]]:vgpr_32 = V_AND_B32_sdwa 0, undef [[GLOBAL_LOAD_DWORD_SADDR]], 0, [[V_MOV_B32_e32_]], 0, 6, 0, 5, 6, implicit $exec
+ ; CHECK-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2, implicit $exec
+ ; CHECK-NEXT: [[V_LSHLREV_B32_sdwa:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_sdwa 0, [[V_MOV_B32_e32_1]], 0, undef [[GLOBAL_LOAD_DWORD_SADDR]], 0, 6, 0, 6, 2, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
- ; CHECK-NEXT: successors: %bb.1(0x80000000)
+ ; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.4(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[SI_IF1:%[0-9]+]]:sreg_32 = SI_IF killed undef %9, %bb.4, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ ; CHECK-NEXT: S_BRANCH %bb.3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.3:
+ ; CHECK-NEXT: successors: %bb.4(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.4:
+ ; CHECK-NEXT: successors: %bb.5(0x40000000), %bb.6(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[SI_IF2:%[0-9]+]]:sreg_32 = SI_IF killed undef [[SI_IF1]], %bb.6, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ ; CHECK-NEXT: S_BRANCH %bb.5
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.5:
+ ; CHECK-NEXT: successors: %bb.6(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.6:
+ ; CHECK-NEXT: successors: %bb.9(0x40000000), %bb.10(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[SI_IF3:%[0-9]+]]:sreg_32 = SI_IF undef [[DEF]], %bb.10, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ ; CHECK-NEXT: S_BRANCH %bb.9
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.7:
+ ; CHECK-NEXT: successors: %bb.8(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.8:
+ ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 16, undef [[GLOBAL_LOAD_DWORD_SADDR]], implicit $exec
+ ; CHECK-NEXT: [[SI_IF4:%[0-9]+]]:sreg_32 = SI_IF killed undef [[SI_IF]], %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
; CHECK-NEXT: S_BRANCH %bb.1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.9:
+ ; CHECK-NEXT: successors: %bb.10(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.10:
+ ; CHECK-NEXT: S_ENDPGM 0
bb.0:
- successors: %bb.2(0x40000000)
+ successors: %bb.7(0x40000000), %bb.8(0x40000000)
+ liveins: $vgpr0, $sgpr4_sgpr5, $sgpr6
+
%0:sreg_32 = IMPLICIT_DEF
%1:sreg_64_xexec_xnull = IMPLICIT_DEF
%2:vgpr_32 = IMPLICIT_DEF
%3:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR killed %1, %2, 0, 0, implicit $exec
- S_BRANCH %bb.2
+ %4:sreg_32 = SI_IF undef %0, %bb.8, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ S_BRANCH %bb.7
bb.1:
+ successors: %bb.2(0x80000000)
+
%5:vgpr_32 = V_AND_B32_e64 undef %6, 255, implicit $exec
%7:vgpr_32 = V_LSHLREV_B32_e64 2, killed undef %5, implicit $exec
- S_ENDPGM 0
bb.2:
- successors: %bb.1(0x40000000)
+ successors: %bb.3(0x40000000), %bb.4(0x40000000)
- %6:vgpr_32 = V_LSHRREV_B32_e64 16, undef %3, implicit $exec
+ %8:sreg_32 = SI_IF killed undef %9, %bb.4, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ S_BRANCH %bb.3
+
+ bb.3:
+ successors: %bb.4(0x80000000)
+
+ bb.4:
+ successors: %bb.5(0x40000000), %bb.6(0x40000000)
+
+ %10:sreg_32 = SI_IF killed undef %8, %bb.6, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ S_BRANCH %bb.5
+
+ bb.5:
+ successors: %bb.6(0x80000000)
+ bb.6:
+ successors: %bb.9(0x40000000), %bb.10(0x40000000)
+
+ %11:sreg_32 = SI_IF undef %0, %bb.10, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ S_BRANCH %bb.9
+
+ bb.7:
+ successors: %bb.8(0x80000000)
+
+ bb.8:
+ successors: %bb.1(0x40000000), %bb.2(0x40000000)
+
+ %6:vgpr_32 = V_LSHRREV_B32_e64 16, undef %3, implicit $exec
+ %9:sreg_32 = SI_IF killed undef %4, %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
S_BRANCH %bb.1
+ bb.9:
+ successors: %bb.10(0x80000000)
+
+ bb.10:
+ S_ENDPGM 0
+
...
----------------
arsenm wrote:
Can we get dedicated tests for all of the mergable combinations? If you directly write the MIR with pre-folded sdwa, you shouldn't need so much MIR. IR tests would be nice but more difficult
https://github.com/llvm/llvm-project/pull/123221
More information about the llvm-commits
mailing list