[llvm] [AMDGPU] Prevent SDWA 'preserve' transformation for instructions in different basic blocks. (PR #82406)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Feb 20 11:23:10 PST 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
Author: Valery Pykhtin (vpykhtin)
<details>
<summary>Changes</summary>
This fixes a crash when the operand sources for a V_OR instruction reside in different basic blocks.
---
Full diff: https://github.com/llvm/llvm-project/pull/82406.diff
2 Files Affected:
- (modified) llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp (+5-1)
- (modified) llvm/test/CodeGen/AMDGPU/sdwa-preserve.mir (+67)
``````````diff
diff --git a/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp b/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
index 53fc2c0686245f..739ee17464edec 100644
--- a/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
+++ b/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
@@ -739,6 +739,11 @@ SIPeepholeSDWA::matchSDWAOperand(MachineInstr &MI) {
MachineInstr *SDWAInst = OrSDWADef->getParent();
MachineInstr *OtherInst = OrOtherDef->getParent();
+ // Instruction and operand sources should reside in the same BB.
+ if (SDWAInst->getParent() != MI.getParent() ||
+ OtherInst->getParent() != MI.getParent())
+ break;
+
// Check that OtherInstr is actually bitwise compatible with SDWAInst = their
// destination patterns don't overlap. Compatible instruction can be either
// regular instruction with compatible bitness or SDWA instruction with
@@ -815,7 +820,6 @@ SIPeepholeSDWA::matchSDWAOperand(MachineInstr &MI) {
return std::make_unique<SDWADstPreserveOperand>(
OrDst, OrSDWADef, OrOtherDef, DstSel);
-
}
}
diff --git a/llvm/test/CodeGen/AMDGPU/sdwa-preserve.mir b/llvm/test/CodeGen/AMDGPU/sdwa-preserve.mir
index f93456ccacb806..359945ff799434 100644
--- a/llvm/test/CodeGen/AMDGPU/sdwa-preserve.mir
+++ b/llvm/test/CodeGen/AMDGPU/sdwa-preserve.mir
@@ -160,3 +160,70 @@ body: |
S_ENDPGM 0
...
+---
+name: add_f16_u32_preserve_different_bb
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: vreg_64 }
+ - { id: 1, class: vreg_64 }
+ - { id: 2, class: sreg_64 }
+ - { id: 3, class: vgpr_32 }
+ - { id: 4, class: vgpr_32 }
+ - { id: 5, class: vgpr_32 }
+ - { id: 6, class: vgpr_32 }
+ - { id: 7, class: vgpr_32 }
+ - { id: 8, class: vgpr_32 }
+ - { id: 9, class: vgpr_32 }
+ - { id: 10, class: vgpr_32 }
+ - { id: 11, class: vgpr_32 }
+ - { id: 12, class: vgpr_32 }
+ - { id: 13, class: vgpr_32 }
+body: |
+ ; SDWA-LABEL: name: add_f16_u32_preserve_different_bb
+ ; SDWA: bb.0:
+ ; SDWA-NEXT: successors: %bb.1(0x80000000)
+ ; SDWA-NEXT: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $sgpr30_sgpr31
+ ; SDWA-NEXT: {{ $}}
+ ; SDWA-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr30_sgpr31
+ ; SDWA-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
+ ; SDWA-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; SDWA-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY2]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s32))
+ ; SDWA-NEXT: [[FLAT_LOAD_DWORD1:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s32))
+ ; SDWA-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 65535, [[FLAT_LOAD_DWORD]], implicit $exec
+ ; SDWA-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 16, [[FLAT_LOAD_DWORD1]], implicit $exec
+ ; SDWA-NEXT: [[V_BFE_U32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_U32_e64 [[FLAT_LOAD_DWORD]], 8, 8, implicit $exec
+ ; SDWA-NEXT: [[V_LSHRREV_B32_e32_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e32 24, [[FLAT_LOAD_DWORD1]], implicit $exec
+ ; SDWA-NEXT: [[V_ADD_F16_sdwa:%[0-9]+]]:vgpr_32 = V_ADD_F16_sdwa 0, [[FLAT_LOAD_DWORD]], 0, [[FLAT_LOAD_DWORD1]], 0, 0, 1, 0, 4, 5, implicit $mode, implicit $exec
+ ; SDWA-NEXT: [[V_MUL_F32_sdwa:%[0-9]+]]:vgpr_32 = V_MUL_F32_sdwa 0, [[FLAT_LOAD_DWORD]], 0, [[FLAT_LOAD_DWORD1]], 0, 0, 5, 0, 1, 3, implicit $mode, implicit $exec
+ ; SDWA-NEXT: {{ $}}
+ ; SDWA-NEXT: bb.1:
+ ; SDWA-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_ADD_F16_sdwa]], [[V_MUL_F32_sdwa]], implicit $exec
+ ; SDWA-NEXT: FLAT_STORE_DWORD [[COPY2]], [[V_OR_B32_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32))
+ ; SDWA-NEXT: $sgpr30_sgpr31 = COPY [[COPY]]
+ ; SDWA-NEXT: S_SETPC_B64_return $sgpr30_sgpr31
+ bb.0:
+ liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $sgpr30_sgpr31
+
+ %2 = COPY $sgpr30_sgpr31
+ %1 = COPY $vgpr2_vgpr3
+ %0 = COPY $vgpr0_vgpr1
+ %3 = FLAT_LOAD_DWORD %0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32))
+ %4 = FLAT_LOAD_DWORD %1, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32))
+
+ %5 = V_AND_B32_e32 65535, %3, implicit $exec
+ %6 = V_LSHRREV_B32_e64 16, %4, implicit $exec
+ %7 = V_BFE_U32_e64 %3, 8, 8, implicit $exec
+ %8 = V_LSHRREV_B32_e32 24, %4, implicit $exec
+
+ %9 = V_ADD_F16_e64 0, %5, 0, %6, 0, 0, implicit $mode, implicit $exec
+ %10 = V_LSHLREV_B16_e64 8, %9, implicit $exec
+ %11 = V_MUL_F32_e64 0, %7, 0, %8, 0, 0, implicit $mode, implicit $exec
+ %12 = V_LSHLREV_B32_e64 16, %11, implicit $exec
+
+ bb.1:
+ %13 = V_OR_B32_e64 %10, %12, implicit $exec
+
+ FLAT_STORE_DWORD %0, %13, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32))
+ $sgpr30_sgpr31 = COPY %2
+ S_SETPC_B64_return $sgpr30_sgpr31
+...
``````````
</details>
https://github.com/llvm/llvm-project/pull/82406
More information about the llvm-commits
mailing list