[llvm] a845ea3 - [AMDGPU] Fix SDWA 'preserve' transformation for instructions in different basic blocks. (#82406)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Feb 28 05:47:36 PST 2024
Author: Valery Pykhtin
Date: 2024-02-28T14:47:33+01:00
New Revision: a845ea3878f18878b6bbc91ff5fee2dd51a794f3
URL: https://github.com/llvm/llvm-project/commit/a845ea3878f18878b6bbc91ff5fee2dd51a794f3
DIFF: https://github.com/llvm/llvm-project/commit/a845ea3878f18878b6bbc91ff5fee2dd51a794f3.diff
LOG: [AMDGPU] Fix SDWA 'preserve' transformation for instructions in different basic blocks. (#82406)
This fixes crash when operand sources for V_OR instruction reside in
different basic blocks.
Added:
Modified:
llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
llvm/test/CodeGen/AMDGPU/sdwa-preserve.mir
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp b/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
index 53fc2c0686245f..afc380b4203457 100644
--- a/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
+++ b/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
@@ -472,12 +472,11 @@ bool SDWADstPreserveOperand::convertToSDWA(MachineInstr &MI,
}
// Move MI before v_or_b32
- auto MBB = MI.getParent();
- MBB->remove(&MI);
- MBB->insert(getParentInst(), &MI);
+ MI.getParent()->remove(&MI);
+ getParentInst()->getParent()->insert(getParentInst(), &MI);
// Add Implicit use of preserved register
- MachineInstrBuilder MIB(*MBB->getParent(), MI);
+ MachineInstrBuilder MIB(*MI.getMF(), MI);
MIB.addReg(getPreservedOperand()->getReg(),
RegState::ImplicitKill,
getPreservedOperand()->getSubReg());
diff --git a/llvm/test/CodeGen/AMDGPU/sdwa-preserve.mir b/llvm/test/CodeGen/AMDGPU/sdwa-preserve.mir
index f93456ccacb806..4c61e6803febf3 100644
--- a/llvm/test/CodeGen/AMDGPU/sdwa-preserve.mir
+++ b/llvm/test/CodeGen/AMDGPU/sdwa-preserve.mir
@@ -160,3 +160,60 @@ body: |
S_ENDPGM 0
...
+---
+name: add_f16_u32_preserve_different_bb
+tracksRegLiveness: true
+body: |
+ ; SDWA-LABEL: name: add_f16_u32_preserve_different_bb
+ ; SDWA: bb.0:
+ ; SDWA-NEXT: successors: %bb.1(0x80000000)
+ ; SDWA-NEXT: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $sgpr30_sgpr31
+ ; SDWA-NEXT: {{ $}}
+ ; SDWA-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr30_sgpr31
+ ; SDWA-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
+ ; SDWA-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; SDWA-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY2]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s32))
+ ; SDWA-NEXT: [[FLAT_LOAD_DWORD1:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s32))
+ ; SDWA-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 65535, [[FLAT_LOAD_DWORD]], implicit $exec
+ ; SDWA-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 16, [[FLAT_LOAD_DWORD1]], implicit $exec
+ ; SDWA-NEXT: [[V_BFE_U32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_U32_e64 [[FLAT_LOAD_DWORD]], 8, 8, implicit $exec
+ ; SDWA-NEXT: [[V_LSHRREV_B32_e32_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e32 24, [[FLAT_LOAD_DWORD1]], implicit $exec
+ ; SDWA-NEXT: {{ $}}
+ ; SDWA-NEXT: bb.1:
+ ; SDWA-NEXT: successors: %bb.2(0x80000000)
+ ; SDWA-NEXT: {{ $}}
+ ; SDWA-NEXT: [[V_MUL_F32_sdwa:%[0-9]+]]:vgpr_32 = V_MUL_F32_sdwa 0, [[FLAT_LOAD_DWORD]], 0, [[FLAT_LOAD_DWORD1]], 0, 0, 5, 0, 1, 3, implicit $mode, implicit $exec
+ ; SDWA-NEXT: {{ $}}
+ ; SDWA-NEXT: bb.2:
+ ; SDWA-NEXT: [[V_ADD_F16_sdwa:%[0-9]+]]:vgpr_32 = V_ADD_F16_sdwa 0, [[FLAT_LOAD_DWORD]], 0, [[FLAT_LOAD_DWORD1]], 0, 0, 1, 2, 4, 5, implicit $mode, implicit $exec, implicit killed [[V_MUL_F32_sdwa]](tied-def 0)
+ ; SDWA-NEXT: FLAT_STORE_DWORD [[COPY2]], [[V_ADD_F16_sdwa]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32))
+ ; SDWA-NEXT: $sgpr30_sgpr31 = COPY [[COPY]]
+ ; SDWA-NEXT: S_SETPC_B64_return $sgpr30_sgpr31
+ bb.0:
+ liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $sgpr30_sgpr31
+
+ %2:sreg_64 = COPY $sgpr30_sgpr31
+ %1:vreg_64 = COPY $vgpr2_vgpr3
+ %0:vreg_64 = COPY $vgpr0_vgpr1
+ %3:vgpr_32 = FLAT_LOAD_DWORD %0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32))
+ %4:vgpr_32 = FLAT_LOAD_DWORD %1, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32))
+
+ %5:vgpr_32 = V_AND_B32_e32 65535, %3, implicit $exec
+ %6:vgpr_32 = V_LSHRREV_B32_e64 16, %4, implicit $exec
+ %7:vgpr_32 = V_BFE_U32_e64 %3, 8, 8, implicit $exec
+ %8:vgpr_32 = V_LSHRREV_B32_e32 24, %4, implicit $exec
+
+ %9:vgpr_32 = V_ADD_F16_e64 0, %5, 0, %6, 0, 0, implicit $mode, implicit $exec
+ %10:vgpr_32 = V_LSHLREV_B16_e64 8, %9, implicit $exec
+
+ bb.1:
+ %11:vgpr_32 = V_MUL_F32_e64 0, %7, 0, %8, 0, 0, implicit $mode, implicit $exec
+ %12:vgpr_32 = V_LSHLREV_B32_e64 16, %11, implicit $exec
+
+ bb.2:
+ %13:vgpr_32 = V_OR_B32_e64 %10, %12, implicit $exec
+
+ FLAT_STORE_DWORD %0, %13, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32))
+ $sgpr30_sgpr31 = COPY %2
+ S_SETPC_B64_return $sgpr30_sgpr31
+...
More information about the llvm-commits
mailing list