[llvm] [AMDGPU][NFC] Fix crash due to assertion failure (PR #123627)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Jan 20 07:09:42 PST 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
Author: Chinmay Deshpande (chinmaydd)
<details>
<summary>Changes</summary>
Add a check for FLAT instructions that don't use vector registers when computing the VALU hazard.
This fixes SWDEV-494413, SWDEV-5088813, SWDEV-499344, SWDEV-499349, SWDEV-504112 which were discovered using the ISEL Fuzzer.
---
Full diff: https://github.com/llvm/llvm-project/pull/123627.diff
2 Files Affected:
- (modified) llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp (+6-3)
- (added) llvm/test/CodeGen/AMDGPU/fix-crash-valu-hazard.ll (+32)
``````````diff
diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
index 6baef137df5e16..873d18e30a430a 100644
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
@@ -858,9 +858,12 @@ int GCNHazardRecognizer::createsVALUHazard(const MachineInstr &MI) {
}
if (TII->isFLAT(MI)) {
- int DataIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdata);
- if (AMDGPU::getRegBitWidth(Desc.operands()[DataIdx].RegClass) > 64)
- return DataIdx;
+ // There is no hazard if the instruction does not use vector regs
+ if (VDataIdx == -1)
+ return -1;
+
+ if (AMDGPU::getRegBitWidth(VDataRCID) > 64)
+ return VDataIdx;
}
return -1;
diff --git a/llvm/test/CodeGen/AMDGPU/fix-crash-valu-hazard.ll b/llvm/test/CodeGen/AMDGPU/fix-crash-valu-hazard.ll
new file mode 100644
index 00000000000000..734f87056717d1
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/fix-crash-valu-hazard.ll
@@ -0,0 +1,32 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -O2 < %s | FileCheck -check-prefixes=GFX942 %s
+
+@G = global <2 x i32> splat (i32 5)
+
+define amdgpu_ps void @global_load_lds_dword_saddr(ptr addrspace(1) inreg nocapture %gptr, ptr addrspace(3) nocapture %lptr) {
+; GFX942-LABEL: global_load_lds_dword_saddr:
+; GFX942: ; %bb.0: ; %main_body
+; GFX942-NEXT: s_getpc_b64 s[2:3]
+; GFX942-NEXT: s_add_u32 s2, s2, G@gotpcrel32@lo+4
+; GFX942-NEXT: s_addc_u32 s3, s3, G@gotpcrel32@hi+12
+; GFX942-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0
+; GFX942-NEXT: v_mov_b32_e32 v1, 0
+; GFX942-NEXT: s_waitcnt lgkmcnt(0)
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], s[2:3]
+; GFX942-NEXT: flat_load_dwordx2 v[4:5], v[2:3]
+; GFX942-NEXT: v_readfirstlane_b32 s2, v0
+; GFX942-NEXT: s_mov_b32 m0, s2
+; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mul_lo_u32 v0, v4, 10
+; GFX942-NEXT: global_load_lds_dword v1, s[0:1] offset:32 nt
+; GFX942-NEXT: v_mul_lo_u32 v1, v5, 10
+; GFX942-NEXT: s_waitcnt vmcnt(0)
+; GFX942-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
+; GFX942-NEXT: s_endpgm
+main_body:
+ %LGV = load <2 x i32>, ptr @G, align 8
+ %B = mul <2 x i32> %LGV, splat (i32 10)
+ call void @llvm.amdgcn.global.load.lds(ptr addrspace(1) %gptr, ptr addrspace(3) %lptr, i32 4, i32 32, i32 2)
+ store <2 x i32> %B, ptr @G, align 8
+ ret void
+}
``````````
</details>
https://github.com/llvm/llvm-project/pull/123627
More information about the llvm-commits
mailing list