[llvm-branch-commits] [llvm] AMDGPU: Add test for mfma rewrite pass respecting optnone (PR #153025)
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Mon Aug 11 07:45:13 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
Author: Matt Arsenault (arsenm)
<details>
<summary>Changes</summary>
---
Full diff: https://github.com/llvm/llvm-project/pull/153025.diff
1 File Affected:
- (modified) llvm/test/CodeGen/AMDGPU/rewrite-vgpr-mfma-to-agpr.ll (+35)
``````````diff
diff --git a/llvm/test/CodeGen/AMDGPU/rewrite-vgpr-mfma-to-agpr.ll b/llvm/test/CodeGen/AMDGPU/rewrite-vgpr-mfma-to-agpr.ll
index 343a5c8511ee9..6f7809f46d10a 100644
--- a/llvm/test/CodeGen/AMDGPU/rewrite-vgpr-mfma-to-agpr.ll
+++ b/llvm/test/CodeGen/AMDGPU/rewrite-vgpr-mfma-to-agpr.ll
@@ -3,6 +3,40 @@
target triple = "amdgcn-amd-amdhsa"
+define amdgpu_kernel void @respect_optnone(double %arg0, double %arg1, ptr addrspace(1) %ptr) #4 {
+; CHECK-LABEL: respect_optnone:
+; CHECK: ; %bb.0: ; %bb
+; CHECK-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
+; CHECK-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x8
+; CHECK-NEXT: s_nop 0
+; CHECK-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x10
+; CHECK-NEXT: s_mov_b32 s6, 0x3ff
+; CHECK-NEXT: v_and_b32_e64 v0, v0, s6
+; CHECK-NEXT: s_mov_b32 s6, 3
+; CHECK-NEXT: v_lshlrev_b32_e64 v0, s6, v0
+; CHECK-NEXT: s_waitcnt lgkmcnt(0)
+; CHECK-NEXT: global_load_dwordx2 v[0:1], v0, s[4:5]
+; CHECK-NEXT: v_mov_b64_e32 v[2:3], s[0:1]
+; CHECK-NEXT: v_mov_b64_e32 v[4:5], s[2:3]
+; CHECK-NEXT: s_waitcnt vmcnt(0)
+; CHECK-NEXT: s_nop 0
+; CHECK-NEXT: v_mfma_f64_4x4x4_4b_f64 v[0:1], v[2:3], v[4:5], v[0:1]
+; CHECK-NEXT: s_nop 5
+; CHECK-NEXT: v_accvgpr_write_b32 a0, v0
+; CHECK-NEXT: v_accvgpr_write_b32 a1, v1
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; use a[0:1]
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: s_endpgm
+bb:
+ %id = call i32 @llvm.amdgcn.workitem.id.x()
+ %gep = getelementptr double, ptr addrspace(1) %ptr, i32 %id
+ %src2 = load double, ptr addrspace(1) %gep
+ %mai = call double @llvm.amdgcn.mfma.f64.4x4x4f64(double %arg0, double %arg1, double %src2, i32 0, i32 0, i32 0)
+ call void asm sideeffect "; use $0", "a"(double %mai)
+ ret void
+}
+
define amdgpu_kernel void @test_mfma_f32_32x32x1f32_rewrite_vgpr_mfma(ptr addrspace(1) %arg) #0 {
; CHECK-LABEL: test_mfma_f32_32x32x1f32_rewrite_vgpr_mfma:
; CHECK: ; %bb.0: ; %bb
@@ -859,3 +893,4 @@ attributes #0 = { nounwind "amdgpu-flat-work-group-size"="1,256" "amdgpu-waves-p
attributes #1 = { mustprogress nofree norecurse nounwind willreturn "amdgpu-waves-per-eu"="8,8" }
attributes #2 = { convergent nocallback nofree nosync nounwind willreturn memory(none) }
attributes #3 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+attributes #4 = { nounwind noinline optnone }
``````````
</details>
https://github.com/llvm/llvm-project/pull/153025
More information about the llvm-branch-commits
mailing list