[llvm] ffd6aaf - AMDGPU: Make packed 32-bit instructions rematerializable
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Wed Jun 29 08:58:05 PDT 2022
Author: Matt Arsenault
Date: 2022-06-29T11:57:54-04:00
New Revision: ffd6aaf5b6663836700663ae0f9a2d80f4056689
URL: https://github.com/llvm/llvm-project/commit/ffd6aaf5b6663836700663ae0f9a2d80f4056689
DIFF: https://github.com/llvm/llvm-project/commit/ffd6aaf5b6663836700663ae0f9a2d80f4056689.diff
LOG: AMDGPU: Make packed 32-bit instructions rematerializable
Added:
Modified:
llvm/lib/Target/AMDGPU/VOP3PInstructions.td
llvm/test/CodeGen/AMDGPU/remat-vop.mir
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
index af7569126131..2959ecaa5751 100644
--- a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
@@ -667,7 +667,7 @@ def MAIInstInfoTable : GenericTable {
let PrimaryKeyName = "getMAIInstInfoHelper";
}
-let SubtargetPredicate = HasPackedFP32Ops, isCommutable = 1 in {
+let SubtargetPredicate = HasPackedFP32Ops, isCommutable = 1, isReMaterializable = 1 in {
defm V_PK_FMA_F32 : VOP3PInst<"v_pk_fma_f32", VOP3P_Profile<VOP_V2F32_V2F32_V2F32_V2F32, VOP3_PACKED>, any_fma>;
defm V_PK_MUL_F32 : VOP3PInst<"v_pk_mul_f32", VOP3P_Profile<VOP_V2F32_V2F32_V2F32, VOP3_PACKED>, any_fmul>;
defm V_PK_ADD_F32 : VOP3PInst<"v_pk_add_f32", VOP3P_Profile<VOP_V2F32_V2F32_V2F32, VOP3_PACKED>, any_fadd>;
diff --git a/llvm/test/CodeGen/AMDGPU/remat-vop.mir b/llvm/test/CodeGen/AMDGPU/remat-vop.mir
index b34afe6182e7..001799cb3056 100644
--- a/llvm/test/CodeGen/AMDGPU/remat-vop.mir
+++ b/llvm/test/CodeGen/AMDGPU/remat-vop.mir
@@ -5247,3 +5247,141 @@ body: |
S_NOP 0, implicit %3
S_ENDPGM 0, implicit %0
...
+
+---
+name: test_remat_v_pk_fma_f32
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1
+ ; GCN-LABEL: name: test_remat_v_pk_fma_f32
+ ; GCN: liveins: $vgpr0_vgpr1
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: renamable $vgpr2_vgpr3 = nofpexcept V_PK_FMA_F32 8, $vgpr0_vgpr1, 8, $vgpr0_vgpr1, 11, $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
+ ; GCN-NEXT: renamable $vgpr2_vgpr3 = nofpexcept V_PK_FMA_F32 8, $vgpr0_vgpr1, 8, $vgpr0_vgpr1, 11, $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
+ ; GCN-NEXT: renamable $vgpr2_vgpr3 = nofpexcept V_PK_FMA_F32 8, $vgpr0_vgpr1, 8, $vgpr0_vgpr1, 11, $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
+ ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0_vgpr1
+ %0:vreg_64_align2 = COPY $vgpr0_vgpr1
+ %1:vreg_64_align2 = nofpexcept V_PK_FMA_F32 8, %0, 8, %0, 11, %0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
+ %2:vreg_64_align2 = nofpexcept V_PK_FMA_F32 8, %0, 8, %0, 11, %0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
+ %3:vreg_64_align2 = nofpexcept V_PK_FMA_F32 8, %0, 8, %0, 11, %0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
+ S_NOP 0, implicit %1
+ S_NOP 0, implicit %2
+ S_NOP 0, implicit %3
+ S_ENDPGM 0, implicit %0
+...
+
+---
+name: test_no_remat_v_pk_fma_f32
+tracksRegLiveness: true
+machineFunctionInfo:
+ stackPtrOffsetReg: $sgpr32
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1
+ ; GCN-LABEL: name: test_no_remat_v_pk_fma_f32
+ ; GCN: liveins: $vgpr0_vgpr1
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: renamable $vgpr2_vgpr3 = V_PK_FMA_F32 8, $vgpr0_vgpr1, 8, $vgpr0_vgpr1, 11, $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: SI_SPILL_V64_SAVE killed $vgpr2_vgpr3, %stack.1, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.1, align 4, addrspace 5)
+ ; GCN-NEXT: renamable $vgpr2_vgpr3 = V_PK_FMA_F32 8, $vgpr0_vgpr1, 8, $vgpr0_vgpr1, 11, $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: SI_SPILL_V64_SAVE killed $vgpr2_vgpr3, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, align 4, addrspace 5)
+ ; GCN-NEXT: renamable $vgpr2_vgpr3 = V_PK_FMA_F32 8, $vgpr0_vgpr1, 8, $vgpr0_vgpr1, 11, $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: SI_SPILL_V64_SAVE killed $vgpr2_vgpr3, %stack.2, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.2, align 4, addrspace 5)
+ ; GCN-NEXT: renamable $vgpr2_vgpr3 = SI_SPILL_V64_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s64) from %stack.1, align 4, addrspace 5)
+ ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
+ ; GCN-NEXT: renamable $vgpr2_vgpr3 = SI_SPILL_V64_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s64) from %stack.0, align 4, addrspace 5)
+ ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
+ ; GCN-NEXT: renamable $vgpr2_vgpr3 = SI_SPILL_V64_RESTORE %stack.2, $sgpr32, 0, implicit $exec :: (load (s64) from %stack.2, align 4, addrspace 5)
+ ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
+ ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0_vgpr1
+ %0:vreg_64_align2 = COPY $vgpr0_vgpr1
+ %1:vreg_64_align2 = V_PK_FMA_F32 8, %0, 8, %0, 11, %0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
+ %2:vreg_64_align2 = V_PK_FMA_F32 8, %0, 8, %0, 11, %0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
+ %3:vreg_64_align2 = V_PK_FMA_F32 8, %0, 8, %0, 11, %0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
+ S_NOP 0, implicit %1
+ S_NOP 0, implicit %2
+ S_NOP 0, implicit %3
+ S_ENDPGM 0, implicit %0
+...
+
+---
+name: test_remat_v_pk_mul_f32
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1
+ ; GCN-LABEL: name: test_remat_v_pk_mul_f32
+ ; GCN: liveins: $vgpr0_vgpr1
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: renamable $vgpr2_vgpr3 = nofpexcept V_PK_MUL_F32 8, $vgpr0_vgpr1, 8, $vgpr0_vgpr1, 11, 0, 0, 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
+ ; GCN-NEXT: renamable $vgpr2_vgpr3 = nofpexcept V_PK_MUL_F32 8, $vgpr0_vgpr1, 8, $vgpr0_vgpr1, 11, 0, 0, 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
+ ; GCN-NEXT: renamable $vgpr2_vgpr3 = nofpexcept V_PK_MUL_F32 8, $vgpr0_vgpr1, 8, $vgpr0_vgpr1, 11, 0, 0, 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
+ ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0_vgpr1
+ %0:vreg_64_align2 = COPY $vgpr0_vgpr1
+ %1:vreg_64_align2 = nofpexcept V_PK_MUL_F32 8, %0, 8, %0, 11, 0, 0, 0, 0, implicit $mode, implicit $exec
+ %2:vreg_64_align2 = nofpexcept V_PK_MUL_F32 8, %0, 8, %0, 11, 0, 0, 0, 0, implicit $mode, implicit $exec
+ %3:vreg_64_align2 = nofpexcept V_PK_MUL_F32 8, %0, 8, %0, 11, 0, 0, 0, 0, implicit $mode, implicit $exec
+ S_NOP 0, implicit %1
+ S_NOP 0, implicit %2
+ S_NOP 0, implicit %3
+ S_ENDPGM 0, implicit %0
+...
+
+---
+name: test_remat_v_pk_add_f32
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1
+ ; GCN-LABEL: name: test_remat_v_pk_add_f32
+ ; GCN: liveins: $vgpr0_vgpr1
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: renamable $vgpr2_vgpr3 = nofpexcept V_PK_ADD_F32 8, $vgpr0_vgpr1, 8, $vgpr0_vgpr1, 11, 0, 0, 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
+ ; GCN-NEXT: renamable $vgpr2_vgpr3 = nofpexcept V_PK_ADD_F32 8, $vgpr0_vgpr1, 8, $vgpr0_vgpr1, 11, 0, 0, 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
+ ; GCN-NEXT: renamable $vgpr2_vgpr3 = nofpexcept V_PK_ADD_F32 8, $vgpr0_vgpr1, 8, $vgpr0_vgpr1, 11, 0, 0, 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
+ ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0_vgpr1
+ %0:vreg_64_align2 = COPY $vgpr0_vgpr1
+ %1:vreg_64_align2 = nofpexcept V_PK_ADD_F32 8, %0, 8, %0, 11, 0, 0, 0, 0, implicit $mode, implicit $exec
+ %2:vreg_64_align2 = nofpexcept V_PK_ADD_F32 8, %0, 8, %0, 11, 0, 0, 0, 0, implicit $mode, implicit $exec
+ %3:vreg_64_align2 = nofpexcept V_PK_ADD_F32 8, %0, 8, %0, 11, 0, 0, 0, 0, implicit $mode, implicit $exec
+ S_NOP 0, implicit %1
+ S_NOP 0, implicit %2
+ S_NOP 0, implicit %3
+ S_ENDPGM 0, implicit %0
+...
+
+---
+name: test_remat_v_pk_mov_b32
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1
+ ; GCN-LABEL: name: test_remat_v_pk_mov_b32
+ ; GCN: liveins: $vgpr0_vgpr1
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: renamable $vgpr2_vgpr3 = V_PK_MOV_B32 8, $vgpr0_vgpr1, 8, $vgpr0_vgpr1, 11, 0, 0, 0, 0, implicit $exec
+ ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
+ ; GCN-NEXT: renamable $vgpr2_vgpr3 = V_PK_MOV_B32 9, $vgpr0_vgpr1, 9, $vgpr0_vgpr1, 12, 0, 0, 0, 0, implicit $exec
+ ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
+ ; GCN-NEXT: renamable $vgpr2_vgpr3 = V_PK_MOV_B32 10, $vgpr0_vgpr1, 10, $vgpr0_vgpr1, 13, 0, 0, 0, 0, implicit $exec
+ ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
+ ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0_vgpr1
+ %0:vreg_64_align2 = COPY $vgpr0_vgpr1
+ %1:vreg_64_align2 = V_PK_MOV_B32 8, %0, 8, %0, 11, 0, 0, 0, 0, implicit $exec
+ %2:vreg_64_align2 = V_PK_MOV_B32 9, %0, 9, %0, 12, 0, 0, 0, 0, implicit $exec
+ %3:vreg_64_align2 = V_PK_MOV_B32 10, %0, 10, %0, 13, 0, 0, 0, 0, implicit $exec
+ S_NOP 0, implicit %1
+ S_NOP 0, implicit %2
+ S_NOP 0, implicit %3
+ S_ENDPGM 0, implicit %0
+...
More information about the llvm-commits mailing list