[llvm] [LLVM] Make s_getpc_b64 rematerializable (PR #71823)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Nov 13 09:52:09 PST 2023
https://github.com/Acim-Maravic updated https://github.com/llvm/llvm-project/pull/71823
From 280606d500dbd94a1429395a17d3e9e5dd862c2e Mon Sep 17 00:00:00 2001
From: Acim Maravic <acim.maravic at syrmia.com>
Date: Mon, 13 Nov 2023 18:39:43 +0100
Subject: [PATCH] [LLVM] Make s_getpc_b64 rematerializable
---
llvm/test/CodeGen/AMDGPU/remat-sop.mir | 27 +++++++++++
llvm/test/CodeGen/AMDGPU/s-getpc-b64-remat.ll | 45 +++++++++++++++++++
2 files changed, 72 insertions(+)
create mode 100644 llvm/test/CodeGen/AMDGPU/s-getpc-b64-remat.ll
diff --git a/llvm/test/CodeGen/AMDGPU/remat-sop.mir b/llvm/test/CodeGen/AMDGPU/remat-sop.mir
index 649f0d7f7799637..cd85562567f779a 100644
--- a/llvm/test/CodeGen/AMDGPU/remat-sop.mir
+++ b/llvm/test/CodeGen/AMDGPU/remat-sop.mir
@@ -573,3 +573,30 @@ body: |
S_NOP 0, implicit %2
S_ENDPGM 0
...
+
+---
+name: test_remat_s_getpc_b64
+tracksRegLiveness: true
+body: |
+ bb.0:
+
+ ; GCN-LABEL: name: test_remat_s_getpc_b64
+ ; GCN: renamable $sgpr0_sgpr1 = S_GETPC_B64
+ ; GCN-NEXT: SI_SPILL_S64_SAVE killed renamable $sgpr0_sgpr1, %stack.1, implicit $exec, implicit $sp_reg :: (store (s64) into %stack.1, align 4, addrspace 5)
+ ; GCN-NEXT: renamable $sgpr2_sgpr3 = S_GETPC_B64
+ ; GCN-NEXT: renamable $sgpr0_sgpr1 = S_GETPC_B64
+ ; GCN-NEXT: SI_SPILL_S64_SAVE killed renamable $sgpr0_sgpr1, %stack.0, implicit $exec, implicit $sp_reg :: (store (s64) into %stack.0, align 4, addrspace 5)
+ ; GCN-NEXT: renamable $sgpr0_sgpr1 = SI_SPILL_S64_RESTORE %stack.1, implicit $exec, implicit $sp_reg :: (load (s64) from %stack.1, align 4, addrspace 5)
+ ; GCN-NEXT: S_NOP 0, implicit killed renamable $sgpr0_sgpr1
+ ; GCN-NEXT: S_NOP 0, implicit killed renamable $sgpr2_sgpr3
+ ; GCN-NEXT: renamable $sgpr0_sgpr1 = SI_SPILL_S64_RESTORE %stack.0, implicit $exec, implicit $sp_reg :: (load (s64) from %stack.0, align 4, addrspace 5)
+ ; GCN-NEXT: S_NOP 0, implicit killed renamable $sgpr0_sgpr1
+ ; GCN-NEXT: S_ENDPGM 0
+ %0:sgpr_64 = S_GETPC_B64
+ %1:sgpr_64 = S_GETPC_B64
+ %2:sgpr_64 = S_GETPC_B64
+ S_NOP 0, implicit %0
+ S_NOP 0, implicit %1
+ S_NOP 0, implicit %2
+ S_ENDPGM 0
+...
diff --git a/llvm/test/CodeGen/AMDGPU/s-getpc-b64-remat.ll b/llvm/test/CodeGen/AMDGPU/s-getpc-b64-remat.ll
new file mode 100644
index 000000000000000..0182597cc8921d9
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/s-getpc-b64-remat.ll
@@ -0,0 +1,45 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -stress-regalloc=2 -verify-machineinstrs < %s | FileCheck %s
+
+
+define void @test_remat_s_getpc_b64() {
+; CHECK-LABEL: test_remat_s_getpc_b64:
+; CHECK: ; %bb.0: ; %entry
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
+; CHECK-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
+; CHECK-NEXT: s_mov_b64 exec, s[4:5]
+; CHECK-NEXT: ; implicit-def: $vgpr1 : SGPR spill to VGPR lane
+; CHECK-NEXT: s_getpc_b64 s[4:5]
+; CHECK-NEXT: v_writelane_b32 v0, s30, 0
+; CHECK-NEXT: v_writelane_b32 v1, s4, 0
+; CHECK-NEXT: v_writelane_b32 v0, s31, 1
+; CHECK-NEXT: v_writelane_b32 v1, s5, 1
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: v_readlane_b32 s4, v1, 0
+; CHECK-NEXT: v_readlane_b32 s5, v1, 1
+; CHECK-NEXT: v_mov_b32_e32 v2, s4
+; CHECK-NEXT: v_mov_b32_e32 v3, s5
+; CHECK-NEXT: global_store_dwordx2 v[1:2], v[2:3], off
+; CHECK-NEXT: v_readlane_b32 s31, v0, 1
+; CHECK-NEXT: v_readlane_b32 s30, v0, 0
+; CHECK-NEXT: ; kill: killed $vgpr1
+; CHECK-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; CHECK-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
+; CHECK-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
+; CHECK-NEXT: s_mov_b64 exec, s[4:5]
+; CHECK-NEXT: s_waitcnt vmcnt(0)
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+entry:
+ %0 = tail call i64 @llvm.amdgcn.s.getpc()
+ tail call void asm sideeffect "", "s"(i64 %0)
+ tail call void asm sideeffect "", "~{s0},~{s1},~{s2},~{s3},~{s4},~{s5},~{s6},~{s7},~{s8},~{s9},~{s10},~{s11},~{s12},~{s13},~{s14},~{s15},~{s16},~{s17},~{s18},~{s19},~{s20},~{s21},~{s22},~{s23},~{s24},~{s25},~{s26},~{s27},~{s28},~{s29},~{s30},~{s31}"()
+ store i64 %0, ptr addrspace(1) undef
+ ret void
+}
+
+declare i64 @llvm.amdgcn.s.getpc()
More information about the llvm-commits mailing list