[llvm] [AMDGPU][True16][CodeGen] update wwm reg sorting check condition (PR #135053)

Brox Chen via llvm-commits llvm-commits at lists.llvm.org
Fri Apr 18 13:48:11 PDT 2025


https://github.com/broxigarchen updated https://github.com/llvm/llvm-project/pull/135053

>From 9bb56c262a122479b51da4f9dc4b2cbbc26ea150 Mon Sep 17 00:00:00 2001
From: guochen2 <guochen2 at amd.com>
Date: Wed, 9 Apr 2025 10:38:34 -0400
Subject: [PATCH 1/3] skip 16-bit register for wwm reg sorting

---
 llvm/lib/Target/AMDGPU/SIFrameLowering.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
index 9c737b4f3e378..8f488f5154650 100644
--- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
@@ -1650,7 +1650,7 @@ void SIFrameLowering::determineCalleeSaves(MachineFunction &MF,
     // are of 32-bit size. SIPreAllocateWWMRegs pass can add tuples into WWM
     // reserved registers.
     const TargetRegisterClass *RC = TRI->getPhysRegBaseClass(Reg);
-    if (TRI->getRegSizeInBits(*RC) > 32)
+    if (TRI->getRegSizeInBits(*RC) != 32)
       continue;
     SortedWWMVGPRs.push_back(Reg);
   }

>From 062e087752164ef276fb2c038cfd0dfd79674ace Mon Sep 17 00:00:00 2001
From: guochen2 <guochen2 at amd.com>
Date: Mon, 14 Apr 2025 12:13:54 -0400
Subject: [PATCH 2/3] add MIR test for a 16-bit wwm reserved register

---
 .../AMDGPU/wwm-reg-shift-down-gfx11plus.mir   | 27 +++++++++++++++++++
 1 file changed, 27 insertions(+)
 create mode 100644 llvm/test/CodeGen/AMDGPU/wwm-reg-shift-down-gfx11plus.mir

diff --git a/llvm/test/CodeGen/AMDGPU/wwm-reg-shift-down-gfx11plus.mir b/llvm/test/CodeGen/AMDGPU/wwm-reg-shift-down-gfx11plus.mir
new file mode 100644
index 0000000000000..3d4361c0d27a3
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/wwm-reg-shift-down-gfx11plus.mir
@@ -0,0 +1,27 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 -mattr=+real-true16 -run-pass=prologepilog %s -o - | FileCheck -check-prefix=GCN %s
+
+---
+name:            wwm_skip_shift_16bit_reg
+tracksRegLiveness: true
+machineFunctionInfo:
+  wwmReservedRegs: ['$vgpr0_lo16']
+  isEntryFunction: false
+body:             |
+  bb.0:
+    liveins: $vgpr0,$vgpr1,$sgpr0
+    ; GCN-LABEL: name: wwm_skip_shift_16bit_reg
+    ; GCN: liveins: $vgpr0, $vgpr1, $sgpr0
+    ; GCN-NEXT: {{  $}}
+    ; GCN-NEXT: $sgpr1 = S_XOR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
+    ; GCN-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0_lo16, $sp_reg, 0, 0, implicit $exec, implicit $flat_scr :: (store (s16) into %stack.0, addrspace 5)
+    ; GCN-NEXT: $exec_lo = S_MOV_B32 killed $sgpr1
+    ; GCN-NEXT: undef $vgpr0_lo16 = V_CNDMASK_B16_t16_e64 0, $vgpr0_lo16, 0, $vgpr1_lo16, $sgpr0, 0, implicit $exec
+    ; GCN-NEXT: $sgpr0 = S_XOR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
+    ; GCN-NEXT: $vgpr0_lo16 = SCRATCH_LOAD_DWORD_SADDR $sp_reg, 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_lo16(tied-def 0) :: (load (s16) from %stack.0, addrspace 5)
+    ; GCN-NEXT: $exec_lo = S_MOV_B32 killed $sgpr0
+    ; GCN-NEXT: SI_RETURN implicit $vgpr0
+    undef $vgpr0_lo16 = V_CNDMASK_B16_t16_e64 0, $vgpr0_lo16, 0, $vgpr1_lo16, $sgpr0, 0, implicit $exec
+    SI_RETURN implicit $vgpr0
+...
+

>From ed6bdc3ac4d82b1aa5407acb5ece722134295493 Mon Sep 17 00:00:00 2001
From: guochen2 <guochen2 at amd.com>
Date: Fri, 18 Apr 2025 16:47:24 -0400
Subject: [PATCH 3/3] update test: use generic amdgcn triple and a strict WWM region

---
 .../AMDGPU/wwm-reg-shift-down-gfx11plus.mir   | 31 ++++++++++---------
 1 file changed, 17 insertions(+), 14 deletions(-)

diff --git a/llvm/test/CodeGen/AMDGPU/wwm-reg-shift-down-gfx11plus.mir b/llvm/test/CodeGen/AMDGPU/wwm-reg-shift-down-gfx11plus.mir
index 3d4361c0d27a3..8131d5fa9884d 100644
--- a/llvm/test/CodeGen/AMDGPU/wwm-reg-shift-down-gfx11plus.mir
+++ b/llvm/test/CodeGen/AMDGPU/wwm-reg-shift-down-gfx11plus.mir
@@ -1,27 +1,30 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 -mattr=+real-true16 -run-pass=prologepilog %s -o - | FileCheck -check-prefix=GCN %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -run-pass=prologepilog %s -o - | FileCheck -check-prefix=GCN %s
 
 ---
 name:            wwm_skip_shift_16bit_reg
 tracksRegLiveness: true
+noVRegs:           true
 machineFunctionInfo:
   wwmReservedRegs: ['$vgpr0_lo16']
   isEntryFunction: false
 body:             |
   bb.0:
-    liveins: $vgpr0,$vgpr1,$sgpr0
+    liveins: $sgpr0, $sgpr1
     ; GCN-LABEL: name: wwm_skip_shift_16bit_reg
-    ; GCN: liveins: $vgpr0, $vgpr1, $sgpr0
+    ; GCN: liveins: $sgpr0, $sgpr1
     ; GCN-NEXT: {{  $}}
-    ; GCN-NEXT: $sgpr1 = S_XOR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
-    ; GCN-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0_lo16, $sp_reg, 0, 0, implicit $exec, implicit $flat_scr :: (store (s16) into %stack.0, addrspace 5)
-    ; GCN-NEXT: $exec_lo = S_MOV_B32 killed $sgpr1
-    ; GCN-NEXT: undef $vgpr0_lo16 = V_CNDMASK_B16_t16_e64 0, $vgpr0_lo16, 0, $vgpr1_lo16, $sgpr0, 0, implicit $exec
-    ; GCN-NEXT: $sgpr0 = S_XOR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
-    ; GCN-NEXT: $vgpr0_lo16 = SCRATCH_LOAD_DWORD_SADDR $sp_reg, 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_lo16(tied-def 0) :: (load (s16) from %stack.0, addrspace 5)
-    ; GCN-NEXT: $exec_lo = S_MOV_B32 killed $sgpr0
-    ; GCN-NEXT: SI_RETURN implicit $vgpr0
-    undef $vgpr0_lo16 = V_CNDMASK_B16_t16_e64 0, $vgpr0_lo16, 0, $vgpr1_lo16, $sgpr0, 0, implicit $exec
-    SI_RETURN implicit $vgpr0
+    ; GCN-NEXT: $sgpr2 = S_XOR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
+    ; GCN-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sp_reg, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5)
+    ; GCN-NEXT: $exec_lo = S_MOV_B32 killed $sgpr2
+    ; GCN-NEXT: renamable $vgpr0_hi16 = IMPLICIT_DEF
+    ; GCN-NEXT: $sgpr4 = ENTER_STRICT_WWM -1, implicit-def $exec, implicit-def $scc, implicit $exec
+    ; GCN-NEXT: $vgpr1 = IMPLICIT_DEF
+    ; GCN-NEXT: $vgpr1 = V_CNDMASK_B16_t16_e64 0, killed $vgpr0_hi16, 0, $vgpr1, $sgpr0, 0, implicit $exec
+    ; GCN-NEXT: $exec_lo = EXIT_STRICT_WWM killed renamable $sgpr4
+    renamable $vgpr0_hi16 = IMPLICIT_DEF
+    $sgpr4 = ENTER_STRICT_WWM -1, implicit-def $exec, implicit-def $scc, implicit $exec
+    $vgpr0_lo16 = IMPLICIT_DEF
+    $vgpr0_lo16 = V_CNDMASK_B16_t16_e64 0, killed $vgpr0_hi16, 0, $vgpr0_lo16, $sgpr0, 0, implicit $exec
+    $exec_lo = EXIT_STRICT_WWM killed renamable $sgpr4
 ...
-



More information about the llvm-commits mailing list