[llvm] [AMDGPU] Mark VOPC_e64 instructions rematerializable (PR #186195)

via llvm-commits llvm-commits at lists.llvm.org
Thu Mar 12 10:38:47 PDT 2026


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-amdgpu

Author: Romanov Vlad (romanovvlad)

<details>
<summary>Changes</summary>

VOPC_e64 comparison instructions don't define VCC, unlike their _e32 counterparts, so these instructions should be suitable for rematerialization.

---
Full diff: https://github.com/llvm/llvm-project/pull/186195.diff


3 Files Affected:

- (modified) llvm/lib/Target/AMDGPU/VOPCInstructions.td (+1) 
- (modified) llvm/test/CodeGen/AMDGPU/ran-out-of-sgprs-allocation-failure.mir (+6-10) 
- (added) llvm/test/CodeGen/AMDGPU/vopc-remat.mir (+46) 


``````````diff
diff --git a/llvm/lib/Target/AMDGPU/VOPCInstructions.td b/llvm/lib/Target/AMDGPU/VOPCInstructions.td
index c251f68319f60..674ee0f330a01 100644
--- a/llvm/lib/Target/AMDGPU/VOPCInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOPCInstructions.td
@@ -385,6 +385,7 @@ multiclass VOPC_Pseudos <string opName,
     VCMPVCMPXTable<opName#"_e64"> {
     let Defs = !if(DefExec, [EXEC], []);
     let SchedRW = P.Schedule;
+    let isReMaterializable = 1;
     let isCompare = 1;
     let isCommutable = 1;
     let AsmMatchConverter = !cond(
diff --git a/llvm/test/CodeGen/AMDGPU/ran-out-of-sgprs-allocation-failure.mir b/llvm/test/CodeGen/AMDGPU/ran-out-of-sgprs-allocation-failure.mir
index e7eefafe31203..13b1d76e14a00 100644
--- a/llvm/test/CodeGen/AMDGPU/ran-out-of-sgprs-allocation-failure.mir
+++ b/llvm/test/CodeGen/AMDGPU/ran-out-of-sgprs-allocation-failure.mir
@@ -44,14 +44,10 @@ body:             |
   ; CHECK-NEXT:   SI_SPILL_S32_SAVE $sgpr15, %stack.0, implicit $exec, implicit $sgpr32 :: (store (s32) into %stack.0, addrspace 5)
   ; CHECK-NEXT:   SI_SPILL_S32_SAVE $sgpr14, %stack.1, implicit $exec, implicit $sgpr32 :: (store (s32) into %stack.1, addrspace 5)
   ; CHECK-NEXT:   renamable $sgpr14_sgpr15 = V_CMP_GT_I32_e64 1, undef %18:vgpr_32, implicit $exec
-  ; CHECK-NEXT:   renamable $sgpr12_sgpr13 = V_CMP_EQ_U32_e64 0, undef %18:vgpr_32, implicit $exec
-  ; CHECK-NEXT:   SI_SPILL_S64_SAVE killed renamable $sgpr12_sgpr13, %stack.3, implicit $exec, implicit $sgpr32 :: (store (s64) into %stack.3, align 4, addrspace 5)
   ; CHECK-NEXT:   renamable $sgpr12_sgpr13 = V_CMP_NE_U32_e64 0, undef %18:vgpr_32, implicit $exec
-  ; CHECK-NEXT:   SI_SPILL_S64_SAVE killed renamable $sgpr12_sgpr13, %stack.4, implicit $exec, implicit $sgpr32 :: (store (s64) into %stack.4, align 4, addrspace 5)
+  ; CHECK-NEXT:   SI_SPILL_S64_SAVE killed renamable $sgpr12_sgpr13, %stack.3, implicit $exec, implicit $sgpr32 :: (store (s64) into %stack.3, align 4, addrspace 5)
   ; CHECK-NEXT:   renamable $sgpr34_sgpr35 = V_CMP_GT_I32_e64 0, undef %18:vgpr_32, implicit $exec
   ; CHECK-NEXT:   renamable $sgpr56 = S_MOV_B32 0
-  ; CHECK-NEXT:   renamable $sgpr12_sgpr13 = V_CMP_EQ_U32_e64 undef $sgpr4, undef %18:vgpr_32, implicit $exec
-  ; CHECK-NEXT:   SI_SPILL_S64_SAVE killed renamable $sgpr12_sgpr13, %stack.5, implicit $exec, implicit $sgpr32 :: (store (s64) into %stack.5, align 4, addrspace 5)
   ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vreg_1024_align2 = COPY renamable $sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71, implicit $exec
   ; CHECK-NEXT:   renamable $sgpr100_sgpr101 = V_CMP_NE_U32_e64 1, undef %18:vgpr_32, implicit $exec
   ; CHECK-NEXT:   renamable $sgpr57 = S_MOV_B32 1083786240
@@ -167,7 +163,7 @@ body:             |
   ; CHECK-NEXT:   successors: %bb.12(0x40000000), %bb.6(0x40000000)
   ; CHECK-NEXT:   liveins: $sgpr16, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr14_sgpr15, $sgpr34_sgpr35, $sgpr100_sgpr101
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   renamable $sgpr12_sgpr13 = SI_SPILL_S64_RESTORE %stack.4, implicit $exec, implicit $sgpr32 :: (load (s64) from %stack.4, align 4, addrspace 5)
+  ; CHECK-NEXT:   renamable $sgpr12_sgpr13 = SI_SPILL_S64_RESTORE %stack.3, implicit $exec, implicit $sgpr32 :: (load (s64) from %stack.3, align 4, addrspace 5)
   ; CHECK-NEXT:   renamable $sgpr12_sgpr13 = S_AND_B64 killed renamable $sgpr12_sgpr13, undef renamable $sgpr54_sgpr55, implicit-def dead $scc
   ; CHECK-NEXT:   renamable $sgpr54_sgpr55 = V_CMP_GT_I32_e64 0, undef %18:vgpr_32, implicit $exec
   ; CHECK-NEXT:   $exec = S_MOV_B64_term killed renamable $sgpr12_sgpr13
@@ -215,13 +211,13 @@ body:             |
   ; CHECK-NEXT:   $sgpr13 = SI_SPILL_S32_RESTORE %stack.0, implicit $exec, implicit $sgpr32 :: (load (s32) from %stack.0, addrspace 5)
   ; CHECK-NEXT:   renamable $sgpr84 = COPY killed renamable $sgpr8
   ; CHECK-NEXT:   renamable $sgpr33 = COPY killed renamable $sgpr16
-  ; CHECK-NEXT:   renamable $sgpr48_sgpr49 = COPY killed renamable $sgpr14_sgpr15
+  ; CHECK-NEXT:   renamable $sgpr36_sgpr37 = COPY killed renamable $sgpr14_sgpr15
   ; CHECK-NEXT:   dead $sgpr30_sgpr31 = SI_CALL undef renamable $sgpr12_sgpr13, 0, csr_amdgpu_gfx90ainsts, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13
   ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
   ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
   ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY renamable $sgpr82_sgpr83
   ; CHECK-NEXT:   dead $sgpr30_sgpr31 = SI_CALL undef renamable $sgpr12_sgpr13, 0, csr_amdgpu_gfx90ainsts, implicit $sgpr8_sgpr9
-  ; CHECK-NEXT:   renamable $sgpr14_sgpr15 = COPY killed renamable $sgpr48_sgpr49
+  ; CHECK-NEXT:   renamable $sgpr14_sgpr15 = COPY killed renamable $sgpr36_sgpr37
   ; CHECK-NEXT:   renamable $sgpr16 = COPY killed renamable $sgpr33
   ; CHECK-NEXT:   renamable $sgpr4_sgpr5 = COPY killed renamable $sgpr68_sgpr69
   ; CHECK-NEXT:   renamable $sgpr6_sgpr7 = COPY killed renamable $sgpr70_sgpr71
@@ -257,7 +253,7 @@ body:             |
   ; CHECK-NEXT:   successors: %bb.15(0x40000000), %bb.14(0x40000000)
   ; CHECK-NEXT:   liveins: $sgpr16, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr14_sgpr15, $sgpr34_sgpr35, $sgpr100_sgpr101
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   renamable $sgpr12_sgpr13 = SI_SPILL_S64_RESTORE %stack.5, implicit $exec, implicit $sgpr32 :: (load (s64) from %stack.5, align 4, addrspace 5)
+  ; CHECK-NEXT:   renamable $sgpr12_sgpr13 = V_CMP_EQ_U32_e64 undef $sgpr4, undef %18:vgpr_32, implicit $exec
   ; CHECK-NEXT:   $vcc = S_AND_B64 $exec, killed renamable $sgpr12_sgpr13, implicit-def dead $scc
   ; CHECK-NEXT:   S_CBRANCH_VCCNZ %bb.15, implicit $vcc
   ; CHECK-NEXT:   S_BRANCH %bb.14
@@ -270,7 +266,7 @@ body:             |
   ; CHECK-NEXT:   successors: %bb.11(0x40000000), %bb.16(0x40000000)
   ; CHECK-NEXT:   liveins: $sgpr16, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr14_sgpr15, $sgpr34_sgpr35, $sgpr100_sgpr101
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   renamable $sgpr12_sgpr13 = SI_SPILL_S64_RESTORE %stack.3, implicit $exec, implicit $sgpr32 :: (load (s64) from %stack.3, align 4, addrspace 5)
+  ; CHECK-NEXT:   renamable $sgpr12_sgpr13 = V_CMP_EQ_U32_e64 0, undef %18:vgpr_32, implicit $exec
   ; CHECK-NEXT:   $vcc = S_AND_B64 $exec, killed renamable $sgpr12_sgpr13, implicit-def dead $scc
   ; CHECK-NEXT:   S_CBRANCH_VCCNZ %bb.11, implicit $vcc
   ; CHECK-NEXT: {{  $}}
diff --git a/llvm/test/CodeGen/AMDGPU/vopc-remat.mir b/llvm/test/CodeGen/AMDGPU/vopc-remat.mir
new file mode 100644
index 0000000000000..fb4334384cca6
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/vopc-remat.mir
@@ -0,0 +1,46 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 --stress-regalloc=2 -start-before=greedy -stop-after=virtregrewriter -o - %s | FileCheck -check-prefix=GCN %s
+
+# Test that V_CMP_*_e64 instructions are rematerialized instead of spilled.
+
+---
+name:            test_vopc_e64_remat
+tracksRegLiveness: true
+machineFunctionInfo:
+  isEntryFunction: true
+  scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
+  stackPtrOffsetReg: $sgpr32
+body:             |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+
+    ; GCN-LABEL: name: test_vopc_e64_remat
+    ; GCN: liveins: $vgpr0, $vgpr1
+    ; GCN-NEXT: {{  $}}
+    ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; GCN-NEXT: renamable $sgpr4_sgpr5 = V_CMP_GT_U32_e64 [[COPY]], [[COPY1]], implicit $exec
+    ; GCN-NEXT: renamable $sgpr6_sgpr7 = V_CMP_LT_U32_e64 [[COPY]], [[COPY1]], implicit $exec
+    ; GCN-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */
+    ; GCN-NEXT: renamable $sgpr4_sgpr5 = S_AND_B64 killed renamable $sgpr4_sgpr5, $exec, implicit-def $scc
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $sgpr4_sgpr5
+    ; GCN-NEXT: renamable $sgpr4_sgpr5 = S_AND_B64 killed renamable $sgpr6_sgpr7, $exec, implicit-def $scc
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $sgpr4_sgpr5
+    ; GCN-NEXT: renamable $sgpr4_sgpr5 = V_CMP_EQ_U32_e64 [[COPY]], [[COPY1]], implicit $exec
+    ; GCN-NEXT: renamable $sgpr4_sgpr5 = S_AND_B64 killed renamable $sgpr4_sgpr5, $exec, implicit-def $scc
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $sgpr4_sgpr5, implicit [[COPY]], implicit [[COPY1]]
+    ; GCN-NEXT: S_ENDPGM 0
+    %10:vgpr_32 = COPY $vgpr0
+    %11:vgpr_32 = COPY $vgpr1
+    %0:sreg_64 = V_CMP_GT_U32_e64 %10, %11, implicit $exec
+    %1:sreg_64 = V_CMP_LT_U32_e64 %10, %11, implicit $exec
+    %2:sreg_64 = V_CMP_EQ_U32_e64 %10, %11, implicit $exec
+    INLINEASM &"", 1 /* sideeffect attdialect */
+    %3:sreg_64 = S_AND_B64 %0, $exec, implicit-def $scc
+    S_NOP 0, implicit %3
+    %4:sreg_64 = S_AND_B64 %1, $exec, implicit-def $scc
+    S_NOP 0, implicit %4
+    %5:sreg_64 = S_AND_B64 %2, $exec, implicit-def $scc
+    S_NOP 0, implicit %5, implicit %10, implicit %11
+    S_ENDPGM 0
+...

``````````

</details>


https://github.com/llvm/llvm-project/pull/186195


More information about the llvm-commits mailing list