[llvm] [AMDGPU] Fix moveToVALU for copy to phys SGPR (PR #76715)

via llvm-commits llvm-commits at lists.llvm.org
Tue Jan 2 05:37:34 PST 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-amdgpu

Author: Pierre van Houtryve (Pierre-vh)

<details>
<summary>Changes</summary>

Fixes #<!-- -->76031

---
Full diff: https://github.com/llvm/llvm-project/pull/76715.diff


3 Files Affected:

- (modified) llvm/lib/Target/AMDGPU/SIInstrInfo.cpp (+11) 
- (modified) llvm/test/CodeGen/AMDGPU/bug-cselect-b64.ll (+13-12) 
- (added) llvm/test/CodeGen/AMDGPU/si-fix-sgpr-copies-copy-to-sgpr.mir (+44) 


``````````diff
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 2fb3957a1ca9dc..b08c579895aaae 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -7198,6 +7198,17 @@ void SIInstrInfo::moveToVALUImpl(SIInstrWorklist &Worklist,
     Register DstReg = Inst.getOperand(0).getReg();
     const TargetRegisterClass *NewDstRC = getDestEquivalentVGPRClass(Inst);
 
+    // If it's a copy of a VGPR to a physical SGPR, insert a V_READFIRSTLANE and
+    // hope for the best.
+    if (Inst.isCopy() && DstReg.isPhysical() &&
+        RI.isVGPR(MRI, Inst.getOperand(1).getReg())) {
+      BuildMI(*Inst.getParent(), &Inst, Inst.getDebugLoc(),
+              get(AMDGPU::V_READFIRSTLANE_B32), Inst.getOperand(0).getReg())
+          .add(Inst.getOperand(1));
+      Inst.eraseFromParent();
+      return;
+    }
+
     if (Inst.isCopy() && Inst.getOperand(1).getReg().isVirtual() &&
         NewDstRC == RI.getRegClassForReg(MRI, Inst.getOperand(1).getReg())) {
       // Instead of creating a copy where src and dst are the same register
diff --git a/llvm/test/CodeGen/AMDGPU/bug-cselect-b64.ll b/llvm/test/CodeGen/AMDGPU/bug-cselect-b64.ll
index ac196635b363a4..5c1a7093720428 100644
--- a/llvm/test/CodeGen/AMDGPU/bug-cselect-b64.ll
+++ b/llvm/test/CodeGen/AMDGPU/bug-cselect-b64.ll
@@ -4,19 +4,20 @@
 define amdgpu_cs <2 x i32> @f() {
 ; CHECK-LABEL: f:
 ; CHECK:       ; %bb.0: ; %bb
-; CHECK-NEXT:    s_mov_b32 s0, 0
-; CHECK-NEXT:    s_mov_b32 s1, s0
-; CHECK-NEXT:    s_mov_b32 s2, s0
-; CHECK-NEXT:    s_mov_b32 s3, s0
-; CHECK-NEXT:    s_mov_b32 s4, s0
-; CHECK-NEXT:    buffer_load_dwordx2 v[0:1], off, s[0:3], 0
-; CHECK-NEXT:    s_mov_b32 s5, s0
-; CHECK-NEXT:    v_mov_b32_e32 v2, s0
+; CHECK-NEXT:    s_mov_b32 s4, 0
+; CHECK-NEXT:    s_mov_b32 s5, s4
+; CHECK-NEXT:    s_mov_b32 s6, s4
+; CHECK-NEXT:    s_mov_b32 s7, s4
+; CHECK-NEXT:    s_mov_b32 s0, s4
+; CHECK-NEXT:    buffer_load_dwordx2 v[0:1], off, s[4:7], 0
+; CHECK-NEXT:    s_mov_b32 s1, s4
 ; CHECK-NEXT:    s_waitcnt vmcnt(0)
-; CHECK-NEXT:    v_cmp_ne_u64_e32 vcc_lo, s[4:5], v[0:1]
-; CHECK-NEXT:    v_mov_b32_e32 v0, 0
-; CHECK-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc_lo
-; CHECK-NEXT:    buffer_store_dwordx2 v[1:2], off, s[0:3], 0
+; CHECK-NEXT:    v_cmp_ne_u64_e32 vcc_lo, s[0:1], v[0:1]
+; CHECK-NEXT:    v_mov_b32_e32 v1, s4
+; CHECK-NEXT:    s_mov_b32 s1, 0
+; CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; CHECK-NEXT:    v_readfirstlane_b32 s0, v0
+; CHECK-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
 ; CHECK-NEXT:    ; return to shader part epilog
 bb:
   %i = call <2 x i32> @llvm.amdgcn.raw.buffer.load.v2i32(<4 x i32> zeroinitializer, i32 0, i32 0, i32 0)
diff --git a/llvm/test/CodeGen/AMDGPU/si-fix-sgpr-copies-copy-to-sgpr.mir b/llvm/test/CodeGen/AMDGPU/si-fix-sgpr-copies-copy-to-sgpr.mir
new file mode 100644
index 00000000000000..4292e76f370962
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/si-fix-sgpr-copies-copy-to-sgpr.mir
@@ -0,0 +1,44 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4
+# RUN: llc -march=amdgcn -mcpu=tonga -run-pass=si-fix-sgpr-copies --verify-machineinstrs -o - %s | FileCheck %s
+
+# Copy to $sgpr0 is disconnected and becomes an IMPLICIT_DEF
+# Inserted V_AND_B32 defines virtual register after use.
+
+---
+name:            si_fix_sgpr_copies_breaks_function
+tracksRegLiveness: true
+machineFunctionInfo:
+  isEntryFunction: true
+body:             |
+  bb.0:
+    liveins: $sgpr0
+
+    ; CHECK-LABEL: name: si_fix_sgpr_copies_breaks_function
+    ; CHECK: liveins: $sgpr0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+    ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 16
+    ; CHECK-NEXT: [[S_LSHR_B32_:%[0-9]+]]:sreg_32 = S_LSHR_B32 [[COPY]], killed [[S_MOV_B32_]], implicit-def dead $scc
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY killed [[S_LSHR_B32_]]
+    ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -32768
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY killed [[S_MOV_B32_1]]
+    ; CHECK-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 killed [[COPY1]], [[COPY2]], implicit $exec
+    ; CHECK-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 65535
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
+    ; CHECK-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 killed [[S_MOV_B32_2]], [[V_XOR_B32_e64_]], implicit $exec
+    ; CHECK-NEXT: $sgpr0 = V_READFIRSTLANE_B32 [[V_AND_B32_e64_]], implicit $exec
+    ; CHECK-NEXT: SI_RETURN_TO_EPILOG $sgpr0
+    %0:sgpr_32 = COPY $sgpr0
+    %2:sreg_32 = S_MOV_B32 16
+    %3:sreg_32 = S_LSHR_B32 %0, killed %2, implicit-def dead $scc
+    %4:sreg_32 = COPY killed %3
+    %5:sreg_32 = S_MOV_B32 -32768
+    %7:vgpr_32 = COPY killed %5
+    %6:vgpr_32 = V_XOR_B32_e64 killed %4, %7, implicit $exec
+    %8:sreg_32 = S_MOV_B32 65535
+    %10:sreg_32 = COPY %6
+    %9:sreg_32 = S_AND_B32 killed %8, killed %10, implicit-def dead $scc
+    $sgpr0 = COPY %9
+    SI_RETURN_TO_EPILOG $sgpr0
+
+...

``````````

</details>


https://github.com/llvm/llvm-project/pull/76715


More information about the llvm-commits mailing list