[PATCH] D149873: [AMDGPU][GFX908] IndirectCopyToAGPR: Confirm modified register is dst reg of accvgpr_write

Jeffrey Byrnes via Phabricator via llvm-commits llvm-commits at lists.llvm.org
Thu May 4 10:09:23 PDT 2023


jrbyrnes created this revision.
jrbyrnes added reviewers: arsenm, rampitec.
Herald added subscribers: kosarev, foad, kerbowa, hiraditya, tpr, dstuttard, yaxunl, jvesely, kzhuravl.
Herald added a project: All.
jrbyrnes requested review of this revision.
Herald added subscribers: llvm-commits, wdng.
Herald added a project: LLVM.

IndirectCopyToAGPR should be reworked so as to avoid optimizing during copy lowering. However, as it stands, the code is buggy. This patch replaces the call to definesRegister with modifiesRegister, and confirms that the dest reg of the found accvgpr_write is in fact the src reg of our copy.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D149873

Files:
  llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
  llvm/test/CodeGen/AMDGPU/accvgpr-copy.mir


Index: llvm/test/CodeGen/AMDGPU/accvgpr-copy.mir
===================================================================
--- llvm/test/CodeGen/AMDGPU/accvgpr-copy.mir
+++ llvm/test/CodeGen/AMDGPU/accvgpr-copy.mir
@@ -29,6 +29,7 @@
     define amdgpu_kernel void @a_to_a() #0 { ret void }
     define amdgpu_kernel void @a2_to_a2() #0 { ret void }
     define amdgpu_kernel void @a2_to_a2_kill() #0 { ret void }
+    define amdgpu_kernel void @a2_to_a2_implicit_defs() #0 { ret void }
     define amdgpu_kernel void @a3_to_a3_nonoverlap_kill() #0 { ret void }
     define amdgpu_kernel void @a3_to_a3_overlap_kill() #0 { ret void }
     define amdgpu_kernel void @a4_to_a4() #0 { ret void }
@@ -887,6 +888,48 @@
     S_ENDPGM 0, implicit $agpr1, implicit $agpr2, implicit $agpr3
 ...
 
+---
+name:            a2_to_a2_implicit_defs
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $agpr0_agpr1
+    ; GFX908-LABEL: name: a2_to_a2_implicit_defs
+    ; GFX908: liveins: $agpr0_agpr1
+    ; GFX908-NEXT: {{  $}}
+    ; GFX908-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit $agpr0_agpr1
+    ; GFX908-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec, implicit-def $agpr1_agpr2
+    ; GFX908-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit $agpr0_agpr1
+    ; GFX908-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit $exec, implicit-def $agpr1_agpr2
+    ; GFX908-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit $agpr1_agpr2
+    ; GFX908-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr3_agpr4
+    ; GFX908-NEXT: $vgpr255 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec, implicit killed $agpr1_agpr2
+    ; GFX908-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr255, implicit $exec, implicit $exec
+    ; GFX90A-LABEL: name: a2_to_a2_implicit_defs
+    ; GFX90A: liveins: $agpr0_agpr1
+    ; GFX90A-NEXT: {{  $}}
+    ; GFX90A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit $agpr0_agpr1
+    ; GFX90A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec, implicit-def $agpr1_agpr2
+    ; GFX90A-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit $agpr0_agpr1
+    ; GFX90A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit $exec, implicit-def $agpr1_agpr2
+    ; GFX90A-NEXT: $agpr4 = V_ACCVGPR_MOV_B32 $agpr2, implicit $exec, implicit-def $agpr3_agpr4, implicit $agpr1_agpr2
+    ; GFX90A-NEXT: $agpr3 = V_ACCVGPR_MOV_B32 $agpr1, implicit $exec, implicit killed $agpr1_agpr2, implicit $exec
+    ; GFX940-LABEL: name: a2_to_a2_implicit_defs
+    ; GFX940: liveins: $agpr0_agpr1
+    ; GFX940-NEXT: {{  $}}
+    ; GFX940-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit $agpr0_agpr1
+    ; GFX940-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec, implicit-def $agpr1_agpr2
+    ; GFX940-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit $agpr0_agpr1
+    ; GFX940-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit $exec, implicit-def $agpr1_agpr2
+    ; GFX940-NEXT: $agpr4 = V_ACCVGPR_MOV_B32 $agpr2, implicit $exec, implicit-def $agpr3_agpr4, implicit $agpr1_agpr2
+    ; GFX940-NEXT: $agpr3 = V_ACCVGPR_MOV_B32 $agpr1, implicit $exec, implicit killed $agpr1_agpr2, implicit $exec
+    $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit $agpr0_agpr1
+    $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr1, implicit $exec, implicit-def $agpr1_agpr2
+    $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit $agpr0_agpr1
+    $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit $exec, implicit-def $agpr1_agpr2
+    $agpr3_agpr4 = COPY killed $agpr1_agpr2, implicit $exec
+...
+
 ---
 name:            a3_to_a3_nonoverlap_kill
 tracksRegLiveness: true
@@ -959,6 +1002,9 @@
     S_ENDPGM 0, implicit $agpr0_agpr1_agpr2, implicit $vgpr1
 ...
 
+
+
+
 ---
 name:            a4_to_a4
 tracksRegLiveness: true
Index: llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -580,11 +580,21 @@
   if (!RegsOverlap) {
     for (auto Def = MI, E = MBB.begin(); Def != E; ) {
       --Def;
-      if (!Def->definesRegister(SrcReg, &RI))
+
+      if (!Def->modifiesRegister(SrcReg, &RI))
         continue;
+
       if (Def->getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32_e64)
         break;
 
+      // The 0th operand of ACCVGPR_WRITE on gfx908 will always be the operand
+      // that potentially contains the bits we are interested in
+      if (!Def->getOperand(0).isReg())
+        break;
+
+      if (Def->getOperand(0).getReg() != SrcReg)
+        break;
+
       MachineOperand &DefOp = Def->getOperand(1);
       assert(DefOp.isReg() || DefOp.isImm());
 


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D149873.519551.patch
Type: text/x-patch
Size: 4970 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20230504/7fc5156d/attachment.bin>


More information about the llvm-commits mailing list