[llvm] e73b356 - [SelectionDAG] Fix EmitCopyFromReg for cloned nodes

Jay Foad via llvm-commits llvm-commits at lists.llvm.org
Wed Dec 21 02:47:23 PST 2022


Author: Jay Foad
Date: 2022-12-21T10:44:45Z
New Revision: e73b35699be0166d2de8cbb7cb6ac544a3e63f5f

URL: https://github.com/llvm/llvm-project/commit/e73b35699be0166d2de8cbb7cb6ac544a3e63f5f
DIFF: https://github.com/llvm/llvm-project/commit/e73b35699be0166d2de8cbb7cb6ac544a3e63f5f.diff

LOG: [SelectionDAG] Fix EmitCopyFromReg for cloned nodes

Change EmitCopyFromReg to check all users of cloned nodes (as well as
non-cloned nodes) instead of assuming that they all copy the defined
value back to the same physical register.

This partially reverts 968e2e7b3db1 (svn r62356) which claimed:

  CreateVirtualRegisters does trivial copy coalescing. If a node def is
  used by a single CopyToReg, it reuses the virtual register assigned to
  the CopyToReg. This won't work for SDNode that is a clone or is itself
  cloned. Disable this optimization for those nodes or it can end up
  with non-SSA machine instructions.

This is true for CreateVirtualRegisters but r62356 also updated
EmitCopyFromReg where it is not true. Firstly, EmitCopyFromReg only
coalesces physical register copies, so the concern about SSA form does
not apply. Secondly, making the loop over users in EmitCopyFromReg
conditional on `!IsClone && !IsCloned` breaks the handling of cloned
nodes, because it leaves MatchReg set to true by default, so it assumes
that all users will copy the defined value back to the same physical
register instead of actually checking.

Differential Revision: https://reviews.llvm.org/D140417

Added: 
    

Modified: 
    llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
    llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h
    llvm/test/CodeGen/AMDGPU/bug-sdag-emitcopyfromreg.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
index 9d225ad4e320..e2e5158d7398 100644
--- a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
@@ -81,9 +81,9 @@ static unsigned countOperands(SDNode *Node, unsigned NumExpUses,
 
 /// EmitCopyFromReg - Generate machine code for an CopyFromReg node or an
 /// implicit physical register output.
-void InstrEmitter::
-EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned,
-                Register SrcReg, DenseMap<SDValue, Register> &VRBaseMap) {
+void InstrEmitter::EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone,
+                                   Register SrcReg,
+                                   DenseMap<SDValue, Register> &VRBaseMap) {
   Register VRBase;
   if (SrcReg.isVirtual()) {
     // Just use the input register directly!
@@ -106,51 +106,50 @@ EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned,
   if (TLI->isTypeLegal(VT))
     UseRC = TLI->getRegClassFor(VT, Node->isDivergent());
 
-  if (!IsClone && !IsCloned)
-    for (SDNode *User : Node->uses()) {
-      bool Match = true;
-      if (User->getOpcode() == ISD::CopyToReg &&
-          User->getOperand(2).getNode() == Node &&
-          User->getOperand(2).getResNo() == ResNo) {
-        Register DestReg = cast<RegisterSDNode>(User->getOperand(1))->getReg();
-        if (DestReg.isVirtual()) {
-          VRBase = DestReg;
-          Match = false;
-        } else if (DestReg != SrcReg)
-          Match = false;
-      } else {
-        for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
-          SDValue Op = User->getOperand(i);
-          if (Op.getNode() != Node || Op.getResNo() != ResNo)
-            continue;
-          MVT VT = Node->getSimpleValueType(Op.getResNo());
-          if (VT == MVT::Other || VT == MVT::Glue)
-            continue;
-          Match = false;
-          if (User->isMachineOpcode()) {
-            const MCInstrDesc &II = TII->get(User->getMachineOpcode());
-            const TargetRegisterClass *RC = nullptr;
-            if (i+II.getNumDefs() < II.getNumOperands()) {
-              RC = TRI->getAllocatableClass(
-                TII->getRegClass(II, i+II.getNumDefs(), TRI, *MF));
-            }
-            if (!UseRC)
-              UseRC = RC;
-            else if (RC) {
-              const TargetRegisterClass *ComRC =
+  for (SDNode *User : Node->uses()) {
+    bool Match = true;
+    if (User->getOpcode() == ISD::CopyToReg &&
+        User->getOperand(2).getNode() == Node &&
+        User->getOperand(2).getResNo() == ResNo) {
+      Register DestReg = cast<RegisterSDNode>(User->getOperand(1))->getReg();
+      if (DestReg.isVirtual()) {
+        VRBase = DestReg;
+        Match = false;
+      } else if (DestReg != SrcReg)
+        Match = false;
+    } else {
+      for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
+        SDValue Op = User->getOperand(i);
+        if (Op.getNode() != Node || Op.getResNo() != ResNo)
+          continue;
+        MVT VT = Node->getSimpleValueType(Op.getResNo());
+        if (VT == MVT::Other || VT == MVT::Glue)
+          continue;
+        Match = false;
+        if (User->isMachineOpcode()) {
+          const MCInstrDesc &II = TII->get(User->getMachineOpcode());
+          const TargetRegisterClass *RC = nullptr;
+          if (i + II.getNumDefs() < II.getNumOperands()) {
+            RC = TRI->getAllocatableClass(
+                TII->getRegClass(II, i + II.getNumDefs(), TRI, *MF));
+          }
+          if (!UseRC)
+            UseRC = RC;
+          else if (RC) {
+            const TargetRegisterClass *ComRC =
                 TRI->getCommonSubClass(UseRC, RC);
-              // If multiple uses expect disjoint register classes, we emit
-              // copies in AddRegisterOperand.
-              if (ComRC)
-                UseRC = ComRC;
-            }
+            // If multiple uses expect disjoint register classes, we emit
+            // copies in AddRegisterOperand.
+            if (ComRC)
+              UseRC = ComRC;
           }
         }
       }
-      MatchReg &= Match;
-      if (VRBase)
-        break;
     }
+    MatchReg &= Match;
+    if (VRBase)
+      break;
+  }
 
   const TargetRegisterClass *SrcRC = nullptr, *DstRC = nullptr;
   SrcRC = TRI->getMinimalPhysRegClass(SrcReg, VT);
@@ -1096,7 +1095,7 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
         continue;
       // This implicitly defined physreg has a use.
       UsedRegs.push_back(Reg);
-      EmitCopyFromReg(Node, i, IsClone, IsCloned, Reg, VRBaseMap);
+      EmitCopyFromReg(Node, i, IsClone, Reg, VRBaseMap);
     }
   }
 
@@ -1191,7 +1190,7 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
   }
   case ISD::CopyFromReg: {
     unsigned SrcReg = cast<RegisterSDNode>(Node->getOperand(1))->getReg();
-    EmitCopyFromReg(Node, 0, IsClone, IsCloned, SrcReg, VRBaseMap);
+    EmitCopyFromReg(Node, 0, IsClone, SrcReg, VRBaseMap);
     break;
   }
   case ISD::EH_LABEL:

diff --git a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h
index ced8f064b9be..0da4b962450c 100644
--- a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h
+++ b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h
@@ -44,10 +44,8 @@ class LLVM_LIBRARY_VISIBILITY InstrEmitter {
 
   /// EmitCopyFromReg - Generate machine code for an CopyFromReg node or an
   /// implicit physical register output.
-  void EmitCopyFromReg(SDNode *Node, unsigned ResNo,
-                       bool IsClone, bool IsCloned,
-                       Register SrcReg,
-                       DenseMap<SDValue, Register> &VRBaseMap);
+  void EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone,
+                       Register SrcReg, DenseMap<SDValue, Register> &VRBaseMap);
 
   void CreateVirtualRegisters(SDNode *Node,
                               MachineInstrBuilder &MIB,

diff  --git a/llvm/test/CodeGen/AMDGPU/bug-sdag-emitcopyfromreg.ll b/llvm/test/CodeGen/AMDGPU/bug-sdag-emitcopyfromreg.ll
index b3142de50ffd..b1458270bba9 100644
--- a/llvm/test/CodeGen/AMDGPU/bug-sdag-emitcopyfromreg.ll
+++ b/llvm/test/CodeGen/AMDGPU/bug-sdag-emitcopyfromreg.ll
@@ -14,18 +14,23 @@ define void @f(i32 %arg, float* %ptr) {
 ; ISA-NEXT:    v_mov_b32_e32 v7, 0
 ; ISA-NEXT:    s_waitcnt lgkmcnt(0)
 ; ISA-NEXT:    s_cmp_lg_u32 s4, 0
+; ISA-NEXT:    s_cselect_b32 s6, -1, 0
+; ISA-NEXT:    s_and_b32 s6, s6, exec_lo
 ; ISA-NEXT:    s_cselect_b32 s6, s5, 0
 ; ISA-NEXT:    s_lshr_b32 s7, 1, s4
 ; ISA-NEXT:    s_cmp_lg_u32 s4, 0
 ; ISA-NEXT:    v_cvt_f32_i32_e32 v0, s6
+; ISA-NEXT:    s_cselect_b32 s8, -1, 0
+; ISA-NEXT:    s_and_b32 s8, s8, exec_lo
 ; ISA-NEXT:    s_cselect_b32 s7, s7, 0
 ; ISA-NEXT:    s_lshr_b32 s5, s5, 1
 ; ISA-NEXT:    s_cmp_lg_u32 s4, 0
-; ISA-NEXT:    v_cvt_f32_ubyte0_e32 v3, s7
+; ISA-NEXT:    v_cvt_f32_ubyte0_e32 v4, s7
+; ISA-NEXT:    s_cselect_b32 s4, -1, 0
+; ISA-NEXT:    v_cndmask_b32_e64 v3, 0, 1.0, s4
+; ISA-NEXT:    s_and_b32 s4, s4, exec_lo
 ; ISA-NEXT:    s_cselect_b32 s4, s5, 0
 ; ISA-NEXT:    v_cvt_f32_i32_e32 v5, s4
-; ISA-NEXT:    s_cselect_b32 s4, -1, 0
-; ISA-NEXT:    v_cndmask_b32_e64 v4, 0, 1.0, s4
 ; ISA-NEXT:    s_mov_b32 s4, 0
 ; ISA-NEXT:    v_and_b32_e32 v5, 0x7fffffff, v5
 ; ISA-NEXT:  .LBB0_1: ; %bb14
@@ -33,9 +38,9 @@ define void @f(i32 %arg, float* %ptr) {
 ; ISA-NEXT:    v_mov_b32_e32 v6, v7
 ; ISA-NEXT:    s_and_b32 s5, exec_lo, vcc_lo
 ; ISA-NEXT:    s_or_b32 s4, s5, s4
-; ISA-NEXT:    v_add_f32_e32 v7, v6, v4
+; ISA-NEXT:    v_add_f32_e32 v7, v6, v3
 ; ISA-NEXT:    v_add_f32_e32 v7, v7, v5
-; ISA-NEXT:    v_add_f32_e32 v7, v7, v3
+; ISA-NEXT:    v_add_f32_e32 v7, v7, v4
 ; ISA-NEXT:    v_add_f32_e32 v7, v7, v0
 ; ISA-NEXT:    s_andn2_b32 exec_lo, exec_lo, s4
 ; ISA-NEXT:    s_cbranch_execnz .LBB0_1
@@ -59,45 +64,50 @@ define void @f(i32 %arg, float* %ptr) {
   ; MIR-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY [[S_LOAD_DWORDX2_IMM]].sub0
   ; MIR-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
   ; MIR-NEXT:   S_CMP_LG_U32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc
+  ; MIR-NEXT:   [[COPY5:%[0-9]+]]:sreg_32_xm0_xexec = COPY $scc
+  ; MIR-NEXT:   $scc = COPY [[COPY5]]
   ; MIR-NEXT:   [[S_CSELECT_B32_:%[0-9]+]]:sreg_32 = S_CSELECT_B32 [[COPY3]], [[S_MOV_B32_]], implicit $scc
   ; MIR-NEXT:   [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 1
   ; MIR-NEXT:   [[S_LSHR_B32_:%[0-9]+]]:sreg_32 = S_LSHR_B32 [[S_MOV_B32_1]], [[COPY4]], implicit-def dead $scc
   ; MIR-NEXT:   S_CMP_LG_U32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc
+  ; MIR-NEXT:   [[COPY6:%[0-9]+]]:sreg_32_xm0_xexec = COPY $scc
+  ; MIR-NEXT:   $scc = COPY [[COPY6]]
   ; MIR-NEXT:   [[S_CSELECT_B32_1:%[0-9]+]]:sreg_32 = S_CSELECT_B32 killed [[S_LSHR_B32_]], [[S_MOV_B32_]], implicit $scc
   ; MIR-NEXT:   [[S_LSHR_B32_1:%[0-9]+]]:sreg_32 = S_LSHR_B32 [[COPY3]], [[S_MOV_B32_1]], implicit-def dead $scc
   ; MIR-NEXT:   S_CMP_LG_U32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc
+  ; MIR-NEXT:   [[COPY7:%[0-9]+]]:sreg_32_xm0_xexec = COPY $scc
   ; MIR-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY]], %subreg.sub1
-  ; MIR-NEXT:   [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]]
+  ; MIR-NEXT:   [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]]
+  ; MIR-NEXT:   $scc = COPY [[COPY7]]
   ; MIR-NEXT:   [[S_CSELECT_B32_2:%[0-9]+]]:sreg_32 = S_CSELECT_B32 killed [[S_LSHR_B32_1]], [[S_MOV_B32_]], implicit $scc
   ; MIR-NEXT:   [[V_CVT_F32_I32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_F32_I32_e64 killed [[S_CSELECT_B32_2]], 0, 0, implicit $mode, implicit $exec
   ; MIR-NEXT:   [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483647
-  ; MIR-NEXT:   [[COPY6:%[0-9]+]]:sreg_32 = COPY [[V_CVT_F32_I32_e64_]]
-  ; MIR-NEXT:   [[S_AND_B32_:%[0-9]+]]:sgpr_32 = S_AND_B32 killed [[COPY6]], killed [[S_MOV_B32_2]], implicit-def dead $scc
+  ; MIR-NEXT:   [[COPY9:%[0-9]+]]:sreg_32 = COPY [[V_CVT_F32_I32_e64_]]
+  ; MIR-NEXT:   [[S_AND_B32_:%[0-9]+]]:sgpr_32 = S_AND_B32 killed [[COPY9]], killed [[S_MOV_B32_2]], implicit-def dead $scc
   ; MIR-NEXT:   [[S_MOV_B32_3:%[0-9]+]]:sgpr_32 = S_MOV_B32 1065353216
   ; MIR-NEXT:   [[S_MOV_B32_4:%[0-9]+]]:sgpr_32 = S_MOV_B32 0
-  ; MIR-NEXT:   [[COPY7:%[0-9]+]]:sreg_32_xm0_xexec = COPY $scc
-  ; MIR-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY killed [[S_MOV_B32_3]]
-  ; MIR-NEXT:   [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[S_MOV_B32_4]], 0, [[COPY8]], [[COPY7]], implicit $exec
-  ; MIR-NEXT:   [[COPY9:%[0-9]+]]:sgpr_32 = COPY [[V_CNDMASK_B32_e64_]]
+  ; MIR-NEXT:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY killed [[S_MOV_B32_3]]
+  ; MIR-NEXT:   [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[S_MOV_B32_4]], 0, [[COPY10]], [[COPY7]], implicit $exec
+  ; MIR-NEXT:   [[COPY11:%[0-9]+]]:sgpr_32 = COPY [[V_CNDMASK_B32_e64_]]
   ; MIR-NEXT:   [[V_CVT_F32_UBYTE0_e64_:%[0-9]+]]:vgpr_32 = V_CVT_F32_UBYTE0_e64 killed [[S_CSELECT_B32_1]], 0, 0, implicit $exec
-  ; MIR-NEXT:   [[COPY10:%[0-9]+]]:sgpr_32 = COPY [[V_CVT_F32_UBYTE0_e64_]]
+  ; MIR-NEXT:   [[COPY12:%[0-9]+]]:sgpr_32 = COPY [[V_CVT_F32_UBYTE0_e64_]]
   ; MIR-NEXT:   [[V_CVT_F32_I32_e64_1:%[0-9]+]]:vgpr_32 = V_CVT_F32_I32_e64 killed [[S_CSELECT_B32_]], 0, 0, implicit $mode, implicit $exec
-  ; MIR-NEXT:   [[COPY11:%[0-9]+]]:sgpr_32 = COPY [[V_CVT_F32_I32_e64_1]]
+  ; MIR-NEXT:   [[COPY13:%[0-9]+]]:sgpr_32 = COPY [[V_CVT_F32_I32_e64_1]]
   ; MIR-NEXT:   [[V_CMP_LT_I32_e64_:%[0-9]+]]:sreg_32 = V_CMP_LT_I32_e64 [[COPY2]], [[S_MOV_B32_1]], implicit $exec
-  ; MIR-NEXT:   [[COPY12:%[0-9]+]]:vreg_1 = COPY [[V_CMP_LT_I32_e64_]]
+  ; MIR-NEXT:   [[COPY14:%[0-9]+]]:vreg_1 = COPY [[V_CMP_LT_I32_e64_]]
   ; MIR-NEXT: {{  $}}
   ; MIR-NEXT: bb.1.bb14:
   ; MIR-NEXT:   successors: %bb.2(0x04000000), %bb.1(0x7c000000)
   ; MIR-NEXT: {{  $}}
   ; MIR-NEXT:   [[PHI:%[0-9]+]]:sreg_32 = PHI [[S_MOV_B32_]], %bb.0, %7, %bb.1
   ; MIR-NEXT:   [[PHI1:%[0-9]+]]:sgpr_32 = PHI [[S_MOV_B32_4]], %bb.0, %8, %bb.1
-  ; MIR-NEXT:   [[COPY13:%[0-9]+]]:sreg_32 = COPY [[COPY12]]
-  ; MIR-NEXT:   [[SI_IF_BREAK:%[0-9]+]]:sreg_32 = SI_IF_BREAK [[COPY13]], [[PHI]], implicit-def dead $scc
-  ; MIR-NEXT:   [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[PHI1]], 0, [[COPY9]], 0, 0, implicit $mode, implicit $exec
+  ; MIR-NEXT:   [[COPY15:%[0-9]+]]:sreg_32 = COPY [[COPY14]]
+  ; MIR-NEXT:   [[SI_IF_BREAK:%[0-9]+]]:sreg_32 = SI_IF_BREAK [[COPY15]], [[PHI]], implicit-def dead $scc
+  ; MIR-NEXT:   [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[PHI1]], 0, [[COPY11]], 0, 0, implicit $mode, implicit $exec
   ; MIR-NEXT:   [[V_ADD_F32_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, killed [[V_ADD_F32_e64_]], 0, [[S_AND_B32_]], 0, 0, implicit $mode, implicit $exec
-  ; MIR-NEXT:   [[V_ADD_F32_e64_2:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, killed [[V_ADD_F32_e64_1]], 0, [[COPY10]], 0, 0, implicit $mode, implicit $exec
-  ; MIR-NEXT:   [[V_ADD_F32_e64_3:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, killed [[V_ADD_F32_e64_2]], 0, [[COPY11]], 0, 0, implicit $mode, implicit $exec
-  ; MIR-NEXT:   [[COPY14:%[0-9]+]]:sgpr_32 = COPY [[V_ADD_F32_e64_3]]
+  ; MIR-NEXT:   [[V_ADD_F32_e64_2:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, killed [[V_ADD_F32_e64_1]], 0, [[COPY12]], 0, 0, implicit $mode, implicit $exec
+  ; MIR-NEXT:   [[V_ADD_F32_e64_3:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, killed [[V_ADD_F32_e64_2]], 0, [[COPY13]], 0, 0, implicit $mode, implicit $exec
+  ; MIR-NEXT:   [[COPY16:%[0-9]+]]:sgpr_32 = COPY [[V_ADD_F32_e64_3]]
   ; MIR-NEXT:   SI_LOOP [[SI_IF_BREAK]], %bb.1, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
   ; MIR-NEXT:   S_BRANCH %bb.2
   ; MIR-NEXT: {{  $}}
@@ -105,7 +115,7 @@ define void @f(i32 %arg, float* %ptr) {
   ; MIR-NEXT:   [[PHI2:%[0-9]+]]:vgpr_32 = PHI [[PHI1]], %bb.1
   ; MIR-NEXT:   [[PHI3:%[0-9]+]]:sreg_32 = PHI [[SI_IF_BREAK]], %bb.1
   ; MIR-NEXT:   SI_END_CF [[PHI3]], implicit-def dead $exec, implicit-def dead $scc, implicit $exec
-  ; MIR-NEXT:   FLAT_STORE_DWORD [[COPY5]], [[PHI2]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %ir.ptr)
+  ; MIR-NEXT:   FLAT_STORE_DWORD [[COPY8]], [[PHI2]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %ir.ptr)
   ; MIR-NEXT:   SI_RETURN
 bb:
   %i = load <2 x i32>, <2 x i32> addrspace(4)* null, align 4294967296


        


More information about the llvm-commits mailing list