[llvm] 09424f8 - [AMDGPU] Check for CopyToReg PhysReg clobbers in pre-RA-sched

via llvm-commits llvm-commits at lists.llvm.org
Thu Jun 30 09:18:28 PDT 2022


Author: jeff
Date: 2022-06-30T09:18:04-07:00
New Revision: 09424f802c8f69f8065e6cebddc7761602cc2d6d

URL: https://github.com/llvm/llvm-project/commit/09424f802c8f69f8065e6cebddc7761602cc2d6d
DIFF: https://github.com/llvm/llvm-project/commit/09424f802c8f69f8065e6cebddc7761602cc2d6d.diff

LOG: [AMDGPU] Check for CopyToReg PhysReg clobbers in pre-RA-sched

Differential Revision: https://reviews.llvm.org/D128681

Added: 
    llvm/test/CodeGen/AMDGPU/copy-to-reg-scc-clobber.ll

Modified: 
    llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
    llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
index 8097bb4c7f15f..78fc407e9573a 100644
--- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
@@ -440,17 +440,29 @@ static MVT getPhysicalRegisterVT(SDNode *N, unsigned Reg,
 /// CheckForLiveRegDef - Return true and update live register vector if the
 /// specified register def of the specified SUnit clobbers any "live" registers.
 static bool CheckForLiveRegDef(SUnit *SU, unsigned Reg,
-                               std::vector<SUnit*> &LiveRegDefs,
+                               std::vector<SUnit *> &LiveRegDefs,
                                SmallSet<unsigned, 4> &RegAdded,
                                SmallVectorImpl<unsigned> &LRegs,
-                               const TargetRegisterInfo *TRI) {
+                               const TargetRegisterInfo *TRI,
+                               const SDNode *Node = nullptr) {
   bool Added = false;
   for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) {
-    if (LiveRegDefs[*AI] && LiveRegDefs[*AI] != SU) {
-      if (RegAdded.insert(*AI).second) {
-        LRegs.push_back(*AI);
-        Added = true;
-      }
+    // Check if this alias of Reg is live.
+    if (!LiveRegDefs[*AI])
+      continue;
+
+    // Allow multiple uses of the same def.
+    if (LiveRegDefs[*AI] == SU)
+      continue;
+
+    // Allow the case where the live def is Node itself (the CopyToReg source).
+    if (Node && LiveRegDefs[*AI]->getNode() == Node)
+      continue;
+
+    // Add Reg to the set of interfering live regs.
+    if (RegAdded.insert(*AI).second) {
+      LRegs.push_back(*AI);
+      Added = true;
     }
   }
   return Added;
@@ -502,6 +514,15 @@ bool ScheduleDAGFast::DelayForLiveRegsBottomUp(SUnit *SU,
       }
       continue;
     }
+
+    if (Node->getOpcode() == ISD::CopyToReg) {
+      Register Reg = cast<RegisterSDNode>(Node->getOperand(1))->getReg();
+      if (Reg.isPhysical()) {
+        SDNode *SrcNode = Node->getOperand(2).getNode();
+        CheckForLiveRegDef(SU, Reg, LiveRegDefs, RegAdded, LRegs, TRI, SrcNode);
+      }
+    }
+
     if (!Node->isMachineOpcode())
       continue;
     const MCInstrDesc &MCID = TII->get(Node->getMachineOpcode());

diff  --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
index 7a5e8ac6075e6..8a04ce7535a17 100644
--- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
@@ -1294,11 +1294,11 @@ static MVT getPhysicalRegisterVT(SDNode *N, unsigned Reg,
 
 /// CheckForLiveRegDef - Return true and update live register vector if the
 /// specified register def of the specified SUnit clobbers any "live" registers.
-static void CheckForLiveRegDef(SUnit *SU, unsigned Reg,
-                               SUnit **LiveRegDefs,
+static void CheckForLiveRegDef(SUnit *SU, unsigned Reg, SUnit **LiveRegDefs,
                                SmallSet<unsigned, 4> &RegAdded,
                                SmallVectorImpl<unsigned> &LRegs,
-                               const TargetRegisterInfo *TRI) {
+                               const TargetRegisterInfo *TRI,
+                               const SDNode *Node = nullptr) {
   for (MCRegAliasIterator AliasI(Reg, TRI, true); AliasI.isValid(); ++AliasI) {
 
     // Check if Ref is live.
@@ -1307,6 +1307,10 @@ static void CheckForLiveRegDef(SUnit *SU, unsigned Reg,
     // Allow multiple uses of the same def.
     if (LiveRegDefs[*AliasI] == SU) continue;
 
+    // Allow the case where the live def is Node itself (the CopyToReg source).
+    if (Node && LiveRegDefs[*AliasI]->getNode() == Node)
+      continue;
+
     // Add Reg to the set of interfering live regs.
     if (RegAdded.insert(*AliasI).second) {
       LRegs.push_back(*AliasI);
@@ -1387,6 +1391,15 @@ DelayForLiveRegsBottomUp(SUnit *SU, SmallVectorImpl<unsigned> &LRegs) {
       continue;
     }
 
+    if (Node->getOpcode() == ISD::CopyToReg) {
+      Register Reg = cast<RegisterSDNode>(Node->getOperand(1))->getReg();
+      if (Reg.isPhysical()) {
+        SDNode *SrcNode = Node->getOperand(2).getNode();
+        CheckForLiveRegDef(SU, Reg, LiveRegDefs.get(), RegAdded, LRegs, TRI,
+                           SrcNode);
+      }
+    }
+
     if (!Node->isMachineOpcode())
       continue;
     // If we're in the middle of scheduling a call, don't begin scheduling

diff  --git a/llvm/test/CodeGen/AMDGPU/copy-to-reg-scc-clobber.ll b/llvm/test/CodeGen/AMDGPU/copy-to-reg-scc-clobber.ll
new file mode 100644
index 0000000000000..2b8a712b28c05
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/copy-to-reg-scc-clobber.ll
@@ -0,0 +1,64 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -march=amdgcn -mcpu=gfx90a -O3 -pre-RA-sched=source < %s | FileCheck -check-prefix=RRLIST %s
+; RUN: llc -march=amdgcn -mcpu=gfx90a -O3 -pre-RA-sched=fast < %s | FileCheck -check-prefix=FAST %s
+
+
+define protected amdgpu_kernel void @sccClobber(ptr addrspace(1) %a, ptr addrspace(1) %b, ptr addrspace(1) %e, ptr addrspace(1) %f, ptr addrspace(1) %pout.coerce) {
+; RRLIST-LABEL: sccClobber:
+; RRLIST:       ; %bb.0: ; %entry
+; RRLIST-NEXT:    s_load_dwordx8 s[4:11], s[0:1], 0x24
+; RRLIST-NEXT:    v_mov_b32_e32 v2, 0
+; RRLIST-NEXT:    s_waitcnt lgkmcnt(0)
+; RRLIST-NEXT:    s_load_dword s16, s[8:9], 0x0
+; RRLIST-NEXT:    s_load_dwordx2 s[2:3], s[6:7], 0x0
+; RRLIST-NEXT:    s_load_dwordx2 s[12:13], s[4:5], 0x0
+; RRLIST-NEXT:    s_load_dwordx2 s[14:15], s[0:1], 0x44
+; RRLIST-NEXT:    s_load_dword s17, s[10:11], 0x0
+; RRLIST-NEXT:    s_waitcnt lgkmcnt(0)
+; RRLIST-NEXT:    s_min_i32 s4, s16, 0
+; RRLIST-NEXT:    v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1]
+; RRLIST-NEXT:    v_cmp_lt_i64_e32 vcc, s[12:13], v[0:1]
+; RRLIST-NEXT:    s_and_b64 s[0:1], vcc, exec
+; RRLIST-NEXT:    s_cselect_b32 s0, s16, s17
+; RRLIST-NEXT:    s_cmp_eq_u64 s[12:13], s[2:3]
+; RRLIST-NEXT:    s_cselect_b32 s0, s4, s0
+; RRLIST-NEXT:    v_mov_b32_e32 v0, s0
+; RRLIST-NEXT:    global_store_dword v2, v0, s[14:15]
+; RRLIST-NEXT:    s_endpgm
+;
+; FAST-LABEL: sccClobber:
+; FAST:       ; %bb.0: ; %entry
+; FAST-NEXT:    s_load_dwordx8 s[4:11], s[0:1], 0x24
+; FAST-NEXT:    v_mov_b32_e32 v2, 0
+; FAST-NEXT:    s_waitcnt lgkmcnt(0)
+; FAST-NEXT:    s_load_dword s16, s[8:9], 0x0
+; FAST-NEXT:    s_load_dwordx2 s[2:3], s[6:7], 0x0
+; FAST-NEXT:    s_load_dwordx2 s[12:13], s[4:5], 0x0
+; FAST-NEXT:    s_load_dwordx2 s[14:15], s[0:1], 0x44
+; FAST-NEXT:    s_load_dword s17, s[10:11], 0x0
+; FAST-NEXT:    s_waitcnt lgkmcnt(0)
+; FAST-NEXT:    s_min_i32 s4, s16, 0
+; FAST-NEXT:    v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1]
+; FAST-NEXT:    v_cmp_lt_i64_e32 vcc, s[12:13], v[0:1]
+; FAST-NEXT:    s_and_b64 s[0:1], vcc, exec
+; FAST-NEXT:    s_cselect_b32 s0, s16, s17
+; FAST-NEXT:    s_cmp_eq_u64 s[12:13], s[2:3]
+; FAST-NEXT:    s_cselect_b32 s0, s4, s0
+; FAST-NEXT:    v_mov_b32_e32 v0, s0
+; FAST-NEXT:    global_store_dword v2, v0, s[14:15]
+; FAST-NEXT:    s_endpgm
+entry:
+  %i = load i64, ptr addrspace(1) %a, align 8
+  %i.1 = load i64, ptr addrspace(1) %b, align 8
+  %i.2 = load i32, ptr addrspace(1) %e, align 4
+  %i.3 = load i32, ptr addrspace(1) %f, align 4
+  %cmp7.1 = icmp eq i64 %i, %i.1
+  %call.1 = tail call noundef i32 @llvm.smin.i32(i32 noundef 0, i32 noundef %i.2)
+  %cmp8.1 = icmp slt i64 %i, %i.1
+  %cond.1 = select i1 %cmp8.1, i32 %i.2, i32 %i.3
+  %cond14.1 = select i1 %cmp7.1, i32 %call.1, i32 %cond.1
+  store i32 %cond14.1, ptr addrspace(1) %pout.coerce, align 4
+  ret void
+}
+
+declare i32 @llvm.smin.i32(i32, i32)


        


More information about the llvm-commits mailing list