[llvm] r372362 - MachineScheduler: Fix missing dependency with multiple subreg defs

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Thu Sep 19 17:09:15 PDT 2019


Author: arsenm
Date: Thu Sep 19 17:09:15 2019
New Revision: 372362

URL: http://llvm.org/viewvc/llvm-project?rev=372362&view=rev
Log:
MachineScheduler: Fix missing dependency with multiple subreg defs

If an instruction had multiple subregister defs and one of them was
undef, addVRegDefDeps would improperly conclude that all other lanes
are killed. There could still be other defs of those read-undef lanes
in other operands. As a result, register uses were improperly removed
from CurrentVRegUses, so the visitation of later operands would not
find the necessary register dependency. It also meant that whether the
failure occurred depended on how the different subregister def operands
were ordered.

On an undef subregister def, scan the instruction for other
subregister defs and avoid killing those.

A possibly better approach would be to defer removing anything from
CurrentVRegUses until the entire instruction has been processed.

Added:
    llvm/trunk/test/CodeGen/AMDGPU/subreg-undef-def-with-other-subreg-defs.mir
Modified:
    llvm/trunk/lib/CodeGen/ScheduleDAGInstrs.cpp

Modified: llvm/trunk/lib/CodeGen/ScheduleDAGInstrs.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/ScheduleDAGInstrs.cpp?rev=372362&r1=372361&r2=372362&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/ScheduleDAGInstrs.cpp (original)
+++ llvm/trunk/lib/CodeGen/ScheduleDAGInstrs.cpp Thu Sep 19 17:09:15 2019
@@ -400,6 +400,18 @@ void ScheduleDAGInstrs::addVRegDefDeps(S
     // earlier instruction.
     KillLaneMask = IsKill ? LaneBitmask::getAll() : DefLaneMask;
 
+    if (MO.getSubReg() != 0 && MO.isUndef()) {
+      // There may be other subregister defs on the same instruction of the same
+      // register in later operands. The lanes of other defs will now be live
+      // after this instruction, so these should not be treated as killed by the
+      // instruction even though they appear to be killed in this one operand.
+      for (int I = OperIdx + 1, E = MI->getNumOperands(); I != E; ++I) {
+        const MachineOperand &OtherMO = MI->getOperand(I);
+        if (OtherMO.isReg() && OtherMO.isDef() && OtherMO.getReg() == Reg)
+          KillLaneMask &= ~getLaneMaskForMO(OtherMO);
+      }
+    }
+
     // Clear undef flag, we'll re-add it later once we know which subregister
     // Def is first.
     MO.setIsUndef(false);

Added: llvm/trunk/test/CodeGen/AMDGPU/subreg-undef-def-with-other-subreg-defs.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/subreg-undef-def-with-other-subreg-defs.mir?rev=372362&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/subreg-undef-def-with-other-subreg-defs.mir (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/subreg-undef-def-with-other-subreg-defs.mir Thu Sep 19 17:09:15 2019
@@ -0,0 +1,86 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -verify-machineinstrs -run-pass=machine-scheduler -verify-misched -o - %s | FileCheck %s
+
+# Deciding which lanes are killed needs to account for other defs in the
+# instruction.
+#
+# addVRegDefDeps would encounter the %0.sub0 def and erase %0 from
+# current vreg uses because it shared no lanes with %0.sub1 use on the
+# nop. It then didn't see the lanemask when it reached the second
+# subreg def, and failed to add the necessary dependency between the
+# asm and S_NOP
+
+---
+name:            no_live_subrange_at_use
+tracksRegLiveness: true
+machineFunctionInfo:
+  isEntryFunction: true
+body:             |
+  ; CHECK-LABEL: name: no_live_subrange_at_use
+  ; CHECK: bb.0:
+  ; CHECK:   successors: %bb.1(0x80000000)
+  ; CHECK:   undef %0.sub0:vreg_64 = V_MOV_B32_e32 0, implicit $exec
+  ; CHECK:   %0.sub1:vreg_64 = V_MOV_B32_e32 0, implicit $exec
+  ; CHECK:   [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+  ; CHECK: bb.1:
+  ; CHECK:   successors: %bb.1(0x80000000)
+  ; CHECK:   [[DS_READ_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (load 4, addrspace 3)
+  ; CHECK:   INLINEASM &"", 1, 851978, def %0, 2147549193, %0(tied-def 3)
+  ; CHECK:   INLINEASM &"", 1, 851977, [[DS_READ_B32_gfx9_]]
+  ; CHECK:   INLINEASM &"", 1, 851978, def undef %0.sub0, 851978, def undef %0.sub1
+  ; CHECK:   S_NOP 0, implicit %0.sub1
+  ; CHECK:   $sgpr10 = S_MOV_B32 -1
+  ; CHECK:   S_BRANCH %bb.1
+  bb.0:
+    undef %0.sub0:vreg_64 = V_MOV_B32_e32 0, implicit $exec
+    %0.sub1:vreg_64 = V_MOV_B32_e32 0, implicit $exec
+    %1:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+
+  bb.1:
+    %2:vgpr_32 = DS_READ_B32_gfx9 %1, 0, 0, implicit $exec :: (load 4, addrspace 3)
+    INLINEASM &"", 1, 851978, def %0, 2147549193, %0(tied-def 3)
+    INLINEASM &"", 1, 851977, %2
+    INLINEASM &"", 1, 851978, def undef %0.sub0, 851978, def %0.sub1
+    S_NOP 0, implicit %0.sub1
+    $sgpr10 = S_MOV_B32 -1
+    S_BRANCH %bb.1
+
+...
+
+# Different operand order
+---
+name:            no_live_subrange_at_use_swap
+tracksRegLiveness: true
+machineFunctionInfo:
+  isEntryFunction: true
+body:             |
+  ; CHECK-LABEL: name: no_live_subrange_at_use_swap
+  ; CHECK: bb.0:
+  ; CHECK:   successors: %bb.1(0x80000000)
+  ; CHECK:   undef %0.sub0:vreg_64 = V_MOV_B32_e32 0, implicit $exec
+  ; CHECK:   %0.sub1:vreg_64 = V_MOV_B32_e32 0, implicit $exec
+  ; CHECK:   [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+  ; CHECK: bb.1:
+  ; CHECK:   successors: %bb.1(0x80000000)
+  ; CHECK:   [[DS_READ_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (load 4, addrspace 3)
+  ; CHECK:   INLINEASM &"", 1, 851978, def %0, 2147549193, %0(tied-def 3)
+  ; CHECK:   INLINEASM &"", 1, 851977, [[DS_READ_B32_gfx9_]]
+  ; CHECK:   INLINEASM &"", 1, 851978, def undef %0.sub1, 851978, def undef %0.sub0
+  ; CHECK:   S_NOP 0, implicit %0.sub1
+  ; CHECK:   $sgpr10 = S_MOV_B32 -1
+  ; CHECK:   S_BRANCH %bb.1
+  bb.0:
+    undef %0.sub0:vreg_64 = V_MOV_B32_e32 0, implicit $exec
+    %0.sub1:vreg_64 = V_MOV_B32_e32 0, implicit $exec
+    %1:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+
+  bb.1:
+    %2:vgpr_32 = DS_READ_B32_gfx9 %1, 0, 0, implicit $exec :: (load 4, addrspace 3)
+    INLINEASM &"", 1, 851978, def %0, 2147549193, %0(tied-def 3)
+    INLINEASM &"", 1, 851977, %2
+    INLINEASM &"", 1, 851978, def %0.sub1, 851978, def undef %0.sub0
+    S_NOP 0, implicit %0.sub1
+    $sgpr10 = S_MOV_B32 -1
+    S_BRANCH %bb.1
+
+...




More information about the llvm-commits mailing list