[llvm] r375300 - LiveIntervals: Fix handleMoveUp with subreg def moving across a def

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Fri Oct 18 16:24:25 PDT 2019


Author: arsenm
Date: Fri Oct 18 16:24:25 2019
New Revision: 375300

URL: http://llvm.org/viewvc/llvm-project?rev=375300&view=rev
Log:
LiveIntervals: Fix handleMoveUp with subreg def moving across a def

If a subregister def was moved across another subregister def and
another use, the main range was not correctly updated. The end point
of the moved interval ended too early and missed the use from theh
other lanes in the subreg def.

Added:
    llvm/trunk/test/CodeGen/AMDGPU/sched-handleMoveUp-subreg-def-across-subreg-def.mir
Modified:
    llvm/trunk/lib/CodeGen/LiveIntervals.cpp
    llvm/trunk/unittests/MI/LiveIntervalTest.cpp

Modified: llvm/trunk/lib/CodeGen/LiveIntervals.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/LiveIntervals.cpp?rev=375300&r1=375299&r2=375300&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/LiveIntervals.cpp (original)
+++ llvm/trunk/lib/CodeGen/LiveIntervals.cpp Fri Oct 18 16:24:25 2019
@@ -1288,6 +1288,20 @@ private:
           const SlotIndex SplitPos = NewIdxDef;
           OldIdxVNI = OldIdxIn->valno;
 
+          SlotIndex NewDefEndPoint = std::next(NewIdxIn)->end;
+          LiveRange::iterator Prev = std::prev(OldIdxIn);
+          if (OldIdxIn != LR.begin() &&
+              SlotIndex::isEarlierInstr(NewIdx, Prev->end)) {
+            // If the segment before OldIdx read a value defined earlier than
+            // NewIdx, the moved instruction also reads and forwards that
+            // value. Extend the lifetime of the new def point.
+
+            // Extend to where the previous range started, unless there is
+            // another redef first.
+            NewDefEndPoint = std::min(OldIdxIn->start,
+                                      std::next(NewIdxOut)->start);
+          }
+
           // Merge the OldIdxIn and OldIdxOut segments into OldIdxOut.
           OldIdxOut->valno->def = OldIdxIn->start;
           *OldIdxOut = LiveRange::Segment(OldIdxIn->start, OldIdxOut->end,
@@ -1305,7 +1319,8 @@ private:
             // There is no gap between NewSegment and its predecessor.
             *NewSegment = LiveRange::Segment(Next->start, SplitPos,
                                              Next->valno);
-            *Next = LiveRange::Segment(SplitPos, Next->end, OldIdxVNI);
+
+            *Next = LiveRange::Segment(SplitPos, NewDefEndPoint, OldIdxVNI);
             Next->valno->def = SplitPos;
           } else {
             // There is a gap between NewSegment and its predecessor

Added: llvm/trunk/test/CodeGen/AMDGPU/sched-handleMoveUp-subreg-def-across-subreg-def.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/sched-handleMoveUp-subreg-def-across-subreg-def.mir?rev=375300&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/sched-handleMoveUp-subreg-def-across-subreg-def.mir (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/sched-handleMoveUp-subreg-def-across-subreg-def.mir Fri Oct 18 16:24:25 2019
@@ -0,0 +1,134 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -verify-machineinstrs -verify-misched -run-pass=machine-scheduler -o - %s  | FileCheck %s
+
+---
+name:            handleMoveUp_incorrect_interval
+tracksRegLiveness: true
+liveins:
+  - { reg: '$sgpr4_sgpr5', virtual-reg: '%0' }
+frameInfo:
+  maxAlignment:    1
+machineFunctionInfo:
+  isEntryFunction: true
+  scratchRSrcReg:  '$sgpr96_sgpr97_sgpr98_sgpr99'
+  scratchWaveOffsetReg: '$sgpr101'
+  frameOffsetReg:  '$sgpr101'
+  stackPtrOffsetReg: '$sgpr101'
+  argumentInfo:
+    privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
+    kernargSegmentPtr: { reg: '$sgpr4_sgpr5' }
+    workGroupIDX:    { reg: '$sgpr6' }
+    privateSegmentWaveByteOffset: { reg: '$sgpr7' }
+    workItemIDX:     { reg: '$vgpr0' }
+body:             |
+  ; CHECK-LABEL: name: handleMoveUp_incorrect_interval
+  ; CHECK: bb.0:
+  ; CHECK:   successors: %bb.1(0x80000000)
+  ; CHECK:   liveins: $sgpr4_sgpr5
+  ; CHECK:   [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr4_sgpr5
+  ; CHECK:   [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN undef %2:vgpr_32, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr101, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 5)
+  ; CHECK:   [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sgpr_64 = S_LOAD_DWORDX2_IMM [[COPY]](p4), 0, 0, 0 :: (dereferenceable invariant load 8, align 16, addrspace 4)
+  ; CHECK:   [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 5329
+  ; CHECK:   undef %5.sub0:vreg_64 = V_MOV_B32_e32 0, implicit $exec
+  ; CHECK:   [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+  ; CHECK:   [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -4, implicit $exec
+  ; CHECK:   [[DEF:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
+  ; CHECK:   [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+  ; CHECK:   [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+  ; CHECK: bb.1:
+  ; CHECK:   successors: %bb.1(0x80000000)
+  ; CHECK:   INLINEASM &"", 1, 851978, def dead %11
+  ; CHECK:   GLOBAL_STORE_DWORD undef %12:vreg_64, [[BUFFER_LOAD_DWORD_OFFEN]], 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
+  ; CHECK:   [[DS_READ_B64_gfx9_:%[0-9]+]]:vreg_64 = DS_READ_B64_gfx9 undef %14:vgpr_32, 0, 0, implicit $exec :: (load 8, addrspace 3)
+  ; CHECK:   INLINEASM &"def $0 $1", 1, 851978, def %15, 851978, def %16
+  ; CHECK:   [[DS_READ_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[V_MOV_B32_e32_]], 0, 0, implicit $exec
+  ; CHECK:   [[DS_READ_B32_gfx9_1:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[V_MOV_B32_e32_1]], 0, 0, implicit $exec
+  ; CHECK:   [[DS_READ_B32_gfx9_2:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 undef %20:vgpr_32, 0, 0, implicit $exec
+  ; CHECK:   INLINEASM &"def $0 $1", 1, 851978, def %21, 851978, def %22
+  ; CHECK:   [[DS_READ_B32_gfx9_3:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[V_MOV_B32_e32_1]], 0, 0, implicit $exec
+  ; CHECK:   [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+  ; CHECK:   %5.sub1:vreg_64 = COPY [[V_MOV_B32_e32_]]
+  ; CHECK:   [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 1, [[DEF2]], implicit $exec
+  ; CHECK:   [[V_CMP_GT_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_GT_U32_e64 64, [[V_ADD_U32_e32_]], implicit $exec
+  ; CHECK:   [[DEF]].sub1:vreg_64 = COPY [[V_MOV_B32_e32_]]
+  ; CHECK:   [[V_MOV_B32_e32_3:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+  ; CHECK:   INLINEASM &"", 1, 851978, def dead [[V_MOV_B32_e32_2]], 851978, def dead [[V_MOV_B32_e32_3]], 851977, [[DS_READ_B64_gfx9_]].sub0, 2147483657, [[V_MOV_B32_e32_2]](tied-def 3), 2147549193, [[V_MOV_B32_e32_3]](tied-def 5), 851977, %15, 851977, %16, 851977, [[DS_READ_B32_gfx9_1]], 851977, [[DS_READ_B32_gfx9_]], 851977, [[DS_READ_B32_gfx9_3]], 851977, [[DS_READ_B32_gfx9_2]]
+  ; CHECK:   DS_WRITE_B32_gfx9 undef %28:vgpr_32, %21, 0, 0, implicit $exec :: (store 4, addrspace 3)
+  ; CHECK:   DS_WRITE_B32_gfx9 undef %29:vgpr_32, %22, 0, 0, implicit $exec :: (store 4, addrspace 3)
+  ; CHECK:   DS_WRITE_B64_gfx9 undef %30:vgpr_32, %5, 0, 0, implicit $exec :: (store 8, addrspace 3)
+  ; CHECK:   undef %31.sub1:vreg_64 = FLAT_LOAD_DWORD undef %32:vreg_64, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
+  ; CHECK:   [[V_MUL_LO_U32_:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32 [[V_ADD_U32_e32_]], [[S_MOV_B32_]], implicit $exec
+  ; CHECK:   [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, [[V_ADD_U32_e32_]], [[V_CMP_GT_U32_e64_]], implicit $exec
+  ; CHECK:   [[V_SUB_U32_e32_:%[0-9]+]]:vgpr_32 = V_SUB_U32_e32 [[V_MUL_LO_U32_]], [[DEF1]], implicit $exec
+  ; CHECK:   [[V_MUL_LO_U32_1:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32 [[V_CNDMASK_B32_e64_]], [[S_MOV_B32_]], implicit $exec
+  ; CHECK:   [[V_ADD_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[V_SUB_U32_e32_]], [[DEF]].sub0, implicit $exec
+  ; CHECK:   [[V_SUB_U32_e32_1:%[0-9]+]]:vgpr_32 = V_SUB_U32_e32 [[V_MUL_LO_U32_1]], [[V_MUL_LO_U32_]], implicit $exec
+  ; CHECK:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[S_LOAD_DWORDX2_IMM]].sub1
+  ; CHECK:   [[DEF]].sub0:vreg_64 = V_ADD_U32_e32 [[V_SUB_U32_e32_1]], [[V_ADD_U32_e32_1]], implicit $exec
+  ; CHECK:   undef %38.sub0:vreg_64, %39:sreg_64_xexec = V_ADD_I32_e64 [[S_LOAD_DWORDX2_IMM]].sub0, [[DEF]].sub0, 0, implicit $exec
+  ; CHECK:   undef %40.sub1:vreg_64, dead %41:sreg_64_xexec = V_ADDC_U32_e64 [[COPY1]], [[DEF]].sub1, %39, 0, implicit $exec
+  ; CHECK:   undef %42.sub0:sgpr_64 = V_READFIRSTLANE_B32 %38.sub0, implicit $exec
+  ; CHECK:   %42.sub1:sgpr_64 = V_READFIRSTLANE_B32 %40.sub1, implicit $exec
+  ; CHECK:   [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %42, 0, 0, 0 :: (load 4, addrspace 1)
+  ; CHECK:   INLINEASM &"", 1
+  ; CHECK:   [[DS_READ_B32_gfx9_4:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 undef %45:vgpr_32, 0, 0, implicit $exec :: (load 4, addrspace 3)
+  ; CHECK:   GLOBAL_STORE_DWORD undef %46:vreg_64, [[DS_READ_B32_gfx9_4]], 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
+  ; CHECK:   %31.sub0:vreg_64 = COPY [[S_LOAD_DWORD_IMM]], implicit $exec
+  ; CHECK:   DS_WRITE_B64_gfx9 undef %47:vgpr_32, %31, 0, 0, implicit $exec :: (store 8, addrspace 3)
+  ; CHECK:   S_BRANCH %bb.1
+  bb.0:
+    liveins: $sgpr4_sgpr5
+
+    %0:sgpr_64(p4) = COPY $sgpr4_sgpr5
+    %1:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN undef %2:vgpr_32, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr101, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 5)
+    %3:sgpr_64 = S_LOAD_DWORDX2_IMM %0(p4), 0, 0, 0 :: (dereferenceable invariant load 8, align 16, addrspace 4)
+    %4:sreg_32_xm0 = S_MOV_B32 5329
+    undef %5.sub0:vreg_64 = V_MOV_B32_e32 0, implicit $exec
+    %6:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    %7:vgpr_32 = V_MOV_B32_e32 -4, implicit $exec
+    %8:vreg_64 = IMPLICIT_DEF
+    %9:vgpr_32 = IMPLICIT_DEF
+    %10:vgpr_32 = IMPLICIT_DEF
+
+  bb.1:
+    INLINEASM &"", 1, 851978, def %11:vgpr_32
+    GLOBAL_STORE_DWORD undef %12:vreg_64, %1, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
+    %13:vreg_64 = DS_READ_B64_gfx9 undef %14:vgpr_32, 0, 0, implicit $exec :: (load 8, addrspace 3)
+    INLINEASM &"def $0 $1", 1, 851978, def %15:vgpr_32, 851978, def %16:vgpr_32
+    %17:vgpr_32 = DS_READ_B32_gfx9 %6, 0, 0, implicit $exec
+    %18:vgpr_32 = DS_READ_B32_gfx9 %7, 0, 0, implicit $exec
+    %19:vgpr_32 = DS_READ_B32_gfx9 undef %20:vgpr_32, 0, 0, implicit $exec
+    INLINEASM &"def $0 $1", 1, 851978, def %21:vgpr_32, 851978, def %22:vgpr_32
+    %23:vgpr_32 = DS_READ_B32_gfx9 %7, 0, 0, implicit $exec
+    %24:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    %5.sub1:vreg_64 = COPY %6
+    %25:vgpr_32 = V_ADD_U32_e32 1, %10, implicit $exec
+    %26:sreg_64_xexec = V_CMP_GT_U32_e64 64, %25, implicit $exec
+    %27:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    INLINEASM &"", 1, 851978, def dead %24, 851978, def dead %27, 851977, %13.sub0, 2147483657, %24(tied-def 3), 2147549193, %27(tied-def 5), 851977, %15, 851977, %16, 851977, %18, 851977, %17, 851977, %23, 851977, %19
+    DS_WRITE_B32_gfx9 undef %28:vgpr_32, %21, 0, 0, implicit $exec :: (store 4, addrspace 3)
+    DS_WRITE_B32_gfx9 undef %29:vgpr_32, %22, 0, 0, implicit $exec :: (store 4, addrspace 3)
+    DS_WRITE_B64_gfx9 undef %30:vgpr_32, %5, 0, 0, implicit $exec :: (store 8, addrspace 3)
+    undef %31.sub1:vreg_64 = FLAT_LOAD_DWORD undef %32:vreg_64, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
+    %33:vgpr_32 = V_MUL_LO_U32 %25, %4, implicit $exec
+    %10:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, %25, %26, implicit $exec
+    %34:vgpr_32 = V_SUB_U32_e32 %33, %9, implicit $exec
+    %9:vgpr_32 = V_MUL_LO_U32 %10, %4, implicit $exec
+    %35:vgpr_32 = V_ADD_U32_e32 %34, %8.sub0, implicit $exec
+    %36:vgpr_32 = V_SUB_U32_e32 %9, %33, implicit $exec
+    %37:vgpr_32 = COPY %3.sub1
+    undef %8.sub0:vreg_64 = V_ADD_U32_e32 %36, %35, implicit $exec
+    %8.sub1:vreg_64 = COPY %6
+    undef %38.sub0:vreg_64, %39:sreg_64_xexec = V_ADD_I32_e64 %3.sub0, %8.sub0, 0, implicit $exec
+    undef %40.sub1:vreg_64, dead %41:sreg_64_xexec = V_ADDC_U32_e64 %37, %8.sub1, %39, 0, implicit $exec
+    undef %42.sub0:sgpr_64 = V_READFIRSTLANE_B32 %38.sub0, implicit $exec
+    %42.sub1:sgpr_64 = V_READFIRSTLANE_B32 %40.sub1, implicit $exec
+    %43:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %42, 0, 0, 0 :: (load 4, addrspace 1)
+    INLINEASM &"", 1
+    %44:vgpr_32 = DS_READ_B32_gfx9 undef %45:vgpr_32, 0, 0, implicit $exec :: (load 4, addrspace 3)
+    GLOBAL_STORE_DWORD undef %46:vreg_64, %44, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
+    %31.sub0:vreg_64 = COPY %43, implicit $exec
+    DS_WRITE_B64_gfx9 undef %47:vgpr_32, %31, 0, 0, implicit $exec :: (store 8, addrspace 3)
+    S_BRANCH %bb.1
+
+...

Modified: llvm/trunk/unittests/MI/LiveIntervalTest.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/unittests/MI/LiveIntervalTest.cpp?rev=375300&r1=375299&r2=375300&view=diff
==============================================================================
--- llvm/trunk/unittests/MI/LiveIntervalTest.cpp (original)
+++ llvm/trunk/unittests/MI/LiveIntervalTest.cpp Fri Oct 18 16:24:25 2019
@@ -421,6 +421,46 @@ TEST(LiveIntervalTest, DeadSubRegMoveUp)
   });
 }
 
+TEST(LiveIntervalTest, TestMoveSubRegDefAcrossUseDef) {
+  liveIntervalTest(R"MIR(
+    %1:vreg_64 = IMPLICIT_DEF
+
+  bb.1:
+    %2:vgpr_32 = V_MOV_B32_e32 2, implicit $exec
+    %3:vgpr_32 = V_ADD_U32_e32 %2, %1.sub0, implicit $exec
+    undef %1.sub0:vreg_64 = V_ADD_U32_e32 %2, %2, implicit $exec
+    %1.sub1:vreg_64 = COPY %2
+    S_NOP 0, implicit %1.sub1
+    S_BRANCH %bb.1
+
+)MIR", [](MachineFunction &MF, LiveIntervals &LIS) {
+     MachineInstr &UndefSubregDef = getMI(MF, 2, 1);
+     // The scheduler clears undef from subregister defs before moving
+     UndefSubregDef.getOperand(0).setIsUndef(false);
+     testHandleMove(MF, LIS, 3, 1, 1);
+  });
+}
+
+TEST(LiveIntervalTest, TestMoveSubRegDefAcrossUseDefMulti) {
+  liveIntervalTest(R"MIR(
+    %1:vreg_96 = IMPLICIT_DEF
+
+  bb.1:
+    %2:vgpr_32 = V_MOV_B32_e32 2, implicit $exec
+    %3:vgpr_32 = V_ADD_U32_e32 %2, %1.sub0, implicit $exec
+    undef %1.sub0:vreg_96 = V_ADD_U32_e32 %2, %2, implicit $exec
+    %1.sub1:vreg_96 = COPY %2
+    %1.sub2:vreg_96 = COPY %2
+    S_NOP 0, implicit %1.sub1, implicit %1.sub2
+    S_BRANCH %bb.1
+
+)MIR", [](MachineFunction &MF, LiveIntervals &LIS) {
+     MachineInstr &UndefSubregDef = getMI(MF, 2, 1);
+     // The scheduler clears undef from subregister defs before moving
+     UndefSubregDef.getOperand(0).setIsUndef(false);
+     testHandleMove(MF, LIS, 4, 1, 1);
+  });
+}
 int main(int argc, char **argv) {
   ::testing::InitGoogleTest(&argc, argv);
   initLLVM();




More information about the llvm-commits mailing list