[llvm] 9bda1de - [TwoAddressInstruction] Propagate undef flags for partial defs (#79286)

via llvm-commits llvm-commits at lists.llvm.org
Tue Feb 6 23:46:03 PST 2024


Author: Carl Ritson
Date: 2024-02-07T16:46:00+09:00
New Revision: 9bda1de0b6096d26e87fed18cb681cc3e5b8319a

URL: https://github.com/llvm/llvm-project/commit/9bda1de0b6096d26e87fed18cb681cc3e5b8319a
DIFF: https://github.com/llvm/llvm-project/commit/9bda1de0b6096d26e87fed18cb681cc3e5b8319a.diff

LOG: [TwoAddressInstruction] Propagate undef flags for partial defs (#79286)

If part of a register (lowered from REG_SEQUENCE) is undefined, then undef
flags should be propagated to uses of those lanes. This is only performed
when live intervals are present, because live intervals are required to
correctly match uses to defs. The primary goal is to allow precise
computation of subrange intervals.

Added: 
    llvm/test/CodeGen/AMDGPU/GlobalISel/twoaddr-extract-dyn-v7f64.mir

Modified: 
    llvm/lib/CodeGen/TwoAddressInstructionPass.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp b/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
index 74d7904aee33a2..ebacbc420f8580 100644
--- a/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
+++ b/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
@@ -1929,21 +1929,27 @@ eliminateRegSequence(MachineBasicBlock::iterator &MBBI) {
   Register DstReg = MI.getOperand(0).getReg();
 
   SmallVector<Register, 4> OrigRegs;
+  VNInfo *DefVN = nullptr;
   if (LIS) {
     OrigRegs.push_back(MI.getOperand(0).getReg());
     for (unsigned i = 1, e = MI.getNumOperands(); i < e; i += 2)
       OrigRegs.push_back(MI.getOperand(i).getReg());
+    if (LIS->hasInterval(DstReg)) {
+      DefVN = LIS->getInterval(DstReg)
+                  .Query(LIS->getInstructionIndex(MI))
+                  .valueOut();
+    }
   }
 
+  LaneBitmask UndefLanes = LaneBitmask::getNone();
   bool DefEmitted = false;
-  bool DefIsPartial = false;
   for (unsigned i = 1, e = MI.getNumOperands(); i < e; i += 2) {
     MachineOperand &UseMO = MI.getOperand(i);
     Register SrcReg = UseMO.getReg();
     unsigned SubIdx = MI.getOperand(i+1).getImm();
     // Nothing needs to be inserted for undef operands.
     if (UseMO.isUndef()) {
-      DefIsPartial = true;
+      UndefLanes |= TRI->getSubRegIndexLaneMask(SubIdx);
       continue;
     }
 
@@ -1991,11 +1997,25 @@ eliminateRegSequence(MachineBasicBlock::iterator &MBBI) {
       MI.removeOperand(j);
   } else {
     if (LIS) {
-      // Force interval recomputation if we moved from full definition
-      // of register to partial.
-      if (DefIsPartial && LIS->hasInterval(DstReg) &&
-          MRI->shouldTrackSubRegLiveness(DstReg))
+      // Force live interval recomputation if we moved to a partial definition
+      // of the register.  Undef flags must be propagated to uses of undefined
+      // subregisters for accurate interval computation.
+      if (UndefLanes.any() && DefVN && MRI->shouldTrackSubRegLiveness(DstReg)) {
+        auto &LI = LIS->getInterval(DstReg);
+        for (MachineOperand &UseOp : MRI->use_operands(DstReg)) {
+          unsigned SubReg = UseOp.getSubReg();
+          if (UseOp.isUndef() || !SubReg)
+            continue;
+          auto *VN =
+              LI.getVNInfoAt(LIS->getInstructionIndex(*UseOp.getParent()));
+          if (DefVN != VN)
+            continue;
+          LaneBitmask LaneMask = TRI->getSubRegIndexLaneMask(SubReg);
+          if ((UndefLanes & LaneMask).any())
+            UseOp.setIsUndef(true);
+        }
         LIS->removeInterval(DstReg);
+      }
       LIS->RemoveMachineInstrFromMaps(MI);
     }
 

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/twoaddr-extract-dyn-v7f64.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/twoaddr-extract-dyn-v7f64.mir
new file mode 100644
index 00000000000000..6c13756ab1c690
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/twoaddr-extract-dyn-v7f64.mir
@@ -0,0 +1,125 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -early-live-intervals -run-pass=liveintervals -run-pass=twoaddressinstruction -verify-machineinstrs -o - %s | FileCheck %s
+
+---
+name:            dyn_extract_v7f64_v_v
+legalized:       true
+regBankSelected: true
+selected:        true
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14
+
+    ; CHECK-LABEL: name: dyn_extract_v7f64_v_v
+    ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+    ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
+    ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5
+    ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr6
+    ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr7
+    ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr8
+    ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY $vgpr9
+    ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY $vgpr10
+    ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY $vgpr11
+    ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY $vgpr12
+    ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY $vgpr13
+    ; CHECK-NEXT: undef [[COPY14:%[0-9]+]].sub0:vreg_64 = COPY [[COPY]]
+    ; CHECK-NEXT: [[COPY14:%[0-9]+]].sub1:vreg_64 = COPY [[COPY1]]
+    ; CHECK-NEXT: undef [[COPY15:%[0-9]+]].sub0:vreg_64 = COPY [[COPY2]]
+    ; CHECK-NEXT: [[COPY15:%[0-9]+]].sub1:vreg_64 = COPY [[COPY3]]
+    ; CHECK-NEXT: undef [[COPY16:%[0-9]+]].sub0:vreg_64 = COPY [[COPY4]]
+    ; CHECK-NEXT: [[COPY16:%[0-9]+]].sub1:vreg_64 = COPY [[COPY5]]
+    ; CHECK-NEXT: undef [[COPY17:%[0-9]+]].sub0:vreg_64 = COPY [[COPY6]]
+    ; CHECK-NEXT: [[COPY17:%[0-9]+]].sub1:vreg_64 = COPY [[COPY7]]
+    ; CHECK-NEXT: undef [[COPY18:%[0-9]+]].sub0:vreg_64 = COPY [[COPY8]]
+    ; CHECK-NEXT: [[COPY18:%[0-9]+]].sub1:vreg_64 = COPY [[COPY9]]
+    ; CHECK-NEXT: undef [[COPY19:%[0-9]+]].sub0:vreg_64 = COPY [[COPY10]]
+    ; CHECK-NEXT: [[COPY19:%[0-9]+]].sub1:vreg_64 = COPY [[COPY11]]
+    ; CHECK-NEXT: undef [[COPY20:%[0-9]+]].sub0:vreg_64 = COPY [[COPY12]]
+    ; CHECK-NEXT: [[COPY20:%[0-9]+]].sub1:vreg_64 = COPY [[COPY13]]
+    ; CHECK-NEXT: [[COPY21:%[0-9]+]]:vgpr_32 = COPY $vgpr14
+    ; CHECK-NEXT: undef [[COPY22:%[0-9]+]].sub0_sub1:vreg_512 = COPY [[COPY14]]
+    ; CHECK-NEXT: [[COPY22:%[0-9]+]].sub2_sub3:vreg_512 = COPY [[COPY15]]
+    ; CHECK-NEXT: [[COPY22:%[0-9]+]].sub4_sub5:vreg_512 = COPY [[COPY16]]
+    ; CHECK-NEXT: [[COPY22:%[0-9]+]].sub6_sub7:vreg_512 = COPY [[COPY17]]
+    ; CHECK-NEXT: [[COPY22:%[0-9]+]].sub8_sub9:vreg_512 = COPY [[COPY18]]
+    ; CHECK-NEXT: [[COPY22:%[0-9]+]].sub10_sub11:vreg_512 = COPY [[COPY19]]
+    ; CHECK-NEXT: [[COPY22:%[0-9]+]].sub12_sub13:vreg_512 = COPY [[COPY20]]
+    ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 1, [[COPY21]], implicit $exec
+    ; CHECK-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY22]].sub0, 0, [[COPY22]].sub2, [[V_CMP_EQ_U32_e64_]], implicit $exec
+    ; CHECK-NEXT: [[V_CNDMASK_B32_e64_1:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY22]].sub1, 0, [[COPY22]].sub3, [[V_CMP_EQ_U32_e64_]], implicit $exec
+    ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 2, [[COPY21]], implicit $exec
+    ; CHECK-NEXT: [[V_CNDMASK_B32_e64_2:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_CNDMASK_B32_e64_]], 0, [[COPY22]].sub4, [[V_CMP_EQ_U32_e64_1]], implicit $exec
+    ; CHECK-NEXT: [[V_CNDMASK_B32_e64_3:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_CNDMASK_B32_e64_1]], 0, [[COPY22]].sub5, [[V_CMP_EQ_U32_e64_1]], implicit $exec
+    ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_2:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 3, [[COPY21]], implicit $exec
+    ; CHECK-NEXT: [[V_CNDMASK_B32_e64_4:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_CNDMASK_B32_e64_2]], 0, [[COPY22]].sub6, [[V_CMP_EQ_U32_e64_2]], implicit $exec
+    ; CHECK-NEXT: [[V_CNDMASK_B32_e64_5:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_CNDMASK_B32_e64_3]], 0, [[COPY22]].sub7, [[V_CMP_EQ_U32_e64_2]], implicit $exec
+    ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_3:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 4, [[COPY21]], implicit $exec
+    ; CHECK-NEXT: [[V_CNDMASK_B32_e64_6:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_CNDMASK_B32_e64_4]], 0, [[COPY22]].sub8, [[V_CMP_EQ_U32_e64_3]], implicit $exec
+    ; CHECK-NEXT: [[V_CNDMASK_B32_e64_7:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_CNDMASK_B32_e64_5]], 0, [[COPY22]].sub9, [[V_CMP_EQ_U32_e64_3]], implicit $exec
+    ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_4:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 5, [[COPY21]], implicit $exec
+    ; CHECK-NEXT: [[V_CNDMASK_B32_e64_8:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_CNDMASK_B32_e64_6]], 0, [[COPY22]].sub10, [[V_CMP_EQ_U32_e64_4]], implicit $exec
+    ; CHECK-NEXT: [[V_CNDMASK_B32_e64_9:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_CNDMASK_B32_e64_7]], 0, [[COPY22]].sub11, [[V_CMP_EQ_U32_e64_4]], implicit $exec
+    ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_5:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 6, [[COPY21]], implicit $exec
+    ; CHECK-NEXT: [[V_CNDMASK_B32_e64_10:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_CNDMASK_B32_e64_8]], 0, [[COPY22]].sub12, [[V_CMP_EQ_U32_e64_5]], implicit $exec
+    ; CHECK-NEXT: [[V_CNDMASK_B32_e64_11:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_CNDMASK_B32_e64_9]], 0, [[COPY22]].sub13, [[V_CMP_EQ_U32_e64_5]], implicit $exec
+    ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_6:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 7, [[COPY21]], implicit $exec
+    ; CHECK-NEXT: [[V_CNDMASK_B32_e64_12:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_CNDMASK_B32_e64_10]], 0, undef [[COPY22]].sub14, [[V_CMP_EQ_U32_e64_6]], implicit $exec
+    ; CHECK-NEXT: [[V_CNDMASK_B32_e64_13:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_CNDMASK_B32_e64_11]], 0, undef [[COPY22]].sub15, [[V_CMP_EQ_U32_e64_6]], implicit $exec
+    ; CHECK-NEXT: $vgpr0 = COPY [[V_CNDMASK_B32_e64_12]]
+    ; CHECK-NEXT: $vgpr1 = COPY [[V_CNDMASK_B32_e64_13]]
+    ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1
+    %2:vgpr_32 = COPY $vgpr0
+    %3:vgpr_32 = COPY $vgpr1
+    %4:vgpr_32 = COPY $vgpr2
+    %5:vgpr_32 = COPY $vgpr3
+    %6:vgpr_32 = COPY $vgpr4
+    %7:vgpr_32 = COPY $vgpr5
+    %8:vgpr_32 = COPY $vgpr6
+    %9:vgpr_32 = COPY $vgpr7
+    %10:vgpr_32 = COPY $vgpr8
+    %11:vgpr_32 = COPY $vgpr9
+    %12:vgpr_32 = COPY $vgpr10
+    %13:vgpr_32 = COPY $vgpr11
+    %14:vgpr_32 = COPY $vgpr12
+    %15:vgpr_32 = COPY $vgpr13
+    %16:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
+    %17:vreg_64 = REG_SEQUENCE %4, %subreg.sub0, %5, %subreg.sub1
+    %18:vreg_64 = REG_SEQUENCE %6, %subreg.sub0, %7, %subreg.sub1
+    %19:vreg_64 = REG_SEQUENCE %8, %subreg.sub0, %9, %subreg.sub1
+    %20:vreg_64 = REG_SEQUENCE %10, %subreg.sub0, %11, %subreg.sub1
+    %21:vreg_64 = REG_SEQUENCE %12, %subreg.sub0, %13, %subreg.sub1
+    %22:vreg_64 = REG_SEQUENCE %14, %subreg.sub0, %15, %subreg.sub1
+    %1:vgpr_32 = COPY $vgpr14
+    %34:vreg_512 = REG_SEQUENCE %16, %subreg.sub0_sub1, %17, %subreg.sub2_sub3, %18, %subreg.sub4_sub5, %19, %subreg.sub6_sub7, %20, %subreg.sub8_sub9, %21, %subreg.sub10_sub11, %22, %subreg.sub12_sub13, undef %35:vreg_64, %subreg.sub14_sub15
+    %55:sreg_64_xexec = V_CMP_EQ_U32_e64 1, %1, implicit $exec
+    %56:vgpr_32 = V_CNDMASK_B32_e64 0, %34.sub0, 0, %34.sub2, %55, implicit $exec
+    %57:vgpr_32 = V_CNDMASK_B32_e64 0, %34.sub1, 0, %34.sub3, %55, implicit $exec
+    %59:sreg_64_xexec = V_CMP_EQ_U32_e64 2, %1, implicit $exec
+    %60:vgpr_32 = V_CNDMASK_B32_e64 0, %56, 0, %34.sub4, %59, implicit $exec
+    %61:vgpr_32 = V_CNDMASK_B32_e64 0, %57, 0, %34.sub5, %59, implicit $exec
+    %63:sreg_64_xexec = V_CMP_EQ_U32_e64 3, %1, implicit $exec
+    %64:vgpr_32 = V_CNDMASK_B32_e64 0, %60, 0, %34.sub6, %63, implicit $exec
+    %65:vgpr_32 = V_CNDMASK_B32_e64 0, %61, 0, %34.sub7, %63, implicit $exec
+    %67:sreg_64_xexec = V_CMP_EQ_U32_e64 4, %1, implicit $exec
+    %68:vgpr_32 = V_CNDMASK_B32_e64 0, %64, 0, %34.sub8, %67, implicit $exec
+    %69:vgpr_32 = V_CNDMASK_B32_e64 0, %65, 0, %34.sub9, %67, implicit $exec
+    %71:sreg_64_xexec = V_CMP_EQ_U32_e64 5, %1, implicit $exec
+    %72:vgpr_32 = V_CNDMASK_B32_e64 0, %68, 0, %34.sub10, %71, implicit $exec
+    %73:vgpr_32 = V_CNDMASK_B32_e64 0, %69, 0, %34.sub11, %71, implicit $exec
+    %75:sreg_64_xexec = V_CMP_EQ_U32_e64 6, %1, implicit $exec
+    %76:vgpr_32 = V_CNDMASK_B32_e64 0, %72, 0, %34.sub12, %75, implicit $exec
+    %77:vgpr_32 = V_CNDMASK_B32_e64 0, %73, 0, %34.sub13, %75, implicit $exec
+    %79:sreg_64_xexec = V_CMP_EQ_U32_e64 7, %1, implicit $exec
+    %80:vgpr_32 = V_CNDMASK_B32_e64 0, %76, 0, %34.sub14, %79, implicit $exec
+    %81:vgpr_32 = V_CNDMASK_B32_e64 0, %77, 0, %34.sub15, %79, implicit $exec
+    $vgpr0 = COPY %80
+    $vgpr1 = COPY %81
+    SI_RETURN implicit $vgpr0, implicit $vgpr1
+
+...


        


More information about the llvm-commits mailing list