[llvm] 9bda1de - [TwoAddressInstruction] Propagate undef flags for partial defs (#79286)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Feb 6 23:46:03 PST 2024
Author: Carl Ritson
Date: 2024-02-07T16:46:00+09:00
New Revision: 9bda1de0b6096d26e87fed18cb681cc3e5b8319a
URL: https://github.com/llvm/llvm-project/commit/9bda1de0b6096d26e87fed18cb681cc3e5b8319a
DIFF: https://github.com/llvm/llvm-project/commit/9bda1de0b6096d26e87fed18cb681cc3e5b8319a.diff
LOG: [TwoAddressInstruction] Propagate undef flags for partial defs (#79286)
If part of a register (lowered from REG_SEQUENCE) is undefined then we
should propagate undef flags to uses of those lanes. This is only
performed when live intervals are present as it requires live intervals
to correctly match uses to defs, and the primary goal is to allow
precise computation of subrange intervals.
Added:
llvm/test/CodeGen/AMDGPU/GlobalISel/twoaddr-extract-dyn-v7f64.mir
Modified:
llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp b/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
index 74d7904aee33a2..ebacbc420f8580 100644
--- a/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
+++ b/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
@@ -1929,21 +1929,27 @@ eliminateRegSequence(MachineBasicBlock::iterator &MBBI) {
Register DstReg = MI.getOperand(0).getReg();
SmallVector<Register, 4> OrigRegs;
+ VNInfo *DefVN = nullptr;
if (LIS) {
OrigRegs.push_back(MI.getOperand(0).getReg());
for (unsigned i = 1, e = MI.getNumOperands(); i < e; i += 2)
OrigRegs.push_back(MI.getOperand(i).getReg());
+ if (LIS->hasInterval(DstReg)) {
+ DefVN = LIS->getInterval(DstReg)
+ .Query(LIS->getInstructionIndex(MI))
+ .valueOut();
+ }
}
+ LaneBitmask UndefLanes = LaneBitmask::getNone();
bool DefEmitted = false;
- bool DefIsPartial = false;
for (unsigned i = 1, e = MI.getNumOperands(); i < e; i += 2) {
MachineOperand &UseMO = MI.getOperand(i);
Register SrcReg = UseMO.getReg();
unsigned SubIdx = MI.getOperand(i+1).getImm();
// Nothing needs to be inserted for undef operands.
if (UseMO.isUndef()) {
- DefIsPartial = true;
+ UndefLanes |= TRI->getSubRegIndexLaneMask(SubIdx);
continue;
}
@@ -1991,11 +1997,25 @@ eliminateRegSequence(MachineBasicBlock::iterator &MBBI) {
MI.removeOperand(j);
} else {
if (LIS) {
- // Force interval recomputation if we moved from full definition
- // of register to partial.
- if (DefIsPartial && LIS->hasInterval(DstReg) &&
- MRI->shouldTrackSubRegLiveness(DstReg))
+ // Force live interval recomputation if we moved to a partial definition
+ // of the register. Undef flags must be propagated to uses of undefined
+ // subregisters for accurate interval computation.
+ if (UndefLanes.any() && DefVN && MRI->shouldTrackSubRegLiveness(DstReg)) {
+ auto &LI = LIS->getInterval(DstReg);
+ for (MachineOperand &UseOp : MRI->use_operands(DstReg)) {
+ unsigned SubReg = UseOp.getSubReg();
+ if (UseOp.isUndef() || !SubReg)
+ continue;
+ auto *VN =
+ LI.getVNInfoAt(LIS->getInstructionIndex(*UseOp.getParent()));
+ if (DefVN != VN)
+ continue;
+ LaneBitmask LaneMask = TRI->getSubRegIndexLaneMask(SubReg);
+ if ((UndefLanes & LaneMask).any())
+ UseOp.setIsUndef(true);
+ }
LIS->removeInterval(DstReg);
+ }
LIS->RemoveMachineInstrFromMaps(MI);
}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/twoaddr-extract-dyn-v7f64.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/twoaddr-extract-dyn-v7f64.mir
new file mode 100644
index 00000000000000..6c13756ab1c690
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/twoaddr-extract-dyn-v7f64.mir
@@ -0,0 +1,125 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -early-live-intervals -run-pass=liveintervals -run-pass=twoaddressinstruction -verify-machineinstrs -o - %s | FileCheck %s
+
+---
+name: dyn_extract_v7f64_v_v
+legalized: true
+regBankSelected: true
+selected: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14
+
+ ; CHECK-LABEL: name: dyn_extract_v7f64_v_v
+ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr6
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr7
+ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr8
+ ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY $vgpr9
+ ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY $vgpr10
+ ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY $vgpr11
+ ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY $vgpr12
+ ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY $vgpr13
+ ; CHECK-NEXT: undef [[COPY14:%[0-9]+]].sub0:vreg_64 = COPY [[COPY]]
+ ; CHECK-NEXT: [[COPY14:%[0-9]+]].sub1:vreg_64 = COPY [[COPY1]]
+ ; CHECK-NEXT: undef [[COPY15:%[0-9]+]].sub0:vreg_64 = COPY [[COPY2]]
+ ; CHECK-NEXT: [[COPY15:%[0-9]+]].sub1:vreg_64 = COPY [[COPY3]]
+ ; CHECK-NEXT: undef [[COPY16:%[0-9]+]].sub0:vreg_64 = COPY [[COPY4]]
+ ; CHECK-NEXT: [[COPY16:%[0-9]+]].sub1:vreg_64 = COPY [[COPY5]]
+ ; CHECK-NEXT: undef [[COPY17:%[0-9]+]].sub0:vreg_64 = COPY [[COPY6]]
+ ; CHECK-NEXT: [[COPY17:%[0-9]+]].sub1:vreg_64 = COPY [[COPY7]]
+ ; CHECK-NEXT: undef [[COPY18:%[0-9]+]].sub0:vreg_64 = COPY [[COPY8]]
+ ; CHECK-NEXT: [[COPY18:%[0-9]+]].sub1:vreg_64 = COPY [[COPY9]]
+ ; CHECK-NEXT: undef [[COPY19:%[0-9]+]].sub0:vreg_64 = COPY [[COPY10]]
+ ; CHECK-NEXT: [[COPY19:%[0-9]+]].sub1:vreg_64 = COPY [[COPY11]]
+ ; CHECK-NEXT: undef [[COPY20:%[0-9]+]].sub0:vreg_64 = COPY [[COPY12]]
+ ; CHECK-NEXT: [[COPY20:%[0-9]+]].sub1:vreg_64 = COPY [[COPY13]]
+ ; CHECK-NEXT: [[COPY21:%[0-9]+]]:vgpr_32 = COPY $vgpr14
+ ; CHECK-NEXT: undef [[COPY22:%[0-9]+]].sub0_sub1:vreg_512 = COPY [[COPY14]]
+ ; CHECK-NEXT: [[COPY22:%[0-9]+]].sub2_sub3:vreg_512 = COPY [[COPY15]]
+ ; CHECK-NEXT: [[COPY22:%[0-9]+]].sub4_sub5:vreg_512 = COPY [[COPY16]]
+ ; CHECK-NEXT: [[COPY22:%[0-9]+]].sub6_sub7:vreg_512 = COPY [[COPY17]]
+ ; CHECK-NEXT: [[COPY22:%[0-9]+]].sub8_sub9:vreg_512 = COPY [[COPY18]]
+ ; CHECK-NEXT: [[COPY22:%[0-9]+]].sub10_sub11:vreg_512 = COPY [[COPY19]]
+ ; CHECK-NEXT: [[COPY22:%[0-9]+]].sub12_sub13:vreg_512 = COPY [[COPY20]]
+ ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 1, [[COPY21]], implicit $exec
+ ; CHECK-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY22]].sub0, 0, [[COPY22]].sub2, [[V_CMP_EQ_U32_e64_]], implicit $exec
+ ; CHECK-NEXT: [[V_CNDMASK_B32_e64_1:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY22]].sub1, 0, [[COPY22]].sub3, [[V_CMP_EQ_U32_e64_]], implicit $exec
+ ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 2, [[COPY21]], implicit $exec
+ ; CHECK-NEXT: [[V_CNDMASK_B32_e64_2:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_CNDMASK_B32_e64_]], 0, [[COPY22]].sub4, [[V_CMP_EQ_U32_e64_1]], implicit $exec
+ ; CHECK-NEXT: [[V_CNDMASK_B32_e64_3:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_CNDMASK_B32_e64_1]], 0, [[COPY22]].sub5, [[V_CMP_EQ_U32_e64_1]], implicit $exec
+ ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_2:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 3, [[COPY21]], implicit $exec
+ ; CHECK-NEXT: [[V_CNDMASK_B32_e64_4:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_CNDMASK_B32_e64_2]], 0, [[COPY22]].sub6, [[V_CMP_EQ_U32_e64_2]], implicit $exec
+ ; CHECK-NEXT: [[V_CNDMASK_B32_e64_5:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_CNDMASK_B32_e64_3]], 0, [[COPY22]].sub7, [[V_CMP_EQ_U32_e64_2]], implicit $exec
+ ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_3:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 4, [[COPY21]], implicit $exec
+ ; CHECK-NEXT: [[V_CNDMASK_B32_e64_6:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_CNDMASK_B32_e64_4]], 0, [[COPY22]].sub8, [[V_CMP_EQ_U32_e64_3]], implicit $exec
+ ; CHECK-NEXT: [[V_CNDMASK_B32_e64_7:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_CNDMASK_B32_e64_5]], 0, [[COPY22]].sub9, [[V_CMP_EQ_U32_e64_3]], implicit $exec
+ ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_4:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 5, [[COPY21]], implicit $exec
+ ; CHECK-NEXT: [[V_CNDMASK_B32_e64_8:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_CNDMASK_B32_e64_6]], 0, [[COPY22]].sub10, [[V_CMP_EQ_U32_e64_4]], implicit $exec
+ ; CHECK-NEXT: [[V_CNDMASK_B32_e64_9:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_CNDMASK_B32_e64_7]], 0, [[COPY22]].sub11, [[V_CMP_EQ_U32_e64_4]], implicit $exec
+ ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_5:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 6, [[COPY21]], implicit $exec
+ ; CHECK-NEXT: [[V_CNDMASK_B32_e64_10:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_CNDMASK_B32_e64_8]], 0, [[COPY22]].sub12, [[V_CMP_EQ_U32_e64_5]], implicit $exec
+ ; CHECK-NEXT: [[V_CNDMASK_B32_e64_11:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_CNDMASK_B32_e64_9]], 0, [[COPY22]].sub13, [[V_CMP_EQ_U32_e64_5]], implicit $exec
+ ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_6:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 7, [[COPY21]], implicit $exec
+ ; CHECK-NEXT: [[V_CNDMASK_B32_e64_12:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_CNDMASK_B32_e64_10]], 0, undef [[COPY22]].sub14, [[V_CMP_EQ_U32_e64_6]], implicit $exec
+ ; CHECK-NEXT: [[V_CNDMASK_B32_e64_13:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_CNDMASK_B32_e64_11]], 0, undef [[COPY22]].sub15, [[V_CMP_EQ_U32_e64_6]], implicit $exec
+ ; CHECK-NEXT: $vgpr0 = COPY [[V_CNDMASK_B32_e64_12]]
+ ; CHECK-NEXT: $vgpr1 = COPY [[V_CNDMASK_B32_e64_13]]
+ ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1
+ %2:vgpr_32 = COPY $vgpr0
+ %3:vgpr_32 = COPY $vgpr1
+ %4:vgpr_32 = COPY $vgpr2
+ %5:vgpr_32 = COPY $vgpr3
+ %6:vgpr_32 = COPY $vgpr4
+ %7:vgpr_32 = COPY $vgpr5
+ %8:vgpr_32 = COPY $vgpr6
+ %9:vgpr_32 = COPY $vgpr7
+ %10:vgpr_32 = COPY $vgpr8
+ %11:vgpr_32 = COPY $vgpr9
+ %12:vgpr_32 = COPY $vgpr10
+ %13:vgpr_32 = COPY $vgpr11
+ %14:vgpr_32 = COPY $vgpr12
+ %15:vgpr_32 = COPY $vgpr13
+ %16:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
+ %17:vreg_64 = REG_SEQUENCE %4, %subreg.sub0, %5, %subreg.sub1
+ %18:vreg_64 = REG_SEQUENCE %6, %subreg.sub0, %7, %subreg.sub1
+ %19:vreg_64 = REG_SEQUENCE %8, %subreg.sub0, %9, %subreg.sub1
+ %20:vreg_64 = REG_SEQUENCE %10, %subreg.sub0, %11, %subreg.sub1
+ %21:vreg_64 = REG_SEQUENCE %12, %subreg.sub0, %13, %subreg.sub1
+ %22:vreg_64 = REG_SEQUENCE %14, %subreg.sub0, %15, %subreg.sub1
+ %1:vgpr_32 = COPY $vgpr14
+ %34:vreg_512 = REG_SEQUENCE %16, %subreg.sub0_sub1, %17, %subreg.sub2_sub3, %18, %subreg.sub4_sub5, %19, %subreg.sub6_sub7, %20, %subreg.sub8_sub9, %21, %subreg.sub10_sub11, %22, %subreg.sub12_sub13, undef %35:vreg_64, %subreg.sub14_sub15
+ %55:sreg_64_xexec = V_CMP_EQ_U32_e64 1, %1, implicit $exec
+ %56:vgpr_32 = V_CNDMASK_B32_e64 0, %34.sub0, 0, %34.sub2, %55, implicit $exec
+ %57:vgpr_32 = V_CNDMASK_B32_e64 0, %34.sub1, 0, %34.sub3, %55, implicit $exec
+ %59:sreg_64_xexec = V_CMP_EQ_U32_e64 2, %1, implicit $exec
+ %60:vgpr_32 = V_CNDMASK_B32_e64 0, %56, 0, %34.sub4, %59, implicit $exec
+ %61:vgpr_32 = V_CNDMASK_B32_e64 0, %57, 0, %34.sub5, %59, implicit $exec
+ %63:sreg_64_xexec = V_CMP_EQ_U32_e64 3, %1, implicit $exec
+ %64:vgpr_32 = V_CNDMASK_B32_e64 0, %60, 0, %34.sub6, %63, implicit $exec
+ %65:vgpr_32 = V_CNDMASK_B32_e64 0, %61, 0, %34.sub7, %63, implicit $exec
+ %67:sreg_64_xexec = V_CMP_EQ_U32_e64 4, %1, implicit $exec
+ %68:vgpr_32 = V_CNDMASK_B32_e64 0, %64, 0, %34.sub8, %67, implicit $exec
+ %69:vgpr_32 = V_CNDMASK_B32_e64 0, %65, 0, %34.sub9, %67, implicit $exec
+ %71:sreg_64_xexec = V_CMP_EQ_U32_e64 5, %1, implicit $exec
+ %72:vgpr_32 = V_CNDMASK_B32_e64 0, %68, 0, %34.sub10, %71, implicit $exec
+ %73:vgpr_32 = V_CNDMASK_B32_e64 0, %69, 0, %34.sub11, %71, implicit $exec
+ %75:sreg_64_xexec = V_CMP_EQ_U32_e64 6, %1, implicit $exec
+ %76:vgpr_32 = V_CNDMASK_B32_e64 0, %72, 0, %34.sub12, %75, implicit $exec
+ %77:vgpr_32 = V_CNDMASK_B32_e64 0, %73, 0, %34.sub13, %75, implicit $exec
+ %79:sreg_64_xexec = V_CMP_EQ_U32_e64 7, %1, implicit $exec
+ %80:vgpr_32 = V_CNDMASK_B32_e64 0, %76, 0, %34.sub14, %79, implicit $exec
+ %81:vgpr_32 = V_CNDMASK_B32_e64 0, %77, 0, %34.sub15, %79, implicit $exec
+ $vgpr0 = COPY %80
+ $vgpr1 = COPY %81
+ SI_RETURN implicit $vgpr0, implicit $vgpr1
+
+...
More information about the llvm-commits
mailing list