[PATCH] D26261: AMDGPU: Preserve vcc undef flags when inverting branch
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Wed Nov 2 14:22:59 PDT 2016
arsenm updated this revision to Diff 76787.
arsenm added a comment.
Fix formatting
https://reviews.llvm.org/D26261
Files:
lib/Target/AMDGPU/SIInsertWaits.cpp
lib/Target/AMDGPU/SIInstrInfo.cpp
Index: lib/Target/AMDGPU/SIInstrInfo.cpp
===================================================================
--- lib/Target/AMDGPU/SIInstrInfo.cpp
+++ lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -1268,6 +1268,7 @@
MachineBasicBlock *CondBB = I->getOperand(0).getMBB();
Cond.push_back(MachineOperand::CreateImm(Pred));
+ Cond.push_back(I->getOperand(1)); // Save the branch register.
++I;
@@ -1370,30 +1371,42 @@
= getBranchOpcode(static_cast<BranchPredicate>(Cond[0].getImm()));
if (!FBB) {
- BuildMI(&MBB, DL, get(Opcode))
+ Cond[1].isUndef();
+ MachineInstr *CondBr =
+ BuildMI(&MBB, DL, get(Opcode))
.addMBB(TBB);
+ // Copy the flags onto the implicit condition register operand.
+ MachineOperand &CondReg = CondBr->getOperand(1);
+ CondReg.setIsUndef(Cond[1].isUndef());
+ CondReg.setIsKill(Cond[1].isKill());
+
if (BytesAdded)
*BytesAdded = 4;
return 1;
}
assert(TBB && FBB);
- BuildMI(&MBB, DL, get(Opcode))
+ MachineInstr *CondBr =
+ BuildMI(&MBB, DL, get(Opcode))
.addMBB(TBB);
BuildMI(&MBB, DL, get(AMDGPU::S_BRANCH))
.addMBB(FBB);
+ MachineOperand &CondReg = CondBr->getOperand(1);
+ CondReg.setIsUndef(Cond[1].isUndef());
+ CondReg.setIsKill(Cond[1].isKill());
+
if (BytesAdded)
*BytesAdded = 8;
return 2;
}
bool SIInstrInfo::reverseBranchCondition(
SmallVectorImpl<MachineOperand> &Cond) const {
- assert(Cond.size() == 1);
+ assert(Cond.size() == 2);
Cond[0].setImm(-Cond[0].getImm());
return false;
}
Index: lib/Target/AMDGPU/SIInsertWaits.cpp
===================================================================
--- lib/Target/AMDGPU/SIInsertWaits.cpp
+++ lib/Target/AMDGPU/SIInsertWaits.cpp
@@ -178,8 +178,10 @@
const Counters SIInsertWaits::ZeroCounts = { { 0, 0, 0 } };
-static bool readsVCCZ(unsigned Opcode) {
- return Opcode == AMDGPU::S_CBRANCH_VCCNZ || Opcode == AMDGPU::S_CBRANCH_VCCZ;
+static bool readsVCCZ(const MachineInstr &MI) {
+ unsigned Opc = MI.getOpcode();
+ return (Opc == AMDGPU::S_CBRANCH_VCCNZ || Opc == AMDGPU::S_CBRANCH_VCCZ) &&
+ !MI.getOperand(1).isUndef();
}
bool SIInsertWaits::hasOutstandingLGKM() const {
@@ -574,7 +576,7 @@
}
// Check if we need to apply the bug work-around
- if (readsVCCZ(I->getOpcode()) && VCCZCorrupt) {
+ if (VCCZCorrupt && readsVCCZ(*I)) {
DEBUG(dbgs() << "Inserting vccz bug work-around before: " << *I << '\n');
// Wait on everything, not just LGKM. vccz reads usually come from
@@ -589,7 +591,7 @@
// vcc and then writing it back to the register.
BuildMI(MBB, I, I->getDebugLoc(), TII->get(AMDGPU::S_MOV_B64),
AMDGPU::VCC)
- .addReg(AMDGPU::VCC);
+ .addReg(AMDGPU::VCC);
}
}
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D26261.76787.patch
Type: text/x-patch
Size: 2849 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20161102/09635d94/attachment.bin>
More information about the llvm-commits mailing list