[llvm] [AMDGPU] Extend zero initialization of return values for TFE (PR #85759)
David Stuttard via llvm-commits
llvm-commits at lists.llvm.org
Tue Mar 19 02:59:11 PDT 2024
================
@@ -15033,57 +15033,66 @@ SDNode *SITargetLowering::PostISelFolding(MachineSDNode *Node,
// result register that will be written in the case of a memory access failure.
// The required code is also added to tie this init code to the result of the
// img instruction.
-void SITargetLowering::AddIMGInit(MachineInstr &MI) const {
+void SITargetLowering::AddMemOpInit(MachineInstr &MI) const {
const SIInstrInfo *TII = getSubtarget()->getInstrInfo();
const SIRegisterInfo &TRI = TII->getRegisterInfo();
MachineRegisterInfo &MRI = MI.getMF()->getRegInfo();
MachineBasicBlock &MBB = *MI.getParent();
- MachineOperand *TFE = TII->getNamedOperand(MI, AMDGPU::OpName::tfe);
- MachineOperand *LWE = TII->getNamedOperand(MI, AMDGPU::OpName::lwe);
- MachineOperand *D16 = TII->getNamedOperand(MI, AMDGPU::OpName::d16);
+ int DstIdx =
+ AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdata);
+ unsigned InitIdx = 0;
- if (!TFE && !LWE) // intersect_ray
- return;
+ if (TII->isImage(MI)) {
+ MachineOperand *TFE = TII->getNamedOperand(MI, AMDGPU::OpName::tfe);
+ MachineOperand *LWE = TII->getNamedOperand(MI, AMDGPU::OpName::lwe);
+ MachineOperand *D16 = TII->getNamedOperand(MI, AMDGPU::OpName::d16);
- unsigned TFEVal = TFE ? TFE->getImm() : 0;
- unsigned LWEVal = LWE ? LWE->getImm() : 0;
- unsigned D16Val = D16 ? D16->getImm() : 0;
+ if (!TFE && !LWE) // intersect_ray
+ return;
- if (!TFEVal && !LWEVal)
- return;
+ unsigned TFEVal = TFE ? TFE->getImm() : 0;
+ unsigned LWEVal = LWE ? LWE->getImm() : 0;
+ unsigned D16Val = D16 ? D16->getImm() : 0;
- // At least one of TFE or LWE are non-zero
- // We have to insert a suitable initialization of the result value and
- // tie this to the dest of the image instruction.
+ if (!TFEVal && !LWEVal)
+ return;
- const DebugLoc &DL = MI.getDebugLoc();
+ // At least one of TFE or LWE are non-zero
+ // We have to insert a suitable initialization of the result value and
+ // tie this to the dest of the image instruction.
- int DstIdx =
- AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdata);
+ // Calculate which dword we have to initialize to 0.
+ MachineOperand *MO_Dmask = TII->getNamedOperand(MI, AMDGPU::OpName::dmask);
- // Calculate which dword we have to initialize to 0.
- MachineOperand *MO_Dmask = TII->getNamedOperand(MI, AMDGPU::OpName::dmask);
+ // check that dmask operand is found.
+ assert(MO_Dmask && "Expected dmask operand in instruction");
- // check that dmask operand is found.
- assert(MO_Dmask && "Expected dmask operand in instruction");
+ unsigned dmask = MO_Dmask->getImm();
+ // Determine the number of active lanes taking into account the
+ // Gather4 special case
+ unsigned ActiveLanes = TII->isGather4(MI) ? 4 : llvm::popcount(dmask);
- unsigned dmask = MO_Dmask->getImm();
- // Determine the number of active lanes taking into account the
- // Gather4 special case
- unsigned ActiveLanes = TII->isGather4(MI) ? 4 : llvm::popcount(dmask);
+ bool Packed = !Subtarget->hasUnpackedD16VMem();
- bool Packed = !Subtarget->hasUnpackedD16VMem();
+ InitIdx = D16Val && Packed ? ((ActiveLanes + 1) >> 1) + 1 : ActiveLanes + 1;
- unsigned InitIdx =
- D16Val && Packed ? ((ActiveLanes + 1) >> 1) + 1 : ActiveLanes + 1;
-
- // Abandon attempt if the dst size isn't large enough
- // - this is in fact an error but this is picked up elsewhere and
- // reported correctly.
- uint32_t DstSize = TRI.getRegSizeInBits(*TII->getOpRegClass(MI, DstIdx)) / 32;
- if (DstSize < InitIdx)
+ // Abandon attempt if the dst size isn't large enough
+ // - this is in fact an error but this is picked up elsewhere and
+ // reported correctly.
+ uint32_t DstSize =
+ TRI.getRegSizeInBits(*TII->getOpRegClass(MI, DstIdx)) / 32;
+ if (DstSize < InitIdx)
+ return;
+ } else if (TII->isMUBUF(MI) && AMDGPU::getMUBUFHasTFE(MI.getOpcode())) {
+ uint32_t DstSize =
----------------
dstutt wrote:
Done
https://github.com/llvm/llvm-project/pull/85759
More information about the llvm-commits mailing list