[llvm] [AMDGPU] Extend zero initialization of return values for TFE (PR #85759)
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Fri Mar 22 03:23:28 PDT 2024
================
@@ -15033,57 +15038,64 @@ SDNode *SITargetLowering::PostISelFolding(MachineSDNode *Node,
// result register that will be written in the case of a memory access failure.
// The required code is also added to tie this init code to the result of the
// img instruction.
-void SITargetLowering::AddIMGInit(MachineInstr &MI) const {
+void SITargetLowering::AddMemOpInit(MachineInstr &MI) const {
const SIInstrInfo *TII = getSubtarget()->getInstrInfo();
const SIRegisterInfo &TRI = TII->getRegisterInfo();
MachineRegisterInfo &MRI = MI.getMF()->getRegInfo();
MachineBasicBlock &MBB = *MI.getParent();
- MachineOperand *TFE = TII->getNamedOperand(MI, AMDGPU::OpName::tfe);
- MachineOperand *LWE = TII->getNamedOperand(MI, AMDGPU::OpName::lwe);
- MachineOperand *D16 = TII->getNamedOperand(MI, AMDGPU::OpName::d16);
-
- if (!TFE && !LWE) // intersect_ray
- return;
+ int DstIdx =
+ AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdata);
+ unsigned InitIdx = 0;
- unsigned TFEVal = TFE ? TFE->getImm() : 0;
- unsigned LWEVal = LWE ? LWE->getImm() : 0;
- unsigned D16Val = D16 ? D16->getImm() : 0;
+ if (TII->isImage(MI)) {
+ MachineOperand *TFE = TII->getNamedOperand(MI, AMDGPU::OpName::tfe);
+ MachineOperand *LWE = TII->getNamedOperand(MI, AMDGPU::OpName::lwe);
+ MachineOperand *D16 = TII->getNamedOperand(MI, AMDGPU::OpName::d16);
- if (!TFEVal && !LWEVal)
- return;
+ if (!TFE && !LWE) // intersect_ray
+ return;
- // At least one of TFE or LWE are non-zero
- // We have to insert a suitable initialization of the result value and
- // tie this to the dest of the image instruction.
+ unsigned TFEVal = TFE ? TFE->getImm() : 0;
+ unsigned LWEVal = LWE ? LWE->getImm() : 0;
+ unsigned D16Val = D16 ? D16->getImm() : 0;
- const DebugLoc &DL = MI.getDebugLoc();
+ if (!TFEVal && !LWEVal)
+ return;
- int DstIdx =
- AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdata);
+ // At least one of TFE or LWE are non-zero
+ // We have to insert a suitable initialization of the result value and
+ // tie this to the dest of the image instruction.
- // Calculate which dword we have to initialize to 0.
- MachineOperand *MO_Dmask = TII->getNamedOperand(MI, AMDGPU::OpName::dmask);
+ // Calculate which dword we have to initialize to 0.
+ MachineOperand *MO_Dmask = TII->getNamedOperand(MI, AMDGPU::OpName::dmask);
- // check that dmask operand is found.
- assert(MO_Dmask && "Expected dmask operand in instruction");
+ // check that dmask operand is found.
+ assert(MO_Dmask && "Expected dmask operand in instruction");
- unsigned dmask = MO_Dmask->getImm();
- // Determine the number of active lanes taking into account the
- // Gather4 special case
- unsigned ActiveLanes = TII->isGather4(MI) ? 4 : llvm::popcount(dmask);
+ unsigned dmask = MO_Dmask->getImm();
+ // Determine the number of active lanes taking into account the
+ // Gather4 special case
+ unsigned ActiveLanes = TII->isGather4(MI) ? 4 : llvm::popcount(dmask);
- bool Packed = !Subtarget->hasUnpackedD16VMem();
+ bool Packed = !Subtarget->hasUnpackedD16VMem();
- unsigned InitIdx =
- D16Val && Packed ? ((ActiveLanes + 1) >> 1) + 1 : ActiveLanes + 1;
+ InitIdx = D16Val && Packed ? ((ActiveLanes + 1) >> 1) + 1 : ActiveLanes + 1;
- // Abandon attempt if the dst size isn't large enough
- // - this is in fact an error but this is picked up elsewhere and
- // reported correctly.
- uint32_t DstSize = TRI.getRegSizeInBits(*TII->getOpRegClass(MI, DstIdx)) / 32;
- if (DstSize < InitIdx)
+ // Abandon attempt if the dst size isn't large enough
+ // - this is in fact an error but this is picked up elsewhere and
+ // reported correctly.
+ uint32_t DstSize =
+ TRI.getRegSizeInBits(*TII->getOpRegClass(MI, DstIdx)) / 32;
+ if (DstSize < InitIdx)
+ return;
+ } else if (TII->isMUBUF(MI) && AMDGPU::getMUBUFTfe(MI.getOpcode())) {
+ InitIdx = TRI.getRegSizeInBits(*TII->getOpRegClass(MI, DstIdx)) / 32;
+ } else {
return;
+ }
+
+ const DebugLoc &DL = MI.getDebugLoc();
// Create a register for the initialization value.
Register PrevDst = MRI.createVirtualRegister(TII->getOpRegClass(MI, DstIdx));
----------------
arsenm wrote:
This could just clone the register instead of querying the register class.
https://github.com/llvm/llvm-project/pull/85759
More information about the llvm-commits mailing list