[llvm] AMDGPU: Fix packed 16-bit inline constants (PR #76522)
Stanislav Mekhanoshin via llvm-commits
llvm-commits at lists.llvm.org
Tue Jan 2 12:16:01 PST 2024
================
@@ -234,51 +232,143 @@ bool SIFoldOperands::tryFoldImmWithOpSel(FoldCandidate &Fold) const {
MachineOperand &Old = MI->getOperand(Fold.UseOpNo);
unsigned Opcode = MI->getOpcode();
int OpNo = MI->getOperandNo(&Old);
+ uint8_t OpType = TII->get(Opcode).operands()[OpNo].OperandType;
+
+ // If the literal can be inlined as-is, apply it and short-circuit the
+ // tests below. The main motivation for this is to avoid unintuitive
+ // uses of opsel.
+ if (AMDGPU::isInlinableLiteralV216(Fold.ImmToFold, OpType)) {
+ Old.ChangeToImmediate(Fold.ImmToFold);
+ return true;
+ }
- // Set op_sel/op_sel_hi on this operand or bail out if op_sel is
- // already set.
+ // Refer to op_sel/op_sel_hi and check if we can change the immediate and
+ // op_sel in a way that allows an inline constant.
int ModIdx = -1;
- if (OpNo == AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0))
+ unsigned SrcIdx = ~0;
+ if (OpNo == AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0)) {
ModIdx = AMDGPU::OpName::src0_modifiers;
- else if (OpNo == AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1))
+ SrcIdx = 0;
+ } else if (OpNo == AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1)) {
ModIdx = AMDGPU::OpName::src1_modifiers;
- else if (OpNo == AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2))
+ SrcIdx = 1;
+ } else if (OpNo == AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2)) {
ModIdx = AMDGPU::OpName::src2_modifiers;
+ SrcIdx = 2;
+ }
assert(ModIdx != -1);
ModIdx = AMDGPU::getNamedOperandIdx(Opcode, ModIdx);
MachineOperand &Mod = MI->getOperand(ModIdx);
- unsigned Val = Mod.getImm();
- if ((Val & SISrcMods::OP_SEL_0) || !(Val & SISrcMods::OP_SEL_1))
+ unsigned ModVal = Mod.getImm();
+
+ uint16_t ImmLo = static_cast<uint16_t>(
+ Fold.ImmToFold >> (ModVal & SISrcMods::OP_SEL_0 ? 16 : 0));
+ uint16_t ImmHi = static_cast<uint16_t>(
+ Fold.ImmToFold >> (ModVal & SISrcMods::OP_SEL_1 ? 16 : 0));
+ uint32_t Imm = (static_cast<uint32_t>(ImmHi) << 16) | ImmLo;
+ unsigned NewModVal = ModVal & ~(SISrcMods::OP_SEL_0 | SISrcMods::OP_SEL_1);
+
+ // Helper function that attempts to inline the given value with a newly
+ // chosen opsel pattern.
+ auto tryFoldToInline = [&](uint32_t Imm) -> bool {
+ if (AMDGPU::isInlinableLiteralV216(Imm, OpType)) {
+ Mod.setImm(NewModVal | SISrcMods::OP_SEL_1);
+ Old.ChangeToImmediate(Imm);
+ return true;
+ }
+
+ // Try to shuffle the halves around and leverage opsel to get an inline
+ // constant.
+ uint16_t Lo = static_cast<uint16_t>(Imm);
+ uint16_t Hi = static_cast<uint16_t>(Imm >> 16);
+ if (Lo == Hi) {
+ if (AMDGPU::isInlinableLiteralV216(Lo, OpType)) {
+ Mod.setImm(NewModVal);
+ Old.ChangeToImmediate(Lo);
+ return true;
+ }
+
+ if (static_cast<int16_t>(Lo) < 0) {
+ int32_t SExt = static_cast<int16_t>(Lo);
+ if (AMDGPU::isInlinableLiteralV216(SExt, OpType)) {
+ Mod.setImm(NewModVal);
+ Old.ChangeToImmediate(SExt);
+ return true;
+ }
+ }
+
+ // This check is only useful for integer instructions
+ if (OpType == AMDGPU::OPERAND_REG_IMM_V2INT16 ||
+ OpType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16) {
+ if (AMDGPU::isInlinableLiteralV216(Lo << 16, OpType)) {
+ Mod.setImm(NewModVal | SISrcMods::OP_SEL_0 | SISrcMods::OP_SEL_1);
+ Old.ChangeToImmediate(static_cast<uint32_t>(Lo) << 16);
+ return true;
+ }
+ }
+ } else {
+ uint32_t Swapped = (static_cast<uint32_t>(Lo) << 16) | Hi;
+ if (AMDGPU::isInlinableLiteralV216(Swapped, OpType)) {
+ Mod.setImm(NewModVal | SISrcMods::OP_SEL_0);
+ Old.ChangeToImmediate(Swapped);
+ return true;
+ }
+ }
+
return false;
+ };
- // Only apply the following transformation if that operand requires
- // a packed immediate.
- // If upper part is all zero we do not need op_sel_hi.
- if (!(Fold.ImmToFold & 0xffff)) {
- MachineOperand New =
- MachineOperand::CreateImm((Fold.ImmToFold >> 16) & 0xffff);
- if (!TII->isOperandLegal(*MI, OpNo, &New))
- return false;
- Mod.setImm(Mod.getImm() | SISrcMods::OP_SEL_0);
- Mod.setImm(Mod.getImm() & ~SISrcMods::OP_SEL_1);
- Old.ChangeToImmediate((Fold.ImmToFold >> 16) & 0xffff);
+ if (tryFoldToInline(Imm))
return true;
+
+ // Replace integer addition by subtraction and vice versa if it allows
+ // folding the immediate to an inline constant.
+ //
+ // We should only ever get here for SrcIdx == 1 due to canonicalization
+ // earlier in the pipeline, but we double-check here to be safe / fully
+ // general.
+ bool IsUAdd = Opcode == AMDGPU::V_PK_ADD_U16;
----------------
rampitec wrote:
And this also looks like a separate change along with removing negated constants handling in DAG.
https://github.com/llvm/llvm-project/pull/76522
More information about the llvm-commits
mailing list