[llvm] Revert "[AMDGPU] Re-apply: Implement vop3p complex pattern optmization for gisel" (PR #146982)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Jul 3 18:44:14 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
Author: None (Shoreshen)
<details>
<summary>Changes</summary>
Reverts llvm/llvm-project#<!-- -->136262
Due to a build error:
```
FAILED: lib/Target/AMDGPU/CMakeFiles/LLVMAMDGPUCodeGen.dir/AMDGPUInstructionSelector.cpp.o
CCACHE_CPP2=yes CCACHE_HASHDIR=yes CCACHE_SLOPPINESS=pch_defines,time_macros /usr/bin/ccache /home/b/sanitizer-aarch64-linux/build/llvm_build0/bin/clang++ -DGTEST_HAS_RTTI=0 -DLLVM_EXPORTS -D_DEBUG -D_GLIBCXX_ASSERTIONS -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -I/home/b/sanitizer-aarch64-linux/build/build_default/lib/Target/AMDGPU -I/home/b/sanitizer-aarch64-linux/build/llvm-project/llvm/lib/Target/AMDGPU -I/home/b/sanitizer-aarch64-linux/build/build_default/include -I/home/b/sanitizer-aarch64-linux/build/llvm-project/llvm/include -fPIC -fno-semantic-interposition -fvisibility-inlines-hidden -Werror -Werror=date-time -Werror=unguarded-availability-new -Wall -Wextra -Wno-unused-parameter -Wwrite-strings -Wcast-qual -Wmissing-field-initializers -pedantic -Wno-long-long -Wc++98-compat-extra-semi -Wimplicit-fallthrough -Wcovered-switch-default -Wno-noexcept-type -Wnon-virtual-dtor -Wdelete-non-virtual-dtor -Wsuggest-override -Wstring-conversion -Wmisleading-indentation -Wctad-maybe-unsupported -fdiagnostics-color -ffunction-sections -fdata-sections -O3 -DNDEBUG -std=c++17 -fvisibility=hidden -fno-exceptions -funwind-tables -fno-rtti -UNDEBUG -MD -MT lib/Target/AMDGPU/CMakeFiles/LLVMAMDGPUCodeGen.dir/AMDGPUInstructionSelector.cpp.o -MF lib/Target/AMDGPU/CMakeFiles/LLVMAMDGPUCodeGen.dir/AMDGPUInstructionSelector.cpp.o.d -o lib/Target/AMDGPU/CMakeFiles/LLVMAMDGPUCodeGen.dir/AMDGPUInstructionSelector.cpp.o -c /home/b/sanitizer-aarch64-linux/build/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
/home/b/sanitizer-aarch64-linux/build/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp:4566:1: error: non-void function does not return a value in all control paths [-Werror,-Wreturn-type]
4566 | }
| ^
1 error generated.
ninja: build stopped: subcommand failed.
```
---
Patch is 39.24 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/146982.diff
7 Files Affected:
- (modified) llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp (+32-563)
- (modified) llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h (+1-3)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/fmul.v2f16.ll (-98)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sdot2.ll (+8-4)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.udot2.ll (+12-6)
- (modified) llvm/test/CodeGen/AMDGPU/packed-fp32.ll (+18-6)
- (modified) llvm/test/CodeGen/AMDGPU/strict_fsub.f16.ll (+9-5)
``````````diff
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index fd679a9933cf0..b632b16f5c198 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -4327,591 +4327,60 @@ AMDGPUInstructionSelector::selectVOP3NoMods(MachineOperand &Root) const {
}};
}
-enum class SrcStatus {
- IS_SAME,
- IS_UPPER_HALF,
- IS_LOWER_HALF,
- IS_UPPER_HALF_NEG,
- // This means current op = [op_upper, op_lower] and src = -op_lower.
- IS_LOWER_HALF_NEG,
- IS_HI_NEG,
- // This means current op = [op_upper, op_lower] and src = [op_upper,
- // -op_lower].
- IS_LO_NEG,
- IS_BOTH_NEG,
- INVALID,
- NEG_START = IS_UPPER_HALF_NEG,
- NEG_END = IS_BOTH_NEG,
- HALF_START = IS_UPPER_HALF,
- HALF_END = IS_LOWER_HALF_NEG
-};
-/// Test if the MI is truncating to half, such as `%reg0:n = G_TRUNC %reg1:2n`
-static bool isTruncHalf(const MachineInstr *MI,
- const MachineRegisterInfo &MRI) {
- if (MI->getOpcode() != AMDGPU::G_TRUNC)
- return false;
-
- unsigned DstSize = MRI.getType(MI->getOperand(0).getReg()).getSizeInBits();
- unsigned SrcSize = MRI.getType(MI->getOperand(1).getReg()).getSizeInBits();
- return DstSize * 2 == SrcSize;
-}
-
-/// Test if the MI is logic shift right with half bits,
-/// such as `%reg0:2n =G_LSHR %reg1:2n, CONST(n)`
-static bool isLshrHalf(const MachineInstr *MI, const MachineRegisterInfo &MRI) {
- if (MI->getOpcode() != AMDGPU::G_LSHR)
- return false;
-
- Register ShiftSrc;
- std::optional<ValueAndVReg> ShiftAmt;
- if (mi_match(MI->getOperand(0).getReg(), MRI,
- m_GLShr(m_Reg(ShiftSrc), m_GCst(ShiftAmt)))) {
- unsigned SrcSize = MRI.getType(MI->getOperand(1).getReg()).getSizeInBits();
- unsigned Shift = ShiftAmt->Value.getZExtValue();
- return Shift * 2 == SrcSize;
- }
- return false;
-}
-
-/// Test if the MI is shift left with half bits,
-/// such as `%reg0:2n =G_SHL %reg1:2n, CONST(n)`
-static bool isShlHalf(const MachineInstr *MI, const MachineRegisterInfo &MRI) {
- if (MI->getOpcode() != AMDGPU::G_SHL)
- return false;
-
- Register ShiftSrc;
- std::optional<ValueAndVReg> ShiftAmt;
- if (mi_match(MI->getOperand(0).getReg(), MRI,
- m_GShl(m_Reg(ShiftSrc), m_GCst(ShiftAmt)))) {
- unsigned SrcSize = MRI.getType(MI->getOperand(1).getReg()).getSizeInBits();
- unsigned Shift = ShiftAmt->Value.getZExtValue();
- return Shift * 2 == SrcSize;
- }
- return false;
-}
-
-/// Test function, if the MI is `%reg0:n, %reg1:n = G_UNMERGE_VALUES %reg2:2n`
-static bool isUnmergeHalf(const MachineInstr *MI,
- const MachineRegisterInfo &MRI) {
- if (MI->getOpcode() != AMDGPU::G_UNMERGE_VALUES)
- return false;
- return MI->getNumOperands() == 3 && MI->getOperand(0).isDef() &&
- MI->getOperand(1).isDef() && !MI->getOperand(2).isDef();
-}
-
-enum class TypeClass { VECTOR_OF_TWO, SCALAR, NONE_OF_LISTED };
-
-static TypeClass isVectorOfTwoOrScalar(Register Reg,
- const MachineRegisterInfo &MRI) {
- LLT OpTy = MRI.getType(Reg);
- if (OpTy.isScalar())
- return TypeClass::SCALAR;
- if (OpTy.isVector() && OpTy.getNumElements() == 2)
- return TypeClass::VECTOR_OF_TWO;
- return TypeClass::NONE_OF_LISTED;
-}
-
-static SrcStatus getNegStatus(Register Reg, SrcStatus S,
- const MachineRegisterInfo &MRI) {
- TypeClass NegType = isVectorOfTwoOrScalar(Reg, MRI);
- if (NegType != TypeClass::VECTOR_OF_TWO && NegType != TypeClass::SCALAR)
- return SrcStatus::INVALID;
-
- switch (S) {
- case SrcStatus::IS_SAME:
- if (NegType == TypeClass::VECTOR_OF_TWO) {
- // Vector of 2:
- // [SrcHi, SrcLo] = [CurrHi, CurrLo]
- // [CurrHi, CurrLo] = neg [OpHi, OpLo](2 x Type)
- // [CurrHi, CurrLo] = [-OpHi, -OpLo](2 x Type)
- // [SrcHi, SrcLo] = [-OpHi, -OpLo]
- return SrcStatus::IS_BOTH_NEG;
- }
- if (NegType == TypeClass::SCALAR) {
- // Scalar:
- // [SrcHi, SrcLo] = [CurrHi, CurrLo]
- // [CurrHi, CurrLo] = neg [OpHi, OpLo](Type)
- // [CurrHi, CurrLo] = [-OpHi, OpLo](Type)
- // [SrcHi, SrcLo] = [-OpHi, OpLo]
- return SrcStatus::IS_HI_NEG;
- }
- break;
- case SrcStatus::IS_HI_NEG:
- if (NegType == TypeClass::VECTOR_OF_TWO) {
- // Vector of 2:
- // [SrcHi, SrcLo] = [-CurrHi, CurrLo]
- // [CurrHi, CurrLo] = neg [OpHi, OpLo](2 x Type)
- // [CurrHi, CurrLo] = [-OpHi, -OpLo](2 x Type)
- // [SrcHi, SrcLo] = [-(-OpHi), -OpLo] = [OpHi, -OpLo]
- return SrcStatus::IS_LO_NEG;
- }
- if (NegType == TypeClass::SCALAR) {
- // Scalar:
- // [SrcHi, SrcLo] = [-CurrHi, CurrLo]
- // [CurrHi, CurrLo] = neg [OpHi, OpLo](Type)
- // [CurrHi, CurrLo] = [-OpHi, OpLo](Type)
- // [SrcHi, SrcLo] = [-(-OpHi), OpLo] = [OpHi, OpLo]
- return SrcStatus::IS_SAME;
- }
- break;
- case SrcStatus::IS_LO_NEG:
- if (NegType == TypeClass::VECTOR_OF_TWO) {
- // Vector of 2:
- // [SrcHi, SrcLo] = [CurrHi, -CurrLo]
- // [CurrHi, CurrLo] = fneg [OpHi, OpLo](2 x Type)
- // [CurrHi, CurrLo] = [-OpHi, -OpLo](2 x Type)
- // [SrcHi, SrcLo] = [-OpHi, -(-OpLo)] = [-OpHi, OpLo]
- return SrcStatus::IS_HI_NEG;
- }
- if (NegType == TypeClass::SCALAR) {
- // Scalar:
- // [SrcHi, SrcLo] = [CurrHi, -CurrLo]
- // [CurrHi, CurrLo] = fneg [OpHi, OpLo](Type)
- // [CurrHi, CurrLo] = [-OpHi, OpLo](Type)
- // [SrcHi, SrcLo] = [-OpHi, -OpLo]
- return SrcStatus::IS_BOTH_NEG;
- }
- break;
- case SrcStatus::IS_BOTH_NEG:
- if (NegType == TypeClass::VECTOR_OF_TWO) {
- // Vector of 2:
- // [SrcHi, SrcLo] = [-CurrHi, -CurrLo]
- // [CurrHi, CurrLo] = fneg [OpHi, OpLo](2 x Type)
- // [CurrHi, CurrLo] = [-OpHi, -OpLo](2 x Type)
- // [SrcHi, SrcLo] = [OpHi, OpLo]
- return SrcStatus::IS_SAME;
- }
- if (NegType == TypeClass::SCALAR) {
- // Scalar:
- // [SrcHi, SrcLo] = [-CurrHi, -CurrLo]
- // [CurrHi, CurrLo] = fneg [OpHi, OpLo](Type)
- // [CurrHi, CurrLo] = [-OpHi, OpLo](Type)
- // [SrcHi, SrcLo] = [OpHi, -OpLo]
- return SrcStatus::IS_LO_NEG;
- }
- break;
- case SrcStatus::IS_UPPER_HALF:
- // Vector of 2:
- // Src = CurrUpper
- // Curr = [CurrUpper, CurrLower]
- // [CurrUpper, CurrLower] = fneg [OpUpper, OpLower](2 x Type)
- // [CurrUpper, CurrLower] = [-OpUpper, -OpLower](2 x Type)
- // Src = -OpUpper
- //
- // Scalar:
- // Src = CurrUpper
- // Curr = [CurrUpper, CurrLower]
- // [CurrUpper, CurrLower] = fneg [OpUpper, OpLower](Type)
- // [CurrUpper, CurrLower] = [-OpUpper, OpLower](Type)
- // Src = -OpUpper
- return SrcStatus::IS_UPPER_HALF_NEG;
- case SrcStatus::IS_LOWER_HALF:
- if (NegType == TypeClass::VECTOR_OF_TWO) {
- // Vector of 2:
- // Src = CurrLower
- // Curr = [CurrUpper, CurrLower]
- // [CurrUpper, CurrLower] = fneg [OpUpper, OpLower](2 x Type)
- // [CurrUpper, CurrLower] = [-OpUpper, -OpLower](2 x Type)
- // Src = -OpLower
- return SrcStatus::IS_LOWER_HALF_NEG;
- }
- if (NegType == TypeClass::SCALAR) {
- // Scalar:
- // Src = CurrLower
- // Curr = [CurrUpper, CurrLower]
- // [CurrUpper, CurrLower] = fneg [OpUpper, OpLower](Type)
- // [CurrUpper, CurrLower] = [-OpUpper, OpLower](Type)
- // Src = OpLower
- return SrcStatus::IS_LOWER_HALF;
- }
- break;
- case SrcStatus::IS_UPPER_HALF_NEG:
- // Vector of 2:
- // Src = -CurrUpper
- // Curr = [CurrUpper, CurrLower]
- // [CurrUpper, CurrLower] = fneg [OpUpper, OpLower](2 x Type)
- // [CurrUpper, CurrLower] = [-OpUpper, -OpLower](2 x Type)
- // Src = -(-OpUpper) = OpUpper
- //
- // Scalar:
- // Src = -CurrUpper
- // Curr = [CurrUpper, CurrLower]
- // [CurrUpper, CurrLower] = fneg [OpUpper, OpLower](Type)
- // [CurrUpper, CurrLower] = [-OpUpper, OpLower](Type)
- // Src = -(-OpUpper) = OpUpper
- return SrcStatus::IS_UPPER_HALF;
- case SrcStatus::IS_LOWER_HALF_NEG:
- if (NegType == TypeClass::VECTOR_OF_TWO) {
- // Vector of 2:
- // Src = -CurrLower
- // Curr = [CurrUpper, CurrLower]
- // [CurrUpper, CurrLower] = fneg [OpUpper, OpLower](2 x Type)
- // [CurrUpper, CurrLower] = [-OpUpper, -OpLower](2 x Type)
- // Src = -(-OpLower) = OpLower
- return SrcStatus::IS_LOWER_HALF;
- }
- if (NegType == TypeClass::SCALAR) {
- // Scalar:
- // Src = -CurrLower
- // Curr = [CurrUpper, CurrLower]
- // [CurrUpper, CurrLower] = fneg [OpUpper, OpLower](Type)
- // [CurrUpper, CurrLower] = [-OpUpper, OpLower](Type)
- // Src = -OpLower
- return SrcStatus::IS_LOWER_HALF_NEG;
- }
- break;
- default:
- llvm_unreachable("unexpected SrcStatus");
- }
-}
-
-static std::optional<std::pair<Register, SrcStatus>>
-calcNextStatus(std::pair<Register, SrcStatus> Curr,
- const MachineRegisterInfo &MRI) {
- const MachineInstr *MI = MRI.getVRegDef(Curr.first);
-
- unsigned Opc = MI->getOpcode();
-
- // Handle general Opc cases.
- switch (Opc) {
- case AMDGPU::G_BITCAST:
- return std::optional<std::pair<Register, SrcStatus>>(
- {MI->getOperand(1).getReg(), Curr.second});
- case AMDGPU::COPY:
- if (MI->getOperand(1).getReg().isPhysical())
- return std::nullopt;
- return std::optional<std::pair<Register, SrcStatus>>(
- {MI->getOperand(1).getReg(), Curr.second});
- case AMDGPU::G_FNEG: {
- SrcStatus Stat = getNegStatus(Curr.first, Curr.second, MRI);
- if (Stat == SrcStatus::INVALID)
- return std::nullopt;
- return std::optional<std::pair<Register, SrcStatus>>(
- {MI->getOperand(1).getReg(), Stat});
- }
- default:
- break;
- }
-
- // Calc next Stat from current Stat.
- switch (Curr.second) {
- case SrcStatus::IS_SAME:
- if (isTruncHalf(MI, MRI))
- return std::optional<std::pair<Register, SrcStatus>>(
- {MI->getOperand(1).getReg(), SrcStatus::IS_LOWER_HALF});
- else if (isUnmergeHalf(MI, MRI)) {
- if (Curr.first == MI->getOperand(0).getReg())
- return std::optional<std::pair<Register, SrcStatus>>(
- {MI->getOperand(2).getReg(), SrcStatus::IS_LOWER_HALF});
- return std::optional<std::pair<Register, SrcStatus>>(
- {MI->getOperand(2).getReg(), SrcStatus::IS_UPPER_HALF});
- }
- break;
- case SrcStatus::IS_HI_NEG:
- if (isTruncHalf(MI, MRI)) {
- // [SrcHi, SrcLo] = [-CurrHi, CurrLo]
- // [CurrHi, CurrLo] = trunc [OpUpper, OpLower] = OpLower
- // = [OpLowerHi, OpLowerLo]
- // Src = [SrcHi, SrcLo] = [-CurrHi, CurrLo]
- // = [-OpLowerHi, OpLowerLo]
- // = -OpLower
- return std::optional<std::pair<Register, SrcStatus>>(
- {MI->getOperand(1).getReg(), SrcStatus::IS_LOWER_HALF_NEG});
- }
- if (isUnmergeHalf(MI, MRI)) {
- if (Curr.first == MI->getOperand(0).getReg())
- return std::optional<std::pair<Register, SrcStatus>>(
- {MI->getOperand(2).getReg(), SrcStatus::IS_LOWER_HALF_NEG});
- return std::optional<std::pair<Register, SrcStatus>>(
- {MI->getOperand(2).getReg(), SrcStatus::IS_UPPER_HALF_NEG});
- }
- break;
- case SrcStatus::IS_UPPER_HALF:
- if (isShlHalf(MI, MRI))
- return std::optional<std::pair<Register, SrcStatus>>(
- {MI->getOperand(1).getReg(), SrcStatus::IS_LOWER_HALF});
- break;
- case SrcStatus::IS_LOWER_HALF:
- if (isLshrHalf(MI, MRI))
- return std::optional<std::pair<Register, SrcStatus>>(
- {MI->getOperand(1).getReg(), SrcStatus::IS_UPPER_HALF});
- break;
- case SrcStatus::IS_UPPER_HALF_NEG:
- if (isShlHalf(MI, MRI))
- return std::optional<std::pair<Register, SrcStatus>>(
- {MI->getOperand(1).getReg(), SrcStatus::IS_LOWER_HALF_NEG});
- break;
- case SrcStatus::IS_LOWER_HALF_NEG:
- if (isLshrHalf(MI, MRI))
- return std::optional<std::pair<Register, SrcStatus>>(
- {MI->getOperand(1).getReg(), SrcStatus::IS_UPPER_HALF_NEG});
- break;
- default:
- break;
- }
- return std::nullopt;
-}
-
-/// This is used to control valid status that current MI supports. For example,
-/// non floating point intrinsic such as @llvm.amdgcn.sdot2 does not support NEG
-/// bit on VOP3P.
-/// The class can be further extended to recognize support on SEL, NEG, ABS bit
-/// for different MI on different arch
-class SearchOptions {
-private:
- bool HasNeg = false;
- // Assume all complex pattern of VOP3P have opsel.
- bool HasOpsel = true;
-
-public:
- SearchOptions(Register Reg, const MachineRegisterInfo &MRI) {
- const MachineInstr *MI = MRI.getVRegDef(Reg);
- unsigned Opc = MI->getOpcode();
-
- if (Opc < TargetOpcode::GENERIC_OP_END) {
- // Keep same for generic op.
- HasNeg = true;
- } else if (Opc == TargetOpcode::G_INTRINSIC) {
- Intrinsic::ID IntrinsicID = cast<GIntrinsic>(*MI).getIntrinsicID();
- // Only float point intrinsic has neg & neg_hi bits.
- if (IntrinsicID == Intrinsic::amdgcn_fdot2)
- HasNeg = true;
- }
- }
- bool checkOptions(SrcStatus Stat) const {
- if (!HasNeg &&
- (Stat >= SrcStatus::NEG_START && Stat <= SrcStatus::NEG_END)) {
- return false;
- }
- if (!HasOpsel &&
- (Stat >= SrcStatus::HALF_START && Stat <= SrcStatus::HALF_END)) {
- return false;
- }
- return true;
- }
-};
-
-static SmallVector<std::pair<Register, SrcStatus>>
-getSrcStats(Register Reg, const MachineRegisterInfo &MRI, SearchOptions SO,
- int MaxDepth = 3) {
- int Depth = 0;
- auto Curr = calcNextStatus({Reg, SrcStatus::IS_SAME}, MRI);
- SmallVector<std::pair<Register, SrcStatus>> Statlist;
-
- while (Depth <= MaxDepth && Curr.has_value()) {
- Depth++;
- if (SO.checkOptions(Curr.value().second))
- Statlist.push_back(Curr.value());
- Curr = calcNextStatus(Curr.value(), MRI);
- }
-
- return Statlist;
-}
-
-static std::pair<Register, SrcStatus>
-getLastSameOrNeg(Register Reg, const MachineRegisterInfo &MRI, SearchOptions SO,
- int MaxDepth = 3) {
- int Depth = 0;
- std::pair<Register, SrcStatus> LastSameOrNeg = {Reg, SrcStatus::IS_SAME};
- auto Curr = calcNextStatus(LastSameOrNeg, MRI);
-
- while (Depth <= MaxDepth && Curr.has_value()) {
- Depth++;
- SrcStatus Stat = Curr.value().second;
- if (SO.checkOptions(Stat)) {
- if (Stat == SrcStatus::IS_SAME || Stat == SrcStatus::IS_HI_NEG ||
- Stat == SrcStatus::IS_LO_NEG || Stat == SrcStatus::IS_BOTH_NEG)
- LastSameOrNeg = Curr.value();
- }
- Curr = calcNextStatus(Curr.value(), MRI);
- }
-
- return LastSameOrNeg;
-}
-
-static bool isSameBitWidth(Register Reg1, Register Reg2,
- const MachineRegisterInfo &MRI) {
- unsigned Width1 = MRI.getType(Reg1).getSizeInBits();
- unsigned Width2 = MRI.getType(Reg2).getSizeInBits();
- return Width1 == Width2;
-}
-
-static unsigned updateMods(SrcStatus HiStat, SrcStatus LoStat, unsigned Mods) {
- // SrcStatus::IS_LOWER_HALF remain 0.
- if (HiStat == SrcStatus::IS_UPPER_HALF_NEG) {
- Mods ^= SISrcMods::NEG_HI;
- Mods |= SISrcMods::OP_SEL_1;
- } else if (HiStat == SrcStatus::IS_UPPER_HALF)
- Mods |= SISrcMods::OP_SEL_1;
- else if (HiStat == SrcStatus::IS_LOWER_HALF_NEG)
- Mods ^= SISrcMods::NEG_HI;
- else if (HiStat == SrcStatus::IS_HI_NEG)
- Mods ^= SISrcMods::NEG_HI;
-
- if (LoStat == SrcStatus::IS_UPPER_HALF_NEG) {
- Mods ^= SISrcMods::NEG;
- Mods |= SISrcMods::OP_SEL_0;
- } else if (LoStat == SrcStatus::IS_UPPER_HALF)
- Mods |= SISrcMods::OP_SEL_0;
- else if (LoStat == SrcStatus::IS_LOWER_HALF_NEG)
- Mods |= SISrcMods::NEG;
- else if (LoStat == SrcStatus::IS_HI_NEG)
- Mods ^= SISrcMods::NEG;
-
- return Mods;
-}
-
-static bool isValidToPack(SrcStatus HiStat, SrcStatus LoStat, Register NewReg,
- Register RootReg, const SIInstrInfo &TII,
- const MachineRegisterInfo &MRI) {
- auto IsHalfState = [](SrcStatus S) {
- return S == SrcStatus::IS_UPPER_HALF || S == SrcStatus::IS_UPPER_HALF_NEG ||
- S == SrcStatus::IS_LOWER_HALF || S == SrcStatus::IS_LOWER_HALF_NEG;
- };
- return isSameBitWidth(NewReg, RootReg, MRI) && IsHalfState(LoStat) &&
- IsHalfState(HiStat);
-}
-
-std::pair<Register, unsigned> AMDGPUInstructionSelector::selectVOP3PModsImpl(
- Register RootReg, const MachineRegisterInfo &MRI, bool IsDOT) const {
+std::pair<Register, unsigned>
+AMDGPUInstructionSelector::selectVOP3PModsImpl(
+ Register Src, const MachineRegisterInfo &MRI, bool IsDOT) const {
unsigned Mods = 0;
- // No modification if Root type is not form of <2 x Type>.
- if (isVectorOfTwoOrScalar(RootReg, MRI) != TypeClass::VECTOR_OF_TWO) {
- Mods |= SISrcMods::OP_SEL_1;
- return {RootReg, Mods};
- }
-
- SearchOptions SO(RootReg, MRI);
-
- std::pair<Register, SrcStatus> Stat = getLastSameOrNeg(RootReg, MRI, SO);
+ MachineInstr *MI = MRI.getVRegDef(Src);
- if (Stat.second == SrcStatus::IS_BOTH_NEG)
+ if (MI->getOpcode() == AMDGPU::G_FNEG &&
+ // It's possible to see an f32 fneg here, but unlikely.
+ // TODO: Treat f32 fneg as only high bit.
+ MRI.getType(Src) == LLT::fixed_vector(2, 16)) {
Mods ^= (SISrcMods::NEG | SISrcMods::NEG_HI);
- else if (Stat.second == SrcStatus::IS_HI_NEG)
- Mods ^= SISrcMods::NEG_HI;
- else if (Stat.second == SrcStatus::IS_LO_NEG)
- Mods ^= SISrcMods::NEG;
-
- MachineInstr *MI = MRI.getVRegDef(Stat.first);
-
- if (MI->getOpcode() != AMDGPU::G_BUILD_VECTOR || MI->getNumOperands() != 3 ||
- (IsDOT && Subtarget->hasDOTOpSelHazard())) {
- Mods |= SISrcMods::OP_SEL_1;
- return {Stat.first, Mods};
+ Src = MI->getOperand(1).getReg();
+ MI = MRI.getVRegDef(Src);
}
- SmallVector<std::pair<Register, SrcStatus>> StatlistHi =
- getSrcStats(MI->getOperand(2).getReg(), MRI, SO);
+ // TODO: Handle G_FSUB 0 as fneg
- if (StatlistHi.empty()) {
- Mods |= SISrcMods::OP_SEL_1;
- return {Stat.first, Mods};
- }
+ // TODO: Match op_sel through g_build_vector_trunc and g_shuffle_vector.
+ (void)IsDOT; // DOTs do not use OPSEL on gfx942+, check ST.hasDOTOpSelHazard()
- SmallVector<std::pair<Register, SrcStatus>> StatlistLo =
- getSrcStats(MI->getOperand(1).getReg(), MRI, SO);
-
- if (StatlistLo.empty()) {
- Mods |= SISrcMods::OP_SEL_1;
- return {Stat.first, Mods};
- }
-
- for (int I = StatlistHi.size() - 1; I >= 0; I--) {
- for (int J = StatlistLo.size() - 1; J >= 0; J--) {
- if (StatlistHi[I].first == StatlistLo[J].first &&
- isValidToPack(StatlistHi[I].second, StatlistLo[J].second,
- StatlistHi[I].first, RootReg, TII, MRI))
- return {StatlistHi[I].first,
- updateMods(StatlistHi[I].second, StatlistLo[J].second, Mods)};
- }
- }
// Packed instructions do not have abs modifiers.
Mods |= SISrcMods::OP_SEL_1;
- return {Stat.first, Mods};
-}
-
-int64_t getAllKindImm(const MachineOperand *Op) {
- switch (Op->getType()) {
- case MachineOperand::MachineOperandType::MO_Immediate:
- return Op->getImm();
- case MachineOperand::MachineOperandType::MO_CImmediate:
- return Op->getCImm()->getSExtValue();
- case MachineOperand::MachineOperandType::MO_FPImmediate:
- return Op->getFPImm()->getValueAPF().bitcastToAPInt().getSExtValue();
- default:
- llvm_unreachable("not an imm type");
- }
-}
-
-static bool checkRB(Register Reg, unsigned int RBNo,
- const AMDGPURegisterBankInfo &RBI,
- const MachineRegisterInfo &MRI,
- const TargetRegisterInfo &TRI) {
- const RegisterBank *RB = RBI.getRegBank(Reg, MRI, TRI);
- return RB->getID() == RBNo;
-}
-
-// This function is used to get the correct register bank for returned reg.
-// Assume:
-// 1. VOP3P is always legal for VGPR.
-// 2. RootOp's regbank is legal.
-// Thus
-// 1. If RootOp is SGPR, then NewOp can be SGPR or VGPR.
-// 2. If RootOp is VGPR, then NewOp must be VGPR.
-static Register getLegalRegBank(Register NewReg, Register RootReg,
- const AMDGPURegisterBankInfo &RBI,
- MachineRegisterInfo &MRI,
- const TargetRegis...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/146982
More information about the llvm-commits
mailing list