[llvm] [AMDGPU] Use SDNodeXForm to select a few VOP3P modifiers, NFC (PR #151907)
Changpeng Fang via llvm-commits
llvm-commits at lists.llvm.org
Sun Aug 3 23:59:40 PDT 2025
https://github.com/changpeng updated https://github.com/llvm/llvm-project/pull/151907
>From 3c1f10d9f750d5374a2f4f7c85a7ce5851ffe46e Mon Sep 17 00:00:00 2001
From: Changpeng Fang <changpeng.fang at amd.com>
Date: Sun, 3 Aug 2025 23:02:20 -0700
Subject: [PATCH 1/3] [AMDGPU] Use SDNodeXForm to select a few VOP3P modifiers,
NFC
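It is not necessary to use ComplexPattern to select VOP3PModsNeg,
VOP3PModsNegs and VOP3PModsNegAbs. These operands are immediates: the
existing selectors always succeed and only translate the immediate into
a source-modifier constant, so we can use SDNodeXForm instead (with
matching GICustomOperandRenderer equivalents for GlobalISel).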
---
llvm/lib/Target/AMDGPU/AMDGPUGISel.td | 19 ++--
llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp | 57 ------------
llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h | 3 -
.../AMDGPU/AMDGPUInstructionSelector.cpp | 92 +++++++------------
.../Target/AMDGPU/AMDGPUInstructionSelector.h | 14 +--
llvm/lib/Target/AMDGPU/SIInstrInfo.td | 29 +++++-
llvm/lib/Target/AMDGPU/VOP3PInstructions.td | 46 +++++-----
7 files changed, 95 insertions(+), 165 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
index 992572f17e5b9..394a143dd3086 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
@@ -51,18 +51,6 @@ def gi_vop3pmodsdot :
GIComplexOperandMatcher<s32, "selectVOP3PModsDOT">,
GIComplexPatternEquiv<VOP3PModsDOT>;
-def gi_vop3pmodsneg :
- GIComplexOperandMatcher<s32, "selectVOP3PModsNeg">,
- GIComplexPatternEquiv<VOP3PModsNeg>;
-
-def gi_vop3pmodsnegs :
- GIComplexOperandMatcher<s32, "selectVOP3PModsNegs">,
- GIComplexPatternEquiv<VOP3PModsNegs>;
-
-def gi_dotiuvop3pmodsnegabs :
- GIComplexOperandMatcher<s32, "selectVOP3PModsNegAbs">,
- GIComplexPatternEquiv<VOP3PModsNegAbs>;
-
def gi_wmmaopselvop3pmods :
GIComplexOperandMatcher<s32, "selectWMMAOpSelVOP3PMods">,
GIComplexPatternEquiv<WMMAOpSelVOP3PMods>;
@@ -452,6 +440,13 @@ def gi_fp_pow2_to_exponent : GICustomOperandRenderer<"renderFPPow2ToExponent">,
def gi_as_hw_round_mode : GICustomOperandRenderer<"renderRoundMode">,
GISDNodeXFormEquiv<as_hw_round_mode>;
+def gi_VOP3PModsNeg : GICustomOperandRenderer<"renderVOP3PModsNeg">,
+ GISDNodeXFormEquiv<VOP3PModsNeg>;
+def gi_VOP3PModsNegs : GICustomOperandRenderer<"renderVOP3PModsNegs">,
+ GISDNodeXFormEquiv<VOP3PModsNegs>;
+def gi_VOP3PModsNegAbs : GICustomOperandRenderer<"renderVOP3PModsNegAbs">,
+ GISDNodeXFormEquiv<VOP3PModsNegAbs>;
+
def gi_prefetch_loc : GICustomOperandRenderer<"renderPrefetchLoc">,
GISDNodeXFormEquiv<PrefetchLoc>;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index 39b42002b907a..fb83388e5e265 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -3449,63 +3449,6 @@ bool AMDGPUDAGToDAGISel::SelectVOP3PModsDOT(SDValue In, SDValue &Src,
return SelectVOP3PMods(In, Src, SrcMods, true);
}
-// Select neg_lo from the i1 immediate operand.
-bool AMDGPUDAGToDAGISel::SelectVOP3PModsNeg(SDValue In, SDValue &Src) const {
- const ConstantSDNode *C = cast<ConstantSDNode>(In);
- // Literal i1 value set in intrinsic, represents SrcMods for the next operand.
- // 1 promotes packed values to signed, 0 treats them as unsigned.
- assert(C->getAPIntValue().getBitWidth() == 1 && "expected i1 value");
-
- unsigned Mods = SISrcMods::OP_SEL_1;
- unsigned SrcSign = C->getZExtValue();
- if (SrcSign == 1)
- Mods ^= SISrcMods::NEG;
-
- Src = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
- return true;
-}
-
-// Select both neg_lo and neg_hi from the i1 immediate operand. This is
-// specifically for F16/BF16 operands in WMMA instructions, where neg_lo applies
-// to matrix's even k elements, and neg_hi applies to matrix's odd k elements.
-bool AMDGPUDAGToDAGISel::SelectVOP3PModsNegs(SDValue In, SDValue &Src) const {
- const ConstantSDNode *C = cast<ConstantSDNode>(In);
- // Literal i1 value set in intrinsic, represents SrcMods for the next operand.
- // 1 promotes packed values to signed, 0 treats them as unsigned.
- assert(C->getAPIntValue().getBitWidth() == 1 && "expected i1 value");
-
- unsigned Mods = SISrcMods::OP_SEL_1;
- unsigned SrcSign = C->getZExtValue();
- if (SrcSign == 1)
- Mods ^= (SISrcMods::NEG | SISrcMods::NEG_HI);
-
- Src = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
- return true;
-}
-
-// Select neg, abs, or both neg and abs from the i16 immediate operans.
-bool AMDGPUDAGToDAGISel::SelectVOP3PModsNegAbs(SDValue In, SDValue &Src) const {
- const ConstantSDNode *C = cast<ConstantSDNode>(In);
- unsigned Mods = SISrcMods::OP_SEL_1;
- unsigned SrcMod = C->getZExtValue();
- switch (SrcMod) {
- default: // Any other value will be silently ignored (considered as 0).
- break;
- case 1:
- Mods ^= SISrcMods::NEG;
- break;
- case 2:
- Mods ^= SISrcMods::ABS;
- break;
- case 3:
- Mods ^= (SISrcMods::NEG | SISrcMods::ABS);
- break;
- }
-
- Src = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
- return true;
-}
-
bool AMDGPUDAGToDAGISel::SelectWMMAOpSelVOP3PMods(SDValue In,
SDValue &Src) const {
const ConstantSDNode *C = cast<ConstantSDNode>(In);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
index 983f1aa8fab86..16388e750026c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
@@ -241,9 +241,6 @@ class AMDGPUDAGToDAGISel : public SelectionDAGISel {
bool IsDOT = false) const;
bool SelectVOP3PModsDOT(SDValue In, SDValue &Src, SDValue &SrcMods) const;
- bool SelectVOP3PModsNeg(SDValue In, SDValue &Src) const;
- bool SelectVOP3PModsNegs(SDValue In, SDValue &Src) const;
- bool SelectVOP3PModsNegAbs(SDValue In, SDValue &Src) const;
bool SelectWMMAOpSelVOP3PMods(SDValue In, SDValue &Src) const;
bool SelectWMMAModsF32NegAbs(SDValue In, SDValue &Src,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index b0d3b12471a38..212f4872303de 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -4988,66 +4988,6 @@ AMDGPUInstructionSelector::selectVOP3PModsDOT(MachineOperand &Root) const {
return selectVOP3PRetHelper(Root, true);
}
-// Select neg_lo from the i1 immediate operand.
-InstructionSelector::ComplexRendererFns
-AMDGPUInstructionSelector::selectVOP3PModsNeg(MachineOperand &Root) const {
- // Literal i1 value set in intrinsic, represents SrcMods for the next operand.
- // Value is in Imm operand as i1 sign extended to int64_t.
- // 1(-1) promotes packed values to signed, 0 treats them as unsigned.
- assert((Root.isImm() && (Root.getImm() == -1 || Root.getImm() == 0)) &&
- "expected i1 value");
- unsigned Mods = SISrcMods::OP_SEL_1;
- if (Root.getImm() == -1)
- Mods ^= SISrcMods::NEG;
- return {{
- [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); } // src_mods
- }};
-}
-
-// Select both neg_lo and neg_hi from the i1 immediate operand. This is
-// specifically for F16/BF16 operands in WMMA instructions, where neg_lo applies
-// to matrix's even k elements, and neg_hi applies to matrix's odd k elements.
-InstructionSelector::ComplexRendererFns
-AMDGPUInstructionSelector::selectVOP3PModsNegs(MachineOperand &Root) const {
- // Literal i1 value set in intrinsic, represents SrcMods for the next operand.
- // Value is in Imm operand as i1 sign extended to int64_t.
- // 1(-1) promotes packed values to signed, 0 treats them as unsigned.
- assert((Root.isImm() && (Root.getImm() == -1 || Root.getImm() == 0)) &&
- "expected i1 value");
- unsigned Mods = SISrcMods::OP_SEL_1;
- if (Root.getImm() == -1)
- Mods ^= (SISrcMods::NEG | SISrcMods::NEG_HI);
- return {{
- [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); } // src_mods
- }};
-}
-
-// Select neg, abs, or both neg and abs from the i16 immediate operans.
-InstructionSelector::ComplexRendererFns
-AMDGPUInstructionSelector::selectVOP3PModsNegAbs(MachineOperand &Root) const {
-
- assert(Root.isImm() && "Modifier for C must be an immediate");
-
- unsigned Mods = SISrcMods::OP_SEL_1;
- switch (Root.getImm()) {
- default: // Any other value will be silently ignored (considered as 0).
- break;
- case 1:
- Mods ^= SISrcMods::NEG;
- break;
- case 2:
- Mods ^= SISrcMods::ABS;
- break;
- case 3:
- Mods ^= (SISrcMods::NEG | SISrcMods::ABS);
- break;
- }
-
- return {{
- [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); } // src_mods
- }};
-}
-
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectWMMAOpSelVOP3PMods(
MachineOperand &Root) const {
@@ -7102,6 +7042,38 @@ void AMDGPUInstructionSelector::renderRoundMode(MachineInstrBuilder &MIB,
MIB.addImm((MI.getOperand(OpIdx).getImm() + 3) % 4);
}
+void AMDGPUInstructionSelector::renderVOP3PModsNeg(MachineInstrBuilder &MIB,
+ const MachineInstr &MI,
+ int OpIdx) const {
+ unsigned Mods = SISrcMods::OP_SEL_1;
+ if (MI.getOperand(OpIdx).getImm())
+ Mods ^= SISrcMods::NEG;
+ MIB.addImm((int64_t)Mods);
+}
+
+void AMDGPUInstructionSelector::renderVOP3PModsNegs(MachineInstrBuilder &MIB,
+ const MachineInstr &MI,
+ int OpIdx) const {
+ unsigned Mods = SISrcMods::OP_SEL_1;
+ if (MI.getOperand(OpIdx).getImm())
+ Mods ^= (SISrcMods::NEG | SISrcMods::NEG_HI);
+ MIB.addImm((int64_t)Mods);
+}
+
+void AMDGPUInstructionSelector::renderVOP3PModsNegAbs(MachineInstrBuilder &MIB,
+ const MachineInstr &MI,
+ int OpIdx) const {
+ unsigned Val = MI.getOperand(OpIdx).getImm();
+ unsigned Mods = SISrcMods::OP_SEL_1;
+ if (Val == 1)
+ Mods ^= SISrcMods::NEG;
+ if (Val == 2)
+ Mods ^= SISrcMods::ABS;
+ if (Val == 3)
+ Mods ^= (SISrcMods::NEG | SISrcMods::ABS);
+ MIB.addImm((int64_t)Mods);
+}
+
void AMDGPUInstructionSelector::renderPrefetchLoc(MachineInstrBuilder &MIB,
const MachineInstr &MI,
int OpIdx) const {
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
index 140e753bf976a..d0b5dc5e11e39 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
@@ -199,13 +199,6 @@ class AMDGPUInstructionSelector final : public InstructionSelector {
InstructionSelector::ComplexRendererFns
selectVOP3PModsDOT(MachineOperand &Root) const;
- InstructionSelector::ComplexRendererFns
- selectVOP3PModsNeg(MachineOperand &Root) const;
- InstructionSelector::ComplexRendererFns
- selectVOP3PModsNegs(MachineOperand &Root) const;
- InstructionSelector::ComplexRendererFns
- selectVOP3PModsNegAbs(MachineOperand &Root) const;
-
InstructionSelector::ComplexRendererFns
selectWMMAOpSelVOP3PMods(MachineOperand &Root) const;
@@ -419,6 +412,13 @@ class AMDGPUInstructionSelector final : public InstructionSelector {
void renderRoundMode(MachineInstrBuilder &MIB, const MachineInstr &MI,
int OpIdx) const;
+ void renderVOP3PModsNeg(MachineInstrBuilder &MIB, const MachineInstr &MI,
+ int OpIdx) const;
+ void renderVOP3PModsNegs(MachineInstrBuilder &MIB, const MachineInstr &MI,
+ int OpIdx) const;
+ void renderVOP3PModsNegAbs(MachineInstrBuilder &MIB, const MachineInstr &MI,
+ int OpIdx) const;
+
void renderPrefetchLoc(MachineInstrBuilder &MIB, const MachineInstr &MI,
int OpIdx) const;
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index 4698a5805ee0c..4380ff1bf33d4 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -908,6 +908,32 @@ def SupportedRoundMode : TImmLeaf<i32, [{
Imm == (int)RoundingMode::TowardNegative;
}]>;
+def VOP3PModsNeg : SDNodeXForm<timm, [{
+ unsigned Mods = SISrcMods::OP_SEL_1;
+ if (N->getZExtValue())
+ Mods ^= SISrcMods::NEG;
+ return CurDAG->getTargetConstant(Mods, SDLoc(N), MVT::i32);
+}]>;
+
+def VOP3PModsNegs : SDNodeXForm<timm, [{
+ unsigned Mods = SISrcMods::OP_SEL_1;
+ if (N->getZExtValue())
+ Mods ^= (SISrcMods::NEG | SISrcMods::NEG_HI);
+ return CurDAG->getTargetConstant(Mods, SDLoc(N), MVT::i32);
+}]>;
+
+def VOP3PModsNegAbs : SDNodeXForm<timm, [{
+ unsigned Val = N->getZExtValue();
+ unsigned Mods = SISrcMods::OP_SEL_1;
+ if (Val == 1)
+ Mods ^= SISrcMods::NEG;
+ if (Val == 2)
+ Mods ^= SISrcMods::ABS;
+ if (Val == 3)
+ Mods ^= (SISrcMods::NEG | SISrcMods::ABS);
+ return CurDAG->getTargetConstant(Mods, SDLoc(N), MVT::i32);
+}]>;
+
class bitextract_imm<int bitnum> : SDNodeXForm<imm, [{
uint64_t Imm = N->getZExtValue();
unsigned Bit = (Imm >> }] # bitnum # [{ ) & 1;
@@ -1647,9 +1673,6 @@ def VOP3OMods : ComplexPattern<untyped, 3, "SelectVOP3OMods">;
def VOP3PMods : ComplexPattern<untyped, 2, "SelectVOP3PMods">;
def VOP3PModsDOT : ComplexPattern<untyped, 2, "SelectVOP3PModsDOT">;
-def VOP3PModsNeg : ComplexPattern<untyped, 1, "SelectVOP3PModsNeg">;
-def VOP3PModsNegs : ComplexPattern<untyped, 1, "SelectVOP3PModsNegs">; // chfang: not use complex pattern?
-def VOP3PModsNegAbs : ComplexPattern<untyped, 1, "SelectVOP3PModsNegAbs">;
def WMMAOpSelVOP3PMods : ComplexPattern<untyped, 1, "SelectWMMAOpSelVOP3PMods">;
def WMMAModsF32NegAbs : ComplexPattern<untyped, 2, "SelectWMMAModsF32NegAbs">;
diff --git a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
index 95fcd4ac1c101..9cc68c91369a6 100644
--- a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
@@ -557,11 +557,11 @@ multiclass VOP3PDOTIUInst <string OpName, SDPatternOperator intrinsic_node> {
null_frag, 1>;
// Dot-iu instructions consider input as signed if imod neg bits are set. Thus
// Dot-iu Intrinsics have extra operands and require separate codegen pattern.
- def : GCNPat < (intrinsic_node (VOP3PModsNeg i32:$src0_mods), i32:$src0,
- (VOP3PModsNeg i32:$src1_mods), i32:$src1,
+ def : GCNPat < (intrinsic_node timm:$src0_mods, i32:$src0,
+ timm:$src1_mods, i32:$src1,
i32:$src2, (i1 timm:$clamp)),
- (!cast<Instruction>(NAME) $src0_mods, i32:$src0,
- $src1_mods, i32:$src1,
+ (!cast<Instruction>(NAME) (VOP3PModsNeg $src0_mods), i32:$src0,
+ (VOP3PModsNeg $src1_mods), i32:$src1,
(i32 8), i32:$src2, i1:$clamp)
>;
}
@@ -1302,11 +1302,11 @@ class WMMAOpSelPat<Instruction Inst, SDPatternOperator node, VOPProfile P> :
class WMMAUIClampPat<Instruction Inst, SDPatternOperator node, VOPProfile P> :
GCNPat < (P.DstVT (node
- (VOP3PModsNeg i32:$src0_modifiers), (P.Src0VT P.Src0VT:$src0),
- (VOP3PModsNeg i32:$src1_modifiers), (P.Src1VT P.Src1VT:$src1),
+ timm:$src0_modifiers, (P.Src0VT P.Src0VT:$src0),
+ timm:$src1_modifiers, (P.Src1VT P.Src1VT:$src1),
(P.Src2VT P.Src2VT:$src2), (i1 timm:$clamp)
)),
- (P.DstVT (Inst i32:$src0_modifiers, P.Src0VT:$src0, i32:$src1_modifiers, P.Src1VT:$src1, (i32 8), P.Src2VT:$src2, i1:$clamp))
+ (P.DstVT (Inst (VOP3PModsNeg $src0_modifiers), P.Src0VT:$src0, (VOP3PModsNeg $src1_modifiers), P.Src1VT:$src1, (i32 8), P.Src2VT:$src2, i1:$clamp))
>;
class WMMAOpcodeMapping<Instruction TwoAddr, Instruction ThreeAddr> {
@@ -1551,44 +1551,44 @@ class VOP3PWMMA_Profile<list<ValueType> ArgTy, bit _IsSWMMAC, int _IndexType,
bit IsAB_F16_IMod0 = !and(IsAB_F16, !not(HasIModOp));
bit IsAB_F32F64_IMod1 = !and(!or(IsAB_F64, IsAB_F32), HasIModOp);
bit IsAB_F16BF16_IMod1 = !and(!or(IsAB_F16, IsAB_BF16), HasIModOp);
- dag Src0InPat = !cond(IsAB_F32F64_IMod1 : (ins (VOP3PModsNeg i32:$src0_modifiers), Src0VT:$src0),
- IsAB_F16BF16_IMod1 : (ins (VOP3PModsNegs i32:$src0_modifiers), Src0VT:$src0),
+ dag Src0InPat = !cond(IsAB_F32F64_IMod1 : (ins timm:$src0_modifiers, Src0VT:$src0),
+ IsAB_F16BF16_IMod1 : (ins timm:$src0_modifiers, Src0VT:$src0),
IsAB_F16_IMod0 : (ins (Src0VT (WMMAModsF16Neg Src0VT:$src0, i32:$src0_modifiers))),
IsAB_BF16_IMod0 : (ins Src0VT:$src0),
- IsIU : (ins (VOP3PModsNeg i32:$src0_modifiers), Src0VT:$src0),
+ IsIU : (ins timm:$src0_modifiers, Src0VT:$src0),
HasMatrixFMT : (ins timm:$matrix_a_fmt, Src0VT:$src0),
NoABMods : (ins Src0VT:$src0));
- dag Src0OutPat = !cond(IsAB_F32F64_IMod1 : (ins i32:$src0_modifiers, Src0VT:$src0),
- IsAB_F16BF16_IMod1 : (ins i32:$src0_modifiers, Src0VT:$src0),
+ dag Src0OutPat = !cond(IsAB_F32F64_IMod1 : (ins (VOP3PModsNeg $src0_modifiers), Src0VT:$src0),
+ IsAB_F16BF16_IMod1 : (ins (VOP3PModsNegs $src0_modifiers), Src0VT:$src0),
IsAB_F16_IMod0 : (ins i32:$src0_modifiers, Src0VT:$src0),
IsAB_BF16_IMod0 : (ins (i32 8), Src0VT:$src0),
- IsIU : (ins i32:$src0_modifiers, Src0VT:$src0),
+ IsIU : (ins (VOP3PModsNeg $src0_modifiers), Src0VT:$src0),
NoABMods : (ins Src0VT:$src0));
- dag Src1InPat = !cond(IsAB_F32F64_IMod1 : (ins (VOP3PModsNeg i32:$src1_modifiers), Src1VT:$src1),
- IsAB_F16BF16_IMod1 : (ins (VOP3PModsNegs i32:$src1_modifiers), Src1VT:$src1),
+ dag Src1InPat = !cond(IsAB_F32F64_IMod1 : (ins timm:$src1_modifiers, Src1VT:$src1),
+ IsAB_F16BF16_IMod1 : (ins timm:$src1_modifiers, Src1VT:$src1),
IsAB_F16_IMod0 : (ins (Src1VT (WMMAModsF16Neg Src1VT:$src1, i32:$src1_modifiers))),
IsAB_BF16_IMod0 : (ins Src1VT:$src1),
- IsIU : (ins (VOP3PModsNeg i32:$src1_modifiers), Src1VT:$src1),
+ IsIU : (ins timm:$src1_modifiers, Src1VT:$src1),
HasMatrixFMT : (ins timm:$matrix_b_fmt, Src1VT:$src1),
NoABMods : (ins Src1VT:$src1));
- dag Src1OutPat = !cond(IsAB_F32F64_IMod1 : (ins i32:$src1_modifiers, Src1VT:$src1),
- IsAB_F16BF16_IMod1 : (ins i32:$src1_modifiers, Src1VT:$src1),
+ dag Src1OutPat = !cond(IsAB_F32F64_IMod1 : (ins (VOP3PModsNeg $src1_modifiers), Src1VT:$src1),
+ IsAB_F16BF16_IMod1 : (ins (VOP3PModsNegs $src1_modifiers), Src1VT:$src1),
IsAB_F16_IMod0 : (ins i32:$src1_modifiers, Src1VT:$src1),
IsAB_BF16_IMod0 : (ins (i32 8), Src1VT:$src1),
- IsIU : (ins i32:$src1_modifiers, Src1VT:$src1),
+ IsIU : (ins (VOP3PModsNeg $src1_modifiers), Src1VT:$src1),
NoABMods : (ins Src1VT:$src1));
bit IsC_IMod1 = !and(HasIModOp, IsWMMA, !not(IsIU), !not(IsXF32));
bit IsC_F32_IMod0 = !and(IsC_F32, !not(HasIModOp));
bit IsC_F16_IMod0 = !and(IsC_F16, !not(HasIModOp));
bit IsC_BF16_IMod0 = !and(IsC_BF16, !not(HasIModOp));
bit IsIUXF32 = !or(IsIU, IsXF32);
- dag Src2InPatWmma = !cond(IsC_IMod1 : (ins (VOP3PModsNegAbs i32:$src2_modifiers), Src2VT:$src2),
+ dag Src2InPatWmma = !cond(IsC_IMod1 : (ins timm:$src2_modifiers, Src2VT:$src2),
IsC_F32_IMod0 : (ins (Src2VT (WMMAModsF32NegAbs Src2VT:$src2, i32:$src2_modifiers))),
IsC_F16_IMod0 : (ins (Src2VT (WMMAModsF16NegAbs Src2VT:$src2, i32:$src2_modifiers))),
IsC_BF16_IMod0 : (ins Src2VT:$src2),
IsIUXF32 : (ins Src2VT:$src2),
IsSWMMAC : (ins));
- dag Src2OutPatWmma = !cond(IsC_IMod1 : (ins i32:$src2_modifiers, Src2VT:$src2),
+ dag Src2OutPatWmma = !cond(IsC_IMod1 : (ins (VOP3PModsNegAbs $src2_modifiers), Src2VT:$src2),
IsC_F32_IMod0 : (ins i32:$src2_modifiers, Src2VT:$src2),
IsC_F16_IMod0 : (ins i32:$src2_modifiers, Src2VT:$src2),
IsC_BF16_IMod0 : (ins (i32 8), Src2VT:$src2),
@@ -1604,8 +1604,8 @@ class VOP3PWMMA_Profile<list<ValueType> ArgTy, bit _IsSWMMAC, int _IndexType,
!eq(IndexType, 16): (ins i32:$src2, i32:$index_key_16bit),
!eq(IndexType, 32): (ins i64:$src2, i32:$index_key_32bit));
dag MatrixFMTOutPat = !if(HasMatrixFMT, (ins i32:$matrix_a_fmt, i32:$matrix_b_fmt), (ins));
- dag Src2InlineInPat = !con(!if(IsC_IMod1, (ins (VOP3PModsNegAbs i32:$src2_modifiers)), (ins)), (ins (Src2VT (WMMAVISrc Src2VT:$src2))));
- dag Src2InlineOutPat = !con(!if(IsIUXF32, (ins), !if(IsC_IMod1, (ins i32:$src2_modifiers), (ins (i32 8)))), (ins Src2VT:$src2));
+ dag Src2InlineInPat = !con(!if(IsC_IMod1, (ins timm:$src2_modifiers), (ins)), (ins (Src2VT (WMMAVISrc Src2VT:$src2))));
+ dag Src2InlineOutPat = !con(!if(IsIUXF32, (ins), !if(IsC_IMod1, (ins (VOP3PModsNegAbs $src2_modifiers)), (ins (i32 8)))), (ins Src2VT:$src2));
dag MatrixReuseInPat = !if(HasMatrixReuse, (ins timm:$matrix_a_reuse, timm:$matrix_b_reuse), (ins));
dag MatrixReuseOutModPat = !if(HasMatrixReuse, (ins i1:$matrix_a_reuse, i1:$matrix_b_reuse), (ins));
>From 25fe5a75df37d101d1b21722804e2e89045e66be Mon Sep 17 00:00:00 2001
From: Changpeng Fang <changpeng.fang at amd.com>
Date: Sun, 3 Aug 2025 23:09:30 -0700
Subject: [PATCH 2/3] [AMDGPU] Fix a wrong indentation
---
llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 212f4872303de..50c65287c708d 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -7046,8 +7046,8 @@ void AMDGPUInstructionSelector::renderVOP3PModsNeg(MachineInstrBuilder &MIB,
const MachineInstr &MI,
int OpIdx) const {
unsigned Mods = SISrcMods::OP_SEL_1;
- if (MI.getOperand(OpIdx).getImm())
- Mods ^= SISrcMods::NEG;
+ if (MI.getOperand(OpIdx).getImm())
+ Mods ^= SISrcMods::NEG;
MIB.addImm((int64_t)Mods);
}
>From f9bc1f63344bd158ceeee068dbd337e3be6ae188 Mon Sep 17 00:00:00 2001
From: Changpeng Fang <changpeng.fang at amd.com>
Date: Sun, 3 Aug 2025 23:59:05 -0700
Subject: [PATCH 3/3] [AMDGPU] Fix clang format
---
llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp | 12 ++++++------
llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h | 6 +++---
2 files changed, 9 insertions(+), 9 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 50c65287c708d..30d0e0d2da10b 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -7043,8 +7043,8 @@ void AMDGPUInstructionSelector::renderRoundMode(MachineInstrBuilder &MIB,
}
void AMDGPUInstructionSelector::renderVOP3PModsNeg(MachineInstrBuilder &MIB,
- const MachineInstr &MI,
- int OpIdx) const {
+ const MachineInstr &MI,
+ int OpIdx) const {
unsigned Mods = SISrcMods::OP_SEL_1;
if (MI.getOperand(OpIdx).getImm())
Mods ^= SISrcMods::NEG;
@@ -7052,8 +7052,8 @@ void AMDGPUInstructionSelector::renderVOP3PModsNeg(MachineInstrBuilder &MIB,
}
void AMDGPUInstructionSelector::renderVOP3PModsNegs(MachineInstrBuilder &MIB,
- const MachineInstr &MI,
- int OpIdx) const {
+ const MachineInstr &MI,
+ int OpIdx) const {
unsigned Mods = SISrcMods::OP_SEL_1;
if (MI.getOperand(OpIdx).getImm())
Mods ^= (SISrcMods::NEG | SISrcMods::NEG_HI);
@@ -7061,8 +7061,8 @@ void AMDGPUInstructionSelector::renderVOP3PModsNegs(MachineInstrBuilder &MIB,
}
void AMDGPUInstructionSelector::renderVOP3PModsNegAbs(MachineInstrBuilder &MIB,
- const MachineInstr &MI,
- int OpIdx) const {
+ const MachineInstr &MI,
+ int OpIdx) const {
unsigned Val = MI.getOperand(OpIdx).getImm();
unsigned Mods = SISrcMods::OP_SEL_1;
if (Val == 1)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
index d0b5dc5e11e39..c9da419846ee5 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
@@ -413,11 +413,11 @@ class AMDGPUInstructionSelector final : public InstructionSelector {
int OpIdx) const;
void renderVOP3PModsNeg(MachineInstrBuilder &MIB, const MachineInstr &MI,
- int OpIdx) const;
+ int OpIdx) const;
void renderVOP3PModsNegs(MachineInstrBuilder &MIB, const MachineInstr &MI,
- int OpIdx) const;
+ int OpIdx) const;
void renderVOP3PModsNegAbs(MachineInstrBuilder &MIB, const MachineInstr &MI,
- int OpIdx) const;
+ int OpIdx) const;
void renderPrefetchLoc(MachineInstrBuilder &MIB, const MachineInstr &MI,
int OpIdx) const;
More information about the llvm-commits
mailing list