[llvm] 4a305d4 - [AMDGPU] Exclude certain opcodes from being marked as single use (#91802)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Jun 12 02:43:27 PDT 2024
Author: Scott Egerton
Date: 2024-06-12T10:43:23+01:00
New Revision: 4a305d40a392e41521b6a427a6acd8d02d428f6c
URL: https://github.com/llvm/llvm-project/commit/4a305d40a392e41521b6a427a6acd8d02d428f6c
DIFF: https://github.com/llvm/llvm-project/commit/4a305d40a392e41521b6a427a6acd8d02d428f6c.diff
LOG: [AMDGPU] Exclude certain opcodes from being marked as single use (#91802)
The s_singleuse_vdst instruction is used to mark regions of instructions
that produce values that have only one use.
Certain instructions take more than one cycle to execute, resulting in
regions being incorrectly marked.
This patch excludes these multi-cycle instructions from being marked as
producing single-use values, as consuming single-use values,
or as both, depending on the instruction.
Added:
Modified:
llvm/lib/Target/AMDGPU/AMDGPUInsertSingleUseVDST.cpp
llvm/lib/Target/AMDGPU/SIInstrInfo.td
llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
llvm/lib/Target/AMDGPU/VOP1Instructions.td
llvm/lib/Target/AMDGPU/VOP2Instructions.td
llvm/lib/Target/AMDGPU/VOP3Instructions.td
llvm/lib/Target/AMDGPU/VOP3PInstructions.td
llvm/lib/Target/AMDGPU/VOPCInstructions.td
llvm/lib/Target/AMDGPU/VOPInstructions.td
llvm/test/CodeGen/AMDGPU/insert-singleuse-vdst.mir
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInsertSingleUseVDST.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInsertSingleUseVDST.cpp
index b78952ca3a622..43b3bf43fe56d 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInsertSingleUseVDST.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInsertSingleUseVDST.cpp
@@ -15,6 +15,7 @@
//===----------------------------------------------------------------------===//
#include "AMDGPU.h"
+#include "AMDGPUGenSearchableTables.inc"
#include "GCNSubtarget.h"
#include "SIInstrInfo.h"
#include "SIRegisterInfo.h"
@@ -214,12 +215,14 @@ class AMDGPUInsertSingleUseVDST : public MachineFunctionPass {
RegisterUseCount[Unit]++;
// Do not attempt to optimise across exec mask changes.
- if (MI.modifiesRegister(AMDGPU::EXEC, TRI)) {
+ if (MI.modifiesRegister(AMDGPU::EXEC, TRI) ||
+ AMDGPU::isInvalidSingleUseConsumerInst(MI.getOpcode())) {
for (auto &UsedReg : RegisterUseCount)
UsedReg.second = 2;
}
- if (!SIInstrInfo::isVALU(MI))
+ if (!SIInstrInfo::isVALU(MI) ||
+ AMDGPU::isInvalidSingleUseProducerInst(MI.getOpcode()))
continue;
if (AllProducerOperandsAreSingleUse) {
SingleUseProducerPositions.push_back({VALUInstrCount, &MI});
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index 40289f2addfdf..d0cd25030de57 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -2271,6 +2271,8 @@ class VOPProfile <list<ValueType> _ArgVT, bit _EnableClamp = 0> {
field bit EnableClamp = _EnableClamp;
field bit IsTrue16 = 0;
field bit IsRealTrue16 = 0;
+ field bit IsInvalidSingleUseConsumer = 0;
+ field bit IsInvalidSingleUseProducer = 0;
field ValueType DstVT = ArgVT[0];
field ValueType Src0VT = ArgVT[1];
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index 4b34fb27632a2..ef635fd081e11 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -373,10 +373,18 @@ struct VOPTrue16Info {
bool IsTrue16;
};
+struct SingleUseExceptionInfo {
+ uint16_t Opcode;
+ bool IsInvalidSingleUseConsumer;
+ bool IsInvalidSingleUseProducer;
+};
+
#define GET_MTBUFInfoTable_DECL
#define GET_MTBUFInfoTable_IMPL
#define GET_MUBUFInfoTable_DECL
#define GET_MUBUFInfoTable_IMPL
+#define GET_SingleUseExceptionTable_DECL
+#define GET_SingleUseExceptionTable_IMPL
#define GET_SMInfoTable_DECL
#define GET_SMInfoTable_IMPL
#define GET_VOP1InfoTable_DECL
@@ -608,6 +616,16 @@ bool isTrue16Inst(unsigned Opc) {
return Info ? Info->IsTrue16 : false;
}
+bool isInvalidSingleUseConsumerInst(unsigned Opc) {
+ const SingleUseExceptionInfo *Info = getSingleUseExceptionHelper(Opc);
+ return Info && Info->IsInvalidSingleUseConsumer;
+}
+
+bool isInvalidSingleUseProducerInst(unsigned Opc) {
+ const SingleUseExceptionInfo *Info = getSingleUseExceptionHelper(Opc);
+ return Info && Info->IsInvalidSingleUseProducer;
+}
+
unsigned mapWMMA2AddrTo3AddrOpcode(unsigned Opc) {
const WMMAOpcodeMappingInfo *Info = getWMMAMappingInfoFrom2AddrOpcode(Opc);
return Info ? Info->Opcode3Addr : ~0u;
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index cf8236b8e23b7..d4729b819c03c 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -856,6 +856,12 @@ getVOPDInstInfo(unsigned VOPDOpcode, const MCInstrInfo *InstrInfo);
LLVM_READONLY
bool isTrue16Inst(unsigned Opc);
+LLVM_READONLY
+bool isInvalidSingleUseConsumerInst(unsigned Opc);
+
+LLVM_READONLY
+bool isInvalidSingleUseProducerInst(unsigned Opc);
+
LLVM_READONLY
unsigned mapWMMA2AddrTo3AddrOpcode(unsigned Opc);
diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
index b96c41c1e12af..d39b1a47f0526 100644
--- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
@@ -252,6 +252,7 @@ def V_READFIRSTLANE_B32 : VOP1_Pseudo <"v_readfirstlane_b32", VOP_READFIRSTLANE,
getVOP1Pat<int_amdgcn_readfirstlane,
VOP_READFIRSTLANE>.ret, 1> {
let isConvergent = 1;
+ let IsInvalidSingleUseConsumer = 1;
}
let isReMaterializable = 1 in {
@@ -362,6 +363,7 @@ defm V_CLREXCP : VOP1Inst <"v_clrexcp", VOP_NO_EXT<VOP_NONE>>;
def VOP_MOVRELS : VOPProfile<[i32, i32, untyped, untyped]> {
let Src0RC32 = VRegSrc_32;
let Src0RC64 = VRegSrc_32;
+ let IsInvalidSingleUseConsumer = 1;
}
// Special case because there are no true output operands. Hack vdst
@@ -405,8 +407,12 @@ class VOP_MOVREL<RegisterOperand Src1RC> : VOPProfile<[untyped, i32, untyped, un
let EmitDst = 1; // force vdst emission
}
-def VOP_MOVRELD : VOP_MOVREL<VSrc_b32>;
-def VOP_MOVRELSD : VOP_MOVREL<VRegSrc_32>;
+let IsInvalidSingleUseProducer = 1 in {
+ def VOP_MOVRELD : VOP_MOVREL<VSrc_b32>;
+ def VOP_MOVRELSD : VOP_MOVREL<VRegSrc_32> {
+ let IsInvalidSingleUseConsumer = 1;
+ }
+}
let SubtargetPredicate = HasMovrel, Uses = [M0, EXEC] in {
// v_movreld_b32 is a special case because the destination output
@@ -535,6 +541,7 @@ let SubtargetPredicate = isGFX9Plus in {
let Constraints = "$vdst = $src1, $vdst1 = $src0";
let DisableEncoding = "$vdst1,$src1";
let SchedRW = [Write64Bit, Write64Bit];
+ let IsInvalidSingleUseConsumer = 1;
}
let isReMaterializable = 1 in
@@ -699,6 +706,8 @@ let SubtargetPredicate = isGFX10Plus in {
let Constraints = "$vdst = $src1, $vdst1 = $src0";
let DisableEncoding = "$vdst1,$src1";
let SchedRW = [Write64Bit, Write64Bit];
+ let IsInvalidSingleUseConsumer = 1;
+ let IsInvalidSingleUseProducer = 1;
}
} // End Uses = [M0]
} // End SubtargetPredicate = isGFX10Plus
@@ -720,7 +729,10 @@ let SubtargetPredicate = isGFX11Plus in {
def V_PERMLANE64_B32 : VOP1_Pseudo<"v_permlane64_b32", VOP_MOVRELS,
getVOP1Pat<int_amdgcn_permlane64,
VOP_MOVRELS>.ret,
- /*VOP1Only=*/ 1>;
+ /*VOP1Only=*/ 1> {
+ let IsInvalidSingleUseConsumer = 1;
+ let IsInvalidSingleUseProducer = 1;
+ }
defm V_MOV_B16_t16 : VOP1Inst<"v_mov_b16_t16", VOPProfile_True16<VOP_I16_I16>>;
defm V_NOT_B16 : VOP1Inst_t16<"v_not_b16", VOP_I16_I16>;
defm V_CVT_I32_I16 : VOP1Inst_t16<"v_cvt_i32_i16", VOP_I32_I16>;
diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
index ccb5b33dbdc4b..bed8d94a26dcb 100644
--- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
@@ -779,12 +779,14 @@ defm V_SUBREV_U32 : VOP2Inst <"v_subrev_u32", VOP_I32_I32_I32_ARITH, null_frag,
} // End isCommutable = 1
// These are special and do not read the exec mask.
-let isConvergent = 1, Uses = []<Register> in {
+let isConvergent = 1, Uses = []<Register>, IsInvalidSingleUseConsumer = 1 in {
def V_READLANE_B32 : VOP2_Pseudo<"v_readlane_b32", VOP_READLANE,
[(set i32:$vdst, (int_amdgcn_readlane i32:$src0, i32:$src1))]>;
let IsNeverUniform = 1, Constraints = "$vdst = $vdst_in", DisableEncoding="$vdst_in" in {
def V_WRITELANE_B32 : VOP2_Pseudo<"v_writelane_b32", VOP_WRITELANE,
- [(set i32:$vdst, (int_amdgcn_writelane i32:$src0, i32:$src1, i32:$vdst_in))]>;
+ [(set i32:$vdst, (int_amdgcn_writelane i32:$src0, i32:$src1, i32:$vdst_in))]> {
+ let IsInvalidSingleUseProducer = 1;
+ }
} // End IsNeverUniform, $vdst = $vdst_in, DisableEncoding $vdst_in
} // End isConvergent = 1
diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
index 479c0aaf01744..3d93764bd7ca1 100644
--- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
@@ -155,12 +155,12 @@ defm V_MAX_F64 : VOP3Inst <"v_max_f64", VOP3_Profile<VOP_F64_F64_F64>, fmaxnum_l
} // End SubtargetPredicate = isNotGFX12Plus
} // End SchedRW = [WriteDoubleAdd]
-let SchedRW = [WriteIntMul] in {
+let SchedRW = [WriteIntMul], IsInvalidSingleUseConsumer = 1 in {
defm V_MUL_LO_U32 : VOP3Inst <"v_mul_lo_u32", V_MUL_PROF<VOP_I32_I32_I32>, DivergentBinFrag<mul>>;
defm V_MUL_HI_U32 : VOP3Inst <"v_mul_hi_u32", V_MUL_PROF<VOP_I32_I32_I32>, mulhu>;
defm V_MUL_LO_I32 : VOP3Inst <"v_mul_lo_i32", V_MUL_PROF<VOP_I32_I32_I32>>;
defm V_MUL_HI_I32 : VOP3Inst <"v_mul_hi_i32", V_MUL_PROF<VOP_I32_I32_I32>, mulhs>;
-} // End SchedRW = [WriteIntMul]
+} // End SchedRW = [WriteIntMul], IsInvalidSingleUseConsumer = 1
let SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0 in {
defm V_MINIMUM_F32 : VOP3Inst <"v_minimum_f32", VOP3_Profile<VOP_F32_F32_F32>, DivergentBinFrag<fminimum>>;
@@ -258,9 +258,9 @@ let mayRaiseFPException = 0 in { // Seems suspicious but manual doesn't say it d
let isReMaterializable = 1 in
defm V_MSAD_U8 : VOP3Inst <"v_msad_u8", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>>;
-let Constraints = "@earlyclobber $vdst" in {
+let Constraints = "@earlyclobber $vdst", IsInvalidSingleUseConsumer = 1 in {
defm V_MQSAD_PK_U16_U8 : VOP3Inst <"v_mqsad_pk_u16_u8", VOP3_Profile<VOP_I64_I64_I32_I64, VOP3_CLAMP>>;
-} // End Constraints = "@earlyclobber $vdst"
+} // End Constraints = "@earlyclobber $vdst", IsInvalidSingleUseConsumer = 1
let isReMaterializable = 1 in {
@@ -275,14 +275,16 @@ let SchedRW = [Write64Bit] in {
defm V_ASHR_I64 : VOP3Inst <"v_ashr_i64", VOP3_Profile<VOP_I64_I64_I32>, csra_64>;
} // End SubtargetPredicate = isGFX6GFX7
+ let IsInvalidSingleUseConsumer = 1 in {
let SubtargetPredicate = isGFX8Plus in {
defm V_LSHRREV_B64 : VOP3Inst <"v_lshrrev_b64", VOP3_Profile<VOP_I64_I32_I64>, clshr_rev_64>;
defm V_ASHRREV_I64 : VOP3Inst <"v_ashrrev_i64", VOP3_Profile<VOP_I64_I32_I64>, cashr_rev_64>;
- } // End SubtargetPredicate = isGFX8Plus
+ } // End SubtargetPredicate = isGFX8Plus, , IsInvalidSingleUseConsumer = 1
let SubtargetPredicate = isGFX8GFX9GFX10GFX11 in {
defm V_LSHLREV_B64 : VOP3Inst <"v_lshlrev_b64", VOP3_Profile<VOP_I64_I32_I64>, clshl_rev_64>;
} // End SubtargetPredicate = isGFX8GFX9GFX10GFX11
+ } // End IsInvalidSingleUseConsumer = 1
} // End SchedRW = [Write64Bit]
} // End isReMaterializable = 1
@@ -307,14 +309,14 @@ def VOPProfileMQSAD : VOP3_Profile<VOP_V4I32_I64_I32_V4I32, VOP3_CLAMP> {
let HasModifiers = 0;
}
-let SubtargetPredicate = isGFX7Plus in {
+let SubtargetPredicate = isGFX7Plus, IsInvalidSingleUseConsumer = 1 in {
let Constraints = "@earlyclobber $vdst", SchedRW = [WriteQuarterRate32] in {
defm V_QSAD_PK_U16_U8 : VOP3Inst <"v_qsad_pk_u16_u8", VOP3_Profile<VOP_I64_I64_I32_I64, VOP3_CLAMP>>;
defm V_MQSAD_U32_U8 : VOP3Inst <"v_mqsad_u32_u8", VOPProfileMQSAD>;
} // End Constraints = "@earlyclobber $vdst", SchedRW = [WriteQuarterRate32]
-} // End SubtargetPredicate = isGFX7Plus
+} // End SubtargetPredicate = isGFX7Plus, IsInvalidSingleUseConsumer = 1
-let isCommutable = 1, SchedRW = [WriteIntMul, WriteSALU] in {
+let isCommutable = 1, SchedRW = [WriteIntMul, WriteSALU], IsInvalidSingleUseConsumer = 1 in {
let SubtargetPredicate = isGFX7Plus, OtherPredicates = [HasNotMADIntraFwdBug] in {
defm V_MAD_U64_U32 : VOP3Inst <"v_mad_u64_u32", VOP3b_I64_I1_I32_I32_I64>;
defm V_MAD_I64_I32 : VOP3Inst <"v_mad_i64_i32", VOP3b_I64_I1_I32_I32_I64>;
@@ -324,7 +326,7 @@ let isCommutable = 1, SchedRW = [WriteIntMul, WriteSALU] in {
defm V_MAD_U64_U32_gfx11 : VOP3Inst <"v_mad_u64_u32", VOP3b_I64_I1_I32_I32_I64>;
defm V_MAD_I64_I32_gfx11 : VOP3Inst <"v_mad_i64_i32", VOP3b_I64_I1_I32_I32_I64>;
}
-} // End isCommutable = 1, SchedRW = [WriteIntMul, WriteSALU]
+} // End isCommutable = 1, SchedRW = [WriteIntMul, WriteSALU], IsInvalidSingleUseConsumer = 1
let FPDPRounding = 1 in {
@@ -859,10 +861,10 @@ let SubtargetPredicate = isGFX10Plus in {
} // End isCommutable = 1, isReMaterializable = 1
def : ThreeOp_i32_Pats<xor, xor, V_XOR3_B32_e64>;
- let Constraints = "$vdst = $vdst_in", DisableEncoding="$vdst_in" in {
+ let Constraints = "$vdst = $vdst_in", DisableEncoding="$vdst_in", IsInvalidSingleUseConsumer = 1, IsInvalidSingleUseProducer = 1 in {
defm V_PERMLANE16_B32 : VOP3Inst<"v_permlane16_b32", VOP3_PERMLANE_Profile>;
defm V_PERMLANEX16_B32 : VOP3Inst<"v_permlanex16_b32", VOP3_PERMLANE_Profile>;
- } // End $vdst = $vdst_in, DisableEncoding $vdst_in
+ } // End $vdst = $vdst_in, DisableEncoding $vdst_in, IsInvalidSingleUseConsumer = 1, IsInvalidSingleUseProducer = 1
def : PermlanePat<int_amdgcn_permlane16, V_PERMLANE16_B32_e64>;
def : PermlanePat<int_amdgcn_permlanex16, V_PERMLANEX16_B32_e64>;
@@ -1275,11 +1277,12 @@ let AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10" in {
}
} // End AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10"
-defm V_READLANE_B32 : VOP3_Real_No_Suffix_gfx10<0x360>;
-
-let InOperandList = (ins SSrcOrLds_b32:$src0, SCSrc_b32:$src1, VGPR_32:$vdst_in) in {
- defm V_WRITELANE_B32 : VOP3_Real_No_Suffix_gfx10<0x361>;
-} // End InOperandList = (ins SSrcOrLds_b32:$src0, SCSrc_b32:$src1, VGPR_32:$vdst_in)
+let IsInvalidSingleUseConsumer = 1 in {
+ defm V_READLANE_B32 : VOP3_Real_No_Suffix_gfx10<0x360>;
+ let InOperandList = (ins SSrcOrLds_b32:$src0, SCSrc_b32:$src1, VGPR_32:$vdst_in), IsInvalidSingleUseProducer = 1 in {
+ defm V_WRITELANE_B32 : VOP3_Real_No_Suffix_gfx10<0x361>;
+ } // End InOperandList = (ins SSrcOrLds_b32:$src0, SCSrc_b32: $src1, VGPR_32:$vdst_in), IsInvalidSingleUseProducer = 1
+} // End IsInvalidSingleUseConsumer = 1
let SubtargetPredicate = isGFX10Before1030 in {
defm V_MUL_LO_I32 : VOP3_Real_gfx10<0x16b>;
diff --git a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
index 4c78bd94458d2..2ede7ec29dddf 100644
--- a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
@@ -382,15 +382,19 @@ defm V_DOT2_F32_F16 : VOP3PInst<"v_dot2_f32_f16",
AMDGPUfdot2, 1/*ExplicitClamp*/>;
let OtherPredicates = [HasDot7Insts] in {
-defm V_DOT4_U32_U8 : VOP3PInst<"v_dot4_u32_u8",
- VOP3P_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>, int_amdgcn_udot4, 1>;
+let IsInvalidSingleUseConsumer = 1 in {
+ defm V_DOT4_U32_U8 : VOP3PInst<"v_dot4_u32_u8",
+ VOP3P_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>, int_amdgcn_udot4, 1>;
+}
defm V_DOT8_U32_U4 : VOP3PInst<"v_dot8_u32_u4",
VOP3P_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>, int_amdgcn_udot8, 1>;
} // End OtherPredicates = [HasDot7Insts]
let OtherPredicates = [HasDot1Insts] in {
-defm V_DOT4_I32_I8 : VOP3PInst<"v_dot4_i32_i8",
- VOP3P_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>, int_amdgcn_sdot4, 1>;
+let IsInvalidSingleUseConsumer = 1 in {
+ defm V_DOT4_I32_I8 : VOP3PInst<"v_dot4_i32_i8",
+ VOP3P_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>, int_amdgcn_sdot4, 1>;
+}
defm V_DOT8_I32_I4 : VOP3PInst<"v_dot8_i32_i4",
VOP3P_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>, int_amdgcn_sdot8, 1>;
} // End OtherPredicates = [HasDot1Insts]
diff --git a/llvm/lib/Target/AMDGPU/VOPCInstructions.td b/llvm/lib/Target/AMDGPU/VOPCInstructions.td
index 372c4f533629c..3bcee28a2cb78 100644
--- a/llvm/lib/Target/AMDGPU/VOPCInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOPCInstructions.td
@@ -435,8 +435,10 @@ multiclass VOPC_I16 <string opName, SDPatternOperator cond = COND_NULL,
multiclass VOPC_I32 <string opName, SDPatternOperator cond = COND_NULL, string revOp = opName> :
VOPC_Pseudos <opName, VOPC_I1_I32_I32, cond, revOp, 0>;
-multiclass VOPC_I64 <string opName, SDPatternOperator cond = COND_NULL, string revOp = opName> :
- VOPC_Pseudos <opName, VOPC_I1_I64_I64, cond, revOp, 0>;
+let IsInvalidSingleUseConsumer = 1 in {
+ multiclass VOPC_I64 <string opName, SDPatternOperator cond = COND_NULL, string revOp = opName> :
+ VOPC_Pseudos <opName, VOPC_I1_I64_I64, cond, revOp, 0>;
+}
multiclass VOPCX_F16<string opName, string revOp = opName> {
let OtherPredicates = [Has16BitInsts], True16Predicate = NotHasTrue16BitInsts in {
@@ -465,8 +467,10 @@ multiclass VOPCX_I16<string opName, string revOp = opName> {
multiclass VOPCX_I32 <string opName, string revOp = opName> :
VOPCX_Pseudos <opName, VOPC_I1_I32_I32, VOPC_I32_I32, COND_NULL, revOp>;
-multiclass VOPCX_I64 <string opName, string revOp = opName> :
- VOPCX_Pseudos <opName, VOPC_I1_I64_I64, VOPC_I64_I64, COND_NULL, revOp>;
+let IsInvalidSingleUseConsumer = 1 in {
+ multiclass VOPCX_I64 <string opName, string revOp = opName> :
+ VOPCX_Pseudos <opName, VOPC_I1_I64_I64, VOPC_I64_I64, COND_NULL, revOp>;
+}
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AMDGPU/VOPInstructions.td b/llvm/lib/Target/AMDGPU/VOPInstructions.td
index 5d1573d8dec19..eb260610e37ff 100644
--- a/llvm/lib/Target/AMDGPU/VOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOPInstructions.td
@@ -17,6 +17,8 @@ class LetDummies {
bit isReMaterializable;
bit isAsCheapAsAMove;
bit FPDPRounding;
+ bit IsInvalidSingleUseConsumer;
+ bit IsInvalidSingleUseProducer;
Predicate SubtargetPredicate;
string Constraints;
string DisableEncoding;
@@ -81,6 +83,8 @@ class VOP_Pseudo <string opName, string suffix, VOPProfile P, dag outs, dag ins,
string Mnemonic = opName;
Instruction Opcode = !cast<Instruction>(NAME);
bit IsTrue16 = P.IsTrue16;
+ bit IsInvalidSingleUseConsumer = P.IsInvalidSingleUseConsumer;
+ bit IsInvalidSingleUseProducer = P.IsInvalidSingleUseProducer;
VOPProfile Pfl = P;
string AsmOperands;
@@ -175,6 +179,8 @@ class VOP3P_Pseudo <string opName, VOPProfile P, list<dag> pattern = []> :
class VOP_Real<VOP_Pseudo ps> {
Instruction Opcode = !cast<Instruction>(NAME);
bit IsSingle = ps.Pfl.IsSingle;
+ bit IsInvalidSingleUseConsumer = ps.Pfl.IsInvalidSingleUseConsumer;
+ bit IsInvalidSingleUseProducer = ps.Pfl.IsInvalidSingleUseProducer;
}
class VOP3_Real <VOP_Pseudo ps, int EncodingFamily, string asm_name = ps.Mnemonic> :
@@ -823,9 +829,7 @@ class VOP3P_DPPe_Common<bits<7> op, VOPProfile P> : VOP3P_DPPe_Common_Base<op, P
class VOP_DPP_Pseudo <string OpName, VOPProfile P, list<dag> pattern=[],
dag Ins = P.InsDPP, string asmOps = P.AsmDPP> :
- InstSI <P.OutsDPP, Ins, OpName#asmOps, pattern>,
- VOP <OpName>,
- SIMCInstr <OpName#"_dpp", SIEncodingFamily.NONE> {
+ VOP_Pseudo<OpName, "_dpp", P, P.OutsDPP, Ins, asmOps, pattern> {
let isPseudo = 1;
let isCodeGenOnly = 1;
@@ -857,6 +861,9 @@ class VOP_DPP_Pseudo <string OpName, VOPProfile P, list<dag> pattern=[],
let DisableEncoding = !if(P.NumSrcArgs, P.TieRegDPP, "");
let DecoderNamespace = "GFX8";
+ let IsInvalidSingleUseConsumer = !not(VINTERP);
+ let IsInvalidSingleUseProducer = !not(VINTERP);
+
VOPProfile Pfl = P;
}
@@ -1725,3 +1732,12 @@ def VOPTrue16Table : GenericTable {
let PrimaryKey = ["Opcode"];
let PrimaryKeyName = "getTrue16OpcodeHelper";
}
+
+def SingleUseExceptionTable : GenericTable {
+ let FilterClass = "VOP_Pseudo";
+ let CppTypeName = "SingleUseExceptionInfo";
+ let Fields = ["Opcode", "IsInvalidSingleUseConsumer", "IsInvalidSingleUseProducer"];
+
+ let PrimaryKey = ["Opcode"];
+ let PrimaryKeyName = "getSingleUseExceptionHelper";
+}
diff --git a/llvm/test/CodeGen/AMDGPU/insert-singleuse-vdst.mir b/llvm/test/CodeGen/AMDGPU/insert-singleuse-vdst.mir
index f2a5139b73b10..9e65ce329df43 100644
--- a/llvm/test/CodeGen/AMDGPU/insert-singleuse-vdst.mir
+++ b/llvm/test/CodeGen/AMDGPU/insert-singleuse-vdst.mir
@@ -1,6 +1,5 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4
-# RUN: llc -mtriple=amdgcn -mcpu=gfx1150 -mattr=+wavefrontsize32,-wavefrontsize64 -verify-machineinstrs -run-pass=amdgpu-insert-single-use-vdst %s -o - | FileCheck %s
-# RUN: llc -mtriple=amdgcn -mcpu=gfx1150 -mattr=-wavefrontsize32,+wavefrontsize64 -verify-machineinstrs -run-pass=amdgpu-insert-single-use-vdst %s -o - | FileCheck %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1150 -verify-machineinstrs -run-pass=amdgpu-insert-single-use-vdst %s -o - | FileCheck %s
# One single-use producer.
---
@@ -38,14 +37,14 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: S_SINGLEUSE_VDST 1
; CHECK-NEXT: $vgpr2_vgpr3 = V_LSHLREV_B64_e64 0, $vgpr0_vgpr1, implicit $exec
- ; CHECK-NEXT: $vgpr4_vgpr5 = V_LSHLREV_B64_e64 0, $vgpr2_vgpr3, implicit $exec
+ ; CHECK-NEXT: $vgpr4_vgpr5 = V_MOV_B64_e64 $vgpr2_vgpr3, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: liveins: $vgpr4_vgpr5
bb.0:
liveins: $vgpr0_vgpr1
$vgpr2_vgpr3 = V_LSHLREV_B64_e64 0, $vgpr0_vgpr1, implicit $exec
- $vgpr4_vgpr5 = V_LSHLREV_B64_e64 0, $vgpr2_vgpr3, implicit $exec
+ $vgpr4_vgpr5 = V_MOV_B64_e64 $vgpr2_vgpr3, implicit $exec
bb.1:
liveins: $vgpr4_vgpr5
...
@@ -1238,3 +1237,184 @@ body: |
liveins: $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr30, $vgpr31, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36
...
+
+# Tests for multi-cycle instructions that are explicitly excluded.
+
+# Valid producers but invalid consumer opcodes.
+---
+name: v_mul_hi_u32_e64
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: name: v_mul_hi_u32_e64
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x80000000)
+ ; CHECK-NEXT: liveins: $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr2 = V_MUL_HI_U32_e64 $vgpr0, $vgpr1, implicit $exec
+ ; CHECK-NEXT: $vgpr3 = V_MOV_B32_e32 $vgpr2, implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: liveins: $vgpr0, $vgpr3
+ bb.0:
+ liveins: $vgpr0
+ $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec
+ $vgpr2 = V_MUL_HI_U32_e64 $vgpr0, $vgpr1, implicit $exec
+ $vgpr3 = V_MOV_B32_e32 $vgpr2, implicit $exec
+ bb.1:
+ liveins: $vgpr0, $vgpr3
+...
+
+---
+name: v_cmpx_t_u64_e64
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: name: v_cmpx_t_u64_e64
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x80000000)
+ ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $sgpr0 = V_CMPX_EQ_U64_e64 $vgpr0_vgpr1, $vgpr2_vgpr3, implicit-def $exec, implicit $exec
+ ; CHECK-NEXT: S_BRANCH %bb.1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: liveins: $vgpr0
+ bb.0:
+ liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec
+ $sgpr0 = V_CMPX_EQ_U64_e64 $vgpr0_vgpr1, $vgpr2_vgpr3, implicit-def $exec, implicit $exec
+ S_BRANCH %bb.1
+ bb.1:
+ liveins: $vgpr0
+...
+
+---
+name: v_lshlrev_b64_e64
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: name: v_lshlrev_b64_e64
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x80000000)
+ ; CHECK-NEXT: liveins: $vgpr0_vgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $vgpr2_vgpr3 = V_MOV_B64_e64 $vgpr0_vgpr1, implicit $exec
+ ; CHECK-NEXT: $vgpr4_vgpr5 = V_LSHLREV_B64_e64 0, $vgpr2_vgpr3, implicit $exec
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr6_vgpr7 = V_LSHLREV_B64_e64 0, $vgpr4_vgpr5, implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: liveins: $vgpr4_vgpr5
+ bb.0:
+ liveins: $vgpr0_vgpr1
+ $vgpr2_vgpr3 = V_MOV_B64_e64 $vgpr0_vgpr1, implicit $exec
+ $vgpr4_vgpr5 = V_LSHLREV_B64_e64 0, $vgpr2_vgpr3, implicit $exec
+ $vgpr6_vgpr7 = V_LSHLREV_B64_e64 0, $vgpr4_vgpr5, implicit $exec
+ bb.1:
+ liveins: $vgpr4_vgpr5
+...
+
+# Invalid producers but valid consumer opcodes.
+---
+name: v_movereld_b32_e32
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: name: v_movereld_b32_e32
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x80000000)
+ ; CHECK-NEXT: liveins: $vgpr0, $vgpr2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $m0 = S_MOV_B32 0
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec
+ ; CHECK-NEXT: V_MOVRELD_B32_e32 $vgpr2, $vgpr1, implicit $m0, implicit $exec, implicit-def $vgpr1_vgpr2, implicit undef $vgpr1_vgpr2(tied-def 4)
+ ; CHECK-NEXT: $vgpr3 = V_ADD_U32_e32 $vgpr2, $vgpr1, implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: liveins: $vgpr3
+ bb.0:
+ liveins: $vgpr0, $vgpr2
+ $m0 = S_MOV_B32 0
+ $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec
+ V_MOVRELD_B32_e32 $vgpr2, $vgpr1, implicit $m0, implicit $exec, implicit-def $vgpr1_vgpr2, implicit undef $vgpr1_vgpr2(tied-def 4)
+ $vgpr3 = V_ADD_U32_e32 $vgpr2, $vgpr1, implicit $exec
+ bb.1:
+ liveins: $vgpr3
+...
+
+# Invalid producers and invalid consumer opcodes.
+---
+name: v_writelane_b32
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: name: v_writelane_b32
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x80000000)
+ ; CHECK-NEXT: liveins: $vgpr0, $sgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec
+ ; CHECK-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr0, 0, $vgpr1
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr2 = V_MOV_B32_e32 $vgpr1, implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: liveins: $vgpr0
+ bb.0:
+ liveins: $vgpr0, $sgpr0
+ $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec
+ $vgpr1 = V_WRITELANE_B32 $sgpr0, 0, $vgpr1
+ $vgpr2 = V_MOV_B32_e32 $vgpr1, implicit $exec
+ bb.1:
+ liveins: $vgpr0
+...
+
+# DPP instructions cannot be single use producers or consumers
+---
+name: V_ADD_NC_U32_dpp
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: name: V_ADD_NC_U32_dpp
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x80000000)
+ ; CHECK-NEXT: liveins: $vgpr0, $vcc
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $vgpr0 = V_ADDC_U32_dpp $vgpr0, $vgpr0, $vgpr0, 1, 15, 15, 1, implicit-def $vcc_lo, implicit $vcc_lo, implicit $exec
+ ; CHECK-NEXT: $vgpr0 = V_ADDC_U32_dpp $vgpr0, $vgpr0, $vgpr0, 1, 15, 15, 1, implicit-def $vcc_lo, implicit $vcc_lo, implicit $exec
+ ; CHECK-NEXT: $vgpr0 = V_ADDC_U32_dpp $vgpr0, $vgpr0, $vgpr0, 1, 15, 15, 1, implicit-def $vcc_lo, implicit $vcc_lo, implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: liveins: $vgpr0
+ bb.0:
+ liveins: $vgpr0, $vcc
+ $vgpr0 = V_ADDC_U32_dpp $vgpr0, $vgpr0, $vgpr0, 1, 15, 15, 1, implicit-def $vcc, implicit $vcc, implicit $exec
+ $vgpr0 = V_ADDC_U32_dpp $vgpr0, $vgpr0, $vgpr0, 1, 15, 15, 1, implicit-def $vcc, implicit $vcc, implicit $exec
+ $vgpr0 = V_ADDC_U32_dpp $vgpr0, $vgpr0, $vgpr0, 1, 15, 15, 1, implicit-def $vcc, implicit $vcc, implicit $exec
+ bb.1:
+ liveins: $vgpr0
+...
+
+# Exception to the rule that dpp instructions
+# cannot be single use producers or consumers
+---
+name: V_INTERP_MOV_F32
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: name: V_INTERP_MOV_F32
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
+ ; CHECK-NEXT: $vgpr0 = V_INTERP_MOV_F32 0, 0, 0, implicit $mode, implicit $m0, implicit $exec
+ ; CHECK-NEXT: $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: liveins: $vgpr1
+ bb.0:
+ $vgpr0 = V_INTERP_MOV_F32 0, 0, 0, implicit $mode, implicit $m0, implicit $exec
+ $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec
+ bb.1:
+ liveins: $vgpr1
+...
+
More information about the llvm-commits
mailing list