[llvm] [AMDGPU] CodeGen for GFX12 VIMAGE and VSAMPLE instructions (PR #75488)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Dec 14 08:00:15 PST 2023
llvmbot wrote:
@llvm/pr-subscribers-llvm-globalisel
@llvm/pr-subscribers-backend-amdgpu
Author: Mirko Brkušanin (mbrkusanin)
Changes
---
Patch is 1.20 MiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/75488.diff
57 Files Affected:
- (modified) llvm/include/llvm/IR/IntrinsicsAMDGPU.td (+3-1)
- (modified) llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp (+13-5)
- (modified) llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp (+14-8)
- (modified) llvm/lib/Target/AMDGPU/SIISelLowering.cpp (+33-21)
- (modified) llvm/lib/Target/AMDGPU/SIInstrInfo.cpp (+22-15)
- (modified) llvm/lib/Target/AMDGPU/SIInstrInfo.td (+5-1)
- (modified) llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp (+13-10)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.atomic.dim.a16.ll (+577)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.dim.a16.ll (+1646)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2darraymsaa.ll (+1)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.3d.ll (+1)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.sample.a16.ll (+1755)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.sample.d.ll (+147)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.sample.g16.a16.ll (+127)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.sample.g16.ll (+760)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.store.2d.d16.ll (+82)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.atomic.dim.a16.ll (+621)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.atomic.dim.ll (+615)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.atomic.dim.mir (+37)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.gather4.a16.dim.ll (+311)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.gather4.dim.ll (+374)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.getresinfo.a16.ll (+117)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.getresinfo.ll (+159)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.1d.d16.ll (+213)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.1d.ll (+232)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.2d.ll (+61)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.2darraymsaa.a16.ll (+77)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.2darraymsaa.ll (+63)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.3d.a16.ll (+63)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.3d.ll (+61)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.sample.g16.ll (+95)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.store.2d.d16.ll (+2)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.store.2d.ll (+167)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.a16.dim.ll (+300)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.a16.encode.ll (+300)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.atomic.dim.ll (+24)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.d16.dim.ll (+13)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.dim.ll (+565)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.a16.dim.ll (+129)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.d16.dim.ll (+2)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.dim.ll (+28-9)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.getlod.dim.ll (+3)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.load.a16.d16.ll (+13)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.load.a16.ll (+13)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.msaa.load.ll (+94-2)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.a16.dim.ll (+342)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.d16.dim.ll (+81)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.dim.ll (+545)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.g16.encode.ll (+81)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.g16.ll (+81)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.store.a16.d16.ll (+85)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.store.a16.ll (+85)
- (added) llvm/test/CodeGen/AMDGPU/merge-image-load-gfx12.mir (+505)
- (added) llvm/test/CodeGen/AMDGPU/merge-image-sample-gfx12.mir (+995)
- (added) llvm/test/CodeGen/AMDGPU/move-to-valu-vimage-vsample.ll (+385)
- (added) llvm/test/CodeGen/AMDGPU/verify-vimage-vsample.mir (+58)
- (modified) llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-simplify-image-buffer-stores.ll (+139)
``````````diff
diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
index 09e88152e65d2a..dd8ba29d40e312 100644
--- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -845,7 +845,9 @@ class AMDGPUImageDimIntrinsic<AMDGPUDimProfile P_,
!if(P_.IsSample, [llvm_v4i32_ty, // samp(SGPR)
llvm_i1_ty], []), // unorm(imm)
[llvm_i32_ty, // texfailctrl(imm; bit 0 = tfe, bit 1 = lwe)
- llvm_i32_ty]), // cachepolicy(imm; bit 0 = glc, bit 1 = slc, bit 2 = dlc)
+ llvm_i32_ty]), // cachepolicy(imm; bit 0 = glc, bit 1 = slc, bit 2 = dlc;
+ // gfx12+ imm: bits [0-2] = th, bits [3-4] = scope)
+ // TODO-GFX12: Update all other cachepolicy descriptions.
!listconcat(props,
!if(P_.IsAtomic, [], [ImmArg<ArgIndex<AMDGPUImageDimIntrinsicEval<P_>.DmaskArgIndex>>]),
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 75fac09d0b99fa..569b0d332c6213 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -1832,6 +1832,7 @@ bool AMDGPUInstructionSelector::selectImageIntrinsic(
unsigned IntrOpcode = Intr->BaseOpcode;
const bool IsGFX10Plus = AMDGPU::isGFX10Plus(STI);
const bool IsGFX11Plus = AMDGPU::isGFX11Plus(STI);
+ const bool IsGFX12Plus = AMDGPU::isGFX12Plus(STI);
const unsigned ArgOffset = MI.getNumExplicitDefs() + 1;
@@ -1916,7 +1917,7 @@ bool AMDGPUInstructionSelector::selectImageIntrinsic(
unsigned CPol = MI.getOperand(ArgOffset + Intr->CachePolicyIndex).getImm();
if (BaseOpcode->Atomic)
CPol |= AMDGPU::CPol::GLC; // TODO no-return optimization
- if (CPol & ~AMDGPU::CPol::ALL)
+ if (CPol & ~(IsGFX12Plus ? AMDGPU::CPol::ALL : AMDGPU::CPol::ALL_pregfx12))
return false;
int NumVAddrRegs = 0;
@@ -1951,7 +1952,10 @@ bool AMDGPUInstructionSelector::selectImageIntrinsic(
++NumVDataDwords;
int Opcode = -1;
- if (IsGFX11Plus) {
+ if (IsGFX12Plus) {
+ Opcode = AMDGPU::getMIMGOpcode(IntrOpcode, AMDGPU::MIMGEncGfx12,
+ NumVDataDwords, NumVAddrDwords);
+ } else if (IsGFX11Plus) {
Opcode = AMDGPU::getMIMGOpcode(IntrOpcode,
UseNSA ? AMDGPU::MIMGEncGfx11NSA
: AMDGPU::MIMGEncGfx11Default,
@@ -2024,7 +2028,8 @@ bool AMDGPUInstructionSelector::selectImageIntrinsic(
if (IsGFX10Plus)
MIB.addImm(DimInfo->Encoding);
- MIB.addImm(Unorm);
+ if (AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::unorm))
+ MIB.addImm(Unorm);
MIB.addImm(CPol);
MIB.addImm(IsA16 && // a16 or r128
@@ -2039,7 +2044,8 @@ bool AMDGPUInstructionSelector::selectImageIntrinsic(
return false;
}
- MIB.addImm(LWE); // lwe
+ if (AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::lwe))
+ MIB.addImm(LWE); // lwe
if (!IsGFX10Plus)
MIB.addImm(DimInfo->DA ? -1 : 0);
if (BaseOpcode->HasD16)
@@ -5448,7 +5454,9 @@ void AMDGPUInstructionSelector::renderExtractCPol(MachineInstrBuilder &MIB,
const MachineInstr &MI,
int OpIdx) const {
assert(OpIdx >= 0 && "expected to match an immediate operand");
- MIB.addImm(MI.getOperand(OpIdx).getImm() & AMDGPU::CPol::ALL);
+ MIB.addImm(MI.getOperand(OpIdx).getImm() &
+ (AMDGPU::isGFX12Plus(STI) ? AMDGPU::CPol::ALL
+ : AMDGPU::CPol::ALL_pregfx12));
}
void AMDGPUInstructionSelector::renderExtractSWZ(MachineInstrBuilder &MIB,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index 489b4f5a8d86a5..5ff34d0da3561e 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -6151,7 +6151,7 @@ bool AMDGPULegalizerInfo::legalizeImageIntrinsic(
return false;
}
- const unsigned NSAMaxSize = ST.getNSAMaxSize();
+ const unsigned NSAMaxSize = ST.getNSAMaxSize(BaseOpcode->Sampler);
const unsigned HasPartialNSA = ST.hasPartialNSAEncoding();
if (IsA16 || IsG16) {
@@ -6211,7 +6211,7 @@ bool AMDGPULegalizerInfo::legalizeImageIntrinsic(
// SIShrinkInstructions will convert NSA encodings to non-NSA after register
// allocation when possible.
//
- // Partial NSA is allowed on GFX11 where the final register is a contiguous
+ // Partial NSA is allowed on GFX11+ where the final register is a contiguous
// set of the remaining addresses.
const bool UseNSA = ST.hasNSAEncoding() &&
CorrectedNumVAddrs >= ST.getNSAThreshold(MF) &&
@@ -6635,13 +6635,17 @@ bool AMDGPULegalizerInfo::legalizeBVHIntrinsic(MachineInstr &MI,
return false;
}
+ const bool IsGFX11 = AMDGPU::isGFX11(ST);
const bool IsGFX11Plus = AMDGPU::isGFX11Plus(ST);
+ const bool IsGFX12Plus = AMDGPU::isGFX12Plus(ST);
const bool IsA16 = MRI.getType(RayDir).getElementType().getSizeInBits() == 16;
const bool Is64 = MRI.getType(NodePtr).getSizeInBits() == 64;
const unsigned NumVDataDwords = 4;
const unsigned NumVAddrDwords = IsA16 ? (Is64 ? 9 : 8) : (Is64 ? 12 : 11);
const unsigned NumVAddrs = IsGFX11Plus ? (IsA16 ? 4 : 5) : NumVAddrDwords;
- const bool UseNSA = ST.hasNSAEncoding() && NumVAddrs <= ST.getNSAMaxSize();
+ const bool UseNSA =
+ IsGFX12Plus || (ST.hasNSAEncoding() && NumVAddrs <= ST.getNSAMaxSize());
+
const unsigned BaseOpcodes[2][2] = {
{AMDGPU::IMAGE_BVH_INTERSECT_RAY, AMDGPU::IMAGE_BVH_INTERSECT_RAY_a16},
{AMDGPU::IMAGE_BVH64_INTERSECT_RAY,
@@ -6649,14 +6653,16 @@ bool AMDGPULegalizerInfo::legalizeBVHIntrinsic(MachineInstr &MI,
int Opcode;
if (UseNSA) {
Opcode = AMDGPU::getMIMGOpcode(BaseOpcodes[Is64][IsA16],
- IsGFX11Plus ? AMDGPU::MIMGEncGfx11NSA
+ IsGFX12Plus ? AMDGPU::MIMGEncGfx12
+ : IsGFX11 ? AMDGPU::MIMGEncGfx11NSA
: AMDGPU::MIMGEncGfx10NSA,
NumVDataDwords, NumVAddrDwords);
} else {
- Opcode = AMDGPU::getMIMGOpcode(
- BaseOpcodes[Is64][IsA16],
- IsGFX11Plus ? AMDGPU::MIMGEncGfx11Default : AMDGPU::MIMGEncGfx10Default,
- NumVDataDwords, NumVAddrDwords);
+ assert(!IsGFX12Plus);
+ Opcode = AMDGPU::getMIMGOpcode(BaseOpcodes[Is64][IsA16],
+ IsGFX11 ? AMDGPU::MIMGEncGfx11Default
+ : AMDGPU::MIMGEncGfx10Default,
+ NumVDataDwords, NumVAddrDwords);
}
assert(Opcode != -1);
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 708f212e204acf..653dd4a9e8ccdd 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -7125,6 +7125,7 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
unsigned IntrOpcode = Intr->BaseOpcode;
bool IsGFX10Plus = AMDGPU::isGFX10Plus(*Subtarget);
bool IsGFX11Plus = AMDGPU::isGFX11Plus(*Subtarget);
+ bool IsGFX12Plus = AMDGPU::isGFX12Plus(*Subtarget);
SmallVector<EVT, 3> ResultTypes(Op->values());
SmallVector<EVT, 3> OrigResultTypes(Op->values());
@@ -7144,7 +7145,7 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
if (BaseOpcode->Atomic) {
VData = Op.getOperand(2);
- bool Is64Bit = VData.getValueType() == MVT::i64;
+ bool Is64Bit = VData.getValueSizeInBits() == 64;
if (BaseOpcode->AtomicX2) {
SDValue VData2 = Op.getOperand(3);
VData = DAG.getBuildVector(Is64Bit ? MVT::v2i64 : MVT::v2i32, DL,
@@ -7304,9 +7305,9 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
// SIShrinkInstructions will convert NSA encodings to non-NSA after register
// allocation when possible.
//
- // Partial NSA is allowed on GFX11 where the final register is a contiguous
+ // Partial NSA is allowed on GFX11+ where the final register is a contiguous
// set of the remaining addresses.
- const unsigned NSAMaxSize = ST->getNSAMaxSize();
+ const unsigned NSAMaxSize = ST->getNSAMaxSize(BaseOpcode->Sampler);
const bool HasPartialNSAEncoding = ST->hasPartialNSAEncoding();
const bool UseNSA = ST->hasNSAEncoding() &&
VAddrs.size() >= ST->getNSAThreshold(MF) &&
@@ -7383,7 +7384,7 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
Op.getOperand(ArgOffset + Intr->CachePolicyIndex))->getZExtValue();
if (BaseOpcode->Atomic)
CPol |= AMDGPU::CPol::GLC; // TODO no-return optimization
- if (CPol & ~AMDGPU::CPol::ALL)
+ if (CPol & ~(IsGFX12Plus ? AMDGPU::CPol::ALL : AMDGPU::CPol::ALL_pregfx12))
return Op;
SmallVector<SDValue, 26> Ops;
@@ -7403,7 +7404,8 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
Ops.push_back(DAG.getTargetConstant(DMask, DL, MVT::i32));
if (IsGFX10Plus)
Ops.push_back(DAG.getTargetConstant(DimInfo->Encoding, DL, MVT::i32));
- Ops.push_back(Unorm);
+ if (!IsGFX12Plus || BaseOpcode->Sampler || BaseOpcode->MSAA)
+ Ops.push_back(Unorm);
Ops.push_back(DAG.getTargetConstant(CPol, DL, MVT::i32));
Ops.push_back(IsA16 && // r128, a16 for gfx9
ST->hasFeature(AMDGPU::FeatureR128A16) ? True : False);
@@ -7414,7 +7416,8 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
} else if (cast<ConstantSDNode>(TFE)->getZExtValue()) {
report_fatal_error("TFE is not supported on this GPU");
}
- Ops.push_back(LWE); // lwe
+ if (!IsGFX12Plus || BaseOpcode->Sampler || BaseOpcode->MSAA)
+ Ops.push_back(LWE); // lwe
if (!IsGFX10Plus)
Ops.push_back(DimInfo->DA ? True : False);
if (BaseOpcode->HasD16)
@@ -7426,7 +7429,10 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
UseNSA ? VAddrs.size() : VAddr.getValueType().getSizeInBits() / 32;
int Opcode = -1;
- if (IsGFX11Plus) {
+ if (IsGFX12Plus) {
+ Opcode = AMDGPU::getMIMGOpcode(IntrOpcode, AMDGPU::MIMGEncGfx12,
+ NumVDataDwords, NumVAddrDwords);
+ } else if (IsGFX11Plus) {
Opcode = AMDGPU::getMIMGOpcode(IntrOpcode,
UseNSA ? AMDGPU::MIMGEncGfx11NSA
: AMDGPU::MIMGEncGfx11Default,
@@ -7743,7 +7749,9 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
SDLoc(Op), MVT::i32);
case Intrinsic::amdgcn_s_buffer_load: {
unsigned CPol = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
- if (CPol & ~AMDGPU::CPol::ALL)
+ if (CPol & ~((Subtarget->getGeneration() >= AMDGPUSubtarget::GFX12)
+ ? AMDGPU::CPol::ALL
+ : AMDGPU::CPol::ALL_pregfx12))
return Op;
return lowerSBuffer(VT, DL, Op.getOperand(1), Op.getOperand(2), Op.getOperand(3),
DAG);
@@ -8491,14 +8499,17 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
return SDValue();
}
+ const bool IsGFX11 = AMDGPU::isGFX11(*Subtarget);
const bool IsGFX11Plus = AMDGPU::isGFX11Plus(*Subtarget);
+ const bool IsGFX12Plus = AMDGPU::isGFX12Plus(*Subtarget);
const bool IsA16 = RayDir.getValueType().getVectorElementType() == MVT::f16;
const bool Is64 = NodePtr.getValueType() == MVT::i64;
const unsigned NumVDataDwords = 4;
const unsigned NumVAddrDwords = IsA16 ? (Is64 ? 9 : 8) : (Is64 ? 12 : 11);
const unsigned NumVAddrs = IsGFX11Plus ? (IsA16 ? 4 : 5) : NumVAddrDwords;
- const bool UseNSA =
- Subtarget->hasNSAEncoding() && NumVAddrs <= Subtarget->getNSAMaxSize();
+ const bool UseNSA = (Subtarget->hasNSAEncoding() &&
+ NumVAddrs <= Subtarget->getNSAMaxSize()) ||
+ IsGFX12Plus;
const unsigned BaseOpcodes[2][2] = {
{AMDGPU::IMAGE_BVH_INTERSECT_RAY, AMDGPU::IMAGE_BVH_INTERSECT_RAY_a16},
{AMDGPU::IMAGE_BVH64_INTERSECT_RAY,
@@ -8506,15 +8517,16 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
int Opcode;
if (UseNSA) {
Opcode = AMDGPU::getMIMGOpcode(BaseOpcodes[Is64][IsA16],
- IsGFX11Plus ? AMDGPU::MIMGEncGfx11NSA
+ IsGFX12Plus ? AMDGPU::MIMGEncGfx12
+ : IsGFX11 ? AMDGPU::MIMGEncGfx11NSA
: AMDGPU::MIMGEncGfx10NSA,
NumVDataDwords, NumVAddrDwords);
} else {
- Opcode =
- AMDGPU::getMIMGOpcode(BaseOpcodes[Is64][IsA16],
- IsGFX11Plus ? AMDGPU::MIMGEncGfx11Default
- : AMDGPU::MIMGEncGfx10Default,
- NumVDataDwords, NumVAddrDwords);
+ assert(!IsGFX12Plus);
+ Opcode = AMDGPU::getMIMGOpcode(BaseOpcodes[Is64][IsA16],
+ IsGFX11 ? AMDGPU::MIMGEncGfx11Default
+ : AMDGPU::MIMGEncGfx10Default,
+ NumVDataDwords, NumVAddrDwords);
}
assert(Opcode != -1);
@@ -14004,7 +14016,7 @@ static unsigned SubIdx2Lane(unsigned Idx) {
}
}
-/// Adjust the writemask of MIMG instructions
+/// Adjust the writemask of MIMG, VIMAGE or VSAMPLE instructions
SDNode *SITargetLowering::adjustWritemask(MachineSDNode *&Node,
SelectionDAG &DAG) const {
unsigned Opcode = Node->getMachineOpcode();
@@ -14022,7 +14034,7 @@ SDNode *SITargetLowering::adjustWritemask(MachineSDNode *&Node,
unsigned TFEIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::tfe) - 1;
unsigned LWEIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::lwe) - 1;
bool UsesTFC = ((int(TFEIdx) >= 0 && Node->getConstantOperandVal(TFEIdx)) ||
- Node->getConstantOperandVal(LWEIdx))
+ (int(LWEIdx) >= 0 && Node->getConstantOperandVal(LWEIdx)))
? true
: false;
unsigned TFCLane = 0;
@@ -14234,7 +14246,7 @@ SDNode *SITargetLowering::PostISelFolding(MachineSDNode *Node,
const SIInstrInfo *TII = getSubtarget()->getInstrInfo();
unsigned Opcode = Node->getMachineOpcode();
- if (TII->isMIMG(Opcode) && !TII->get(Opcode).mayStore() &&
+ if (TII->isImage(Opcode) && !TII->get(Opcode).mayStore() &&
!TII->isGather4(Opcode) &&
AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::dmask)) {
return adjustWritemask(Node, DAG);
@@ -14321,7 +14333,7 @@ void SITargetLowering::AddIMGInit(MachineInstr &MI) const {
return;
unsigned TFEVal = TFE ? TFE->getImm() : 0;
- unsigned LWEVal = LWE->getImm();
+ unsigned LWEVal = LWE ? LWE->getImm() : 0;
unsigned D16Val = D16 ? D16->getImm() : 0;
if (!TFEVal && !LWEVal)
@@ -14458,7 +14470,7 @@ void SITargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
return;
}
- if (TII->isMIMG(MI)) {
+ if (TII->isImage(MI)) {
if (!MI.mayStore())
AddIMGInit(MI);
TII->enforceOperandRCAlignment(MI, AMDGPU::OpName::vaddr);
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index d4746b559d9256..36e49a5d15e5cd 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -17,6 +17,7 @@
#include "GCNHazardRecognizer.h"
#include "GCNSubtarget.h"
#include "SIMachineFunctionInfo.h"
+#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/LiveIntervals.h"
@@ -4515,8 +4516,8 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
return true;
}
- if (isMIMG(MI) && MI.memoperands_empty() && MI.mayLoadOrStore()) {
- ErrInfo = "missing memory operand from MIMG instruction.";
+ if (isImage(MI) && MI.memoperands_empty() && MI.mayLoadOrStore()) {
+ ErrInfo = "missing memory operand from image instruction.";
return false;
}
@@ -4708,8 +4709,8 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
}
}
- // Verify MIMG
- if (isMIMG(MI.getOpcode()) && !MI.mayStore()) {
+ // Verify MIMG / VIMAGE / VSAMPLE
+ if (isImage(MI.getOpcode()) && !MI.mayStore()) {
// Ensure that the return type used is large enough for all the options
// being used TFE/LWE require an extra result register.
const MachineOperand *DMask = getNamedOperand(MI, AMDGPU::OpName::dmask);
@@ -4973,12 +4974,14 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
}
}
- if (isMIMG(MI)) {
+ if (isImage(MI)) {
const MachineOperand *DimOp = getNamedOperand(MI, AMDGPU::OpName::dim);
if (DimOp) {
int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opcode,
AMDGPU::OpName::vaddr0);
- int SRsrcIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::srsrc);
+ int RSrcOpName =
+ isMIMG(MI) ? AMDGPU::OpName::srsrc : AMDGPU::OpName::rsrc;
+ int RsrcIdx = AMDGPU::getNamedOperandIdx(Opcode, RSrcOpName);
const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opcode);
const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
@@ -4999,16 +5002,17 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
IsA16 = A16->getImm() != 0;
}
- bool IsNSA = SRsrcIdx - VAddr0Idx > 1;
+ bool IsNSA = RsrcIdx - VAddr0Idx > 1;
unsigned AddrWords =
AMDGPU::getAddrSizeMIMGOp(BaseOpcode, Dim, IsA16, ST.hasG16());
unsigned VAddrWords;
if (IsNSA) {
- VAddrWords = SRsrcIdx - VAddr0Idx;
- if (ST.hasPartialNSAEncoding() && AddrWords > ST.getNSAMaxSize()) {
- unsigned LastVAddrIdx = SRsrcIdx - 1;
+ VAddrWords = RsrcIdx - VAddr0Idx;
+ if (ST.hasPartialNSAEncoding() &&
+ AddrWords > ST.getNSAMaxSize(isVSAMPLE(MI))) {
+ unsigned LastVAddrIdx = RsrcIdx - 1;
VAddrWords += getOpSize(MI, LastVAddrIdx) / 4 - 1;
}
} else {
@@ -6518,18 +6522,21 @@ SIInstrInfo::legalizeOperands(MachineInstr &MI,
return CreatedBB;
}
- // Legalize MIMG and MUBUF/MTBUF for shaders.
+ // Legalize MIMG/VIMAGE/VSAMPLE and MUBUF/MTBUF for shaders.
//
// Shaders only generate MUBUF/MTBUF instructions via intrinsics or via
// scratch memory access. In both cases, the legalization never involves
// conversion to the addr64 form.
- if (isMIMG(MI) || (AMDGPU::isGraphics(MF.getFunction().getCallingConv()) &&
- (isMUBUF(MI) || isMTBUF(MI)))) {
- MachineOperand *SRsrc = getNamedOperand(MI, AMDGPU::OpName::srsrc);
+ if (isImage(MI) || (AMDGPU::isGraphics(MF.getFunction().getCallingConv()) &&
+ (isMUBUF(MI) || isMTBUF(MI)))) {
+ int RSrcOpName = (isVIMAGE(MI) || isVSAMPLE(MI)) ? AMDGPU::OpName::rsrc
+ : AMDGPU::OpName::srsrc;
+ MachineOperand *SRsrc = getNamedOperand(MI, RSrcOpName);
if (SRsrc && !RI.isSGPRClass(MRI.getRegClass(SRsrc->getReg())))
CreatedBB = loadMBUFScalarOperandsFromVGPR(*this, MI, {SRsrc}, MDT);
- MachineOperand *SSamp = getNamedOperand(MI, AMDGPU::OpName::ssamp);
+ int SampOpName = isMIMG(MI) ? AMDGPU::OpName::ssamp : AMDGPU::OpName::samp;
+ MachineOperand *SSamp = getNamedOperand(MI, SampOpName);
if (SSamp && !RI.isSGPRClass(MRI.getRegClass(SSamp->getReg())))
CreatedBB = loadMBUFScalarOperandsFromVGPR(*this, MI, {SSamp}, MDT);
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index 6c106b8b68b5c4..36734c7a75a822 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -887,7 +887,11 @@ def fp16_zeros_high_16bits : PatLeaf<(f16 VGPR_32:$src), [{
//===----------------------------------------------------------------------===//
def extract_cpol : SDNodeXForm<timm, [{
- return CurDAG->getTargetConstant(N->getZExtValue()...
[truncated]
``````````
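A note on the IntrinsicsAMDGPU.td hunk above: on gfx12+ the single cachepolicy immediate stops being a set of glc/slc/dlc flag bits and becomes two packed fields, a temporal hint in bits [0-2] and a scope in bits [3-4]. A minimal sketch of the two layouts, using illustrative helper names that are not part of the patch (the real masks live in the AMDGPU::CPol namespace):

```cpp
// Illustrative helpers, not the patch's AMDGPU::CPol definitions.
// gfx12+ layout from the comment above: bits [0-2] = th, bits [3-4] = scope.
constexpr unsigned makeGfx12CachePolicy(unsigned TH, unsigned Scope) {
  return (TH & 0x7) | ((Scope & 0x3) << 3);
}

// Pre-gfx12 layout for comparison: bit 0 = glc, bit 1 = slc, bit 2 = dlc.
constexpr unsigned makePreGfx12CachePolicy(bool GLC, bool SLC, bool DLC) {
  return (GLC ? 1u : 0u) | (SLC ? 2u : 0u) | (DLC ? 4u : 0u);
}

// Example: th = 3, scope = 1 packs to 0b01011 = 11.
static_assert(makeGfx12CachePolicy(3, 1) == 11, "packed cachepolicy");
```

This is also why the selector and lowering code above validate the operand against `CPol::ALL` on gfx12+ but `CPol::ALL_pregfx12` on older targets: the two layouts accept different sets of bits.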
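The "partial NSA" comments touched in AMDGPULegalizerInfo.cpp and SIISelLowering.cpp distinguish three vaddr layouts: non-NSA (one contiguous register tuple for all addresses), full NSA (one register per address dword, up to getNSAMaxSize()), and, on gfx11+, partial NSA, where the final NSA operand is a contiguous tuple holding the addresses that did not fit. A hedged sketch of that decision, as a hypothetical helper rather than the patch's actual code:

```cpp
// Hypothetical helper (not in the patch) summarizing the vaddr layouts the
// comments describe. Returns how many separate vaddr operands the chosen
// encoding uses for NumVAddrs address dwords.
unsigned numSeparateVAddrOperands(unsigned NumVAddrs, unsigned NSAMaxSize,
                                  bool HasNSA, bool HasPartialNSA) {
  if (!HasNSA)
    return 1; // non-NSA: one contiguous register tuple holds all addresses
  if (NumVAddrs <= NSAMaxSize)
    return NumVAddrs; // full NSA: each address dword in its own register
  if (HasPartialNSA)
    return NSAMaxSize; // partial NSA (gfx11+): the last NSA slot is a
                       // contiguous tuple of the remaining addresses
  return 1; // NSA unusable at this size: fall back to the contiguous form
}
```

Note the patch threads `BaseOpcode->Sampler` / `isVSAMPLE(MI)` into `getNSAMaxSize()`, since on gfx12 the sample and non-sample image encodings can have different address-register limits.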
https://github.com/llvm/llvm-project/pull/75488
More information about the llvm-commits mailing list