[llvm] 289ae65 - [AMDGPU][MC] Fix handling of A16 operands in intersect_ray instructions.
Ivan Kosarev via llvm-commits
llvm-commits at lists.llvm.org
Thu Jul 13 11:46:10 PDT 2023
Author: Ivan Kosarev
Date: 2023-07-13T19:46:03+01:00
New Revision: 289ae6525dffd9eb9a23753acd407235e125c690
URL: https://github.com/llvm/llvm-project/commit/289ae6525dffd9eb9a23753acd407235e125c690
DIFF: https://github.com/llvm/llvm-project/commit/289ae6525dffd9eb9a23753acd407235e125c690.diff
LOG: [AMDGPU][MC] Fix handling of A16 operands in intersect_ray instructions.
The patch adds the support for 'noa16' operands in non-A16 variants of
the instructions, fixes validation of A16 operands and eliminates the
custom conversion to MCInst.
Part of <https://github.com/llvm/llvm-project/issues/62629>.
Reviewed By: arsenm
Differential Revision: https://reviews.llvm.org/D155057
Added:
Modified:
llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
llvm/lib/Target/AMDGPU/MIMGInstructions.td
llvm/lib/Target/AMDGPU/SIISelLowering.cpp
llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
llvm/test/CodeGen/AMDGPU/waitcnt-bvh.mir
llvm/test/MC/AMDGPU/gfx1030_err.s
llvm/test/MC/AMDGPU/gfx1030_new.s
llvm/test/MC/AMDGPU/gfx11_asm_mimg_err.s
llvm/test/MC/AMDGPU/gfx11_asm_mimg_features.s
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index 9035f6f0c6a305..0569e5834435d6 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -1632,7 +1632,7 @@ class AMDGPUAsmParser : public MCTargetAsmParser {
bool validateMIMGGatherDMask(const MCInst &Inst);
bool validateMovrels(const MCInst &Inst, const OperandVector &Operands);
bool validateMIMGDataSize(const MCInst &Inst, const SMLoc &IDLoc);
- bool validateMIMGAddrSize(const MCInst &Inst);
+ bool validateMIMGAddrSize(const MCInst &Inst, const SMLoc &IDLoc);
bool validateMIMGD16(const MCInst &Inst);
bool validateMIMGMSAA(const MCInst &Inst);
bool validateOpSel(const MCInst &Inst);
@@ -1742,8 +1742,6 @@ class AMDGPUAsmParser : public MCTargetAsmParser {
void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
bool IsAtomic = false);
void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
- void cvtIntersectRay(MCInst &Inst, const OperandVector &Operands);
-
void cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands);
bool parseDimId(unsigned &Encoding);
@@ -3580,7 +3578,8 @@ bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst,
return false;
}
-bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) {
+bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst,
+ const SMLoc &IDLoc) {
const unsigned Opc = Inst.getOpcode();
const MCInstrDesc &Desc = MII.get(Opc);
@@ -3600,8 +3599,13 @@ bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) {
assert(SrsrcIdx != -1);
assert(SrsrcIdx > VAddr0Idx);
- if (DimIdx == -1)
- return true; // intersect_ray
+ bool IsA16 = Inst.getOperand(A16Idx).getImm();
+ if (BaseOpcode->BVH) {
+ if (IsA16 == BaseOpcode->A16)
+ return true;
+ Error(IDLoc, "image address size does not match a16");
+ return false;
+ }
unsigned Dim = Inst.getOperand(DimIdx).getImm();
const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
@@ -3609,7 +3613,6 @@ bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) {
unsigned ActualAddrSize =
IsNSA ? SrsrcIdx - VAddr0Idx
: AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;
- bool IsA16 = (A16Idx != -1 && Inst.getOperand(A16Idx).getImm());
unsigned ExpectedAddrSize =
AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16());
@@ -3620,7 +3623,7 @@ bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) {
unsigned VAddrLastSize =
AMDGPU::getRegOperandSize(getMRI(), Desc, VAddrLastIdx) / 4;
- return VAddrLastIdx - VAddr0Idx + VAddrLastSize == ExpectedAddrSize;
+ ActualAddrSize = VAddrLastIdx - VAddr0Idx + VAddrLastSize;
}
} else {
if (ExpectedAddrSize > 12)
@@ -3633,7 +3636,11 @@ bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) {
return true;
}
- return ActualAddrSize == ExpectedAddrSize;
+ if (ActualAddrSize == ExpectedAddrSize)
+ return true;
+
+ Error(IDLoc, "image address size does not match dim and a16");
+ return false;
}
bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
@@ -4569,11 +4576,8 @@ bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
if (!validateMIMGDataSize(Inst, IDLoc)) {
return false;
}
- if (!validateMIMGAddrSize(Inst)) {
- Error(IDLoc,
- "image address size does not match dim and a16");
+ if (!validateMIMGAddrSize(Inst, IDLoc))
return false;
- }
if (!validateMIMGAtomicDMask(Inst)) {
Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
"invalid atomic image dmask");
@@ -7760,17 +7764,6 @@ void AMDGPUAsmParser::cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands)
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
}
-void AMDGPUAsmParser::cvtIntersectRay(MCInst &Inst,
- const OperandVector &Operands) {
- for (unsigned I = 1; I < Operands.size(); ++I) {
- auto &Operand = (AMDGPUOperand &)*Operands[I];
- if (Operand.isReg())
- Operand.addRegOperands(Inst, 1);
- }
-
- Inst.addOperand(MCOperand::createImm(1)); // a16
-}
-
//===----------------------------------------------------------------------===//
// smrd
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
index 54b52eb4f324bf..1b05acd5c90a73 100644
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -865,8 +865,7 @@ DecodeStatus AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const {
assert(VDataIdx != -1);
if (BaseOpcode->BVH) {
// Add A16 operand for intersect_ray instructions
- if (AMDGPU::hasNamedOperand(MI.getOpcode(), AMDGPU::OpName::a16))
- addOperand(MI, MCOperand::createImm(1));
+ addOperand(MI, MCOperand::createImm(BaseOpcode->A16));
return MCDisassembler::Success;
}
diff --git a/llvm/lib/Target/AMDGPU/MIMGInstructions.td b/llvm/lib/Target/AMDGPU/MIMGInstructions.td
index 0420cf04748324..7847b97579efce 100644
--- a/llvm/lib/Target/AMDGPU/MIMGInstructions.td
+++ b/llvm/lib/Target/AMDGPU/MIMGInstructions.td
@@ -48,6 +48,7 @@ class MIMGBaseOpcode : PredicateControl {
bit IsAtomicRet = 0;
bit MSAA = 0;
bit BVH = 0;
+ bit A16 = 0;
}
def MIMGBaseOpcode : GenericEnum {
@@ -59,7 +60,7 @@ def MIMGBaseOpcodesTable : GenericTable {
let CppTypeName = "MIMGBaseOpcodeInfo";
let Fields = ["BaseOpcode", "Store", "Atomic", "AtomicX2", "Sampler",
"Gather4", "NumExtraArgs", "Gradients", "G16", "Coordinates",
- "LodOrClampOrMip", "HasD16", "MSAA", "BVH"];
+ "LodOrClampOrMip", "HasD16", "MSAA", "BVH", "A16"];
string TypeOf_BaseOpcode = "MIMGBaseOpcode";
let PrimaryKey = ["BaseOpcode"];
@@ -1191,50 +1192,43 @@ class MIMG_IntersectRay_Helper<bit Is64, bit IsA16> {
[node_ptr_type, VGPR_32, VReg_96, VReg_96, VReg_96]);
}
-class MIMG_IntersectRay_gfx10<mimgopc op, string opcode, RegisterClass AddrRC, bit IsA16>
+class MIMG_IntersectRay_gfx10<mimgopc op, string opcode, RegisterClass AddrRC>
: MIMG_gfx10<op.GFX10M, (outs VReg_128:$vdata), "AMDGPU"> {
-
- let InOperandList = !con((ins AddrRC:$vaddr0, SReg_128:$srsrc),
- !if(IsA16, (ins A16:$a16), (ins)));
- let AsmString = opcode#" $vdata, $vaddr0, $srsrc"#!if(IsA16, "$a16", "");
+ let InOperandList = (ins AddrRC:$vaddr0, SReg_128:$srsrc, A16:$a16);
+ let AsmString = opcode#" $vdata, $vaddr0, $srsrc$a16";
let nsa = 0;
}
-class MIMG_IntersectRay_nsa_gfx10<mimgopc op, string opcode, int num_addrs, bit IsA16>
+class MIMG_IntersectRay_nsa_gfx10<mimgopc op, string opcode, int num_addrs>
: MIMG_nsa_gfx10<op.GFX10M, (outs VReg_128:$vdata), num_addrs, "AMDGPU"> {
- let InOperandList = !con(nsah.AddrIns,
- (ins SReg_128:$srsrc),
- !if(IsA16, (ins A16:$a16), (ins)));
- let AsmString = opcode#" $vdata, "#nsah.AddrAsm#", $srsrc"#!if(IsA16, "$a16", "");
+ let InOperandList = !con(nsah.AddrIns, (ins SReg_128:$srsrc, A16:$a16));
+ let AsmString = opcode#" $vdata, "#nsah.AddrAsm#", $srsrc$a16";
}
-class MIMG_IntersectRay_gfx11<mimgopc op, string opcode, RegisterClass AddrRC, bit IsA16>
+class MIMG_IntersectRay_gfx11<mimgopc op, string opcode, RegisterClass AddrRC>
: MIMG_gfx11<op.GFX11, (outs VReg_128:$vdata), "AMDGPU"> {
-
- let InOperandList = !con((ins AddrRC:$vaddr0, SReg_128:$srsrc),
- !if(IsA16, (ins A16:$a16), (ins)));
- let AsmString = opcode#" $vdata, $vaddr0, $srsrc"#!if(IsA16, "$a16", "");
+ let InOperandList = (ins AddrRC:$vaddr0, SReg_128:$srsrc, A16:$a16);
+ let AsmString = opcode#" $vdata, $vaddr0, $srsrc$a16";
let nsa = 0;
}
class MIMG_IntersectRay_nsa_gfx11<mimgopc op, string opcode, int num_addrs,
- bit IsA16, list<RegisterClass> addr_types>
+ list<RegisterClass> addr_types>
: MIMG_nsa_gfx11<op.GFX11, (outs VReg_128:$vdata), num_addrs, "AMDGPU",
addr_types> {
- let InOperandList = !con(nsah.AddrIns,
- (ins SReg_128:$srsrc),
- !if(IsA16, (ins A16:$a16), (ins)));
- let AsmString = opcode#" $vdata, "#nsah.AddrAsm#", $srsrc"#!if(IsA16, "$a16", "");
+ let InOperandList = !con(nsah.AddrIns, (ins SReg_128:$srsrc, A16:$a16));
+ let AsmString = opcode#" $vdata, "#nsah.AddrAsm#", $srsrc$a16";
}
multiclass MIMG_IntersectRay<mimgopc op, string opcode, bit Is64, bit IsA16> {
defvar info = MIMG_IntersectRay_Helper<Is64, IsA16>;
def "" : MIMGBaseOpcode {
let BVH = 1;
+ let A16 = IsA16;
}
- let AsmMatchConverter = !if(IsA16, "cvtIntersectRay", ""),
+ let AsmMatchConverter = "",
dmask = 0xf,
unorm = 1,
d16 = 0,
@@ -1248,17 +1242,17 @@ multiclass MIMG_IntersectRay<mimgopc op, string opcode, bit Is64, bit IsA16> {
d16 = 0,
BaseOpcode = !cast<MIMGBaseOpcode>(NAME),
VDataDwords = 4 in {
- def _sa_gfx10 : MIMG_IntersectRay_gfx10<op, opcode, info.RegClass, IsA16> {
+ def _sa_gfx10 : MIMG_IntersectRay_gfx10<op, opcode, info.RegClass> {
let VAddrDwords = info.VAddrDwords;
}
- def _sa_gfx11 : MIMG_IntersectRay_gfx11<op, opcode, info.RegClass, IsA16> {
+ def _sa_gfx11 : MIMG_IntersectRay_gfx11<op, opcode, info.RegClass> {
let VAddrDwords = info.VAddrDwords;
}
- def _nsa_gfx10 : MIMG_IntersectRay_nsa_gfx10<op, opcode, info.num_addrs, IsA16> {
+ def _nsa_gfx10 : MIMG_IntersectRay_nsa_gfx10<op, opcode, info.num_addrs> {
let VAddrDwords = info.num_addrs;
}
def _nsa_gfx11 : MIMG_IntersectRay_nsa_gfx11<op, opcode,
- info.gfx11_nsa_addrs, IsA16,
+ info.gfx11_nsa_addrs,
info.gfx11_addr_types> {
let VAddrDwords = info.num_addrs;
}
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 65ee673aae91ae..29ec22f55f8036 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -8044,8 +8044,7 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
}
Ops.push_back(TDescr);
- if (IsA16)
- Ops.push_back(DAG.getTargetConstant(1, DL, MVT::i1));
+ Ops.push_back(DAG.getTargetConstant(IsA16, DL, MVT::i1));
Ops.push_back(M->getChain());
auto *NewNode = DAG.getMachineNode(Opcode, DL, M->getVTList(), Ops);
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index dd8c3f73ac25be..f1aa8a72c9e44d 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -355,6 +355,7 @@ struct MIMGBaseOpcodeInfo {
bool HasD16;
bool MSAA;
bool BVH;
+ bool A16;
};
LLVM_READONLY
diff --git a/llvm/test/CodeGen/AMDGPU/waitcnt-bvh.mir b/llvm/test/CodeGen/AMDGPU/waitcnt-bvh.mir
index e441e0e0aecce2..1e4e0ea017fc5a 100644
--- a/llvm/test/CodeGen/AMDGPU/waitcnt-bvh.mir
+++ b/llvm/test/CodeGen/AMDGPU/waitcnt-bvh.mir
@@ -7,11 +7,11 @@ body: |
bb.0:
; GCN-LABEL: name: waitcnt-check-inorder
; GCN: S_WAITCNT 0
- ; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx10 killed $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14, renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec :: (dereferenceable load (s128), addrspace 7)
- ; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx10 killed $vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec :: (dereferenceable load (s128), addrspace 7)
+ ; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx10 killed $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14, renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, implicit $exec :: (dereferenceable load (s128), addrspace 7)
+ ; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx10 killed $vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, implicit $exec :: (dereferenceable load (s128), addrspace 7)
; GCN-NEXT: S_ENDPGM 0
- $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx10 killed $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14, renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec :: (dereferenceable load (s128), addrspace 7)
- $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx10 killed $vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec :: (dereferenceable load (s128), addrspace 7)
+ $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx10 killed $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14, renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, implicit $exec :: (dereferenceable load (s128), addrspace 7)
+ $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx10 killed $vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, implicit $exec :: (dereferenceable load (s128), addrspace 7)
S_ENDPGM 0
...
---
@@ -20,11 +20,11 @@ body: |
bb.0:
; GCN-LABEL: name: waitcnt-check-vs-vmem
; GCN: S_WAITCNT 0
- ; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx10 killed $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14, renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec :: (dereferenceable load (s128), addrspace 7)
+ ; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx10 killed $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14, renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, implicit $exec :: (dereferenceable load (s128), addrspace 7)
; GCN-NEXT: S_WAITCNT 16240
; GCN-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr16, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, implicit $exec
; GCN-NEXT: S_ENDPGM 0
- $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx10 killed $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14, renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec :: (dereferenceable load (s128), addrspace 7)
+ $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx10 killed $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14, renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, implicit $exec :: (dereferenceable load (s128), addrspace 7)
$vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr16, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, implicit $exec
S_ENDPGM 0
...
@@ -34,11 +34,11 @@ body: |
bb.0:
; GCN-LABEL: name: waitcnt-check-vs-mimg-samp
; GCN: S_WAITCNT 0
- ; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx10 killed $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec :: (dereferenceable load (s128), addrspace 7)
+ ; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx10 killed $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, implicit $exec :: (dereferenceable load (s128), addrspace 7)
; GCN-NEXT: S_WAITCNT 16240
; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_SAMPLE_V4_V2 $vgpr20_vgpr21, $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11, $sgpr0_sgpr1_sgpr2_sgpr3, 15, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4)
; GCN-NEXT: S_ENDPGM 0
- $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx10 killed $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec :: (dereferenceable load (s128), addrspace 7)
+ $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx10 killed $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, implicit $exec :: (dereferenceable load (s128), addrspace 7)
$vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_SAMPLE_V4_V2 $vgpr20_vgpr21, $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11, $sgpr0_sgpr1_sgpr2_sgpr3, 15, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4)
S_ENDPGM 0
...
@@ -50,10 +50,10 @@ body: |
; GCN: S_WAITCNT 0
; GCN-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr20, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, implicit $exec
; GCN-NEXT: S_WAITCNT 16240
- ; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx10 killed $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec :: (dereferenceable load (s128), addrspace 7)
+ ; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx10 killed $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, implicit $exec :: (dereferenceable load (s128), addrspace 7)
; GCN-NEXT: S_ENDPGM 0
$vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr20, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, implicit $exec
- $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx10 killed $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec :: (dereferenceable load (s128), addrspace 7)
+ $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx10 killed $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, implicit $exec :: (dereferenceable load (s128), addrspace 7)
S_ENDPGM 0
...
---
@@ -64,9 +64,9 @@ body: |
; GCN: S_WAITCNT 0
; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_SAMPLE_V4_V2 $vgpr16_vgpr17, $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11, $sgpr0_sgpr1_sgpr2_sgpr3, 15, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4)
; GCN-NEXT: S_WAITCNT 16240
- ; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx10 killed $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec :: (dereferenceable load (s128), addrspace 7)
+ ; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx10 killed $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, implicit $exec :: (dereferenceable load (s128), addrspace 7)
; GCN-NEXT: S_ENDPGM 0
$vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_SAMPLE_V4_V2 $vgpr16_vgpr17, $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11, $sgpr0_sgpr1_sgpr2_sgpr3, 15, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4)
- $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx10 killed $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec :: (dereferenceable load (s128), addrspace 7)
+ $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx10 killed $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, implicit $exec :: (dereferenceable load (s128), addrspace 7)
S_ENDPGM 0
...
diff --git a/llvm/test/MC/AMDGPU/gfx1030_err.s b/llvm/test/MC/AMDGPU/gfx1030_err.s
index 0b005bc8eed5e1..1bab041909ab4a 100644
--- a/llvm/test/MC/AMDGPU/gfx1030_err.s
+++ b/llvm/test/MC/AMDGPU/gfx1030_err.s
@@ -207,3 +207,9 @@ s_waitcnt_depctr depctr_hold_cnt(0) depctr_hold_cnt(0)
s_waitcnt_depctr depctr_sa_sdst(0) depctr_sa_sdst(0)
// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: duplicate counter name depctr_sa_sdst
+
+image_bvh_intersect_ray v[4:7], v[9:16], s[4:7] noa16
+// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: image address size does not match a16
+
+image_bvh_intersect_ray v[39:42], [v50, v46, v23, v17, v16, v15, v21, v20], s[12:15] noa16
+// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: image address size does not match a16
diff --git a/llvm/test/MC/AMDGPU/gfx1030_new.s b/llvm/test/MC/AMDGPU/gfx1030_new.s
index 32426d82411b7c..19907d690b5567 100644
--- a/llvm/test/MC/AMDGPU/gfx1030_new.s
+++ b/llvm/test/MC/AMDGPU/gfx1030_new.s
@@ -87,10 +87,10 @@ v_fmac_legacy_f32 v0, s1, 2.0
image_bvh_intersect_ray v[4:7], v[9:19], s[4:7]
// GFX10: encoding: [0x01,0x9f,0x98,0xf1,0x09,0x04,0x01,0x00]
-image_bvh_intersect_ray v[4:7], v[9:16], s[4:7] a16
-// GFX10: encoding: [0x01,0x9f,0x98,0xf1,0x09,0x04,0x01,0x40]
+image_bvh_intersect_ray v[4:7], v[9:19], s[4:7] noa16
+// GFX10: encoding: [0x01,0x9f,0x98,0xf1,0x09,0x04,0x01,0x00]
-image_bvh_intersect_ray v[4:7], v[9:16], s[4:7] noa16
+image_bvh_intersect_ray v[4:7], v[9:16], s[4:7] a16
// GFX10: encoding: [0x01,0x9f,0x98,0xf1,0x09,0x04,0x01,0x40]
image_bvh64_intersect_ray v[4:7], v[9:20], s[4:7]
@@ -102,10 +102,10 @@ image_bvh64_intersect_ray v[4:7], v[9:17], s[4:7] a16
image_bvh_intersect_ray v[39:42], [v50, v46, v23, v17, v16, v15, v21, v20, v19, v37, v40], s[12:15]
// GFX10: encoding: [0x07,0x9f,0x98,0xf1,0x32,0x27,0x03,0x00,0x2e,0x17,0x11,0x10,0x0f,0x15,0x14,0x13,0x25,0x28,0x00,0x00]
-image_bvh_intersect_ray v[39:42], [v50, v46, v23, v17, v16, v15, v21, v20], s[12:15] a16
-// GFX10: encoding: [0x05,0x9f,0x98,0xf1,0x32,0x27,0x03,0x40,0x2e,0x17,0x11,0x10,0x0f,0x15,0x14,0x00]
+image_bvh_intersect_ray v[39:42], [v50, v46, v23, v17, v16, v15, v21, v20, v19, v37, v40], s[12:15] noa16
+// GFX10: encoding: [0x07,0x9f,0x98,0xf1,0x32,0x27,0x03,0x00,0x2e,0x17,0x11,0x10,0x0f,0x15,0x14,0x13,0x25,0x28,0x00,0x00]
-image_bvh_intersect_ray v[39:42], [v50, v46, v23, v17, v16, v15, v21, v20], s[12:15] noa16
+image_bvh_intersect_ray v[39:42], [v50, v46, v23, v17, v16, v15, v21, v20], s[12:15] a16
// GFX10: encoding: [0x05,0x9f,0x98,0xf1,0x32,0x27,0x03,0x40,0x2e,0x17,0x11,0x10,0x0f,0x15,0x14,0x00]
image_bvh64_intersect_ray v[39:42], [v50, v46, v23, v17, v16, v15, v21, v20, v19, v37, v40, v42], s[12:15]
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_mimg_err.s b/llvm/test/MC/AMDGPU/gfx11_asm_mimg_err.s
index bf0aa8f49d4c6f..cc24d39bb45dbd 100644
--- a/llvm/test/MC/AMDGPU/gfx11_asm_mimg_err.s
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_mimg_err.s
@@ -146,3 +146,9 @@ image_bvh64_intersect_ray v[39:42], [v50, v46, v[20:22], v[40:42], v[47:49]], s[
image_bvh64_intersect_ray v[39:42], [v[50:51], v46, v[20:22]], s[12:15] a16
// NOGFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+image_bvh_intersect_ray v[4:7], v[9:16], s[4:7] noa16
+// NOGFX11: :[[@LINE-1]]:{{[0-9]+}}: error: image address size does not match a16
+
+image_bvh_intersect_ray v[39:42], [v50, v46, v[20:22], v[40:42]], s[12:15] noa16
+// NOGFX11: :[[@LINE-1]]:{{[0-9]+}}: error: image address size does not match a16
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_mimg_features.s b/llvm/test/MC/AMDGPU/gfx11_asm_mimg_features.s
index d32f369e00c1d7..6736009685cb38 100644
--- a/llvm/test/MC/AMDGPU/gfx11_asm_mimg_features.s
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_mimg_features.s
@@ -325,10 +325,10 @@ image_msaa_load v[10:13], [v204, v11, v14, v19], s[40:47] dmask:0x1 dim:SQ_RSRC_
image_bvh_intersect_ray v[4:7], v[9:19], s[4:7]
// GFX11: image_bvh_intersect_ray v[4:7], v[9:19], s[4:7] ; encoding: [0x80,0x8f,0x64,0xf0,0x09,0x04,0x01,0x00]
-image_bvh_intersect_ray v[4:7], v[9:16], s[4:7] a16
-// GFX11: image_bvh_intersect_ray v[4:7], v[9:16], s[4:7] a16 ; encoding: [0x80,0x8f,0x65,0xf0,0x09,0x04,0x01,0x00]
+image_bvh_intersect_ray v[4:7], v[9:19], s[4:7] noa16
+// GFX11: image_bvh_intersect_ray v[4:7], v[9:19], s[4:7] ; encoding: [0x80,0x8f,0x64,0xf0,0x09,0x04,0x01,0x00]
-image_bvh_intersect_ray v[4:7], v[9:16], s[4:7] noa16
+image_bvh_intersect_ray v[4:7], v[9:16], s[4:7] a16
// GFX11: image_bvh_intersect_ray v[4:7], v[9:16], s[4:7] a16 ; encoding: [0x80,0x8f,0x65,0xf0,0x09,0x04,0x01,0x00]
image_bvh64_intersect_ray v[4:7], v[9:20], s[4:7]
@@ -340,10 +340,10 @@ image_bvh64_intersect_ray v[4:7], v[9:17], s[4:7] a16
image_bvh_intersect_ray v[39:42], [v50, v46, v[20:22], v[40:42], v[47:49]], s[12:15]
// GFX11: image_bvh_intersect_ray v[39:42], [v50, v46, v[20:22], v[40:42], v[47:49]], s[12:15] ; encoding: [0x81,0x8f,0x64,0xf0,0x32,0x27,0x03,0x00,0x2e,0x14,0x28,0x2f]
-image_bvh_intersect_ray v[39:42], [v50, v46, v[20:22], v[40:42]], s[12:15] a16
-// GFX11: image_bvh_intersect_ray v[39:42], [v50, v46, v[20:22], v[40:42]], s[12:15] a16 ; encoding: [0x81,0x8f,0x65,0xf0,0x32,0x27,0x03,0x00,0x2e,0x14,0x28,0x00]
+image_bvh_intersect_ray v[39:42], [v50, v46, v[20:22], v[40:42], v[47:49]], s[12:15] noa16
+// GFX11: image_bvh_intersect_ray v[39:42], [v50, v46, v[20:22], v[40:42], v[47:49]], s[12:15] ; encoding: [0x81,0x8f,0x64,0xf0,0x32,0x27,0x03,0x00,0x2e,0x14,0x28,0x2f]
-image_bvh_intersect_ray v[39:42], [v50, v46, v[20:22], v[40:42]], s[12:15] noa16
+image_bvh_intersect_ray v[39:42], [v50, v46, v[20:22], v[40:42]], s[12:15] a16
// GFX11: image_bvh_intersect_ray v[39:42], [v50, v46, v[20:22], v[40:42]], s[12:15] a16 ; encoding: [0x81,0x8f,0x65,0xf0,0x32,0x27,0x03,0x00,0x2e,0x14,0x28,0x00]
image_bvh64_intersect_ray v[39:42], [v[50:51], v46, v[20:22], v[40:42], v[47:49]], s[12:15]
More information about the llvm-commits
mailing list