[llvm] 99c790d - [AMDGPU] Make BVH isel consistent with other MIMG opcodes
Carl Ritson via llvm-commits
llvm-commits at lists.llvm.org
Mon Aug 16 18:42:48 PDT 2021
Author: Carl Ritson
Date: 2021-08-17T10:42:22+09:00
New Revision: 99c790dc21b80cceae6084f6cec8c66e75c8d390
URL: https://github.com/llvm/llvm-project/commit/99c790dc21b80cceae6084f6cec8c66e75c8d390
DIFF: https://github.com/llvm/llvm-project/commit/99c790dc21b80cceae6084f6cec8c66e75c8d390.diff
LOG: [AMDGPU] Make BVH isel consistent with other MIMG opcodes
Suffix opcodes with _gfx10.
Remove direct references to architecture specific opcodes.
Add a BVH flag and apply this to disassembly.
Fix a number of disassembly errors on gfx90a target caused by
previous incorrect BVH detection code.
Reviewed By: rampitec
Differential Revision: https://reviews.llvm.org/D108117
Added:
Modified:
llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
llvm/lib/Target/AMDGPU/MIMGInstructions.td
llvm/lib/Target/AMDGPU/SIISelLowering.cpp
llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
llvm/test/MC/Disassembler/AMDGPU/gfx90a_ldst_acc.txt
llvm/test/MC/Disassembler/AMDGPU/mimg_gfx90a.txt
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index 5a8c14234bb77..71e120974c2e2 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -4825,18 +4825,25 @@ bool AMDGPULegalizerInfo::legalizeBVHIntrinsic(MachineInstr &MI,
const bool IsA16 = MRI.getType(RayDir).getElementType().getSizeInBits() == 16;
const bool Is64 = MRI.getType(NodePtr).getSizeInBits() == 64;
- const unsigned NumVAddrs = IsA16 ? (Is64 ? 9 : 8) : (Is64 ? 12 : 11);
- const bool UseNSA = ST.hasNSAEncoding() && NumVAddrs <= ST.getNSAMaxSize();
- const unsigned Opcodes[2][2][2] = {
- {{AMDGPU::IMAGE_BVH_INTERSECT_RAY_sa,
- AMDGPU::IMAGE_BVH64_INTERSECT_RAY_sa},
- {AMDGPU::IMAGE_BVH_INTERSECT_RAY_a16_sa,
- AMDGPU::IMAGE_BVH64_INTERSECT_RAY_a16_sa}},
- {{AMDGPU::IMAGE_BVH_INTERSECT_RAY_nsa,
- AMDGPU::IMAGE_BVH64_INTERSECT_RAY_nsa},
- {AMDGPU::IMAGE_BVH_INTERSECT_RAY_a16_nsa,
- AMDGPU::IMAGE_BVH64_INTERSECT_RAY_a16_nsa}}};
- const unsigned Opcode = Opcodes[UseNSA][IsA16][Is64];
+ const unsigned NumVDataDwords = 4;
+ const unsigned NumVAddrDwords = IsA16 ? (Is64 ? 9 : 8) : (Is64 ? 12 : 11);
+ const bool UseNSA =
+ ST.hasNSAEncoding() && NumVAddrDwords <= ST.getNSAMaxSize();
+ const unsigned BaseOpcodes[2][2] = {
+ {AMDGPU::IMAGE_BVH_INTERSECT_RAY, AMDGPU::IMAGE_BVH_INTERSECT_RAY_a16},
+ {AMDGPU::IMAGE_BVH64_INTERSECT_RAY,
+ AMDGPU::IMAGE_BVH64_INTERSECT_RAY_a16}};
+ int Opcode;
+ if (UseNSA) {
+ Opcode =
+ AMDGPU::getMIMGOpcode(BaseOpcodes[Is64][IsA16], AMDGPU::MIMGEncGfx10NSA,
+ NumVDataDwords, NumVAddrDwords);
+ } else {
+ Opcode = AMDGPU::getMIMGOpcode(BaseOpcodes[Is64][IsA16],
+ AMDGPU::MIMGEncGfx10Default, NumVDataDwords,
+ PowerOf2Ceil(NumVAddrDwords));
+ }
+ assert(Opcode != -1);
SmallVector<Register, 12> Ops;
if (Is64) {
diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
index fe62b8590fa0e..555f6bc5cd960 100644
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -693,22 +693,21 @@ DecodeStatus AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const {
int D16Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
AMDGPU::OpName::d16);
+ const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI.getOpcode());
+ const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
+ AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
+
assert(VDataIdx != -1);
- if (DMaskIdx == -1 || TFEIdx == -1) {// intersect_ray
+ if (BaseOpcode->BVH) {
+ // Add A16 operand for intersect_ray instructions
if (AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::a16) > -1) {
- assert(MI.getOpcode() == AMDGPU::IMAGE_BVH_INTERSECT_RAY_a16_sa ||
- MI.getOpcode() == AMDGPU::IMAGE_BVH_INTERSECT_RAY_a16_nsa ||
- MI.getOpcode() == AMDGPU::IMAGE_BVH64_INTERSECT_RAY_a16_sa ||
- MI.getOpcode() == AMDGPU::IMAGE_BVH64_INTERSECT_RAY_a16_nsa);
addOperand(MI, MCOperand::createImm(1));
}
return MCDisassembler::Success;
}
- const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI.getOpcode());
bool IsAtomic = (VDstIdx != -1);
bool IsGather4 = MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::Gather4;
-
bool IsNSA = false;
unsigned AddrSize = Info->VAddrDwords;
@@ -717,8 +716,6 @@ DecodeStatus AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const {
AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::dim);
int A16Idx =
AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::a16);
- const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
- AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
const AMDGPU::MIMGDimInfo *Dim =
AMDGPU::getMIMGDimInfoByEncoding(MI.getOperand(DimIdx).getImm());
const bool IsA16 = (A16Idx != -1 && MI.getOperand(A16Idx).getImm());
diff --git a/llvm/lib/Target/AMDGPU/MIMGInstructions.td b/llvm/lib/Target/AMDGPU/MIMGInstructions.td
index bacb790aac62f..288cf6b02f9ff 100644
--- a/llvm/lib/Target/AMDGPU/MIMGInstructions.td
+++ b/llvm/lib/Target/AMDGPU/MIMGInstructions.td
@@ -43,6 +43,7 @@ class MIMGBaseOpcode : PredicateControl {
bit HasD16 = 0;
bit IsAtomicRet = 0;
bit MSAA = 0;
+ bit BVH = 0;
}
def MIMGBaseOpcode : GenericEnum {
@@ -54,7 +55,7 @@ def MIMGBaseOpcodesTable : GenericTable {
let CppTypeName = "MIMGBaseOpcodeInfo";
let Fields = ["BaseOpcode", "Store", "Atomic", "AtomicX2", "Sampler",
"Gather4", "NumExtraArgs", "Gradients", "G16", "Coordinates",
- "LodOrClampOrMip", "HasD16", "MSAA"];
+ "LodOrClampOrMip", "HasD16", "MSAA", "BVH"];
string TypeOf_BaseOpcode = "MIMGBaseOpcode";
let PrimaryKey = ["BaseOpcode"];
@@ -872,6 +873,14 @@ multiclass MIMG_Gather <mimgopc op, AMDGPUSampleVariant sample, bit wqm = 0,
multiclass MIMG_Gather_WQM <mimgopc op, AMDGPUSampleVariant sample>
: MIMG_Gather<op, sample, 1>;
+class MIMG_IntersectRay_Helper<bit Is64, bit A16> {
+ int num_addrs = !if(Is64, !if(A16, 9, 12), !if(A16, 8, 11));
+ // TODO: MIMGAddrSize will choose VReg_512 which is a 16 register tuple,
+ // when we only need 9, 11 or 12 depending on A16 field and ptr size.
+ RegisterClass RegClass = MIMGAddrSize<num_addrs, 0>.RegClass;
+ int VAddrDwords = !srl(RegClass.Size, 5);
+}
+
class MIMG_IntersectRay_gfx10<mimgopc op, string opcode, RegisterClass AddrRC, bit A16>
: MIMG_gfx10<op.BASE, (outs VReg_128:$vdata), "AMDGPU"> {
@@ -890,8 +899,11 @@ class MIMG_IntersectRay_nsa_gfx10<mimgopc op, string opcode, int num_addrs, bit
let AsmString = opcode#" $vdata, "#nsah.AddrAsm#", $srsrc"#!if(A16, "$a16", "");
}
-multiclass MIMG_IntersectRay<mimgopc op, string opcode, int num_addrs, bit A16> {
- def "" : MIMGBaseOpcode;
+multiclass MIMG_IntersectRay<mimgopc op, string opcode, bit Is64, bit A16> {
+ defvar info = MIMG_IntersectRay_Helper<Is64, A16>;
+ def "" : MIMGBaseOpcode {
+ let BVH = 1;
+ }
let SubtargetPredicate = HasGFX10_AEncoding,
AssemblerPredicate = HasGFX10_AEncoding,
AsmMatchConverter = !if(A16, "cvtIntersectRay", ""),
@@ -908,13 +920,11 @@ multiclass MIMG_IntersectRay<mimgopc op, string opcode, int num_addrs, bit A16>
d16 = 0,
BaseOpcode = !cast<MIMGBaseOpcode>(NAME),
VDataDwords = 4 in {
- // TODO: MIMGAddrSize will choose VReg_512 which is a 16 register tuple,
- // when we only need 9, 11 or 12 depending on A16 field and ptr size.
- def "_sa" : MIMG_IntersectRay_gfx10<op, opcode, MIMGAddrSize<num_addrs, 0>.RegClass, A16> {
- let VAddrDwords = !srl(MIMGAddrSize<num_addrs, 0>.RegClass.Size, 5);
+ def _sa_gfx10 : MIMG_IntersectRay_gfx10<op, opcode, info.RegClass, A16> {
+ let VAddrDwords = info.VAddrDwords;
}
- def _nsa : MIMG_IntersectRay_nsa_gfx10<op, opcode, num_addrs, A16> {
- let VAddrDwords = num_addrs;
+ def _nsa_gfx10 : MIMG_IntersectRay_nsa_gfx10<op, opcode, info.num_addrs, A16> {
+ let VAddrDwords = info.num_addrs;
}
}
}
@@ -1045,10 +1055,10 @@ defm IMAGE_SAMPLE_C_CD_CL_O_G16 : MIMG_Sampler <mimgopc<0xef>, AMDGPUSample_c_cd
let SubtargetPredicate = HasGFX10_AEncoding in
defm IMAGE_MSAA_LOAD_X : MIMG_NoSampler <mimgopc<0x80>, "image_msaa_load", 1, 0, 0, 1>;
-defm IMAGE_BVH_INTERSECT_RAY : MIMG_IntersectRay<mimgopc<0xe6>, "image_bvh_intersect_ray", 11, 0>;
-defm IMAGE_BVH_INTERSECT_RAY_a16 : MIMG_IntersectRay<mimgopc<0xe6>, "image_bvh_intersect_ray", 8, 1>;
-defm IMAGE_BVH64_INTERSECT_RAY : MIMG_IntersectRay<mimgopc<0xe7>, "image_bvh64_intersect_ray", 12, 0>;
-defm IMAGE_BVH64_INTERSECT_RAY_a16 : MIMG_IntersectRay<mimgopc<0xe7>, "image_bvh64_intersect_ray", 9, 1>;
+defm IMAGE_BVH_INTERSECT_RAY : MIMG_IntersectRay<mimgopc<0xe6>, "image_bvh_intersect_ray", 0, 0>;
+defm IMAGE_BVH_INTERSECT_RAY_a16 : MIMG_IntersectRay<mimgopc<0xe6>, "image_bvh_intersect_ray", 0, 1>;
+defm IMAGE_BVH64_INTERSECT_RAY : MIMG_IntersectRay<mimgopc<0xe7>, "image_bvh64_intersect_ray", 1, 0>;
+defm IMAGE_BVH64_INTERSECT_RAY_a16 : MIMG_IntersectRay<mimgopc<0xe7>, "image_bvh64_intersect_ray", 1, 1>;
/********** ========================================= **********/
/********** Table of dimension-aware image intrinsics **********/
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 0d21d77d60470..b16cf1f3bed3e 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -7373,19 +7373,25 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
const bool IsA16 = RayDir.getValueType().getVectorElementType() == MVT::f16;
const bool Is64 = NodePtr.getValueType() == MVT::i64;
- const unsigned NumVAddrs = IsA16 ? (Is64 ? 9 : 8) : (Is64 ? 12 : 11);
- const bool UseNSA =
- Subtarget->hasNSAEncoding() && NumVAddrs <= Subtarget->getNSAMaxSize();
- const unsigned Opcodes[2][2][2] = {
- {{AMDGPU::IMAGE_BVH_INTERSECT_RAY_sa,
- AMDGPU::IMAGE_BVH64_INTERSECT_RAY_sa},
- {AMDGPU::IMAGE_BVH_INTERSECT_RAY_a16_sa,
- AMDGPU::IMAGE_BVH64_INTERSECT_RAY_a16_sa}},
- {{AMDGPU::IMAGE_BVH_INTERSECT_RAY_nsa,
- AMDGPU::IMAGE_BVH64_INTERSECT_RAY_nsa},
- {AMDGPU::IMAGE_BVH_INTERSECT_RAY_a16_nsa,
- AMDGPU::IMAGE_BVH64_INTERSECT_RAY_a16_nsa}}};
- const unsigned Opcode = Opcodes[UseNSA][IsA16][Is64];
+ const unsigned NumVDataDwords = 4;
+ const unsigned NumVAddrDwords = IsA16 ? (Is64 ? 9 : 8) : (Is64 ? 12 : 11);
+ const bool UseNSA = Subtarget->hasNSAEncoding() &&
+ NumVAddrDwords <= Subtarget->getNSAMaxSize();
+ const unsigned BaseOpcodes[2][2] = {
+ {AMDGPU::IMAGE_BVH_INTERSECT_RAY, AMDGPU::IMAGE_BVH_INTERSECT_RAY_a16},
+ {AMDGPU::IMAGE_BVH64_INTERSECT_RAY,
+ AMDGPU::IMAGE_BVH64_INTERSECT_RAY_a16}};
+ int Opcode;
+ if (UseNSA) {
+ Opcode = AMDGPU::getMIMGOpcode(BaseOpcodes[Is64][IsA16],
+ AMDGPU::MIMGEncGfx10NSA, NumVDataDwords,
+ NumVAddrDwords);
+ } else {
+ Opcode = AMDGPU::getMIMGOpcode(
+ BaseOpcodes[Is64][IsA16], AMDGPU::MIMGEncGfx10Default, NumVDataDwords,
+ PowerOf2Ceil(NumVAddrDwords));
+ }
+ assert(Opcode != -1);
SmallVector<SDValue, 16> Ops;
@@ -7428,7 +7434,7 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
if (!UseNSA) {
// Build a single vector containing all the operands so far prepared.
- if (NumVAddrs > 8) {
+ if (NumVAddrDwords > 8) {
SDValue Undef = DAG.getUNDEF(MVT::i32);
Ops.append(16 - Ops.size(), Undef);
}
diff --git a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
index 493c1ad87f93d..5dd621856a721 100644
--- a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
+++ b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
@@ -343,6 +343,9 @@ static InstClassEnum getInstClass(unsigned Opc, const SIInstrInfo &TII) {
if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr) == -1 &&
AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0) == -1)
return UNKNOWN;
+ // Ignore BVH instructions
+ if (AMDGPU::getMIMGBaseOpcode(Opc)->BVH)
+ return UNKNOWN;
// TODO: Support IMAGE_GET_RESINFO and IMAGE_GET_LOD.
if (TII.get(Opc).mayStore() || !TII.get(Opc).mayLoad() ||
TII.isGather4(Opc))
@@ -380,15 +383,6 @@ static InstClassEnum getInstClass(unsigned Opc, const SIInstrInfo &TII) {
case AMDGPU::DS_WRITE_B64:
case AMDGPU::DS_WRITE_B64_gfx9:
return DS_WRITE;
- case AMDGPU::IMAGE_BVH_INTERSECT_RAY_sa:
- case AMDGPU::IMAGE_BVH64_INTERSECT_RAY_sa:
- case AMDGPU::IMAGE_BVH_INTERSECT_RAY_a16_sa:
- case AMDGPU::IMAGE_BVH64_INTERSECT_RAY_a16_sa:
- case AMDGPU::IMAGE_BVH_INTERSECT_RAY_nsa:
- case AMDGPU::IMAGE_BVH64_INTERSECT_RAY_nsa:
- case AMDGPU::IMAGE_BVH_INTERSECT_RAY_a16_nsa:
- case AMDGPU::IMAGE_BVH64_INTERSECT_RAY_a16_nsa:
- return UNKNOWN;
}
}
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index 72c872dec5ba7..5bd9f85fab993 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -292,8 +292,12 @@ struct MIMGBaseOpcodeInfo {
bool LodOrClampOrMip;
bool HasD16;
bool MSAA;
+ bool BVH;
};
+LLVM_READONLY
+const MIMGBaseOpcodeInfo *getMIMGBaseOpcode(unsigned Opc);
+
LLVM_READONLY
const MIMGBaseOpcodeInfo *getMIMGBaseOpcodeInfo(unsigned BaseOpcode);
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx90a_ldst_acc.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx90a_ldst_acc.txt
index 0b65c52d66d8d..0688cd71537de 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx90a_ldst_acc.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx90a_ldst_acc.txt
@@ -7869,40 +7869,40 @@
# GFX90A: image_load a5, v2, s[8:15] dmask:0x2 ; encoding: [0x00,0x02,0x01,0xf0,0x02,0x05,0x02,0x00]
0x00,0x02,0x01,0xf0,0x02,0x05,0x02,0x00
-# GFX90A: image_load a6, v2, s[8:15] dmask:0x3 ; encoding: [0x00,0x03,0x01,0xf0,0x02,0x06,0x02,0x00]
+# GFX90A: image_load a[6:7], v2, s[8:15] dmask:0x3 ; encoding: [0x00,0x03,0x01,0xf0,0x02,0x06,0x02,0x00]
0x00,0x03,0x01,0xf0,0x02,0x06,0x02,0x00
# GFX90A: image_load a5, v2, s[8:15] dmask:0x4 ; encoding: [0x00,0x04,0x01,0xf0,0x02,0x05,0x02,0x00]
0x00,0x04,0x01,0xf0,0x02,0x05,0x02,0x00
-# GFX90A: image_load a6, v2, s[8:15] dmask:0x5 ; encoding: [0x00,0x05,0x01,0xf0,0x02,0x06,0x02,0x00]
+# GFX90A: image_load a[6:7], v2, s[8:15] dmask:0x5 ; encoding: [0x00,0x05,0x01,0xf0,0x02,0x06,0x02,0x00]
0x00,0x05,0x01,0xf0,0x02,0x06,0x02,0x00
-# GFX90A: image_load a6, v2, s[8:15] dmask:0x6 ; encoding: [0x00,0x06,0x01,0xf0,0x02,0x06,0x02,0x00]
+# GFX90A: image_load a[6:7], v2, s[8:15] dmask:0x6 ; encoding: [0x00,0x06,0x01,0xf0,0x02,0x06,0x02,0x00]
0x00,0x06,0x01,0xf0,0x02,0x06,0x02,0x00
-# GFX90A: image_load a6, v2, s[8:15] dmask:0x7 ; encoding: [0x00,0x07,0x01,0xf0,0x02,0x06,0x02,0x00]
+# GFX90A: image_load a[6:8], v2, s[8:15] dmask:0x7 ; encoding: [0x00,0x07,0x01,0xf0,0x02,0x06,0x02,0x00]
0x00,0x07,0x01,0xf0,0x02,0x06,0x02,0x00
# GFX90A: image_load a5, v2, s[8:15] dmask:0x8 ; encoding: [0x00,0x08,0x01,0xf0,0x02,0x05,0x02,0x00]
0x00,0x08,0x01,0xf0,0x02,0x05,0x02,0x00
-# GFX90A: image_load a6, v2, s[8:15] dmask:0x9 ; encoding: [0x00,0x09,0x01,0xf0,0x02,0x06,0x02,0x00]
+# GFX90A: image_load a[6:7], v2, s[8:15] dmask:0x9 ; encoding: [0x00,0x09,0x01,0xf0,0x02,0x06,0x02,0x00]
0x00,0x09,0x01,0xf0,0x02,0x06,0x02,0x00
-# GFX90A: image_load a6, v2, s[8:15] dmask:0xa ; encoding: [0x00,0x0a,0x01,0xf0,0x02,0x06,0x02,0x00]
+# GFX90A: image_load a[6:7], v2, s[8:15] dmask:0xa ; encoding: [0x00,0x0a,0x01,0xf0,0x02,0x06,0x02,0x00]
0x00,0x0a,0x01,0xf0,0x02,0x06,0x02,0x00
-# GFX90A: image_load a6, v2, s[8:15] dmask:0xb ; encoding: [0x00,0x0b,0x01,0xf0,0x02,0x06,0x02,0x00]
+# GFX90A: image_load a[6:8], v2, s[8:15] dmask:0xb ; encoding: [0x00,0x0b,0x01,0xf0,0x02,0x06,0x02,0x00]
0x00,0x0b,0x01,0xf0,0x02,0x06,0x02,0x00
-# GFX90A: image_load a6, v2, s[8:15] dmask:0xc ; encoding: [0x00,0x0c,0x01,0xf0,0x02,0x06,0x02,0x00]
+# GFX90A: image_load a[6:7], v2, s[8:15] dmask:0xc ; encoding: [0x00,0x0c,0x01,0xf0,0x02,0x06,0x02,0x00]
0x00,0x0c,0x01,0xf0,0x02,0x06,0x02,0x00
-# GFX90A: image_load a6, v2, s[8:15] dmask:0xd ; encoding: [0x00,0x0d,0x01,0xf0,0x02,0x06,0x02,0x00]
+# GFX90A: image_load a[6:8], v2, s[8:15] dmask:0xd ; encoding: [0x00,0x0d,0x01,0xf0,0x02,0x06,0x02,0x00]
0x00,0x0d,0x01,0xf0,0x02,0x06,0x02,0x00
-# GFX90A: image_load a6, v2, s[8:15] dmask:0xe ; encoding: [0x00,0x0e,0x01,0xf0,0x02,0x06,0x02,0x00]
+# GFX90A: image_load a[6:8], v2, s[8:15] dmask:0xe ; encoding: [0x00,0x0e,0x01,0xf0,0x02,0x06,0x02,0x00]
0x00,0x0e,0x01,0xf0,0x02,0x06,0x02,0x00
# GFX90A: image_load a5, v2, s[8:15] ; encoding: [0x00,0x00,0x01,0xf0,0x02,0x05,0x02,0x00]
@@ -7944,43 +7944,43 @@
# GFX90A: image_store a1, v2, s[12:19] dmask:0x2 unorm ; encoding: [0x00,0x12,0x21,0xf0,0x02,0x01,0x03,0x00]
0x00,0x12,0x21,0xf0,0x02,0x01,0x03,0x00
-# GFX90A: image_store a2, v2, s[12:19] dmask:0x3 unorm ; encoding: [0x00,0x13,0x21,0xf0,0x02,0x02,0x03,0x00]
+# GFX90A: image_store a[2:3], v2, s[12:19] dmask:0x3 unorm ; encoding: [0x00,0x13,0x21,0xf0,0x02,0x02,0x03,0x00]
0x00,0x13,0x21,0xf0,0x02,0x02,0x03,0x00
# GFX90A: image_store a1, v2, s[12:19] dmask:0x4 unorm ; encoding: [0x00,0x14,0x21,0xf0,0x02,0x01,0x03,0x00]
0x00,0x14,0x21,0xf0,0x02,0x01,0x03,0x00
-# GFX90A: image_store a2, v2, s[12:19] dmask:0x5 unorm ; encoding: [0x00,0x15,0x21,0xf0,0x02,0x02,0x03,0x00]
+# GFX90A: image_store a[2:3], v2, s[12:19] dmask:0x5 unorm ; encoding: [0x00,0x15,0x21,0xf0,0x02,0x02,0x03,0x00]
0x00,0x15,0x21,0xf0,0x02,0x02,0x03,0x00
-# GFX90A: image_store a2, v2, s[12:19] dmask:0x6 unorm ; encoding: [0x00,0x16,0x21,0xf0,0x02,0x02,0x03,0x00]
+# GFX90A: image_store a[2:3], v2, s[12:19] dmask:0x6 unorm ; encoding: [0x00,0x16,0x21,0xf0,0x02,0x02,0x03,0x00]
0x00,0x16,0x21,0xf0,0x02,0x02,0x03,0x00
-# GFX90A: image_store a2, v2, s[12:19] dmask:0x7 unorm ; encoding: [0x00,0x17,0x21,0xf0,0x02,0x02,0x03,0x00]
+# GFX90A: image_store a[2:4], v2, s[12:19] dmask:0x7 unorm ; encoding: [0x00,0x17,0x21,0xf0,0x02,0x02,0x03,0x00]
0x00,0x17,0x21,0xf0,0x02,0x02,0x03,0x00
# GFX90A: image_store a1, v2, s[12:19] dmask:0x8 unorm ; encoding: [0x00,0x18,0x21,0xf0,0x02,0x01,0x03,0x00]
0x00,0x18,0x21,0xf0,0x02,0x01,0x03,0x00
-# GFX90A: image_store a2, v2, s[12:19] dmask:0x9 unorm ; encoding: [0x00,0x19,0x21,0xf0,0x02,0x02,0x03,0x00]
+# GFX90A: image_store a[2:3], v2, s[12:19] dmask:0x9 unorm ; encoding: [0x00,0x19,0x21,0xf0,0x02,0x02,0x03,0x00]
0x00,0x19,0x21,0xf0,0x02,0x02,0x03,0x00
-# GFX90A: image_store a2, v2, s[12:19] dmask:0xa unorm ; encoding: [0x00,0x1a,0x21,0xf0,0x02,0x02,0x03,0x00]
+# GFX90A: image_store a[2:3], v2, s[12:19] dmask:0xa unorm ; encoding: [0x00,0x1a,0x21,0xf0,0x02,0x02,0x03,0x00]
0x00,0x1a,0x21,0xf0,0x02,0x02,0x03,0x00
-# GFX90A: image_store a2, v2, s[12:19] dmask:0xb unorm ; encoding: [0x00,0x1b,0x21,0xf0,0x02,0x02,0x03,0x00]
+# GFX90A: image_store a[2:4], v2, s[12:19] dmask:0xb unorm ; encoding: [0x00,0x1b,0x21,0xf0,0x02,0x02,0x03,0x00]
0x00,0x1b,0x21,0xf0,0x02,0x02,0x03,0x00
-# GFX90A: image_store a2, v2, s[12:19] dmask:0xc unorm ; encoding: [0x00,0x1c,0x21,0xf0,0x02,0x02,0x03,0x00]
+# GFX90A: image_store a[2:3], v2, s[12:19] dmask:0xc unorm ; encoding: [0x00,0x1c,0x21,0xf0,0x02,0x02,0x03,0x00]
0x00,0x1c,0x21,0xf0,0x02,0x02,0x03,0x00
-# GFX90A: image_store a2, v2, s[12:19] dmask:0xd unorm ; encoding: [0x00,0x1d,0x21,0xf0,0x02,0x02,0x03,0x00]
+# GFX90A: image_store a[2:4], v2, s[12:19] dmask:0xd unorm ; encoding: [0x00,0x1d,0x21,0xf0,0x02,0x02,0x03,0x00]
0x00,0x1d,0x21,0xf0,0x02,0x02,0x03,0x00
-# GFX90A: image_store a2, v2, s[12:19] dmask:0xe unorm ; encoding: [0x00,0x1e,0x21,0xf0,0x02,0x02,0x03,0x00]
+# GFX90A: image_store a[2:4], v2, s[12:19] dmask:0xe unorm ; encoding: [0x00,0x1e,0x21,0xf0,0x02,0x02,0x03,0x00]
0x00,0x1e,0x21,0xf0,0x02,0x02,0x03,0x00
-# GFX90A: image_store a2, v2, s[12:19] dmask:0xf unorm ; encoding: [0x00,0x1f,0x21,0xf0,0x02,0x02,0x03,0x00]
+# GFX90A: image_store a[2:5], v2, s[12:19] dmask:0xf unorm ; encoding: [0x00,0x1f,0x21,0xf0,0x02,0x02,0x03,0x00]
0x00,0x1f,0x21,0xf0,0x02,0x02,0x03,0x00
# GFX90A: image_store a1, v2, s[12:19] unorm ; encoding: [0x00,0x10,0x21,0xf0,0x02,0x01,0x03,0x00]
@@ -8016,7 +8016,7 @@
# GFX90A: image_atomic_swap a5, v2, s[92:99] dmask:0x1 unorm ; encoding: [0x00,0x11,0x41,0xf0,0x02,0x05,0x17,0x00]
0x00,0x11,0x41,0xf0,0x02,0x05,0x17,0x00
-# GFX90A: image_atomic_swap a6, v2, s[8:15] dmask:0x3 unorm ; encoding: [0x00,0x13,0x41,0xf0,0x02,0x06,0x02,0x00]
+# GFX90A: image_atomic_swap a[6:7], v2, s[8:15] dmask:0x3 unorm ; encoding: [0x00,0x13,0x41,0xf0,0x02,0x06,0x02,0x00]
0x00,0x13,0x41,0xf0,0x02,0x06,0x02,0x00
# GFX90A: image_atomic_swap a5, v2, s[8:15] dmask:0x1 unorm glc ; encoding: [0x00,0x31,0x41,0xf0,0x02,0x05,0x02,0x00]
@@ -8046,7 +8046,7 @@
# GFX90A: image_atomic_cmpswap a[6:7], v2, s[92:99] dmask:0x3 unorm ; encoding: [0x00,0x13,0x45,0xf0,0x02,0x06,0x17,0x00]
0x00,0x13,0x45,0xf0,0x02,0x06,0x17,0x00
-# GFX90A: image_atomic_cmpswap a[6:7], v2, s[8:15] dmask:0xf unorm ; encoding: [0x00,0x1f,0x45,0xf0,0x02,0x06,0x02,0x00]
+# GFX90A: image_atomic_cmpswap a[6:9], v2, s[8:15] dmask:0xf unorm ; encoding: [0x00,0x1f,0x45,0xf0,0x02,0x06,0x02,0x00]
0x00,0x1f,0x45,0xf0,0x02,0x06,0x02,0x00
# GFX90A: image_atomic_cmpswap a[6:7], v2, s[8:15] dmask:0x3 unorm glc ; encoding: [0x00,0x33,0x45,0xf0,0x02,0x06,0x02,0x00]
@@ -8076,7 +8076,7 @@
# GFX90A: image_atomic_add a5, v2, s[92:99] dmask:0x1 unorm ; encoding: [0x00,0x11,0x49,0xf0,0x02,0x05,0x17,0x00]
0x00,0x11,0x49,0xf0,0x02,0x05,0x17,0x00
-# GFX90A: image_atomic_add a6, v2, s[8:15] dmask:0x3 unorm ; encoding: [0x00,0x13,0x49,0xf0,0x02,0x06,0x02,0x00]
+# GFX90A: image_atomic_add a[6:7], v2, s[8:15] dmask:0x3 unorm ; encoding: [0x00,0x13,0x49,0xf0,0x02,0x06,0x02,0x00]
0x00,0x13,0x49,0xf0,0x02,0x06,0x02,0x00
# GFX90A: image_atomic_add a5, v2, s[8:15] dmask:0x1 unorm glc ; encoding: [0x00,0x31,0x49,0xf0,0x02,0x05,0x02,0x00]
@@ -8106,7 +8106,7 @@
# GFX90A: image_atomic_sub a5, v2, s[92:99] dmask:0x1 unorm ; encoding: [0x00,0x11,0x4d,0xf0,0x02,0x05,0x17,0x00]
0x00,0x11,0x4d,0xf0,0x02,0x05,0x17,0x00
-# GFX90A: image_atomic_sub a6, v2, s[8:15] dmask:0x3 unorm ; encoding: [0x00,0x13,0x4d,0xf0,0x02,0x06,0x02,0x00]
+# GFX90A: image_atomic_sub a[6:7], v2, s[8:15] dmask:0x3 unorm ; encoding: [0x00,0x13,0x4d,0xf0,0x02,0x06,0x02,0x00]
0x00,0x13,0x4d,0xf0,0x02,0x06,0x02,0x00
# GFX90A: image_atomic_sub a5, v2, s[8:15] dmask:0x1 unorm glc ; encoding: [0x00,0x31,0x4d,0xf0,0x02,0x05,0x02,0x00]
@@ -8136,7 +8136,7 @@
# GFX90A: image_atomic_smin a5, v2, s[92:99] dmask:0x1 unorm ; encoding: [0x00,0x11,0x51,0xf0,0x02,0x05,0x17,0x00]
0x00,0x11,0x51,0xf0,0x02,0x05,0x17,0x00
-# GFX90A: image_atomic_smin a6, v2, s[8:15] dmask:0x3 unorm ; encoding: [0x00,0x13,0x51,0xf0,0x02,0x06,0x02,0x00]
+# GFX90A: image_atomic_smin a[6:7], v2, s[8:15] dmask:0x3 unorm ; encoding: [0x00,0x13,0x51,0xf0,0x02,0x06,0x02,0x00]
0x00,0x13,0x51,0xf0,0x02,0x06,0x02,0x00
# GFX90A: image_atomic_smin a5, v2, s[8:15] dmask:0x1 unorm glc ; encoding: [0x00,0x31,0x51,0xf0,0x02,0x05,0x02,0x00]
@@ -8166,7 +8166,7 @@
# GFX90A: image_atomic_umin a5, v2, s[92:99] dmask:0x1 unorm ; encoding: [0x00,0x11,0x55,0xf0,0x02,0x05,0x17,0x00]
0x00,0x11,0x55,0xf0,0x02,0x05,0x17,0x00
-# GFX90A: image_atomic_umin a6, v2, s[8:15] dmask:0x3 unorm ; encoding: [0x00,0x13,0x55,0xf0,0x02,0x06,0x02,0x00]
+# GFX90A: image_atomic_umin a[6:7], v2, s[8:15] dmask:0x3 unorm ; encoding: [0x00,0x13,0x55,0xf0,0x02,0x06,0x02,0x00]
0x00,0x13,0x55,0xf0,0x02,0x06,0x02,0x00
# GFX90A: image_atomic_umin a5, v2, s[8:15] dmask:0x1 unorm glc ; encoding: [0x00,0x31,0x55,0xf0,0x02,0x05,0x02,0x00]
@@ -8196,7 +8196,7 @@
# GFX90A: image_atomic_smax a5, v2, s[92:99] dmask:0x1 unorm ; encoding: [0x00,0x11,0x59,0xf0,0x02,0x05,0x17,0x00]
0x00,0x11,0x59,0xf0,0x02,0x05,0x17,0x00
-# GFX90A: image_atomic_smax a6, v2, s[8:15] dmask:0x3 unorm ; encoding: [0x00,0x13,0x59,0xf0,0x02,0x06,0x02,0x00]
+# GFX90A: image_atomic_smax a[6:7], v2, s[8:15] dmask:0x3 unorm ; encoding: [0x00,0x13,0x59,0xf0,0x02,0x06,0x02,0x00]
0x00,0x13,0x59,0xf0,0x02,0x06,0x02,0x00
# GFX90A: image_atomic_smax a5, v2, s[8:15] dmask:0x1 unorm glc ; encoding: [0x00,0x31,0x59,0xf0,0x02,0x05,0x02,0x00]
@@ -8226,7 +8226,7 @@
# GFX90A: image_atomic_umax a5, v2, s[92:99] dmask:0x1 unorm ; encoding: [0x00,0x11,0x5d,0xf0,0x02,0x05,0x17,0x00]
0x00,0x11,0x5d,0xf0,0x02,0x05,0x17,0x00
-# GFX90A: image_atomic_umax a6, v2, s[8:15] dmask:0x3 unorm ; encoding: [0x00,0x13,0x5d,0xf0,0x02,0x06,0x02,0x00]
+# GFX90A: image_atomic_umax a[6:7], v2, s[8:15] dmask:0x3 unorm ; encoding: [0x00,0x13,0x5d,0xf0,0x02,0x06,0x02,0x00]
0x00,0x13,0x5d,0xf0,0x02,0x06,0x02,0x00
# GFX90A: image_atomic_umax a5, v2, s[8:15] dmask:0x1 unorm glc ; encoding: [0x00,0x31,0x5d,0xf0,0x02,0x05,0x02,0x00]
@@ -8256,7 +8256,7 @@
# GFX90A: image_atomic_and a5, v2, s[92:99] dmask:0x1 unorm ; encoding: [0x00,0x11,0x61,0xf0,0x02,0x05,0x17,0x00]
0x00,0x11,0x61,0xf0,0x02,0x05,0x17,0x00
-# GFX90A: image_atomic_and a6, v2, s[8:15] dmask:0x3 unorm ; encoding: [0x00,0x13,0x61,0xf0,0x02,0x06,0x02,0x00]
+# GFX90A: image_atomic_and a[6:7], v2, s[8:15] dmask:0x3 unorm ; encoding: [0x00,0x13,0x61,0xf0,0x02,0x06,0x02,0x00]
0x00,0x13,0x61,0xf0,0x02,0x06,0x02,0x00
# GFX90A: image_atomic_and a5, v2, s[8:15] dmask:0x1 unorm glc ; encoding: [0x00,0x31,0x61,0xf0,0x02,0x05,0x02,0x00]
@@ -8286,7 +8286,7 @@
# GFX90A: image_atomic_or a5, v2, s[92:99] dmask:0x1 unorm ; encoding: [0x00,0x11,0x65,0xf0,0x02,0x05,0x17,0x00]
0x00,0x11,0x65,0xf0,0x02,0x05,0x17,0x00
-# GFX90A: image_atomic_or a6, v2, s[8:15] dmask:0x3 unorm ; encoding: [0x00,0x13,0x65,0xf0,0x02,0x06,0x02,0x00]
+# GFX90A: image_atomic_or a[6:7], v2, s[8:15] dmask:0x3 unorm ; encoding: [0x00,0x13,0x65,0xf0,0x02,0x06,0x02,0x00]
0x00,0x13,0x65,0xf0,0x02,0x06,0x02,0x00
# GFX90A: image_atomic_or a5, v2, s[8:15] dmask:0x1 unorm glc ; encoding: [0x00,0x31,0x65,0xf0,0x02,0x05,0x02,0x00]
@@ -8316,7 +8316,7 @@
# GFX90A: image_atomic_xor a5, v2, s[92:99] dmask:0x1 unorm ; encoding: [0x00,0x11,0x69,0xf0,0x02,0x05,0x17,0x00]
0x00,0x11,0x69,0xf0,0x02,0x05,0x17,0x00
-# GFX90A: image_atomic_xor a6, v2, s[8:15] dmask:0x3 unorm ; encoding: [0x00,0x13,0x69,0xf0,0x02,0x06,0x02,0x00]
+# GFX90A: image_atomic_xor a[6:7], v2, s[8:15] dmask:0x3 unorm ; encoding: [0x00,0x13,0x69,0xf0,0x02,0x06,0x02,0x00]
0x00,0x13,0x69,0xf0,0x02,0x06,0x02,0x00
# GFX90A: image_atomic_xor a5, v2, s[8:15] dmask:0x1 unorm glc ; encoding: [0x00,0x31,0x69,0xf0,0x02,0x05,0x02,0x00]
@@ -8346,7 +8346,7 @@
# GFX90A: image_atomic_inc a5, v2, s[92:99] dmask:0x1 unorm ; encoding: [0x00,0x11,0x6d,0xf0,0x02,0x05,0x17,0x00]
0x00,0x11,0x6d,0xf0,0x02,0x05,0x17,0x00
-# GFX90A: image_atomic_inc a6, v2, s[8:15] dmask:0x3 unorm ; encoding: [0x00,0x13,0x6d,0xf0,0x02,0x06,0x02,0x00]
+# GFX90A: image_atomic_inc a[6:7], v2, s[8:15] dmask:0x3 unorm ; encoding: [0x00,0x13,0x6d,0xf0,0x02,0x06,0x02,0x00]
0x00,0x13,0x6d,0xf0,0x02,0x06,0x02,0x00
# GFX90A: image_atomic_inc a5, v2, s[8:15] dmask:0x1 unorm glc ; encoding: [0x00,0x31,0x6d,0xf0,0x02,0x05,0x02,0x00]
@@ -8376,7 +8376,7 @@
# GFX90A: image_atomic_dec a5, v2, s[92:99] dmask:0x1 unorm ; encoding: [0x00,0x11,0x71,0xf0,0x02,0x05,0x17,0x00]
0x00,0x11,0x71,0xf0,0x02,0x05,0x17,0x00
-# GFX90A: image_atomic_dec a6, v2, s[8:15] dmask:0x3 unorm ; encoding: [0x00,0x13,0x71,0xf0,0x02,0x06,0x02,0x00]
+# GFX90A: image_atomic_dec a[6:7], v2, s[8:15] dmask:0x3 unorm ; encoding: [0x00,0x13,0x71,0xf0,0x02,0x06,0x02,0x00]
0x00,0x13,0x71,0xf0,0x02,0x06,0x02,0x00
# GFX90A: image_atomic_dec a5, v2, s[8:15] dmask:0x1 unorm glc ; encoding: [0x00,0x31,0x71,0xf0,0x02,0x05,0x02,0x00]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/mimg_gfx90a.txt b/llvm/test/MC/Disassembler/AMDGPU/mimg_gfx90a.txt
index ced068d669caf..b902fca9288ce 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/mimg_gfx90a.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/mimg_gfx90a.txt
@@ -1,6 +1,6 @@
# RUN: llvm-mc -arch=amdgcn -mcpu=gfx90a -disassemble -show-encoding < %s | FileCheck %s -check-prefix=GFX90A
-# GFX90A: image_load v4, v238, s[28:35] dmask:0x7 unorm ; encoding: [0x00,0x17,0x00,0xf0,0xee,0x04,0x07,0x00]
+# GFX90A: image_load v[4:6], v238, s[28:35] dmask:0x7 unorm ; encoding: [0x00,0x17,0x00,0xf0,0xee,0x04,0x07,0x00]
0x00,0x17,0x00,0xf0,0xee,0x04,0x07,0x00
# GFX90A: image_load_pck v5, v0, s[8:15] dmask:0x1 glc ; encoding: [0x00,0x21,0x08,0xf0,0x00,0x05,0x02,0x00]
@@ -15,10 +15,10 @@
# GFX90A: image_load_mip_pck v5, v1, s[8:15] dmask:0x1 ; encoding: [0x00,0x01,0x10,0xf0,0x01,0x05,0x02,0x00]
0x00,0x01,0x10,0xf0,0x01,0x05,0x02,0x00
-# GFX90A: image_load_mip_pck_sgn v4, v0, s[8:15] dmask:0x5 ; encoding: [0x00,0x05,0x14,0xf0,0x00,0x04,0x02,0x00]
+# GFX90A: image_load_mip_pck_sgn v[4:5], v0, s[8:15] dmask:0x5 ; encoding: [0x00,0x05,0x14,0xf0,0x00,0x04,0x02,0x00]
0x00,0x05,0x14,0xf0,0x00,0x04,0x02,0x00
-# GFX90A: image_store v192, v238, s[28:35] dmask:0x7 unorm ; encoding: [0x00,0x17,0x20,0xf0,0xee,0xc0,0x07,0x00]
+# GFX90A: image_store v[192:194], v238, s[28:35] dmask:0x7 unorm ; encoding: [0x00,0x17,0x20,0xf0,0xee,0xc0,0x07,0x00]
0x00,0x17,0x20,0xf0,0xee,0xc0,0x07,0x00
# GFX90A: image_store_pck v1, v2, s[12:19] dmask:0x1 unorm da ; encoding: [0x00,0x51,0x28,0xf0,0x02,0x01,0x03,0x00]
More information about the llvm-commits
mailing list