[llvm] 4a80117 - [AMDGPU][SILoadStoreOptimizer] Merge TBUFFER loads/stores
Piotr Sobczak via llvm-commits
llvm-commits at lists.llvm.org
Wed Nov 20 14:00:08 PST 2019
Author: Piotr Sobczak
Date: 2019-11-20T22:59:30+01:00
New Revision: 4a801170f36a2eed13a42730c83cd7bc57729f55
URL: https://github.com/llvm/llvm-project/commit/4a801170f36a2eed13a42730c83cd7bc57729f55
DIFF: https://github.com/llvm/llvm-project/commit/4a801170f36a2eed13a42730c83cd7bc57729f55.diff
LOG: [AMDGPU][SILoadStoreOptimizer] Merge TBUFFER loads/stores
Summary: Extend SILoadStoreOptimizer to merge tbuffer loads and stores.
Reviewers: nhaehnle
Reviewed By: nhaehnle
Subscribers: arsenm, kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D69794
Added:
llvm/test/CodeGen/AMDGPU/merge-tbuffer.mir
Modified:
llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td
llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td b/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td
index 26b8b7840270..8d70536ec21c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td
@@ -30,6 +30,147 @@ foreach intr = !listconcat(AMDGPUBufferIntrinsics,
def : RsrcIntrinsic<!cast<AMDGPURsrcIntrinsic>(intr)>;
}
+class GcnBufferFormatBase<bits<8> f, bits<8> bpc, bits<8> numc, bits<8> nfmt, bits<8> dfmt> {
+ bits<8> Format = f;
+ bits<8> BitsPerComp = bpc;
+ bits<8> NumComponents = numc;
+ bits<8> NumFormat = nfmt;
+ bits<8> DataFormat = dfmt;
+}
+
+class Gfx9BufferFormat<bits<8> f, bits<8> bpc, bits<8> numc, bits<8> nfmt, bits<8> dfmt> : GcnBufferFormatBase<f, bpc, numc, nfmt, dfmt>;
+class Gfx10PlusBufferFormat<bits<8> f, bits<8> bpc, bits<8> numc, bits<8> nfmt, bits<8> dfmt> : GcnBufferFormatBase<f, bpc, numc, nfmt, dfmt>;
+
+class GcnBufferFormatTable : GenericTable {
+ let CppTypeName = "GcnBufferFormatInfo";
+ let Fields = ["Format", "BitsPerComp", "NumComponents", "NumFormat", "DataFormat"];
+ let PrimaryKey = ["BitsPerComp", "NumComponents", "NumFormat"];
+}
+
+def Gfx9BufferFormat : GcnBufferFormatTable {
+ let FilterClass = "Gfx9BufferFormat";
+ let PrimaryKeyName = "getGfx9BufferFormatInfo";
+}
+def Gfx10PlusBufferFormat : GcnBufferFormatTable {
+ let FilterClass = "Gfx10PlusBufferFormat";
+ let PrimaryKeyName = "getGfx10PlusBufferFormatInfo";
+}
+
+def getGfx9BufferFormatInfo : SearchIndex {
+ let Table = Gfx9BufferFormat;
+ let Key = ["Format"];
+}
+def getGfx10PlusBufferFormatInfo : SearchIndex {
+ let Table = Gfx10PlusBufferFormat;
+ let Key = ["Format"];
+}
+
+// Buffer formats with equal component sizes (GFX9 and earlier)
+def : Gfx9BufferFormat< /*FORMAT_8_UNORM*/ 0x01, 8, 1, /*NUM_FORMAT_UNORM*/ 0, /*DATA_FORMAT_8*/ 1>;
+def : Gfx9BufferFormat< /*FORMAT_8_SNORM*/ 0x11, 8, 1, /*NUM_FORMAT_SNORM*/ 1, /*DATA_FORMAT_8*/ 1>;
+def : Gfx9BufferFormat< /*FORMAT_8_USCALED*/ 0x21, 8, 1, /*NUM_FORMAT_USCALED*/ 2, /*DATA_FORMAT_8*/ 1>;
+def : Gfx9BufferFormat< /*FORMAT_8_SSCALED*/ 0x31, 8, 1, /*NUM_FORMAT_SSCALED*/ 3, /*DATA_FORMAT_8*/ 1>;
+def : Gfx9BufferFormat< /*FORMAT_8_UINT*/ 0x41, 8, 1, /*NUM_FORMAT_UINT*/ 4, /*DATA_FORMAT_8*/ 1>;
+def : Gfx9BufferFormat< /*FORMAT_8_SINT*/ 0x51, 8, 1, /*NUM_FORMAT_SINT*/ 5, /*DATA_FORMAT_8*/ 1>;
+def : Gfx9BufferFormat< /*FORMAT_16_UNORM*/ 0x02, 16, 1, /*NUM_FORMAT_UNORM*/ 0, /*DATA_FORMAT_16*/ 2>;
+def : Gfx9BufferFormat< /*FORMAT_16_SNORM*/ 0x12, 16, 1, /*NUM_FORMAT_SNORM*/ 1, /*DATA_FORMAT_16*/ 2>;
+def : Gfx9BufferFormat< /*FORMAT_16_USCALED*/ 0x22, 16, 1, /*NUM_FORMAT_USCALED*/ 2, /*DATA_FORMAT_16*/ 2>;
+def : Gfx9BufferFormat< /*FORMAT_16_SSCALED*/ 0x32, 16, 1, /*NUM_FORMAT_SSCALED*/ 3, /*DATA_FORMAT_16*/ 2>;
+def : Gfx9BufferFormat< /*FORMAT_16_UINT*/ 0x42, 16, 1, /*NUM_FORMAT_UINT*/ 4, /*DATA_FORMAT_16*/ 2>;
+def : Gfx9BufferFormat< /*FORMAT_16_SINT*/ 0x52, 16, 1, /*NUM_FORMAT_SINT*/ 5, /*DATA_FORMAT_16*/ 2>;
+def : Gfx9BufferFormat< /*FORMAT_16_FLOAT*/ 0x72, 16, 1, /*NUM_FORMAT_FLOAT*/ 7, /*DATA_FORMAT_16*/ 2>;
+def : Gfx9BufferFormat< /*FORMAT_8_8_UNORM*/ 0x03, 8, 2, /*NUM_FORMAT_UNORM*/ 0, /*DATA_FORMAT_8_8*/ 3>;
+def : Gfx9BufferFormat< /*FORMAT_8_8_SNORM*/ 0x13, 8, 2, /*NUM_FORMAT_SNORM*/ 1, /*DATA_FORMAT_8_8*/ 3>;
+def : Gfx9BufferFormat< /*FORMAT_8_8_USCALED*/ 0x23, 8, 2, /*NUM_FORMAT_USCALED*/ 2, /*DATA_FORMAT_8_8*/ 3>;
+def : Gfx9BufferFormat< /*FORMAT_8_8_SSCALED*/ 0x33, 8, 2, /*NUM_FORMAT_SSCALED*/ 3, /*DATA_FORMAT_8_8*/ 3>;
+def : Gfx9BufferFormat< /*FORMAT_8_8_UINT*/ 0x43, 8, 2, /*NUM_FORMAT_UINT*/ 4, /*DATA_FORMAT_8_8*/ 3>;
+def : Gfx9BufferFormat< /*FORMAT_8_8_SINT*/ 0x53, 8, 2, /*NUM_FORMAT_SINT*/ 5, /*DATA_FORMAT_8_8*/ 3>;
+def : Gfx9BufferFormat< /*FORMAT_32_UINT*/ 0x44, 32, 1, /*NUM_FORMAT_UINT*/ 4, /*DATA_FORMAT_32*/ 4>;
+def : Gfx9BufferFormat< /*FORMAT_32_SINT*/ 0x54, 32, 1, /*NUM_FORMAT_SINT*/ 5, /*DATA_FORMAT_32*/ 4>;
+def : Gfx9BufferFormat< /*FORMAT_32_FLOAT*/ 0x74, 32, 1, /*NUM_FORMAT_FLOAT*/ 7, /*DATA_FORMAT_32*/ 4>;
+def : Gfx9BufferFormat< /*FORMAT_16_16_UNORM*/ 0x05, 16, 2, /*NUM_FORMAT_UNORM*/ 0, /*DATA_FORMAT_16_16*/ 5>;
+def : Gfx9BufferFormat< /*FORMAT_16_16_SNORM*/ 0x15, 16, 2, /*NUM_FORMAT_SNORM*/ 1, /*DATA_FORMAT_16_16*/ 5>;
+def : Gfx9BufferFormat< /*FORMAT_16_16_USCALED*/ 0x25, 16, 2, /*NUM_FORMAT_USCALED*/ 2, /*DATA_FORMAT_16_16*/ 5>;
+def : Gfx9BufferFormat< /*FORMAT_16_16_SSCALED*/ 0x35, 16, 2, /*NUM_FORMAT_SSCALED*/ 3, /*DATA_FORMAT_16_16*/ 5>;
+def : Gfx9BufferFormat< /*FORMAT_16_16_UINT*/ 0x45, 16, 2, /*NUM_FORMAT_UINT*/ 4, /*DATA_FORMAT_16_16*/ 5>;
+def : Gfx9BufferFormat< /*FORMAT_16_16_SINT*/ 0x55, 16, 2, /*NUM_FORMAT_SINT*/ 5, /*DATA_FORMAT_16_16*/ 5>;
+def : Gfx9BufferFormat< /*FORMAT_16_16_FLOAT*/ 0x75, 16, 2, /*NUM_FORMAT_FLOAT*/ 7, /*DATA_FORMAT_16_16*/ 5>;
+def : Gfx9BufferFormat< /*FORMAT_8_8_8_8_UNORM*/ 0x0A, 8, 4, /*NUM_FORMAT_UNORM*/ 0, /*DATA_FORMAT_8_8_8_8*/ 10>;
+def : Gfx9BufferFormat< /*FORMAT_8_8_8_8_SNORM*/ 0x1A, 8, 4, /*NUM_FORMAT_SNORM*/ 1, /*DATA_FORMAT_8_8_8_8*/ 10>;
+def : Gfx9BufferFormat< /*FORMAT_8_8_8_8_USCALED*/ 0x2A, 8, 4, /*NUM_FORMAT_USCALED*/ 2, /*DATA_FORMAT_8_8_8_8*/ 10>;
+def : Gfx9BufferFormat< /*FORMAT_8_8_8_8_SSCALED*/ 0x3A, 8, 4, /*NUM_FORMAT_SSCALED*/ 3, /*DATA_FORMAT_8_8_8_8*/ 10>;
+def : Gfx9BufferFormat< /*FORMAT_8_8_8_8_UINT*/ 0x4A, 8, 4, /*NUM_FORMAT_UINT*/ 4, /*DATA_FORMAT_8_8_8_8*/ 10>;
+def : Gfx9BufferFormat< /*FORMAT_8_8_8_8_SINT*/ 0x5A, 8, 4, /*NUM_FORMAT_SINT*/ 5, /*DATA_FORMAT_8_8_8_8*/ 10>;
+def : Gfx9BufferFormat< /*FORMAT_32_32_UINT*/ 0x4B, 32, 2, /*NUM_FORMAT_UINT*/ 4, /*DATA_FORMAT_32_32*/ 11>;
+def : Gfx9BufferFormat< /*FORMAT_32_32_SINT*/ 0x5B, 32, 2, /*NUM_FORMAT_SINT*/ 5, /*DATA_FORMAT_32_32*/ 11>;
+def : Gfx9BufferFormat< /*FORMAT_32_32_FLOAT*/ 0x7B, 32, 2, /*NUM_FORMAT_FLOAT*/ 7, /*DATA_FORMAT_32_32*/ 11>;
+def : Gfx9BufferFormat< /*FORMAT_16_16_16_16_UNORM*/ 0x0C, 16, 4, /*NUM_FORMAT_UNORM*/ 0, /*DATA_FORMAT_16_16_16_16*/ 12>;
+def : Gfx9BufferFormat< /*FORMAT_16_16_16_16_SNORM*/ 0x1C, 16, 4, /*NUM_FORMAT_SNORM*/ 1, /*DATA_FORMAT_16_16_16_16*/ 12>;
+def : Gfx9BufferFormat< /*FORMAT_16_16_16_16_USCALED*/ 0x2C, 16, 4, /*NUM_FORMAT_USCALED*/ 2, /*DATA_FORMAT_16_16_16_16*/ 12>;
+def : Gfx9BufferFormat< /*FORMAT_16_16_16_16_SSCALED*/ 0x3C, 16, 4, /*NUM_FORMAT_SSCALED*/ 3, /*DATA_FORMAT_16_16_16_16*/ 12>;
+def : Gfx9BufferFormat< /*FORMAT_16_16_16_16_UINT*/ 0x4C, 16, 4, /*NUM_FORMAT_UINT*/ 4, /*DATA_FORMAT_16_16_16_16*/ 12>;
+def : Gfx9BufferFormat< /*FORMAT_16_16_16_16_SINT*/ 0x5C, 16, 4, /*NUM_FORMAT_SINT*/ 5, /*DATA_FORMAT_16_16_16_16*/ 12>;
+def : Gfx9BufferFormat< /*FORMAT_16_16_16_16_FLOAT*/ 0x7C, 16, 4, /*NUM_FORMAT_FLOAT*/ 7, /*DATA_FORMAT_16_16_16_16*/ 12>;
+def : Gfx9BufferFormat< /*FORMAT_32_32_32_UINT*/ 0x4D, 32, 3, /*NUM_FORMAT_UINT*/ 4, /*DATA_FORMAT_32_32_32*/ 13>;
+def : Gfx9BufferFormat< /*FORMAT_32_32_32_SINT*/ 0x5D, 32, 3, /*NUM_FORMAT_SINT*/ 5, /*DATA_FORMAT_32_32_32*/ 13>;
+def : Gfx9BufferFormat< /*FORMAT_32_32_32_FLOAT*/ 0x7D, 32, 3, /*NUM_FORMAT_FLOAT*/ 7, /*DATA_FORMAT_32_32_32*/ 13>;
+def : Gfx9BufferFormat< /*FORMAT_32_32_32_32_UINT*/ 0x4E, 32, 4, /*NUM_FORMAT_UINT*/ 4, /*DATA_FORMAT_32_32_32_32*/ 14>;
+def : Gfx9BufferFormat< /*FORMAT_32_32_32_32_SINT*/ 0x5E, 32, 4, /*NUM_FORMAT_SINT*/ 5, /*DATA_FORMAT_32_32_32_32*/ 14>;
+def : Gfx9BufferFormat< /*FORMAT_32_32_32_32_FLOAT*/ 0x7E, 32, 4, /*NUM_FORMAT_FLOAT*/ 7, /*DATA_FORMAT_32_32_32_32*/ 14>;
+
+// Buffer formats with equal component sizes (GFX10 and later)
+def : Gfx10PlusBufferFormat< /*FORMAT_8_UNORM*/ 0x01, 8, 1, /*NUM_FORMAT_UNORM*/ 0, /*DATA_FORMAT_8*/ 1>;
+def : Gfx10PlusBufferFormat< /*FORMAT_8_SNORM*/ 0x02, 8, 1, /*NUM_FORMAT_SNORM*/ 1, /*DATA_FORMAT_8*/ 1>;
+def : Gfx10PlusBufferFormat< /*FORMAT_8_USCALED*/ 0x03, 8, 1, /*NUM_FORMAT_USCALED*/ 2, /*DATA_FORMAT_8*/ 1>;
+def : Gfx10PlusBufferFormat< /*FORMAT_8_SSCALED*/ 0x04, 8, 1, /*NUM_FORMAT_SSCALED*/ 3, /*DATA_FORMAT_8*/ 1>;
+def : Gfx10PlusBufferFormat< /*FORMAT_8_UINT*/ 0x05, 8, 1, /*NUM_FORMAT_UINT*/ 4, /*DATA_FORMAT_8*/ 1>;
+def : Gfx10PlusBufferFormat< /*FORMAT_8_SINT*/ 0x06, 8, 1, /*NUM_FORMAT_SINT*/ 5, /*DATA_FORMAT_8*/ 1>;
+def : Gfx10PlusBufferFormat< /*FORMAT_16_UNORM*/ 0x07, 16, 1, /*NUM_FORMAT_UNORM*/ 0, /*DATA_FORMAT_16*/ 2>;
+def : Gfx10PlusBufferFormat< /*FORMAT_16_SNORM*/ 0x08, 16, 1, /*NUM_FORMAT_SNORM*/ 1, /*DATA_FORMAT_16*/ 2>;
+def : Gfx10PlusBufferFormat< /*FORMAT_16_USCALED*/ 0x09, 16, 1, /*NUM_FORMAT_USCALED*/ 2, /*DATA_FORMAT_16*/ 2>;
+def : Gfx10PlusBufferFormat< /*FORMAT_16_SSCALED*/ 0x0A, 16, 1, /*NUM_FORMAT_SSCALED*/ 3, /*DATA_FORMAT_16*/ 2>;
+def : Gfx10PlusBufferFormat< /*FORMAT_16_UINT*/ 0x0B, 16, 1, /*NUM_FORMAT_UINT*/ 4, /*DATA_FORMAT_16*/ 2>;
+def : Gfx10PlusBufferFormat< /*FORMAT_16_SINT*/ 0x0C, 16, 1, /*NUM_FORMAT_SINT*/ 5, /*DATA_FORMAT_16*/ 2>;
+def : Gfx10PlusBufferFormat< /*FORMAT_16_FLOAT*/ 0x0D, 16, 1, /*NUM_FORMAT_FLOAT*/ 7, /*DATA_FORMAT_16*/ 2>;
+def : Gfx10PlusBufferFormat< /*FORMAT_8_8_UNORM*/ 0x0E, 8, 2, /*NUM_FORMAT_UNORM*/ 0, /*DATA_FORMAT_8_8*/ 3>;
+def : Gfx10PlusBufferFormat< /*FORMAT_8_8_SNORM*/ 0x0F, 8, 2, /*NUM_FORMAT_SNORM*/ 1, /*DATA_FORMAT_8_8*/ 3>;
+def : Gfx10PlusBufferFormat< /*FORMAT_8_8_USCALED*/ 0x10, 8, 2, /*NUM_FORMAT_USCALED*/ 2, /*DATA_FORMAT_8_8*/ 3>;
+def : Gfx10PlusBufferFormat< /*FORMAT_8_8_SSCALED*/ 0x11, 8, 2, /*NUM_FORMAT_SSCALED*/ 3, /*DATA_FORMAT_8_8*/ 3>;
+def : Gfx10PlusBufferFormat< /*FORMAT_8_8_UINT*/ 0x12, 8, 2, /*NUM_FORMAT_UINT*/ 4, /*DATA_FORMAT_8_8*/ 3>;
+def : Gfx10PlusBufferFormat< /*FORMAT_8_8_SINT*/ 0x13, 8, 2, /*NUM_FORMAT_SINT*/ 5, /*DATA_FORMAT_8_8*/ 3>;
+def : Gfx10PlusBufferFormat< /*FORMAT_32_UINT*/ 0x14, 32, 1, /*NUM_FORMAT_UINT*/ 4, /*DATA_FORMAT_32*/ 4>;
+def : Gfx10PlusBufferFormat< /*FORMAT_32_SINT*/ 0x15, 32, 1, /*NUM_FORMAT_SINT*/ 5, /*DATA_FORMAT_32*/ 4>;
+def : Gfx10PlusBufferFormat< /*FORMAT_32_FLOAT*/ 0x16, 32, 1, /*NUM_FORMAT_FLOAT*/ 7, /*DATA_FORMAT_32*/ 4>;
+def : Gfx10PlusBufferFormat< /*FORMAT_16_16_UNORM*/ 0x17, 16, 2, /*NUM_FORMAT_UNORM*/ 0, /*DATA_FORMAT_16_16*/ 5>;
+def : Gfx10PlusBufferFormat< /*FORMAT_16_16_SNORM*/ 0x18, 16, 2, /*NUM_FORMAT_SNORM*/ 1, /*DATA_FORMAT_16_16*/ 5>;
+def : Gfx10PlusBufferFormat< /*FORMAT_16_16_USCALED*/ 0x19, 16, 2, /*NUM_FORMAT_USCALED*/ 2, /*DATA_FORMAT_16_16*/ 5>;
+def : Gfx10PlusBufferFormat< /*FORMAT_16_16_SSCALED*/ 0x1A, 16, 2, /*NUM_FORMAT_SSCALED*/ 3, /*DATA_FORMAT_16_16*/ 5>;
+def : Gfx10PlusBufferFormat< /*FORMAT_16_16_UINT*/ 0x1B, 16, 2, /*NUM_FORMAT_UINT*/ 4, /*DATA_FORMAT_16_16*/ 5>;
+def : Gfx10PlusBufferFormat< /*FORMAT_16_16_SINT*/ 0x1C, 16, 2, /*NUM_FORMAT_SINT*/ 5, /*DATA_FORMAT_16_16*/ 5>;
+def : Gfx10PlusBufferFormat< /*FORMAT_16_16_FLOAT*/ 0x1D, 16, 2, /*NUM_FORMAT_FLOAT*/ 7, /*DATA_FORMAT_16_16*/ 5>;
+def : Gfx10PlusBufferFormat< /*FORMAT_8_8_8_8_UNORM*/ 0x38, 8, 4, /*NUM_FORMAT_UNORM*/ 0, /*DATA_FORMAT_8_8_8_8*/ 10>;
+def : Gfx10PlusBufferFormat< /*FORMAT_8_8_8_8_SNORM*/ 0x39, 8, 4, /*NUM_FORMAT_SNORM*/ 1, /*DATA_FORMAT_8_8_8_8*/ 10>;
+def : Gfx10PlusBufferFormat< /*FORMAT_8_8_8_8_USCALED*/ 0x3A, 8, 4, /*NUM_FORMAT_USCALED*/ 2, /*DATA_FORMAT_8_8_8_8*/ 10>;
+def : Gfx10PlusBufferFormat< /*FORMAT_8_8_8_8_SSCALED*/ 0x3B, 8, 4, /*NUM_FORMAT_SSCALED*/ 3, /*DATA_FORMAT_8_8_8_8*/ 10>;
+def : Gfx10PlusBufferFormat< /*FORMAT_8_8_8_8_UINT*/ 0x3C, 8, 4, /*NUM_FORMAT_UINT*/ 4, /*DATA_FORMAT_8_8_8_8*/ 10>;
+def : Gfx10PlusBufferFormat< /*FORMAT_8_8_8_8_SINT*/ 0x3D, 8, 4, /*NUM_FORMAT_SINT*/ 5, /*DATA_FORMAT_8_8_8_8*/ 10>;
+def : Gfx10PlusBufferFormat< /*FORMAT_32_32_UINT*/ 0x3E, 32, 2, /*NUM_FORMAT_UINT*/ 4, /*DATA_FORMAT_32_32*/ 11>;
+def : Gfx10PlusBufferFormat< /*FORMAT_32_32_SINT*/ 0x3F, 32, 2, /*NUM_FORMAT_SINT*/ 5, /*DATA_FORMAT_32_32*/ 11>;
+def : Gfx10PlusBufferFormat< /*FORMAT_32_32_FLOAT*/ 0x40, 32, 2, /*NUM_FORMAT_FLOAT*/ 7, /*DATA_FORMAT_32_32*/ 11>;
+def : Gfx10PlusBufferFormat< /*FORMAT_16_16_16_16_UNORM*/ 0x41, 16, 4, /*NUM_FORMAT_UNORM*/ 0, /*DATA_FORMAT_16_16_16_16*/ 12>;
+def : Gfx10PlusBufferFormat< /*FORMAT_16_16_16_16_SNORM*/ 0x42, 16, 4, /*NUM_FORMAT_SNORM*/ 1, /*DATA_FORMAT_16_16_16_16*/ 12>;
+def : Gfx10PlusBufferFormat< /*FORMAT_16_16_16_16_USCALED*/ 0x43, 16, 4, /*NUM_FORMAT_USCALED*/ 2, /*DATA_FORMAT_16_16_16_16*/ 12>;
+def : Gfx10PlusBufferFormat< /*FORMAT_16_16_16_16_SSCALED*/ 0x44, 16, 4, /*NUM_FORMAT_SSCALED*/ 3, /*DATA_FORMAT_16_16_16_16*/ 12>;
+def : Gfx10PlusBufferFormat< /*FORMAT_16_16_16_16_UINT*/ 0x45, 16, 4, /*NUM_FORMAT_UINT*/ 4, /*DATA_FORMAT_16_16_16_16*/ 12>;
+def : Gfx10PlusBufferFormat< /*FORMAT_16_16_16_16_SINT*/ 0x46, 16, 4, /*NUM_FORMAT_SINT*/ 5, /*DATA_FORMAT_16_16_16_16*/ 12>;
+def : Gfx10PlusBufferFormat< /*FORMAT_16_16_16_16_FLOAT*/ 0x47, 16, 4, /*NUM_FORMAT_FLOAT*/ 7, /*DATA_FORMAT_16_16_16_16*/ 12>;
+def : Gfx10PlusBufferFormat< /*FORMAT_32_32_32_UINT*/ 0x48, 32, 3, /*NUM_FORMAT_UINT*/ 4, /*DATA_FORMAT_32_32_32*/ 13>;
+def : Gfx10PlusBufferFormat< /*FORMAT_32_32_32_SINT*/ 0x49, 32, 3, /*NUM_FORMAT_SINT*/ 5, /*DATA_FORMAT_32_32_32*/ 13>;
+def : Gfx10PlusBufferFormat< /*FORMAT_32_32_32_FLOAT*/ 0x4A, 32, 3, /*NUM_FORMAT_FLOAT*/ 7, /*DATA_FORMAT_32_32_32*/ 13>;
+def : Gfx10PlusBufferFormat< /*FORMAT_32_32_32_32_UINT*/ 0x4B, 32, 4, /*NUM_FORMAT_UINT*/ 4, /*DATA_FORMAT_32_32_32_32*/ 14>;
+def : Gfx10PlusBufferFormat< /*FORMAT_32_32_32_32_SINT*/ 0x4C, 32, 4, /*NUM_FORMAT_SINT*/ 5, /*DATA_FORMAT_32_32_32_32*/ 14>;
+def : Gfx10PlusBufferFormat< /*FORMAT_32_32_32_32_FLOAT*/ 0x4D, 32, 4, /*NUM_FORMAT_FLOAT*/ 7, /*DATA_FORMAT_32_32_32_32*/ 14>;
+
class SourceOfDivergence<Intrinsic intr> {
Intrinsic Intr = intr;
}
diff --git a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
index 24769a89b9ba..1d64bf892ed2 100644
--- a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
+++ b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
@@ -99,6 +99,8 @@ enum InstClassEnum {
BUFFER_LOAD,
BUFFER_STORE,
MIMG,
+ TBUFFER_LOAD,
+ TBUFFER_STORE,
};
enum RegisterEnum {
@@ -119,6 +121,8 @@ class SILoadStoreOptimizer : public MachineFunctionPass {
unsigned Offset1;
unsigned Width0;
unsigned Width1;
+ unsigned Format0;
+ unsigned Format1;
unsigned BaseOff;
unsigned DMask0;
unsigned DMask1;
@@ -206,12 +210,14 @@ class SILoadStoreOptimizer : public MachineFunctionPass {
const GCNSubtarget *STM = nullptr;
const SIInstrInfo *TII = nullptr;
const SIRegisterInfo *TRI = nullptr;
+ const MCSubtargetInfo *STI = nullptr;
MachineRegisterInfo *MRI = nullptr;
AliasAnalysis *AA = nullptr;
bool OptimizeAgain;
- static bool dmasksCanBeCombined(const CombineInfo &CI, const SIInstrInfo &TII);
- static bool offsetsCanBeCombined(CombineInfo &CI);
+ static bool dmasksCanBeCombined(const CombineInfo &CI,
+ const SIInstrInfo &TII);
+ static bool offsetsCanBeCombined(CombineInfo &CI, const MCSubtargetInfo &STI);
static bool widthsFit(const GCNSubtarget &STM, const CombineInfo &CI);
static unsigned getNewOpcode(const CombineInfo &CI);
static std::pair<unsigned, unsigned> getSubRegIdxs(const CombineInfo &CI);
@@ -230,6 +236,8 @@ class SILoadStoreOptimizer : public MachineFunctionPass {
MachineBasicBlock::iterator mergeSBufferLoadImmPair(CombineInfo &CI);
MachineBasicBlock::iterator mergeBufferLoadPair(CombineInfo &CI);
MachineBasicBlock::iterator mergeBufferStorePair(CombineInfo &CI);
+ MachineBasicBlock::iterator mergeTBufferLoadPair(CombineInfo &CI);
+ MachineBasicBlock::iterator mergeTBufferStorePair(CombineInfo &CI);
void updateBaseAndOffset(MachineInstr &I, unsigned NewBase,
int32_t NewOffset) const;
@@ -285,6 +293,9 @@ static unsigned getOpcodeWidth(const MachineInstr &MI, const SIInstrInfo &TII) {
TII.getNamedOperand(MI, AMDGPU::OpName::dmask)->getImm();
return countPopulation(DMaskImm);
}
+ if (TII.isMTBUF(Opc)) {
+ return AMDGPU::getMTBUFElements(Opc);
+ }
switch (Opc) {
case AMDGPU::S_BUFFER_LOAD_DWORD_IMM:
@@ -323,10 +334,27 @@ static InstClassEnum getInstClass(unsigned Opc, const SIInstrInfo &TII) {
if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr) == -1)
return UNKNOWN;
// TODO: Support IMAGE_GET_RESINFO and IMAGE_GET_LOD.
- if (TII.get(Opc).mayStore() || !TII.get(Opc).mayLoad() || TII.isGather4(Opc))
+ if (TII.get(Opc).mayStore() || !TII.get(Opc).mayLoad() ||
+ TII.isGather4(Opc))
return UNKNOWN;
return MIMG;
}
+ if (TII.isMTBUF(Opc)) {
+ switch (AMDGPU::getMTBUFBaseOpcode(Opc)) {
+ default:
+ return UNKNOWN;
+ case AMDGPU::TBUFFER_LOAD_FORMAT_X_OFFEN:
+ case AMDGPU::TBUFFER_LOAD_FORMAT_X_OFFEN_exact:
+ case AMDGPU::TBUFFER_LOAD_FORMAT_X_OFFSET:
+ case AMDGPU::TBUFFER_LOAD_FORMAT_X_OFFSET_exact:
+ return TBUFFER_LOAD;
+ case AMDGPU::TBUFFER_STORE_FORMAT_X_OFFEN:
+ case AMDGPU::TBUFFER_STORE_FORMAT_X_OFFEN_exact:
+ case AMDGPU::TBUFFER_STORE_FORMAT_X_OFFSET:
+ case AMDGPU::TBUFFER_STORE_FORMAT_X_OFFSET_exact:
+ return TBUFFER_STORE;
+ }
+ }
return UNKNOWN;
case AMDGPU::S_BUFFER_LOAD_DWORD_IMM:
case AMDGPU::S_BUFFER_LOAD_DWORDX2_IMM:
@@ -357,6 +385,8 @@ static unsigned getInstSubclass(unsigned Opc, const SIInstrInfo &TII) {
assert(Info);
return Info->BaseOpcode;
}
+ if (TII.isMTBUF(Opc))
+ return AMDGPU::getMTBUFBaseOpcode(Opc);
return -1;
case AMDGPU::DS_READ_B32:
case AMDGPU::DS_READ_B32_gfx9:
@@ -398,6 +428,24 @@ static unsigned getRegs(unsigned Opc, const SIInstrInfo &TII) {
const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
if (Info && AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode)->Sampler)
result |= SSAMP;
+
+ return result;
+ }
+ if (TII.isMTBUF(Opc)) {
+ unsigned result = 0;
+
+ if (AMDGPU::getMTBUFHasVAddr(Opc)) {
+ result |= VADDR;
+ }
+
+ if (AMDGPU::getMTBUFHasSrsrc(Opc)) {
+ result |= SRSRC;
+ }
+
+ if (AMDGPU::getMTBUFHasSoffset(Opc)) {
+ result |= SOFFSET;
+ }
+
return result;
}
@@ -420,7 +468,6 @@ static unsigned getRegs(unsigned Opc, const SIInstrInfo &TII) {
}
}
-
void SILoadStoreOptimizer::CombineInfo::setMI(MachineBasicBlock::iterator MI,
const SIInstrInfo &TII,
const GCNSubtarget &STM) {
@@ -457,6 +504,9 @@ void SILoadStoreOptimizer::CombineInfo::setMI(MachineBasicBlock::iterator MI,
Offset0 = I->getOperand(OffsetIdx).getImm();
}
+ if (InstClass == TBUFFER_LOAD || InstClass == TBUFFER_STORE)
+ Format0 = TII.getNamedOperand(*I, AMDGPU::OpName::format)->getImm();
+
Width0 = getOpcodeWidth(*I, TII);
if ((InstClass == DS_READ) || (InstClass == DS_WRITE)) {
@@ -518,6 +568,9 @@ void SILoadStoreOptimizer::CombineInfo::setPaired(MachineBasicBlock::iterator MI
Offset1 = Paired->getOperand(OffsetIdx).getImm();
}
+ if (InstClass == TBUFFER_LOAD || InstClass == TBUFFER_STORE)
+ Format1 = TII.getNamedOperand(*Paired, AMDGPU::OpName::format)->getImm();
+
Width1 = getOpcodeWidth(*Paired, TII);
if ((InstClass == DS_READ) || (InstClass == DS_WRITE)) {
Offset1 &= 0xffff;
@@ -530,7 +583,6 @@ void SILoadStoreOptimizer::CombineInfo::setPaired(MachineBasicBlock::iterator MI
}
}
-
} // end anonymous namespace.
INITIALIZE_PASS_BEGIN(SILoadStoreOptimizer, DEBUG_TYPE,
@@ -671,7 +723,33 @@ bool SILoadStoreOptimizer::dmasksCanBeCombined(const CombineInfo &CI, const SIIn
return true;
}
-bool SILoadStoreOptimizer::offsetsCanBeCombined(CombineInfo &CI) {
+static unsigned getBufferFormatWithCompCount(unsigned OldFormat,
+ unsigned ComponentCount,
+ const MCSubtargetInfo &STI) {
+ if (ComponentCount > 4)
+ return 0;
+
+ const llvm::AMDGPU::GcnBufferFormatInfo *OldFormatInfo =
+ llvm::AMDGPU::getGcnBufferFormatInfo(OldFormat, STI);
+ if (!OldFormatInfo)
+ return 0;
+
+ const llvm::AMDGPU::GcnBufferFormatInfo *NewFormatInfo =
+ llvm::AMDGPU::getGcnBufferFormatInfo(OldFormatInfo->BitsPerComp,
+ ComponentCount,
+ OldFormatInfo->NumFormat, STI);
+
+ if (!NewFormatInfo)
+ return 0;
+
+ assert(NewFormatInfo->NumFormat == OldFormatInfo->NumFormat &&
+ NewFormatInfo->BitsPerComp == OldFormatInfo->BitsPerComp);
+
+ return NewFormatInfo->Format;
+}
+
+bool SILoadStoreOptimizer::offsetsCanBeCombined(CombineInfo &CI,
+ const MCSubtargetInfo &STI) {
assert(CI.InstClass != MIMG);
// XXX - Would the same offset be OK? Is there any reason this would happen or
@@ -683,6 +761,30 @@ bool SILoadStoreOptimizer::offsetsCanBeCombined(CombineInfo &CI) {
if ((CI.Offset0 % CI.EltSize != 0) || (CI.Offset1 % CI.EltSize != 0))
return false;
+ if (CI.InstClass == TBUFFER_LOAD || CI.InstClass == TBUFFER_STORE) {
+
+ const llvm::AMDGPU::GcnBufferFormatInfo *Info0 =
+ llvm::AMDGPU::getGcnBufferFormatInfo(CI.Format0, STI);
+ if (!Info0)
+ return false;
+ const llvm::AMDGPU::GcnBufferFormatInfo *Info1 =
+ llvm::AMDGPU::getGcnBufferFormatInfo(CI.Format1, STI);
+ if (!Info1)
+ return false;
+
+ if (Info0->BitsPerComp != Info1->BitsPerComp ||
+ Info0->NumFormat != Info1->NumFormat)
+ return false;
+
+ // TODO: Should be possible to support more formats, but if format loads
+ // are not dword-aligned, the merged load might not be valid.
+ if (Info0->BitsPerComp != 32)
+ return false;
+
+ if (getBufferFormatWithCompCount(CI.Format0, CI.Width0 + CI.Width1, STI) == 0)
+ return false;
+ }
+
unsigned EltOffset0 = CI.Offset0 / CI.EltSize;
unsigned EltOffset1 = CI.Offset1 / CI.EltSize;
CI.UseST64 = false;
@@ -814,6 +916,11 @@ bool SILoadStoreOptimizer::findMatchingInst(CombineInfo &CI) {
if (MBBI->hasOrderedMemoryRef())
return false;
+ int Swizzled =
+ AMDGPU::getNamedOperandIdx(MBBI->getOpcode(), AMDGPU::OpName::swz);
+ if (Swizzled != -1 && MBBI->getOperand(Swizzled).getImm())
+ return false;
+
// Handle a case like
// DS_WRITE_B32 addr, v, idx0
// w = DS_READ_B32 addr, idx0
@@ -834,7 +941,7 @@ bool SILoadStoreOptimizer::findMatchingInst(CombineInfo &CI) {
bool canBeCombined =
CI.InstClass == MIMG
? dmasksCanBeCombined(CI, *TII)
- : widthsFit(*STM, CI) && offsetsCanBeCombined(CI);
+ : widthsFit(*STM, CI) && offsetsCanBeCombined(CI, *STI);
// We also need to go through the list of instructions that we plan to
// move and make sure they are all safe to move down past the merged
@@ -1201,6 +1308,136 @@ SILoadStoreOptimizer::mergeBufferLoadPair(CombineInfo &CI) {
return New;
}
+MachineBasicBlock::iterator
+SILoadStoreOptimizer::mergeTBufferLoadPair(CombineInfo &CI) {
+ MachineBasicBlock *MBB = CI.I->getParent();
+ DebugLoc DL = CI.I->getDebugLoc();
+
+ const unsigned Opcode = getNewOpcode(CI);
+
+ const TargetRegisterClass *SuperRC = getTargetRegisterClass(CI);
+
+ // Copy to the new source register.
+ Register DestReg = MRI->createVirtualRegister(SuperRC);
+ unsigned MergedOffset = std::min(CI.Offset0, CI.Offset1);
+
+ auto MIB = BuildMI(*MBB, CI.Paired, DL, TII->get(Opcode), DestReg);
+
+ const unsigned Regs = getRegs(Opcode, *TII);
+
+ if (Regs & VADDR)
+ MIB.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::vaddr));
+
+ unsigned JoinedFormat =
+ getBufferFormatWithCompCount(CI.Format0, CI.Width0 + CI.Width1, *STI);
+
+ // It shouldn't be possible to get this far if the two instructions
+ // don't have a single memoperand, because MachineInstr::mayAlias()
+ // will return true if this is the case.
+ assert(CI.I->hasOneMemOperand() && CI.Paired->hasOneMemOperand());
+
+ const MachineMemOperand *MMOa = *CI.I->memoperands_begin();
+ const MachineMemOperand *MMOb = *CI.Paired->memoperands_begin();
+
+ MachineInstr *New =
+ MIB.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::srsrc))
+ .add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::soffset))
+ .addImm(MergedOffset) // offset
+ .addImm(JoinedFormat) // format
+ .addImm(CI.GLC0) // glc
+ .addImm(CI.SLC0) // slc
+ .addImm(0) // tfe
+ .addImm(CI.DLC0) // dlc
+ .addImm(0) // swz
+ .addMemOperand(
+ combineKnownAdjacentMMOs(*MBB->getParent(), MMOa, MMOb));
+
+ std::pair<unsigned, unsigned> SubRegIdx = getSubRegIdxs(CI);
+ const unsigned SubRegIdx0 = std::get<0>(SubRegIdx);
+ const unsigned SubRegIdx1 = std::get<1>(SubRegIdx);
+
+ // Copy to the old destination registers.
+ const MCInstrDesc &CopyDesc = TII->get(TargetOpcode::COPY);
+ const auto *Dest0 = TII->getNamedOperand(*CI.I, AMDGPU::OpName::vdata);
+ const auto *Dest1 = TII->getNamedOperand(*CI.Paired, AMDGPU::OpName::vdata);
+
+ BuildMI(*MBB, CI.Paired, DL, CopyDesc)
+ .add(*Dest0) // Copy to same destination including flags and sub reg.
+ .addReg(DestReg, 0, SubRegIdx0);
+ MachineInstr *Copy1 = BuildMI(*MBB, CI.Paired, DL, CopyDesc)
+ .add(*Dest1)
+ .addReg(DestReg, RegState::Kill, SubRegIdx1);
+
+ moveInstsAfter(Copy1, CI.InstsToMove);
+
+ CI.I->eraseFromParent();
+ CI.Paired->eraseFromParent();
+ return New;
+}
+
+MachineBasicBlock::iterator
+SILoadStoreOptimizer::mergeTBufferStorePair(CombineInfo &CI) {
+ MachineBasicBlock *MBB = CI.I->getParent();
+ DebugLoc DL = CI.I->getDebugLoc();
+
+ const unsigned Opcode = getNewOpcode(CI);
+
+ std::pair<unsigned, unsigned> SubRegIdx = getSubRegIdxs(CI);
+ const unsigned SubRegIdx0 = std::get<0>(SubRegIdx);
+ const unsigned SubRegIdx1 = std::get<1>(SubRegIdx);
+
+ // Copy to the new source register.
+ const TargetRegisterClass *SuperRC = getTargetRegisterClass(CI);
+ Register SrcReg = MRI->createVirtualRegister(SuperRC);
+
+ const auto *Src0 = TII->getNamedOperand(*CI.I, AMDGPU::OpName::vdata);
+ const auto *Src1 = TII->getNamedOperand(*CI.Paired, AMDGPU::OpName::vdata);
+
+ BuildMI(*MBB, CI.Paired, DL, TII->get(AMDGPU::REG_SEQUENCE), SrcReg)
+ .add(*Src0)
+ .addImm(SubRegIdx0)
+ .add(*Src1)
+ .addImm(SubRegIdx1);
+
+ auto MIB = BuildMI(*MBB, CI.Paired, DL, TII->get(Opcode))
+ .addReg(SrcReg, RegState::Kill);
+
+ const unsigned Regs = getRegs(Opcode, *TII);
+
+ if (Regs & VADDR)
+ MIB.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::vaddr));
+
+ unsigned JoinedFormat =
+ getBufferFormatWithCompCount(CI.Format0, CI.Width0 + CI.Width1, *STI);
+
+ // It shouldn't be possible to get this far if the two instructions
+ // don't have a single memoperand, because MachineInstr::mayAlias()
+ // will return true if this is the case.
+ assert(CI.I->hasOneMemOperand() && CI.Paired->hasOneMemOperand());
+
+ const MachineMemOperand *MMOa = *CI.I->memoperands_begin();
+ const MachineMemOperand *MMOb = *CI.Paired->memoperands_begin();
+
+ MachineInstr *New =
+ MIB.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::srsrc))
+ .add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::soffset))
+ .addImm(std::min(CI.Offset0, CI.Offset1)) // offset
+ .addImm(JoinedFormat) // format
+ .addImm(CI.GLC0) // glc
+ .addImm(CI.SLC0) // slc
+ .addImm(0) // tfe
+ .addImm(CI.DLC0) // dlc
+ .addImm(0) // swz
+ .addMemOperand(
+ combineKnownAdjacentMMOs(*MBB->getParent(), MMOa, MMOb));
+
+ moveInstsAfter(MIB, CI.InstsToMove);
+
+ CI.I->eraseFromParent();
+ CI.Paired->eraseFromParent();
+ return New;
+}
+
unsigned SILoadStoreOptimizer::getNewOpcode(const CombineInfo &CI) {
const unsigned Width = CI.Width0 + CI.Width1;
@@ -1210,6 +1447,11 @@ unsigned SILoadStoreOptimizer::getNewOpcode(const CombineInfo &CI) {
// FIXME: Handle d16 correctly
return AMDGPU::getMUBUFOpcode(AMDGPU::getMUBUFBaseOpcode(CI.I->getOpcode()),
Width);
+ case TBUFFER_LOAD:
+ case TBUFFER_STORE:
+ return AMDGPU::getMTBUFOpcode(AMDGPU::getMTBUFBaseOpcode(CI.I->getOpcode()),
+ Width);
+
case UNKNOWN:
llvm_unreachable("Unknown instruction class");
case S_BUFFER_LOAD_IMM:
@@ -1819,6 +2061,24 @@ SILoadStoreOptimizer::optimizeInstsWithSameBaseAddr(
OptimizeListAgain |= (CI.Width0 + CI.Width1) < 4;
}
break;
+ case TBUFFER_LOAD:
+ if (findMatchingInst(CI)) {
+ Modified = true;
+ removeCombinedInst(MergeList, *CI.Paired);
+ MachineBasicBlock::iterator NewMI = mergeTBufferLoadPair(CI);
+ CI.setMI(NewMI, *TII, *STM);
+ OptimizeListAgain |= (CI.Width0 + CI.Width1) < 4;
+ }
+ break;
+ case TBUFFER_STORE:
+ if (findMatchingInst(CI)) {
+ Modified = true;
+ removeCombinedInst(MergeList, *CI.Paired);
+ MachineBasicBlock::iterator NewMI = mergeTBufferStorePair(CI);
+ CI.setMI(NewMI, *TII, *STM);
+ OptimizeListAgain |= (CI.Width0 + CI.Width1) < 4;
+ }
+ break;
}
// Clear the InstsToMove after we have finished searching so we don't have
// stale values left over if we search for this CI again in another pass
@@ -1839,6 +2099,7 @@ bool SILoadStoreOptimizer::runOnMachineFunction(MachineFunction &MF) {
TII = STM->getInstrInfo();
TRI = &TII->getRegisterInfo();
+ STI = &MF.getSubtarget<MCSubtargetInfo>();
MRI = &MF.getRegInfo();
AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index a4b216f583dc..81d3697e79ba 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -1328,6 +1328,8 @@ struct SourceOfDivergence {
const SourceOfDivergence *lookupSourceOfDivergence(unsigned Intr);
#define GET_SourcesOfDivergence_IMPL
+#define GET_Gfx9BufferFormat_IMPL
+#define GET_Gfx10PlusBufferFormat_IMPL
#include "AMDGPUGenSearchableTables.inc"
} // end anonymous namespace
@@ -1336,5 +1338,21 @@ bool isIntrinsicSourceOfDivergence(unsigned IntrID) {
return lookupSourceOfDivergence(IntrID);
}
+const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t BitsPerComp,
+ uint8_t NumComponents,
+ uint8_t NumFormat,
+ const MCSubtargetInfo &STI) {
+ return isGFX10(STI)
+ ? getGfx10PlusBufferFormatInfo(BitsPerComp, NumComponents,
+ NumFormat)
+ : getGfx9BufferFormatInfo(BitsPerComp, NumComponents, NumFormat);
+}
+
+const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t Format,
+ const MCSubtargetInfo &STI) {
+ return isGFX10(STI) ? getGfx10PlusBufferFormatInfo(Format)
+ : getGfx9BufferFormatInfo(Format);
+}
+
} // namespace AMDGPU
} // namespace llvm
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index 05bb39235a4b..a5bada2890d2 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -41,6 +41,14 @@ class Triple;
namespace AMDGPU {
+struct GcnBufferFormatInfo {
+ unsigned Format;
+ unsigned BitsPerComp;
+ unsigned NumComponents;
+ unsigned NumFormat;
+ unsigned DataFormat;
+};
+
#define GET_MIMGBaseOpcode_DECL
#define GET_MIMGDim_DECL
#define GET_MIMGEncoding_DECL
@@ -299,6 +307,15 @@ bool getMUBUFHasSrsrc(unsigned Opc);
LLVM_READONLY
bool getMUBUFHasSoffset(unsigned Opc);
+LLVM_READONLY
+const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t BitsPerComp,
+ uint8_t NumComponents,
+ uint8_t NumFormat,
+ const MCSubtargetInfo &STI);
+LLVM_READONLY
+const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t Format,
+ const MCSubtargetInfo &STI);
+
LLVM_READONLY
int getMCOpcode(uint16_t Opcode, unsigned Gen);
@@ -646,7 +663,6 @@ bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset,
/// \returns true if the intrinsic is divergent
bool isIntrinsicSourceOfDivergence(unsigned IntrID);
-
// Track defaults for fields in the MODE registser.
struct SIModeRegisterDefaults {
/// Floating point opcodes that support exception flag gathering quiet and
diff --git a/llvm/test/CodeGen/AMDGPU/merge-tbuffer.mir b/llvm/test/CodeGen/AMDGPU/merge-tbuffer.mir
new file mode 100644
index 000000000000..41b83643d91b
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/merge-tbuffer.mir
@@ -0,0 +1,1559 @@
+# RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -run-pass si-load-store-opt -o - %s | FileCheck -check-prefix=GFX9 %s
+# RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -run-pass si-load-store-opt -o - %s | FileCheck -check-prefix=GFX10 %s
+
+#
+# GFX9 tests
+#
+
+# GFX9-LABEL: name: gfx9_tbuffer_load_x_xyz
+# GFX9: %{{[0-9]+}}:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_OFFSET %4, 0, 4, 126, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16, align 1, addrspace 4)
+# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %7.sub0
+# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %7.sub1_sub2_sub3
+name: gfx9_tbuffer_load_x_xyz
+body: |
+ bb.0.entry:
+ %0:sgpr_32 = COPY $sgpr0
+ %1:sgpr_32 = COPY $sgpr1
+ %2:sgpr_32 = COPY $sgpr2
+ %3:sgpr_32 = COPY $sgpr3
+ %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3
+ %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 116, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %8:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_OFFSET %5:sgpr_128, 0, 8, 125, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 1, addrspace 4)
+...
+---
+
+# GFX9-LABEL: name: gfx9_tbuffer_load_xyz_x
+# GFX9: %{{[0-9]+}}:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_OFFSET %4, 0, 4, 126, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16, align 1, addrspace 4)
+# GFX9: %{{[0-9]+}}:vreg_96 = COPY %7.sub0_sub1_sub2
+# GFX9: %{{[0-9]+}}:vgpr_32 = COPY killed %7.sub3
+name: gfx9_tbuffer_load_xyz_x
+body: |
+ bb.0.entry:
+ %0:sgpr_32 = COPY $sgpr0
+ %1:sgpr_32 = COPY $sgpr1
+ %2:sgpr_32 = COPY $sgpr2
+ %3:sgpr_32 = COPY $sgpr3
+ %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3
+ %7:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_OFFSET %5:sgpr_128, 0, 4, 125, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 1, addrspace 4)
+ %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 16, 116, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+...
+---
+
+# GFX9-LABEL: name: gfx9_tbuffer_load_xy_xy
+# GFX9: %{{[0-9]+}}:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_OFFSET %4, 0, 4, 126, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16, align 1, addrspace 4)
+# GFX9: %{{[0-9]+}}:vreg_64 = COPY %7.sub0_sub1
+# GFX9: %{{[0-9]+}}:vreg_64 = COPY killed %7.sub2_sub3
+name: gfx9_tbuffer_load_xy_xy
+body: |
+ bb.0.entry:
+ %0:sgpr_32 = COPY $sgpr0
+ %1:sgpr_32 = COPY $sgpr1
+ %2:sgpr_32 = COPY $sgpr2
+ %3:sgpr_32 = COPY $sgpr3
+ %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3
+ %7:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFSET %5:sgpr_128, 0, 4, 123, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8, align 1, addrspace 4)
+ %8:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFSET %5:sgpr_128, 0, 12, 123, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8, align 1, addrspace 4)
+...
+---
+
+# GFX9-LABEL: name: gfx9_tbuffer_load_x_xy
+# GFX9: %{{[0-9]+}}:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_OFFSET %4, 0, 4, 125, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 1, addrspace 4)
+# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %7.sub0
+# GFX9: %{{[0-9]+}}:vreg_64 = COPY killed %7.sub1_sub2
+name: gfx9_tbuffer_load_x_xy
+body: |
+ bb.0.entry:
+ %0:sgpr_32 = COPY $sgpr0
+ %1:sgpr_32 = COPY $sgpr1
+ %2:sgpr_32 = COPY $sgpr2
+ %3:sgpr_32 = COPY $sgpr3
+ %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3
+ %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 116, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %8:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFSET %5:sgpr_128, 0, 8, 123, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8, align 1, addrspace 4)
+...
+---
+
+# GFX9-LABEL: name: gfx9_tbuffer_load_xy_x
+# GFX9: %{{[0-9]+}}:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_OFFSET %4, 0, 4, 125, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 1, addrspace 4)
+# GFX9: %{{[0-9]+}}:vreg_64 = COPY %7.sub0_sub1
+# GFX9: %{{[0-9]+}}:vgpr_32 = COPY killed %7.sub2
+name: gfx9_tbuffer_load_xy_x
+body: |
+ bb.0.entry:
+ %0:sgpr_32 = COPY $sgpr0
+ %1:sgpr_32 = COPY $sgpr1
+ %2:sgpr_32 = COPY $sgpr2
+ %3:sgpr_32 = COPY $sgpr3
+ %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3
+ %7:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFSET %5:sgpr_128, 0, 4, 123, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8, align 1, addrspace 4)
+ %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 12, 116, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+...
+---
+
+
+# GFX9-LABEL: name: gfx9_tbuffer_load_x_x
+# GFX9: %{{[0-9]+}}:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFSET %4, 0, 4, 123, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8, align 1, addrspace 4)
+# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %7.sub0
+# GFX9: %{{[0-9]+}}:vgpr_32 = COPY killed %7.sub1
+
+name: gfx9_tbuffer_load_x_x
+body: |
+ bb.0.entry:
+ %0:sgpr_32 = COPY $sgpr0
+ %1:sgpr_32 = COPY $sgpr1
+ %2:sgpr_32 = COPY $sgpr2
+ %3:sgpr_32 = COPY $sgpr3
+ %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3
+ %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 116, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 8, 116, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+...
+---
+
+# GFX9-LABEL: name: gfx9_tbuffer_load_x_x_format_32_32_32_32
+# GFX9: %{{[0-9]+}}:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFSET %4, 0, 4, 123, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8, align 1, addrspace 4)
+# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %7.sub0
+# GFX9: %{{[0-9]+}}:vgpr_32 = COPY killed %7.sub1
+
+name: gfx9_tbuffer_load_x_x_format_32_32_32_32
+body: |
+ bb.0.entry:
+ %0:sgpr_32 = COPY $sgpr0
+ %1:sgpr_32 = COPY $sgpr1
+ %2:sgpr_32 = COPY $sgpr2
+ %3:sgpr_32 = COPY $sgpr3
+ %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3
+ %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 126, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 8, 126, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+...
+---
+
+
+# GFX9-LABEL: name: gfx9_tbuffer_load_float_32
+# GFX9: %{{[0-9]+}}:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFSET %4, 0, 4, 123, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8, align 1, addrspace 4)
+# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %14.sub0
+# GFX9: %{{[0-9]+}}:vgpr_32 = COPY killed %14.sub1
+# GFX9: %{{[0-9]+}}:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_OFFSET %4, 0, 16, 126, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16, align 1, addrspace 4)
+# GFX9: %{{[0-9]+}}:vreg_64 = COPY %18.sub0_sub1
+# GFX9: %{{[0-9]+}}:vreg_64 = COPY killed %18.sub2_sub3
+# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %15.sub0
+# GFX9: %{{[0-9]+}}:vgpr_32 = COPY killed %15.sub1
+# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %16.sub0
+# GFX9: %{{[0-9]+}}:vgpr_32 = COPY killed %16.sub1
+# GFX9: %{{[0-9]+}}:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_OFFSET %4, 0, 36, 125, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 1, addrspace 4)
+# GFX9: %{{[0-9]+}}:vreg_64 = COPY %19.sub0_sub1
+# GFX9: %{{[0-9]+}}:vgpr_32 = COPY killed %19.sub2
+# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %17.sub0
+# GFX9: %{{[0-9]+}}:vgpr_32 = COPY killed %17.sub1
+
+name: gfx9_tbuffer_load_float_32
+body: |
+ bb.0.entry:
+ %0:sgpr_32 = COPY $sgpr0
+ %1:sgpr_32 = COPY $sgpr1
+ %2:sgpr_32 = COPY $sgpr2
+ %3:sgpr_32 = COPY $sgpr3
+ %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3
+ %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 116, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 8, 116, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %9:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 16, 116, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %10:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 20, 116, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %11:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 24, 116, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %12:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 28, 116, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %13:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 36, 116, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %14:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 40, 116, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %15:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 44, 116, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+...
+---
+
+# GFX9-LABEL: name: gfx9_tbuffer_load_sint_32
+# GFX9: %{{[0-9]+}}:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFSET %4, 0, 4, 91, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8, align 1, addrspace 4)
+# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %14.sub0
+# GFX9: %{{[0-9]+}}:vgpr_32 = COPY killed %14.sub1
+# GFX9: %{{[0-9]+}}:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_OFFSET %4, 0, 16, 94, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16, align 1, addrspace 4)
+# GFX9: %{{[0-9]+}}:vreg_64 = COPY %18.sub0_sub1
+# GFX9: %{{[0-9]+}}:vreg_64 = COPY killed %18.sub2_sub3
+# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %15.sub0
+# GFX9: %{{[0-9]+}}:vgpr_32 = COPY killed %15.sub1
+# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %16.sub0
+# GFX9: %{{[0-9]+}}:vgpr_32 = COPY killed %16.sub1
+# GFX9: %{{[0-9]+}}:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_OFFSET %4, 0, 36, 93, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 1, addrspace 4)
+# GFX9: %{{[0-9]+}}:vreg_64 = COPY %19.sub0_sub1
+# GFX9: %{{[0-9]+}}:vgpr_32 = COPY killed %19.sub2
+# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %17.sub0
+# GFX9: %{{[0-9]+}}:vgpr_32 = COPY killed %17.sub1
+
+name: gfx9_tbuffer_load_sint_32
+body: |
+ bb.0.entry:
+ %0:sgpr_32 = COPY $sgpr0
+ %1:sgpr_32 = COPY $sgpr1
+ %2:sgpr_32 = COPY $sgpr2
+ %3:sgpr_32 = COPY $sgpr3
+ %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3
+ %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 84, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 8, 84, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %9:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 16, 84, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %10:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 20, 84, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %11:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 24, 84, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %12:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 28, 84, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %13:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 36, 84, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %14:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 40, 84, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %15:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 44, 84, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+...
+---
+
+# GFX9-LABEL: name: gfx9_tbuffer_load_uint_32
+# GFX9: %{{[0-9]+}}:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFSET %4, 0, 4, 75, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8, align 1, addrspace 4)
+# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %14.sub0
+# GFX9: %{{[0-9]+}}:vgpr_32 = COPY killed %14.sub1
+# GFX9: %{{[0-9]+}}:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_OFFSET %4, 0, 16, 78, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16, align 1, addrspace 4)
+# GFX9: %{{[0-9]+}}:vreg_64 = COPY %18.sub0_sub1
+# GFX9: %{{[0-9]+}}:vreg_64 = COPY killed %18.sub2_sub3
+# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %15.sub0
+# GFX9: %{{[0-9]+}}:vgpr_32 = COPY killed %15.sub1
+# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %16.sub0
+# GFX9: %{{[0-9]+}}:vgpr_32 = COPY killed %16.sub1
+# GFX9: %{{[0-9]+}}:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_OFFSET %4, 0, 36, 77, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 1, addrspace 4)
+# GFX9: %{{[0-9]+}}:vreg_64 = COPY %19.sub0_sub1
+# GFX9: %{{[0-9]+}}:vgpr_32 = COPY killed %19.sub2
+# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %17.sub0
+# GFX9: %{{[0-9]+}}:vgpr_32 = COPY killed %17.sub1
+
+name: gfx9_tbuffer_load_uint_32
+body: |
+ bb.0.entry:
+ %0:sgpr_32 = COPY $sgpr0
+ %1:sgpr_32 = COPY $sgpr1
+ %2:sgpr_32 = COPY $sgpr2
+ %3:sgpr_32 = COPY $sgpr3
+ %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3
+ %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 68, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 8, 68, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %9:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 16, 68, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %10:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 20, 68, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %11:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 24, 68, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %12:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 28, 68, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %13:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 36, 68, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %14:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 40, 68, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %15:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 44, 68, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+...
+---
+
+# GFX9-LABEL: name: gfx9_tbuffer_load_not_merged_data_format_mismatch
+# GFX9: %{{[0-9]+}}:sgpr_32 = COPY $sgpr0
+# GFX9: %{{[0-9]+}}:sgpr_32 = COPY $sgpr1
+# GFX9: %{{[0-9]+}}:sgpr_32 = COPY $sgpr2
+# GFX9: %{{[0-9]+}}:sgpr_32 = COPY $sgpr3
+# GFX9: %{{[0-9]+}}:sgpr_128 = REG_SEQUENCE %0, %subreg.sub0, %1, %subreg.sub1, %2, %subreg.sub2, %3, %subreg.sub3
+# GFX9: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 4, 116, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+# GFX9: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 8, 114, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+# GFX9: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 16, 116, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+# GFX9: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 20, 114, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+# GFX9: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 24, 116, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+# GFX9: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 28, 114, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+# GFX9: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 36, 116, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+# GFX9: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 40, 114, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+# GFX9: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 44, 116, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+
+
+name: gfx9_tbuffer_load_not_merged_data_format_mismatch
+body: |
+ bb.0.entry:
+ %0:sgpr_32 = COPY $sgpr0
+ %1:sgpr_32 = COPY $sgpr1
+ %2:sgpr_32 = COPY $sgpr2
+ %3:sgpr_32 = COPY $sgpr3
+ %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3
+ %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 116, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 8, 114, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %9:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 16, 116, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %10:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 20, 114, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %11:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 24, 116, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %12:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 28, 114, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %13:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 36, 116, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %14:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 40, 114, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %15:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 44, 116, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+...
+---
+
+# GFX9-LABEL: name: gfx9_tbuffer_load_not_merged_num_format_mismatch
+# GFX9: %{{[0-9]+}}:sgpr_32 = COPY $sgpr0
+# GFX9: %{{[0-9]+}}:sgpr_32 = COPY $sgpr1
+# GFX9: %{{[0-9]+}}:sgpr_32 = COPY $sgpr2
+# GFX9: %{{[0-9]+}}:sgpr_32 = COPY $sgpr3
+# GFX9: %{{[0-9]+}}:sgpr_128 = REG_SEQUENCE %0, %subreg.sub0, %1, %subreg.sub1, %2, %subreg.sub2, %3, %subreg.sub3
+# GFX9: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 4, 116, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+# GFX9: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 8, 84, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+# GFX9: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 16, 116, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+# GFX9: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 20, 84, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+# GFX9: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 24, 116, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+# GFX9: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 28, 84, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+# GFX9: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 36, 116, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+# GFX9: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 40, 84, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+# GFX9: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 44, 116, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+name: gfx9_tbuffer_load_not_merged_num_format_mismatch
+body: |
+ bb.0.entry:
+ %0:sgpr_32 = COPY $sgpr0
+ %1:sgpr_32 = COPY $sgpr1
+ %2:sgpr_32 = COPY $sgpr2
+ %3:sgpr_32 = COPY $sgpr3
+ %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3
+ %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 116, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 8, 84, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %9:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 16, 116, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %10:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 20, 84, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %11:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 24, 116, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %12:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 28, 84, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %13:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 36, 116, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %14:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 40, 84, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %15:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 44, 116, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+...
+---
+
+# GFX9-LABEL: name: gfx9_tbuffer_store_x_xyz
+# GFX9: %{{[0-9]+}}:vreg_96 = REG_SEQUENCE %3, %subreg.sub0, %2, %subreg.sub1, %1, %subreg.sub2
+# GFX9: %{{[0-9]+}}:vreg_128 = REG_SEQUENCE %0, %subreg.sub0, %9, %subreg.sub1_sub2_sub3
+# GFX9: TBUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed %10, %8, 0, 4, 126, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16, align 1, addrspace 4)
+name: gfx9_tbuffer_store_x_xyz
+body: |
+ bb.0.entry:
+ liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ %7:vgpr_32 = COPY $vgpr3
+ %6:vgpr_32 = COPY $vgpr2
+ %5:vgpr_32 = COPY $vgpr1
+ %4:vgpr_32 = COPY $vgpr0
+ %3:sgpr_32 = COPY $sgpr3
+ %2:sgpr_32 = COPY $sgpr2
+ %1:sgpr_32 = COPY $sgpr1
+ %0:sgpr_32 = COPY $sgpr0
+ %13:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3
+ %14:vreg_96 = REG_SEQUENCE %4:vgpr_32, %subreg.sub0, %5:vgpr_32, %subreg.sub1, %6:vgpr_32, %subreg.sub2
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %7:vgpr_32, %13:sgpr_128, 0, 4, 116, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_XYZ_OFFSET_exact %14:vreg_96, %13:sgpr_128, 0, 8, 125, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 12, align 1, addrspace 4)
+...
+---
+
+
+
+# GFX9-LABEL: name: gfx9_tbuffer_store_xyz_x
+# GFX9: %{{[0-9]+}}:vreg_96 = REG_SEQUENCE %3, %subreg.sub0, %2, %subreg.sub1, %1, %subreg.sub2
+# GFX9: %{{[0-9]+}}:vreg_128 = REG_SEQUENCE %9, %subreg.sub0_sub1_sub2, %0, %subreg.sub3
+# GFX9: TBUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed %10, %8, 0, 4, 126, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16, align 1, addrspace 4)
+name: gfx9_tbuffer_store_xyz_x
+body: |
+ bb.0.entry:
+ liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ %7:vgpr_32 = COPY $vgpr3
+ %6:vgpr_32 = COPY $vgpr2
+ %5:vgpr_32 = COPY $vgpr1
+ %4:vgpr_32 = COPY $vgpr0
+ %3:sgpr_32 = COPY $sgpr3
+ %2:sgpr_32 = COPY $sgpr2
+ %1:sgpr_32 = COPY $sgpr1
+ %0:sgpr_32 = COPY $sgpr0
+ %13:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3
+ %14:vreg_96 = REG_SEQUENCE %4:vgpr_32, %subreg.sub0, %5:vgpr_32, %subreg.sub1, %6:vgpr_32, %subreg.sub2
+ TBUFFER_STORE_FORMAT_XYZ_OFFSET_exact %14:vreg_96, %13:sgpr_128, 0, 4, 125, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 12, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %7:vgpr_32, %13:sgpr_128, 0, 16, 116, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+...
+---
+
+# GFX9-LABEL: name: gfx9_tbuffer_store_xy_xy
+# GFX9: %{{[0-9]+}}:vreg_64 = REG_SEQUENCE %3, %subreg.sub0, %2, %subreg.sub1
+# GFX9: %{{[0-9]+}}:vreg_64 = REG_SEQUENCE %1, %subreg.sub0, %0, %subreg.sub1
+# GFX9: %{{[0-9]+}}:vreg_128 = REG_SEQUENCE %9, %subreg.sub0_sub1, %10, %subreg.sub2_sub3
+# GFX9: TBUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed %11, %8, 0, 4, 126, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16, align 1, addrspace 4)
+name: gfx9_tbuffer_store_xy_xy
+body: |
+ bb.0.entry:
+ liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ %7:vgpr_32 = COPY $vgpr3
+ %6:vgpr_32 = COPY $vgpr2
+ %5:vgpr_32 = COPY $vgpr1
+ %4:vgpr_32 = COPY $vgpr0
+ %3:sgpr_32 = COPY $sgpr3
+ %2:sgpr_32 = COPY $sgpr2
+ %1:sgpr_32 = COPY $sgpr1
+ %0:sgpr_32 = COPY $sgpr0
+ %13:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3
+ %14:vreg_64 = REG_SEQUENCE %4:vgpr_32, %subreg.sub0, %5:vgpr_32, %subreg.sub1
+ %15:vreg_64 = REG_SEQUENCE %6:vgpr_32, %subreg.sub0, %7:vgpr_32, %subreg.sub1
+ TBUFFER_STORE_FORMAT_XY_OFFSET_exact %14:vreg_64, %13:sgpr_128, 0, 4, 123, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_XY_OFFSET_exact %15:vreg_64, %13:sgpr_128, 0, 12, 123, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8, align 1, addrspace 4)
+...
+---
+
+# GFX9-LABEL: name: gfx9_tbuffer_store_x_xy
+# GFX9: %{{[0-9]+}}:vreg_64 = REG_SEQUENCE %3, %subreg.sub0, %2, %subreg.sub1
+# GFX9: %{{[0-9]+}}:vreg_64, %subreg.sub1_sub2
+# GFX9: TBUFFER_STORE_FORMAT_XYZ_OFFSET_exact killed %11, %8, 0, 4, 125, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 12, align 1, addrspace 4)
+name: gfx9_tbuffer_store_x_xy
+body: |
+ bb.0.entry:
+ liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ %7:vgpr_32 = COPY $vgpr3
+ %6:vgpr_32 = COPY $vgpr2
+ %5:vgpr_32 = COPY $vgpr1
+ %4:vgpr_32 = COPY $vgpr0
+ %3:sgpr_32 = COPY $sgpr3
+ %2:sgpr_32 = COPY $sgpr2
+ %1:sgpr_32 = COPY $sgpr1
+ %0:sgpr_32 = COPY $sgpr0
+ %13:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3
+ %14:vreg_64 = REG_SEQUENCE %4:vgpr_32, %subreg.sub0, %5:vgpr_32, %subreg.sub1
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %7:vgpr_32, %13:sgpr_128, 0, 4, 116, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_XY_OFFSET_exact %15:vreg_64, %13:sgpr_128, 0, 8, 123, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8, align 1, addrspace 4)
+...
+---
+
+# GFX9-LABEL: name: gfx9_tbuffer_store_xy_x
+# GFX9: %{{[0-9]+}}:vreg_64 = REG_SEQUENCE %3, %subreg.sub0, %2, %subreg.sub1
+# GFX9: %{{[0-9]+}}:vreg_96 = REG_SEQUENCE %9, %subreg.sub0_sub1, %0, %subreg.sub2
+# GFX9: TBUFFER_STORE_FORMAT_XYZ_OFFSET_exact killed %10, %8, 0, 4, 125, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 12, align 1, addrspace 4)
+name: gfx9_tbuffer_store_xy_x
+body: |
+ bb.0.entry:
+ liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ %7:vgpr_32 = COPY $vgpr3
+ %6:vgpr_32 = COPY $vgpr2
+ %5:vgpr_32 = COPY $vgpr1
+ %4:vgpr_32 = COPY $vgpr0
+ %3:sgpr_32 = COPY $sgpr3
+ %2:sgpr_32 = COPY $sgpr2
+ %1:sgpr_32 = COPY $sgpr1
+ %0:sgpr_32 = COPY $sgpr0
+ %13:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3
+ %14:vreg_64 = REG_SEQUENCE %4:vgpr_32, %subreg.sub0, %5:vgpr_32, %subreg.sub1
+ TBUFFER_STORE_FORMAT_XY_OFFSET_exact %14:vreg_64, %13:sgpr_128, 0, 4, 123, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %7:vgpr_32, %13:sgpr_128, 0, 12, 116, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+...
+---
+
+
+# GFX9-LABEL: name: gfx9_tbuffer_store_x_x
+# GFX9: %{{[0-9]+}}:vreg_64 = REG_SEQUENCE %1, %subreg.sub0, %0, %subreg.sub1
+# GFX9: TBUFFER_STORE_FORMAT_XY_OFFSET_exact killed %9, %8, 0, 4, 123, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8, align 1, addrspace 4)
+name: gfx9_tbuffer_store_x_x
+body: |
+ bb.0.entry:
+ liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ %7:vgpr_32 = COPY $vgpr3
+ %6:vgpr_32 = COPY $vgpr2
+ %5:vgpr_32 = COPY $vgpr1
+ %4:vgpr_32 = COPY $vgpr0
+ %3:sgpr_32 = COPY $sgpr3
+ %2:sgpr_32 = COPY $sgpr2
+ %1:sgpr_32 = COPY $sgpr1
+ %0:sgpr_32 = COPY $sgpr0
+ %13:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %6:vgpr_32, %13:sgpr_128, 0, 4, 116, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %7:vgpr_32, %13:sgpr_128, 0, 8, 116, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+...
+---
+
+# GFX9-LABEL: name: gfx9_tbuffer_store_x_x_format_32_32_32_32
+# GFX9: %{{[0-9]+}}:vreg_64 = REG_SEQUENCE %1, %subreg.sub0, %0, %subreg.sub1
+# GFX9: TBUFFER_STORE_FORMAT_XY_OFFSET_exact killed %9, %8, 0, 4, 123, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8, align 1, addrspace 4)
+name: gfx9_tbuffer_store_x_x_format_32_32_32_32
+body: |
+ bb.0.entry:
+ liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ %7:vgpr_32 = COPY $vgpr3
+ %6:vgpr_32 = COPY $vgpr2
+ %5:vgpr_32 = COPY $vgpr1
+ %4:vgpr_32 = COPY $vgpr0
+ %3:sgpr_32 = COPY $sgpr3
+ %2:sgpr_32 = COPY $sgpr2
+ %1:sgpr_32 = COPY $sgpr1
+ %0:sgpr_32 = COPY $sgpr0
+ %13:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %6:vgpr_32, %13:sgpr_128, 0, 4, 126, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %7:vgpr_32, %13:sgpr_128, 0, 8, 126, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+...
+---
+
+# GFX9-LABEL: name: gfx9_tbuffer_store_float32
+# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr8
+# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr7
+# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr6
+# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr5
+# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr4
+# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr3
+# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr2
+# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr1
+# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr0
+# GFX9: %{{[0-9]+}}:sgpr_32 = COPY $sgpr3
+# GFX9: %{{[0-9]+}}:sgpr_32 = COPY $sgpr2
+# GFX9: %{{[0-9]+}}:sgpr_32 = COPY $sgpr1
+# GFX9: %{{[0-9]+}}:sgpr_32 = COPY $sgpr0
+# GFX9: %{{[0-9]+}}:sgpr_128 = REG_SEQUENCE %12, %subreg.sub0, %11, %subreg.sub1, %10, %subreg.sub2, %9, %subreg.sub3
+# GFX9: %{{[0-9]+}}:vreg_64 = REG_SEQUENCE %8, %subreg.sub0, %7, %subreg.sub1
+# GFX9: TBUFFER_STORE_FORMAT_XY_OFFSET_exact killed %14, %13, 0, 4, 123, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8, align 1, addrspace 4)
+# GFX9: %{{[0-9]+}}:vreg_64 = REG_SEQUENCE %6, %subreg.sub0, %5, %subreg.sub1
+# GFX9: %{{[0-9]+}}:vreg_64 = REG_SEQUENCE %4, %subreg.sub0, %3, %subreg.sub1
+# GFX9: %{{[0-9]+}}:vreg_128 = REG_SEQUENCE killed %15, %subreg.sub0_sub1, killed %16, %subreg.sub2_sub3
+# GFX9: TBUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed %18, %13, 0, 16, 126, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16, align 1, addrspace 4)
+# GFX9: %{{[0-9]+}}:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %1, %subreg.sub1
+# GFX9: %{{[0-9]+}}:vreg_96 = REG_SEQUENCE killed %17, %subreg.sub0_sub1, %0, %subreg.sub2
+# GFX9: TBUFFER_STORE_FORMAT_XYZ_OFFSET_exact killed %19, %13, 0, 36, 125, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 12, align 1, addrspace 4)
+name: gfx9_tbuffer_store_float32
+body: |
+ bb.0.entry:
+ liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8
+ %12:vgpr_32 = COPY $vgpr8
+ %11:vgpr_32 = COPY $vgpr7
+ %10:vgpr_32 = COPY $vgpr6
+ %9:vgpr_32 = COPY $vgpr5
+ %8:vgpr_32 = COPY $vgpr4
+ %7:vgpr_32 = COPY $vgpr3
+ %6:vgpr_32 = COPY $vgpr2
+ %5:vgpr_32 = COPY $vgpr1
+ %4:vgpr_32 = COPY $vgpr0
+ %3:sgpr_32 = COPY $sgpr3
+ %2:sgpr_32 = COPY $sgpr2
+ %1:sgpr_32 = COPY $sgpr1
+ %0:sgpr_32 = COPY $sgpr0
+ %13:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %4:vgpr_32, %13:sgpr_128, 0, 4, 116, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %5:vgpr_32, %13:sgpr_128, 0, 8, 116, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %6:vgpr_32, %13:sgpr_128, 0, 16, 116, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %7:vgpr_32, %13:sgpr_128, 0, 20, 116, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %8:vgpr_32, %13:sgpr_128, 0, 24, 116, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %9:vgpr_32, %13:sgpr_128, 0, 28, 116, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %10:vgpr_32, %13:sgpr_128, 0, 36, 116, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %11:vgpr_32, %13:sgpr_128, 0, 40, 116, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %12:vgpr_32, %13:sgpr_128, 0, 44, 116, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+...
+---
+
+# GFX9-LABEL: name: gfx9_tbuffer_store_sint32
+# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr8
+# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr7
+# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr6
+# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr5
+# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr4
+# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr3
+# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr2
+# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr1
+# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr0
+# GFX9: %{{[0-9]+}}:sgpr_32 = COPY $sgpr3
+# GFX9: %{{[0-9]+}}:sgpr_32 = COPY $sgpr2
+# GFX9: %{{[0-9]+}}:sgpr_32 = COPY $sgpr1
+# GFX9: %{{[0-9]+}}:sgpr_32 = COPY $sgpr0
+# GFX9: %{{[0-9]+}}:sgpr_128 = REG_SEQUENCE %12, %subreg.sub0, %11, %subreg.sub1, %10, %subreg.sub2, %9, %subreg.sub3
+# GFX9: %{{[0-9]+}}:vreg_64 = REG_SEQUENCE %8, %subreg.sub0, %7, %subreg.sub1
+# GFX9: TBUFFER_STORE_FORMAT_XY_OFFSET_exact killed %14, %13, 0, 4, 91, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8, align 1, addrspace 4)
+# GFX9: %{{[0-9]+}}:vreg_64 = REG_SEQUENCE %6, %subreg.sub0, %5, %subreg.sub1
+# GFX9: %{{[0-9]+}}:vreg_64 = REG_SEQUENCE %4, %subreg.sub0, %3, %subreg.sub1
+# GFX9: %{{[0-9]+}}:vreg_128 = REG_SEQUENCE killed %15, %subreg.sub0_sub1, killed %16, %subreg.sub2_sub3
+# GFX9: TBUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed %18, %13, 0, 16, 94, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16, align 1, addrspace 4)
+# GFX9: %{{[0-9]+}}:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %1, %subreg.sub1
+# GFX9: %{{[0-9]+}}:vreg_96 = REG_SEQUENCE killed %17, %subreg.sub0_sub1, %0, %subreg.sub2
+# GFX9: TBUFFER_STORE_FORMAT_XYZ_OFFSET_exact killed %19, %13, 0, 36, 93, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 12, align 1, addrspace 4)
+name: gfx9_tbuffer_store_sint32
+body: |
+ bb.0.entry:
+ liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8
+ %12:vgpr_32 = COPY $vgpr8
+ %11:vgpr_32 = COPY $vgpr7
+ %10:vgpr_32 = COPY $vgpr6
+ %9:vgpr_32 = COPY $vgpr5
+ %8:vgpr_32 = COPY $vgpr4
+ %7:vgpr_32 = COPY $vgpr3
+ %6:vgpr_32 = COPY $vgpr2
+ %5:vgpr_32 = COPY $vgpr1
+ %4:vgpr_32 = COPY $vgpr0
+ %3:sgpr_32 = COPY $sgpr3
+ %2:sgpr_32 = COPY $sgpr2
+ %1:sgpr_32 = COPY $sgpr1
+ %0:sgpr_32 = COPY $sgpr0
+ %13:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %4:vgpr_32, %13:sgpr_128, 0, 4, 84, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %5:vgpr_32, %13:sgpr_128, 0, 8, 84, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %6:vgpr_32, %13:sgpr_128, 0, 16, 84, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %7:vgpr_32, %13:sgpr_128, 0, 20, 84, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %8:vgpr_32, %13:sgpr_128, 0, 24, 84, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %9:vgpr_32, %13:sgpr_128, 0, 28, 84, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %10:vgpr_32, %13:sgpr_128, 0, 36, 84, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %11:vgpr_32, %13:sgpr_128, 0, 40, 84, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %12:vgpr_32, %13:sgpr_128, 0, 44, 84, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+...
+---
+
+# GFX9-LABEL: name: gfx9_tbuffer_store_uint32
+# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr8
+# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr7
+# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr6
+# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr5
+# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr4
+# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr3
+# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr2
+# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr1
+# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr0
+# GFX9: %{{[0-9]+}}:sgpr_32 = COPY $sgpr3
+# GFX9: %{{[0-9]+}}:sgpr_32 = COPY $sgpr2
+# GFX9: %{{[0-9]+}}:sgpr_32 = COPY $sgpr1
+# GFX9: %{{[0-9]+}}:sgpr_32 = COPY $sgpr0
+# GFX9: %{{[0-9]+}}:sgpr_128 = REG_SEQUENCE %12, %subreg.sub0, %11, %subreg.sub1, %10, %subreg.sub2, %9, %subreg.sub3
+# GFX9: %{{[0-9]+}}:vreg_64 = REG_SEQUENCE %8, %subreg.sub0, %7, %subreg.sub1
+# GFX9: TBUFFER_STORE_FORMAT_XY_OFFSET_exact killed %14, %13, 0, 4, 75, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8, align 1, addrspace 4)
+# GFX9: %{{[0-9]+}}:vreg_64 = REG_SEQUENCE %6, %subreg.sub0, %5, %subreg.sub1
+# GFX9: %{{[0-9]+}}:vreg_64 = REG_SEQUENCE %4, %subreg.sub0, %3, %subreg.sub1
+# GFX9: %{{[0-9]+}}:vreg_128 = REG_SEQUENCE killed %15, %subreg.sub0_sub1, killed %16, %subreg.sub2_sub3
+# GFX9: TBUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed %18, %13, 0, 16, 78, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16, align 1, addrspace 4)
+# GFX9: %{{[0-9]+}}:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %1, %subreg.sub1
+# GFX9: %{{[0-9]+}}:vreg_96 = REG_SEQUENCE killed %17, %subreg.sub0_sub1, %0, %subreg.sub2
+# GFX9: TBUFFER_STORE_FORMAT_XYZ_OFFSET_exact killed %19, %13, 0, 36, 77, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 12, align 1, addrspace 4)
+name: gfx9_tbuffer_store_uint32
+body: |
+ bb.0.entry:
+ liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8
+ %12:vgpr_32 = COPY $vgpr8
+ %11:vgpr_32 = COPY $vgpr7
+ %10:vgpr_32 = COPY $vgpr6
+ %9:vgpr_32 = COPY $vgpr5
+ %8:vgpr_32 = COPY $vgpr4
+ %7:vgpr_32 = COPY $vgpr3
+ %6:vgpr_32 = COPY $vgpr2
+ %5:vgpr_32 = COPY $vgpr1
+ %4:vgpr_32 = COPY $vgpr0
+ %3:sgpr_32 = COPY $sgpr3
+ %2:sgpr_32 = COPY $sgpr2
+ %1:sgpr_32 = COPY $sgpr1
+ %0:sgpr_32 = COPY $sgpr0
+ %13:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %4:vgpr_32, %13:sgpr_128, 0, 4, 68, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %5:vgpr_32, %13:sgpr_128, 0, 8, 68, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %6:vgpr_32, %13:sgpr_128, 0, 16, 68, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %7:vgpr_32, %13:sgpr_128, 0, 20, 68, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %8:vgpr_32, %13:sgpr_128, 0, 24, 68, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %9:vgpr_32, %13:sgpr_128, 0, 28, 68, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %10:vgpr_32, %13:sgpr_128, 0, 36, 68, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %11:vgpr_32, %13:sgpr_128, 0, 40, 68, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %12:vgpr_32, %13:sgpr_128, 0, 44, 68, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+...
+---
+
+# GFX9-LABEL: name: gfx9_tbuffer_store_not_merged_data_format_mismatch
+# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr8
+# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr7
+# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr6
+# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr5
+# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr4
+# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr3
+# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr2
+# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr1
+# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr0
+# GFX9: %{{[0-9]+}}:sgpr_32 = COPY $sgpr3
+# GFX9: %{{[0-9]+}}:sgpr_32 = COPY $sgpr2
+# GFX9: %{{[0-9]+}}:sgpr_32 = COPY $sgpr1
+# GFX9: %{{[0-9]+}}:sgpr_32 = COPY $sgpr0
+# GFX9: %{{[0-9]+}}:sgpr_128 = REG_SEQUENCE %12, %subreg.sub0, %11, %subreg.sub1, %10, %subreg.sub2, %9, %subreg.sub3
+# GFX9: TBUFFER_STORE_FORMAT_X_OFFSET_exact %8, %13, 0, 4, 116, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+# GFX9: TBUFFER_STORE_FORMAT_X_OFFSET_exact %7, %13, 0, 8, 84, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+# GFX9: TBUFFER_STORE_FORMAT_X_OFFSET_exact %6, %13, 0, 16, 116, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+# GFX9: TBUFFER_STORE_FORMAT_X_OFFSET_exact %5, %13, 0, 20, 84, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+# GFX9: TBUFFER_STORE_FORMAT_X_OFFSET_exact %4, %13, 0, 24, 116, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+# GFX9: TBUFFER_STORE_FORMAT_X_OFFSET_exact %3, %13, 0, 28, 84, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+# GFX9: TBUFFER_STORE_FORMAT_X_OFFSET_exact %2, %13, 0, 36, 116, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+# GFX9: TBUFFER_STORE_FORMAT_X_OFFSET_exact %1, %13, 0, 40, 84, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+# GFX9: TBUFFER_STORE_FORMAT_X_OFFSET_exact %0, %13, 0, 44, 116, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+name: gfx9_tbuffer_store_not_merged_data_format_mismatch
+body: |
+ bb.0.entry:
+ liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8
+ %12:vgpr_32 = COPY $vgpr8
+ %11:vgpr_32 = COPY $vgpr7
+ %10:vgpr_32 = COPY $vgpr6
+ %9:vgpr_32 = COPY $vgpr5
+ %8:vgpr_32 = COPY $vgpr4
+ %7:vgpr_32 = COPY $vgpr3
+ %6:vgpr_32 = COPY $vgpr2
+ %5:vgpr_32 = COPY $vgpr1
+ %4:vgpr_32 = COPY $vgpr0
+ %3:sgpr_32 = COPY $sgpr3
+ %2:sgpr_32 = COPY $sgpr2
+ %1:sgpr_32 = COPY $sgpr1
+ %0:sgpr_32 = COPY $sgpr0
+ %13:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %4:vgpr_32, %13:sgpr_128, 0, 4, 116, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %5:vgpr_32, %13:sgpr_128, 0, 8, 84, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %6:vgpr_32, %13:sgpr_128, 0, 16, 116, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %7:vgpr_32, %13:sgpr_128, 0, 20, 84, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %8:vgpr_32, %13:sgpr_128, 0, 24, 116, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %9:vgpr_32, %13:sgpr_128, 0, 28, 84, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %10:vgpr_32, %13:sgpr_128, 0, 36, 116, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %11:vgpr_32, %13:sgpr_128, 0, 40, 84, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %12:vgpr_32, %13:sgpr_128, 0, 44, 116, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+...
+---
+
+# GFX9-LABEL: name: gfx9_tbuffer_store_not_merged_num_format_mismatch
+# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr8
+# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr7
+# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr6
+# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr5
+# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr4
+# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr3
+# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr2
+# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr1
+# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr0
+# GFX9: %{{[0-9]+}}:sgpr_32 = COPY $sgpr3
+# GFX9: %{{[0-9]+}}:sgpr_32 = COPY $sgpr2
+# GFX9: %{{[0-9]+}}:sgpr_32 = COPY $sgpr1
+# GFX9: %{{[0-9]+}}:sgpr_32 = COPY $sgpr0
+# GFX9: %{{[0-9]+}}:sgpr_128 = REG_SEQUENCE %12, %subreg.sub0, %11, %subreg.sub1, %10, %subreg.sub2, %9, %subreg.sub3
+# GFX9: TBUFFER_STORE_FORMAT_X_OFFSET_exact %8, %13, 0, 4, 116, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+# GFX9: TBUFFER_STORE_FORMAT_X_OFFSET_exact %7, %13, 0, 8, 114, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+# GFX9: TBUFFER_STORE_FORMAT_X_OFFSET_exact %6, %13, 0, 16, 116, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+# GFX9: TBUFFER_STORE_FORMAT_X_OFFSET_exact %5, %13, 0, 20, 114, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+# GFX9: TBUFFER_STORE_FORMAT_X_OFFSET_exact %4, %13, 0, 24, 116, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+# GFX9: TBUFFER_STORE_FORMAT_X_OFFSET_exact %3, %13, 0, 28, 114, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+# GFX9: TBUFFER_STORE_FORMAT_X_OFFSET_exact %2, %13, 0, 36, 116, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+# GFX9: TBUFFER_STORE_FORMAT_X_OFFSET_exact %1, %13, 0, 40, 114, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+# GFX9: TBUFFER_STORE_FORMAT_X_OFFSET_exact %0, %13, 0, 44, 116, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+name: gfx9_tbuffer_store_not_merged_num_format_mismatch
+body: |
+ bb.0.entry:
+ liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8
+ %12:vgpr_32 = COPY $vgpr8
+ %11:vgpr_32 = COPY $vgpr7
+ %10:vgpr_32 = COPY $vgpr6
+ %9:vgpr_32 = COPY $vgpr5
+ %8:vgpr_32 = COPY $vgpr4
+ %7:vgpr_32 = COPY $vgpr3
+ %6:vgpr_32 = COPY $vgpr2
+ %5:vgpr_32 = COPY $vgpr1
+ %4:vgpr_32 = COPY $vgpr0
+ %3:sgpr_32 = COPY $sgpr3
+ %2:sgpr_32 = COPY $sgpr2
+ %1:sgpr_32 = COPY $sgpr1
+ %0:sgpr_32 = COPY $sgpr0
+ %13:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %4:vgpr_32, %13:sgpr_128, 0, 4, 116, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %5:vgpr_32, %13:sgpr_128, 0, 8, 114, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %6:vgpr_32, %13:sgpr_128, 0, 16, 116, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %7:vgpr_32, %13:sgpr_128, 0, 20, 114, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %8:vgpr_32, %13:sgpr_128, 0, 24, 116, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %9:vgpr_32, %13:sgpr_128, 0, 28, 114, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %10:vgpr_32, %13:sgpr_128, 0, 36, 116, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %11:vgpr_32, %13:sgpr_128, 0, 40, 114, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %12:vgpr_32, %13:sgpr_128, 0, 44, 116, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+...
+---
+
+
+# GFX9-LABEL: name: gfx9_tbuffer_load_not_merged_swizzled_0
+# GFX9: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 4, 116, 0, 0, 0, 0, 1, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+# GFX9: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 8, 116, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+name: gfx9_tbuffer_load_not_merged_swizzled_0
+body: |
+ bb.0.entry:
+ %0:sgpr_32 = COPY $sgpr0
+ %1:sgpr_32 = COPY $sgpr1
+ %2:sgpr_32 = COPY $sgpr2
+ %3:sgpr_32 = COPY $sgpr3
+ %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3
+ %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 116, 0, 0, 0, 0, 1, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 8, 116, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+...
+---
+
+
+# GFX9-LABEL: name: gfx9_tbuffer_load_not_merged_swizzled_1
+# GFX9: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 4, 116, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+# GFX9: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 8, 116, 0, 0, 0, 0, 1, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+name: gfx9_tbuffer_load_not_merged_swizzled_1
+body: |
+ bb.0.entry:
+ %0:sgpr_32 = COPY $sgpr0
+ %1:sgpr_32 = COPY $sgpr1
+ %2:sgpr_32 = COPY $sgpr2
+ %3:sgpr_32 = COPY $sgpr3
+ %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3
+ %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 116, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 8, 116, 0, 0, 0, 0, 1, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+...
+---
+
+
+#
+# GFX10 tests
+#
+
+# GFX10-LABEL: name: gfx10_tbuffer_load_x_xyz
+# GFX10: %{{[0-9]+}}:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_OFFSET %4, 0, 4, 77, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16, align 1, addrspace 4)
+# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %7.sub0
+# GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %7.sub1_sub2_sub3
+name: gfx10_tbuffer_load_x_xyz
+body: |
+ bb.0.entry:
+ %0:sgpr_32 = COPY $sgpr0
+ %1:sgpr_32 = COPY $sgpr1
+ %2:sgpr_32 = COPY $sgpr2
+ %3:sgpr_32 = COPY $sgpr3
+ %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3
+ %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 22, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %8:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_OFFSET %5:sgpr_128, 0, 8, 74, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 1, addrspace 4)
+...
+---
+
+# GFX10-LABEL: name: gfx10_tbuffer_load_xyz_x
+# GFX10: %{{[0-9]+}}:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_OFFSET %4, 0, 4, 77, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16, align 1, addrspace 4)
+# GFX10: %{{[0-9]+}}:vreg_96 = COPY %7.sub0_sub1_sub2
+# GFX10: %{{[0-9]+}}:vgpr_32 = COPY killed %7.sub3
+name: gfx10_tbuffer_load_xyz_x
+body: |
+ bb.0.entry:
+ %0:sgpr_32 = COPY $sgpr0
+ %1:sgpr_32 = COPY $sgpr1
+ %2:sgpr_32 = COPY $sgpr2
+ %3:sgpr_32 = COPY $sgpr3
+ %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3
+ %7:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_OFFSET %5:sgpr_128, 0, 4, 74, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 16, 22, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 1, addrspace 4)
+...
+---
+
+# GFX10-LABEL: name: gfx10_tbuffer_load_xy_xy
+# GFX10: %{{[0-9]+}}:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_OFFSET %4, 0, 4, 77, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16, align 1, addrspace 4)
+# GFX10: %{{[0-9]+}}:vreg_64 = COPY %7.sub0_sub1
+# GFX10: %{{[0-9]+}}:vreg_64 = COPY killed %7.sub2_sub3
+name: gfx10_tbuffer_load_xy_xy
+body: |
+ bb.0.entry:
+ %0:sgpr_32 = COPY $sgpr0
+ %1:sgpr_32 = COPY $sgpr1
+ %2:sgpr_32 = COPY $sgpr2
+ %3:sgpr_32 = COPY $sgpr3
+ %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3
+ %7:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFSET %5:sgpr_128, 0, 4, 64, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8, align 1, addrspace 4)
+ %8:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFSET %5:sgpr_128, 0, 12, 64, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8, align 1, addrspace 4)
+...
+---
+
+# GFX10-LABEL: name: gfx10_tbuffer_load_x_xy
+# GFX10: %{{[0-9]+}}:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_OFFSET %4, 0, 4, 74, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 1, addrspace 4)
+# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %7.sub0
+# GFX10: %{{[0-9]+}}:vreg_64 = COPY killed %7.sub1_sub2
+name: gfx10_tbuffer_load_x_xy
+body: |
+ bb.0.entry:
+ %0:sgpr_32 = COPY $sgpr0
+ %1:sgpr_32 = COPY $sgpr1
+ %2:sgpr_32 = COPY $sgpr2
+ %3:sgpr_32 = COPY $sgpr3
+ %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3
+ %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 22, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %8:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFSET %5:sgpr_128, 0, 8, 64, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8, align 1, addrspace 4)
+...
+---
+
+# GFX10-LABEL: name: gfx10_tbuffer_load_xy_x
+# GFX10: %{{[0-9]+}}:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_OFFSET %4, 0, 4, 74, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 1, addrspace 4)
+# GFX10: %{{[0-9]+}}:vreg_64 = COPY %7.sub0_sub1
+# GFX10: %{{[0-9]+}}:vgpr_32 = COPY killed %7.sub2
+name: gfx10_tbuffer_load_xy_x
+body: |
+ bb.0.entry:
+ %0:sgpr_32 = COPY $sgpr0
+ %1:sgpr_32 = COPY $sgpr1
+ %2:sgpr_32 = COPY $sgpr2
+ %3:sgpr_32 = COPY $sgpr3
+ %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3
+ %7:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFSET %5:sgpr_128, 0, 4, 64, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8, align 1, addrspace 4)
+ %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 12, 22, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+...
+---
+
+
+# GFX10-LABEL: name: gfx10_tbuffer_load_x_x
+# GFX10: %{{[0-9]+}}:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFSET %4, 0, 4, 64, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8, align 1, addrspace 4)
+# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %7.sub0
+# GFX10: %{{[0-9]+}}:vgpr_32 = COPY killed %7.sub1
+
+name: gfx10_tbuffer_load_x_x
+body: |
+ bb.0.entry:
+ %0:sgpr_32 = COPY $sgpr0
+ %1:sgpr_32 = COPY $sgpr1
+ %2:sgpr_32 = COPY $sgpr2
+ %3:sgpr_32 = COPY $sgpr3
+ %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3
+ %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 22, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 8, 22, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+...
+---
+
+# GFX10-LABEL: name: gfx10_tbuffer_load_x_x_format_32_32_32_32
+# GFX10: %{{[0-9]+}}:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFSET %4, 0, 4, 64, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8, align 1, addrspace 4)
+# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %7.sub0
+# GFX10: %{{[0-9]+}}:vgpr_32 = COPY killed %7.sub1
+
+name: gfx10_tbuffer_load_x_x_format_32_32_32_32
+body: |
+ bb.0.entry:
+ %0:sgpr_32 = COPY $sgpr0
+ %1:sgpr_32 = COPY $sgpr1
+ %2:sgpr_32 = COPY $sgpr2
+ %3:sgpr_32 = COPY $sgpr3
+ %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3
+ %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 77, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 8, 77, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+...
+---
+
+
+# GFX10-LABEL: name: gfx10_tbuffer_load_float_32
+# GFX10: %{{[0-9]+}}:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFSET %4, 0, 4, 64, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8, align 1, addrspace 4)
+# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %14.sub0
+# GFX10: %{{[0-9]+}}:vgpr_32 = COPY killed %14.sub1
+# GFX10: %{{[0-9]+}}:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_OFFSET %4, 0, 16, 77, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16, align 1, addrspace 4)
+# GFX10: %{{[0-9]+}}:vreg_64 = COPY %18.sub0_sub1
+# GFX10: %{{[0-9]+}}:vreg_64 = COPY killed %18.sub2_sub3
+# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %15.sub0
+# GFX10: %{{[0-9]+}}:vgpr_32 = COPY killed %15.sub1
+# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %16.sub0
+# GFX10: %{{[0-9]+}}:vgpr_32 = COPY killed %16.sub1
+# GFX10: %{{[0-9]+}}:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_OFFSET %4, 0, 36, 74, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 1, addrspace 4)
+# GFX10: %{{[0-9]+}}:vreg_64 = COPY %19.sub0_sub1
+# GFX10: %{{[0-9]+}}:vgpr_32 = COPY killed %19.sub2
+# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %17.sub0
+# GFX10: %{{[0-9]+}}:vgpr_32 = COPY killed %17.sub1
+
+name: gfx10_tbuffer_load_float_32
+body: |
+ bb.0.entry:
+ %0:sgpr_32 = COPY $sgpr0
+ %1:sgpr_32 = COPY $sgpr1
+ %2:sgpr_32 = COPY $sgpr2
+ %3:sgpr_32 = COPY $sgpr3
+ %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3
+ %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 22, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 8, 22, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %9:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 16, 22, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %10:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 20, 22, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %11:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 24, 22, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %12:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 28, 22, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %13:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 36, 22, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %14:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 40, 22, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %15:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 44, 22, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+...
+---
+
+# GFX10-LABEL: name: gfx10_tbuffer_load_sint_32
+# GFX10: %{{[0-9]+}}:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFSET %4, 0, 4, 63, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8, align 1, addrspace 4)
+# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %14.sub0
+# GFX10: %{{[0-9]+}}:vgpr_32 = COPY killed %14.sub1
+# GFX10: %{{[0-9]+}}:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_OFFSET %4, 0, 16, 76, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16, align 1, addrspace 4)
+# GFX10: %{{[0-9]+}}:vreg_64 = COPY %18.sub0_sub1
+# GFX10: %{{[0-9]+}}:vreg_64 = COPY killed %18.sub2_sub3
+# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %15.sub0
+# GFX10: %{{[0-9]+}}:vgpr_32 = COPY killed %15.sub1
+# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %16.sub0
+# GFX10: %{{[0-9]+}}:vgpr_32 = COPY killed %16.sub1
+# GFX10: %{{[0-9]+}}:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_OFFSET %4, 0, 36, 73, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 1, addrspace 4)
+# GFX10: %{{[0-9]+}}:vreg_64 = COPY %19.sub0_sub1
+# GFX10: %{{[0-9]+}}:vgpr_32 = COPY killed %19.sub2
+# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %17.sub0
+# GFX10: %{{[0-9]+}}:vgpr_32 = COPY killed %17.sub1
+
+name: gfx10_tbuffer_load_sint_32
+body: |
+ bb.0.entry:
+ %0:sgpr_32 = COPY $sgpr0
+ %1:sgpr_32 = COPY $sgpr1
+ %2:sgpr_32 = COPY $sgpr2
+ %3:sgpr_32 = COPY $sgpr3
+ %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3
+ %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 21, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 8, 21, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %9:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 16, 21, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %10:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 20, 21, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %11:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 24, 21, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %12:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 28, 21, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %13:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 36, 21, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %14:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 40, 21, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %15:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 44, 21, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+...
+---
+
+# GFX10-LABEL: name: gfx10_tbuffer_load_uint_32
+# GFX10: %{{[0-9]+}}:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFSET %4, 0, 4, 62, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8, align 1, addrspace 4)
+# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %14.sub0
+# GFX10: %{{[0-9]+}}:vgpr_32 = COPY killed %14.sub1
+# GFX10: %{{[0-9]+}}:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_OFFSET %4, 0, 16, 75, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16, align 1, addrspace 4)
+# GFX10: %{{[0-9]+}}:vreg_64 = COPY %18.sub0_sub1
+# GFX10: %{{[0-9]+}}:vreg_64 = COPY killed %18.sub2_sub3
+# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %15.sub0
+# GFX10: %{{[0-9]+}}:vgpr_32 = COPY killed %15.sub1
+# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %16.sub0
+# GFX10: %{{[0-9]+}}:vgpr_32 = COPY killed %16.sub1
+# GFX10: %{{[0-9]+}}:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_OFFSET %4, 0, 36, 72, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 1, addrspace 4)
+# GFX10: %{{[0-9]+}}:vreg_64 = COPY %19.sub0_sub1
+# GFX10: %{{[0-9]+}}:vgpr_32 = COPY killed %19.sub2
+# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %17.sub0
+# GFX10: %{{[0-9]+}}:vgpr_32 = COPY killed %17.sub1
+
+name: gfx10_tbuffer_load_uint_32
+body: |
+ bb.0.entry:
+ %0:sgpr_32 = COPY $sgpr0
+ %1:sgpr_32 = COPY $sgpr1
+ %2:sgpr_32 = COPY $sgpr2
+ %3:sgpr_32 = COPY $sgpr3
+ %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3
+ %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 20, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 8, 20, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %9:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 16, 20, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %10:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 20, 20, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %11:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 24, 20, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %12:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 28, 20, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %13:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 36, 20, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %14:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 40, 20, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %15:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 44, 20, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+...
+---
+
+# GFX10-LABEL: name: gfx10_tbuffer_load_not_merged_data_format_mismatch
+# GFX10: %{{[0-9]+}}:sgpr_32 = COPY $sgpr0
+# GFX10: %{{[0-9]+}}:sgpr_32 = COPY $sgpr1
+# GFX10: %{{[0-9]+}}:sgpr_32 = COPY $sgpr2
+# GFX10: %{{[0-9]+}}:sgpr_32 = COPY $sgpr3
+# GFX10: %{{[0-9]+}}:sgpr_128 = REG_SEQUENCE %0, %subreg.sub0, %1, %subreg.sub1, %2, %subreg.sub2, %3, %subreg.sub3
+# GFX10: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 4, 22, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+# GFX10: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 8, 13, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+# GFX10: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 16, 22, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+# GFX10: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 20, 13, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+# GFX10: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 24, 22, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+# GFX10: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 28, 13, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+# GFX10: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 36, 22, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+# GFX10: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 40, 13, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+# GFX10: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 44, 22, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+
+
+name: gfx10_tbuffer_load_not_merged_data_format_mismatch
+body: |
+ bb.0.entry:
+ %0:sgpr_32 = COPY $sgpr0
+ %1:sgpr_32 = COPY $sgpr1
+ %2:sgpr_32 = COPY $sgpr2
+ %3:sgpr_32 = COPY $sgpr3
+ %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3
+ %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 22, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 8, 13, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %9:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 16, 22, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %10:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 20, 13, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %11:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 24, 22, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %12:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 28, 13, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %13:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 36, 22, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %14:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 40, 13, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %15:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 44, 22, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+...
+---
+
+# GFX10-LABEL: name: gfx10_tbuffer_load_not_merged_num_format_mismatch
+# GFX10: %{{[0-9]+}}:sgpr_32 = COPY $sgpr0
+# GFX10: %{{[0-9]+}}:sgpr_32 = COPY $sgpr1
+# GFX10: %{{[0-9]+}}:sgpr_32 = COPY $sgpr2
+# GFX10: %{{[0-9]+}}:sgpr_32 = COPY $sgpr3
+# GFX10: %{{[0-9]+}}:sgpr_128 = REG_SEQUENCE %0, %subreg.sub0, %1, %subreg.sub1, %2, %subreg.sub2, %3, %subreg.sub3
+# GFX10: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 4, 22, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+# GFX10: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 8, 21, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+# GFX10: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 16, 22, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+# GFX10: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 20, 21, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+# GFX10: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 24, 22, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+# GFX10: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 28, 21, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+# GFX10: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 36, 22, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+# GFX10: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 40, 21, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+# GFX10: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 44, 22, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+name: gfx10_tbuffer_load_not_merged_num_format_mismatch
+body: |
+ bb.0.entry:
+ %0:sgpr_32 = COPY $sgpr0
+ %1:sgpr_32 = COPY $sgpr1
+ %2:sgpr_32 = COPY $sgpr2
+ %3:sgpr_32 = COPY $sgpr3
+ %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3
+ %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 22, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 8, 21, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %9:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 16, 22, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %10:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 20, 21, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %11:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 24, 22, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %12:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 28, 21, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %13:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 36, 22, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %14:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 40, 21, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %15:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 44, 22, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+...
+---
+
+
+
+# GFX10-LABEL: name: gfx10_tbuffer_store_x_xyz
+# GFX10: %{{[0-9]+}}:vreg_96 = REG_SEQUENCE %3, %subreg.sub0, %2, %subreg.sub1, %1, %subreg.sub2
+# GFX10: %{{[0-9]+}}:vreg_128 = REG_SEQUENCE %0, %subreg.sub0, %9, %subreg.sub1_sub2_sub3
+# GFX10: TBUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed %10, %8, 0, 4, 77, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16, align 1, addrspace 4)
+name: gfx10_tbuffer_store_x_xyz
+body: |
+ bb.0.entry:
+ liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ %7:vgpr_32 = COPY $vgpr3
+ %6:vgpr_32 = COPY $vgpr2
+ %5:vgpr_32 = COPY $vgpr1
+ %4:vgpr_32 = COPY $vgpr0
+ %3:sgpr_32 = COPY $sgpr3
+ %2:sgpr_32 = COPY $sgpr2
+ %1:sgpr_32 = COPY $sgpr1
+ %0:sgpr_32 = COPY $sgpr0
+ %13:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3
+ %14:vreg_96 = REG_SEQUENCE %4:vgpr_32, %subreg.sub0, %5:vgpr_32, %subreg.sub1, %6:vgpr_32, %subreg.sub2
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %7:vgpr_32, %13:sgpr_128, 0, 4, 22, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_XYZ_OFFSET_exact %14:vreg_96, %13:sgpr_128, 0, 8, 74, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 12, align 1, addrspace 4)
+...
+---
+
+
+# GFX10-LABEL: name: gfx10_tbuffer_store_xyz_x
+# GFX10: %{{[0-9]+}}:vreg_96 = REG_SEQUENCE %3, %subreg.sub0, %2, %subreg.sub1, %1, %subreg.sub2
+# GFX10: %{{[0-9]+}}:vreg_128 = REG_SEQUENCE %9, %subreg.sub0_sub1_sub2, %0, %subreg.sub3
+# GFX10: TBUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed %10, %8, 0, 4, 77, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16, align 1, addrspace 4)
+name: gfx10_tbuffer_store_xyz_x
+body: |
+ bb.0.entry:
+ liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ %7:vgpr_32 = COPY $vgpr3
+ %6:vgpr_32 = COPY $vgpr2
+ %5:vgpr_32 = COPY $vgpr1
+ %4:vgpr_32 = COPY $vgpr0
+ %3:sgpr_32 = COPY $sgpr3
+ %2:sgpr_32 = COPY $sgpr2
+ %1:sgpr_32 = COPY $sgpr1
+ %0:sgpr_32 = COPY $sgpr0
+ %13:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3
+ %14:vreg_96 = REG_SEQUENCE %4:vgpr_32, %subreg.sub0, %5:vgpr_32, %subreg.sub1, %6:vgpr_32, %subreg.sub2
+ TBUFFER_STORE_FORMAT_XYZ_OFFSET_exact %14:vreg_96, %13:sgpr_128, 0, 4, 74, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 12, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %7:vgpr_32, %13:sgpr_128, 0, 16, 22, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+...
+---
+
+# GFX10-LABEL: name: gfx10_tbuffer_store_xy_xy
+# GFX10: %{{[0-9]+}}:vreg_64 = REG_SEQUENCE %3, %subreg.sub0, %2, %subreg.sub1
+# GFX10: %{{[0-9]+}}:vreg_64 = REG_SEQUENCE %1, %subreg.sub0, %0, %subreg.sub1
+# GFX10: %{{[0-9]+}}:vreg_128 = REG_SEQUENCE %9, %subreg.sub0_sub1, %10, %subreg.sub2_sub3
+# GFX10: TBUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed %11, %8, 0, 4, 77, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16, align 1, addrspace 4)
+name: gfx10_tbuffer_store_xy_xy
+body: |
+ bb.0.entry:
+ liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ %7:vgpr_32 = COPY $vgpr3
+ %6:vgpr_32 = COPY $vgpr2
+ %5:vgpr_32 = COPY $vgpr1
+ %4:vgpr_32 = COPY $vgpr0
+ %3:sgpr_32 = COPY $sgpr3
+ %2:sgpr_32 = COPY $sgpr2
+ %1:sgpr_32 = COPY $sgpr1
+ %0:sgpr_32 = COPY $sgpr0
+ %13:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3
+ %14:vreg_64 = REG_SEQUENCE %4:vgpr_32, %subreg.sub0, %5:vgpr_32, %subreg.sub1
+ %15:vreg_64 = REG_SEQUENCE %6:vgpr_32, %subreg.sub0, %7:vgpr_32, %subreg.sub1
+ TBUFFER_STORE_FORMAT_XY_OFFSET_exact %14:vreg_64, %13:sgpr_128, 0, 4, 64, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_XY_OFFSET_exact %15:vreg_64, %13:sgpr_128, 0, 12, 64, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8, align 1, addrspace 4)
+...
+---
+
+# GFX10-LABEL: name: gfx10_tbuffer_store_x_xy
+# GFX10: %{{[0-9]+}}:vreg_64 = REG_SEQUENCE %3, %subreg.sub0, %2, %subreg.sub1
+# GFX10: %{{[0-9]+}}:vreg_64, %subreg.sub1_sub2
+# GFX10: TBUFFER_STORE_FORMAT_XYZ_OFFSET_exact killed %11, %8, 0, 4, 74, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 12, align 1, addrspace 4)
+name: gfx10_tbuffer_store_x_xy
+body: |
+ bb.0.entry:
+ liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ %7:vgpr_32 = COPY $vgpr3
+ %6:vgpr_32 = COPY $vgpr2
+ %5:vgpr_32 = COPY $vgpr1
+ %4:vgpr_32 = COPY $vgpr0
+ %3:sgpr_32 = COPY $sgpr3
+ %2:sgpr_32 = COPY $sgpr2
+ %1:sgpr_32 = COPY $sgpr1
+ %0:sgpr_32 = COPY $sgpr0
+ %13:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3
+ %14:vreg_64 = REG_SEQUENCE %4:vgpr_32, %subreg.sub0, %5:vgpr_32, %subreg.sub1
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %7:vgpr_32, %13:sgpr_128, 0, 4, 22, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_XY_OFFSET_exact %15:vreg_64, %13:sgpr_128, 0, 8, 64, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8, align 1, addrspace 4)
+...
+---
+
+# GFX10-LABEL: name: gfx10_tbuffer_store_xy_x
+# GFX10: %{{[0-9]+}}:vreg_64 = REG_SEQUENCE %3, %subreg.sub0, %2, %subreg.sub1
+# GFX10: %{{[0-9]+}}:vreg_96 = REG_SEQUENCE %9, %subreg.sub0_sub1, %0, %subreg.sub2
+# GFX10: TBUFFER_STORE_FORMAT_XYZ_OFFSET_exact killed %10, %8, 0, 4, 74, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 12, align 1, addrspace 4)
+name: gfx10_tbuffer_store_xy_x
+body: |
+ bb.0.entry:
+ liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ %7:vgpr_32 = COPY $vgpr3
+ %6:vgpr_32 = COPY $vgpr2
+ %5:vgpr_32 = COPY $vgpr1
+ %4:vgpr_32 = COPY $vgpr0
+ %3:sgpr_32 = COPY $sgpr3
+ %2:sgpr_32 = COPY $sgpr2
+ %1:sgpr_32 = COPY $sgpr1
+ %0:sgpr_32 = COPY $sgpr0
+ %13:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3
+ %14:vreg_64 = REG_SEQUENCE %4:vgpr_32, %subreg.sub0, %5:vgpr_32, %subreg.sub1
+ TBUFFER_STORE_FORMAT_XY_OFFSET_exact %14:vreg_64, %13:sgpr_128, 0, 4, 64, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %7:vgpr_32, %13:sgpr_128, 0, 12, 22, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+...
+---
+
+
+# GFX10-LABEL: name: gfx10_tbuffer_store_x_x
+# GFX10: %{{[0-9]+}}:vreg_64 = REG_SEQUENCE %1, %subreg.sub0, %0, %subreg.sub1
+# GFX10: TBUFFER_STORE_FORMAT_XY_OFFSET_exact killed %9, %8, 0, 4, 64, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8, align 1, addrspace 4)
+name: gfx10_tbuffer_store_x_x
+body: |
+ bb.0.entry:
+ liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ %7:vgpr_32 = COPY $vgpr3
+ %6:vgpr_32 = COPY $vgpr2
+ %5:vgpr_32 = COPY $vgpr1
+ %4:vgpr_32 = COPY $vgpr0
+ %3:sgpr_32 = COPY $sgpr3
+ %2:sgpr_32 = COPY $sgpr2
+ %1:sgpr_32 = COPY $sgpr1
+ %0:sgpr_32 = COPY $sgpr0
+ %13:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %6:vgpr_32, %13:sgpr_128, 0, 4, 22, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %7:vgpr_32, %13:sgpr_128, 0, 8, 22, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+...
+---
+
+# GFX10-LABEL: name: gfx10_tbuffer_store_x_x_format_32_32_32_32
+# GFX10: %{{[0-9]+}}:vreg_64 = REG_SEQUENCE %1, %subreg.sub0, %0, %subreg.sub1
+# GFX10: TBUFFER_STORE_FORMAT_XY_OFFSET_exact killed %9, %8, 0, 4, 64, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8, align 1, addrspace 4)
+name: gfx10_tbuffer_store_x_x_format_32_32_32_32
+body: |
+ bb.0.entry:
+ liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ %7:vgpr_32 = COPY $vgpr3
+ %6:vgpr_32 = COPY $vgpr2
+ %5:vgpr_32 = COPY $vgpr1
+ %4:vgpr_32 = COPY $vgpr0
+ %3:sgpr_32 = COPY $sgpr3
+ %2:sgpr_32 = COPY $sgpr2
+ %1:sgpr_32 = COPY $sgpr1
+ %0:sgpr_32 = COPY $sgpr0
+ %13:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %6:vgpr_32, %13:sgpr_128, 0, 4, 77, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %7:vgpr_32, %13:sgpr_128, 0, 8, 77, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+...
+---
+
+# GFX10-LABEL: name: gfx10_tbuffer_store_float32
+# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr8
+# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr7
+# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr6
+# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr5
+# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr4
+# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr3
+# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr2
+# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr1
+# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr0
+# GFX10: %{{[0-9]+}}:sgpr_32 = COPY $sgpr3
+# GFX10: %{{[0-9]+}}:sgpr_32 = COPY $sgpr2
+# GFX10: %{{[0-9]+}}:sgpr_32 = COPY $sgpr1
+# GFX10: %{{[0-9]+}}:sgpr_32 = COPY $sgpr0
+# GFX10: %{{[0-9]+}}:sgpr_128 = REG_SEQUENCE %12, %subreg.sub0, %11, %subreg.sub1, %10, %subreg.sub2, %9, %subreg.sub3
+# GFX10: %{{[0-9]+}}:vreg_64 = REG_SEQUENCE %8, %subreg.sub0, %7, %subreg.sub1
+# GFX10: TBUFFER_STORE_FORMAT_XY_OFFSET_exact killed %14, %13, 0, 4, 64, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8, align 1, addrspace 4)
+# GFX10: %{{[0-9]+}}:vreg_64 = REG_SEQUENCE %6, %subreg.sub0, %5, %subreg.sub1
+# GFX10: %{{[0-9]+}}:vreg_64 = REG_SEQUENCE %4, %subreg.sub0, %3, %subreg.sub1
+# GFX10: %{{[0-9]+}}:vreg_128 = REG_SEQUENCE killed %15, %subreg.sub0_sub1, killed %16, %subreg.sub2_sub3
+# GFX10: TBUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed %18, %13, 0, 16, 77, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16, align 1, addrspace 4)
+# GFX10: %{{[0-9]+}}:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %1, %subreg.sub1
+# GFX10: %{{[0-9]+}}:vreg_96 = REG_SEQUENCE killed %17, %subreg.sub0_sub1, %0, %subreg.sub2
+# GFX10: TBUFFER_STORE_FORMAT_XYZ_OFFSET_exact killed %19, %13, 0, 36, 74, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 12, align 1, addrspace 4)
+name: gfx10_tbuffer_store_float32
+body: |
+ bb.0.entry:
+ liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8
+ %12:vgpr_32 = COPY $vgpr8
+ %11:vgpr_32 = COPY $vgpr7
+ %10:vgpr_32 = COPY $vgpr6
+ %9:vgpr_32 = COPY $vgpr5
+ %8:vgpr_32 = COPY $vgpr4
+ %7:vgpr_32 = COPY $vgpr3
+ %6:vgpr_32 = COPY $vgpr2
+ %5:vgpr_32 = COPY $vgpr1
+ %4:vgpr_32 = COPY $vgpr0
+ %3:sgpr_32 = COPY $sgpr3
+ %2:sgpr_32 = COPY $sgpr2
+ %1:sgpr_32 = COPY $sgpr1
+ %0:sgpr_32 = COPY $sgpr0
+ %13:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %4:vgpr_32, %13:sgpr_128, 0, 4, 22, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %5:vgpr_32, %13:sgpr_128, 0, 8, 22, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %6:vgpr_32, %13:sgpr_128, 0, 16, 22, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %7:vgpr_32, %13:sgpr_128, 0, 20, 22, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %8:vgpr_32, %13:sgpr_128, 0, 24, 22, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %9:vgpr_32, %13:sgpr_128, 0, 28, 22, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %10:vgpr_32, %13:sgpr_128, 0, 36, 22, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %11:vgpr_32, %13:sgpr_128, 0, 40, 22, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %12:vgpr_32, %13:sgpr_128, 0, 44, 22, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+...
+---
+
+# GFX10-LABEL: name: gfx10_tbuffer_store_sint32
+# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr8
+# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr7
+# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr6
+# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr5
+# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr4
+# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr3
+# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr2
+# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr1
+# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr0
+# GFX10: %{{[0-9]+}}:sgpr_32 = COPY $sgpr3
+# GFX10: %{{[0-9]+}}:sgpr_32 = COPY $sgpr2
+# GFX10: %{{[0-9]+}}:sgpr_32 = COPY $sgpr1
+# GFX10: %{{[0-9]+}}:sgpr_32 = COPY $sgpr0
+# GFX10: %{{[0-9]+}}:sgpr_128 = REG_SEQUENCE %12, %subreg.sub0, %11, %subreg.sub1, %10, %subreg.sub2, %9, %subreg.sub3
+# GFX10: %{{[0-9]+}}:vreg_64 = REG_SEQUENCE %8, %subreg.sub0, %7, %subreg.sub1
+# GFX10: TBUFFER_STORE_FORMAT_XY_OFFSET_exact killed %14, %13, 0, 4, 63, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8, align 1, addrspace 4)
+# GFX10: %{{[0-9]+}}:vreg_64 = REG_SEQUENCE %6, %subreg.sub0, %5, %subreg.sub1
+# GFX10: %{{[0-9]+}}:vreg_64 = REG_SEQUENCE %4, %subreg.sub0, %3, %subreg.sub1
+# GFX10: %{{[0-9]+}}:vreg_128 = REG_SEQUENCE killed %15, %subreg.sub0_sub1, killed %16, %subreg.sub2_sub3
+# GFX10: TBUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed %18, %13, 0, 16, 76, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16, align 1, addrspace 4)
+# GFX10: %{{[0-9]+}}:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %1, %subreg.sub1
+# GFX10: %{{[0-9]+}}:vreg_96 = REG_SEQUENCE killed %17, %subreg.sub0_sub1, %0, %subreg.sub2
+# GFX10: TBUFFER_STORE_FORMAT_XYZ_OFFSET_exact killed %19, %13, 0, 36, 73, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 12, align 1, addrspace 4)
+name: gfx10_tbuffer_store_sint32
+body: |
+ bb.0.entry:
+ liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8
+ %12:vgpr_32 = COPY $vgpr8
+ %11:vgpr_32 = COPY $vgpr7
+ %10:vgpr_32 = COPY $vgpr6
+ %9:vgpr_32 = COPY $vgpr5
+ %8:vgpr_32 = COPY $vgpr4
+ %7:vgpr_32 = COPY $vgpr3
+ %6:vgpr_32 = COPY $vgpr2
+ %5:vgpr_32 = COPY $vgpr1
+ %4:vgpr_32 = COPY $vgpr0
+ %3:sgpr_32 = COPY $sgpr3
+ %2:sgpr_32 = COPY $sgpr2
+ %1:sgpr_32 = COPY $sgpr1
+ %0:sgpr_32 = COPY $sgpr0
+ %13:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %4:vgpr_32, %13:sgpr_128, 0, 4, 21, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %5:vgpr_32, %13:sgpr_128, 0, 8, 21, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %6:vgpr_32, %13:sgpr_128, 0, 16, 21, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %7:vgpr_32, %13:sgpr_128, 0, 20, 21, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %8:vgpr_32, %13:sgpr_128, 0, 24, 21, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %9:vgpr_32, %13:sgpr_128, 0, 28, 21, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %10:vgpr_32, %13:sgpr_128, 0, 36, 21, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %11:vgpr_32, %13:sgpr_128, 0, 40, 21, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %12:vgpr_32, %13:sgpr_128, 0, 44, 21, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+...
+---
+
+# GFX10-LABEL: name: gfx10_tbuffer_store_uint32
+# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr8
+# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr7
+# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr6
+# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr5
+# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr4
+# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr3
+# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr2
+# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr1
+# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr0
+# GFX10: %{{[0-9]+}}:sgpr_32 = COPY $sgpr3
+# GFX10: %{{[0-9]+}}:sgpr_32 = COPY $sgpr2
+# GFX10: %{{[0-9]+}}:sgpr_32 = COPY $sgpr1
+# GFX10: %{{[0-9]+}}:sgpr_32 = COPY $sgpr0
+# GFX10: %{{[0-9]+}}:sgpr_128 = REG_SEQUENCE %12, %subreg.sub0, %11, %subreg.sub1, %10, %subreg.sub2, %9, %subreg.sub3
+# GFX10: %{{[0-9]+}}:vreg_64 = REG_SEQUENCE %8, %subreg.sub0, %7, %subreg.sub1
+# GFX10: TBUFFER_STORE_FORMAT_XY_OFFSET_exact killed %14, %13, 0, 4, 62, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8, align 1, addrspace 4)
+# GFX10: %{{[0-9]+}}:vreg_64 = REG_SEQUENCE %6, %subreg.sub0, %5, %subreg.sub1
+# GFX10: %{{[0-9]+}}:vreg_64 = REG_SEQUENCE %4, %subreg.sub0, %3, %subreg.sub1
+# GFX10: %{{[0-9]+}}:vreg_128 = REG_SEQUENCE killed %15, %subreg.sub0_sub1, killed %16, %subreg.sub2_sub3
+# GFX10: TBUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed %18, %13, 0, 16, 75, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16, align 1, addrspace 4)
+# GFX10: %{{[0-9]+}}:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %1, %subreg.sub1
+# GFX10: %{{[0-9]+}}:vreg_96 = REG_SEQUENCE killed %17, %subreg.sub0_sub1, %0, %subreg.sub2
+# GFX10: TBUFFER_STORE_FORMAT_XYZ_OFFSET_exact killed %19, %13, 0, 36, 72, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 12, align 1, addrspace 4)
+name: gfx10_tbuffer_store_uint32
+body: |
+ bb.0.entry:
+ liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8
+ %12:vgpr_32 = COPY $vgpr8
+ %11:vgpr_32 = COPY $vgpr7
+ %10:vgpr_32 = COPY $vgpr6
+ %9:vgpr_32 = COPY $vgpr5
+ %8:vgpr_32 = COPY $vgpr4
+ %7:vgpr_32 = COPY $vgpr3
+ %6:vgpr_32 = COPY $vgpr2
+ %5:vgpr_32 = COPY $vgpr1
+ %4:vgpr_32 = COPY $vgpr0
+ %3:sgpr_32 = COPY $sgpr3
+ %2:sgpr_32 = COPY $sgpr2
+ %1:sgpr_32 = COPY $sgpr1
+ %0:sgpr_32 = COPY $sgpr0
+ %13:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %4:vgpr_32, %13:sgpr_128, 0, 4, 20, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %5:vgpr_32, %13:sgpr_128, 0, 8, 20, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %6:vgpr_32, %13:sgpr_128, 0, 16, 20, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %7:vgpr_32, %13:sgpr_128, 0, 20, 20, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %8:vgpr_32, %13:sgpr_128, 0, 24, 20, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %9:vgpr_32, %13:sgpr_128, 0, 28, 20, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %10:vgpr_32, %13:sgpr_128, 0, 36, 20, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %11:vgpr_32, %13:sgpr_128, 0, 40, 20, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %12:vgpr_32, %13:sgpr_128, 0, 44, 20, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+...
+---
+
+# GFX10-LABEL: name: gfx10_tbuffer_store_not_merged_data_format_mismatch
+# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr8
+# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr7
+# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr6
+# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr5
+# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr4
+# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr3
+# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr2
+# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr1
+# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr0
+# GFX10: %{{[0-9]+}}:sgpr_32 = COPY $sgpr3
+# GFX10: %{{[0-9]+}}:sgpr_32 = COPY $sgpr2
+# GFX10: %{{[0-9]+}}:sgpr_32 = COPY $sgpr1
+# GFX10: %{{[0-9]+}}:sgpr_32 = COPY $sgpr0
+# GFX10: %{{[0-9]+}}:sgpr_128 = REG_SEQUENCE %12, %subreg.sub0, %11, %subreg.sub1, %10, %subreg.sub2, %9, %subreg.sub3
+# GFX10: TBUFFER_STORE_FORMAT_X_OFFSET_exact %8, %13, 0, 4, 22, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+# GFX10: TBUFFER_STORE_FORMAT_X_OFFSET_exact %7, %13, 0, 8, 21, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+# GFX10: TBUFFER_STORE_FORMAT_X_OFFSET_exact %6, %13, 0, 16, 22, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+# GFX10: TBUFFER_STORE_FORMAT_X_OFFSET_exact %5, %13, 0, 20, 21, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+# GFX10: TBUFFER_STORE_FORMAT_X_OFFSET_exact %4, %13, 0, 24, 22, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+# GFX10: TBUFFER_STORE_FORMAT_X_OFFSET_exact %3, %13, 0, 28, 21, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+# GFX10: TBUFFER_STORE_FORMAT_X_OFFSET_exact %2, %13, 0, 36, 22, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+# GFX10: TBUFFER_STORE_FORMAT_X_OFFSET_exact %1, %13, 0, 40, 21, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+# GFX10: TBUFFER_STORE_FORMAT_X_OFFSET_exact %0, %13, 0, 44, 22, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+name: gfx10_tbuffer_store_not_merged_data_format_mismatch
+body: |
+ bb.0.entry:
+ liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8
+ %12:vgpr_32 = COPY $vgpr8
+ %11:vgpr_32 = COPY $vgpr7
+ %10:vgpr_32 = COPY $vgpr6
+ %9:vgpr_32 = COPY $vgpr5
+ %8:vgpr_32 = COPY $vgpr4
+ %7:vgpr_32 = COPY $vgpr3
+ %6:vgpr_32 = COPY $vgpr2
+ %5:vgpr_32 = COPY $vgpr1
+ %4:vgpr_32 = COPY $vgpr0
+ %3:sgpr_32 = COPY $sgpr3
+ %2:sgpr_32 = COPY $sgpr2
+ %1:sgpr_32 = COPY $sgpr1
+ %0:sgpr_32 = COPY $sgpr0
+ %13:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %4:vgpr_32, %13:sgpr_128, 0, 4, 22, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %5:vgpr_32, %13:sgpr_128, 0, 8, 21, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %6:vgpr_32, %13:sgpr_128, 0, 16, 22, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %7:vgpr_32, %13:sgpr_128, 0, 20, 21, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %8:vgpr_32, %13:sgpr_128, 0, 24, 22, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %9:vgpr_32, %13:sgpr_128, 0, 28, 21, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %10:vgpr_32, %13:sgpr_128, 0, 36, 22, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %11:vgpr_32, %13:sgpr_128, 0, 40, 21, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %12:vgpr_32, %13:sgpr_128, 0, 44, 22, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+...
+---
+
+# GFX10-LABEL: name: gfx10_tbuffer_store_not_merged_num_format_mismatch
+# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr8
+# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr7
+# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr6
+# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr5
+# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr4
+# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr3
+# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr2
+# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr1
+# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr0
+# GFX10: %{{[0-9]+}}:sgpr_32 = COPY $sgpr3
+# GFX10: %{{[0-9]+}}:sgpr_32 = COPY $sgpr2
+# GFX10: %{{[0-9]+}}:sgpr_32 = COPY $sgpr1
+# GFX10: %{{[0-9]+}}:sgpr_32 = COPY $sgpr0
+# GFX10: %{{[0-9]+}}:sgpr_128 = REG_SEQUENCE %12, %subreg.sub0, %11, %subreg.sub1, %10, %subreg.sub2, %9, %subreg.sub3
+# GFX10: TBUFFER_STORE_FORMAT_X_OFFSET_exact %8, %13, 0, 4, 22, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+# GFX10: TBUFFER_STORE_FORMAT_X_OFFSET_exact %7, %13, 0, 8, 13, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+# GFX10: TBUFFER_STORE_FORMAT_X_OFFSET_exact %6, %13, 0, 16, 22, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+# GFX10: TBUFFER_STORE_FORMAT_X_OFFSET_exact %5, %13, 0, 20, 13, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+# GFX10: TBUFFER_STORE_FORMAT_X_OFFSET_exact %4, %13, 0, 24, 22, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+# GFX10: TBUFFER_STORE_FORMAT_X_OFFSET_exact %3, %13, 0, 28, 13, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+# GFX10: TBUFFER_STORE_FORMAT_X_OFFSET_exact %2, %13, 0, 36, 22, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+# GFX10: TBUFFER_STORE_FORMAT_X_OFFSET_exact %1, %13, 0, 40, 13, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+# GFX10: TBUFFER_STORE_FORMAT_X_OFFSET_exact %0, %13, 0, 44, 22, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+name: gfx10_tbuffer_store_not_merged_num_format_mismatch
+body: |
+ bb.0.entry:
+ liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8
+ %12:vgpr_32 = COPY $vgpr8
+ %11:vgpr_32 = COPY $vgpr7
+ %10:vgpr_32 = COPY $vgpr6
+ %9:vgpr_32 = COPY $vgpr5
+ %8:vgpr_32 = COPY $vgpr4
+ %7:vgpr_32 = COPY $vgpr3
+ %6:vgpr_32 = COPY $vgpr2
+ %5:vgpr_32 = COPY $vgpr1
+ %4:vgpr_32 = COPY $vgpr0
+ %3:sgpr_32 = COPY $sgpr3
+ %2:sgpr_32 = COPY $sgpr2
+ %1:sgpr_32 = COPY $sgpr1
+ %0:sgpr_32 = COPY $sgpr0
+ %13:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %4:vgpr_32, %13:sgpr_128, 0, 4, 22, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %5:vgpr_32, %13:sgpr_128, 0, 8, 13, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %6:vgpr_32, %13:sgpr_128, 0, 16, 22, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %7:vgpr_32, %13:sgpr_128, 0, 20, 13, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %8:vgpr_32, %13:sgpr_128, 0, 24, 22, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %9:vgpr_32, %13:sgpr_128, 0, 28, 13, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %10:vgpr_32, %13:sgpr_128, 0, 36, 22, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %11:vgpr_32, %13:sgpr_128, 0, 40, 13, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact %12:vgpr_32, %13:sgpr_128, 0, 44, 22, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4)
+...
+---
+
+
+# GFX10-LABEL: name: gfx10_tbuffer_load_not_merged_swizzled_0
+# GFX10: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 4, 22, 0, 0, 0, 0, 1, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+# GFX10: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 8, 22, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+name: gfx10_tbuffer_load_not_merged_swizzled_0
+body: |
+ bb.0.entry:
+ %0:sgpr_32 = COPY $sgpr0
+ %1:sgpr_32 = COPY $sgpr1
+ %2:sgpr_32 = COPY $sgpr2
+ %3:sgpr_32 = COPY $sgpr3
+ %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3
+ %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 22, 0, 0, 0, 0, 1, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 8, 22, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+...
+---
+
+
+# GFX10-LABEL: name: gfx10_tbuffer_load_not_merged_swizzled_1
+# GFX10: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 4, 22, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+# GFX10: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 8, 22, 0, 0, 0, 0, 1, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+name: gfx10_tbuffer_load_not_merged_swizzled_1
+body: |
+ bb.0.entry:
+ %0:sgpr_32 = COPY $sgpr0
+ %1:sgpr_32 = COPY $sgpr1
+ %2:sgpr_32 = COPY $sgpr2
+ %3:sgpr_32 = COPY $sgpr3
+ %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3
+ %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 22, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+ %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 8, 22, 0, 0, 0, 0, 1, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4)
+...
+---
+
More information about the llvm-commits
mailing list