[llvm] [AMDGPU] Add function attribute to control TBUFFER combining (PR #156454)
Harrison Hao via llvm-commits
llvm-commits at lists.llvm.org
Sat Sep 20 03:14:57 PDT 2025
https://github.com/harrisonGPU updated https://github.com/llvm/llvm-project/pull/156454
>From 1a6007023a8900ee1db134cd389bf39eb6b9d6d5 Mon Sep 17 00:00:00 2001
From: Harrison Hao <tsworld1314 at gmail.com>
Date: Tue, 2 Sep 2025 20:32:12 +0800
Subject: [PATCH 1/4] [AMDGPU] Add function attribute to disable TBUFFER
combine
---
.../Target/AMDGPU/SILoadStoreOptimizer.cpp | 9 +++
.../Target/AMDGPU/SIMachineFunctionInfo.cpp | 3 +
.../lib/Target/AMDGPU/SIMachineFunctionInfo.h | 10 +++
.../AMDGPU/tbuffer-combine-disable-attr.mir | 65 +++++++++++++++++++
4 files changed, 87 insertions(+)
create mode 100644 llvm/test/CodeGen/AMDGPU/tbuffer-combine-disable-attr.mir
diff --git a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
index f0d1117664983..4c0e782f2a451 100644
--- a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
+++ b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
@@ -62,6 +62,7 @@
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIDefines.h"
+#include "SIMachineFunctionInfo.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/InitializePasses.h"
@@ -2507,6 +2508,14 @@ SILoadStoreOptimizer::collectMergeableInsts(
LLVM_DEBUG(dbgs() << "Skip tbuffer with unknown format: " << MI);
continue;
}
+
+ const MachineFunction *MF = MI.getParent()->getParent();
+ const auto *MFI = MF->getInfo<SIMachineFunctionInfo>();
+ if (MFI->isTBufferCombineDisabled()) {
+ LLVM_DEBUG(
+ dbgs() << "Skip TBUFFER combine: disabled by function attribute\n");
+ continue;
+ }
}
CombineInfo CI;
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
index 1f11be475e9f8..cd8ef78f6f95a 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
@@ -197,6 +197,9 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const Function &F,
}
ClusterDims = AMDGPU::ClusterDimsAttr::get(F);
+
+ if (F.hasFnAttribute("amdgpu-disable-tbuffer-combine"))
+ setDisableTBufferCombine(true);
}
MachineFunctionInfo *SIMachineFunctionInfo::clone(
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
index 45606153db58e..86f1f3be71ec7 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
@@ -293,6 +293,8 @@ struct SIMachineFunctionInfo final : public yaml::MachineFunctionInfo {
unsigned PSInputEnable = 0;
unsigned MaxMemoryClusterDWords = DefaultMemoryClusterDWordsLimit;
+ bool DisableTBufferCombine = false;
+
SIMode Mode;
std::optional<FrameIndex> ScavengeFI;
StringValue VGPRForAGPRCopy;
@@ -528,6 +530,9 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction,
// scheduler stage.
unsigned MaxMemoryClusterDWords = DefaultMemoryClusterDWordsLimit;
+ // Disable combining of TBUFFER instructions.
+ bool DisableTBufferCombine = false;
+
MCPhysReg getNextUserSGPR() const;
MCPhysReg getNextSystemSGPR() const;
@@ -1212,6 +1217,11 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction,
unsigned getMaxNumWorkGroupsZ() const { return MaxNumWorkGroups[2]; }
AMDGPU::ClusterDimsAttr getClusterDims() const { return ClusterDims; }
+
+ bool isTBufferCombineDisabled() const { return DisableTBufferCombine; }
+ void setDisableTBufferCombine(bool IsDisabled) {
+ DisableTBufferCombine = IsDisabled;
+ }
};
} // end namespace llvm
diff --git a/llvm/test/CodeGen/AMDGPU/tbuffer-combine-disable-attr.mir b/llvm/test/CodeGen/AMDGPU/tbuffer-combine-disable-attr.mir
new file mode 100644
index 0000000000000..cd119aacf7496
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/tbuffer-combine-disable-attr.mir
@@ -0,0 +1,65 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -run-pass=si-load-store-opt -verify-machineinstrs %s -o - | FileCheck %s
+
+--- |
+ target triple = "amdgcn"
+
+ define float @disable-tbuffer-combine(<4 x i32> %vec, i32 %index) #0 {
+ %1 = call i32 @llvm.amdgcn.struct.tbuffer.load.i32(<4 x i32> %vec, i32 %index, i32 0, i32 0, i32 22, i32 0)
+ %2 = call i32 @llvm.amdgcn.struct.tbuffer.load.i32(<4 x i32> %vec, i32 %index, i32 4, i32 0, i32 22, i32 0)
+ %3 = call i32 @llvm.amdgcn.struct.tbuffer.load.i32(<4 x i32> %vec, i32 %index, i32 8, i32 0, i32 22, i32 0)
+ %4 = call i32 @llvm.amdgcn.struct.tbuffer.load.i32(<4 x i32> %vec, i32 %index, i32 12, i32 0, i32 22, i32 0)
+ %5 = bitcast i32 %1 to float
+ %6 = bitcast i32 %2 to float
+ %7 = bitcast i32 %3 to float
+ %8 = bitcast i32 %4 to float
+ %add = fadd float %5, %6
+ %mul = fmul float %7, %8
+ %res = fadd float %add, %mul
+ ret float %res
+ }
+
+ attributes #0 = {"amdgpu-disable-tbuffer-combine"}
+...
+---
+name: disable-tbuffer-combine
+body: |
+ bb.0 (%ir-block.0):
+ liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
+
+ ; CHECK-LABEL: name: disable-tbuffer-combine
+ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr4
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY1]], %subreg.sub3
+ ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; CHECK-NEXT: [[TBUFFER_LOAD_FORMAT_X_IDXEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_IDXEN [[COPY]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 22, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
+ ; CHECK-NEXT: [[TBUFFER_LOAD_FORMAT_X_IDXEN1:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_IDXEN [[COPY]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4, 22, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
+ ; CHECK-NEXT: [[TBUFFER_LOAD_FORMAT_X_IDXEN2:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_IDXEN [[COPY]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 8, 22, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
+ ; CHECK-NEXT: [[TBUFFER_LOAD_FORMAT_X_IDXEN3:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_IDXEN [[COPY]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 12, 22, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
+ ; CHECK-NEXT: [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, killed [[TBUFFER_LOAD_FORMAT_X_IDXEN]], 0, killed [[TBUFFER_LOAD_FORMAT_X_IDXEN1]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MUL_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MUL_F32_e64 0, killed [[TBUFFER_LOAD_FORMAT_X_IDXEN2]], 0, killed [[TBUFFER_LOAD_FORMAT_X_IDXEN3]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_ADD_F32_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, killed [[V_ADD_F32_e64_]], 0, killed [[V_MUL_F32_e64_]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: $vgpr0 = COPY [[V_ADD_F32_e64_1]]
+ ; CHECK-NEXT: SI_RETURN implicit $vgpr0
+ %12:vgpr_32 = COPY $vgpr4
+ %11:vgpr_32 = COPY $vgpr3
+ %10:vgpr_32 = COPY $vgpr2
+ %9:vgpr_32 = COPY $vgpr1
+ %8:vgpr_32 = COPY $vgpr0
+ %13:sgpr_128 = REG_SEQUENCE %8, %subreg.sub0, %9, %subreg.sub1, %10, %subreg.sub2, %11, %subreg.sub3
+ %14:sreg_32 = S_MOV_B32 0
+ %15:vgpr_32 = TBUFFER_LOAD_FORMAT_X_IDXEN %12, %13, %14, 0, 22, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
+ %16:vgpr_32 = TBUFFER_LOAD_FORMAT_X_IDXEN %12, %13, %14, 4, 22, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
+ %17:vgpr_32 = TBUFFER_LOAD_FORMAT_X_IDXEN %12, %13, %14, 8, 22, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
+ %18:vgpr_32 = TBUFFER_LOAD_FORMAT_X_IDXEN %12, %13, %14, 12, 22, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
+ %19:vgpr_32 = nofpexcept V_ADD_F32_e64 0, killed %15, 0, killed %16, 0, 0, implicit $mode, implicit $exec
+ %20:vgpr_32 = nofpexcept V_MUL_F32_e64 0, killed %17, 0, killed %18, 0, 0, implicit $mode, implicit $exec
+ %21:vgpr_32 = nofpexcept V_ADD_F32_e64 0, killed %19, 0, killed %20, 0, 0, implicit $mode, implicit $exec
+ $vgpr0 = COPY %21
+ SI_RETURN implicit $vgpr0
+...
>From c3a256b04f8d936ff1dbebf501cfbb06987784eb Mon Sep 17 00:00:00 2001
From: Harrison Hao <tsworld1314 at gmail.com>
Date: Fri, 12 Sep 2025 16:27:00 +0800
Subject: [PATCH 2/4] [AMDGPU] Use amdgpu-relaxed-tbuffer-oob-mod
---
.../Target/AMDGPU/SILoadStoreOptimizer.cpp | 4 +-
.../Target/AMDGPU/SIMachineFunctionInfo.cpp | 5 +-
.../lib/Target/AMDGPU/SIMachineFunctionInfo.h | 14 +-
.../CodeGen/AMDGPU/merge-tbuffer-gfx10.mir | 102 +++++++++++++
.../CodeGen/AMDGPU/merge-tbuffer-gfx11.mir | 140 ++++++++++++++++++
.../CodeGen/AMDGPU/merge-tbuffer-gfx12.mir | 102 +++++++++++++
.../CodeGen/AMDGPU/merge-tbuffer-gfx9.mir | 104 +++++++++++++
...-attr.mir => relaxed-tbuffer-oob-mode.mir} | 25 ++--
8 files changed, 476 insertions(+), 20 deletions(-)
rename llvm/test/CodeGen/AMDGPU/{tbuffer-combine-disable-attr.mir => relaxed-tbuffer-oob-mode.mir} (70%)
diff --git a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
index 4c0e782f2a451..201235908c55d 100644
--- a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
+++ b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
@@ -2511,9 +2511,9 @@ SILoadStoreOptimizer::collectMergeableInsts(
const MachineFunction *MF = MI.getParent()->getParent();
const auto *MFI = MF->getInfo<SIMachineFunctionInfo>();
- if (MFI->isTBufferCombineDisabled()) {
+ if (!MFI->isRelaxedTBufferOOBMode()) {
LLVM_DEBUG(
- dbgs() << "Skip TBUFFER combine: disabled by function attribute\n");
+ dbgs() << "Skip tbuffer combine: relaxed mode not enabled\n");
continue;
}
}
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
index cd8ef78f6f95a..084eb1458c2cd 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
@@ -198,8 +198,8 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const Function &F,
ClusterDims = AMDGPU::ClusterDimsAttr::get(F);
- if (F.hasFnAttribute("amdgpu-disable-tbuffer-combine"))
- setDisableTBufferCombine(true);
+ if (F.hasFnAttribute("amdgpu-relaxed-tbuffer-oob-mod"))
+ setRelaxedTBufferOOBMode(true);
}
MachineFunctionInfo *SIMachineFunctionInfo::clone(
@@ -796,6 +796,7 @@ bool SIMachineFunctionInfo::initializeBaseYamlFields(
BytesInStackArgArea = YamlMFI.BytesInStackArgArea;
ReturnsVoid = YamlMFI.ReturnsVoid;
IsWholeWaveFunction = YamlMFI.IsWholeWaveFunction;
+ RelaxedTBufferOOBMode = YamlMFI.RelaxedTBufferOOBMode;
if (YamlMFI.ScavengeFI) {
auto FIOrErr = YamlMFI.ScavengeFI->getFI(MF.getFrameInfo());
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
index 86f1f3be71ec7..12ba04acd87e6 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
@@ -293,7 +293,7 @@ struct SIMachineFunctionInfo final : public yaml::MachineFunctionInfo {
unsigned PSInputEnable = 0;
unsigned MaxMemoryClusterDWords = DefaultMemoryClusterDWordsLimit;
- bool DisableTBufferCombine = false;
+ bool RelaxedTBufferOOBMode = false;
SIMode Mode;
std::optional<FrameIndex> ScavengeFI;
@@ -364,6 +364,8 @@ template <> struct MappingTraits<SIMachineFunctionInfo> {
YamlIO.mapOptional("scratchReservedForDynamicVGPRs",
MFI.ScratchReservedForDynamicVGPRs, 0);
YamlIO.mapOptional("isWholeWaveFunction", MFI.IsWholeWaveFunction, false);
+ YamlIO.mapOptional("RelaxedTBufferOOBMode", MFI.RelaxedTBufferOOBMode,
+ false);
}
};
@@ -530,8 +532,8 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction,
// scheduler stage.
unsigned MaxMemoryClusterDWords = DefaultMemoryClusterDWordsLimit;
- // Disable combining of TBUFFER instructions.
- bool DisableTBufferCombine = false;
+ // Enable relaxed TBUFFER out-of-bounds mode. Default is false.
+ bool RelaxedTBufferOOBMode = false;
MCPhysReg getNextUserSGPR() const;
@@ -1218,9 +1220,9 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction,
AMDGPU::ClusterDimsAttr getClusterDims() const { return ClusterDims; }
- bool isTBufferCombineDisabled() const { return DisableTBufferCombine; }
- void setDisableTBufferCombine(bool IsDisabled) {
- DisableTBufferCombine = IsDisabled;
+ bool isRelaxedTBufferOOBMode() const { return RelaxedTBufferOOBMode; }
+ void setRelaxedTBufferOOBMode(bool Enabled) {
+ RelaxedTBufferOOBMode = Enabled;
}
};
diff --git a/llvm/test/CodeGen/AMDGPU/merge-tbuffer-gfx10.mir b/llvm/test/CodeGen/AMDGPU/merge-tbuffer-gfx10.mir
index 402c00298c8da..7124d4d0f680b 100644
--- a/llvm/test/CodeGen/AMDGPU/merge-tbuffer-gfx10.mir
+++ b/llvm/test/CodeGen/AMDGPU/merge-tbuffer-gfx10.mir
@@ -3,6 +3,8 @@
---
name: gfx10_tbuffer_load_x_xyz
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
; GFX10-LABEL: name: gfx10_tbuffer_load_x_xyz
@@ -25,6 +27,8 @@ body: |
---
name: gfx10_tbuffer_load_xyz_x
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
; GFX10-LABEL: name: gfx10_tbuffer_load_xyz_x
@@ -47,6 +51,8 @@ body: |
---
name: gfx10_tbuffer_load_xy_xy
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
; GFX10-LABEL: name: gfx10_tbuffer_load_xy_xy
@@ -69,6 +75,8 @@ body: |
---
name: gfx10_tbuffer_load_x_xy
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
; GFX10-LABEL: name: gfx10_tbuffer_load_x_xy
@@ -91,6 +99,8 @@ body: |
---
name: gfx10_tbuffer_load_xy_x
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
; GFX10-LABEL: name: gfx10_tbuffer_load_xy_x
@@ -113,6 +123,8 @@ body: |
---
name: gfx10_tbuffer_load_x_x
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
; GFX10-LABEL: name: gfx10_tbuffer_load_x_x
@@ -135,6 +147,8 @@ body: |
---
name: gfx10_tbuffer_load_x_x_format_32_32_32_32
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
; GFX10-LABEL: name: gfx10_tbuffer_load_x_x_format_32_32_32_32
@@ -157,6 +171,8 @@ body: |
---
name: gfx10_tbuffer_load_float_32
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
; GFX10-LABEL: name: gfx10_tbuffer_load_float_32
@@ -198,6 +214,8 @@ body: |
---
name: gfx10_tbuffer_load_sint_32
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
; GFX10-LABEL: name: gfx10_tbuffer_load_sint_32
@@ -239,6 +257,8 @@ body: |
---
name: gfx10_tbuffer_load_uint_32
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
; GFX10-LABEL: name: gfx10_tbuffer_load_uint_32
@@ -280,6 +300,8 @@ body: |
---
name: gfx10_tbuffer_load_not_merged_data_format_mismatch
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
; GFX10-LABEL: name: gfx10_tbuffer_load_not_merged_data_format_mismatch
@@ -315,6 +337,8 @@ body: |
---
name: gfx10_tbuffer_load_not_merged_num_format_mismatch
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
; GFX10-LABEL: name: gfx10_tbuffer_load_not_merged_num_format_mismatch
@@ -350,6 +374,8 @@ body: |
---
name: gfx10_tbuffer_store_x_xyz
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3
@@ -384,6 +410,8 @@ body: |
---
name: gfx10_tbuffer_store_xyz_x
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3
@@ -418,6 +446,8 @@ body: |
---
name: gfx10_tbuffer_store_xy_xy
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3
@@ -454,6 +484,8 @@ body: |
---
name: gfx10_tbuffer_store_x_xy
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3
@@ -488,6 +520,8 @@ body: |
---
name: gfx10_tbuffer_store_xy_x
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3
@@ -523,6 +557,8 @@ body: |
---
name: gfx10_tbuffer_store_x_x
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3
@@ -555,6 +591,8 @@ body: |
---
name: gfx10_tbuffer_store_x_x_format_32_32_32_32
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3
@@ -587,6 +625,8 @@ body: |
---
name: gfx10_tbuffer_store_float32
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8
@@ -643,6 +683,8 @@ body: |
---
name: gfx10_tbuffer_store_sint32
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8
@@ -699,6 +741,8 @@ body: |
---
name: gfx10_tbuffer_store_uint32
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8
@@ -755,6 +799,8 @@ body: |
---
name: gfx10_tbuffer_store_not_merged_data_format_mismatch
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8
@@ -811,6 +857,8 @@ body: |
---
name: gfx10_tbuffer_store_not_merged_num_format_mismatch
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8
@@ -867,6 +915,8 @@ body: |
---
name: gfx10_tbuffer_load_not_merged_swizzled_0
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
; GFX10-LABEL: name: gfx10_tbuffer_load_not_merged_swizzled_0
@@ -888,6 +938,8 @@ body: |
---
name: gfx10_tbuffer_load_not_merged_swizzled_1
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
; GFX10-LABEL: name: gfx10_tbuffer_load_not_merged_swizzled_1
@@ -909,6 +961,8 @@ body: |
---
name: gfx10_tbuffer_load_merge_across_swizzle
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
; GFX10-LABEL: name: gfx10_tbuffer_load_merge_across_swizzle
@@ -933,6 +987,8 @@ body: |
---
name: gfx10_tbuffer_load_x_x_idxen
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
; GFX10-LABEL: name: gfx10_tbuffer_load_x_x_idxen
@@ -957,6 +1013,8 @@ body: |
---
name: gfx10_tbuffer_load_x_xy_idxen
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
; GFX10-LABEL: name: gfx10_tbuffer_load_x_xy_idxen
@@ -981,6 +1039,8 @@ body: |
---
name: gfx10_tbuffer_load_xy_xy_idxen
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
; GFX10-LABEL: name: gfx10_tbuffer_load_xy_xy_idxen
@@ -1005,6 +1065,8 @@ body: |
---
name: gfx10_tbuffer_load_x_xyz_idxen
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
; GFX10-LABEL: name: gfx10_tbuffer_load_x_xyz_idxen
@@ -1029,6 +1091,8 @@ body: |
---
name: gfx10_tbuffer_load_x_x_x_idxen_exact
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
; GFX10-LABEL: name: gfx10_tbuffer_load_x_x_x_idxen_exact
@@ -1056,6 +1120,8 @@ body: |
---
name: gfx10_tbuffer_load_x_x_x_idxen_exact_swizzled_0
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
; GFX10-LABEL: name: gfx10_tbuffer_load_x_x_x_idxen_exact_swizzled_0
@@ -1082,6 +1148,8 @@ body: |
---
name: gfx10_tbuffer_load_x_x_bothen
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
; GFX10-LABEL: name: gfx10_tbuffer_load_x_x_bothen
@@ -1106,6 +1174,8 @@ body: |
---
name: gfx10_tbuffer_load_x_xy_bothen
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
; GFX10-LABEL: name: gfx10_tbuffer_load_x_xy_bothen
@@ -1130,6 +1200,8 @@ body: |
---
name: gfx10_tbuffer_load_xy_xy_bothen
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
; GFX10-LABEL: name: gfx10_tbuffer_load_xy_xy_bothen
@@ -1154,6 +1226,8 @@ body: |
---
name: gfx10_tbuffer_load_x_xyz_bothen
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
; GFX10-LABEL: name: gfx10_tbuffer_load_x_xyz_bothen
@@ -1178,6 +1252,8 @@ body: |
---
name: gfx10_tbuffer_load_x_x_idxen_exact
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
; GFX10-LABEL: name: gfx10_tbuffer_load_x_x_idxen_exact
@@ -1202,6 +1278,8 @@ body: |
---
name: gfx10_tbuffer_load_x_xy_idxen_exact
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
; GFX10-LABEL: name: gfx10_tbuffer_load_x_xy_idxen_exact
@@ -1226,6 +1304,8 @@ body: |
---
name: gfx10_tbuffer_load_xy_xy_idxen_exact
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
; GFX10-LABEL: name: gfx10_tbuffer_load_xy_xy_idxen_exact
@@ -1250,6 +1330,8 @@ body: |
---
name: gfx10_tbuffer_load_x_xyz_idxen_exact
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
; GFX10-LABEL: name: gfx10_tbuffer_load_x_xyz_idxen_exact
@@ -1274,6 +1356,8 @@ body: |
---
name: gfx10_tbuffer_load_x_x_bothen_exact
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
; GFX10-LABEL: name: gfx10_tbuffer_load_x_x_bothen_exact
@@ -1298,6 +1382,8 @@ body: |
---
name: gfx10_tbuffer_load_x_xy_bothen_exact
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
; GFX10-LABEL: name: gfx10_tbuffer_load_x_xy_bothen_exact
@@ -1322,6 +1408,8 @@ body: |
---
name: gfx10_tbuffer_load_xy_xy_bothen_exact
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
; GFX10-LABEL: name: gfx10_tbuffer_load_xy_xy_bothen_exact
@@ -1346,6 +1434,8 @@ body: |
---
name: gfx10_tbuffer_load_x_xyz_bothen_exact
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
; GFX10-LABEL: name: gfx10_tbuffer_load_x_xyz_bothen_exact
@@ -1370,6 +1460,8 @@ body: |
---
name: gfx10_tbuffer_load_x_x_x_bothen_exact
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
; GFX10-LABEL: name: gfx10_tbuffer_load_x_x_x_bothen_exact
@@ -1397,6 +1489,8 @@ body: |
---
name: gfx10_tbuffer_load_x_x_x_bothen_exact_swizzled_0
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
; GFX10-LABEL: name: gfx10_tbuffer_load_x_x_x_bothen_exact_swizzled_0
@@ -1423,6 +1517,8 @@ body: |
---
name: gfx10_tbuffer_load_xy_xy_bothen_exact_diff_vaddr
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
; GFX10-LABEL: name: gfx10_tbuffer_load_xy_xy_bothen_exact_diff_vaddr
@@ -1448,6 +1544,8 @@ body: |
---
name: gfx10_tbuffer_load_xy_xy_bothen_exact_diff_srsrc
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
; GFX10-LABEL: name: gfx10_tbuffer_load_xy_xy_bothen_exact_diff_srsrc
@@ -1475,6 +1573,8 @@ body: |
---
name: gfx10_tbuffer_load_xy_xy_idxen_exact_diff_vaddr
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
; GFX10-LABEL: name: gfx10_tbuffer_load_xy_xy_idxen_exact_diff_vaddr
@@ -1500,6 +1600,8 @@ body: |
---
name: gfx10_tbuffer_load_xy_xy_idxen_exact_diff_srsrc
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
; GFX10-LABEL: name: gfx10_tbuffer_load_xy_xy_idxen_exact_diff_srsrc
diff --git a/llvm/test/CodeGen/AMDGPU/merge-tbuffer-gfx11.mir b/llvm/test/CodeGen/AMDGPU/merge-tbuffer-gfx11.mir
index f5407a5223166..9f234056c83ec 100644
--- a/llvm/test/CodeGen/AMDGPU/merge-tbuffer-gfx11.mir
+++ b/llvm/test/CodeGen/AMDGPU/merge-tbuffer-gfx11.mir
@@ -3,6 +3,8 @@
---
name: gfx11_tbuffer_load_x_xyz
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
; GFX11-LABEL: name: gfx11_tbuffer_load_x_xyz
@@ -25,6 +27,8 @@ body: |
---
name: gfx11_tbuffer_load_xyz_x
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
; GFX11-LABEL: name: gfx11_tbuffer_load_xyz_x
@@ -47,6 +51,8 @@ body: |
---
name: gfx11_tbuffer_load_xy_xy
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
; GFX11-LABEL: name: gfx11_tbuffer_load_xy_xy
@@ -69,6 +75,8 @@ body: |
---
name: gfx11_tbuffer_load_x_xy
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
; GFX11-LABEL: name: gfx11_tbuffer_load_x_xy
@@ -91,6 +99,8 @@ body: |
---
name: gfx11_tbuffer_load_xy_x
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
; GFX11-LABEL: name: gfx11_tbuffer_load_xy_x
@@ -113,6 +123,8 @@ body: |
---
name: gfx11_tbuffer_load_x_x
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
; GFX11-LABEL: name: gfx11_tbuffer_load_x_x
@@ -135,6 +147,8 @@ body: |
---
name: gfx11_tbuffer_load_x_x_format_32_32_32_32
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
; GFX11-LABEL: name: gfx11_tbuffer_load_x_x_format_32_32_32_32
@@ -157,6 +171,8 @@ body: |
---
name: gfx11_tbuffer_load_float_32
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
; GFX11-LABEL: name: gfx11_tbuffer_load_float_32
@@ -198,6 +214,8 @@ body: |
---
name: gfx11_tbuffer_load_sint_32
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
; GFX11-LABEL: name: gfx11_tbuffer_load_sint_32
@@ -239,6 +257,8 @@ body: |
---
name: gfx11_tbuffer_load_uint_32
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
; GFX11-LABEL: name: gfx11_tbuffer_load_uint_32
@@ -280,6 +300,8 @@ body: |
---
name: gfx11_tbuffer_load_not_merged_data_format_mismatch
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
; GFX11-LABEL: name: gfx11_tbuffer_load_not_merged_data_format_mismatch
@@ -315,6 +337,8 @@ body: |
---
name: gfx11_tbuffer_load_not_merged_num_format_mismatch
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
; GFX11-LABEL: name: gfx11_tbuffer_load_not_merged_num_format_mismatch
@@ -350,6 +374,8 @@ body: |
---
name: gfx11_tbuffer_store_x_xyz
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3
@@ -384,6 +410,8 @@ body: |
---
name: gfx11_tbuffer_store_xyz_x
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3
@@ -418,6 +446,8 @@ body: |
---
name: gfx11_tbuffer_store_xy_xy
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3
@@ -454,6 +484,8 @@ body: |
---
name: gfx11_tbuffer_store_x_xy
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3
@@ -488,6 +520,8 @@ body: |
---
name: gfx11_tbuffer_store_xy_x
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3
@@ -523,6 +557,8 @@ body: |
---
name: gfx11_tbuffer_store_x_x
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3
@@ -555,6 +591,8 @@ body: |
---
name: gfx11_tbuffer_store_x_x_format_32_32_32_32
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3
@@ -587,6 +625,8 @@ body: |
---
name: gfx11_tbuffer_store_float32
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8
@@ -643,6 +683,8 @@ body: |
---
name: gfx11_tbuffer_store_sint32
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8
@@ -699,6 +741,8 @@ body: |
---
name: gfx11_tbuffer_store_uint32
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8
@@ -755,6 +799,8 @@ body: |
---
name: gfx11_tbuffer_store_not_merged_data_format_mismatch
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8
@@ -811,6 +857,8 @@ body: |
---
name: gfx11_tbuffer_store_not_merged_num_format_mismatch
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8
@@ -867,6 +915,8 @@ body: |
---
name: gfx11_tbuffer_load_not_merged_swizzled_0
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
; GFX11-LABEL: name: gfx11_tbuffer_load_not_merged_swizzled_0
@@ -888,6 +938,8 @@ body: |
---
name: gfx11_tbuffer_load_not_merged_swizzled_1
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
; GFX11-LABEL: name: gfx11_tbuffer_load_not_merged_swizzled_1
@@ -909,6 +961,8 @@ body: |
---
name: gfx11_tbuffer_load_merge_across_swizzle
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
; GFX11-LABEL: name: gfx11_tbuffer_load_merge_across_swizzle
@@ -933,6 +987,8 @@ body: |
---
name: gfx11_tbuffer_load_x_x_idxen
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
; GFX11-LABEL: name: gfx11_tbuffer_load_x_x_idxen
@@ -957,6 +1013,8 @@ body: |
---
name: gfx11_tbuffer_load_x_xy_idxen
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
; GFX11-LABEL: name: gfx11_tbuffer_load_x_xy_idxen
@@ -981,6 +1039,8 @@ body: |
---
name: gfx11_tbuffer_load_xy_xy_idxen
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
; GFX11-LABEL: name: gfx11_tbuffer_load_xy_xy_idxen
@@ -1005,6 +1065,8 @@ body: |
---
name: gfx11_tbuffer_load_x_xyz_idxen
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
; GFX11-LABEL: name: gfx11_tbuffer_load_x_xyz_idxen
@@ -1029,6 +1091,8 @@ body: |
---
name: gfx11_tbuffer_load_x_x_bothen
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
; GFX11-LABEL: name: gfx11_tbuffer_load_x_x_bothen
@@ -1053,6 +1117,8 @@ body: |
---
name: gfx11_tbuffer_load_x_xy_bothen
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
; GFX11-LABEL: name: gfx11_tbuffer_load_x_xy_bothen
@@ -1077,6 +1143,8 @@ body: |
---
name: gfx11_tbuffer_load_xy_xy_bothen
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
; GFX11-LABEL: name: gfx11_tbuffer_load_xy_xy_bothen
@@ -1101,6 +1169,8 @@ body: |
---
name: gfx11_tbuffer_load_x_xyz_bothen
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
; GFX11-LABEL: name: gfx11_tbuffer_load_x_xyz_bothen
@@ -1125,6 +1195,8 @@ body: |
---
name: gfx11_tbuffer_load_x_x_idxen_exact
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
; GFX11-LABEL: name: gfx11_tbuffer_load_x_x_idxen_exact
@@ -1149,6 +1221,8 @@ body: |
---
name: gfx11_tbuffer_load_x_xy_idxen_exact
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
; GFX11-LABEL: name: gfx11_tbuffer_load_x_xy_idxen_exact
@@ -1173,6 +1247,8 @@ body: |
---
name: gfx11_tbuffer_load_xy_xy_idxen_exact
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
; GFX11-LABEL: name: gfx11_tbuffer_load_xy_xy_idxen_exact
@@ -1197,6 +1273,8 @@ body: |
---
name: gfx11_tbuffer_load_x_xyz_idxen_exact
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
; GFX11-LABEL: name: gfx11_tbuffer_load_x_xyz_idxen_exact
@@ -1221,6 +1299,8 @@ body: |
---
name: gfx11_tbuffer_load_x_x_x_idxen_exact
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
; GFX11-LABEL: name: gfx11_tbuffer_load_x_x_x_idxen_exact
@@ -1248,6 +1328,8 @@ body: |
---
name: gfx11_tbuffer_load_x_x_x_idxen_exact_swizzled_0
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
; GFX11-LABEL: name: gfx11_tbuffer_load_x_x_x_idxen_exact_swizzled_0
@@ -1274,6 +1356,8 @@ body: |
---
name: gfx11_tbuffer_load_x_x_bothen_exact
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
; GFX11-LABEL: name: gfx11_tbuffer_load_x_x_bothen_exact
@@ -1298,6 +1382,8 @@ body: |
---
name: gfx11_tbuffer_load_x_xy_bothen_exact
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
; GFX11-LABEL: name: gfx11_tbuffer_load_x_xy_bothen_exact
@@ -1322,6 +1408,8 @@ body: |
---
name: gfx11_tbuffer_load_xy_xy_bothen_exact
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
; GFX11-LABEL: name: gfx11_tbuffer_load_xy_xy_bothen_exact
@@ -1346,6 +1434,8 @@ body: |
---
name: gfx11_tbuffer_load_x_xyz_bothen_exact
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
; GFX11-LABEL: name: gfx11_tbuffer_load_x_xyz_bothen_exact
@@ -1370,6 +1460,8 @@ body: |
---
name: gfx11_tbuffer_load_xy_xy_bothen_exact_diff_vaddr
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
; GFX11-LABEL: name: gfx11_tbuffer_load_xy_xy_bothen_exact_diff_vaddr
@@ -1396,6 +1488,8 @@ body: |
---
name: gfx11_tbuffer_load_xy_xy_bothen_exact_diff_srsrc
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
; GFX11-LABEL: name: gfx11_tbuffer_load_xy_xy_bothen_exact_diff_srsrc
@@ -1423,6 +1517,8 @@ body: |
---
name: gfx11_tbuffer_load_xy_xy_idxen_exact_diff_vaddr
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
; GFX11-LABEL: name: gfx11_tbuffer_load_xy_xy_idxen_exact_diff_vaddr
@@ -1448,6 +1544,8 @@ body: |
---
name: gfx11_tbuffer_load_xy_xy_idxen_exact_diff_srsrc
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
; GFX11-LABEL: name: gfx11_tbuffer_load_xy_xy_idxen_exact_diff_srsrc
@@ -1475,6 +1573,8 @@ body: |
---
name: gfx11_tbuffer_load_x_x_x_bothen_exact
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
; GFX11-LABEL: name: gfx11_tbuffer_load_x_x_x_bothen_exact
@@ -1502,6 +1602,8 @@ body: |
---
name: gfx11_tbuffer_load_x_x_x_bothen_exact_swizzled_0
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
; GFX11-LABEL: name: gfx11_tbuffer_load_x_x_x_bothen_exact_swizzled_0
@@ -1528,6 +1630,8 @@ body: |
---
name: gfx11_tbuffer_load_x_x_x_idxen_16bit
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
liveins: $sgpr0,$sgpr1,$sgpr2,$sgpr3,$vgpr0
@@ -1550,6 +1654,8 @@ body: |
---
name: gfx11_tbuffer_load_idxen_16_bit
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4
@@ -1598,6 +1704,8 @@ body: |
---
name: gfx11_tbuffer_load_xy_xy_idxen_uint_16_bit
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0
@@ -1617,6 +1725,8 @@ body: |
---
name: gfx11_tbuffer_load_xy_xy_idxen_sint_16_bit
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0
@@ -1636,6 +1746,8 @@ body: |
---
name: gfx11_tbuffer_load_x_off2_off4_16bit_no_merge
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4
@@ -1664,6 +1776,8 @@ body: |
---
name: gfx11_tbuffer_store_x_x_x_idxen_16_bit
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2
@@ -1696,6 +1810,8 @@ body: |
---
name: gfx11_tbuffer_store_idxen_16_bit
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8
@@ -1752,6 +1868,8 @@ body: |
---
name: gfx11_tbuffer_store_xy_xy_uint_16_bit
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3
@@ -1772,6 +1890,8 @@ body: |
---
name: gfx11_tbuffer_store_xy_xy_sint_16_bit
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3
@@ -1792,6 +1912,8 @@ body: |
---
name: gfx11_tbuffer_load_x_x_x_idxen_8bit
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4
@@ -1824,6 +1946,8 @@ body: |
---
name: gfx11_tbuffer_load_idxen_8bit
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4
@@ -1872,6 +1996,8 @@ body: |
---
name: gfx11_tbuffer_load_xy_xy_idxen_uint_8bit
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4
@@ -1901,6 +2027,8 @@ body: |
---
name: gfx11_tbuffer_load_xy_xy_idxen_sint_8bit
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4
@@ -1930,6 +2058,8 @@ body: |
---
name: gfx11_tbuffer_load_x_off3_off4_8bit_no_merge
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4
@@ -1958,6 +2088,8 @@ body: |
---
name: gfx11_tbuffer_store_x_x_x_idxen_8bit
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2
@@ -1990,6 +2122,8 @@ body: |
---
name: gfx11_tbuffer_store_idxen_8bit
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8
@@ -2046,6 +2180,8 @@ body: |
---
name: gfx11_tbuffer_store_xy_xy_idxen_uint_8bit
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3
@@ -2082,6 +2218,8 @@ body: |
---
name: gfx11_tbuffer_store_xy_xy_idxen_sint_8bit
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3
@@ -2118,6 +2256,8 @@ body: |
---
name: gfx11_tbuffer_store_x_off3_off4_8bit_no_merge
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1
diff --git a/llvm/test/CodeGen/AMDGPU/merge-tbuffer-gfx12.mir b/llvm/test/CodeGen/AMDGPU/merge-tbuffer-gfx12.mir
index 1ee4f9e008197..fb223cf5ed7b8 100644
--- a/llvm/test/CodeGen/AMDGPU/merge-tbuffer-gfx12.mir
+++ b/llvm/test/CodeGen/AMDGPU/merge-tbuffer-gfx12.mir
@@ -3,6 +3,8 @@
---
name: gfx12_tbuffer_load_x_xyz
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
; GFX12-LABEL: name: gfx12_tbuffer_load_x_xyz
@@ -25,6 +27,8 @@ body: |
---
name: gfx12_tbuffer_load_xyz_x
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
; GFX12-LABEL: name: gfx12_tbuffer_load_xyz_x
@@ -47,6 +51,8 @@ body: |
---
name: gfx12_tbuffer_load_xy_xy
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
; GFX12-LABEL: name: gfx12_tbuffer_load_xy_xy
@@ -69,6 +75,8 @@ body: |
---
name: gfx12_tbuffer_load_x_xy
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
; GFX12-LABEL: name: gfx12_tbuffer_load_x_xy
@@ -91,6 +99,8 @@ body: |
---
name: gfx12_tbuffer_load_xy_x
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
; GFX12-LABEL: name: gfx12_tbuffer_load_xy_x
@@ -113,6 +123,8 @@ body: |
---
name: gfx12_tbuffer_load_x_x
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
; GFX12-LABEL: name: gfx12_tbuffer_load_x_x
@@ -135,6 +147,8 @@ body: |
---
name: gfx12_tbuffer_load_x_x_format_32_32_32_32
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
; GFX12-LABEL: name: gfx12_tbuffer_load_x_x_format_32_32_32_32
@@ -157,6 +171,8 @@ body: |
---
name: gfx12_tbuffer_load_float_32
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
; GFX12-LABEL: name: gfx12_tbuffer_load_float_32
@@ -198,6 +214,8 @@ body: |
---
name: gfx12_tbuffer_load_sint_32
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
; GFX12-LABEL: name: gfx12_tbuffer_load_sint_32
@@ -239,6 +257,8 @@ body: |
---
name: gfx12_tbuffer_load_uint_32
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
; GFX12-LABEL: name: gfx12_tbuffer_load_uint_32
@@ -280,6 +300,8 @@ body: |
---
name: gfx12_tbuffer_load_not_merged_data_format_mismatch
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
; GFX12-LABEL: name: gfx12_tbuffer_load_not_merged_data_format_mismatch
@@ -315,6 +337,8 @@ body: |
---
name: gfx12_tbuffer_load_not_merged_num_format_mismatch
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
; GFX12-LABEL: name: gfx12_tbuffer_load_not_merged_num_format_mismatch
@@ -350,6 +374,8 @@ body: |
---
name: gfx12_tbuffer_store_x_xyz
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3
@@ -384,6 +410,8 @@ body: |
---
name: gfx12_tbuffer_store_xyz_x
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3
@@ -418,6 +446,8 @@ body: |
---
name: gfx12_tbuffer_store_xy_xy
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3
@@ -454,6 +484,8 @@ body: |
---
name: gfx12_tbuffer_store_x_xy
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3
@@ -488,6 +520,8 @@ body: |
---
name: gfx12_tbuffer_store_xy_x
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3
@@ -523,6 +557,8 @@ body: |
---
name: gfx12_tbuffer_store_x_x
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3
@@ -555,6 +591,8 @@ body: |
---
name: gfx12_tbuffer_store_x_x_format_32_32_32_32
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3
@@ -587,6 +625,8 @@ body: |
---
name: gfx12_tbuffer_store_float32
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8
@@ -643,6 +683,8 @@ body: |
---
name: gfx12_tbuffer_store_sint32
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8
@@ -699,6 +741,8 @@ body: |
---
name: gfx12_tbuffer_store_uint32
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8
@@ -755,6 +799,8 @@ body: |
---
name: gfx12_tbuffer_store_not_merged_data_format_mismatch
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8
@@ -811,6 +857,8 @@ body: |
---
name: gfx12_tbuffer_store_not_merged_num_format_mismatch
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8
@@ -867,6 +915,8 @@ body: |
---
name: gfx12_tbuffer_load_not_merged_swizzled_0
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
; GFX12-LABEL: name: gfx12_tbuffer_load_not_merged_swizzled_0
@@ -888,6 +938,8 @@ body: |
---
name: gfx12_tbuffer_load_not_merged_swizzled_1
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
; GFX12-LABEL: name: gfx12_tbuffer_load_not_merged_swizzled_1
@@ -909,6 +961,8 @@ body: |
---
name: gfx12_tbuffer_load_merge_across_swizzle
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
; GFX12-LABEL: name: gfx12_tbuffer_load_merge_across_swizzle
@@ -933,6 +987,8 @@ body: |
---
name: gfx12_tbuffer_load_x_x_idxen
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
; GFX12-LABEL: name: gfx12_tbuffer_load_x_x_idxen
@@ -957,6 +1013,8 @@ body: |
---
name: gfx12_tbuffer_load_x_xy_idxen
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
; GFX12-LABEL: name: gfx12_tbuffer_load_x_xy_idxen
@@ -981,6 +1039,8 @@ body: |
---
name: gfx12_tbuffer_load_xy_xy_idxen
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
; GFX12-LABEL: name: gfx12_tbuffer_load_xy_xy_idxen
@@ -1005,6 +1065,8 @@ body: |
---
name: gfx12_tbuffer_load_x_xyz_idxen
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
; GFX12-LABEL: name: gfx12_tbuffer_load_x_xyz_idxen
@@ -1029,6 +1091,8 @@ body: |
---
name: gfx12_tbuffer_load_x_x_bothen
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
; GFX12-LABEL: name: gfx12_tbuffer_load_x_x_bothen
@@ -1053,6 +1117,8 @@ body: |
---
name: gfx12_tbuffer_load_x_xy_bothen
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
; GFX12-LABEL: name: gfx12_tbuffer_load_x_xy_bothen
@@ -1077,6 +1143,8 @@ body: |
---
name: gfx12_tbuffer_load_xy_xy_bothen
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
; GFX12-LABEL: name: gfx12_tbuffer_load_xy_xy_bothen
@@ -1101,6 +1169,8 @@ body: |
---
name: gfx12_tbuffer_load_x_xyz_bothen
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
; GFX12-LABEL: name: gfx12_tbuffer_load_x_xyz_bothen
@@ -1125,6 +1195,8 @@ body: |
---
name: gfx12_tbuffer_load_x_x_idxen_exact
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
; GFX12-LABEL: name: gfx12_tbuffer_load_x_x_idxen_exact
@@ -1149,6 +1221,8 @@ body: |
---
name: gfx12_tbuffer_load_x_xy_idxen_exact
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
; GFX12-LABEL: name: gfx12_tbuffer_load_x_xy_idxen_exact
@@ -1173,6 +1247,8 @@ body: |
---
name: gfx12_tbuffer_load_xy_xy_idxen_exact
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
; GFX12-LABEL: name: gfx12_tbuffer_load_xy_xy_idxen_exact
@@ -1197,6 +1273,8 @@ body: |
---
name: gfx12_tbuffer_load_x_xyz_idxen_exact
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
; GFX12-LABEL: name: gfx12_tbuffer_load_x_xyz_idxen_exact
@@ -1221,6 +1299,8 @@ body: |
---
name: gfx12_tbuffer_load_x_x_x_idxen_exact
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
; GFX12-LABEL: name: gfx12_tbuffer_load_x_x_x_idxen_exact
@@ -1248,6 +1328,8 @@ body: |
---
name: gfx12_tbuffer_load_x_x_x_idxen_exact_swizzled_0
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
; GFX12-LABEL: name: gfx12_tbuffer_load_x_x_x_idxen_exact_swizzled_0
@@ -1274,6 +1356,8 @@ body: |
---
name: gfx12_tbuffer_load_x_x_bothen_exact
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
; GFX12-LABEL: name: gfx12_tbuffer_load_x_x_bothen_exact
@@ -1298,6 +1382,8 @@ body: |
---
name: gfx12_tbuffer_load_x_xy_bothen_exact
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
; GFX12-LABEL: name: gfx12_tbuffer_load_x_xy_bothen_exact
@@ -1322,6 +1408,8 @@ body: |
---
name: gfx12_tbuffer_load_xy_xy_bothen_exact
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
@@ -1347,6 +1435,8 @@ body: |
---
name: gfx12_tbuffer_load_x_xyz_bothen_exact
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
@@ -1372,6 +1462,8 @@ body: |
---
name: gfx12_tbuffer_load_xy_xy_bothen_exact_diff_vaddr
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
; GFX12-LABEL: name: gfx12_tbuffer_load_xy_xy_bothen_exact_diff_vaddr
@@ -1398,6 +1490,8 @@ body: |
---
name: gfx12_tbuffer_load_xy_xy_bothen_exact_diff_srsrc
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
; GFX12-LABEL: name: gfx12_tbuffer_load_xy_xy_bothen_exact_diff_srsrc
@@ -1425,6 +1519,8 @@ body: |
---
name: gfx12_tbuffer_load_xy_xy_idxen_exact_diff_vaddr
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
; GFX12-LABEL: name: gfx12_tbuffer_load_xy_xy_idxen_exact_diff_vaddr
@@ -1450,6 +1546,8 @@ body: |
---
name: gfx12_tbuffer_load_xy_xy_idxen_exact_diff_srsrc
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
; GFX12-LABEL: name: gfx12_tbuffer_load_xy_xy_idxen_exact_diff_srsrc
@@ -1477,6 +1575,8 @@ body: |
---
name: gfx12_tbuffer_load_x_x_x_bothen_exact
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
; GFX12-LABEL: name: gfx12_tbuffer_load_x_x_x_bothen_exact
@@ -1504,6 +1604,8 @@ body: |
---
name: gfx12_tbuffer_load_x_x_x_bothen_exact_swizzled_0
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
; GFX12-LABEL: name: gfx12_tbuffer_load_x_x_x_bothen_exact_swizzled_0
diff --git a/llvm/test/CodeGen/AMDGPU/merge-tbuffer-gfx9.mir b/llvm/test/CodeGen/AMDGPU/merge-tbuffer-gfx9.mir
index 3a43e743de493..fe073fbfd5163 100644
--- a/llvm/test/CodeGen/AMDGPU/merge-tbuffer-gfx9.mir
+++ b/llvm/test/CodeGen/AMDGPU/merge-tbuffer-gfx9.mir
@@ -3,6 +3,8 @@
---
name: gfx9_tbuffer_load_x_xyz
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
; GFX9-LABEL: name: gfx9_tbuffer_load_x_xyz
@@ -25,6 +27,8 @@ body: |
---
name: gfx9_tbuffer_load_xyz_x
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
; GFX9-LABEL: name: gfx9_tbuffer_load_xyz_x
@@ -47,6 +51,8 @@ body: |
---
name: gfx9_tbuffer_load_xy_xy
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
; GFX9-LABEL: name: gfx9_tbuffer_load_xy_xy
@@ -69,6 +75,8 @@ body: |
---
name: gfx9_tbuffer_load_x_xy
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
; GFX9-LABEL: name: gfx9_tbuffer_load_x_xy
@@ -91,6 +99,8 @@ body: |
---
name: gfx9_tbuffer_load_xy_x
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
; GFX9-LABEL: name: gfx9_tbuffer_load_xy_x
@@ -114,6 +124,8 @@ body: |
name: gfx9_tbuffer_load_x_x
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
; GFX9-LABEL: name: gfx9_tbuffer_load_x_x
@@ -136,6 +148,8 @@ body: |
---
name: gfx9_tbuffer_load_x_x_format_32_32_32_32
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
; GFX9-LABEL: name: gfx9_tbuffer_load_x_x_format_32_32_32_32
@@ -158,6 +172,8 @@ body: |
---
name: gfx9_tbuffer_load_float_32
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
; GFX9-LABEL: name: gfx9_tbuffer_load_float_32
@@ -199,6 +215,8 @@ body: |
---
name: gfx9_tbuffer_load_sint_32
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
; GFX9-LABEL: name: gfx9_tbuffer_load_sint_32
@@ -240,6 +258,8 @@ body: |
---
name: gfx9_tbuffer_load_uint_32
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
; GFX9-LABEL: name: gfx9_tbuffer_load_uint_32
@@ -281,6 +301,8 @@ body: |
---
name: gfx9_tbuffer_load_not_merged_data_format_mismatch
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
; GFX9-LABEL: name: gfx9_tbuffer_load_not_merged_data_format_mismatch
@@ -316,6 +338,8 @@ body: |
---
name: gfx9_tbuffer_load_not_merged_num_format_mismatch
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
; GFX9-LABEL: name: gfx9_tbuffer_load_not_merged_num_format_mismatch
@@ -351,6 +375,8 @@ body: |
---
name: gfx9_tbuffer_store_x_xyz
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3
@@ -385,6 +411,8 @@ body: |
---
name: gfx9_tbuffer_store_xyz_x
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3
@@ -419,6 +447,8 @@ body: |
---
name: gfx9_tbuffer_store_xy_xy
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3
@@ -455,6 +485,8 @@ body: |
---
name: gfx9_tbuffer_store_x_xy
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3
@@ -489,6 +521,8 @@ body: |
---
name: gfx9_tbuffer_store_xy_x
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3
@@ -524,6 +558,8 @@ body: |
---
name: gfx9_tbuffer_store_x_x
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3
@@ -556,6 +592,8 @@ body: |
---
name: gfx9_tbuffer_store_x_x_format_32_32_32_32
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3
@@ -588,6 +626,8 @@ body: |
---
name: gfx9_tbuffer_store_float32
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8
@@ -644,6 +684,8 @@ body: |
---
name: gfx9_tbuffer_store_sint32
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8
@@ -700,6 +742,8 @@ body: |
---
name: gfx9_tbuffer_store_uint32
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8
@@ -756,6 +800,8 @@ body: |
---
name: gfx9_tbuffer_store_not_merged_data_format_mismatch
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8
@@ -812,6 +858,8 @@ body: |
---
name: gfx9_tbuffer_store_not_merged_num_format_mismatch
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8
@@ -868,6 +916,8 @@ body: |
---
name: gfx9_tbuffer_load_not_merged_swizzled_0
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
; GFX9-LABEL: name: gfx9_tbuffer_load_not_merged_swizzled_0
@@ -889,6 +939,8 @@ body: |
---
name: gfx9_tbuffer_load_not_merged_swizzled_1
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
; GFX9-LABEL: name: gfx9_tbuffer_load_not_merged_swizzled_1
@@ -910,6 +962,8 @@ body: |
---
name: gfx9_tbuffer_load_merge_across_swizzle
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
; GFX9-LABEL: name: gfx9_tbuffer_load_merge_across_swizzle
@@ -934,6 +988,8 @@ body: |
---
name: gfx9_tbuffer_load_merge_across_swizzled_store
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
; GFX9-LABEL: name: gfx9_tbuffer_load_merge_across_swizzled_store
@@ -959,6 +1015,8 @@ body: |
---
name: gfx9_tbuffer_load_x_x_idxen
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
; GFX9-LABEL: name: gfx9_tbuffer_load_x_x_idxen
@@ -983,6 +1041,8 @@ body: |
---
name: gfx9_tbuffer_load_x_xy_idxen
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
; GFX9-LABEL: name: gfx9_tbuffer_load_x_xy_idxen
@@ -1007,6 +1067,8 @@ body: |
---
name: gfx9_tbuffer_load_xy_xy_idxen
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
; GFX9-LABEL: name: gfx9_tbuffer_load_xy_xy_idxen
@@ -1031,6 +1093,8 @@ body: |
---
name: gfx9_tbuffer_load_x_xyz_idxen
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
; GFX9-LABEL: name: gfx9_tbuffer_load_x_xyz_idxen
@@ -1055,6 +1119,8 @@ body: |
---
name: gfx9_tbuffer_load_x_x_bothen
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
; GFX9-LABEL: name: gfx9_tbuffer_load_x_x_bothen
@@ -1079,6 +1145,8 @@ body: |
---
name: gfx9_tbuffer_load_x_xy_bothen
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
; GFX9-LABEL: name: gfx9_tbuffer_load_x_xy_bothen
@@ -1103,6 +1171,8 @@ body: |
---
name: gfx9_tbuffer_load_xy_xy_bothen
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
; GFX9-LABEL: name: gfx9_tbuffer_load_xy_xy_bothen
@@ -1127,6 +1197,8 @@ body: |
---
name: gfx9_tbuffer_load_x_xyz_bothen
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
; GFX9-LABEL: name: gfx9_tbuffer_load_x_xyz_bothen
@@ -1151,6 +1223,8 @@ body: |
---
name: gfx9_tbuffer_load_x_x_idxen_exact
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
; GFX9-LABEL: name: gfx9_tbuffer_load_x_x_idxen_exact
@@ -1175,6 +1249,8 @@ body: |
---
name: gfx9_tbuffer_load_x_xy_idxen_exact
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
; GFX9-LABEL: name: gfx9_tbuffer_load_x_xy_idxen_exact
@@ -1199,6 +1275,8 @@ body: |
---
name: gfx9_tbuffer_load_xy_xy_idxen_exact
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
; GFX9-LABEL: name: gfx9_tbuffer_load_xy_xy_idxen_exact
@@ -1223,6 +1301,8 @@ body: |
---
name: gfx9_tbuffer_load_x_xyz_idxen_exact
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
; GFX9-LABEL: name: gfx9_tbuffer_load_x_xyz_idxen_exact
@@ -1247,6 +1327,8 @@ body: |
---
name: gfx9_tbuffer_load_x_x_x_idxen_exact
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
; GFX9-LABEL: name: gfx9_tbuffer_load_x_x_x_idxen_exact
@@ -1274,6 +1356,8 @@ body: |
---
name: gfx9_tbuffer_load_x_x_x_idxen_exact_swizzled_0
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
; GFX9-LABEL: name: gfx9_tbuffer_load_x_x_x_idxen_exact_swizzled_0
@@ -1300,6 +1384,8 @@ body: |
---
name: gfx9_tbuffer_load_x_x_bothen_exact
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
; GFX9-LABEL: name: gfx9_tbuffer_load_x_x_bothen_exact
@@ -1324,6 +1410,8 @@ body: |
---
name: gfx9_tbuffer_load_x_xy_bothen_exact
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
; GFX9-LABEL: name: gfx9_tbuffer_load_x_xy_bothen_exact
@@ -1348,6 +1436,8 @@ body: |
---
name: gfx9_tbuffer_load_xy_xy_bothen_exact
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
; GFX9-LABEL: name: gfx9_tbuffer_load_xy_xy_bothen_exact
@@ -1372,6 +1462,8 @@ body: |
---
name: gfx9_tbuffer_load_x_xyz_bothen_exact
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
; GFX9-LABEL: name: gfx9_tbuffer_load_x_xyz_bothen_exact
@@ -1396,6 +1488,8 @@ body: |
---
name: gfx9_tbuffer_load_x_x_x_bothen_exact
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
; GFX9-LABEL: name: gfx9_tbuffer_load_x_x_x_bothen_exact
@@ -1423,6 +1517,8 @@ body: |
---
name: gfx9_tbuffer_load_x_x_x_bothen_exact_swizzled_0
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
; GFX9-LABEL: name: gfx9_tbuffer_load_x_x_x_bothen_exact_swizzled_0
@@ -1449,6 +1545,8 @@ body: |
---
name: gfx9_tbuffer_load_xy_xy_bothen_exact_diff_vaddr
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
; GFX9-LABEL: name: gfx9_tbuffer_load_xy_xy_bothen_exact_diff_vaddr
@@ -1474,6 +1572,8 @@ body: |
---
name: gfx9_tbuffer_load_xy_xy_bothen_exact_diff_srsrc
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
; GFX9-LABEL: name: gfx9_tbuffer_load_xy_xy_bothen_exact_diff_srsrc
@@ -1501,6 +1601,8 @@ body: |
---
name: gfx9_tbuffer_load_xy_xy_idxen_exact_diff_vaddr
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
; GFX9-LABEL: name: gfx9_tbuffer_load_xy_xy_idxen_exact_diff_vaddr
@@ -1526,6 +1628,8 @@ body: |
---
name: gfx9_tbuffer_load_xy_xy_idxen_exact_diff_srsrc
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0.entry:
; GFX9-LABEL: name: gfx9_tbuffer_load_xy_xy_idxen_exact_diff_srsrc
diff --git a/llvm/test/CodeGen/AMDGPU/tbuffer-combine-disable-attr.mir b/llvm/test/CodeGen/AMDGPU/relaxed-tbuffer-oob-mode.mir
similarity index 70%
rename from llvm/test/CodeGen/AMDGPU/tbuffer-combine-disable-attr.mir
rename to llvm/test/CodeGen/AMDGPU/relaxed-tbuffer-oob-mode.mir
index cd119aacf7496..415d0461eb78e 100644
--- a/llvm/test/CodeGen/AMDGPU/tbuffer-combine-disable-attr.mir
+++ b/llvm/test/CodeGen/AMDGPU/relaxed-tbuffer-oob-mode.mir
@@ -4,7 +4,7 @@
--- |
target triple = "amdgcn"
- define float @disable-tbuffer-combine(<4 x i32> %vec, i32 %index) #0 {
+ define float @relaxed-tbuffer-oob-mode(<4 x i32> %vec, i32 %index) #0 {
%1 = call i32 @llvm.amdgcn.struct.tbuffer.load.i32(<4 x i32> %vec, i32 %index, i32 0, i32 0, i32 22, i32 0)
%2 = call i32 @llvm.amdgcn.struct.tbuffer.load.i32(<4 x i32> %vec, i32 %index, i32 4, i32 0, i32 22, i32 0)
%3 = call i32 @llvm.amdgcn.struct.tbuffer.load.i32(<4 x i32> %vec, i32 %index, i32 8, i32 0, i32 22, i32 0)
@@ -19,15 +19,17 @@
ret float %res
}
- attributes #0 = {"amdgpu-disable-tbuffer-combine"}
+ attributes #0 = {"amdgpu-relaxed-tbuffer-oob-mode"}
...
---
-name: disable-tbuffer-combine
+name: relaxed-tbuffer-oob-mode
+machineFunctionInfo:
+ RelaxedTBufferOOBMode: true
body: |
bb.0 (%ir-block.0):
liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
- ; CHECK-LABEL: name: disable-tbuffer-combine
+ ; CHECK-LABEL: name: relaxed-tbuffer-oob-mode
; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr4
@@ -37,12 +39,15 @@ body: |
; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY1]], %subreg.sub3
; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; CHECK-NEXT: [[TBUFFER_LOAD_FORMAT_X_IDXEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_IDXEN [[COPY]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 22, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
- ; CHECK-NEXT: [[TBUFFER_LOAD_FORMAT_X_IDXEN1:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_IDXEN [[COPY]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4, 22, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
- ; CHECK-NEXT: [[TBUFFER_LOAD_FORMAT_X_IDXEN2:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_IDXEN [[COPY]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 8, 22, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
- ; CHECK-NEXT: [[TBUFFER_LOAD_FORMAT_X_IDXEN3:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_IDXEN [[COPY]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 12, 22, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
- ; CHECK-NEXT: [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, killed [[TBUFFER_LOAD_FORMAT_X_IDXEN]], 0, killed [[TBUFFER_LOAD_FORMAT_X_IDXEN1]], 0, 0, implicit $mode, implicit $exec
- ; CHECK-NEXT: [[V_MUL_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MUL_F32_e64 0, killed [[TBUFFER_LOAD_FORMAT_X_IDXEN2]], 0, killed [[TBUFFER_LOAD_FORMAT_X_IDXEN3]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[TBUFFER_LOAD_FORMAT_XYZW_IDXEN:%[0-9]+]]:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_IDXEN [[COPY]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 63, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 8)
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_96 = COPY [[TBUFFER_LOAD_FORMAT_XYZW_IDXEN]].sub0_sub1_sub2
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY killed [[TBUFFER_LOAD_FORMAT_XYZW_IDXEN]].sub3
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[COPY5]].sub0_sub1
+ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY killed [[COPY5]].sub2
+ ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY7]].sub0
+ ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY killed [[COPY7]].sub1
+ ; CHECK-NEXT: [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, killed [[COPY9]], 0, killed [[COPY10]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[V_MUL_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MUL_F32_e64 0, killed [[COPY8]], 0, killed [[COPY6]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[V_ADD_F32_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, killed [[V_ADD_F32_e64_]], 0, killed [[V_MUL_F32_e64_]], 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: $vgpr0 = COPY [[V_ADD_F32_e64_1]]
; CHECK-NEXT: SI_RETURN implicit $vgpr0
>From e70282112c792aaaf9b300152a6f24df33fba206 Mon Sep 17 00:00:00 2001
From: Harrison Hao <tsworld1314 at gmail.com>
Date: Fri, 12 Sep 2025 17:29:33 +0800
Subject: [PATCH 3/4] [AMDGPU] Fix lit test failures
---
llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp | 1 +
llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h | 3 +--
llvm/test/CodeGen/MIR/AMDGPU/long-branch-reg-all-sgpr-used.ll | 2 ++
.../CodeGen/MIR/AMDGPU/machine-function-info-after-pei.ll | 1 +
.../MIR/AMDGPU/machine-function-info-long-branch-reg-debug.ll | 1 +
.../MIR/AMDGPU/machine-function-info-long-branch-reg.ll | 1 +
llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-no-ir.mir | 4 ++++
llvm/test/CodeGen/MIR/AMDGPU/machine-function-info.ll | 4 ++++
8 files changed, 15 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
index 084eb1458c2cd..83753506737c6 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
@@ -747,6 +747,7 @@ yaml::SIMachineFunctionInfo::SIMachineFunctionInfo(
MaxMemoryClusterDWords(MFI.getMaxMemoryClusterDWords()),
Mode(MFI.getMode()), HasInitWholeWave(MFI.hasInitWholeWave()),
IsWholeWaveFunction(MFI.isWholeWaveFunction()),
+ RelaxedTBufferOOBMode(MFI.isRelaxedTBufferOOBMode()),
DynamicVGPRBlockSize(MFI.getDynamicVGPRBlockSize()),
ScratchReservedForDynamicVGPRs(MFI.getScratchReservedForDynamicVGPRs()) {
for (Register Reg : MFI.getSGPRSpillPhysVGPRs())
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
index 12ba04acd87e6..66b7d2e35c9f6 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
@@ -293,8 +293,6 @@ struct SIMachineFunctionInfo final : public yaml::MachineFunctionInfo {
unsigned PSInputEnable = 0;
unsigned MaxMemoryClusterDWords = DefaultMemoryClusterDWordsLimit;
- bool RelaxedTBufferOOBMode = false;
-
SIMode Mode;
std::optional<FrameIndex> ScavengeFI;
StringValue VGPRForAGPRCopy;
@@ -303,6 +301,7 @@ struct SIMachineFunctionInfo final : public yaml::MachineFunctionInfo {
bool HasInitWholeWave = false;
bool IsWholeWaveFunction = false;
+ bool RelaxedTBufferOOBMode = false;
unsigned DynamicVGPRBlockSize = 0;
unsigned ScratchReservedForDynamicVGPRs = 0;
diff --git a/llvm/test/CodeGen/MIR/AMDGPU/long-branch-reg-all-sgpr-used.ll b/llvm/test/CodeGen/MIR/AMDGPU/long-branch-reg-all-sgpr-used.ll
index 929db4c9be1c7..cbde52ed190c8 100644
--- a/llvm/test/CodeGen/MIR/AMDGPU/long-branch-reg-all-sgpr-used.ll
+++ b/llvm/test/CodeGen/MIR/AMDGPU/long-branch-reg-all-sgpr-used.ll
@@ -49,6 +49,7 @@
; CHECK-NEXT: dynamicVGPRBlockSize: 0
; CHECK-NEXT: scratchReservedForDynamicVGPRs: 0
; CHECK-NEXT: isWholeWaveFunction: false
+; CHECK-NEXT: RelaxedTBufferOOBMode: false
; CHECK-NEXT: body:
define amdgpu_kernel void @long_branch_used_all_sgprs(ptr addrspace(1) %arg, i32 %cnd) #0 {
entry:
@@ -321,6 +322,7 @@
; CHECK-NEXT: dynamicVGPRBlockSize: 0
; CHECK-NEXT: scratchReservedForDynamicVGPRs: 0
; CHECK-NEXT: isWholeWaveFunction: false
+; CHECK-NEXT: RelaxedTBufferOOBMode: false
; CHECK-NEXT: body:
define amdgpu_kernel void @long_branch_high_num_sgprs_used(ptr addrspace(1) %arg, i32 %cnd) #0 {
entry:
diff --git a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-after-pei.ll b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-after-pei.ll
index f054bea1f2780..0fc739df40fc4 100644
--- a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-after-pei.ll
+++ b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-after-pei.ll
@@ -49,6 +49,7 @@
; AFTER-PEI-NEXT: dynamicVGPRBlockSize: 0
; AFTER-PEI-NEXT: scratchReservedForDynamicVGPRs: 0
; AFTER-PEI-NEXT: isWholeWaveFunction: false
+; AFTER-PEI-NEXT: RelaxedTBufferOOBMode: false
; AFTER-PEI-NEXT: body:
define amdgpu_kernel void @scavenge_fi(ptr addrspace(1) %out, i32 %in) #0 {
%wide.sgpr0 = call <32 x i32> asm sideeffect "; def $0", "=s" () #0
diff --git a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-long-branch-reg-debug.ll b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-long-branch-reg-debug.ll
index 924216efcc461..1c8c8bd551934 100644
--- a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-long-branch-reg-debug.ll
+++ b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-long-branch-reg-debug.ll
@@ -49,6 +49,7 @@
; CHECK-NEXT: dynamicVGPRBlockSize: 0
; CHECK-NEXT: scratchReservedForDynamicVGPRs: 0
; CHECK-NEXT: isWholeWaveFunction: false
+; CHECK-NEXT: RelaxedTBufferOOBMode: false
; CHECK-NEXT: body:
define amdgpu_kernel void @uniform_long_forward_branch_debug(ptr addrspace(1) %arg, i32 %arg1) #0 !dbg !5 {
bb0:
diff --git a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-long-branch-reg.ll b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-long-branch-reg.ll
index 39f1ddd0609d8..8727278ca8061 100644
--- a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-long-branch-reg.ll
+++ b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-long-branch-reg.ll
@@ -49,6 +49,7 @@
; CHECK-NEXT: dynamicVGPRBlockSize: 0
; CHECK-NEXT: scratchReservedForDynamicVGPRs: 0
; CHECK-NEXT: isWholeWaveFunction: false
+; CHECK-NEXT: RelaxedTBufferOOBMode: false
; CHECK-NEXT: body:
define amdgpu_kernel void @uniform_long_forward_branch(ptr addrspace(1) %arg, i32 %arg1) #0 {
bb0:
diff --git a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-no-ir.mir b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-no-ir.mir
index 0cb9bc095bc50..c3bcad547ba57 100644
--- a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-no-ir.mir
+++ b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-no-ir.mir
@@ -58,6 +58,7 @@
# FULL-NEXT: dynamicVGPRBlockSize: 0
# FULL-NEXT: scratchReservedForDynamicVGPRs: 0
# FULL-NEXT: isWholeWaveFunction: false
+# FULL-NEXT: RelaxedTBufferOOBMode: false
# FULL-NEXT: body:
# SIMPLE: machineFunctionInfo:
@@ -168,6 +169,7 @@ body: |
# FULL-NEXT: dynamicVGPRBlockSize: 0
# FULL-NEXT: scratchReservedForDynamicVGPRs: 0
# FULL-NEXT: isWholeWaveFunction: false
+# FULL-NEXT: RelaxedTBufferOOBMode: false
# FULL-NEXT: body:
# SIMPLE: machineFunctionInfo:
@@ -249,6 +251,7 @@ body: |
# FULL-NEXT: dynamicVGPRBlockSize: 0
# FULL-NEXT: scratchReservedForDynamicVGPRs: 0
# FULL-NEXT: isWholeWaveFunction: false
+# FULL-NEXT: RelaxedTBufferOOBMode: false
# FULL-NEXT: body:
# SIMPLE: machineFunctionInfo:
@@ -331,6 +334,7 @@ body: |
# FULL-NEXT: dynamicVGPRBlockSize: 0
# FULL-NEXT: scratchReservedForDynamicVGPRs: 0
# FULL-NEXT: isWholeWaveFunction: false
+# FULL-NEXT: RelaxedTBufferOOBMode: false
# FULL-NEXT: body:
# SIMPLE: machineFunctionInfo:
diff --git a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info.ll b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info.ll
index ab4383b675243..1a2881d4ab74a 100644
--- a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info.ll
+++ b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info.ll
@@ -59,6 +59,7 @@
; CHECK-NEXT: dynamicVGPRBlockSize: 0
; CHECK-NEXT: scratchReservedForDynamicVGPRs: 0
; CHECK-NEXT: isWholeWaveFunction: false
+; CHECK-NEXT: RelaxedTBufferOOBMode: false
; CHECK-NEXT: body:
define amdgpu_kernel void @kernel(i32 %arg0, i64 %arg1, <16 x i32> %arg2) {
%gep = getelementptr inbounds [512 x float], ptr addrspace(3) @lds, i32 0, i32 %arg0
@@ -111,6 +112,7 @@ define amdgpu_kernel void @kernel(i32 %arg0, i64 %arg1, <16 x i32> %arg2) {
; CHECK-NEXT: dynamicVGPRBlockSize: 0
; CHECK-NEXT: scratchReservedForDynamicVGPRs: 0
; CHECK-NEXT: isWholeWaveFunction: false
+; CHECK-NEXT: RelaxedTBufferOOBMode: false
; CHECK-NEXT: body:
define amdgpu_ps void @ps_shader(i32 %arg0, i32 inreg %arg1) {
%gep = getelementptr inbounds [128 x i32], ptr addrspace(2) @gds, i32 0, i32 %arg0
@@ -187,6 +189,7 @@ define amdgpu_ps void @gds_size_shader(i32 %arg0, i32 inreg %arg1) #5 {
; CHECK-NEXT: dynamicVGPRBlockSize: 0
; CHECK-NEXT: scratchReservedForDynamicVGPRs: 0
; CHECK-NEXT: isWholeWaveFunction: false
+; CHECK-NEXT: RelaxedTBufferOOBMode: false
; CHECK-NEXT: body:
define void @function() {
ret void
@@ -245,6 +248,7 @@ define void @function() {
; CHECK-NEXT: dynamicVGPRBlockSize: 0
; CHECK-NEXT: scratchReservedForDynamicVGPRs: 0
; CHECK-NEXT: isWholeWaveFunction: false
+; CHECK-NEXT: RelaxedTBufferOOBMode: false
; CHECK-NEXT: body:
define void @function_nsz() #0 {
ret void
>From 7e493ca0573e8982a50659455a4235fb4ebd2a45 Mon Sep 17 00:00:00 2001
From: Harrison Hao <tsworld1314 at gmail.com>
Date: Sat, 20 Sep 2025 18:14:35 +0800
Subject: [PATCH 4/4] [AMDGPU] Use module flag
---
llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp | 5 ++++-
llvm/test/CodeGen/AMDGPU/relaxed-tbuffer-oob-mode.mir | 3 ++-
2 files changed, 6 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
index 83753506737c6..281961e82610d 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
@@ -198,7 +198,10 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const Function &F,
ClusterDims = AMDGPU::ClusterDimsAttr::get(F);
- if (F.hasFnAttribute("amdgpu-relaxed-tbuffer-oob-mod"))
+ // Enable relaxed TBUFFER OOB mode if amdgpu.oob.mode has bit 0x2 set.
+ if (const auto *CI = mdconst::extract_or_null<ConstantInt>(
+ F.getParent()->getModuleFlag("amdgpu.oob.mode"));
+ CI && (CI->getZExtValue() & 0x2))
setRelaxedTBufferOOBMode(true);
}
diff --git a/llvm/test/CodeGen/AMDGPU/relaxed-tbuffer-oob-mode.mir b/llvm/test/CodeGen/AMDGPU/relaxed-tbuffer-oob-mode.mir
index 415d0461eb78e..84171b1d99204 100644
--- a/llvm/test/CodeGen/AMDGPU/relaxed-tbuffer-oob-mode.mir
+++ b/llvm/test/CodeGen/AMDGPU/relaxed-tbuffer-oob-mode.mir
@@ -19,7 +19,8 @@
ret float %res
}
- attributes #0 = {"amdgpu-relaxed-tbuffer-oob-mod"}
+ !llvm.module.flags = !{!0}
+ !0 = !{i32 4, !"amdgpu.oob.mode", i32 2}
...
---
name: relaxed-tbuffer-oob-mode
More information about the llvm-commits
mailing list