[clang] [llvm] [AMDGPU] Add target intrinsic for s_buffer_prefetch_data (PR #107293)
Stanislav Mekhanoshin via cfe-commits
cfe-commits at lists.llvm.org
Fri Sep 6 10:25:25 PDT 2024
https://github.com/rampitec updated https://github.com/llvm/llvm-project/pull/107293
>From 8361742ca5fe20a3168b3274166909412e225184 Mon Sep 17 00:00:00 2001
From: Stanislav Mekhanoshin <Stanislav.Mekhanoshin at amd.com>
Date: Wed, 4 Sep 2024 12:00:27 -0700
Subject: [PATCH 1/2] [AMDGPU] Add target intrinsic for s_buffer_prefetch_data
---
clang/include/clang/Basic/BuiltinsAMDGPU.def | 1 +
.../builtins-amdgcn-gfx12-param-err.cl | 5 +++
.../CodeGenOpenCL/builtins-amdgcn-gfx12.cl | 19 +++++++++-
llvm/include/llvm/IR/IntrinsicsAMDGPU.td | 9 +++++
llvm/lib/Target/AMDGPU/AMDGPUGISel.td | 1 +
llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 1 +
llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h | 1 +
.../AMDGPU/AMDGPUInstructionSelector.cpp | 10 ++++--
.../lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp | 14 ++++++++
llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h | 2 ++
.../Target/AMDGPU/AMDGPURegisterBankInfo.cpp | 8 +++++
llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 34 +++++++++++++-----
llvm/lib/Target/AMDGPU/SIInstrInfo.td | 8 +++++
llvm/lib/Target/AMDGPU/SIInstructions.td | 10 ++++++
llvm/lib/Target/AMDGPU/SMInstructions.td | 12 +++++++
.../llvm.amdgcn.s.buffer.prefetch.data.ll | 36 +++++++++++++++++++
16 files changed, 160 insertions(+), 11 deletions(-)
create mode 100644 llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.buffer.prefetch.data.ll
diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def
index ab29ef38f7792f..77595e1b701336 100644
--- a/clang/include/clang/Basic/BuiltinsAMDGPU.def
+++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def
@@ -448,6 +448,7 @@ TARGET_BUILTIN(__builtin_amdgcn_s_barrier_join, "vi", "n", "gfx12-insts")
TARGET_BUILTIN(__builtin_amdgcn_s_wakeup_barrier, "vi", "n", "gfx12-insts")
TARGET_BUILTIN(__builtin_amdgcn_s_barrier_leave, "b", "n", "gfx12-insts")
TARGET_BUILTIN(__builtin_amdgcn_s_get_barrier_state, "Uii", "n", "gfx12-insts")
+TARGET_BUILTIN(__builtin_amdgcn_s_buffer_prefetch_data, "vQbIiUi", "nc", "gfx12-insts")
TARGET_BUILTIN(__builtin_amdgcn_global_load_tr_b64_v2i32, "V2iV2i*1", "nc", "gfx12-insts,wavefrontsize32")
TARGET_BUILTIN(__builtin_amdgcn_global_load_tr_b128_v8i16, "V8sV8s*1", "nc", "gfx12-insts,wavefrontsize32")
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx12-param-err.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx12-param-err.cl
index cd6bfbe647ff36..5d86a9b369429f 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx12-param-err.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx12-param-err.cl
@@ -22,3 +22,8 @@ kernel void builtins_amdgcn_s_barrier_signal_isfirst_err(global int* in, global
__builtin_amdgcn_s_barrier_wait(-1);
*out = *in;
}
+
+void test_s_buffer_prefetch_data(__amdgpu_buffer_rsrc_t rsrc, unsigned int off)
+{
+ __builtin_amdgcn_s_buffer_prefetch_data(rsrc, off, 31); // expected-error {{'__builtin_amdgcn_s_buffer_prefetch_data' must be a constant integer}}
+}
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx12.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx12.cl
index d9ec258e644c9d..8251ab336fe35b 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx12.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx12.cl
@@ -256,4 +256,21 @@ void test_s_ttracedata_imm()
__builtin_amdgcn_s_ttracedata_imm(1);
}
-
+// CHECK-LABEL: @test_s_buffer_prefetch_data(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[RSRC_ADDR:%.*]] = alloca ptr addrspace(8), align 16, addrspace(5)
+// CHECK-NEXT: [[LEN_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: store ptr addrspace(8) [[RSRC:%.*]], ptr addrspace(5) [[RSRC_ADDR]], align 16
+// CHECK-NEXT: store i32 [[LEN:%.*]], ptr addrspace(5) [[LEN_ADDR]], align 4
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr addrspace(8), ptr addrspace(5) [[RSRC_ADDR]], align 16
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[LEN_ADDR]], align 4
+// CHECK-NEXT: call void @llvm.amdgcn.s.buffer.prefetch.data(ptr addrspace(8) [[TMP0]], i32 128, i32 [[TMP1]])
+// CHECK-NEXT: [[TMP2:%.*]] = load ptr addrspace(8), ptr addrspace(5) [[RSRC_ADDR]], align 16
+// CHECK-NEXT: call void @llvm.amdgcn.s.buffer.prefetch.data(ptr addrspace(8) [[TMP2]], i32 0, i32 31)
+// CHECK-NEXT: ret void
+//
+void test_s_buffer_prefetch_data(__amdgpu_buffer_rsrc_t rsrc, unsigned int len)
+{
+ __builtin_amdgcn_s_buffer_prefetch_data(rsrc, 128, len);
+ __builtin_amdgcn_s_buffer_prefetch_data(rsrc, 0, 31);
+}
diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
index dc13a35c66f9ab..57b87f6d29a5f4 100644
--- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -1723,6 +1723,15 @@ class AMDGPUStructPtrBufferLoadLDS : Intrinsic <
ImmArg<ArgIndex<7>>, IntrNoCallback, IntrNoFree], "", [SDNPMemOperand]>, AMDGPURsrcIntrinsic<0>;
def int_amdgcn_struct_ptr_buffer_load_lds : AMDGPUStructPtrBufferLoadLDS;
+def int_amdgcn_s_buffer_prefetch_data : DefaultAttrsIntrinsic <
+ [],
+ [AMDGPUBufferRsrcTy, // rsrc(SGPR)
+ llvm_i32_ty, // offset (imm)
+ llvm_i32_ty], // len (SGPR/imm)
+ [IntrInaccessibleMemOrArgMemOnly, ImmArg<ArgIndex<1>>], "", [SDNPMemOperand]>,
+ AMDGPURsrcIntrinsic<0>,
+ ClangBuiltin<"__builtin_amdgcn_s_buffer_prefetch_data">;
+
} // defset AMDGPUBufferIntrinsics
// Uses that do not set the done bit should set IntrWriteMem on the
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
index 118271af879937..278d3536add916 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
@@ -296,6 +296,7 @@ def : GINodeEquiv<G_AMDGPU_S_BUFFER_LOAD_SBYTE, SIsbuffer_load_byte>;
def : GINodeEquiv<G_AMDGPU_S_BUFFER_LOAD_UBYTE, SIsbuffer_load_ubyte>;
def : GINodeEquiv<G_AMDGPU_S_BUFFER_LOAD_SSHORT, SIsbuffer_load_short>;
def : GINodeEquiv<G_AMDGPU_S_BUFFER_LOAD_USHORT, SIsbuffer_load_ushort>;
+def : GINodeEquiv<G_AMDGPU_S_BUFFER_PREFETCH, SIsbuffer_prefetch>;
class GISelSop2Pat <
SDPatternOperator node,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 015dbc79ef9e4d..81852f6a130584 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -5545,6 +5545,7 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(SBUFFER_LOAD_UBYTE)
NODE_NAME_CASE(SBUFFER_LOAD_SHORT)
NODE_NAME_CASE(SBUFFER_LOAD_USHORT)
+ NODE_NAME_CASE(SBUFFER_PREFETCH_DATA)
NODE_NAME_CASE(BUFFER_STORE)
NODE_NAME_CASE(BUFFER_STORE_BYTE)
NODE_NAME_CASE(BUFFER_STORE_SHORT)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
index dd9d97bd593bda..18b5c388f32932 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -589,6 +589,7 @@ enum NodeType : unsigned {
SBUFFER_LOAD_UBYTE,
SBUFFER_LOAD_SHORT,
SBUFFER_LOAD_USHORT,
+ SBUFFER_PREFETCH_DATA,
BUFFER_STORE,
BUFFER_STORE_BYTE,
BUFFER_STORE_SHORT,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 3fcb364fc2c536..4dfd3f087c1ae4 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -5237,7 +5237,8 @@ getConstantZext32Val(Register Reg, const MachineRegisterInfo &MRI) {
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectSMRDBufferImm(MachineOperand &Root) const {
- std::optional<uint64_t> OffsetVal = getConstantZext32Val(Root.getReg(), *MRI);
+ std::optional<uint64_t> OffsetVal =
+ Root.isImm() ? Root.getImm() : getConstantZext32Val(Root.getReg(), *MRI);
if (!OffsetVal)
return {};
@@ -5541,7 +5542,12 @@ void AMDGPUInstructionSelector::renderPopcntImm(MachineInstrBuilder &MIB,
void AMDGPUInstructionSelector::renderTruncTImm(MachineInstrBuilder &MIB,
const MachineInstr &MI,
int OpIdx) const {
- MIB.addImm(MI.getOperand(OpIdx).getImm());
+ const MachineOperand &Op = MI.getOperand(OpIdx);
+ int64_t Imm;
+ if (Op.isReg() && mi_match(Op.getReg(), *MRI, m_ICst(Imm)))
+ MIB.addImm(Imm);
+ else
+ MIB.addImm(Op.getImm());
}
void AMDGPUInstructionSelector::renderOpSelTImm(MachineInstrBuilder &MIB,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index 3f6486d44f0ee5..56f4efda7925f1 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -6797,6 +6797,18 @@ bool AMDGPULegalizerInfo::legalizeSBufferLoad(LegalizerHelper &Helper,
return true;
}
+bool AMDGPULegalizerInfo::legalizeSBufferPrefetch(LegalizerHelper &Helper,
+ MachineInstr &MI) const {
+ MachineIRBuilder &B = Helper.MIRBuilder;
+ GISelChangeObserver &Observer = Helper.Observer;
+ Observer.changingInstr(MI);
+ MI.setDesc(B.getTII().get(AMDGPU::G_AMDGPU_S_BUFFER_PREFETCH));
+ MI.removeOperand(0); // Remove intrinsic ID
+ castBufferRsrcArgToV4I32(MI, B, 0);
+ Observer.changedInstr(MI);
+ return true;
+}
+
// TODO: Move to selection
bool AMDGPULegalizerInfo::legalizeTrap(MachineInstr &MI,
MachineRegisterInfo &MRI,
@@ -7485,6 +7497,8 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
case Intrinsic::amdgcn_permlanex16:
case Intrinsic::amdgcn_permlane64:
return legalizeLaneOp(Helper, MI, IntrID);
+ case Intrinsic::amdgcn_s_buffer_prefetch_data:
+ return legalizeSBufferPrefetch(Helper, MI);
default: {
if (const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr =
AMDGPU::getImageDimIntrinsicInfo(IntrID))
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
index a815e87a7da35f..84470dc75b60ef 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
@@ -227,6 +227,8 @@ class AMDGPULegalizerInfo final : public LegalizerInfo {
bool legalizeSBufferLoad(LegalizerHelper &Helper, MachineInstr &MI) const;
+ bool legalizeSBufferPrefetch(LegalizerHelper &Helper, MachineInstr &MI) const;
+
bool legalizeTrap(MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &B) const;
bool legalizeTrapEndpgm(MachineInstr &MI, MachineRegisterInfo &MRI,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
index 4737a322c255f4..3c0b57f16d1b76 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -3101,6 +3101,10 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
applyMappingSBufferLoad(B, OpdMapper);
return;
}
+ case AMDGPU::G_AMDGPU_S_BUFFER_PREFETCH:
+ constrainOpWithReadfirstlane(B, MI, 0);
+ constrainOpWithReadfirstlane(B, MI, 2);
+ return;
case AMDGPU::G_INTRINSIC:
case AMDGPU::G_INTRINSIC_CONVERGENT: {
switch (cast<GIntrinsic>(MI).getIntrinsicID()) {
@@ -4454,6 +4458,10 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
OpdsMapping[0] = AMDGPU::getValueMapping(ResultBank, Size0);
break;
}
+ case AMDGPU::G_AMDGPU_S_BUFFER_PREFETCH:
+ OpdsMapping[0] = getSGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI);
+ OpdsMapping[2] = getSGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI);
+ break;
case AMDGPU::G_INTRINSIC:
case AMDGPU::G_INTRINSIC_CONVERGENT: {
switch (cast<GIntrinsic>(MI).getIntrinsicID()) {
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 81b52935ddf397..64bf82653144ac 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -1210,9 +1210,13 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Info.ptrVal = RsrcArg;
}
- auto *Aux = cast<ConstantInt>(CI.getArgOperand(CI.arg_size() - 1));
- if (Aux->getZExtValue() & AMDGPU::CPol::VOLATILE)
- Info.flags |= MachineMemOperand::MOVolatile;
+ bool IsSPrefetch = IntrID == Intrinsic::amdgcn_s_buffer_prefetch_data;
+ if (!IsSPrefetch) {
+ auto *Aux = cast<ConstantInt>(CI.getArgOperand(CI.arg_size() - 1));
+ if (Aux->getZExtValue() & AMDGPU::CPol::VOLATILE)
+ Info.flags |= MachineMemOperand::MOVolatile;
+ }
+
Info.flags |= MachineMemOperand::MODereferenceable;
if (ME.onlyReadsMemory()) {
if (RsrcIntr->IsImage) {
@@ -1251,16 +1255,18 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Info.flags |= MachineMemOperand::MOStore;
} else {
- // Atomic or NoReturn Sampler
+ // Atomic, NoReturn Sampler or prefetch
Info.opc = CI.getType()->isVoidTy() ? ISD::INTRINSIC_VOID :
ISD::INTRINSIC_W_CHAIN;
- Info.flags |= MachineMemOperand::MOLoad |
- MachineMemOperand::MOStore |
- MachineMemOperand::MODereferenceable;
+ Info.flags |=
+ MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable;
+
+ if (!IsSPrefetch)
+ Info.flags |= MachineMemOperand::MOStore;
switch (IntrID) {
default:
- if (RsrcIntr->IsImage && BaseOpcode->NoReturn) {
+ if ((RsrcIntr->IsImage && BaseOpcode->NoReturn) || IsSPrefetch) {
// Fake memory access type for no return sampler intrinsics
Info.memVT = MVT::i32;
} else {
@@ -9921,6 +9927,18 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
auto NewMI = DAG.getMachineNode(Opc, DL, Op->getVTList(), Ops);
return SDValue(NewMI, 0);
}
+ case Intrinsic::amdgcn_s_buffer_prefetch_data: {
+ SDValue Ops[] = {
+ Chain, bufferRsrcPtrToVector(Op.getOperand(2), DAG),
+ Op.getOperand(3), // offset
+ Op.getOperand(4), // length
+ };
+
+ MemSDNode *M = cast<MemSDNode>(Op);
+ return DAG.getMemIntrinsicNode(AMDGPUISD::SBUFFER_PREFETCH_DATA, DL,
+ Op->getVTList(), Ops, M->getMemoryVT(),
+ M->getMemOperand());
+ }
default: {
if (const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr =
AMDGPU::getImageDimIntrinsicInfo(IntrinsicID))
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index faa8ca282e7ab8..2ccc16a8f2685e 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -312,6 +312,14 @@ class isIntType<ValueType SrcVT> {
bit ret = !and(SrcVT.isInteger, !ne(SrcVT.Value, i1.Value));
}
+def SDTSBufferPrefetch : SDTypeProfile<0, 3,
+ [SDTCisVT<0, v4i32>, // rsrc
+ SDTCisVT<1, i32>, // offset(imm)
+ SDTCisVT<2, i32>]>; // length
+
+def SIsbuffer_prefetch : SDNode<"AMDGPUISD::SBUFFER_PREFETCH_DATA", SDTSBufferPrefetch,
+ [SDNPMayLoad, SDNPMayStore, SDNPMemOperand, SDNPHasChain]>;
+
//===----------------------------------------------------------------------===//
// SDNodes PatFrags for loads/stores with a glue input.
// This is for SDNodes and PatFrag for local loads and stores to
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index 69e1b9a38324f2..fe04f52ef9108e 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -3978,6 +3978,16 @@ def G_AMDGPU_S_BUFFER_LOAD_UBYTE : SBufferLoadInstruction;
def G_AMDGPU_S_BUFFER_LOAD_SSHORT : SBufferLoadInstruction;
def G_AMDGPU_S_BUFFER_LOAD_USHORT : SBufferLoadInstruction;
+class SBufferPrefetchInstruction : AMDGPUGenericInstruction {
+ let OutOperandList = (outs);
+ let InOperandList = (ins type0:$rsrc, untyped_imm_0:$offset, type1:$len);
+ let hasSideEffects = 0;
+ let mayLoad = 1;
+ let mayStore = 1;
+}
+
+def G_AMDGPU_S_BUFFER_PREFETCH : SBufferPrefetchInstruction;
+
def G_AMDGPU_S_MUL_U64_U32 : AMDGPUGenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins type0:$src0, type0:$src1);
diff --git a/llvm/lib/Target/AMDGPU/SMInstructions.td b/llvm/lib/Target/AMDGPU/SMInstructions.td
index 9fc570bb85f24e..bb160ffff7c6b6 100644
--- a/llvm/lib/Target/AMDGPU/SMInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SMInstructions.td
@@ -1152,6 +1152,18 @@ multiclass SMPrefetchPat<string type, TImmLeaf cache_type> {
defm : SMPrefetchPat<"INST", i32imm_zero>;
defm : SMPrefetchPat<"DATA", i32imm_one>;
+let SubtargetPredicate = isGFX12Plus in {
+ def : GCNPat <
+ (SIsbuffer_prefetch v4i32:$sbase, (SMRDBufferImm i32:$offset), (i32 SReg_32:$len)),
+ (S_BUFFER_PREFETCH_DATA SReg_128:$sbase, i32imm:$offset, $len, 0)
+ >;
+
+ def : GCNPat <
+ (SIsbuffer_prefetch v4i32:$sbase, (SMRDBufferImm i32:$offset), imm:$len),
+ (S_BUFFER_PREFETCH_DATA SReg_128:$sbase, i32imm:$offset, (i32 SGPR_NULL), (as_i8timm $len))
+ >;
+} // End let SubtargetPredicate = isGFX12Plus
+
//===----------------------------------------------------------------------===//
// GFX10.
//===----------------------------------------------------------------------===//
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.buffer.prefetch.data.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.buffer.prefetch.data.ll
new file mode 100644
index 00000000000000..88b6df43152352
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.buffer.prefetch.data.ll
@@ -0,0 +1,36 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx1200 < %s | FileCheck --check-prefix=GCN %s
+; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx1200 < %s | FileCheck --check-prefix=GCN %s
+
+declare void @llvm.amdgcn.s.buffer.prefetch.data(ptr addrspace(8) %rsrc, i32 %offset, i32 %len)
+
+define amdgpu_ps void @buffer_prefetch_data_imm_offset_sgpr_len(ptr addrspace(8) inreg %rsrc, i32 inreg %len) {
+; GCN-LABEL: buffer_prefetch_data_imm_offset_sgpr_len:
+; GCN: ; %bb.0: ; %entry
+; GCN-NEXT: s_buffer_prefetch_data s[0:3], 0x80, s4, 0
+; GCN-NEXT: s_endpgm
+entry:
+ tail call void @llvm.amdgcn.s.buffer.prefetch.data(ptr addrspace(8) inreg %rsrc, i32 128, i32 %len)
+ ret void
+}
+
+define amdgpu_ps void @buffer_prefetch_data_imm_offset_imm_len(ptr addrspace(8) inreg %rsrc) {
+; GCN-LABEL: buffer_prefetch_data_imm_offset_imm_len:
+; GCN: ; %bb.0: ; %entry
+; GCN-NEXT: s_buffer_prefetch_data s[0:3], 0x0, null, 31
+; GCN-NEXT: s_endpgm
+entry:
+ tail call void @llvm.amdgcn.s.buffer.prefetch.data(ptr addrspace(8) inreg %rsrc, i32 0, i32 31)
+ ret void
+}
+
+define amdgpu_ps void @buffer_prefetch_data_imm_offset_vgpr_len(ptr addrspace(8) inreg %rsrc, i32 %len) {
+; GCN-LABEL: buffer_prefetch_data_imm_offset_vgpr_len:
+; GCN: ; %bb.0: ; %entry
+; GCN-NEXT: v_readfirstlane_b32 s4, v0
+; GCN-NEXT: s_buffer_prefetch_data s[0:3], 0x80, s4, 0
+; GCN-NEXT: s_endpgm
+entry:
+ tail call void @llvm.amdgcn.s.buffer.prefetch.data(ptr addrspace(8) inreg %rsrc, i32 128, i32 %len)
+ ret void
+}
>From 995aaf2611ce49839ffebc3b674a9386335431f5 Mon Sep 17 00:00:00 2001
From: Stanislav Mekhanoshin <Stanislav.Mekhanoshin at amd.com>
Date: Fri, 6 Sep 2024 10:25:01 -0700
Subject: [PATCH 2/2] Added test with vgpr rsrc
---
.../AMDGPU/llvm.amdgcn.s.buffer.prefetch.data.ll | 14 ++++++++++++++
1 file changed, 14 insertions(+)
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.buffer.prefetch.data.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.buffer.prefetch.data.ll
index 88b6df43152352..fa6fc6047eac2d 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.buffer.prefetch.data.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.buffer.prefetch.data.ll
@@ -34,3 +34,17 @@ entry:
tail call void @llvm.amdgcn.s.buffer.prefetch.data(ptr addrspace(8) inreg %rsrc, i32 128, i32 %len)
ret void
}
+
+define amdgpu_ps void @buffer_prefetch_data_vgpr_rsrc_imm_offset_sgpr_len(ptr addrspace(8) %rsrc, i32 inreg %len) {
+; GCN-LABEL: buffer_prefetch_data_vgpr_rsrc_imm_offset_sgpr_len:
+; GCN: ; %bb.0: ; %entry
+; GCN-NEXT: v_readfirstlane_b32 s4, v0
+; GCN-NEXT: v_readfirstlane_b32 s5, v1
+; GCN-NEXT: v_readfirstlane_b32 s6, v2
+; GCN-NEXT: v_readfirstlane_b32 s7, v3
+; GCN-NEXT: s_buffer_prefetch_data s[4:7], 0x80, s0, 0
+; GCN-NEXT: s_endpgm
+entry:
+ tail call void @llvm.amdgcn.s.buffer.prefetch.data(ptr addrspace(8) inreg %rsrc, i32 128, i32 %len)
+ ret void
+}
More information about the cfe-commits mailing list