[llvm] [RFC][AMDGPU] Remove old llvm.amdgcn.buffer.* and tbuffer intrinsics (PR #93801)
via llvm-commits
llvm-commits at lists.llvm.org
Thu May 30 03:47:51 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-ir
Author: Jay Foad (jayfoad)
<details>
<summary>Changes</summary>
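The old llvm.amdgcn.buffer.* and llvm.amdgcn.tbuffer.* intrinsics have been
superseded by llvm.amdgcn.raw.buffer.* and llvm.amdgcn.struct.buffer.* (and the
corresponding raw/struct tbuffer intrinsics).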
---
Patch is 333.83 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/93801.diff
39 Files Affected:
- (modified) llvm/docs/TableGen/BackGuide.rst (+1-1)
- (modified) llvm/include/llvm/IR/IntrinsicsAMDGPU.td (+3-107)
- (modified) llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp (-9)
- (modified) llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp (-5)
- (modified) llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td (-12)
- (modified) llvm/lib/Target/AMDGPU/SIISelLowering.cpp (+2-244)
- (modified) llvm/lib/Target/AMDGPU/SIISelLowering.h (+3-3)
- (modified) llvm/test/Analysis/UniformityAnalysis/AMDGPU/llvm.amdgcn.buffer.atomic.ll (-100)
- (modified) llvm/test/CodeGen/AMDGPU/buffer-intrinsics-mmo-offsets.ll (+90-186)
- (modified) llvm/test/CodeGen/AMDGPU/buffer-schedule.ll (-44)
- (removed) llvm/test/CodeGen/AMDGPU/fail-select-buffer-atomic-fadd.ll (-19)
- (removed) llvm/test/CodeGen/AMDGPU/force-store-sc0-sc1.ll (-141)
- (modified) llvm/test/CodeGen/AMDGPU/fp64-atomics-gfx90a.ll (+52-136)
- (removed) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.csub.ll (-82)
- (removed) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.fadd.gfx90a.ll (-93)
- (removed) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.fadd.ll (-111)
- (removed) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.atomic.ll (-209)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.load.dwordx3.ll (-22)
- (removed) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.load.format.d16.ll (-55)
- (removed) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.load.format.ll (-133)
- (removed) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.load.ll (-476)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.store.dwordx3.ll (-20)
- (removed) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.store.format.d16.ll (-63)
- (removed) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.store.format.ll (-104)
- (removed) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.store.ll (-268)
- (removed) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.wbinvl1.ll (-16)
- (removed) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.wbinvl1.sc.ll (-14)
- (removed) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.wbinvl1.vol.ll (-19)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.tbuffer.store.ll (+2-2)
- (removed) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.tbuffer.load.d16.ll (-55)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.tbuffer.load.dwordx3.ll (-13)
- (removed) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.tbuffer.load.ll (-109)
- (removed) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.tbuffer.store.d16.ll (-76)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.tbuffer.store.dwordx3.ll (-10)
- (removed) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.tbuffer.store.ll (-110)
- (modified) llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-demanded-vector-elts-inseltpoison.ll (+1-660)
- (modified) llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-demanded-vector-elts.ll (+1-660)
- (modified) llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-simplify-image-buffer-stores.ll (-29)
- (modified) llvm/test/Verifier/AMDGPU/intrinsic-immarg.ll (-23)
``````````diff
diff --git a/llvm/docs/TableGen/BackGuide.rst b/llvm/docs/TableGen/BackGuide.rst
index e1413c1c73a79..60677a6dcd627 100644
--- a/llvm/docs/TableGen/BackGuide.rst
+++ b/llvm/docs/TableGen/BackGuide.rst
@@ -761,7 +761,7 @@ over time. The output looks like this.
-------------------- Global Variables (5) --------------------
- AMDGPUBufferIntrinsics = [int_amdgcn_buffer_load_format, ...
+ AMDGPUBufferIntrinsics = [int_amdgcn_s_buffer_load, ...
AMDGPUImageDimAtomicIntrinsics = [int_amdgcn_image_atomic_swap_1d, ...
...
-------------------- Classes (758) --------------------
diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
index d4a8954a4cdac..be4a1b8b92767 100644
--- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -1072,18 +1072,6 @@ def int_amdgcn_make_buffer_rsrc : DefaultAttrsIntrinsic <
defset list<AMDGPURsrcIntrinsic> AMDGPUBufferIntrinsics = {
-class AMDGPUBufferLoad<LLVMType data_ty = llvm_any_ty> : DefaultAttrsIntrinsic <
- [data_ty],
- [llvm_v4i32_ty, // rsrc(SGPR)
- llvm_i32_ty, // vindex(VGPR)
- llvm_i32_ty, // offset(SGPR/VGPR/imm)
- llvm_i1_ty, // glc(imm)
- llvm_i1_ty], // slc(imm)
- [IntrReadMem, ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>], "", [SDNPMemOperand]>,
- AMDGPURsrcIntrinsic<0>;
-def int_amdgcn_buffer_load_format : AMDGPUBufferLoad<llvm_anyfloat_ty>;
-def int_amdgcn_buffer_load : AMDGPUBufferLoad;
-
// Generate a buffer_load instruction that may be optimized to s_buffer_load if
// the offset argument is uniform.
def int_amdgcn_s_buffer_load : DefaultAttrsIntrinsic <
@@ -1100,25 +1088,12 @@ def int_amdgcn_s_buffer_load : DefaultAttrsIntrinsic <
[IntrNoMem, ImmArg<ArgIndex<2>>]>,
AMDGPURsrcIntrinsic<0>;
-class AMDGPUBufferStore<LLVMType data_ty = llvm_any_ty> : DefaultAttrsIntrinsic <
- [],
- [data_ty, // vdata(VGPR)
- llvm_v4i32_ty, // rsrc(SGPR)
- llvm_i32_ty, // vindex(VGPR)
- llvm_i32_ty, // offset(SGPR/VGPR/imm)
- llvm_i1_ty, // glc(imm)
- llvm_i1_ty], // slc(imm)
- [IntrWriteMem, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>], "", [SDNPMemOperand]>,
- AMDGPURsrcIntrinsic<1>;
-def int_amdgcn_buffer_store_format : AMDGPUBufferStore<llvm_anyfloat_ty>;
-def int_amdgcn_buffer_store : AMDGPUBufferStore;
-
-// New buffer intrinsics with separate raw and struct variants. The raw
+// Buffer intrinsics with separate raw and struct variants. The raw
// variant never has an index. The struct variant always has an index, even if
// it is const 0. A struct intrinsic with constant 0 index is different to the
// corresponding raw intrinsic on gfx9+ because the behavior of bound checking
// and swizzling changes depending on whether idxen is set in the instruction.
-// These new instrinsics also keep the offset and soffset arguments separate as
+// These instrinsics also keep the offset and soffset arguments separate as
// they behave differently in bounds checking and swizzling.
// The versions of these intrinsics that take <4 x i32> arguments are deprecated
@@ -1478,41 +1453,7 @@ def int_amdgcn_struct_buffer_atomic_fmax : AMDGPUStructBufferAtomic<llvm_anyfloa
def int_amdgcn_struct_ptr_buffer_atomic_fmin : AMDGPUStructPtrBufferAtomic<llvm_anyfloat_ty>;
def int_amdgcn_struct_ptr_buffer_atomic_fmax : AMDGPUStructPtrBufferAtomic<llvm_anyfloat_ty>;
-// Obsolescent tbuffer intrinsics.
-def int_amdgcn_tbuffer_load : DefaultAttrsIntrinsic <
- [llvm_any_ty], // overloaded for types f32/i32, v2f32/v2i32, v4f32/v4i32
- [llvm_v4i32_ty, // rsrc(SGPR)
- llvm_i32_ty, // vindex(VGPR)
- llvm_i32_ty, // voffset(VGPR)
- llvm_i32_ty, // soffset(SGPR)
- llvm_i32_ty, // offset(imm)
- llvm_i32_ty, // dfmt(imm)
- llvm_i32_ty, // nfmt(imm)
- llvm_i1_ty, // glc(imm)
- llvm_i1_ty], // slc(imm)
- [IntrReadMem,
- ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>, ImmArg<ArgIndex<6>>,
- ImmArg<ArgIndex<7>>, ImmArg<ArgIndex<8>>], "", [SDNPMemOperand]>,
- AMDGPURsrcIntrinsic<0>;
-
-def int_amdgcn_tbuffer_store : DefaultAttrsIntrinsic <
- [],
- [llvm_any_ty, // vdata(VGPR), overloaded for types f32/i32, v2f32/v2i32, v4f32/v4i32
- llvm_v4i32_ty, // rsrc(SGPR)
- llvm_i32_ty, // vindex(VGPR)
- llvm_i32_ty, // voffset(VGPR)
- llvm_i32_ty, // soffset(SGPR)
- llvm_i32_ty, // offset(imm)
- llvm_i32_ty, // dfmt(imm)
- llvm_i32_ty, // nfmt(imm)
- llvm_i1_ty, // glc(imm)
- llvm_i1_ty], // slc(imm)
- [IntrWriteMem, ImmArg<ArgIndex<5>>,
- ImmArg<ArgIndex<6>>, ImmArg<ArgIndex<7>>,
- ImmArg<ArgIndex<8>>, ImmArg<ArgIndex<9>>], "", [SDNPMemOperand]>,
- AMDGPURsrcIntrinsic<1>;
-
-// New tbuffer intrinsics, with:
+// tbuffer intrinsics, with:
// - raw and struct variants
// - joint format field
// - joint cachepolicy field
@@ -1659,51 +1600,6 @@ def int_amdgcn_struct_tbuffer_store : DefaultAttrsIntrinsic <
ImmArg<ArgIndex<5>>, ImmArg<ArgIndex<6>>], "", [SDNPMemOperand]>,
AMDGPURsrcIntrinsic<1>;
-class AMDGPUBufferAtomic : Intrinsic <
- [llvm_anyint_ty],
- [LLVMMatchType<0>, // vdata(VGPR)
- llvm_v4i32_ty, // rsrc(SGPR)
- llvm_i32_ty, // vindex(VGPR)
- llvm_i32_ty, // offset(SGPR/VGPR/imm)
- llvm_i1_ty], // slc(imm)
- [ImmArg<ArgIndex<4>>, IntrWillReturn, IntrNoCallback, IntrNoFree], "", [SDNPMemOperand]>,
- AMDGPURsrcIntrinsic<1, 0>;
-def int_amdgcn_buffer_atomic_swap : AMDGPUBufferAtomic;
-def int_amdgcn_buffer_atomic_add : AMDGPUBufferAtomic;
-def int_amdgcn_buffer_atomic_sub : AMDGPUBufferAtomic;
-def int_amdgcn_buffer_atomic_smin : AMDGPUBufferAtomic;
-def int_amdgcn_buffer_atomic_umin : AMDGPUBufferAtomic;
-def int_amdgcn_buffer_atomic_smax : AMDGPUBufferAtomic;
-def int_amdgcn_buffer_atomic_umax : AMDGPUBufferAtomic;
-def int_amdgcn_buffer_atomic_and : AMDGPUBufferAtomic;
-def int_amdgcn_buffer_atomic_or : AMDGPUBufferAtomic;
-def int_amdgcn_buffer_atomic_xor : AMDGPUBufferAtomic;
-def int_amdgcn_buffer_atomic_cmpswap : Intrinsic<
- [llvm_i32_ty],
- [llvm_i32_ty, // src(VGPR)
- llvm_i32_ty, // cmp(VGPR)
- llvm_v4i32_ty, // rsrc(SGPR)
- llvm_i32_ty, // vindex(VGPR)
- llvm_i32_ty, // offset(SGPR/VGPR/imm)
- llvm_i1_ty], // slc(imm)
- [ImmArg<ArgIndex<5>>, IntrWillReturn, IntrNoCallback, IntrNoFree], "", [SDNPMemOperand]>,
- AMDGPURsrcIntrinsic<2, 0>;
-
-def int_amdgcn_buffer_atomic_csub : AMDGPUBufferAtomic;
-
-class AMDGPUBufferAtomicFP : Intrinsic <
- [llvm_anyfloat_ty],
- [LLVMMatchType<0>, // vdata(VGPR)
- llvm_v4i32_ty, // rsrc(SGPR)
- llvm_i32_ty, // vindex(VGPR)
- llvm_i32_ty, // offset(SGPR/VGPR/imm)
- llvm_i1_ty], // slc(imm)
- [ImmArg<ArgIndex<4>>, IntrWillReturn, IntrNoCallback, IntrNoFree], "", [SDNPMemOperand]>,
- AMDGPURsrcIntrinsic<1, 0>;
-
-// Legacy form of the intrinsic. raw and struct forms should be preferred.
-def int_amdgcn_buffer_atomic_fadd : AMDGPUBufferAtomicFP;
-
class AMDGPURawBufferLoadLDS : Intrinsic <
[],
[llvm_v4i32_ty, // rsrc(SGPR)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp
index 1d645002b1fe6..38cc5a9bef969 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp
@@ -249,63 +249,54 @@ void AMDGPUAtomicOptimizerImpl::visitIntrinsicInst(IntrinsicInst &I) {
switch (I.getIntrinsicID()) {
default:
return;
- case Intrinsic::amdgcn_buffer_atomic_add:
case Intrinsic::amdgcn_struct_buffer_atomic_add:
case Intrinsic::amdgcn_struct_ptr_buffer_atomic_add:
case Intrinsic::amdgcn_raw_buffer_atomic_add:
case Intrinsic::amdgcn_raw_ptr_buffer_atomic_add:
Op = AtomicRMWInst::Add;
break;
- case Intrinsic::amdgcn_buffer_atomic_sub:
case Intrinsic::amdgcn_struct_buffer_atomic_sub:
case Intrinsic::amdgcn_struct_ptr_buffer_atomic_sub:
case Intrinsic::amdgcn_raw_buffer_atomic_sub:
case Intrinsic::amdgcn_raw_ptr_buffer_atomic_sub:
Op = AtomicRMWInst::Sub;
break;
- case Intrinsic::amdgcn_buffer_atomic_and:
case Intrinsic::amdgcn_struct_buffer_atomic_and:
case Intrinsic::amdgcn_struct_ptr_buffer_atomic_and:
case Intrinsic::amdgcn_raw_buffer_atomic_and:
case Intrinsic::amdgcn_raw_ptr_buffer_atomic_and:
Op = AtomicRMWInst::And;
break;
- case Intrinsic::amdgcn_buffer_atomic_or:
case Intrinsic::amdgcn_struct_buffer_atomic_or:
case Intrinsic::amdgcn_struct_ptr_buffer_atomic_or:
case Intrinsic::amdgcn_raw_buffer_atomic_or:
case Intrinsic::amdgcn_raw_ptr_buffer_atomic_or:
Op = AtomicRMWInst::Or;
break;
- case Intrinsic::amdgcn_buffer_atomic_xor:
case Intrinsic::amdgcn_struct_buffer_atomic_xor:
case Intrinsic::amdgcn_struct_ptr_buffer_atomic_xor:
case Intrinsic::amdgcn_raw_buffer_atomic_xor:
case Intrinsic::amdgcn_raw_ptr_buffer_atomic_xor:
Op = AtomicRMWInst::Xor;
break;
- case Intrinsic::amdgcn_buffer_atomic_smin:
case Intrinsic::amdgcn_struct_buffer_atomic_smin:
case Intrinsic::amdgcn_struct_ptr_buffer_atomic_smin:
case Intrinsic::amdgcn_raw_buffer_atomic_smin:
case Intrinsic::amdgcn_raw_ptr_buffer_atomic_smin:
Op = AtomicRMWInst::Min;
break;
- case Intrinsic::amdgcn_buffer_atomic_umin:
case Intrinsic::amdgcn_struct_buffer_atomic_umin:
case Intrinsic::amdgcn_struct_ptr_buffer_atomic_umin:
case Intrinsic::amdgcn_raw_buffer_atomic_umin:
case Intrinsic::amdgcn_raw_ptr_buffer_atomic_umin:
Op = AtomicRMWInst::UMin;
break;
- case Intrinsic::amdgcn_buffer_atomic_smax:
case Intrinsic::amdgcn_struct_buffer_atomic_smax:
case Intrinsic::amdgcn_struct_ptr_buffer_atomic_smax:
case Intrinsic::amdgcn_raw_buffer_atomic_smax:
case Intrinsic::amdgcn_raw_ptr_buffer_atomic_smax:
Op = AtomicRMWInst::Max;
break;
- case Intrinsic::amdgcn_buffer_atomic_umax:
case Intrinsic::amdgcn_struct_buffer_atomic_umax:
case Intrinsic::amdgcn_struct_ptr_buffer_atomic_umax:
case Intrinsic::amdgcn_raw_buffer_atomic_umax:
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
index 160a17584ca3a..93bca4402ed23 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
@@ -1158,12 +1158,10 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
return IC.replaceInstUsesWith(II, ConstantInt::getFalse(II.getType()));
break;
}
- case Intrinsic::amdgcn_buffer_store_format:
case Intrinsic::amdgcn_raw_buffer_store_format:
case Intrinsic::amdgcn_struct_buffer_store_format:
case Intrinsic::amdgcn_raw_tbuffer_store:
case Intrinsic::amdgcn_struct_tbuffer_store:
- case Intrinsic::amdgcn_tbuffer_store:
case Intrinsic::amdgcn_image_store_1d:
case Intrinsic::amdgcn_image_store_1darray:
case Intrinsic::amdgcn_image_store_2d:
@@ -1376,8 +1374,6 @@ std::optional<Value *> GCNTTIImpl::simplifyDemandedVectorEltsIntrinsic(
std::function<void(Instruction *, unsigned, APInt, APInt &)>
SimplifyAndSetOp) const {
switch (II.getIntrinsicID()) {
- case Intrinsic::amdgcn_buffer_load:
- case Intrinsic::amdgcn_buffer_load_format:
case Intrinsic::amdgcn_raw_buffer_load:
case Intrinsic::amdgcn_raw_ptr_buffer_load:
case Intrinsic::amdgcn_raw_buffer_load_format:
@@ -1391,7 +1387,6 @@ std::optional<Value *> GCNTTIImpl::simplifyDemandedVectorEltsIntrinsic(
case Intrinsic::amdgcn_struct_ptr_buffer_load_format:
case Intrinsic::amdgcn_struct_tbuffer_load:
case Intrinsic::amdgcn_struct_ptr_tbuffer_load:
- case Intrinsic::amdgcn_tbuffer_load:
return simplifyAMDGCNMemoryIntrinsicDemanded(IC, II, DemandedElts);
default: {
if (getAMDGPUImageDMaskIntrinsic(II.getIntrinsicID())) {
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td b/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td
index 410dc83d45c57..e84d39a2895c8 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td
@@ -256,17 +256,6 @@ def : SourceOfDivergence<int_amdgcn_ds_fadd>;
def : SourceOfDivergence<int_amdgcn_ds_fmin>;
def : SourceOfDivergence<int_amdgcn_ds_fmax>;
def : SourceOfDivergence<int_amdgcn_ds_fadd_v2bf16>;
-def : SourceOfDivergence<int_amdgcn_buffer_atomic_swap>;
-def : SourceOfDivergence<int_amdgcn_buffer_atomic_add>;
-def : SourceOfDivergence<int_amdgcn_buffer_atomic_sub>;
-def : SourceOfDivergence<int_amdgcn_buffer_atomic_smin>;
-def : SourceOfDivergence<int_amdgcn_buffer_atomic_umin>;
-def : SourceOfDivergence<int_amdgcn_buffer_atomic_smax>;
-def : SourceOfDivergence<int_amdgcn_buffer_atomic_umax>;
-def : SourceOfDivergence<int_amdgcn_buffer_atomic_and>;
-def : SourceOfDivergence<int_amdgcn_buffer_atomic_or>;
-def : SourceOfDivergence<int_amdgcn_buffer_atomic_xor>;
-def : SourceOfDivergence<int_amdgcn_buffer_atomic_cmpswap>;
def : SourceOfDivergence<int_amdgcn_raw_buffer_atomic_swap>;
def : SourceOfDivergence<int_amdgcn_raw_buffer_atomic_add>;
def : SourceOfDivergence<int_amdgcn_raw_buffer_atomic_sub>;
@@ -339,7 +328,6 @@ def : SourceOfDivergence<int_amdgcn_struct_ptr_buffer_atomic_fmin>;
def : SourceOfDivergence<int_amdgcn_struct_ptr_buffer_atomic_fmax>;
def : SourceOfDivergence<int_amdgcn_struct_ptr_buffer_atomic_cmpswap>;
def : SourceOfDivergence<int_amdgcn_struct_ptr_buffer_atomic_cond_sub_u32>;
-def : SourceOfDivergence<int_amdgcn_buffer_atomic_csub>;
def : SourceOfDivergence<int_amdgcn_ps_live>;
def : SourceOfDivergence<int_amdgcn_live_mask>;
def : SourceOfDivergence<int_amdgcn_ds_swizzle>;
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index f9948e92862f7..d9b7dc2eb59d2 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -1280,19 +1280,6 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
return true;
}
- case Intrinsic::amdgcn_buffer_atomic_fadd: {
- Info.opc = ISD::INTRINSIC_W_CHAIN;
- Info.memVT = MVT::getVT(CI.getOperand(0)->getType());
- Info.fallbackAddressSpace = AMDGPUAS::BUFFER_RESOURCE;
- Info.align.reset();
- Info.flags |= MachineMemOperand::MOLoad | MachineMemOperand::MOStore;
-
- const ConstantInt *Vol = dyn_cast<ConstantInt>(CI.getOperand(4));
- if (!Vol || !Vol->isZero())
- Info.flags |= MachineMemOperand::MOVolatile;
-
- return true;
- }
case Intrinsic::amdgcn_ds_add_gs_reg_rtn:
case Intrinsic::amdgcn_ds_sub_gs_reg_rtn: {
Info.opc = ISD::INTRINSIC_W_CHAIN;
@@ -8732,43 +8719,6 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
return DAG.getMemIntrinsicNode(Opc, SDLoc(Op), M->getVTList(), Ops,
M->getMemoryVT(), M->getMemOperand());
}
- case Intrinsic::amdgcn_buffer_load:
- case Intrinsic::amdgcn_buffer_load_format: {
- unsigned Glc = Op.getConstantOperandVal(5);
- unsigned Slc = Op.getConstantOperandVal(6);
- unsigned IdxEn = getIdxEn(Op.getOperand(3));
- SDValue Ops[] = {
- Op.getOperand(0), // Chain
- Op.getOperand(2), // rsrc
- Op.getOperand(3), // vindex
- SDValue(), // voffset -- will be set by setBufferOffsets
- SDValue(), // soffset -- will be set by setBufferOffsets
- SDValue(), // offset -- will be set by setBufferOffsets
- DAG.getTargetConstant(Glc | (Slc << 1), DL, MVT::i32), // cachepolicy
- DAG.getTargetConstant(IdxEn, DL, MVT::i1), // idxen
- };
- setBufferOffsets(Op.getOperand(4), DAG, &Ops[3]);
-
- unsigned Opc = (IntrID == Intrinsic::amdgcn_buffer_load) ?
- AMDGPUISD::BUFFER_LOAD : AMDGPUISD::BUFFER_LOAD_FORMAT;
-
- EVT VT = Op.getValueType();
- EVT IntVT = VT.changeTypeToInteger();
- auto *M = cast<MemSDNode>(Op);
- EVT LoadVT = Op.getValueType();
-
- if (LoadVT.getScalarType() == MVT::f16)
- return adjustLoadValueType(AMDGPUISD::BUFFER_LOAD_FORMAT_D16,
- M, DAG, Ops);
-
- // Handle BUFFER_LOAD_BYTE/UBYTE/SHORT/USHORT overloaded intrinsics
- if (LoadVT.getScalarType() == MVT::i8 || LoadVT.getScalarType() == MVT::i16)
- return handleByteShortBufferLoads(DAG, LoadVT, DL, Ops,
- M->getMemOperand());
-
- return getMemIntrinsicNode(Opc, DL, Op->getVTList(), Ops, IntVT,
- M->getMemOperand(), DAG);
- }
case Intrinsic::amdgcn_raw_buffer_load:
case Intrinsic::amdgcn_raw_ptr_buffer_load:
case Intrinsic::amdgcn_raw_buffer_load_format:
@@ -8818,35 +8768,6 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
return lowerIntrinsicLoad(cast<MemSDNode>(Op), IsFormat, DAG, Ops);
}
- case Intrinsic::amdgcn_tbuffer_load: {
- MemSDNode *M = cast<MemSDNode>(Op);
- EVT LoadVT = Op.getValueType();
-
- auto SOffset = selectSOffset(Op.getOperand(5), DAG, Subtarget);
- unsigned Dfmt = Op.getConstantOperandVal(7);
- unsigned Nfmt = Op.getConstantOperandVal(8);
- unsigned Glc = Op.getConstantOperandVal(9);
- unsigned Slc = Op.getConstantOperandVal(10);
- unsigned IdxEn = getIdxEn(Op.getOperand(3));
- SDValue Ops[] = {
- Op.getOperand(0), // Chain
- Op.getOperand(2), // rsrc
- Op.getOperand(3), // vindex
- Op.getOperand(4), // voffset
- SOffset, // soffset
- Op.getOperand(6), // offset
- DAG.getTargetConstant(Dfmt | (Nfmt << 4), DL, MVT::i32), // format
- DAG.getTargetConstant(Glc | (Slc << 1), DL, MVT::i32), // cachepolicy
- DAG.getTargetConstant(IdxEn, DL, MVT::i1) // idxen
- };
-
- if (LoadVT.getScalarType() == MVT::f16)
- return adjustLoadValueType(AMDGPUISD::TBUFFER_LOAD_FORMAT_D16,
- M, DAG, Ops);
- return getMemIntrinsicNode(AMDGPUISD::TBUFFER_LOAD_FORMAT, DL,
- Op->getVTList(), Ops, LoadVT, M->getMemOperand(),
- DAG);
- }
case Intrinsic::amdgcn_raw_tbuffer_load:
case Intrinsic::amdgcn_raw_ptr_tbuffer_load: {
MemSDNode *M = cast<MemSDNode>(Op);
@@ -8901,82 +8822,6 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
Op->getVTList(), Ops, LoadVT, M->getMemOperand(),
DAG);
}
- case Intrinsic::amdgcn_buffer_atomic_swap:
- case Intrinsic::amdgcn_buffer_atomic_add:
- case Intrinsic::amdgcn_buffer_atomic_sub:
- case Intrinsic::amdgcn_buffer_atomic_csub:
- case Intrinsic::amdgcn_buffer_atomic_smin:
- case Intrinsic::amdgcn_buffer_atomic_umin:
- case Intrinsic::amdgcn_buffer_atomic_smax:
- case Intrinsic::amdgcn_buffer_atomic_umax:
- case Intrinsic::amdgcn_buffer_atomic_and:
- case Intrinsic::amdgcn_buffer_atomic_or:
- case Intrinsic::amdgcn_buffer_atomic_xor:
- case Intrinsic::amdgcn_buffer_atomic_fadd: {
- unsigned Slc = Op.getConstantOperandVal(6);
- unsigned IdxEn = getIdxEn(Op.getOperand(4));
- SDValue Ops[] = {
- Op.getOperand(0), // Chain
- Op.getOperand(2), // vdata
- Op.getOperand(3), // rsrc
- Op.getOperand(4), // vindex
- SDValue(), // voffset -- will be set by setBufferOffsets
- SDValue(), // soffset -- will be set by setBufferOffsets
- SDValue(), // offset -- will be set by setBufferOffsets
- DAG.getTargetConstant(Slc << 1, DL, MVT::i32), // cachepolicy
- DAG.getTargetConstant(IdxEn, DL, MVT::i1), // idxen
- };
- setBufferOffsets(Op.getOperand(5), DAG, &Ops[4]);
-
- EVT VT = Op.getValueType();
-
- auto *M = cast<MemSDNode>(Op);
- unsigned Opcode = 0;
-
- switch (IntrID) {
- case Intrinsic::amdgcn_buffer_atomic_swap:
- Opcode = AMDGPUISD::BUFFER_ATOMIC_SWAP;
- break;
- case Intrinsic::amdgcn_buffer_atomic_add:
- Opcode = AMDGPUISD::BUFFER_ATOMIC_ADD;
- break;
- case Intrinsic::amdgcn_buffer_atomic_sub:
- Opcode = AMDGPUISD::BUFFER_ATOMIC_SUB;
- break;
- case Intrinsic...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/93801
More information about the llvm-commits mailing list