[llvm] [AMDGPU] Add custom MachineValueType entries for buffer fat pointers (PR #127692)
Krzysztof Drewniak via llvm-commits
llvm-commits at lists.llvm.org
Tue Feb 18 12:14:23 PST 2025
https://github.com/krzysz00 created https://github.com/llvm/llvm-project/pull/127692
The old hack of returning v5/v6i32 for the fat and strided buffer pointers was causing issues during vectorization queries that expected to be able to construct a VectorType from the return value of `MVT getPointerTy()`. One example is in the test attached to this PR, which used to crash.
Now, we define the custom MVT entries, the 160-bit amdgpuBufferFatPointer and 192-bit amdgpuBufferStridedPointer, which are used to represent ptr addrspace(7) and ptr addrspace(9) respectively.
Neither of these types will be present at the time of lowering to a SelectionDAG or other MIR - MVT::amdgpuBufferFatPointer is eliminated by the LowerBufferFatPointers pass and MVT::amdgpuBufferStridedPointer is not currently used outside of the SPIR-V translator (which does its own lowering).
>From 70cc44e5aa4963d3a1a1d1d4fa38a85470a27863 Mon Sep 17 00:00:00 2001
From: Krzysztof Drewniak <Krzysztof.Drewniak at amd.com>
Date: Tue, 18 Feb 2025 20:09:49 +0000
Subject: [PATCH] [AMDGPU] Add custom MachineValueType entries for buffer fat
pointers
The old hack of returning v5/v6i32 for the fat and strided buffer
pointers was causing issues during vectorization queries that expected
to be able to construct a VectorType from the return value of
`MVT getPointerTy()`. One example is in the test attached to this PR,
which used to crash.
Now, we define the custom MVT entries, the 160-bit
amdgpuBufferFatPointer and 192-bit amdgpuBufferStridedPointer, which
are used to represent ptr addrspace(7) and ptr addrspace(9)
respectively.
Neither of these types will be present at the time of lowering to a
SelectionDAG or other MIR - MVT::amdgpuBufferFatPointer is eliminated
by the LowerBufferFatPointers pass and MVT::amdgpuBufferStridedPointer is
not currently used outside of the SPIR-V translator (which does its
own lowering).
---
llvm/include/llvm/CodeGen/ValueTypes.td | 4 ++
llvm/lib/CodeGen/ValueTypes.cpp | 6 +++
llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 19 ++++-----
llvm/lib/Target/AMDGPU/SIISelLowering.h | 6 ++-
.../AMDGPU/buffer-fat-pointer.ll | 39 +++++++++++++++++++
5 files changed, 63 insertions(+), 11 deletions(-)
create mode 100644 llvm/test/Transforms/LoopVectorize/AMDGPU/buffer-fat-pointer.ll
diff --git a/llvm/include/llvm/CodeGen/ValueTypes.td b/llvm/include/llvm/CodeGen/ValueTypes.td
index 6d6b92958b432..c427aa95cf1cd 100644
--- a/llvm/include/llvm/CodeGen/ValueTypes.td
+++ b/llvm/include/llvm/CodeGen/ValueTypes.td
@@ -331,6 +331,10 @@ def i64x8 : ValueType<512, 231>; // 8 Consecutive GPRs (AArch64)
def aarch64svcount
: ValueType<16, 232>; // AArch64 predicate-as-counter
def spirvbuiltin : ValueType<0, 233>; // SPIR-V's builtin type
+// AMDGPU buffer fat pointer, buffer rsrc + offset, rewritten before MIR translation
+def amdgpuBufferFatPointer : ValueType<160, 234>;
+// AMDGPU buffer strided pointer, buffer rsrc + index + offset, doesn't reach MIR
+def amdgpuBufferStridedPointer : ValueType<192, 235>;
let isNormalValueType = false in {
def token : ValueType<0, 504>; // TokenTy
diff --git a/llvm/lib/CodeGen/ValueTypes.cpp b/llvm/lib/CodeGen/ValueTypes.cpp
index 2c80eee7c904c..0554b6387c5e6 100644
--- a/llvm/lib/CodeGen/ValueTypes.cpp
+++ b/llvm/lib/CodeGen/ValueTypes.cpp
@@ -194,6 +194,10 @@ std::string EVT::getEVTString() const {
return "aarch64svcount";
case MVT::spirvbuiltin:
return "spirvbuiltin";
+ case MVT::amdgpuBufferFatPointer:
+ return "amdgpuBufferFatPointer";
+ case MVT::amdgpuBufferStridedPointer:
+ return "amdgpuBufferStridedPointer";
}
}
@@ -219,6 +223,8 @@ Type *EVT::getTypeForEVT(LLVMContext &Context) const {
return TargetExtType::get(Context, "aarch64.svcount");
case MVT::x86amx: return Type::getX86_AMXTy(Context);
case MVT::i64x8: return IntegerType::get(Context, 512);
+ case MVT::amdgpuBufferFatPointer: return IntegerType::get(Context, 160);
+ case MVT::amdgpuBufferStridedPointer: return IntegerType::get(Context, 192);
case MVT::externref: return Type::getWasm_ExternrefTy(Context);
case MVT::funcref: return Type::getWasm_FuncrefTy(Context);
case MVT::Metadata: return Type::getMetadataTy(Context);
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index e09b310d107ac..6bee33b1c5168 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -1175,19 +1175,20 @@ static EVT memVTFromLoadIntrReturn(const SITargetLowering &TLI,
return memVTFromLoadIntrData(TLI, DL, ST->getContainedType(0), MaxNumLanes);
}
-/// Map address space 7 to MVT::v5i32 because that's its in-memory
-/// representation. This return value is vector-typed because there is no
-/// MVT::i160 and it is not clear if one can be added. While this could
-/// cause issues during codegen, these address space 7 pointers will be
-/// rewritten away by then. Therefore, we can return MVT::v5i32 in order
-/// to allow pre-codegen passes that query TargetTransformInfo, often for cost
-/// modeling, to work.
+/// Map address space 7 to MVT::amdgpuBufferFatPointer because that's its
+/// in-memory representation. This return value is a custom type because there
+/// is no MVT::i160 and adding one breaks integer promotion logic. While this
+/// could cause issues during codegen, these address space 7 pointers will be
+/// rewritten away by then. Therefore, we can return MVT::amdgpuBufferFatPointer
+/// in order to allow pre-codegen passes that query TargetTransformInfo, often
+/// for cost modeling, to work. (This also sets us up decently for doing the
+/// buffer lowering in GlobalISel if SelectionDAG ever goes away.)
MVT SITargetLowering::getPointerTy(const DataLayout &DL, unsigned AS) const {
if (AMDGPUAS::BUFFER_FAT_POINTER == AS && DL.getPointerSizeInBits(AS) == 160)
- return MVT::v5i32;
+ return MVT::amdgpuBufferFatPointer;
if (AMDGPUAS::BUFFER_STRIDED_POINTER == AS &&
DL.getPointerSizeInBits(AS) == 192)
- return MVT::v6i32;
+ return MVT::amdgpuBufferStridedPointer;
return AMDGPUTargetLowering::getPointerTy(DL, AS);
}
/// Similarly, the in-memory representation of a p7 is {p8, i32}, aka
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h
index 1cd7f1b29e077..fedea14af97a5 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h
@@ -302,8 +302,10 @@ class SITargetLowering final : public AMDGPUTargetLowering {
bool isShuffleMaskLegal(ArrayRef<int> /*Mask*/, EVT /*VT*/) const override;
// While address space 7 should never make it to codegen, it still needs to
- // have a MVT to prevent some analyses that query this function from breaking,
- // so, to work around the lack of i160, map it to v5i32.
+ // have a MVT to prevent some analyses that query this function from breaking.
+ // We use the custom MVT::amdgpuBufferFatPointer and
+ // MVT::amdgpuBufferStridedPointer for this, though we use v8i32 for the
+ // memory type (which is probably unused).
MVT getPointerTy(const DataLayout &DL, unsigned AS) const override;
MVT getPointerMemTy(const DataLayout &DL, unsigned AS) const override;
diff --git a/llvm/test/Transforms/LoopVectorize/AMDGPU/buffer-fat-pointer.ll b/llvm/test/Transforms/LoopVectorize/AMDGPU/buffer-fat-pointer.ll
new file mode 100644
index 0000000000000..3abbe13483e03
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/AMDGPU/buffer-fat-pointer.ll
@@ -0,0 +1,39 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -passes=loop-vectorize -S < %s | FileCheck %s
+
+; Reduced from a crash, variables added to make things more realistic.
+; This is a roundabout test for TargetLowering::getValueType() returning
+; a reasonable value for <N x p7> instead of asserting.
+define amdgpu_kernel void @_dynamic_pack_simple_dispatch_0_pack_i32(ptr addrspace(1) %.ptr, i64 %v) {
+; CHECK-LABEL: define amdgpu_kernel void @_dynamic_pack_simple_dispatch_0_pack_i32(
+; CHECK-SAME: ptr addrspace(1) [[DOTPTR:%.*]], i64 [[TMP0:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[_LR_PH5:.*:]]
+; CHECK-NEXT: [[DOTRSRC:%.*]] = call ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p1(ptr addrspace(1) [[DOTPTR]], i16 0, i32 -2147483648, i32 159744)
+; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(8) [[DOTRSRC]] to ptr addrspace(7)
+; CHECK-NEXT: br label %[[BB2:.*]]
+; CHECK: [[BB2]]:
+; CHECK-NEXT: [[TMP3:%.*]] = phi i64 [ 0, [[DOTLR_PH5:%.*]] ], [ [[TMP5:%.*]], %[[BB2]] ]
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr addrspace(7) [[TMP1]], i32 0
+; CHECK-NEXT: [[TMP5]] = add i64 [[TMP3]], 1
+; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[TMP3]], [[TMP0]]
+; CHECK-NEXT: br i1 [[EXITCOND_NOT]], [[DOT_CRIT_EDGE_LOOPEXIT:label %.*]], label %[[BB2]]
+; CHECK: [[__CRIT_EDGE_LOOPEXIT:.*:]]
+; CHECK-NEXT: ret void
+;
+entry:
+ %rsrc = call ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p1(ptr addrspace(1) %.ptr, i16 0, i32 2147483648, i32 159744)
+ %fat = addrspacecast ptr addrspace(8) %rsrc to ptr addrspace(7)
+ br label %loop
+
+loop: ; preds = %loop, %entry
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+ %ptr = getelementptr i32, ptr addrspace(7) %fat, i32 0
+ %iv.next = add i64 %iv, 1
+ %exitcond.not = icmp eq i64 %iv, %v
+ br i1 %exitcond.not, label %exit, label %loop
+
+exit: ; preds = %exit
+ ret void
+}
+
+declare ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p1(ptr addrspace(1) readnone, i16, i32, i32)
More information about the llvm-commits
mailing list