[llvm-branch-commits] [llvm] c3d13ce - [AMDGPU] Use alloc size for array stride in LowerBufferFatPointers (#202530)
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Tue Jun 9 03:04:35 PDT 2026
Author: Arseniy Obolenskiy
Date: 2026-06-09T11:07:25+02:00
New Revision: c3d13ceb8a894312211c88bf2ef947708442a855
URL: https://github.com/llvm/llvm-project/commit/c3d13ceb8a894312211c88bf2ef947708442a855
DIFF: https://github.com/llvm/llvm-project/commit/c3d13ceb8a894312211c88bf2ef947708442a855.diff
LOG: [AMDGPU] Use alloc size for array stride in LowerBufferFatPointers (#202530)
Array elements are laid out at multiples of getTypeAllocSize, not
getTypeStoreSize.
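The LLVM memory model lays out array element `i` at byte offset `i * allocSize`
(the value returned by `DataLayout::getTypeAllocSize`). Use that stride when
legalizing array loads and stores through buffer fat pointers to prevent a
miscompile. For example, the second element of `[2 x <3 x i32>]` lives at byte
offset 16 (the alloc size of `<3 x i32>`), not 12 (its store size).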
Added:
Modified:
llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp
llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-contents-legalization.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp
index 4c1fd1ee660de..679f7d5d8329f 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp
@@ -975,13 +975,13 @@ bool LegalizeBufferContentTypesVisitor::visitLoadImpl(
Type *ElemTy = AT->getElementType();
if (!ElemTy->isSingleValueType() || !DL.typeSizeEqualsStoreSize(ElemTy) ||
ElemTy->isVectorTy()) {
- TypeSize ElemStoreSize = DL.getTypeStoreSize(ElemTy);
+ TypeSize ElemAllocSize = DL.getTypeAllocSize(ElemTy);
bool Changed = false;
for (auto I : llvm::iota_range<uint32_t>(0, AT->getNumElements(),
/*Inclusive=*/false)) {
AggIdxs.push_back(I);
Changed |= visitLoadImpl(OrigLI, ElemTy, AggIdxs,
- AggByteOff + I * ElemStoreSize.getFixedValue(),
+ AggByteOff + I * ElemAllocSize.getFixedValue(),
Result, Name + Twine(I));
AggIdxs.pop_back();
}
@@ -1095,14 +1095,14 @@ std::pair<bool, bool> LegalizeBufferContentTypesVisitor::visitStoreImpl(
Type *ElemTy = AT->getElementType();
if (!ElemTy->isSingleValueType() || !DL.typeSizeEqualsStoreSize(ElemTy) ||
ElemTy->isVectorTy()) {
- TypeSize ElemStoreSize = DL.getTypeStoreSize(ElemTy);
+ TypeSize ElemAllocSize = DL.getTypeAllocSize(ElemTy);
bool Changed = false;
for (auto I : llvm::iota_range<uint32_t>(0, AT->getNumElements(),
/*Inclusive=*/false)) {
AggIdxs.push_back(I);
Changed |= std::get<0>(visitStoreImpl(
OrigSI, ElemTy, AggIdxs,
- AggByteOff + I * ElemStoreSize.getFixedValue(), Name + Twine(I)));
+ AggByteOff + I * ElemAllocSize.getFixedValue(), Name + Twine(I)));
AggIdxs.pop_back();
}
return std::make_pair(Changed, /*ModifiedInPlace=*/false);
diff --git a/llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-contents-legalization.ll b/llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-contents-legalization.ll
index a8e67a4a61816..47efd8ac07410 100644
--- a/llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-contents-legalization.ll
+++ b/llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-contents-legalization.ll
@@ -2039,6 +2039,34 @@ define void @store_a2v2i32([2 x <2 x i32>] %data, ptr addrspace(8) inreg %buf) {
ret void
}
+define [2 x <3 x i32>] @load_a2v3i32(ptr addrspace(8) inreg %buf) {
+; CHECK-LABEL: define [2 x <3 x i32>] @load_a2v3i32(
+; CHECK-SAME: ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[RET0_OFF_0:%.*]] = call <3 x i32> @llvm.amdgcn.raw.ptr.buffer.load.v3i32(ptr addrspace(8) align 16 [[BUF]], i32 0, i32 0, i32 0)
+; CHECK-NEXT: [[RET0:%.*]] = insertvalue [2 x <3 x i32>] poison, <3 x i32> [[RET0_OFF_0]], 0
+; CHECK-NEXT: [[RET1_OFF_16:%.*]] = call <3 x i32> @llvm.amdgcn.raw.ptr.buffer.load.v3i32(ptr addrspace(8) align 16 [[BUF]], i32 16, i32 0, i32 0)
+; CHECK-NEXT: [[RET:%.*]] = insertvalue [2 x <3 x i32>] [[RET0]], <3 x i32> [[RET1_OFF_16]], 1
+; CHECK-NEXT: ret [2 x <3 x i32>] [[RET]]
+;
+ %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
+ %ret = load [2 x <3 x i32>], ptr addrspace(7) %p
+ ret [2 x <3 x i32>] %ret
+}
+
+define void @store_a2v3i32([2 x <3 x i32>] %data, ptr addrspace(8) inreg %buf) {
+; CHECK-LABEL: define void @store_a2v3i32(
+; CHECK-SAME: [2 x <3 x i32>] [[DATA:%.*]], ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[DATA0:%.*]] = extractvalue [2 x <3 x i32>] [[DATA]], 0
+; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.v3i32(<3 x i32> [[DATA0]], ptr addrspace(8) align 16 [[BUF]], i32 0, i32 0, i32 0)
+; CHECK-NEXT: [[DATA1:%.*]] = extractvalue [2 x <3 x i32>] [[DATA]], 1
+; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.v3i32(<3 x i32> [[DATA1]], ptr addrspace(8) align 16 [[BUF]], i32 16, i32 0, i32 0)
+; CHECK-NEXT: ret void
+;
+ %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
+ store [2 x <3 x i32>] %data, ptr addrspace(7) %p
+ ret void
+}
+
define { i32 } @load_sl_i32s(ptr addrspace(8) inreg %buf) {
; CHECK-LABEL: define { i32 } @load_sl_i32s(
; CHECK-SAME: ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0]] {
More information about the llvm-branch-commits
mailing list