[llvm] ec78750 - Revert "[AMDGPU] Enable i8 GEP promotion for vector allocas" (#171087)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Dec 8 00:25:52 PST 2025
Author: Jan Patrick Lehr
Date: 2025-12-08T08:25:48Z
New Revision: ec787501dc3d60f2927abfcf4c8d322ea6baa82a
URL: https://github.com/llvm/llvm-project/commit/ec787501dc3d60f2927abfcf4c8d322ea6baa82a
DIFF: https://github.com/llvm/llvm-project/commit/ec787501dc3d60f2927abfcf4c8d322ea6baa82a.diff
LOG: Revert "[AMDGPU] Enable i8 GEP promotion for vector allocas" (#171087)
Reverts llvm/llvm-project#166132
The reverted change broke the libc-on-GPU tests; see the failing buildbot run:
https://lab.llvm.org/buildbot/#/builders/10/builds/18635
Added:
Modified:
llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
llvm/test/CodeGen/AMDGPU/promote-alloca-vector-gep.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
index bab76e87af40c..b79689c39ef84 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
@@ -457,25 +457,10 @@ static Value *GEPToVectorIndex(GetElementPtrInst *GEP, AllocaInst *Alloca,
const auto &VarOffset = VarOffsets.front();
APInt OffsetQuot;
APInt::sdivrem(VarOffset.second, VecElemSize, OffsetQuot, Rem);
- Value *Offset = VarOffset.first;
- if (Rem != 0) {
- unsigned ElemSizeShift = Log2_64(VecElemSize);
- SimplifyQuery SQ(DL);
- SQ.CxtI = GEP;
- KnownBits KB = computeKnownBits(VarOffset.first, SQ);
- // Bail out if the index may point into the middle of an element.
- if (KB.countMinTrailingZeros() < ElemSizeShift)
- return nullptr;
-
- Value *Scaled = Builder.CreateLShr(VarOffset.first, ElemSizeShift);
- if (Instruction *NewInst = dyn_cast<Instruction>(Scaled))
- NewInsts.push_back(NewInst);
-
- Offset = Scaled;
- OffsetQuot = APInt(BW, 1);
- Rem = 0;
- }
+ if (Rem != 0 || OffsetQuot.isZero())
+ return nullptr;
+ Value *Offset = VarOffset.first;
if (!isa<IntegerType>(Offset->getType()))
return nullptr;
diff --git a/llvm/test/CodeGen/AMDGPU/promote-alloca-vector-gep.ll b/llvm/test/CodeGen/AMDGPU/promote-alloca-vector-gep.ll
index bcc61062640d2..76e1868b3c4b9 100644
--- a/llvm/test/CodeGen/AMDGPU/promote-alloca-vector-gep.ll
+++ b/llvm/test/CodeGen/AMDGPU/promote-alloca-vector-gep.ll
@@ -250,150 +250,6 @@ bb2:
store i32 0, ptr addrspace(5) %extractelement
ret void
}
-
-define amdgpu_kernel void @scalar_alloca_vector_gep_i8_0_or_4(ptr %buffer, float %data, i1 %idx_sel) {
-; CHECK-LABEL: define amdgpu_kernel void @scalar_alloca_vector_gep_i8_0_or_4(
-; CHECK-SAME: ptr [[BUFFER:%.*]], float [[DATA:%.*]], i1 [[IDX_SEL:%.*]]) {
-; CHECK-NEXT: [[ALLOCA:%.*]] = freeze <3 x float> poison
-; CHECK-NEXT: [[VEC:%.*]] = load <3 x float>, ptr [[BUFFER]], align 16
-; CHECK-NEXT: [[INDEX:%.*]] = select i1 [[IDX_SEL]], i32 0, i32 4
-; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[INDEX]], 2
-; CHECK-NEXT: [[TMP2:%.*]] = insertelement <3 x float> [[VEC]], float [[DATA]], i32 [[TMP1]]
-; CHECK-NEXT: store <3 x float> [[TMP2]], ptr [[BUFFER]], align 16
-; CHECK-NEXT: ret void
-;
- %alloca = alloca <3 x float>, align 16, addrspace(5)
- %vec = load <3 x float>, ptr %buffer
- store <3 x float> %vec, ptr addrspace(5) %alloca
- %index = select i1 %idx_sel, i32 0, i32 4
- %elt = getelementptr inbounds nuw i8, ptr addrspace(5) %alloca, i32 %index
- store float %data, ptr addrspace(5) %elt, align 4
- %updated = load <3 x float>, ptr addrspace(5) %alloca, align 16
- store <3 x float> %updated, ptr %buffer, align 16
- ret void
-}
-
-define amdgpu_kernel void @scalar_alloca_vector_gep_i8_4_or_8(ptr %buffer, float %data, i1 %idx_sel) {
-; CHECK-LABEL: define amdgpu_kernel void @scalar_alloca_vector_gep_i8_4_or_8(
-; CHECK-SAME: ptr [[BUFFER:%.*]], float [[DATA:%.*]], i1 [[IDX_SEL:%.*]]) {
-; CHECK-NEXT: [[ALLOCA:%.*]] = freeze <3 x float> poison
-; CHECK-NEXT: [[VEC:%.*]] = load <3 x float>, ptr [[BUFFER]], align 16
-; CHECK-NEXT: [[INDEX:%.*]] = select i1 [[IDX_SEL]], i32 4, i32 8
-; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[INDEX]], 2
-; CHECK-NEXT: [[TMP2:%.*]] = insertelement <3 x float> [[VEC]], float [[DATA]], i32 [[TMP1]]
-; CHECK-NEXT: store <3 x float> [[TMP2]], ptr [[BUFFER]], align 16
-; CHECK-NEXT: ret void
-;
- %alloca = alloca <3 x float>, align 16, addrspace(5)
- %vec = load <3 x float>, ptr %buffer
- store <3 x float> %vec, ptr addrspace(5) %alloca
- %index = select i1 %idx_sel, i32 4, i32 8
- %elt = getelementptr inbounds nuw i8, ptr addrspace(5) %alloca, i32 %index
- store float %data, ptr addrspace(5) %elt, align 4
- %updated = load <3 x float>, ptr addrspace(5) %alloca, align 16
- store <3 x float> %updated, ptr %buffer, align 16
- ret void
-}
-
-define amdgpu_kernel void @scalar_alloca_nested_vector_gep_i8_4_or_8(ptr %buffer, float %data, i1 %idx_sel) {
-; CHECK-LABEL: define amdgpu_kernel void @scalar_alloca_nested_vector_gep_i8_4_or_8(
-; CHECK-SAME: ptr [[BUFFER:%.*]], float [[DATA:%.*]], i1 [[IDX_SEL:%.*]]) {
-; CHECK-NEXT: [[ALLOCA:%.*]] = freeze <8 x float> poison
-; CHECK-NEXT: [[VEC:%.*]] = load <3 x float>, ptr [[BUFFER]], align 16
-; CHECK-NEXT: [[TMP1:%.*]] = extractelement <3 x float> [[VEC]], i64 0
-; CHECK-NEXT: [[TMP2:%.*]] = insertelement <8 x float> [[ALLOCA]], float [[TMP1]], i32 0
-; CHECK-NEXT: [[TMP3:%.*]] = extractelement <3 x float> [[VEC]], i64 1
-; CHECK-NEXT: [[TMP4:%.*]] = insertelement <8 x float> [[TMP2]], float [[TMP3]], i32 1
-; CHECK-NEXT: [[TMP5:%.*]] = extractelement <3 x float> [[VEC]], i64 2
-; CHECK-NEXT: [[TMP6:%.*]] = insertelement <8 x float> [[TMP4]], float [[TMP5]], i32 2
-; CHECK-NEXT: [[INDEX:%.*]] = select i1 [[IDX_SEL]], i32 4, i32 8
-; CHECK-NEXT: [[TMP7:%.*]] = lshr i32 [[INDEX]], 2
-; CHECK-NEXT: [[TMP8:%.*]] = insertelement <8 x float> [[TMP6]], float [[DATA]], i32 [[TMP7]]
-; CHECK-NEXT: [[TMP9:%.*]] = extractelement <8 x float> [[TMP8]], i32 0
-; CHECK-NEXT: [[TMP10:%.*]] = insertelement <3 x float> poison, float [[TMP9]], i64 0
-; CHECK-NEXT: [[TMP11:%.*]] = extractelement <8 x float> [[TMP8]], i32 1
-; CHECK-NEXT: [[TMP12:%.*]] = insertelement <3 x float> [[TMP10]], float [[TMP11]], i64 1
-; CHECK-NEXT: [[TMP13:%.*]] = extractelement <8 x float> [[TMP8]], i32 2
-; CHECK-NEXT: [[TMP14:%.*]] = insertelement <3 x float> [[TMP12]], float [[TMP13]], i64 2
-; CHECK-NEXT: store <3 x float> [[TMP14]], ptr [[BUFFER]], align 16
-; CHECK-NEXT: ret void
-;
- %alloca = alloca [2 x <3 x float>], align 16, addrspace(5)
- %row = getelementptr inbounds [2 x <3 x float>], ptr addrspace(5) %alloca, i32 0, i32 0
- %vec = load <3 x float>, ptr %buffer
- store <3 x float> %vec, ptr addrspace(5) %row, align 16
- %index = select i1 %idx_sel, i32 4, i32 8
- %elt = getelementptr inbounds nuw i8, ptr addrspace(5) %row, i32 %index
- store float %data, ptr addrspace(5) %elt, align 4
- %updated = load <3 x float>, ptr addrspace(5) %row, align 16
- store <3 x float> %updated, ptr %buffer, align 16
- ret void
-}
-
-define amdgpu_kernel void @scalar_alloca_vector_gep_i8_4_or_5_no_promote(ptr %buffer, float %data, i1 %idx_sel) {
-; CHECK-LABEL: define amdgpu_kernel void @scalar_alloca_vector_gep_i8_4_or_5_no_promote(
-; CHECK-SAME: ptr [[BUFFER:%.*]], float [[DATA:%.*]], i1 [[IDX_SEL:%.*]]) {
-; CHECK-NEXT: [[TMP1:%.*]] = call noalias nonnull dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
-; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP1]], i64 1
-; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[TMP2]], align 4, !invariant.load [[META0]]
-; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP1]], i64 2
-; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr addrspace(4) [[TMP4]], align 4, !range [[RNG1]], !invariant.load [[META0]]
-; CHECK-NEXT: [[TMP6:%.*]] = lshr i32 [[TMP3]], 16
-; CHECK-NEXT: [[TMP7:%.*]] = call range(i32 0, 1024) i32 @llvm.amdgcn.workitem.id.x()
-; CHECK-NEXT: [[TMP8:%.*]] = call range(i32 0, 1024) i32 @llvm.amdgcn.workitem.id.y()
-; CHECK-NEXT: [[TMP9:%.*]] = call range(i32 0, 1024) i32 @llvm.amdgcn.workitem.id.z()
-; CHECK-NEXT: [[TMP10:%.*]] = mul nuw nsw i32 [[TMP6]], [[TMP5]]
-; CHECK-NEXT: [[TMP11:%.*]] = mul i32 [[TMP10]], [[TMP7]]
-; CHECK-NEXT: [[TMP12:%.*]] = mul nuw nsw i32 [[TMP8]], [[TMP5]]
-; CHECK-NEXT: [[TMP13:%.*]] = add i32 [[TMP11]], [[TMP12]]
-; CHECK-NEXT: [[TMP14:%.*]] = add i32 [[TMP13]], [[TMP9]]
-; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds [1024 x <3 x float>], ptr addrspace(3) @scalar_alloca_vector_gep_i8_4_or_5_no_promote.alloca, i32 0, i32 [[TMP14]]
-; CHECK-NEXT: [[VEC:%.*]] = load <3 x float>, ptr [[BUFFER]], align 16
-; CHECK-NEXT: store <3 x float> [[VEC]], ptr addrspace(3) [[TMP15]], align 16
-; CHECK-NEXT: [[INDEX:%.*]] = select i1 [[IDX_SEL]], i32 4, i32 5
-; CHECK-NEXT: [[ELT:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(3) [[TMP15]], i32 [[INDEX]]
-; CHECK-NEXT: store float [[DATA]], ptr addrspace(3) [[ELT]], align 4
-; CHECK-NEXT: [[UPDATED:%.*]] = load <3 x float>, ptr addrspace(3) [[TMP15]], align 16
-; CHECK-NEXT: store <3 x float> [[UPDATED]], ptr [[BUFFER]], align 16
-; CHECK-NEXT: ret void
-;
- %alloca = alloca <3 x float>, align 16, addrspace(5)
- %vec = load <3 x float>, ptr %buffer
- store <3 x float> %vec, ptr addrspace(5) %alloca
- %index = select i1 %idx_sel, i32 4, i32 5
- %elt = getelementptr inbounds nuw i8, ptr addrspace(5) %alloca, i32 %index
- store float %data, ptr addrspace(5) %elt, align 4
- %updated = load <3 x float>, ptr addrspace(5) %alloca, align 16
- store <3 x float> %updated, ptr %buffer, align 16
- ret void
-}
-
-define amdgpu_kernel void @scalar_alloca_nested_vector_gep_i8_4_or_5_no_promote(ptr %buffer, float %data, i1 %idx_sel) {
-; CHECK-LABEL: define amdgpu_kernel void @scalar_alloca_nested_vector_gep_i8_4_or_5_no_promote(
-; CHECK-SAME: ptr [[BUFFER:%.*]], float [[DATA:%.*]], i1 [[IDX_SEL:%.*]]) {
-; CHECK-NEXT: [[ALLOCA:%.*]] = alloca [2 x <3 x float>], align 16, addrspace(5)
-; CHECK-NEXT: [[ROW:%.*]] = getelementptr inbounds [2 x <3 x float>], ptr addrspace(5) [[ALLOCA]], i32 0, i32 0
-; CHECK-NEXT: [[VEC:%.*]] = load <3 x float>, ptr [[BUFFER]], align 16
-; CHECK-NEXT: store <3 x float> [[VEC]], ptr addrspace(5) [[ROW]], align 16
-; CHECK-NEXT: [[INDEX:%.*]] = select i1 [[IDX_SEL]], i32 4, i32 5
-; CHECK-NEXT: [[ELT:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(5) [[ROW]], i32 [[INDEX]]
-; CHECK-NEXT: store float [[DATA]], ptr addrspace(5) [[ELT]], align 4
-; CHECK-NEXT: [[TMP14:%.*]] = load <3 x float>, ptr addrspace(5) [[ROW]], align 16
-; CHECK-NEXT: store <3 x float> [[TMP14]], ptr [[BUFFER]], align 16
-; CHECK-NEXT: ret void
-;
- %alloca = alloca [2 x <3 x float>], align 16, addrspace(5)
- %row = getelementptr inbounds [2 x <3 x float>], ptr addrspace(5) %alloca, i32 0, i32 0
- %vec = load <3 x float>, ptr %buffer
- store <3 x float> %vec, ptr addrspace(5) %row, align 16
- %index = select i1 %idx_sel, i32 4, i32 5
- %elt = getelementptr inbounds nuw i8, ptr addrspace(5) %row, i32 %index
- store float %data, ptr addrspace(5) %elt, align 4
- %updated = load <3 x float>, ptr addrspace(5) %row, align 16
- store <3 x float> %updated, ptr %buffer, align 16
- ret void
-}
-
;.
; CHECK: [[META0]] = !{}
; CHECK: [[RNG1]] = !{i32 0, i32 1025}
More information about the llvm-commits mailing list