[llvm] f558c30 - AMDGPU/PromoteAlloca: Always use i32 for indexing (#170511)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Dec 5 12:55:01 PST 2025
Author: Nicolai Hähnle
Date: 2025-12-05T12:54:57-08:00
New Revision: f558c30146e51d5ef72bf3d4b3f0e86ca19e4b99
URL: https://github.com/llvm/llvm-project/commit/f558c30146e51d5ef72bf3d4b3f0e86ca19e4b99
DIFF: https://github.com/llvm/llvm-project/commit/f558c30146e51d5ef72bf3d4b3f0e86ca19e4b99.diff
LOG: AMDGPU/PromoteAlloca: Always use i32 for indexing (#170511)
Create more canonical code that may even lead to slightly better
codegen.
Added:
Modified:
llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
llvm/test/CodeGen/AMDGPU/promote-alloca-multidim.ll
llvm/test/CodeGen/AMDGPU/promote-alloca-negative-index.ll
llvm/test/CodeGen/AMDGPU/promote-alloca-vector-to-vector.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
index 73ec607014d31..efd3664266dee 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
@@ -461,13 +461,15 @@ static Value *GEPToVectorIndex(GetElementPtrInst *GEP, AllocaInst *Alloca,
return nullptr;
Value *Offset = VarOffset.first;
- auto *OffsetType = dyn_cast<IntegerType>(Offset->getType());
- if (!OffsetType)
+ if (!isa<IntegerType>(Offset->getType()))
return nullptr;
+ Offset = Builder.CreateSExtOrTrunc(Offset, Builder.getIntNTy(BW));
+ if (Offset != VarOffset.first)
+ NewInsts.push_back(cast<Instruction>(Offset));
+
if (!OffsetQuot.isOne()) {
- ConstantInt *ConstMul =
- ConstantInt::get(Ctx, OffsetQuot.sext(OffsetType->getBitWidth()));
+ ConstantInt *ConstMul = ConstantInt::get(Ctx, OffsetQuot.sextOrTrunc(BW));
Offset = Builder.CreateMul(Offset, ConstMul);
if (Instruction *NewInst = dyn_cast<Instruction>(Offset))
NewInsts.push_back(NewInst);
@@ -475,8 +477,7 @@ static Value *GEPToVectorIndex(GetElementPtrInst *GEP, AllocaInst *Alloca,
if (ConstOffset.isZero())
return Offset;
- ConstantInt *ConstIndex =
- ConstantInt::get(Ctx, IndexQuot.sext(OffsetType->getBitWidth()));
+ ConstantInt *ConstIndex = ConstantInt::get(Ctx, IndexQuot.sextOrTrunc(BW));
Value *IndexAdd = Builder.CreateAdd(Offset, ConstIndex);
if (Instruction *NewInst = dyn_cast<Instruction>(IndexAdd))
NewInsts.push_back(NewInst);
diff --git a/llvm/test/CodeGen/AMDGPU/promote-alloca-multidim.ll b/llvm/test/CodeGen/AMDGPU/promote-alloca-multidim.ll
index 63622e67e7d0b..7b64d8728cc24 100644
--- a/llvm/test/CodeGen/AMDGPU/promote-alloca-multidim.ll
+++ b/llvm/test/CodeGen/AMDGPU/promote-alloca-multidim.ll
@@ -262,14 +262,15 @@ define amdgpu_kernel void @i64_2d_load_store_subvec_3_i64_offset(ptr %out) {
; CHECK-NEXT: [[TMP13:%.*]] = insertelement <6 x i64> [[TMP12]], i64 3, i32 3
; CHECK-NEXT: [[TMP14:%.*]] = insertelement <6 x i64> [[TMP13]], i64 4, i32 4
; CHECK-NEXT: [[TMP15:%.*]] = insertelement <6 x i64> [[TMP14]], i64 5, i32 5
-; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[SEL3]], 3
-; CHECK-NEXT: [[TMP2:%.*]] = extractelement <6 x i64> [[TMP15]], i64 [[TMP1]]
+; CHECK-NEXT: [[TMP7:%.*]] = trunc i64 [[SEL3]] to i32
+; CHECK-NEXT: [[TMP16:%.*]] = mul i32 [[TMP7]], 3
+; CHECK-NEXT: [[TMP2:%.*]] = extractelement <6 x i64> [[TMP15]], i32 [[TMP16]]
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <3 x i64> poison, i64 [[TMP2]], i64 0
-; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[TMP1]], 1
-; CHECK-NEXT: [[TMP5:%.*]] = extractelement <6 x i64> [[TMP15]], i64 [[TMP4]]
+; CHECK-NEXT: [[TMP17:%.*]] = add i32 [[TMP16]], 1
+; CHECK-NEXT: [[TMP5:%.*]] = extractelement <6 x i64> [[TMP15]], i32 [[TMP17]]
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <3 x i64> [[TMP3]], i64 [[TMP5]], i64 1
-; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[TMP1]], 2
-; CHECK-NEXT: [[TMP8:%.*]] = extractelement <6 x i64> [[TMP15]], i64 [[TMP7]]
+; CHECK-NEXT: [[TMP18:%.*]] = add i32 [[TMP16]], 2
+; CHECK-NEXT: [[TMP8:%.*]] = extractelement <6 x i64> [[TMP15]], i32 [[TMP18]]
; CHECK-NEXT: [[TMP9:%.*]] = insertelement <3 x i64> [[TMP6]], i64 [[TMP8]], i64 2
; CHECK-NEXT: [[ELEM:%.*]] = extractelement <3 x i64> [[TMP9]], i32 2
; CHECK-NEXT: store i64 [[ELEM]], ptr [[OUT]], align 8
@@ -311,15 +312,16 @@ define amdgpu_kernel void @i64_2d_load_store_subvec_3_i64_offset_index(ptr %out)
; CHECK-NEXT: [[TMP14:%.*]] = insertelement <6 x i64> [[TMP13]], i64 3, i32 3
; CHECK-NEXT: [[TMP15:%.*]] = insertelement <6 x i64> [[TMP14]], i64 4, i32 4
; CHECK-NEXT: [[TMP16:%.*]] = insertelement <6 x i64> [[TMP15]], i64 5, i32 5
-; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[SEL3]], 3
-; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[TMP1]], 6
-; CHECK-NEXT: [[TMP3:%.*]] = extractelement <6 x i64> [[TMP16]], i64 [[TMP2]]
+; CHECK-NEXT: [[TMP17:%.*]] = trunc i64 [[SEL3]] to i32
+; CHECK-NEXT: [[TMP8:%.*]] = mul i32 [[TMP17]], 3
+; CHECK-NEXT: [[TMP18:%.*]] = add i32 [[TMP8]], 6
+; CHECK-NEXT: [[TMP3:%.*]] = extractelement <6 x i64> [[TMP16]], i32 [[TMP18]]
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <3 x i64> poison, i64 [[TMP3]], i64 0
-; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[TMP2]], 1
-; CHECK-NEXT: [[TMP6:%.*]] = extractelement <6 x i64> [[TMP16]], i64 [[TMP5]]
+; CHECK-NEXT: [[TMP19:%.*]] = add i32 [[TMP18]], 1
+; CHECK-NEXT: [[TMP6:%.*]] = extractelement <6 x i64> [[TMP16]], i32 [[TMP19]]
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <3 x i64> [[TMP4]], i64 [[TMP6]], i64 1
-; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[TMP2]], 2
-; CHECK-NEXT: [[TMP9:%.*]] = extractelement <6 x i64> [[TMP16]], i64 [[TMP8]]
+; CHECK-NEXT: [[TMP20:%.*]] = add i32 [[TMP18]], 2
+; CHECK-NEXT: [[TMP9:%.*]] = extractelement <6 x i64> [[TMP16]], i32 [[TMP20]]
; CHECK-NEXT: [[TMP10:%.*]] = insertelement <3 x i64> [[TMP7]], i64 [[TMP9]], i64 2
; CHECK-NEXT: [[ELEM:%.*]] = extractelement <3 x i64> [[TMP10]], i32 2
; CHECK-NEXT: store i64 [[ELEM]], ptr [[OUT]], align 8
diff --git a/llvm/test/CodeGen/AMDGPU/promote-alloca-negative-index.ll b/llvm/test/CodeGen/AMDGPU/promote-alloca-negative-index.ll
index a865bf5058d6a..7da441f2e79d2 100644
--- a/llvm/test/CodeGen/AMDGPU/promote-alloca-negative-index.ll
+++ b/llvm/test/CodeGen/AMDGPU/promote-alloca-negative-index.ll
@@ -11,8 +11,10 @@ define amdgpu_kernel void @negative_index_byte(ptr %out, i64 %offset) {
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i8> [[TMP1]], i8 1, i32 1
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i8> [[TMP2]], i8 2, i32 2
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i8> [[TMP3]], i8 3, i32 3
-; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET:%.*]], -1
-; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i8> [[TMP4]], i64 [[TMP5]]
+; CHECK-NEXT: [[TMP5:%.*]] = trunc i64 [[OFFSET:%.*]] to i32
+; CHECK-NEXT: [[TMP8:%.*]] = trunc i64 [[OFFSET]] to i32
+; CHECK-NEXT: [[TMP7:%.*]] = add i32 [[TMP8]], -1
+; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i8> [[TMP4]], i32 [[TMP7]]
; CHECK-NEXT: store i8 [[TMP6]], ptr [[OUT:%.*]], align 1
; CHECK-NEXT: ret void
;
@@ -39,8 +41,10 @@ define amdgpu_kernel void @negative_index_word(ptr %out, i64 %offset) {
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 1, i32 1
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 2, i32 2
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> [[TMP3]], i32 3, i32 3
-; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET:%.*]], -1
-; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i32> [[TMP4]], i64 [[TMP5]]
+; CHECK-NEXT: [[TMP5:%.*]] = trunc i64 [[OFFSET:%.*]] to i32
+; CHECK-NEXT: [[TMP8:%.*]] = trunc i64 [[OFFSET]] to i32
+; CHECK-NEXT: [[TMP7:%.*]] = add i32 [[TMP8]], -1
+; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i32> [[TMP4]], i32 [[TMP7]]
; CHECK-NEXT: store i32 [[TMP6]], ptr [[OUT:%.*]], align 4
; CHECK-NEXT: ret void
;
diff --git a/llvm/test/CodeGen/AMDGPU/promote-alloca-vector-to-vector.ll b/llvm/test/CodeGen/AMDGPU/promote-alloca-vector-to-vector.ll
index 9fb73963153a2..aaec725f85890 100644
--- a/llvm/test/CodeGen/AMDGPU/promote-alloca-vector-to-vector.ll
+++ b/llvm/test/CodeGen/AMDGPU/promote-alloca-vector-to-vector.ll
@@ -1,6 +1,6 @@
; RUN: llc -mtriple=amdgcn -mcpu=fiji < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
-; RUN: opt -S -mtriple=amdgcn-- -data-layout=A5 -mcpu=fiji -passes=sroa,amdgpu-promote-alloca < %s | FileCheck -check-prefix=OPT %s
+; RUN: opt -S -mtriple=amdgcn-- -mcpu=fiji -passes=sroa,amdgpu-promote-alloca < %s | FileCheck -check-prefix=OPT %s
; GCN-LABEL: {{^}}float4_alloca_store4:
; OPT-LABEL: define amdgpu_kernel void @float4_alloca_store4
More information about the llvm-commits
mailing list