[llvm] 6e41055 - [NFC][AMDGPU] Improve code introduced in #124607 (#124672)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Jan 27 19:57:20 PST 2025
Author: Shilei Tian
Date: 2025-01-27T22:57:16-05:00
New Revision: 6e4105574ebb1c4a664c5b24a4fb2b6cbc51d73e
URL: https://github.com/llvm/llvm-project/commit/6e4105574ebb1c4a664c5b24a4fb2b6cbc51d73e
DIFF: https://github.com/llvm/llvm-project/commit/6e4105574ebb1c4a664c5b24a4fb2b6cbc51d73e.diff
LOG: [NFC][AMDGPU] Improve code introduced in #124607 (#124672)
Added:
Modified:
llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
llvm/test/CodeGen/AMDGPU/promote-alloca-invariant-markers.ll
Removed:
llvm/test/CodeGen/AMDGPU/promote-alloca-invariant-marks.ll
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
index fec0ebcd705936..28a27ffc136776 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
@@ -1557,26 +1557,16 @@ bool AMDGPUPromoteAllocaImpl::tryPromoteAllocaToLDS(AllocaInst &I,
case Intrinsic::invariant_end:
case Intrinsic::launder_invariant_group:
case Intrinsic::strip_invariant_group: {
- SmallVector<Type *> ArgTy;
SmallVector<Value *> Args;
if (Intr->getIntrinsicID() == Intrinsic::invariant_start) {
- Value *Size = Intr->getArgOperand(0);
- ArgTy.emplace_back(Offset->getType());
- Args.emplace_back(Size);
- Args.emplace_back(Offset);
+ Args.emplace_back(Intr->getArgOperand(0));
} else if (Intr->getIntrinsicID() == Intrinsic::invariant_end) {
- Value *InvariantPtr = Intr->getArgOperand(0);
- Value *Size = Intr->getArgOperand(1);
- ArgTy.emplace_back(Offset->getType());
- Args.emplace_back(InvariantPtr);
- Args.emplace_back(Size);
- Args.emplace_back(Offset);
- } else {
- ArgTy.emplace_back(Offset->getType());
- Args.emplace_back(Offset);
+ Args.emplace_back(Intr->getArgOperand(0));
+ Args.emplace_back(Intr->getArgOperand(1));
}
+ Args.emplace_back(Offset);
Function *F = Intrinsic::getOrInsertDeclaration(
- Intr->getModule(), Intr->getIntrinsicID(), ArgTy);
+ Intr->getModule(), Intr->getIntrinsicID(), Offset->getType());
CallInst *NewIntr =
CallInst::Create(F, Args, Intr->getName(), Intr->getIterator());
Intr->mutateType(NewIntr->getType());
diff --git a/llvm/test/CodeGen/AMDGPU/promote-alloca-invariant-markers.ll b/llvm/test/CodeGen/AMDGPU/promote-alloca-invariant-markers.ll
index a585901fc377cb..e100e7a41472c1 100644
--- a/llvm/test/CodeGen/AMDGPU/promote-alloca-invariant-markers.ll
+++ b/llvm/test/CodeGen/AMDGPU/promote-alloca-invariant-markers.ll
@@ -1,25 +1,75 @@
-; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn-- -mattr=+promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
-target datalayout = "A5"
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-promote-alloca %s -o - | FileCheck %s
-declare ptr @llvm.invariant.start.p5(i64, ptr addrspace(5) nocapture) #0
-declare void @llvm.invariant.end.p5(ptr, i64, ptr addrspace(5) nocapture) #0
-declare ptr addrspace(5) @llvm.launder.invariant.group.p5(ptr addrspace(5)) #1
+declare ptr @llvm.invariant.start.p5(i64, ptr addrspace(5) nocapture)
+declare void @llvm.invariant.end.p5(ptr, i64, ptr addrspace(5) nocapture)
+declare ptr addrspace(5) @llvm.launder.invariant.group.p5(ptr addrspace(5))
+declare ptr addrspace(5) @llvm.strip.invariant.group.p5(ptr addrspace(5))
-; GCN-LABEL: {{^}}use_invariant_promotable_lds:
-; GCN: buffer_load_dword
-; GCN: ds_write_b32
-define amdgpu_kernel void @use_invariant_promotable_lds(ptr addrspace(1) %arg) #2 {
+define amdgpu_kernel void @use_invariant_start_and_end() {
+; CHECK-LABEL: define amdgpu_kernel void @use_invariant_start_and_end() {
+; CHECK-NEXT: [[BB:.*:]]
+; CHECK-NEXT: [[TMP0:%.*]] = call noalias nonnull dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP0]], i64 1
+; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[TMP1]], align 4, !invariant.load [[META0:![0-9]+]]
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP0]], i64 2
+; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(4) [[TMP3]], align 4, !range [[RNG1:![0-9]+]], !invariant.load [[META0]]
+; CHECK-NEXT: [[TMP5:%.*]] = lshr i32 [[TMP2]], 16
+; CHECK-NEXT: [[TMP6:%.*]] = call range(i32 0, 1024) i32 @llvm.amdgcn.workitem.id.x()
+; CHECK-NEXT: [[TMP7:%.*]] = call range(i32 0, 1024) i32 @llvm.amdgcn.workitem.id.y()
+; CHECK-NEXT: [[TMP8:%.*]] = call range(i32 0, 1024) i32 @llvm.amdgcn.workitem.id.z()
+; CHECK-NEXT: [[TMP9:%.*]] = mul nuw nsw i32 [[TMP5]], [[TMP4]]
+; CHECK-NEXT: [[TMP10:%.*]] = mul i32 [[TMP9]], [[TMP6]]
+; CHECK-NEXT: [[TMP11:%.*]] = mul nuw nsw i32 [[TMP7]], [[TMP4]]
+; CHECK-NEXT: [[TMP12:%.*]] = add i32 [[TMP10]], [[TMP11]]
+; CHECK-NEXT: [[TMP13:%.*]] = add i32 [[TMP12]], [[TMP8]]
+; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1024 x i32], ptr addrspace(3) @use_invariant_start_and_end.alloca, i32 0, i32 [[TMP13]]
+; CHECK-NEXT: [[INVARIANT1:%.*]] = call ptr @llvm.invariant.start.p3(i64 0, ptr addrspace(3) [[TMP14]])
+; CHECK-NEXT: store <2 x i1> zeroinitializer, ptr [[INVARIANT1]], align 1
+; CHECK-NEXT: call void @llvm.invariant.end.p3(ptr [[INVARIANT1]], i64 0, ptr addrspace(3) [[TMP14]])
+; CHECK-NEXT: ret void
+;
bb:
- %tmp = alloca i32, align 4, addrspace(5)
- %tmp2 = getelementptr inbounds i32, ptr addrspace(1) %arg, i64 1
- %tmp3 = load i32, ptr addrspace(1) %tmp2
- store i32 %tmp3, ptr addrspace(5) %tmp
- %tmp4 = call ptr @llvm.invariant.start.p5(i64 4, ptr addrspace(5) %tmp) #0
- call void @llvm.invariant.end.p5(ptr %tmp4, i64 4, ptr addrspace(5) %tmp) #0
- %tmp5 = call ptr addrspace(5) @llvm.launder.invariant.group.p5(ptr addrspace(5) %tmp) #1
+ %alloca = alloca i32, align 4, addrspace(5)
+ %invariant = call ptr @llvm.invariant.start.p5(i64 0, ptr addrspace(5) %alloca)
+ store <2 x i1> zeroinitializer, ptr %invariant, align 1
+ call void @llvm.invariant.end.p5(ptr %invariant, i64 0, ptr addrspace(5) %alloca)
ret void
}
-attributes #0 = { argmemonly nounwind }
-attributes #1 = { nounwind readnone }
-attributes #2 = { nounwind }
+define amdgpu_kernel void @use_invariant_group_and_strip() {
+; CHECK-LABEL: define amdgpu_kernel void @use_invariant_group_and_strip() {
+; CHECK-NEXT: [[BB:.*:]]
+; CHECK-NEXT: [[TMP0:%.*]] = call noalias nonnull dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP0]], i64 1
+; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[TMP1]], align 4, !invariant.load [[META0]]
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP0]], i64 2
+; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(4) [[TMP3]], align 4, !range [[RNG1]], !invariant.load [[META0]]
+; CHECK-NEXT: [[TMP5:%.*]] = lshr i32 [[TMP2]], 16
+; CHECK-NEXT: [[TMP6:%.*]] = call range(i32 0, 1024) i32 @llvm.amdgcn.workitem.id.x()
+; CHECK-NEXT: [[TMP7:%.*]] = call range(i32 0, 1024) i32 @llvm.amdgcn.workitem.id.y()
+; CHECK-NEXT: [[TMP8:%.*]] = call range(i32 0, 1024) i32 @llvm.amdgcn.workitem.id.z()
+; CHECK-NEXT: [[TMP9:%.*]] = mul nuw nsw i32 [[TMP5]], [[TMP4]]
+; CHECK-NEXT: [[TMP10:%.*]] = mul i32 [[TMP9]], [[TMP6]]
+; CHECK-NEXT: [[TMP11:%.*]] = mul nuw nsw i32 [[TMP7]], [[TMP4]]
+; CHECK-NEXT: [[TMP12:%.*]] = add i32 [[TMP10]], [[TMP11]]
+; CHECK-NEXT: [[TMP13:%.*]] = add i32 [[TMP12]], [[TMP8]]
+; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1024 x i32], ptr addrspace(3) @use_invariant_group_and_strip.alloca, i32 0, i32 [[TMP13]]
+; CHECK-NEXT: [[INVARIANT2:%.*]] = call ptr addrspace(3) @llvm.launder.invariant.group.p3(ptr addrspace(3) [[TMP14]])
+; CHECK-NEXT: store <2 x i1> zeroinitializer, ptr addrspace(3) [[INVARIANT2]], align 1
+; CHECK-NEXT: [[STRIP1:%.*]] = call ptr addrspace(3) @llvm.strip.invariant.group.p3(ptr addrspace(3) [[TMP14]])
+; CHECK-NEXT: store <2 x i1> zeroinitializer, ptr addrspace(3) [[STRIP1]], align 1
+; CHECK-NEXT: ret void
+;
+bb:
+ %alloca = alloca i32, align 4, addrspace(5)
+ %invariant = call ptr addrspace(5) @llvm.launder.invariant.group.p5(ptr addrspace(5) %alloca)
+ store <2 x i1> zeroinitializer, ptr addrspace(5) %invariant, align 1
+ %strip = call ptr addrspace(5) @llvm.strip.invariant.group.p5(ptr addrspace(5) %alloca)
+ store <2 x i1> zeroinitializer, ptr addrspace(5) %strip, align 1
+ ret void
+}
+;.
+; CHECK: [[META0]] = !{}
+; CHECK: [[RNG1]] = !{i32 0, i32 1025}
+;.
diff --git a/llvm/test/CodeGen/AMDGPU/promote-alloca-invariant-marks.ll b/llvm/test/CodeGen/AMDGPU/promote-alloca-invariant-marks.ll
deleted file mode 100644
index fca4be5e76daea..00000000000000
--- a/llvm/test/CodeGen/AMDGPU/promote-alloca-invariant-marks.ll
+++ /dev/null
@@ -1,75 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
-; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-promote-alloca %s -o - | FileCheck %s
-
-declare ptr @llvm.invariant.start.p5(i64, ptr addrspace(5) nocapture)
-declare void @llvm.invariant.end.p5(ptr, i64, ptr addrspace(5) nocapture)
-declare ptr addrspace(5) @llvm.launder.invariant.group.p5(ptr addrspace(5))
-declare ptr addrspace(5) @llvm.strip.invariant.group.p5(ptr addrspace(5))
-
-define amdgpu_kernel void @use_invariant_start_and_end() {
-; CHECK-LABEL: define amdgpu_kernel void @use_invariant_start_and_end() {
-; CHECK-NEXT: [[BB:.*:]]
-; CHECK-NEXT: [[TMP0:%.*]] = call noalias nonnull dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
-; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP0]], i64 1
-; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[TMP1]], align 4, !invariant.load [[META0:![0-9]+]]
-; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP0]], i64 2
-; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(4) [[TMP3]], align 4, !range [[RNG1:![0-9]+]], !invariant.load [[META0]]
-; CHECK-NEXT: [[TMP5:%.*]] = lshr i32 [[TMP2]], 16
-; CHECK-NEXT: [[TMP6:%.*]] = call range(i32 0, 1024) i32 @llvm.amdgcn.workitem.id.x()
-; CHECK-NEXT: [[TMP7:%.*]] = call range(i32 0, 1024) i32 @llvm.amdgcn.workitem.id.y()
-; CHECK-NEXT: [[TMP8:%.*]] = call range(i32 0, 1024) i32 @llvm.amdgcn.workitem.id.z()
-; CHECK-NEXT: [[TMP9:%.*]] = mul nuw nsw i32 [[TMP5]], [[TMP4]]
-; CHECK-NEXT: [[TMP10:%.*]] = mul i32 [[TMP9]], [[TMP6]]
-; CHECK-NEXT: [[TMP11:%.*]] = mul nuw nsw i32 [[TMP7]], [[TMP4]]
-; CHECK-NEXT: [[TMP12:%.*]] = add i32 [[TMP10]], [[TMP11]]
-; CHECK-NEXT: [[TMP13:%.*]] = add i32 [[TMP12]], [[TMP8]]
-; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1024 x i32], ptr addrspace(3) @use_invariant_start_and_end.alloca, i32 0, i32 [[TMP13]]
-; CHECK-NEXT: [[INVARIENT1:%.*]] = call ptr @llvm.invariant.start.p3(i64 0, ptr addrspace(3) [[TMP14]])
-; CHECK-NEXT: store <2 x i1> zeroinitializer, ptr [[INVARIENT1]], align 1
-; CHECK-NEXT: call void @llvm.invariant.end.p3(ptr [[INVARIENT1]], i64 0, ptr addrspace(3) [[TMP14]])
-; CHECK-NEXT: ret void
-;
-bb:
- %alloca = alloca i32, align 4, addrspace(5)
- %invarient = call ptr @llvm.invariant.start.p5(i64 0, ptr addrspace(5) %alloca)
- store <2 x i1> zeroinitializer, ptr %invarient, align 1
- call void @llvm.invariant.end.p5(ptr %invarient, i64 0, ptr addrspace(5) %alloca)
- ret void
-}
-
-define amdgpu_kernel void @use_invariant_group_and_strip() {
-; CHECK-LABEL: define amdgpu_kernel void @use_invariant_group_and_strip() {
-; CHECK-NEXT: [[BB:.*:]]
-; CHECK-NEXT: [[TMP0:%.*]] = call noalias nonnull dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
-; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP0]], i64 1
-; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[TMP1]], align 4, !invariant.load [[META0]]
-; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP0]], i64 2
-; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(4) [[TMP3]], align 4, !range [[RNG1]], !invariant.load [[META0]]
-; CHECK-NEXT: [[TMP5:%.*]] = lshr i32 [[TMP2]], 16
-; CHECK-NEXT: [[TMP6:%.*]] = call range(i32 0, 1024) i32 @llvm.amdgcn.workitem.id.x()
-; CHECK-NEXT: [[TMP7:%.*]] = call range(i32 0, 1024) i32 @llvm.amdgcn.workitem.id.y()
-; CHECK-NEXT: [[TMP8:%.*]] = call range(i32 0, 1024) i32 @llvm.amdgcn.workitem.id.z()
-; CHECK-NEXT: [[TMP9:%.*]] = mul nuw nsw i32 [[TMP5]], [[TMP4]]
-; CHECK-NEXT: [[TMP10:%.*]] = mul i32 [[TMP9]], [[TMP6]]
-; CHECK-NEXT: [[TMP11:%.*]] = mul nuw nsw i32 [[TMP7]], [[TMP4]]
-; CHECK-NEXT: [[TMP12:%.*]] = add i32 [[TMP10]], [[TMP11]]
-; CHECK-NEXT: [[TMP13:%.*]] = add i32 [[TMP12]], [[TMP8]]
-; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1024 x i32], ptr addrspace(3) @use_invariant_group_and_strip.alloca, i32 0, i32 [[TMP13]]
-; CHECK-NEXT: [[INVARIENT2:%.*]] = call ptr addrspace(3) @llvm.launder.invariant.group.p3(ptr addrspace(3) [[TMP14]])
-; CHECK-NEXT: store <2 x i1> zeroinitializer, ptr addrspace(3) [[INVARIENT2]], align 1
-; CHECK-NEXT: [[STRIP1:%.*]] = call ptr addrspace(3) @llvm.strip.invariant.group.p3(ptr addrspace(3) [[TMP14]])
-; CHECK-NEXT: store <2 x i1> zeroinitializer, ptr addrspace(3) [[STRIP1]], align 1
-; CHECK-NEXT: ret void
-;
-bb:
- %alloca = alloca i32, align 4, addrspace(5)
- %invarient = call ptr addrspace(5) @llvm.launder.invariant.group.p5(ptr addrspace(5) %alloca)
- store <2 x i1> zeroinitializer, ptr addrspace(5) %invarient, align 1
- %strip = call ptr addrspace(5) @llvm.strip.invariant.group.p5(ptr addrspace(5) %alloca)
- store <2 x i1> zeroinitializer, ptr addrspace(5) %strip, align 1
- ret void
-}
-;.
-; CHECK: [[META0]] = !{}
-; CHECK: [[RNG1]] = !{i32 0, i32 1025}
-;.
More information about the llvm-commits
mailing list