[llvm] [AMDGPU] Preserve `noundef` and `range` during kernel argument loads (PR #118395)
Krzysztof Drewniak via llvm-commits
llvm-commits at lists.llvm.org
Mon Dec 2 12:45:02 PST 2024
https://github.com/krzysz00 created https://github.com/llvm/llvm-project/pull/118395
This commit ensures that noundef (which is frequently a prerequisite for other annotations) and range() annotations on kernel arguments are copied onto their corresponding load from the kernel argument structure.
>From 4ea71e1efdff5d71e2aaaa0a711764756454ac44 Mon Sep 17 00:00:00 2001
From: Krzysztof Drewniak <Krzysztof.Drewniak at amd.com>
Date: Mon, 2 Dec 2024 20:42:19 +0000
Subject: [PATCH] [AMDGPU] Preserve `noundef` and `range` during kernel
argument loads
This commit ensures that noundef (which is frequently a prerequisite
for other annotations) and range() annotations on kernel arguments are
copied onto their corresponding load from the kernel argument
structure.
---
.../AMDGPU/AMDGPULowerKernelArguments.cpp | 11 +++++
llvm/test/CodeGen/AMDGPU/lower-kernargs.ll | 49 +++++++++++++------
2 files changed, 46 insertions(+), 14 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp
index 380633f22a1781..9de4cf82d0faca 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp
@@ -15,6 +15,7 @@
#include "GCNSubtarget.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/IR/Attributes.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/MDBuilder.h"
@@ -416,6 +417,16 @@ static bool lowerKernelArguments(Function &F, const TargetMachine &TM) {
MDBuilder MDB(Ctx);
+ if (Arg.hasAttribute(Attribute::NoUndef))
+ Load->setMetadata(LLVMContext::MD_noundef, MDNode::get(Ctx, {}));
+
+ if (Arg.hasAttribute(Attribute::Range)) {
+ const ConstantRange &Range =
+ Arg.getAttribute(Attribute::Range).getValueAsConstantRange();
+ Load->setMetadata(LLVMContext::MD_range,
+ MDB.createRange(Range.getLower(), Range.getUpper()));
+ }
+
if (isa<PointerType>(ArgTy)) {
if (Arg.hasNonNullAttr())
Load->setMetadata(LLVMContext::MD_nonnull, MDNode::get(Ctx, {}));
diff --git a/llvm/test/CodeGen/AMDGPU/lower-kernargs.ll b/llvm/test/CodeGen/AMDGPU/lower-kernargs.ll
index 043294f5a11634..8b2bf911b0dbc3 100644
--- a/llvm/test/CodeGen/AMDGPU/lower-kernargs.ll
+++ b/llvm/test/CodeGen/AMDGPU/lower-kernargs.ll
@@ -255,6 +255,25 @@ define amdgpu_kernel void @kern_i32(i32 %arg0) {
ret void
}
+define amdgpu_kernel void @kern_range_noundef_i32(i32 noundef range(i32 0, 8) %arg0) {
+; HSA-LABEL: @kern_range_noundef_i32(
+; HSA-NEXT: [[KERN_RANGE_NOUNDEF_I32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
+; HSA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_RANGE_NOUNDEF_I32_KERNARG_SEGMENT]], i64 0
+; HSA-NEXT: [[ARG0_LOAD:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET]], align 16, !range [[RNG2:![0-9]+]], !invariant.load [[META1]], !noundef [[META1]]
+; HSA-NEXT: store volatile i32 [[ARG0_LOAD]], ptr addrspace(1) poison, align 4
+; HSA-NEXT: ret void
+;
+; MESA-LABEL: @kern_range_noundef_i32(
+; MESA-NEXT: [[KERN_RANGE_NOUNDEF_I32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
+; MESA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_RANGE_NOUNDEF_I32_KERNARG_SEGMENT]], i64 36
+; MESA-NEXT: [[ARG0_LOAD:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET]], align 4, !range [[RNG2:![0-9]+]], !invariant.load [[META1]], !noundef [[META1]]
+; MESA-NEXT: store volatile i32 [[ARG0_LOAD]], ptr addrspace(1) poison, align 4
+; MESA-NEXT: ret void
+;
+ store volatile i32 %arg0, ptr addrspace(1) poison
+ ret void
+}
+
define amdgpu_kernel void @kern_f32(float %arg0) {
; HSA-LABEL: @kern_f32(
; HSA-NEXT: [[KERN_F32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
@@ -1022,14 +1041,14 @@ define amdgpu_kernel void @kern_global_ptr_dereferencable(ptr addrspace(1) deref
; HSA-LABEL: @kern_global_ptr_dereferencable(
; HSA-NEXT: [[KERN_GLOBAL_PTR_DEREFERENCABLE_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT: [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_GLOBAL_PTR_DEREFERENCABLE_KERNARG_SEGMENT]], i64 0
-; HSA-NEXT: [[PTR_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[PTR_KERNARG_OFFSET]], align 16, !invariant.load [[META1]], !dereferenceable [[META2:![0-9]+]]
+; HSA-NEXT: [[PTR_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[PTR_KERNARG_OFFSET]], align 16, !invariant.load [[META1]], !dereferenceable [[META3:![0-9]+]]
; HSA-NEXT: store volatile ptr addrspace(1) [[PTR_LOAD]], ptr addrspace(1) undef, align 8
; HSA-NEXT: ret void
;
; MESA-LABEL: @kern_global_ptr_dereferencable(
; MESA-NEXT: [[KERN_GLOBAL_PTR_DEREFERENCABLE_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT: [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_GLOBAL_PTR_DEREFERENCABLE_KERNARG_SEGMENT]], i64 36
-; MESA-NEXT: [[PTR_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[PTR_KERNARG_OFFSET]], align 4, !invariant.load [[META1]], !dereferenceable [[META2:![0-9]+]]
+; MESA-NEXT: [[PTR_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[PTR_KERNARG_OFFSET]], align 4, !invariant.load [[META1]], !dereferenceable [[META3:![0-9]+]]
; MESA-NEXT: store volatile ptr addrspace(1) [[PTR_LOAD]], ptr addrspace(1) undef, align 8
; MESA-NEXT: ret void
;
@@ -1041,14 +1060,14 @@ define amdgpu_kernel void @kern_global_ptr_dereferencable_or_null(ptr addrspace(
; HSA-LABEL: @kern_global_ptr_dereferencable_or_null(
; HSA-NEXT: [[KERN_GLOBAL_PTR_DEREFERENCABLE_OR_NULL_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT: [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_GLOBAL_PTR_DEREFERENCABLE_OR_NULL_KERNARG_SEGMENT]], i64 0
-; HSA-NEXT: [[PTR_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[PTR_KERNARG_OFFSET]], align 16, !invariant.load [[META1]], !dereferenceable_or_null [[META3:![0-9]+]]
+; HSA-NEXT: [[PTR_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[PTR_KERNARG_OFFSET]], align 16, !invariant.load [[META1]], !dereferenceable_or_null [[META4:![0-9]+]]
; HSA-NEXT: store volatile ptr addrspace(1) [[PTR_LOAD]], ptr addrspace(1) undef, align 8
; HSA-NEXT: ret void
;
; MESA-LABEL: @kern_global_ptr_dereferencable_or_null(
; MESA-NEXT: [[KERN_GLOBAL_PTR_DEREFERENCABLE_OR_NULL_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT: [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_GLOBAL_PTR_DEREFERENCABLE_OR_NULL_KERNARG_SEGMENT]], i64 36
-; MESA-NEXT: [[PTR_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[PTR_KERNARG_OFFSET]], align 4, !invariant.load [[META1]], !dereferenceable_or_null [[META3:![0-9]+]]
+; MESA-NEXT: [[PTR_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[PTR_KERNARG_OFFSET]], align 4, !invariant.load [[META1]], !dereferenceable_or_null [[META4:![0-9]+]]
; MESA-NEXT: store volatile ptr addrspace(1) [[PTR_LOAD]], ptr addrspace(1) undef, align 8
; MESA-NEXT: ret void
;
@@ -1079,14 +1098,14 @@ define amdgpu_kernel void @kern_align32_global_ptr(ptr addrspace(1) align 1024 %
; HSA-LABEL: @kern_align32_global_ptr(
; HSA-NEXT: [[KERN_ALIGN32_GLOBAL_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT: [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_ALIGN32_GLOBAL_PTR_KERNARG_SEGMENT]], i64 0
-; HSA-NEXT: [[PTR_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[PTR_KERNARG_OFFSET]], align 16, !invariant.load [[META1]], !align [[META4:![0-9]+]]
+; HSA-NEXT: [[PTR_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[PTR_KERNARG_OFFSET]], align 16, !invariant.load [[META1]], !align [[META5:![0-9]+]]
; HSA-NEXT: store volatile ptr addrspace(1) [[PTR_LOAD]], ptr addrspace(1) undef, align 8
; HSA-NEXT: ret void
;
; MESA-LABEL: @kern_align32_global_ptr(
; MESA-NEXT: [[KERN_ALIGN32_GLOBAL_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT: [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_ALIGN32_GLOBAL_PTR_KERNARG_SEGMENT]], i64 36
-; MESA-NEXT: [[PTR_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[PTR_KERNARG_OFFSET]], align 4, !invariant.load [[META1]], !align [[META4:![0-9]+]]
+; MESA-NEXT: [[PTR_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[PTR_KERNARG_OFFSET]], align 4, !invariant.load [[META1]], !align [[META5:![0-9]+]]
; MESA-NEXT: store volatile ptr addrspace(1) [[PTR_LOAD]], ptr addrspace(1) undef, align 8
; MESA-NEXT: ret void
;
@@ -1120,14 +1139,14 @@ define amdgpu_kernel void @kern_noundef_global_ptr(ptr addrspace(1) noundef %ptr
; HSA-LABEL: @kern_noundef_global_ptr(
; HSA-NEXT: [[KERN_NOUNDEF_GLOBAL_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT: [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_NOUNDEF_GLOBAL_PTR_KERNARG_SEGMENT]], i64 0
-; HSA-NEXT: [[PTR_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[PTR_KERNARG_OFFSET]], align 16, !invariant.load [[META1]]
+; HSA-NEXT: [[PTR_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[PTR_KERNARG_OFFSET]], align 16, !invariant.load [[META1]], !noundef [[META1]]
; HSA-NEXT: store volatile ptr addrspace(1) [[PTR_LOAD]], ptr addrspace(1) null, align 8
; HSA-NEXT: ret void
;
; MESA-LABEL: @kern_noundef_global_ptr(
; MESA-NEXT: [[KERN_NOUNDEF_GLOBAL_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT: [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_NOUNDEF_GLOBAL_PTR_KERNARG_SEGMENT]], i64 36
-; MESA-NEXT: [[PTR_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[PTR_KERNARG_OFFSET]], align 4, !invariant.load [[META1]]
+; MESA-NEXT: [[PTR_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[PTR_KERNARG_OFFSET]], align 4, !invariant.load [[META1]], !noundef [[META1]]
; MESA-NEXT: store volatile ptr addrspace(1) [[PTR_LOAD]], ptr addrspace(1) null, align 8
; MESA-NEXT: ret void
;
@@ -1729,13 +1748,15 @@ attributes #2 = { nounwind "target-cpu"="tahiti" }
;.
; HSA: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 500}
; HSA: [[META1]] = !{}
-; HSA: [[META2]] = !{i64 42}
-; HSA: [[META3]] = !{i64 128}
-; HSA: [[META4]] = !{i64 1024}
+; HSA: [[RNG2]] = !{i32 0, i32 8}
+; HSA: [[META3]] = !{i64 42}
+; HSA: [[META4]] = !{i64 128}
+; HSA: [[META5]] = !{i64 1024}
;.
; MESA: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 500}
; MESA: [[META1]] = !{}
-; MESA: [[META2]] = !{i64 42}
-; MESA: [[META3]] = !{i64 128}
-; MESA: [[META4]] = !{i64 1024}
+; MESA: [[RNG2]] = !{i32 0, i32 8}
+; MESA: [[META3]] = !{i64 42}
+; MESA: [[META4]] = !{i64 128}
+; MESA: [[META5]] = !{i64 1024}
;.
More information about the llvm-commits
mailing list