[llvm-branch-commits] [llvm] PR for llvm/llvm-project#80694 (PR #80695)
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Mon Feb 5 07:56:52 PST 2024
https://github.com/llvmbot updated https://github.com/llvm/llvm-project/pull/80695
>From 09303e727e515a7856d5f4cb100c5a9dec00b626 Mon Sep 17 00:00:00 2001
From: Pierre van Houtryve <pierre.vanhoutryve at amd.com>
Date: Mon, 5 Feb 2024 14:36:15 +0100
Subject: [PATCH] [AMDGPU][PromoteAlloca] Support memsets to ptr allocas
(#80678)
Fixes #80366
(cherry picked from commit 4e958abf2f44d08129eafd5b6a4ee2bd3584ed22)
---
.../lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp | 16 ++++--
.../CodeGen/AMDGPU/promote-alloca-memset.ll | 54 +++++++++++++++++++
2 files changed, 66 insertions(+), 4 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
index 5e73411cae9b70..c1b244f50d93f8 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
@@ -521,10 +521,18 @@ static Value *promoteAllocaUserToVector(
// For memset, we don't need to know the previous value because we
// currently only allow memsets that cover the whole alloca.
Value *Elt = MSI->getOperand(1);
- if (DL.getTypeStoreSize(VecEltTy) > 1) {
- Value *EltBytes =
- Builder.CreateVectorSplat(DL.getTypeStoreSize(VecEltTy), Elt);
- Elt = Builder.CreateBitCast(EltBytes, VecEltTy);
+ const unsigned BytesPerElt = DL.getTypeStoreSize(VecEltTy);
+ if (BytesPerElt > 1) {
+ Value *EltBytes = Builder.CreateVectorSplat(BytesPerElt, Elt);
+
+ // If the element type of the vector is a pointer, we need to first bitcast
+ // the splat to an integer of the same size, then convert it with IntToPtr.
+ if (VecEltTy->isPointerTy()) {
+ Type *PtrInt = Builder.getIntNTy(BytesPerElt * 8);
+ Elt = Builder.CreateBitCast(EltBytes, PtrInt);
+ Elt = Builder.CreateIntToPtr(Elt, VecEltTy);
+ } else
+ Elt = Builder.CreateBitCast(EltBytes, VecEltTy);
}
return Builder.CreateVectorSplat(VectorTy->getElementCount(), Elt);
diff --git a/llvm/test/CodeGen/AMDGPU/promote-alloca-memset.ll b/llvm/test/CodeGen/AMDGPU/promote-alloca-memset.ll
index 15af1f17e230ec..f1e2737b370ef0 100644
--- a/llvm/test/CodeGen/AMDGPU/promote-alloca-memset.ll
+++ b/llvm/test/CodeGen/AMDGPU/promote-alloca-memset.ll
@@ -84,4 +84,58 @@ entry:
ret void
}
+define amdgpu_kernel void @memset_array_ptr_alloca(ptr %out) {
+; CHECK-LABEL: @memset_array_ptr_alloca(
+; CHECK-NEXT: store i64 0, ptr [[OUT:%.*]], align 8
+; CHECK-NEXT: ret void
+;
+ %alloca = alloca [6 x ptr], align 16, addrspace(5)
+ call void @llvm.memset.p5.i64(ptr addrspace(5) %alloca, i8 0, i64 48, i1 false)
+ %load = load i64, ptr addrspace(5) %alloca
+ store i64 %load, ptr %out
+ ret void
+}
+
+define amdgpu_kernel void @memset_vector_ptr_alloca(ptr %out) {
+; CHECK-LABEL: @memset_vector_ptr_alloca(
+; CHECK-NEXT: store i64 0, ptr [[OUT:%.*]], align 8
+; CHECK-NEXT: ret void
+;
+ %alloca = alloca <6 x ptr>, align 16, addrspace(5)
+ call void @llvm.memset.p5.i64(ptr addrspace(5) %alloca, i8 0, i64 48, i1 false)
+ %load = load i64, ptr addrspace(5) %alloca
+ store i64 %load, ptr %out
+ ret void
+}
+
+define amdgpu_kernel void @memset_array_of_array_ptr_alloca(ptr %out) {
+; CHECK-LABEL: @memset_array_of_array_ptr_alloca(
+; CHECK-NEXT: [[ALLOCA:%.*]] = alloca [2 x [3 x ptr]], align 16, addrspace(5)
+; CHECK-NEXT: call void @llvm.memset.p5.i64(ptr addrspace(5) [[ALLOCA]], i8 0, i64 48, i1 false)
+; CHECK-NEXT: [[LOAD:%.*]] = load i64, ptr addrspace(5) [[ALLOCA]], align 8
+; CHECK-NEXT: store i64 [[LOAD]], ptr [[OUT:%.*]], align 8
+; CHECK-NEXT: ret void
+;
+ %alloca = alloca [2 x [3 x ptr]], align 16, addrspace(5)
+ call void @llvm.memset.p5.i64(ptr addrspace(5) %alloca, i8 0, i64 48, i1 false)
+ %load = load i64, ptr addrspace(5) %alloca
+ store i64 %load, ptr %out
+ ret void
+}
+
+define amdgpu_kernel void @memset_array_of_vec_ptr_alloca(ptr %out) {
+; CHECK-LABEL: @memset_array_of_vec_ptr_alloca(
+; CHECK-NEXT: [[ALLOCA:%.*]] = alloca [2 x <3 x ptr>], align 16, addrspace(5)
+; CHECK-NEXT: call void @llvm.memset.p5.i64(ptr addrspace(5) [[ALLOCA]], i8 0, i64 48, i1 false)
+; CHECK-NEXT: [[LOAD:%.*]] = load i64, ptr addrspace(5) [[ALLOCA]], align 8
+; CHECK-NEXT: store i64 [[LOAD]], ptr [[OUT:%.*]], align 8
+; CHECK-NEXT: ret void
+;
+ %alloca = alloca [2 x <3 x ptr>], align 16, addrspace(5)
+ call void @llvm.memset.p5.i64(ptr addrspace(5) %alloca, i8 0, i64 48, i1 false)
+ %load = load i64, ptr addrspace(5) %alloca
+ store i64 %load, ptr %out
+ ret void
+}
+
declare void @llvm.memset.p5.i64(ptr addrspace(5) nocapture writeonly, i8, i64, i1 immarg)
More information about the llvm-branch-commits
mailing list