[llvm] b359616 - [DirectX] Resources and simple GEP traversal in DXILMemIntrinsics (#173054)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Dec 22 10:47:55 PST 2025
Author: Justin Bogner
Date: 2025-12-22T11:47:50-07:00
New Revision: b3596163493e8a3342ba3fadfc3689a3354dba8b
URL: https://github.com/llvm/llvm-project/commit/b3596163493e8a3342ba3fadfc3689a3354dba8b
DIFF: https://github.com/llvm/llvm-project/commit/b3596163493e8a3342ba3fadfc3689a3354dba8b.diff
LOG: [DirectX] Resources and simple GEP traversal in DXILMemIntrinsics (#173054)
Walk through GEPs and recognize resource target extension types when
trying to infer the underlying types of memory intrinsics.
Added:
llvm/test/CodeGen/DirectX/MemIntrinsics/memcpy-pointee.ll
Modified:
llvm/include/llvm/Analysis/DXILResource.h
llvm/lib/Target/DirectX/DXILMemIntrinsics.cpp
Removed:
################################################################################
diff --git a/llvm/include/llvm/Analysis/DXILResource.h b/llvm/include/llvm/Analysis/DXILResource.h
index 2b0dcb966578e..f38de81d76a2c 100644
--- a/llvm/include/llvm/Analysis/DXILResource.h
+++ b/llvm/include/llvm/Analysis/DXILResource.h
@@ -210,6 +210,14 @@ class AnyResourceExtType : public TargetExtType {
AnyResourceExtType(const AnyResourceExtType &) = delete;
AnyResourceExtType &operator=(const AnyResourceExtType &) = delete;
+ Type *getResourceType() const {
+ // Sampler and feedback resources do not have an underlying type.
+ if (isa<SamplerExtType>(this) || isa<FeedbackTextureExtType>(this))
+ return nullptr;
+ // All other resources store the type in a parameter.
+ return getTypeParameter(0);
+ }
+
static bool classof(const TargetExtType *T) {
return isa<RawBufferExtType>(T) || isa<TypedBufferExtType>(T) ||
isa<TextureExtType>(T) || isa<MSTextureExtType>(T) ||
diff --git a/llvm/lib/Target/DirectX/DXILMemIntrinsics.cpp b/llvm/lib/Target/DirectX/DXILMemIntrinsics.cpp
index 2542fb315f89a..76352b3a2e354 100644
--- a/llvm/lib/Target/DirectX/DXILMemIntrinsics.cpp
+++ b/llvm/lib/Target/DirectX/DXILMemIntrinsics.cpp
@@ -8,8 +8,10 @@
#include "DXILMemIntrinsics.h"
#include "DirectX.h"
+#include "llvm/Analysis/DXILResource.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/IntrinsicsDirectX.h"
#include "llvm/IR/Module.h"
#define DEBUG_TYPE "dxil-mem-intrinsics"
@@ -60,6 +62,53 @@ void expandMemSet(MemSetInst *MemSet) {
MemSet->eraseFromParent();
}
+static Type *getPointeeType(Value *Ptr, const DataLayout &DL) {
+ if (auto *GV = dyn_cast<GlobalVariable>(Ptr))
+ return GV->getValueType();
+ if (auto *AI = dyn_cast<AllocaInst>(Ptr))
+ return AI->getAllocatedType();
+
+ if (auto *II = dyn_cast<IntrinsicInst>(Ptr)) {
+ if (II->getIntrinsicID() == Intrinsic::dx_resource_getpointer) {
+ Type *Ty = cast<dxil::AnyResourceExtType>(II->getArgOperand(0)->getType())
+ ->getResourceType();
+ assert(Ty && "getpointer used on untyped resource");
+ return Ty;
+ }
+ }
+
+ if (auto *GEP = dyn_cast<GEPOperator>(Ptr)) {
+ Type *Ty = GEP->getResultElementType();
+ if (!Ty->isIntegerTy(8))
+ return Ty;
+
+ // We have ptradd, so we have to hope there's enough information to work out
+ // what we're indexing.
+ Type *IndexedType = getPointeeType(GEP->getPointerOperand(), DL);
+ if (auto *AT = dyn_cast<ArrayType>(IndexedType))
+ return AT->getElementType();
+
+ if (auto *ST = dyn_cast<StructType>(IndexedType)) {
+ // Indexing a struct should always be constant
+ APInt ConstantOffset(DL.getIndexTypeSizeInBits(GEP->getType()), 0);
+ [[maybe_unused]] bool IsConst =
+ GEP->accumulateConstantOffset(DL, ConstantOffset);
+ assert(IsConst && "Non-constant GEP into struct?");
+
+ // Now, work out what we'll find at that offset.
+ const StructLayout *Layout = DL.getStructLayout(ST);
+ unsigned Idx =
+ Layout->getElementContainingOffset(ConstantOffset.getZExtValue());
+
+ return ST->getTypeAtIndex(Idx);
+ }
+
+ llvm_unreachable("Could not infer type from GEP");
+ }
+
+ llvm_unreachable("Could not calculate pointee type");
+}
+
void expandMemCpy(MemCpyInst *MemCpy) {
IRBuilder<> Builder(MemCpy);
Value *Dst = MemCpy->getDest();
@@ -75,23 +124,13 @@ void expandMemCpy(MemCpyInst *MemCpy) {
const DataLayout &DL = Builder.GetInsertBlock()->getModule()->getDataLayout();
- auto GetArrTyFromVal = [](Value *Val) -> ArrayType * {
- assert(isa<AllocaInst>(Val) ||
- isa<GlobalVariable>(Val) &&
- "Expected Val to be an Alloca or Global Variable");
- if (auto *Alloca = dyn_cast<AllocaInst>(Val))
- return dyn_cast<ArrayType>(Alloca->getAllocatedType());
- if (auto *GlobalVar = dyn_cast<GlobalVariable>(Val))
- return dyn_cast<ArrayType>(GlobalVar->getValueType());
- return nullptr;
- };
-
- ArrayType *DstArrTy = GetArrTyFromVal(Dst);
+ auto *DstArrTy = dyn_cast<ArrayType>(getPointeeType(Dst, DL));
assert(DstArrTy && "Expected Dst of memcpy to be a Pointer to an Array Type");
if (auto *DstGlobalVar = dyn_cast<GlobalVariable>(Dst))
assert(!DstGlobalVar->isConstant() &&
"The Dst of memcpy must not be a constant Global Variable");
- [[maybe_unused]] ArrayType *SrcArrTy = GetArrTyFromVal(Src);
+ [[maybe_unused]] auto *SrcArrTy =
+ dyn_cast<ArrayType>(getPointeeType(Src, DL));
assert(SrcArrTy && "Expected Src of memcpy to be a Pointer to an Array Type");
Type *DstElemTy = DstArrTy->getElementType();
diff --git a/llvm/test/CodeGen/DirectX/MemIntrinsics/memcpy-pointee.ll b/llvm/test/CodeGen/DirectX/MemIntrinsics/memcpy-pointee.ll
new file mode 100644
index 0000000000000..326d9797ea88a
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/MemIntrinsics/memcpy-pointee.ll
@@ -0,0 +1,106 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -dxil-mem-intrinsics -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
+
+%struct.S = type { <4 x i32>, [2 x <4 x i32>] }
+
+define void @test_structarray_alloca() "hlsl.export" {
+; CHECK-LABEL: define void @test_structarray_alloca(
+; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[OUT:%.*]] = alloca [[STRUCT_S:%.*]], align 16
+; CHECK-NEXT: [[IN:%.*]] = alloca [[STRUCT_S]], align 16
+; CHECK-NEXT: [[OUT_I:%.*]] = getelementptr inbounds nuw i8, ptr [[OUT]], i32 16
+; CHECK-NEXT: [[IN_I:%.*]] = getelementptr inbounds nuw i8, ptr [[IN]], i32 16
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [2 x <4 x i32>], ptr [[IN_I]], i32 0, i32 0
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[TMP0]], align 16
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x <4 x i32>], ptr [[OUT_I]], i32 0, i32 0
+; CHECK-NEXT: store <4 x i32> [[TMP1]], ptr [[TMP2]], align 16
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [2 x <4 x i32>], ptr [[IN_I]], i32 0, i32 1
+; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr [[TMP3]], align 16
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x <4 x i32>], ptr [[OUT_I]], i32 0, i32 1
+; CHECK-NEXT: store <4 x i32> [[TMP4]], ptr [[TMP5]], align 16
+; CHECK-NEXT: ret void
+;
+entry:
+ %out = alloca %struct.S
+ %in = alloca %struct.S
+ %out.i = getelementptr inbounds nuw i8, ptr %out, i32 16
+ %in.i = getelementptr inbounds nuw i8, ptr %in, i32 16
+ tail call void @llvm.memcpy(ptr noundef nonnull align 1 dereferenceable(32) %out.i, ptr noundef nonnull align 1 dereferenceable(32) %in.i, i32 32, i1 false)
+ ret void
+}
+
+define void @test_structarray_alloca_typed() "hlsl.export" {
+; CHECK-LABEL: define void @test_structarray_alloca_typed(
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[OUT:%.*]] = alloca [[STRUCT_S:%.*]], align 16
+; CHECK-NEXT: [[IN:%.*]] = alloca [[STRUCT_S]], align 16
+; CHECK-NEXT: [[OUT_I:%.*]] = getelementptr { <4 x i32>, [2 x <4 x i32>] }, ptr [[OUT]], i32 0, i32 1
+; CHECK-NEXT: [[IN_I:%.*]] = getelementptr { <4 x i32>, [2 x <4 x i32>] }, ptr [[IN]], i32 0, i32 1
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds [2 x <4 x i32>], ptr [[IN_I]], i32 0, i32 0
+; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr [[GEP]], align 16
+; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds [2 x <4 x i32>], ptr [[OUT_I]], i32 0, i32 0
+; CHECK-NEXT: store <4 x i32> [[TMP0]], ptr [[GEP1]], align 16
+; CHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds [2 x <4 x i32>], ptr [[IN_I]], i32 0, i32 1
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[GEP2]], align 16
+; CHECK-NEXT: [[GEP3:%.*]] = getelementptr inbounds [2 x <4 x i32>], ptr [[OUT_I]], i32 0, i32 1
+; CHECK-NEXT: store <4 x i32> [[TMP1]], ptr [[GEP3]], align 16
+; CHECK-NEXT: ret void
+;
+entry:
+ %out = alloca %struct.S
+ %in = alloca %struct.S
+ %out.i = getelementptr { <4 x i32>, [2 x <4 x i32>] }, ptr %out, i32 0, i32 1
+ %in.i = getelementptr { <4 x i32>, [2 x <4 x i32>] }, ptr %in, i32 0, i32 1
+ tail call void @llvm.memcpy(ptr noundef nonnull align 1 dereferenceable(32) %out.i, ptr noundef nonnull align 1 dereferenceable(32) %in.i, i32 32, i1 false)
+ ret void
+}
+
+@shared1 = external hidden local_unnamed_addr addrspace(3) global %struct.S
+@shared2 = external hidden local_unnamed_addr addrspace(3) global %struct.S
+define void @test_structarray_groupshared() "hlsl.export" {
+; CHECK-LABEL: define void @test_structarray_groupshared(
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr addrspace(3) getelementptr inbounds nuw (i8, ptr addrspace(3) @shared2, i32 16), align 16
+; CHECK-NEXT: store <4 x i32> [[TMP0]], ptr addrspace(3) getelementptr inbounds nuw (i8, ptr addrspace(3) @shared1, i32 16), align 16
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr addrspace(3) getelementptr inbounds ([2 x <4 x i32>], ptr addrspace(3) getelementptr inbounds nuw (i8, ptr addrspace(3) @shared2, i32 16), i32 0, i32 1), align 16
+; CHECK-NEXT: store <4 x i32> [[TMP1]], ptr addrspace(3) getelementptr inbounds ([2 x <4 x i32>], ptr addrspace(3) getelementptr inbounds nuw (i8, ptr addrspace(3) @shared1, i32 16), i32 0, i32 1), align 16
+; CHECK-NEXT: ret void
+;
+entry:
+ tail call void @llvm.memcpy.p3.p3.i32(ptr addrspace(3) noundef nonnull align 1 dereferenceable(32) getelementptr inbounds nuw (i8, ptr addrspace(3) @shared1, i32 16), ptr addrspace(3) noundef nonnull align 1 dereferenceable(32) getelementptr inbounds nuw (i8, ptr addrspace(3) @shared2, i32 16), i32 32, i1 false)
+ ret void
+}
+
+define void @test_structarray_in_buffer() "hlsl.export" {
+; CHECK-LABEL: define void @test_structarray_in_buffer(
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[H_OUT:%.*]] = tail call target("dx.RawBuffer", [[STRUCT_S:%.*]], 1, 0) @llvm.dx.resource.handlefrombinding.tdx.RawBuffer_s_struct.Ss_1_0t(i32 0, i32 0, i32 1, i32 0, ptr null)
+; CHECK-NEXT: [[H_IN:%.*]] = tail call target("dx.RawBuffer", [[STRUCT_S]], 0, 0) @llvm.dx.resource.handlefrombinding.tdx.RawBuffer_s_struct.Ss_0_0t(i32 0, i32 1, i32 1, i32 0, ptr null)
+; CHECK-NEXT: [[P_OUT:%.*]] = tail call noundef nonnull align 1 dereferenceable(48) ptr @llvm.dx.resource.getpointer.p0.tdx.RawBuffer_s_struct.Ss_1_0t(target("dx.RawBuffer", [[STRUCT_S]], 1, 0) [[H_OUT]], i32 0)
+; CHECK-NEXT: [[OUT_I:%.*]] = getelementptr inbounds nuw i8, ptr [[P_OUT]], i32 16
+; CHECK-NEXT: [[P_IN:%.*]] = tail call noundef nonnull align 1 dereferenceable(48) ptr @llvm.dx.resource.getpointer.p0.tdx.RawBuffer_s_struct.Ss_0_0t(target("dx.RawBuffer", [[STRUCT_S]], 0, 0) [[H_IN]], i32 0)
+; CHECK-NEXT: [[IN_I:%.*]] = getelementptr inbounds nuw i8, ptr [[P_IN]], i32 16
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [2 x <4 x i32>], ptr [[OUT_I]], i32 0, i32 0
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[TMP0]], align 16
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x <4 x i32>], ptr [[IN_I]], i32 0, i32 0
+; CHECK-NEXT: store <4 x i32> [[TMP1]], ptr [[TMP2]], align 16
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [2 x <4 x i32>], ptr [[OUT_I]], i32 0, i32 1
+; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr [[TMP3]], align 16
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x <4 x i32>], ptr [[IN_I]], i32 0, i32 1
+; CHECK-NEXT: store <4 x i32> [[TMP4]], ptr [[TMP5]], align 16
+; CHECK-NEXT: ret void
+;
+entry:
+ %h_out = tail call target("dx.RawBuffer", %struct.S, 1, 0) @llvm.dx.resource.handlefrombinding(i32 0, i32 0, i32 1, i32 0, ptr null)
+ %h_in = tail call target("dx.RawBuffer", %struct.S, 0, 0) @llvm.dx.resource.handlefrombinding(i32 0, i32 1, i32 1, i32 0, ptr null)
+ %p_out = tail call noundef nonnull align 1 dereferenceable(48) ptr @llvm.dx.resource.getpointer(target("dx.RawBuffer", %struct.S, 1, 0) %h_out, i32 0)
+ %out.i = getelementptr inbounds nuw i8, ptr %p_out, i32 16
+ %p_in = tail call noundef nonnull align 1 dereferenceable(48) ptr @llvm.dx.resource.getpointer(target("dx.RawBuffer", %struct.S, 0, 0) %h_in, i32 0)
+ %in.i = getelementptr inbounds nuw i8, ptr %p_in, i32 16
+ tail call void @llvm.memcpy(ptr noundef nonnull align 1 dereferenceable(32) %in.i, ptr noundef nonnull align 1 dereferenceable(32) %out.i, i32 32, i1 false)
+ ret void
+}
More information about the llvm-commits
mailing list