[llvm] b359616 - [DirectX] Resources and simple GEP traversal in DXILMemIntrinsics (#173054)

via llvm-commits llvm-commits at lists.llvm.org
Mon Dec 22 10:47:55 PST 2025


Author: Justin Bogner
Date: 2025-12-22T11:47:50-07:00
New Revision: b3596163493e8a3342ba3fadfc3689a3354dba8b

URL: https://github.com/llvm/llvm-project/commit/b3596163493e8a3342ba3fadfc3689a3354dba8b
DIFF: https://github.com/llvm/llvm-project/commit/b3596163493e8a3342ba3fadfc3689a3354dba8b.diff

LOG: [DirectX] Resources and simple GEP traversal in DXILMemIntrinsics (#173054)

Walk through GEPs and recognize resource target extension types when
trying to infer the underlying types of memory intrinsics.

Added: 
    llvm/test/CodeGen/DirectX/MemIntrinsics/memcpy-pointee.ll

Modified: 
    llvm/include/llvm/Analysis/DXILResource.h
    llvm/lib/Target/DirectX/DXILMemIntrinsics.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/Analysis/DXILResource.h b/llvm/include/llvm/Analysis/DXILResource.h
index 2b0dcb966578e..f38de81d76a2c 100644
--- a/llvm/include/llvm/Analysis/DXILResource.h
+++ b/llvm/include/llvm/Analysis/DXILResource.h
@@ -210,6 +210,14 @@ class AnyResourceExtType : public TargetExtType {
   AnyResourceExtType(const AnyResourceExtType &) = delete;
   AnyResourceExtType &operator=(const AnyResourceExtType &) = delete;
 
+  Type *getResourceType() const {
+    // Sampler and feedback resources do not have an underlying type.
+    if (isa<SamplerExtType>(this) || isa<FeedbackTextureExtType>(this))
+      return nullptr;
+    // All other resources store the type in a parameter.
+    return getTypeParameter(0);
+  }
+
   static bool classof(const TargetExtType *T) {
     return isa<RawBufferExtType>(T) || isa<TypedBufferExtType>(T) ||
            isa<TextureExtType>(T) || isa<MSTextureExtType>(T) ||

diff  --git a/llvm/lib/Target/DirectX/DXILMemIntrinsics.cpp b/llvm/lib/Target/DirectX/DXILMemIntrinsics.cpp
index 2542fb315f89a..76352b3a2e354 100644
--- a/llvm/lib/Target/DirectX/DXILMemIntrinsics.cpp
+++ b/llvm/lib/Target/DirectX/DXILMemIntrinsics.cpp
@@ -8,8 +8,10 @@
 
 #include "DXILMemIntrinsics.h"
 #include "DirectX.h"
+#include "llvm/Analysis/DXILResource.h"
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/IntrinsicsDirectX.h"
 #include "llvm/IR/Module.h"
 
 #define DEBUG_TYPE "dxil-mem-intrinsics"
@@ -60,6 +62,53 @@ void expandMemSet(MemSetInst *MemSet) {
   MemSet->eraseFromParent();
 }
 
+static Type *getPointeeType(Value *Ptr, const DataLayout &DL) {
+  if (auto *GV = dyn_cast<GlobalVariable>(Ptr))
+    return GV->getValueType();
+  if (auto *AI = dyn_cast<AllocaInst>(Ptr))
+    return AI->getAllocatedType();
+
+  if (auto *II = dyn_cast<IntrinsicInst>(Ptr)) {
+    if (II->getIntrinsicID() == Intrinsic::dx_resource_getpointer) {
+      Type *Ty = cast<dxil::AnyResourceExtType>(II->getArgOperand(0)->getType())
+                     ->getResourceType();
+      assert(Ty && "getpointer used on untyped resource");
+      return Ty;
+    }
+  }
+
+  if (auto *GEP = dyn_cast<GEPOperator>(Ptr)) {
+    Type *Ty = GEP->getResultElementType();
+    if (!Ty->isIntegerTy(8))
+      return Ty;
+
+    // We have ptradd, so we have to hope there's enough information to work out
+    // what we're indexing.
+    Type *IndexedType = getPointeeType(GEP->getPointerOperand(), DL);
+    if (auto *AT = dyn_cast<ArrayType>(IndexedType))
+      return AT->getElementType();
+
+    if (auto *ST = dyn_cast<StructType>(IndexedType)) {
+      // Indexing a struct should always be constant
+      APInt ConstantOffset(DL.getIndexTypeSizeInBits(GEP->getType()), 0);
+      [[maybe_unused]] bool IsConst =
+          GEP->accumulateConstantOffset(DL, ConstantOffset);
+      assert(IsConst && "Non-constant GEP into struct?");
+
+      // Now, work out what we'll find at that offset.
+      const StructLayout *Layout = DL.getStructLayout(ST);
+      unsigned Idx =
+          Layout->getElementContainingOffset(ConstantOffset.getZExtValue());
+
+      return ST->getTypeAtIndex(Idx);
+    }
+
+    llvm_unreachable("Could not infer type from GEP");
+  }
+
+  llvm_unreachable("Could not calculate pointee type");
+}
+
 void expandMemCpy(MemCpyInst *MemCpy) {
   IRBuilder<> Builder(MemCpy);
   Value *Dst = MemCpy->getDest();
@@ -75,23 +124,13 @@ void expandMemCpy(MemCpyInst *MemCpy) {
 
   const DataLayout &DL = Builder.GetInsertBlock()->getModule()->getDataLayout();
 
-  auto GetArrTyFromVal = [](Value *Val) -> ArrayType * {
-    assert(isa<AllocaInst>(Val) ||
-           isa<GlobalVariable>(Val) &&
-               "Expected Val to be an Alloca or Global Variable");
-    if (auto *Alloca = dyn_cast<AllocaInst>(Val))
-      return dyn_cast<ArrayType>(Alloca->getAllocatedType());
-    if (auto *GlobalVar = dyn_cast<GlobalVariable>(Val))
-      return dyn_cast<ArrayType>(GlobalVar->getValueType());
-    return nullptr;
-  };
-
-  ArrayType *DstArrTy = GetArrTyFromVal(Dst);
+  auto *DstArrTy = dyn_cast<ArrayType>(getPointeeType(Dst, DL));
   assert(DstArrTy && "Expected Dst of memcpy to be a Pointer to an Array Type");
   if (auto *DstGlobalVar = dyn_cast<GlobalVariable>(Dst))
     assert(!DstGlobalVar->isConstant() &&
            "The Dst of memcpy must not be a constant Global Variable");
-  [[maybe_unused]] ArrayType *SrcArrTy = GetArrTyFromVal(Src);
+  [[maybe_unused]] auto *SrcArrTy =
+      dyn_cast<ArrayType>(getPointeeType(Src, DL));
   assert(SrcArrTy && "Expected Src of memcpy to be a Pointer to an Array Type");
 
   Type *DstElemTy = DstArrTy->getElementType();

diff  --git a/llvm/test/CodeGen/DirectX/MemIntrinsics/memcpy-pointee.ll b/llvm/test/CodeGen/DirectX/MemIntrinsics/memcpy-pointee.ll
new file mode 100644
index 0000000000000..326d9797ea88a
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/MemIntrinsics/memcpy-pointee.ll
@@ -0,0 +1,106 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -dxil-mem-intrinsics -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
+
+%struct.S = type { <4 x i32>, [2 x <4 x i32>] }
+
+define void @test_structarray_alloca() "hlsl.export" {
+; CHECK-LABEL: define void @test_structarray_alloca(
+; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[OUT:%.*]] = alloca [[STRUCT_S:%.*]], align 16
+; CHECK-NEXT:    [[IN:%.*]] = alloca [[STRUCT_S]], align 16
+; CHECK-NEXT:    [[OUT_I:%.*]] = getelementptr inbounds nuw i8, ptr [[OUT]], i32 16
+; CHECK-NEXT:    [[IN_I:%.*]] = getelementptr inbounds nuw i8, ptr [[IN]], i32 16
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [2 x <4 x i32>], ptr [[IN_I]], i32 0, i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[TMP0]], align 16
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds [2 x <4 x i32>], ptr [[OUT_I]], i32 0, i32 0
+; CHECK-NEXT:    store <4 x i32> [[TMP1]], ptr [[TMP2]], align 16
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [2 x <4 x i32>], ptr [[IN_I]], i32 0, i32 1
+; CHECK-NEXT:    [[TMP4:%.*]] = load <4 x i32>, ptr [[TMP3]], align 16
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [2 x <4 x i32>], ptr [[OUT_I]], i32 0, i32 1
+; CHECK-NEXT:    store <4 x i32> [[TMP4]], ptr [[TMP5]], align 16
+; CHECK-NEXT:    ret void
+;
+entry:
+  %out = alloca %struct.S
+  %in = alloca %struct.S
+  %out.i = getelementptr inbounds nuw i8, ptr %out, i32 16
+  %in.i = getelementptr inbounds nuw i8, ptr %in, i32 16
+  tail call void @llvm.memcpy(ptr noundef nonnull align 1 dereferenceable(32) %out.i, ptr noundef nonnull align 1 dereferenceable(32) %in.i, i32 32, i1 false)
+  ret void
+}
+
+define void @test_structarray_alloca_typed() "hlsl.export" {
+; CHECK-LABEL: define void @test_structarray_alloca_typed(
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[OUT:%.*]] = alloca [[STRUCT_S:%.*]], align 16
+; CHECK-NEXT:    [[IN:%.*]] = alloca [[STRUCT_S]], align 16
+; CHECK-NEXT:    [[OUT_I:%.*]] = getelementptr { <4 x i32>, [2 x <4 x i32>] }, ptr [[OUT]], i32 0, i32 1
+; CHECK-NEXT:    [[IN_I:%.*]] = getelementptr { <4 x i32>, [2 x <4 x i32>] }, ptr [[IN]], i32 0, i32 1
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds [2 x <4 x i32>], ptr [[IN_I]], i32 0, i32 0
+; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x i32>, ptr [[GEP]], align 16
+; CHECK-NEXT:    [[GEP1:%.*]] = getelementptr inbounds [2 x <4 x i32>], ptr [[OUT_I]], i32 0, i32 0
+; CHECK-NEXT:    store <4 x i32> [[TMP0]], ptr [[GEP1]], align 16
+; CHECK-NEXT:    [[GEP2:%.*]] = getelementptr inbounds [2 x <4 x i32>], ptr [[IN_I]], i32 0, i32 1
+; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[GEP2]], align 16
+; CHECK-NEXT:    [[GEP3:%.*]] = getelementptr inbounds [2 x <4 x i32>], ptr [[OUT_I]], i32 0, i32 1
+; CHECK-NEXT:    store <4 x i32> [[TMP1]], ptr [[GEP3]], align 16
+; CHECK-NEXT:    ret void
+;
+entry:
+  %out = alloca %struct.S
+  %in = alloca %struct.S
+  %out.i = getelementptr { <4 x i32>, [2 x <4 x i32>] }, ptr %out, i32 0, i32 1
+  %in.i = getelementptr { <4 x i32>, [2 x <4 x i32>] }, ptr %in, i32 0, i32 1
+  tail call void @llvm.memcpy(ptr noundef nonnull align 1 dereferenceable(32) %out.i, ptr noundef nonnull align 1 dereferenceable(32) %in.i, i32 32, i1 false)
+  ret void
+}
+
+@shared1 = external hidden local_unnamed_addr addrspace(3) global %struct.S
+@shared2 = external hidden local_unnamed_addr addrspace(3) global %struct.S
+define void @test_structarray_groupshared() "hlsl.export" {
+; CHECK-LABEL: define void @test_structarray_groupshared(
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x i32>, ptr addrspace(3) getelementptr inbounds nuw (i8, ptr addrspace(3) @shared2, i32 16), align 16
+; CHECK-NEXT:    store <4 x i32> [[TMP0]], ptr addrspace(3) getelementptr inbounds nuw (i8, ptr addrspace(3) @shared1, i32 16), align 16
+; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr addrspace(3) getelementptr inbounds ([2 x <4 x i32>], ptr addrspace(3) getelementptr inbounds nuw (i8, ptr addrspace(3) @shared2, i32 16), i32 0, i32 1), align 16
+; CHECK-NEXT:    store <4 x i32> [[TMP1]], ptr addrspace(3) getelementptr inbounds ([2 x <4 x i32>], ptr addrspace(3) getelementptr inbounds nuw (i8, ptr addrspace(3) @shared1, i32 16), i32 0, i32 1), align 16
+; CHECK-NEXT:    ret void
+;
+entry:
+  tail call void @llvm.memcpy.p3.p3.i32(ptr addrspace(3) noundef nonnull align 1 dereferenceable(32) getelementptr inbounds nuw (i8, ptr addrspace(3) @shared1, i32 16), ptr addrspace(3) noundef nonnull align 1 dereferenceable(32) getelementptr inbounds nuw (i8, ptr addrspace(3) @shared2, i32 16), i32 32, i1 false)
+  ret void
+}
+
+define void @test_structarray_in_buffer() "hlsl.export" {
+; CHECK-LABEL: define void @test_structarray_in_buffer(
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[H_OUT:%.*]] = tail call target("dx.RawBuffer", [[STRUCT_S:%.*]], 1, 0) @llvm.dx.resource.handlefrombinding.tdx.RawBuffer_s_struct.Ss_1_0t(i32 0, i32 0, i32 1, i32 0, ptr null)
+; CHECK-NEXT:    [[H_IN:%.*]] = tail call target("dx.RawBuffer", [[STRUCT_S]], 0, 0) @llvm.dx.resource.handlefrombinding.tdx.RawBuffer_s_struct.Ss_0_0t(i32 0, i32 1, i32 1, i32 0, ptr null)
+; CHECK-NEXT:    [[P_OUT:%.*]] = tail call noundef nonnull align 1 dereferenceable(48) ptr @llvm.dx.resource.getpointer.p0.tdx.RawBuffer_s_struct.Ss_1_0t(target("dx.RawBuffer", [[STRUCT_S]], 1, 0) [[H_OUT]], i32 0)
+; CHECK-NEXT:    [[OUT_I:%.*]] = getelementptr inbounds nuw i8, ptr [[P_OUT]], i32 16
+; CHECK-NEXT:    [[P_IN:%.*]] = tail call noundef nonnull align 1 dereferenceable(48) ptr @llvm.dx.resource.getpointer.p0.tdx.RawBuffer_s_struct.Ss_0_0t(target("dx.RawBuffer", [[STRUCT_S]], 0, 0) [[H_IN]], i32 0)
+; CHECK-NEXT:    [[IN_I:%.*]] = getelementptr inbounds nuw i8, ptr [[P_IN]], i32 16
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [2 x <4 x i32>], ptr [[OUT_I]], i32 0, i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[TMP0]], align 16
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds [2 x <4 x i32>], ptr [[IN_I]], i32 0, i32 0
+; CHECK-NEXT:    store <4 x i32> [[TMP1]], ptr [[TMP2]], align 16
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [2 x <4 x i32>], ptr [[OUT_I]], i32 0, i32 1
+; CHECK-NEXT:    [[TMP4:%.*]] = load <4 x i32>, ptr [[TMP3]], align 16
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [2 x <4 x i32>], ptr [[IN_I]], i32 0, i32 1
+; CHECK-NEXT:    store <4 x i32> [[TMP4]], ptr [[TMP5]], align 16
+; CHECK-NEXT:    ret void
+;
+entry:
+  %h_out = tail call target("dx.RawBuffer", %struct.S, 1, 0) @llvm.dx.resource.handlefrombinding(i32 0, i32 0, i32 1, i32 0, ptr null)
+  %h_in = tail call target("dx.RawBuffer", %struct.S, 0, 0) @llvm.dx.resource.handlefrombinding(i32 0, i32 1, i32 1, i32 0, ptr null)
+  %p_out = tail call noundef nonnull align 1 dereferenceable(48) ptr @llvm.dx.resource.getpointer(target("dx.RawBuffer", %struct.S, 1, 0) %h_out, i32 0)
+  %out.i = getelementptr inbounds nuw i8, ptr %p_out, i32 16
+  %p_in = tail call noundef nonnull align 1 dereferenceable(48) ptr @llvm.dx.resource.getpointer(target("dx.RawBuffer", %struct.S, 0, 0) %h_in, i32 0)
+  %in.i = getelementptr inbounds nuw i8, ptr %p_in, i32 16
+  tail call void @llvm.memcpy(ptr noundef nonnull align 1 dereferenceable(32) %in.i, ptr noundef nonnull align 1 dereferenceable(32) %out.i, i32 32, i1 false)
+  ret void
+}


        


More information about the llvm-commits mailing list