[llvm] [DirectX] Legalize memcpy (PR #139173)

Deric C. via llvm-commits llvm-commits at lists.llvm.org
Thu May 15 14:08:52 PDT 2025


https://github.com/Icohedron updated https://github.com/llvm/llvm-project/pull/139173

>From a23c2216a698c2456e5fc6b6dfe68e57e604be08 Mon Sep 17 00:00:00 2001
From: Icohedron <cheung.deric at gmail.com>
Date: Thu, 8 May 2025 16:13:32 -0700
Subject: [PATCH 1/8] Legalize memcpy

---
 llvm/lib/Target/DirectX/DXILLegalizePass.cpp |  78 +++++++++
 llvm/test/CodeGen/DirectX/legalize-memcpy.ll | 174 +++++++++++++++++++
 2 files changed, 252 insertions(+)
 create mode 100644 llvm/test/CodeGen/DirectX/legalize-memcpy.ll

diff --git a/llvm/lib/Target/DirectX/DXILLegalizePass.cpp b/llvm/lib/Target/DirectX/DXILLegalizePass.cpp
index be77a70fa46ba..d9e560a9f8859 100644
--- a/llvm/lib/Target/DirectX/DXILLegalizePass.cpp
+++ b/llvm/lib/Target/DirectX/DXILLegalizePass.cpp
@@ -15,6 +15,7 @@
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/Module.h"
 #include "llvm/Pass.h"
+#include "llvm/Support/ErrorHandling.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include <functional>
 
@@ -246,6 +247,58 @@ downcastI64toI32InsertExtractElements(Instruction &I,
   }
 }
 
+static void emitMemcpyExpansion(IRBuilder<> &Builder, Value *Dst, Value *Src,
+                                ConstantInt *Length) {
+
+  uint64_t ByteLength = Length->getZExtValue();
+  if (ByteLength == 0)
+    return;
+
+  LLVMContext &Ctx = Builder.getContext();
+  const DataLayout &DL = Builder.GetInsertBlock()->getModule()->getDataLayout();
+
+  auto GetArrTyFromVal = [](Value *Val) {
+    if (auto *Alloca = dyn_cast<AllocaInst>(Val))
+      return dyn_cast<ArrayType>(Alloca->getAllocatedType());
+    if (auto *GlobalVar = dyn_cast<GlobalVariable>(Val))
+      return dyn_cast<ArrayType>(GlobalVar->getValueType());
+    llvm_unreachable(
+        "Expected an Alloca or GlobalVariable in memcpy Src and Dst");
+  };
+
+  ArrayType *ArrTy = GetArrTyFromVal(Dst);
+  assert(ArrTy && "Expected Dst of memcpy to be a Pointer to an Array Type");
+  if (auto *DstGlobalVar = dyn_cast<GlobalVariable>(Dst))
+    assert(!DstGlobalVar->isConstant() &&
+           "The Dst of memcpy must not be a constant Global Variable");
+
+  [[maybe_unused]] ArrayType *SrcArrTy = GetArrTyFromVal(Src);
+  assert(SrcArrTy && "Expected Src of memcpy to be a Pointer to an Array Type");
+
+  // This assumption simplifies implementation and covers currently-known
+  // use-cases for DXIL. It may be relaxed in the future if required.
+  assert(ArrTy == SrcArrTy && "Array Types of Src and Dst in memcpy must match");
+
+  Type *ElemTy = ArrTy->getElementType();
+  uint64_t ElemSize = DL.getTypeStoreSize(ElemTy);
+  assert(ElemSize > 0 && "Size must be set");
+
+  [[maybe_unused]] uint64_t Size = ArrTy->getArrayNumElements();
+  assert(ElemSize * Size >= ByteLength &&
+         "Array size must be at least as large as the memcpy length");
+
+  uint64_t NumElemsToCopy = ByteLength / ElemSize;
+  assert(ByteLength % ElemSize == 0 &&
+         "memcpy length must be divisible by array element type");
+  for (uint64_t I = 0; I < NumElemsToCopy; ++I) {
+    Value *Offset = ConstantInt::get(Type::getInt32Ty(Ctx), I);
+    Value *SrcPtr = Builder.CreateGEP(ElemTy, Src, Offset, "gep");
+    Value *SrcVal = Builder.CreateLoad(ElemTy, SrcPtr);
+    Value *DstPtr = Builder.CreateGEP(ElemTy, Dst, Offset, "gep");
+    Builder.CreateStore(SrcVal, DstPtr);
+  }
+}
+
 static void emitMemsetExpansion(IRBuilder<> &Builder, Value *Dst, Value *Val,
                                 ConstantInt *SizeCI,
                                 DenseMap<Value *, Value *> &ReplacedValues) {
@@ -296,6 +349,30 @@ static void emitMemsetExpansion(IRBuilder<> &Builder, Value *Dst, Value *Val,
   }
 }
 
+static void removeMemCpy(Instruction &I,
+                         SmallVectorImpl<Instruction *> &ToRemove,
+                         DenseMap<Value *, Value *> &ReplacedValues) {
+
+  CallInst *CI = dyn_cast<CallInst>(&I);
+  if (!CI)
+    return;
+
+  Intrinsic::ID ID = CI->getIntrinsicID();
+  if (ID != Intrinsic::memcpy)
+    return;
+
+  IRBuilder<> Builder(&I);
+  Value *Dst = CI->getArgOperand(0);
+  Value *Src = CI->getArgOperand(1);
+  ConstantInt *Length = dyn_cast<ConstantInt>(CI->getArgOperand(2));
+  assert(Length && "Expected Length to be a ConstantInt");
+  ConstantInt *IsVolatile = dyn_cast<ConstantInt>(CI->getArgOperand(3));
+  assert(IsVolatile && "Expected IsVolatile to be a ConstantInt");
+  assert(IsVolatile->getZExtValue() == 0 && "Expected IsVolatile to be false");
+  emitMemcpyExpansion(Builder, Dst, Src, Length);
+  ToRemove.push_back(CI);
+}
+
 static void removeMemSet(Instruction &I,
                          SmallVectorImpl<Instruction *> &ToRemove,
                          DenseMap<Value *, Value *> &ReplacedValues) {
@@ -348,6 +425,7 @@ class DXILLegalizationPipeline {
     LegalizationPipeline.push_back(fixI8UseChain);
     LegalizationPipeline.push_back(downcastI64toI32InsertExtractElements);
     LegalizationPipeline.push_back(legalizeFreeze);
+    LegalizationPipeline.push_back(removeMemCpy);
     LegalizationPipeline.push_back(removeMemSet);
   }
 };
diff --git a/llvm/test/CodeGen/DirectX/legalize-memcpy.ll b/llvm/test/CodeGen/DirectX/legalize-memcpy.ll
new file mode 100644
index 0000000000000..09a4d051a2822
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/legalize-memcpy.ll
@@ -0,0 +1,174 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -dxil-legalize -dxil-finalize-linkage -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
+
+@outputStrides = external local_unnamed_addr addrspace(2) global [2 x <4 x i32>], align 4
+
+define void @replace_2x4xint_global_memcpy_test() #0 {
+; CHECK-LABEL: define void @replace_2x4xint_global_memcpy_test(
+; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = alloca [2 x <4 x i32>], align 16
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 32, ptr nonnull [[TMP1]])
+; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr addrspace(2) @outputStrides, align 16
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr <4 x i32>, ptr [[TMP1]], i32 0
+; CHECK-NEXT:    store <4 x i32> [[TMP2]], ptr [[GEP]], align 16
+; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i32>, ptr addrspace(2) getelementptr (<4 x i32>, ptr addrspace(2) @outputStrides, i32 1), align 16
+; CHECK-NEXT:    [[GEP1:%.*]] = getelementptr <4 x i32>, ptr [[TMP1]], i32 1
+; CHECK-NEXT:    store <4 x i32> [[TMP3]], ptr [[GEP1]], align 16
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 32, ptr nonnull [[TMP1]])
+; CHECK-NEXT:    ret void
+;
+  %1 = alloca [2 x <4 x i32>], align 16
+  call void @llvm.lifetime.start.p0(i64 32, ptr nonnull %1)
+  call void @llvm.memcpy.p0.p2.i32(ptr nonnull align 16 dereferenceable(32) %1, ptr addrspace(2) align 16 dereferenceable(32) @outputStrides, i32 32, i1 false)
+  call void @llvm.lifetime.end.p0(i64 32, ptr nonnull %1)
+  ret void
+}
+
+define void @replace_int_memcpy_test() #0 {
+; CHECK-LABEL: define void @replace_int_memcpy_test(
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = alloca [1 x i32], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = alloca [1 x i32], align 4
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 4, ptr nonnull [[TMP1]])
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 4, ptr nonnull [[TMP2]])
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i32, ptr [[TMP1]], i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[GEP]], align 4
+; CHECK-NEXT:    [[GEP1:%.*]] = getelementptr i32, ptr [[TMP2]], i32 0
+; CHECK-NEXT:    store i32 [[TMP3]], ptr [[GEP1]], align 4
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 4, ptr nonnull [[TMP2]])
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 4, ptr nonnull [[TMP1]])
+; CHECK-NEXT:    ret void
+;
+  %1 = alloca [1 x i32], align 4
+  %2 = alloca [1 x i32], align 4
+  call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %1)
+  call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %2)
+  call void @llvm.memcpy.p0.p0.i32(ptr nonnull align 4 dereferenceable(4) %2, ptr align 4 dereferenceable(4) %1, i32 4, i1 false)
+  call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %2)
+  call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %1)
+  ret void
+}
+
+define void @replace_int16_memcpy_test() #0 {
+; CHECK-LABEL: define void @replace_int16_memcpy_test(
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = alloca [2 x i16], align 2
+; CHECK-NEXT:    [[TMP2:%.*]] = alloca [2 x i16], align 2
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 4, ptr nonnull [[TMP1]])
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 4, ptr nonnull [[TMP2]])
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i16, ptr [[TMP1]], i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = load i16, ptr [[GEP]], align 2
+; CHECK-NEXT:    [[GEP1:%.*]] = getelementptr i16, ptr [[TMP2]], i32 0
+; CHECK-NEXT:    store i16 [[TMP3]], ptr [[GEP1]], align 2
+; CHECK-NEXT:    [[GEP2:%.*]] = getelementptr i16, ptr [[TMP1]], i32 1
+; CHECK-NEXT:    [[TMP4:%.*]] = load i16, ptr [[GEP2]], align 2
+; CHECK-NEXT:    [[GEP3:%.*]] = getelementptr i16, ptr [[TMP2]], i32 1
+; CHECK-NEXT:    store i16 [[TMP4]], ptr [[GEP3]], align 2
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 4, ptr nonnull [[TMP2]])
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 4, ptr nonnull [[TMP1]])
+; CHECK-NEXT:    ret void
+;
+  %1 = alloca [2 x i16], align 2
+  %2 = alloca [2 x i16], align 2
+  call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %1)
+  call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %2)
+  call void @llvm.memcpy.p0.p0.i32(ptr nonnull align 2 dereferenceable(4) %2, ptr align 2 dereferenceable(4) %1, i32 4, i1 false)
+  call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %2)
+  call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %1)
+  ret void
+}
+
+define void @replace_float_memcpy_test() #0 {
+; CHECK-LABEL: define void @replace_float_memcpy_test(
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = alloca [2 x float], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = alloca [2 x float], align 4
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[TMP1]])
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[TMP2]])
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr float, ptr [[TMP1]], i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = load float, ptr [[GEP]], align 4
+; CHECK-NEXT:    [[GEP1:%.*]] = getelementptr float, ptr [[TMP2]], i32 0
+; CHECK-NEXT:    store float [[TMP3]], ptr [[GEP1]], align 4
+; CHECK-NEXT:    [[GEP2:%.*]] = getelementptr float, ptr [[TMP1]], i32 1
+; CHECK-NEXT:    [[TMP4:%.*]] = load float, ptr [[GEP2]], align 4
+; CHECK-NEXT:    [[GEP3:%.*]] = getelementptr float, ptr [[TMP2]], i32 1
+; CHECK-NEXT:    store float [[TMP4]], ptr [[GEP3]], align 4
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[TMP2]])
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[TMP1]])
+; CHECK-NEXT:    ret void
+;
+  %1 = alloca [2 x float], align 4
+  %2 = alloca [2 x float], align 4
+  call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %1)
+  call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %2)
+  call void @llvm.memcpy.p0.p0.i32(ptr nonnull align 4 dereferenceable(8) %2, ptr align 4 dereferenceable(8) %1, i32 8, i1 false)
+  call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %2)
+  call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %1)
+  ret void
+}
+
+define void @replace_double_memcpy_test() #0 {
+; CHECK-LABEL: define void @replace_double_memcpy_test(
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = alloca [2 x double], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = alloca [2 x double], align 4
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 16, ptr nonnull [[TMP1]])
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 16, ptr nonnull [[TMP2]])
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr double, ptr [[TMP1]], i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = load double, ptr [[GEP]], align 8
+; CHECK-NEXT:    [[GEP1:%.*]] = getelementptr double, ptr [[TMP2]], i32 0
+; CHECK-NEXT:    store double [[TMP3]], ptr [[GEP1]], align 8
+; CHECK-NEXT:    [[GEP2:%.*]] = getelementptr double, ptr [[TMP1]], i32 1
+; CHECK-NEXT:    [[TMP4:%.*]] = load double, ptr [[GEP2]], align 8
+; CHECK-NEXT:    [[GEP3:%.*]] = getelementptr double, ptr [[TMP2]], i32 1
+; CHECK-NEXT:    store double [[TMP4]], ptr [[GEP3]], align 8
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 16, ptr nonnull [[TMP2]])
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 16, ptr nonnull [[TMP1]])
+; CHECK-NEXT:    ret void
+;
+  %1 = alloca [2 x double], align 4
+  %2 = alloca [2 x double], align 4
+  call void @llvm.lifetime.start.p0(i64 16, ptr nonnull %1)
+  call void @llvm.lifetime.start.p0(i64 16, ptr nonnull %2)
+  call void @llvm.memcpy.p0.p0.i32(ptr nonnull align 4 dereferenceable(8) %2, ptr align 4 dereferenceable(8) %1, i32 16, i1 false)
+  call void @llvm.lifetime.end.p0(i64 16, ptr nonnull %2)
+  call void @llvm.lifetime.end.p0(i64 16, ptr nonnull %1)
+  ret void
+}
+
+define void @replace_half_memcpy_test() #0 {
+; CHECK-LABEL: define void @replace_half_memcpy_test(
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = alloca [2 x half], align 2
+; CHECK-NEXT:    [[TMP2:%.*]] = alloca [2 x half], align 2
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 4, ptr nonnull [[TMP1]])
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 4, ptr nonnull [[TMP2]])
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr half, ptr [[TMP1]], i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = load half, ptr [[GEP]], align 2
+; CHECK-NEXT:    [[GEP1:%.*]] = getelementptr half, ptr [[TMP2]], i32 0
+; CHECK-NEXT:    store half [[TMP3]], ptr [[GEP1]], align 2
+; CHECK-NEXT:    [[GEP2:%.*]] = getelementptr half, ptr [[TMP1]], i32 1
+; CHECK-NEXT:    [[TMP4:%.*]] = load half, ptr [[GEP2]], align 2
+; CHECK-NEXT:    [[GEP3:%.*]] = getelementptr half, ptr [[TMP2]], i32 1
+; CHECK-NEXT:    store half [[TMP4]], ptr [[GEP3]], align 2
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 4, ptr nonnull [[TMP2]])
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 4, ptr nonnull [[TMP1]])
+; CHECK-NEXT:    ret void
+;
+  %1 = alloca [2 x half], align 2
+  %2 = alloca [2 x half], align 2
+  call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %1)
+  call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %2)
+  call void @llvm.memcpy.p0.p0.i32(ptr nonnull align 2 dereferenceable(4) %2, ptr align 2 dereferenceable(4) %1, i32 4, i1 false)
+  call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %2)
+  call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %1)
+  ret void
+}
+
+attributes #0 = {"hlsl.export"}
+
+
+declare void @llvm.lifetime.end.p0(i64 immarg, ptr captures(none))
+declare void @llvm.lifetime.start.p0(i64 immarg, ptr captures(none))
+declare void @llvm.memcpy.p0.p2.i32(ptr noalias, ptr addrspace(2) noalias readonly, i32, i1)
+declare void @llvm.memcpy.p0.p0.i32(ptr noalias, ptr noalias readonly, i32, i1)

>From 6ab5c90e31f31f9d59c32512a390318e5f5b77e1 Mon Sep 17 00:00:00 2001
From: Icohedron <cheung.deric at gmail.com>
Date: Thu, 8 May 2025 16:47:58 -0700
Subject: [PATCH 2/8] Apply clang-format

---
 llvm/lib/Target/DirectX/DXILLegalizePass.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Target/DirectX/DXILLegalizePass.cpp b/llvm/lib/Target/DirectX/DXILLegalizePass.cpp
index d9e560a9f8859..9d5886becb996 100644
--- a/llvm/lib/Target/DirectX/DXILLegalizePass.cpp
+++ b/llvm/lib/Target/DirectX/DXILLegalizePass.cpp
@@ -277,7 +277,8 @@ static void emitMemcpyExpansion(IRBuilder<> &Builder, Value *Dst, Value *Src,
 
   // This assumption simplifies implementation and covers currently-known
   // use-cases for DXIL. It may be relaxed in the future if required.
-  assert(ArrTy == SrcArrTy && "Array Types of Src and Dst in memcpy must match");
+  assert(ArrTy == SrcArrTy &&
+         "Array Types of Src and Dst in memcpy must match");
 
   Type *ElemTy = ArrTy->getElementType();
   uint64_t ElemSize = DL.getTypeStoreSize(ElemTy);

>From 8d1223794136379f5ba90885b9f084aa5cdd3031 Mon Sep 17 00:00:00 2001
From: "Deric C." <cheung.deric at gmail.com>
Date: Fri, 9 May 2025 12:16:55 -0700
Subject: [PATCH 3/8] Specify return type of lambda expression

Co-authored-by: Finn Plummer <canadienfinn at gmail.com>
---
 llvm/lib/Target/DirectX/DXILLegalizePass.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/Target/DirectX/DXILLegalizePass.cpp b/llvm/lib/Target/DirectX/DXILLegalizePass.cpp
index 9d5886becb996..0b4b59e621773 100644
--- a/llvm/lib/Target/DirectX/DXILLegalizePass.cpp
+++ b/llvm/lib/Target/DirectX/DXILLegalizePass.cpp
@@ -257,7 +257,7 @@ static void emitMemcpyExpansion(IRBuilder<> &Builder, Value *Dst, Value *Src,
   LLVMContext &Ctx = Builder.getContext();
   const DataLayout &DL = Builder.GetInsertBlock()->getModule()->getDataLayout();
 
-  auto GetArrTyFromVal = [](Value *Val) {
+  auto GetArrTyFromVal = [](Value *Val) -> ArrayType * {
     if (auto *Alloca = dyn_cast<AllocaInst>(Val))
       return dyn_cast<ArrayType>(Alloca->getAllocatedType());
     if (auto *GlobalVar = dyn_cast<GlobalVariable>(Val))

>From b44152e8b595cb328b18e37fc6bdbe0e4c649f47 Mon Sep 17 00:00:00 2001
From: Icohedron <cheung.deric at gmail.com>
Date: Fri, 9 May 2025 20:34:07 +0000
Subject: [PATCH 4/8] Replace llvm_unreachable with assert

---
 llvm/lib/Target/DirectX/DXILLegalizePass.cpp | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/Target/DirectX/DXILLegalizePass.cpp b/llvm/lib/Target/DirectX/DXILLegalizePass.cpp
index 0b4b59e621773..e43c0ffe33175 100644
--- a/llvm/lib/Target/DirectX/DXILLegalizePass.cpp
+++ b/llvm/lib/Target/DirectX/DXILLegalizePass.cpp
@@ -15,7 +15,6 @@
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/Module.h"
 #include "llvm/Pass.h"
-#include "llvm/Support/ErrorHandling.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include <functional>
 
@@ -258,12 +257,14 @@ static void emitMemcpyExpansion(IRBuilder<> &Builder, Value *Dst, Value *Src,
   const DataLayout &DL = Builder.GetInsertBlock()->getModule()->getDataLayout();
 
   auto GetArrTyFromVal = [](Value *Val) -> ArrayType * {
+    assert(isa<AllocaInst>(Val) ||
+           isa<GlobalVariable>(Val) &&
+               "Expected Val to be an Alloca or Global Variable");
     if (auto *Alloca = dyn_cast<AllocaInst>(Val))
       return dyn_cast<ArrayType>(Alloca->getAllocatedType());
     if (auto *GlobalVar = dyn_cast<GlobalVariable>(Val))
       return dyn_cast<ArrayType>(GlobalVar->getValueType());
-    llvm_unreachable(
-        "Expected an Alloca or GlobalVariable in memcpy Src and Dst");
+    return nullptr;
   };
 
   ArrayType *ArrTy = GetArrTyFromVal(Dst);

>From 426f0d3218c044e1cd3d2f86cfbd70eaa2692086 Mon Sep 17 00:00:00 2001
From: "Deric C." <cheung.deric at gmail.com>
Date: Thu, 15 May 2025 13:01:45 -0700
Subject: [PATCH 5/8] Add a comment for ByteLength == 0 case of memcpy

Co-authored-by: Greg Roth <grroth at microsoft.com>
---
 llvm/lib/Target/DirectX/DXILLegalizePass.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/llvm/lib/Target/DirectX/DXILLegalizePass.cpp b/llvm/lib/Target/DirectX/DXILLegalizePass.cpp
index e43c0ffe33175..bb9f47190a96e 100644
--- a/llvm/lib/Target/DirectX/DXILLegalizePass.cpp
+++ b/llvm/lib/Target/DirectX/DXILLegalizePass.cpp
@@ -250,6 +250,7 @@ static void emitMemcpyExpansion(IRBuilder<> &Builder, Value *Dst, Value *Src,
                                 ConstantInt *Length) {
 
   uint64_t ByteLength = Length->getZExtValue();
+  // If length to copy is zero, no memcpy is needed.
   if (ByteLength == 0)
     return;
 

>From a6c4ce5186f231a6481b5bbd7162faa13a224dd1 Mon Sep 17 00:00:00 2001
From: Icohedron <cheung.deric at gmail.com>
Date: Thu, 15 May 2025 20:40:00 +0000
Subject: [PATCH 6/8] Rename removeMemCpy to legalizeMemCpy

Co-authored-by: Greg Roth <grroth at microsoft.com>
---
 llvm/lib/Target/DirectX/DXILLegalizePass.cpp | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Target/DirectX/DXILLegalizePass.cpp b/llvm/lib/Target/DirectX/DXILLegalizePass.cpp
index bb9f47190a96e..1c8732bcf79cf 100644
--- a/llvm/lib/Target/DirectX/DXILLegalizePass.cpp
+++ b/llvm/lib/Target/DirectX/DXILLegalizePass.cpp
@@ -352,9 +352,12 @@ static void emitMemsetExpansion(IRBuilder<> &Builder, Value *Dst, Value *Val,
   }
 }
 
-static void removeMemCpy(Instruction &I,
-                         SmallVectorImpl<Instruction *> &ToRemove,
-                         DenseMap<Value *, Value *> &ReplacedValues) {
+// Expands the instruction `I` into corresponding loads and stores if it is a
+// memcpy call. In that case, the call instruction is added to the `ToRemove`
+// vector. `ReplacedValues` is unused.
+static void legalizeMemCpy(Instruction &I,
+                           SmallVectorImpl<Instruction *> &ToRemove,
+                           DenseMap<Value *, Value *> &ReplacedValues) {
 
   CallInst *CI = dyn_cast<CallInst>(&I);
   if (!CI)
@@ -428,7 +431,7 @@ class DXILLegalizationPipeline {
     LegalizationPipeline.push_back(fixI8UseChain);
     LegalizationPipeline.push_back(downcastI64toI32InsertExtractElements);
     LegalizationPipeline.push_back(legalizeFreeze);
-    LegalizationPipeline.push_back(removeMemCpy);
+    LegalizationPipeline.push_back(legalizeMemCpy);
     LegalizationPipeline.push_back(removeMemSet);
   }
 };

>From cb67e1663f115df66a4ebdbfabc0775d66886cd2 Mon Sep 17 00:00:00 2001
From: Icohedron <cheung.deric at gmail.com>
Date: Thu, 15 May 2025 20:47:43 +0000
Subject: [PATCH 7/8] Remove lifetime markers from test

---
 llvm/test/CodeGen/DirectX/legalize-memcpy.ll | 47 --------------------
 1 file changed, 47 deletions(-)

diff --git a/llvm/test/CodeGen/DirectX/legalize-memcpy.ll b/llvm/test/CodeGen/DirectX/legalize-memcpy.ll
index 09a4d051a2822..f48bbe21456a7 100644
--- a/llvm/test/CodeGen/DirectX/legalize-memcpy.ll
+++ b/llvm/test/CodeGen/DirectX/legalize-memcpy.ll
@@ -7,20 +7,16 @@ define void @replace_2x4xint_global_memcpy_test() #0 {
 ; CHECK-LABEL: define void @replace_2x4xint_global_memcpy_test(
 ; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
 ; CHECK-NEXT:    [[TMP1:%.*]] = alloca [2 x <4 x i32>], align 16
-; CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 32, ptr nonnull [[TMP1]])
 ; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr addrspace(2) @outputStrides, align 16
 ; CHECK-NEXT:    [[GEP:%.*]] = getelementptr <4 x i32>, ptr [[TMP1]], i32 0
 ; CHECK-NEXT:    store <4 x i32> [[TMP2]], ptr [[GEP]], align 16
 ; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i32>, ptr addrspace(2) getelementptr (<4 x i32>, ptr addrspace(2) @outputStrides, i32 1), align 16
 ; CHECK-NEXT:    [[GEP1:%.*]] = getelementptr <4 x i32>, ptr [[TMP1]], i32 1
 ; CHECK-NEXT:    store <4 x i32> [[TMP3]], ptr [[GEP1]], align 16
-; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 32, ptr nonnull [[TMP1]])
 ; CHECK-NEXT:    ret void
 ;
   %1 = alloca [2 x <4 x i32>], align 16
-  call void @llvm.lifetime.start.p0(i64 32, ptr nonnull %1)
   call void @llvm.memcpy.p0.p2.i32(ptr nonnull align 16 dereferenceable(32) %1, ptr addrspace(2) align 16 dereferenceable(32) @outputStrides, i32 32, i1 false)
-  call void @llvm.lifetime.end.p0(i64 32, ptr nonnull %1)
   ret void
 }
 
@@ -29,23 +25,15 @@ define void @replace_int_memcpy_test() #0 {
 ; CHECK-SAME: ) #[[ATTR0]] {
 ; CHECK-NEXT:    [[TMP1:%.*]] = alloca [1 x i32], align 4
 ; CHECK-NEXT:    [[TMP2:%.*]] = alloca [1 x i32], align 4
-; CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 4, ptr nonnull [[TMP1]])
-; CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 4, ptr nonnull [[TMP2]])
 ; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i32, ptr [[TMP1]], i32 0
 ; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[GEP]], align 4
 ; CHECK-NEXT:    [[GEP1:%.*]] = getelementptr i32, ptr [[TMP2]], i32 0
 ; CHECK-NEXT:    store i32 [[TMP3]], ptr [[GEP1]], align 4
-; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 4, ptr nonnull [[TMP2]])
-; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 4, ptr nonnull [[TMP1]])
 ; CHECK-NEXT:    ret void
 ;
   %1 = alloca [1 x i32], align 4
   %2 = alloca [1 x i32], align 4
-  call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %1)
-  call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %2)
   call void @llvm.memcpy.p0.p0.i32(ptr nonnull align 4 dereferenceable(4) %2, ptr align 4 dereferenceable(4) %1, i32 4, i1 false)
-  call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %2)
-  call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %1)
   ret void
 }
 
@@ -54,8 +42,6 @@ define void @replace_int16_memcpy_test() #0 {
 ; CHECK-SAME: ) #[[ATTR0]] {
 ; CHECK-NEXT:    [[TMP1:%.*]] = alloca [2 x i16], align 2
 ; CHECK-NEXT:    [[TMP2:%.*]] = alloca [2 x i16], align 2
-; CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 4, ptr nonnull [[TMP1]])
-; CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 4, ptr nonnull [[TMP2]])
 ; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i16, ptr [[TMP1]], i32 0
 ; CHECK-NEXT:    [[TMP3:%.*]] = load i16, ptr [[GEP]], align 2
 ; CHECK-NEXT:    [[GEP1:%.*]] = getelementptr i16, ptr [[TMP2]], i32 0
@@ -64,17 +50,11 @@ define void @replace_int16_memcpy_test() #0 {
 ; CHECK-NEXT:    [[TMP4:%.*]] = load i16, ptr [[GEP2]], align 2
 ; CHECK-NEXT:    [[GEP3:%.*]] = getelementptr i16, ptr [[TMP2]], i32 1
 ; CHECK-NEXT:    store i16 [[TMP4]], ptr [[GEP3]], align 2
-; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 4, ptr nonnull [[TMP2]])
-; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 4, ptr nonnull [[TMP1]])
 ; CHECK-NEXT:    ret void
 ;
   %1 = alloca [2 x i16], align 2
   %2 = alloca [2 x i16], align 2
-  call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %1)
-  call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %2)
   call void @llvm.memcpy.p0.p0.i32(ptr nonnull align 2 dereferenceable(4) %2, ptr align 2 dereferenceable(4) %1, i32 4, i1 false)
-  call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %2)
-  call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %1)
   ret void
 }
 
@@ -83,8 +63,6 @@ define void @replace_float_memcpy_test() #0 {
 ; CHECK-SAME: ) #[[ATTR0]] {
 ; CHECK-NEXT:    [[TMP1:%.*]] = alloca [2 x float], align 4
 ; CHECK-NEXT:    [[TMP2:%.*]] = alloca [2 x float], align 4
-; CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[TMP1]])
-; CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[TMP2]])
 ; CHECK-NEXT:    [[GEP:%.*]] = getelementptr float, ptr [[TMP1]], i32 0
 ; CHECK-NEXT:    [[TMP3:%.*]] = load float, ptr [[GEP]], align 4
 ; CHECK-NEXT:    [[GEP1:%.*]] = getelementptr float, ptr [[TMP2]], i32 0
@@ -93,17 +71,11 @@ define void @replace_float_memcpy_test() #0 {
 ; CHECK-NEXT:    [[TMP4:%.*]] = load float, ptr [[GEP2]], align 4
 ; CHECK-NEXT:    [[GEP3:%.*]] = getelementptr float, ptr [[TMP2]], i32 1
 ; CHECK-NEXT:    store float [[TMP4]], ptr [[GEP3]], align 4
-; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[TMP2]])
-; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[TMP1]])
 ; CHECK-NEXT:    ret void
 ;
   %1 = alloca [2 x float], align 4
   %2 = alloca [2 x float], align 4
-  call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %1)
-  call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %2)
   call void @llvm.memcpy.p0.p0.i32(ptr nonnull align 4 dereferenceable(8) %2, ptr align 4 dereferenceable(8) %1, i32 8, i1 false)
-  call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %2)
-  call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %1)
   ret void
 }
 
@@ -112,8 +84,6 @@ define void @replace_double_memcpy_test() #0 {
 ; CHECK-SAME: ) #[[ATTR0]] {
 ; CHECK-NEXT:    [[TMP1:%.*]] = alloca [2 x double], align 4
 ; CHECK-NEXT:    [[TMP2:%.*]] = alloca [2 x double], align 4
-; CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 16, ptr nonnull [[TMP1]])
-; CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 16, ptr nonnull [[TMP2]])
 ; CHECK-NEXT:    [[GEP:%.*]] = getelementptr double, ptr [[TMP1]], i32 0
 ; CHECK-NEXT:    [[TMP3:%.*]] = load double, ptr [[GEP]], align 8
 ; CHECK-NEXT:    [[GEP1:%.*]] = getelementptr double, ptr [[TMP2]], i32 0
@@ -122,17 +92,11 @@ define void @replace_double_memcpy_test() #0 {
 ; CHECK-NEXT:    [[TMP4:%.*]] = load double, ptr [[GEP2]], align 8
 ; CHECK-NEXT:    [[GEP3:%.*]] = getelementptr double, ptr [[TMP2]], i32 1
 ; CHECK-NEXT:    store double [[TMP4]], ptr [[GEP3]], align 8
-; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 16, ptr nonnull [[TMP2]])
-; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 16, ptr nonnull [[TMP1]])
 ; CHECK-NEXT:    ret void
 ;
   %1 = alloca [2 x double], align 4
   %2 = alloca [2 x double], align 4
-  call void @llvm.lifetime.start.p0(i64 16, ptr nonnull %1)
-  call void @llvm.lifetime.start.p0(i64 16, ptr nonnull %2)
   call void @llvm.memcpy.p0.p0.i32(ptr nonnull align 4 dereferenceable(8) %2, ptr align 4 dereferenceable(8) %1, i32 16, i1 false)
-  call void @llvm.lifetime.end.p0(i64 16, ptr nonnull %2)
-  call void @llvm.lifetime.end.p0(i64 16, ptr nonnull %1)
   ret void
 }
 
@@ -141,8 +105,6 @@ define void @replace_half_memcpy_test() #0 {
 ; CHECK-SAME: ) #[[ATTR0]] {
 ; CHECK-NEXT:    [[TMP1:%.*]] = alloca [2 x half], align 2
 ; CHECK-NEXT:    [[TMP2:%.*]] = alloca [2 x half], align 2
-; CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 4, ptr nonnull [[TMP1]])
-; CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 4, ptr nonnull [[TMP2]])
 ; CHECK-NEXT:    [[GEP:%.*]] = getelementptr half, ptr [[TMP1]], i32 0
 ; CHECK-NEXT:    [[TMP3:%.*]] = load half, ptr [[GEP]], align 2
 ; CHECK-NEXT:    [[GEP1:%.*]] = getelementptr half, ptr [[TMP2]], i32 0
@@ -151,24 +113,15 @@ define void @replace_half_memcpy_test() #0 {
 ; CHECK-NEXT:    [[TMP4:%.*]] = load half, ptr [[GEP2]], align 2
 ; CHECK-NEXT:    [[GEP3:%.*]] = getelementptr half, ptr [[TMP2]], i32 1
 ; CHECK-NEXT:    store half [[TMP4]], ptr [[GEP3]], align 2
-; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 4, ptr nonnull [[TMP2]])
-; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 4, ptr nonnull [[TMP1]])
 ; CHECK-NEXT:    ret void
 ;
   %1 = alloca [2 x half], align 2
   %2 = alloca [2 x half], align 2
-  call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %1)
-  call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %2)
   call void @llvm.memcpy.p0.p0.i32(ptr nonnull align 2 dereferenceable(4) %2, ptr align 2 dereferenceable(4) %1, i32 4, i1 false)
-  call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %2)
-  call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %1)
   ret void
 }
 
 attributes #0 = {"hlsl.export"}
 
-
-declare void @llvm.lifetime.end.p0(i64 immarg, ptr captures(none))
-declare void @llvm.lifetime.start.p0(i64 immarg, ptr captures(none))
 declare void @llvm.memcpy.p0.p2.i32(ptr noalias, ptr addrspace(2) noalias readonly, i32, i1)
 declare void @llvm.memcpy.p0.p0.i32(ptr noalias, ptr noalias readonly, i32, i1)

>From 2212ea8bb9bdd71bd82123cacbe39987854f4fa2 Mon Sep 17 00:00:00 2001
From: Icohedron <cheung.deric at gmail.com>
Date: Thu, 15 May 2025 21:05:11 +0000
Subject: [PATCH 8/8] Add a memcpy test for arrays of 3 ints

---
 llvm/test/CodeGen/DirectX/legalize-memcpy.ll | 25 ++++++++++++++++++++
 1 file changed, 25 insertions(+)

diff --git a/llvm/test/CodeGen/DirectX/legalize-memcpy.ll b/llvm/test/CodeGen/DirectX/legalize-memcpy.ll
index f48bbe21456a7..c191bdf583fdb 100644
--- a/llvm/test/CodeGen/DirectX/legalize-memcpy.ll
+++ b/llvm/test/CodeGen/DirectX/legalize-memcpy.ll
@@ -37,6 +37,31 @@ define void @replace_int_memcpy_test() #0 {
   ret void
 }
 
+define void @replace_3int_memcpy_test() #0 {
+; CHECK-LABEL: define void @replace_3int_memcpy_test(
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = alloca [3 x i32], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = alloca [3 x i32], align 4
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i32, ptr [[TMP1]], i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[GEP]], align 4
+; CHECK-NEXT:    [[GEP1:%.*]] = getelementptr i32, ptr [[TMP2]], i32 0
+; CHECK-NEXT:    store i32 [[TMP3]], ptr [[GEP1]], align 4
+; CHECK-NEXT:    [[GEP2:%.*]] = getelementptr i32, ptr [[TMP1]], i32 1
+; CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr [[GEP2]], align 4
+; CHECK-NEXT:    [[GEP3:%.*]] = getelementptr i32, ptr [[TMP2]], i32 1
+; CHECK-NEXT:    store i32 [[TMP4]], ptr [[GEP3]], align 4
+; CHECK-NEXT:    [[GEP4:%.*]] = getelementptr i32, ptr [[TMP1]], i32 2
+; CHECK-NEXT:    [[TMP5:%.*]] = load i32, ptr [[GEP4]], align 4
+; CHECK-NEXT:    [[GEP5:%.*]] = getelementptr i32, ptr [[TMP2]], i32 2
+; CHECK-NEXT:    store i32 [[TMP5]], ptr [[GEP5]], align 4
+; CHECK-NEXT:    ret void
+;
+  %1 = alloca [3 x i32], align 4
+  %2 = alloca [3 x i32], align 4
+  call void @llvm.memcpy.p0.p0.i32(ptr nonnull align 4 dereferenceable(12) %2, ptr align 4 dereferenceable(12) %1, i32 12, i1 false)
+  ret void
+}
+
 define void @replace_int16_memcpy_test() #0 {
 ; CHECK-LABEL: define void @replace_int16_memcpy_test(
 ; CHECK-SAME: ) #[[ATTR0]] {



More information about the llvm-commits mailing list