[clang] [DirectX] Avoid putting ByVal on out and inout parameters (PR #150495)
via cfe-commits
cfe-commits at lists.llvm.org
Thu Jul 24 11:29:57 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-clang
Author: Deric C. (Icohedron)
<details>
<summary>Changes</summary>
Fixes #<!-- -->148063 by preventing the ByVal attribute from being placed on out and inout function parameters which causes them to be eliminated by the Dead Store Elimination (DSE) pass.
---
Full diff: https://github.com/llvm/llvm-project/pull/150495.diff
2 Files Affected:
- (modified) clang/lib/CodeGen/CGCall.cpp (+22-14)
- (modified) clang/test/CodeGenHLSL/BasicFeatures/ArrayOutputArguments.hlsl (+6-6)
``````````diff
diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp
index 0bceecec6e555..acd7de86390f9 100644
--- a/clang/lib/CodeGen/CGCall.cpp
+++ b/clang/lib/CodeGen/CGCall.cpp
@@ -2852,20 +2852,28 @@ void CodeGenModule::ConstructAttributeList(StringRef Name,
if (AI.getInReg())
Attrs.addAttribute(llvm::Attribute::InReg);
- // Depending on the ABI, this may be either a byval or a dead_on_return
- // argument.
- if (AI.getIndirectByVal()) {
- Attrs.addByValAttr(getTypes().ConvertTypeForMem(ParamType));
- } else {
- // Add dead_on_return when the object's lifetime ends in the callee.
- // This includes trivially-destructible objects, as well as objects
- // whose destruction / clean-up is carried out within the callee (e.g.,
- // Obj-C ARC-managed structs, MSVC callee-destroyed objects).
- if (!ParamType.isDestructedType() || !ParamType->isRecordType() ||
- ParamType->castAs<RecordType>()
- ->getDecl()
- ->isParamDestroyedInCallee())
- Attrs.addAttribute(llvm::Attribute::DeadOnReturn);
+ // HLSL out and inout parameters must not be marked with ByVal or
+ // DeadOnReturn attributes, as they would be eliminated by Dead Store
+ // Elimination.
+ if (auto ParamABI = FI.getExtParameterInfo(ArgNo).getABI();
+ ParamABI != ParameterABI::HLSLOut &&
+ ParamABI != ParameterABI::HLSLInOut) {
+
+ // Depending on the ABI, this may be either a byval or a dead_on_return
+ // argument.
+ if (AI.getIndirectByVal()) {
+ Attrs.addByValAttr(getTypes().ConvertTypeForMem(ParamType));
+ } else {
+ // Add dead_on_return when the object's lifetime ends in the callee.
+ // This includes trivially-destructible objects, as well as objects
+ // whose destruction / clean-up is carried out within the callee (e.g.,
+ // Obj-C ARC-managed structs, MSVC callee-destroyed objects).
+ if (!ParamType.isDestructedType() || !ParamType->isRecordType() ||
+ ParamType->castAs<RecordType>()
+ ->getDecl()
+ ->isParamDestroyedInCallee())
+ Attrs.addAttribute(llvm::Attribute::DeadOnReturn);
+ }
}
auto *Decl = ParamType->getAsRecordDecl();
diff --git a/clang/test/CodeGenHLSL/BasicFeatures/ArrayOutputArguments.hlsl b/clang/test/CodeGenHLSL/BasicFeatures/ArrayOutputArguments.hlsl
index bccfaf597f0ed..4571649717d8a 100644
--- a/clang/test/CodeGenHLSL/BasicFeatures/ArrayOutputArguments.hlsl
+++ b/clang/test/CodeGenHLSL/BasicFeatures/ArrayOutputArguments.hlsl
@@ -11,7 +11,7 @@ void increment(inout int Arr[2]) {
// CHECK-NEXT: [[Tmp:%.*]] = alloca [2 x i32], align 4
// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[A]], ptr align 4 @{{.*}}, i32 8, i1 false)
// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[Tmp]], ptr align 4 [[A]], i32 8, i1 false)
-// CHECK-NEXT: call void @{{.*}}increment{{.*}}(ptr noalias noundef byval([2 x i32]) align 4 [[Tmp]])
+// CHECK-NEXT: call void @{{.*}}increment{{.*}}(ptr noalias noundef align 4 [[Tmp]])
// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[A]], ptr align 4 [[Tmp]], i32 8, i1 false)
// CHECK-NEXT: [[Idx:%.*]] = getelementptr inbounds [2 x i32], ptr [[A]], i32 0, i32 0
// CHECK-NEXT: [[B:%.*]] = load i32, ptr [[Idx]], align 4
@@ -32,7 +32,7 @@ void fn2(out int Arr[2]) {
// CHECK: [[A:%.*]] = alloca [2 x i32], align 4
// CHECK-NEXT: [[Tmp:%.*]] = alloca [2 x i32], align 4
// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[A]], ptr align 4 @{{.*}}, i32 8, i1 false)
-// CHECK-NEXT: call void @{{.*}}fn2{{.*}}(ptr noalias noundef byval([2 x i32]) align 4 [[Tmp]])
+// CHECK-NEXT: call void @{{.*}}fn2{{.*}}(ptr noalias noundef align 4 [[Tmp]])
// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[A]], ptr align 4 [[Tmp]], i32 8, i1 false)
// CHECK-NEXT: [[Idx:%.*]] = getelementptr inbounds [2 x i32], ptr [[A]], i32 0, i32 0
// CHECK-NEXT: [[B:%.*]] = load i32, ptr [[Idx]], align 4
@@ -56,7 +56,7 @@ void nestedCall(inout int Arr[2], uint index) {
// CHECK-NEXT: [[Tmp:%.*]] = alloca [2 x i32], align 4
// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[A]], ptr align 4 @{{.*}}, i32 8, i1 false)
// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[Tmp]], ptr align 4 [[A]], i32 8, i1 false)
-// CHECK-NEXT: call void @{{.*}}nestedCall{{.*}}(ptr noalias noundef byval([2 x i32]) align 4 [[Tmp]], i32 noundef 0)
+// CHECK-NEXT: call void @{{.*}}nestedCall{{.*}}(ptr noalias noundef align 4 [[Tmp]], i32 noundef 0)
// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[A]], ptr align 4 [[Tmp]], i32 8, i1 false)
// CHECK-NEXT: [[Idx:%.*]] = getelementptr inbounds [2 x i32], ptr [[A]], i32 0, i32 1
// CHECK-NEXT: [[B:%.*]] = load i32, ptr [[Idx]], align 4
@@ -70,7 +70,7 @@ export int arrayCall3() {
// CHECK-LABEL: outerCall
// CHECK: [[Tmp:%.*]] = alloca [2 x i32], align 4
// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[Tmp]], ptr align 4 %{{.*}}, i32 8, i1 false)
-// CHECK-NEXT: call void {{.*}}increment{{.*}}(ptr noalias noundef byval([2 x i32]) align 4 [[Tmp]])
+// CHECK-NEXT: call void {{.*}}increment{{.*}}(ptr noalias noundef align 4 [[Tmp]])
// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 {{.*}}, ptr align 4 [[Tmp]], i32 8, i1 false)
// CHECK-NEXT: ret void
void outerCall(inout int Arr[2]) {
@@ -82,7 +82,7 @@ void outerCall(inout int Arr[2]) {
// CHECK-NEXT: [[Tmp:%.*]] = alloca [2 x i32], align 4
// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[A]], ptr align 4 @{{.*}}, i32 8, i1 false)
// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[Tmp]], ptr align 4 [[A]], i32 8, i1 false)
-// CHECK-NEXT: call void @{{.*}}outerCall{{.*}}(ptr noalias noundef byval([2 x i32]) align 4 [[Tmp]])
+// CHECK-NEXT: call void @{{.*}}outerCall{{.*}}(ptr noalias noundef align 4 [[Tmp]])
// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[A]], ptr align 4 [[Tmp]], i32 8, i1 false)
// CHECK-NEXT: [[Idx:%.*]] = getelementptr inbounds [2 x i32], ptr [[A]], i32 0, i32 0
// CHECK-NEXT: [[B:%.*]] = load i32, ptr [[Idx]], align 4
@@ -110,7 +110,7 @@ void outerCall2(inout int Arr[2]) {
// CHECK-NEXT: [[Tmp:%.*]] = alloca [2 x i32], align 4
// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[A]], ptr align 4 @{{.*}}, i32 8, i1 false)
// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[Tmp]], ptr align 4 [[A]], i32 8, i1 false)
-// CHECK-NEXT: call void @{{.*}}outerCall2{{.*}}(ptr noalias noundef byval([2 x i32]) align 4 [[Tmp]])
+// CHECK-NEXT: call void @{{.*}}outerCall2{{.*}}(ptr noalias noundef align 4 [[Tmp]])
// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[A]], ptr align 4 [[Tmp]], i32 8, i1 false)
// CHECK-NEXT: [[Idx:%.*]] = getelementptr inbounds [2 x i32], ptr [[A]], i32 0, i32 0
// CHECK-NEXT: [[B:%.*]] = load i32, ptr [[Idx]], align 4
``````````
</details>
https://github.com/llvm/llvm-project/pull/150495
More information about the cfe-commits
mailing list