[llvm] [NVPTX] Convert calls to indirect when call signature mismatches function signature (PR #107644)
Kevin McAfee via llvm-commits
llvm-commits at lists.llvm.org
Wed Sep 11 11:58:15 PDT 2024
https://github.com/kalxr updated https://github.com/llvm/llvm-project/pull/107644
>From c2392b92b601f668558bd07eea153adce574d3c3 Mon Sep 17 00:00:00 2001
From: Kevin McAfee <kmcafee at nvidia.com>
Date: Fri, 30 Aug 2024 11:55:14 -0700
Subject: [PATCH 1/3] [NVPTX] Convert calls to indirect when call signature mismatches function signature
---
llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp | 47 +++++++++-
llvm/test/CodeGen/NVPTX/call_bitcast_byval.ll | 4 +-
.../CodeGen/NVPTX/convert-call-to-indirect.ll | 89 +++++++++++++++++++
3 files changed, 137 insertions(+), 3 deletions(-)
create mode 100644 llvm/test/CodeGen/NVPTX/convert-call-to-indirect.ll
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
index 5c5766a8b23455..2be9b76ba9fc96 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -1657,6 +1657,33 @@ LowerUnalignedLoadRetParam(SelectionDAG &DAG, SDValue &Chain, uint64_t Offset,
return RetVal;
}
+static bool shouldConvertToIndirectCall(bool IsVarArg, unsigned ParamCount,
+ NVPTXTargetLowering::ArgListTy &Args,
+ const CallBase *CB,
+ GlobalAddressSDNode *Func) {
+ if (!Func)
+ return false;
+ auto *CalleeFunc = dyn_cast<Function>(Func->getGlobal());
+ if (!CalleeFunc)
+ return false;
+
+ auto ActualReturnType = CalleeFunc->getReturnType();
+ if (CB->getType() != ActualReturnType)
+ return true;
+
+ if (IsVarArg)
+ return false;
+
+ auto ActualNumParams = CalleeFunc->getFunctionType()->getNumParams();
+ if (ParamCount != ActualNumParams)
+ return true;
+ for (const Argument &I : CalleeFunc->args())
+ if (I.getType() != Args[I.getArgNo()].Ty)
+ return true;
+
+ return false;
+}
+
SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
SmallVectorImpl<SDValue> &InVals) const {
@@ -1971,10 +1998,16 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
VADeclareParam->getVTList(), DeclareParamOps);
}
+ // If the param count, type of any param, or return type of the callsite
+ // mismatches with that of the function signature, convert the callsite to an
+ // indirect call.
+ bool ConvertToIndirectCall =
+ shouldConvertToIndirectCall(CLI.IsVarArg, ParamCount, Args, CB, Func);
+
// Both indirect calls and libcalls have nullptr Func. In order to distinguish
// between them we must rely on the call site value which is valid for
// indirect calls but is always null for libcalls.
- bool isIndirectCall = !Func && CB;
+ bool isIndirectCall = (!Func && CB) || ConvertToIndirectCall;
if (isa<ExternalSymbolSDNode>(Callee)) {
Function* CalleeFunc = nullptr;
@@ -2026,6 +2059,18 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
Chain = DAG.getNode(Opcode, dl, PrintCallVTs, PrintCallOps);
InGlue = Chain.getValue(1);
+ if (ConvertToIndirectCall) {
+ // Copy the function ptr to a ptx register and use the register to call the
+ // function.
+ EVT DestVT = Callee.getValueType();
+ MachineRegisterInfo &RegInfo = DAG.getMachineFunction().getRegInfo();
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ unsigned DestReg =
+ RegInfo.createVirtualRegister(TLI.getRegClassFor(DestVT.getSimpleVT()));
+ auto RegCopy = DAG.getCopyToReg(DAG.getEntryNode(), dl, DestReg, Callee);
+ Callee = DAG.getCopyFromReg(RegCopy, dl, DestReg, DestVT);
+ }
+
// Ops to print out the function name
SDVTList CallVoidVTs = DAG.getVTList(MVT::Other, MVT::Glue);
SDValue CallVoidOps[] = { Chain, Callee, InGlue };
diff --git a/llvm/test/CodeGen/NVPTX/call_bitcast_byval.ll b/llvm/test/CodeGen/NVPTX/call_bitcast_byval.ll
index c5f7bd1bd1ba20..bd723a296e620f 100644
--- a/llvm/test/CodeGen/NVPTX/call_bitcast_byval.ll
+++ b/llvm/test/CodeGen/NVPTX/call_bitcast_byval.ll
@@ -17,8 +17,8 @@ target triple = "nvptx64-nvidia-cuda"
; CHECK: st.param.b16 [param2+0], %rs1;
; CHECK: st.param.b16 [param2+2], %rs2;
; CHECK: .param .align 2 .b8 retval0[4];
-; CHECK: call.uni (retval0),
-; CHECK-NEXT: _Z20__spirv_GroupCMulKHRjjN5__spv12complex_halfE,
+; CHECK-NEXT: prototype_0 : .callprototype (.param .align 2 .b8 _[4]) _ (.param .b32 _, .param .b32 _, .param .align 2 .b8 _[4]);
+; CHECK-NEXT: call (retval0),
define weak_odr void @foo() {
entry:
%call.i.i.i = tail call %"class.complex" @_Z20__spirv_GroupCMulKHRjjN5__spv12complex_halfE(i32 0, i32 0, ptr byval(%"class.complex") null)
diff --git a/llvm/test/CodeGen/NVPTX/convert-call-to-indirect.ll b/llvm/test/CodeGen/NVPTX/convert-call-to-indirect.ll
new file mode 100644
index 00000000000000..2602c3b0d041b5
--- /dev/null
+++ b/llvm/test/CodeGen/NVPTX/convert-call-to-indirect.ll
@@ -0,0 +1,89 @@
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_90 | FileCheck %s
+; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_90 | %ptxas-verify %}
+
+%struct.64 = type <{ i64 }>
+declare i64 @callee(ptr %p);
+declare i64 @callee_variadic(ptr %p, ...);
+
+define %struct.64 @test_return_type_mismatch(ptr %p) {
+; CHECK-LABEL: test_return_type_mismatch(
+; CHECK: .param .align 1 .b8 retval0[8];
+; CHECK-NEXT: prototype_0 : .callprototype (.param .align 1 .b8 _[8]) _ (.param .b64 _);
+; CHECK-NEXT: call (retval0),
+; CHECK-NEXT: %rd
+; CHECK-NEXT: (
+; CHECK-NEXT: param0
+; CHECK-NEXT: )
+; CHECK-NEXT: , prototype_0;
+ %ret = call %struct.64 @callee(ptr %p)
+ ret %struct.64 %ret
+}
+
+define i64 @test_param_type_mismatch(ptr %p) {
+; CHECK-LABEL: test_param_type_mismatch(
+; CHECK: .param .b64 retval0;
+; CHECK-NEXT: prototype_1 : .callprototype (.param .b64 _) _ (.param .b64 _);
+; CHECK-NEXT: call (retval0),
+; CHECK-NEXT: %rd
+; CHECK-NEXT: (
+; CHECK-NEXT: param0
+; CHECK-NEXT: )
+; CHECK-NEXT: , prototype_1;
+ %ret = call i64 @callee(i64 7)
+ ret i64 %ret
+}
+
+define i64 @test_param_count_mismatch(ptr %p) {
+; CHECK-LABEL: test_param_count_mismatch(
+; CHECK: .param .b64 retval0;
+; CHECK-NEXT: prototype_2 : .callprototype (.param .b64 _) _ (.param .b64 _, .param .b64 _);
+; CHECK-NEXT: call (retval0),
+; CHECK-NEXT: %rd
+; CHECK-NEXT: (
+; CHECK-NEXT: param0,
+; CHECK-NEXT: param1
+; CHECK-NEXT: )
+; CHECK-NEXT: , prototype_2;
+ %ret = call i64 @callee(ptr %p, i64 7)
+ ret i64 %ret
+}
+
+define %struct.64 @test_return_type_mismatch_variadic(ptr %p) {
+; CHECK-LABEL: test_return_type_mismatch_variadic(
+; CHECK: .param .align 1 .b8 retval0[8];
+; CHECK-NEXT: prototype_3 : .callprototype (.param .align 1 .b8 _[8]) _ (.param .b64 _);
+; CHECK-NEXT: call (retval0),
+; CHECK-NEXT: %rd
+; CHECK-NEXT: (
+; CHECK-NEXT: param0
+; CHECK-NEXT: )
+; CHECK-NEXT: , prototype_3;
+ %ret = call %struct.64 (ptr, ...) @callee_variadic(ptr %p)
+ ret %struct.64 %ret
+}
+
+define i64 @test_param_type_mismatch_variadic(ptr %p) {
+; CHECK-LABEL: test_param_type_mismatch_variadic(
+; CHECK: .param .b64 retval0;
+; CHECK-NEXT: call.uni (retval0),
+; CHECK-NEXT: callee_variadic
+; CHECK-NEXT: (
+; CHECK-NEXT: param0,
+; CHECK-NEXT: param1
+; CHECK-NEXT: )
+ %ret = call i64 (ptr, ...) @callee_variadic(ptr %p, i64 7)
+ ret i64 %ret
+}
+
+define i64 @test_param_count_mismatch_variadic(ptr %p) {
+; CHECK-LABEL: test_param_count_mismatch_variadic(
+; CHECK: .param .b64 retval0;
+; CHECK-NEXT: call.uni (retval0),
+; CHECK-NEXT: callee_variadic
+; CHECK-NEXT: (
+; CHECK-NEXT: param0,
+; CHECK-NEXT: param1
+; CHECK-NEXT: )
+ %ret = call i64 (ptr, ...) @callee_variadic(ptr %p, i64 7)
+ ret i64 %ret
+}
>From aaa1d280e5b31df56fa781d05c5ba67df2e12fa0 Mon Sep 17 00:00:00 2001
From: Kevin McAfee <kmcafee at nvidia.com>
Date: Wed, 11 Sep 2024 11:53:56 -0700
Subject: [PATCH 2/3] Refactor to use existing api instead of manual checks for types
---
llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp | 30 +++++----------------
1 file changed, 6 insertions(+), 24 deletions(-)
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
index 2be9b76ba9fc96..fbbaff75d04570 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -1657,31 +1657,15 @@ LowerUnalignedLoadRetParam(SelectionDAG &DAG, SDValue &Chain, uint64_t Offset,
return RetVal;
}
-static bool shouldConvertToIndirectCall(bool IsVarArg, unsigned ParamCount,
- NVPTXTargetLowering::ArgListTy &Args,
- const CallBase *CB,
- GlobalAddressSDNode *Func) {
+static bool shouldConvertToIndirectCall(const CallBase *CB,
+ const GlobalAddressSDNode *Func) {
if (!Func)
return false;
auto *CalleeFunc = dyn_cast<Function>(Func->getGlobal());
if (!CalleeFunc)
return false;
- auto ActualReturnType = CalleeFunc->getReturnType();
- if (CB->getType() != ActualReturnType)
- return true;
-
- if (IsVarArg)
- return false;
-
- auto ActualNumParams = CalleeFunc->getFunctionType()->getNumParams();
- if (ParamCount != ActualNumParams)
- return true;
- for (const Argument &I : CalleeFunc->args())
- if (I.getType() != Args[I.getArgNo()].Ty)
- return true;
-
- return false;
+ return CB->getFunctionType() != CalleeFunc->getFunctionType();
}
SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
@@ -1998,11 +1982,9 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
VADeclareParam->getVTList(), DeclareParamOps);
}
- // If the param count, type of any param, or return type of the callsite
- // mismatches with that of the function signature, convert the callsite to an
- // indirect call.
- bool ConvertToIndirectCall =
- shouldConvertToIndirectCall(CLI.IsVarArg, ParamCount, Args, CB, Func);
+ // If the type of the callsite does not match that of the function, convert
+ // the callsite to an indirect call.
+ bool ConvertToIndirectCall = shouldConvertToIndirectCall(CB, Func);
// Both indirect calls and libcalls have nullptr Func. In order to distinguish
// between them we must rely on the call site value which is valid for
>From 6c4a734dfc21e5ef9a09379fb7c806a355023e25 Mon Sep 17 00:00:00 2001
From: Kevin McAfee <kmcafee at nvidia.com>
Date: Wed, 11 Sep 2024 11:54:18 -0700
Subject: [PATCH 3/3] Update test to account for indirect call conversion
---
.../CodeGen/NVPTX/lower-args-gridconstant.ll | 110 ++++++++++--------
1 file changed, 61 insertions(+), 49 deletions(-)
diff --git a/llvm/test/CodeGen/NVPTX/lower-args-gridconstant.ll b/llvm/test/CodeGen/NVPTX/lower-args-gridconstant.ll
index 176dfee11cfb09..b203a78d677308 100644
--- a/llvm/test/CodeGen/NVPTX/lower-args-gridconstant.ll
+++ b/llvm/test/CodeGen/NVPTX/lower-args-gridconstant.ll
@@ -72,21 +72,24 @@ define void @grid_const_escape(ptr byval(%struct.s) align 4 %input) {
; PTX-LABEL: grid_const_escape(
; PTX: {
; PTX-NEXT: .reg .b32 %r<3>;
-; PTX-NEXT: .reg .b64 %rd<4>;
+; PTX-NEXT: .reg .b64 %rd<5>;
; PTX-EMPTY:
; PTX-NEXT: // %bb.0:
-; PTX-NEXT: mov.b64 %rd1, grid_const_escape_param_0;
-; PTX-NEXT: mov.u64 %rd2, %rd1;
-; PTX-NEXT: cvta.param.u64 %rd3, %rd2;
+; PTX-NEXT: mov.b64 %rd2, grid_const_escape_param_0;
+; PTX-NEXT: mov.u64 %rd3, %rd2;
+; PTX-NEXT: cvta.param.u64 %rd4, %rd3;
+; PTX-NEXT: mov.u64 %rd1, escape;
; PTX-NEXT: { // callseq 0, 0
; PTX-NEXT: .param .b64 param0;
-; PTX-NEXT: st.param.b64 [param0+0], %rd3;
+; PTX-NEXT: st.param.b64 [param0+0], %rd4;
; PTX-NEXT: .param .b32 retval0;
-; PTX-NEXT: call.uni (retval0),
-; PTX-NEXT: escape,
+; PTX-NEXT: prototype_0 : .callprototype (.param .b32 _) _ (.param .b64 _);
+; PTX-NEXT: call (retval0),
+; PTX-NEXT: %rd1,
; PTX-NEXT: (
; PTX-NEXT: param0
-; PTX-NEXT: );
+; PTX-NEXT: )
+; PTX-NEXT: , prototype_0;
; PTX-NEXT: ld.param.b32 %r1, [retval0+0];
; PTX-NEXT: } // callseq 0
; PTX-NEXT: ret;
@@ -107,36 +110,39 @@ define void @multiple_grid_const_escape(ptr byval(%struct.s) align 4 %input, i32
; PTX-NEXT: .reg .b64 %SP;
; PTX-NEXT: .reg .b64 %SPL;
; PTX-NEXT: .reg .b32 %r<4>;
-; PTX-NEXT: .reg .b64 %rd<9>;
+; PTX-NEXT: .reg .b64 %rd<10>;
; PTX-EMPTY:
; PTX-NEXT: // %bb.0:
; PTX-NEXT: mov.u64 %SPL, __local_depot3;
; PTX-NEXT: cvta.local.u64 %SP, %SPL;
-; PTX-NEXT: mov.b64 %rd1, multiple_grid_const_escape_param_0;
-; PTX-NEXT: mov.b64 %rd2, multiple_grid_const_escape_param_2;
-; PTX-NEXT: mov.u64 %rd3, %rd2;
+; PTX-NEXT: mov.b64 %rd2, multiple_grid_const_escape_param_0;
+; PTX-NEXT: mov.b64 %rd3, multiple_grid_const_escape_param_2;
+; PTX-NEXT: mov.u64 %rd4, %rd3;
; PTX-NEXT: ld.param.u32 %r1, [multiple_grid_const_escape_param_1];
-; PTX-NEXT: cvta.param.u64 %rd4, %rd3;
-; PTX-NEXT: mov.u64 %rd5, %rd1;
-; PTX-NEXT: cvta.param.u64 %rd6, %rd5;
-; PTX-NEXT: add.u64 %rd7, %SP, 0;
-; PTX-NEXT: add.u64 %rd8, %SPL, 0;
-; PTX-NEXT: st.local.u32 [%rd8], %r1;
+; PTX-NEXT: cvta.param.u64 %rd5, %rd4;
+; PTX-NEXT: mov.u64 %rd6, %rd2;
+; PTX-NEXT: cvta.param.u64 %rd7, %rd6;
+; PTX-NEXT: add.u64 %rd8, %SP, 0;
+; PTX-NEXT: add.u64 %rd9, %SPL, 0;
+; PTX-NEXT: st.local.u32 [%rd9], %r1;
+; PTX-NEXT: mov.u64 %rd1, escape3;
; PTX-NEXT: { // callseq 1, 0
; PTX-NEXT: .param .b64 param0;
-; PTX-NEXT: st.param.b64 [param0+0], %rd6;
+; PTX-NEXT: st.param.b64 [param0+0], %rd7;
; PTX-NEXT: .param .b64 param1;
-; PTX-NEXT: st.param.b64 [param1+0], %rd7;
+; PTX-NEXT: st.param.b64 [param1+0], %rd8;
; PTX-NEXT: .param .b64 param2;
-; PTX-NEXT: st.param.b64 [param2+0], %rd4;
+; PTX-NEXT: st.param.b64 [param2+0], %rd5;
; PTX-NEXT: .param .b32 retval0;
-; PTX-NEXT: call.uni (retval0),
-; PTX-NEXT: escape3,
+; PTX-NEXT: prototype_1 : .callprototype (.param .b32 _) _ (.param .b64 _, .param .b64 _, .param .b64 _);
+; PTX-NEXT: call (retval0),
+; PTX-NEXT: %rd1,
; PTX-NEXT: (
; PTX-NEXT: param0,
; PTX-NEXT: param1,
; PTX-NEXT: param2
-; PTX-NEXT: );
+; PTX-NEXT: )
+; PTX-NEXT: , prototype_1;
; PTX-NEXT: ld.param.b32 %r2, [retval0+0];
; PTX-NEXT: } // callseq 1
; PTX-NEXT: ret;
@@ -221,26 +227,29 @@ define void @grid_const_partial_escape(ptr byval(i32) %input, ptr %output) {
; PTX-LABEL: grid_const_partial_escape(
; PTX: {
; PTX-NEXT: .reg .b32 %r<5>;
-; PTX-NEXT: .reg .b64 %rd<6>;
+; PTX-NEXT: .reg .b64 %rd<7>;
; PTX-EMPTY:
; PTX-NEXT: // %bb.0:
-; PTX-NEXT: mov.b64 %rd1, grid_const_partial_escape_param_0;
-; PTX-NEXT: ld.param.u64 %rd2, [grid_const_partial_escape_param_1];
-; PTX-NEXT: cvta.to.global.u64 %rd3, %rd2;
-; PTX-NEXT: mov.u64 %rd4, %rd1;
-; PTX-NEXT: cvta.param.u64 %rd5, %rd4;
-; PTX-NEXT: ld.u32 %r1, [%rd5];
+; PTX-NEXT: mov.b64 %rd2, grid_const_partial_escape_param_0;
+; PTX-NEXT: ld.param.u64 %rd3, [grid_const_partial_escape_param_1];
+; PTX-NEXT: cvta.to.global.u64 %rd4, %rd3;
+; PTX-NEXT: mov.u64 %rd5, %rd2;
+; PTX-NEXT: cvta.param.u64 %rd6, %rd5;
+; PTX-NEXT: ld.u32 %r1, [%rd6];
; PTX-NEXT: add.s32 %r2, %r1, %r1;
-; PTX-NEXT: st.global.u32 [%rd3], %r2;
+; PTX-NEXT: st.global.u32 [%rd4], %r2;
+; PTX-NEXT: mov.u64 %rd1, escape;
; PTX-NEXT: { // callseq 2, 0
; PTX-NEXT: .param .b64 param0;
-; PTX-NEXT: st.param.b64 [param0+0], %rd5;
+; PTX-NEXT: st.param.b64 [param0+0], %rd6;
; PTX-NEXT: .param .b32 retval0;
-; PTX-NEXT: call.uni (retval0),
-; PTX-NEXT: escape,
+; PTX-NEXT: prototype_2 : .callprototype (.param .b32 _) _ (.param .b64 _);
+; PTX-NEXT: call (retval0),
+; PTX-NEXT: %rd1,
; PTX-NEXT: (
; PTX-NEXT: param0
-; PTX-NEXT: );
+; PTX-NEXT: )
+; PTX-NEXT: , prototype_2;
; PTX-NEXT: ld.param.b32 %r3, [retval0+0];
; PTX-NEXT: } // callseq 2
; PTX-NEXT: ret;
@@ -266,27 +275,30 @@ define i32 @grid_const_partial_escapemem(ptr byval(%struct.s) %input, ptr %outpu
; PTX-LABEL: grid_const_partial_escapemem(
; PTX: {
; PTX-NEXT: .reg .b32 %r<6>;
-; PTX-NEXT: .reg .b64 %rd<6>;
+; PTX-NEXT: .reg .b64 %rd<7>;
; PTX-EMPTY:
; PTX-NEXT: // %bb.0:
-; PTX-NEXT: mov.b64 %rd1, grid_const_partial_escapemem_param_0;
-; PTX-NEXT: ld.param.u64 %rd2, [grid_const_partial_escapemem_param_1];
-; PTX-NEXT: cvta.to.global.u64 %rd3, %rd2;
-; PTX-NEXT: mov.u64 %rd4, %rd1;
-; PTX-NEXT: cvta.param.u64 %rd5, %rd4;
-; PTX-NEXT: ld.u32 %r1, [%rd5];
-; PTX-NEXT: ld.u32 %r2, [%rd5+4];
-; PTX-NEXT: st.global.u64 [%rd3], %rd5;
+; PTX-NEXT: mov.b64 %rd2, grid_const_partial_escapemem_param_0;
+; PTX-NEXT: ld.param.u64 %rd3, [grid_const_partial_escapemem_param_1];
+; PTX-NEXT: cvta.to.global.u64 %rd4, %rd3;
+; PTX-NEXT: mov.u64 %rd5, %rd2;
+; PTX-NEXT: cvta.param.u64 %rd6, %rd5;
+; PTX-NEXT: ld.u32 %r1, [%rd6];
+; PTX-NEXT: ld.u32 %r2, [%rd6+4];
+; PTX-NEXT: st.global.u64 [%rd4], %rd6;
; PTX-NEXT: add.s32 %r3, %r1, %r2;
+; PTX-NEXT: mov.u64 %rd1, escape;
; PTX-NEXT: { // callseq 3, 0
; PTX-NEXT: .param .b64 param0;
-; PTX-NEXT: st.param.b64 [param0+0], %rd5;
+; PTX-NEXT: st.param.b64 [param0+0], %rd6;
; PTX-NEXT: .param .b32 retval0;
-; PTX-NEXT: call.uni (retval0),
-; PTX-NEXT: escape,
+; PTX-NEXT: prototype_3 : .callprototype (.param .b32 _) _ (.param .b64 _);
+; PTX-NEXT: call (retval0),
+; PTX-NEXT: %rd1,
; PTX-NEXT: (
; PTX-NEXT: param0
-; PTX-NEXT: );
+; PTX-NEXT: )
+; PTX-NEXT: , prototype_3;
; PTX-NEXT: ld.param.b32 %r4, [retval0+0];
; PTX-NEXT: } // callseq 3
; PTX-NEXT: st.param.b32 [func_retval0+0], %r3;
More information about the llvm-commits mailing list