[clang] [llvm] [OpenMP] Fix stack corruption due to argument mismatch (PR #96386)
Sushant Gokhale via cfe-commits
cfe-commits at lists.llvm.org
Thu Jun 27 00:36:48 PDT 2024
https://github.com/sushgokh updated https://github.com/llvm/llvm-project/pull/96386
>From af4dc96c25f32b477337cedaeb0a696f75840ac0 Mon Sep 17 00:00:00 2001
From: sgokhale <sgokhale at nvidia.com>
Date: Sat, 22 Jun 2024 17:16:24 +0530
Subject: [PATCH] [OpenMP] Fix stack corruption due to argument mismatch
While lowering (#pragma omp target update from), clang's generated
.omp_task_entry. is setting up 9 arguments while calling
__tgt_target_data_update_nowait_mapper.
At the same time, in __tgt_target_data_update_nowait_mapper, call to
targetData<TaskAsyncInfoWrapperTy>() is converted to a sibcall assuming
it has the argument count listed in the signature.
AARCH64 asm sequence for this is as follows (removed unrelated insns):
.omp_task_entry..108:
sub sp, sp, #32
stp x29, x30, sp, #16 // 16-byte Folded Spill
add x29, sp, #16
str x8, sp, #8. // stack canary
str xzr, [sp]
bl __tgt_target_data_update_nowait_mapper
__tgt_target_data_update_nowait_mapper:
sub sp, sp, #32
stp x29, x30, sp, #16 // 16-byte Folded Spill
add x29, sp, #16
str x8, sp, #8 // stack canary
// Sibcall argument setup
adrp x8, :got:_Z16targetDataUpdateP7ident_tR8DeviceTyiPPvS4_PlS5_S4_S4_R11AsyncInfoTyb
ldr x8, [x8, :got_lo12:_Z16targetDataUpdateP7ident_tR8DeviceTyiPPvS4_PlS5_S4_S4_R11AsyncInfoTyb]
stp x9, x8, x29, #16
adrp x8, .L.str.8
add x8, x8, :lo12:.L.str.8
str x8, x29, #32. <==. This is the insn that erases $fp
ldp x29, x30, sp, #16 // 16-byte Folded Reload
add sp, sp, #32
// Sibcall
b ZL10targetDataI22TaskAsyncInfoWrapperTyEvP7ident_tliPPvS4_PlS5_S4_S4_PFiS2_R8DeviceTyiS4_S4_S5_S5_S4_S4_R11AsyncInfoTybEPKcSD
On AArch64, call to __tgt_target_data_update_nowait_mapper in .omp_task_entry.
sets up only single space on stack and this results in ovewriting $fp
and subsequent stack corruption. This issue can be credited to discrepancy of
__tgt_target_data_update_nowait_mapper signature in
openmp/libomptarget/include/omptarget.h taking 13 arguments while
clang/lib/CodeGen/CGOpenMPRuntime.cpp and
llvm/include/llvm/Frontend/OpenMP/OMPKinds.def taking only 9 arguments.
This patch modifies __tgt_target_data_update_nowait_mapper signature
to match .omp_task_entry usage(and other 2 files mentioned above).
Co-authored-by: Kugan Vivekanandarajah <kvivekananda at nvidia.com>
---
clang/lib/CodeGen/CGOpenMPRuntime.cpp | 28 +++++++++++++++--
.../include/llvm/Frontend/OpenMP/OMPKinds.def | 30 ++++++++++++-------
2 files changed, 44 insertions(+), 14 deletions(-)
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index f6d12d46cfc07..fc3ad533666ca 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -10343,6 +10343,23 @@ void CGOpenMPRuntime::emitTargetDataStandAloneCall(
MapNamesArray,
InputInfo.MappersArray.emitRawPointer(CGF)};
+ // Nowait calls have header declarations that take 13 arguments. Hence, the
+ // divergence from the OffloadingArgs definition.
+ llvm::Value *NowaitOffloadingArgs[] = {
+ RTLoc,
+ DeviceID,
+ PointerNum,
+ InputInfo.BasePointersArray.emitRawPointer(CGF),
+ InputInfo.PointersArray.emitRawPointer(CGF),
+ InputInfo.SizesArray.emitRawPointer(CGF),
+ MapTypesArray,
+ MapNamesArray,
+ InputInfo.MappersArray.emitRawPointer(CGF),
+ llvm::Constant::getNullValue(CGF.Int32Ty),
+ llvm::Constant::getNullValue(CGF.VoidPtrTy),
+ llvm::Constant::getNullValue(CGF.Int32Ty),
+ llvm::Constant::getNullValue(CGF.VoidPtrTy)};
+
// Select the right runtime function call for each standalone
// directive.
const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
@@ -10430,9 +10447,14 @@ void CGOpenMPRuntime::emitTargetDataStandAloneCall(
llvm_unreachable("Unexpected standalone target data directive.");
break;
}
- CGF.EmitRuntimeCall(
- OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn),
- OffloadingArgs);
+ if (HasNowait)
+ CGF.EmitRuntimeCall(
+ OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn),
+ NowaitOffloadingArgs);
+ else
+ CGF.EmitRuntimeCall(
+ OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn),
+ OffloadingArgs);
};
auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
index fe09bb8177c28..ebd928470109a 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
@@ -438,19 +438,22 @@ __OMP_RTL(__tgt_target_kernel_nowait, false, Int32, IdentPtr, Int64, Int32,
Int32, VoidPtr, KernelArgsPtr, Int32, VoidPtr, Int32, VoidPtr)
__OMP_RTL(__tgt_target_data_begin_mapper, false, Void, IdentPtr, Int64, Int32, VoidPtrPtr,
VoidPtrPtr, Int64Ptr, Int64Ptr, VoidPtrPtr, VoidPtrPtr)
-__OMP_RTL(__tgt_target_data_begin_nowait_mapper, false, Void, IdentPtr, Int64, Int32,
- VoidPtrPtr, VoidPtrPtr, Int64Ptr, Int64Ptr, VoidPtrPtr, VoidPtrPtr)
+__OMP_RTL(__tgt_target_data_begin_nowait_mapper, false, Void, IdentPtr, Int64,
+ Int32, VoidPtrPtr, VoidPtrPtr, Int64Ptr, Int64Ptr, VoidPtrPtr,
+ VoidPtrPtr, Int32, VoidPtr, Int32, VoidPtr)
__OMP_RTL(__tgt_target_data_begin_mapper_issue, false, Void, IdentPtr, Int64, Int32,
VoidPtrPtr, VoidPtrPtr, Int64Ptr, Int64Ptr, VoidPtrPtr, VoidPtrPtr, AsyncInfoPtr)
__OMP_RTL(__tgt_target_data_begin_mapper_wait, false, Void, Int64, AsyncInfoPtr)
__OMP_RTL(__tgt_target_data_end_mapper, false, Void, IdentPtr, Int64, Int32, VoidPtrPtr,
VoidPtrPtr, Int64Ptr, Int64Ptr, VoidPtrPtr, VoidPtrPtr)
-__OMP_RTL(__tgt_target_data_end_nowait_mapper, false, Void, IdentPtr, Int64, Int32,
- VoidPtrPtr, VoidPtrPtr, Int64Ptr, Int64Ptr, VoidPtrPtr, VoidPtrPtr)
+__OMP_RTL(__tgt_target_data_end_nowait_mapper, false, Void, IdentPtr, Int64,
+ Int32, VoidPtrPtr, VoidPtrPtr, Int64Ptr, Int64Ptr, VoidPtrPtr,
+ VoidPtrPtr, Int32, VoidPtr, Int32, VoidPtr)
__OMP_RTL(__tgt_target_data_update_mapper, false, Void, IdentPtr, Int64, Int32,
VoidPtrPtr, VoidPtrPtr, Int64Ptr, Int64Ptr, VoidPtrPtr, VoidPtrPtr)
-__OMP_RTL(__tgt_target_data_update_nowait_mapper, false, Void, IdentPtr, Int64, Int32,
- VoidPtrPtr, VoidPtrPtr, Int64Ptr, Int64Ptr, VoidPtrPtr, VoidPtrPtr)
+__OMP_RTL(__tgt_target_data_update_nowait_mapper, false, Void, IdentPtr, Int64,
+ Int32, VoidPtrPtr, VoidPtrPtr, Int64Ptr, Int64Ptr, VoidPtrPtr,
+ VoidPtrPtr, Int32, VoidPtr, Int32, VoidPtr)
__OMP_RTL(__tgt_mapper_num_components, false, Int64, VoidPtr)
__OMP_RTL(__tgt_push_mapper_component, false, Void, VoidPtr, VoidPtr, VoidPtr,
Int64, Int64, VoidPtr)
@@ -1026,10 +1029,12 @@ __OMP_RTL_ATTRS(__tgt_target_kernel_nowait, ForkAttrs, SExt,
SExt))
__OMP_RTL_ATTRS(__tgt_target_data_begin_mapper, ForkAttrs, AttributeSet(),
ParamAttrs(AttributeSet(), AttributeSet(), SExt))
-__OMP_RTL_ATTRS(__tgt_target_data_begin_nowait_mapper, ForkAttrs, AttributeSet(),
+__OMP_RTL_ATTRS(__tgt_target_data_begin_nowait_mapper, ForkAttrs,
+ AttributeSet(),
ParamAttrs(AttributeSet(), AttributeSet(), SExt, AttributeSet(),
AttributeSet(), AttributeSet(), AttributeSet(),
- AttributeSet(), AttributeSet()))
+ AttributeSet(), AttributeSet(), SExt, AttributeSet(),
+ SExt, AttributeSet()))
__OMP_RTL_ATTRS(__tgt_target_data_begin_mapper_issue, AttributeSet(),
AttributeSet(),
ParamAttrs(AttributeSet(), AttributeSet(), SExt))
@@ -1038,13 +1043,16 @@ __OMP_RTL_ATTRS(__tgt_target_data_end_mapper, ForkAttrs, AttributeSet(),
__OMP_RTL_ATTRS(__tgt_target_data_end_nowait_mapper, ForkAttrs, AttributeSet(),
ParamAttrs(AttributeSet(), AttributeSet(), SExt, AttributeSet(),
AttributeSet(), AttributeSet(), AttributeSet(),
- AttributeSet(), AttributeSet()))
+ AttributeSet(), AttributeSet(), SExt, AttributeSet(),
+ SExt, AttributeSet()))
__OMP_RTL_ATTRS(__tgt_target_data_update_mapper, ForkAttrs, AttributeSet(),
ParamAttrs(AttributeSet(), AttributeSet(), SExt))
-__OMP_RTL_ATTRS(__tgt_target_data_update_nowait_mapper, ForkAttrs, AttributeSet(),
+__OMP_RTL_ATTRS(__tgt_target_data_update_nowait_mapper, ForkAttrs,
+ AttributeSet(),
ParamAttrs(AttributeSet(), AttributeSet(), SExt, AttributeSet(),
AttributeSet(), AttributeSet(), AttributeSet(),
- AttributeSet(), AttributeSet()))
+ AttributeSet(), AttributeSet(), SExt, AttributeSet(),
+ SExt, AttributeSet()))
__OMP_RTL_ATTRS(__tgt_mapper_num_components, ForkAttrs, AttributeSet(),
ParamAttrs())
__OMP_RTL_ATTRS(__tgt_push_mapper_component, ForkAttrs, AttributeSet(),
More information about the cfe-commits
mailing list