[clang] [llvm] [OpenMP] Fix stack corruption due to argument mismatch (PR #96386)

Sushant Gokhale via cfe-commits cfe-commits at lists.llvm.org
Thu Jun 27 00:36:48 PDT 2024


https://github.com/sushgokh updated https://github.com/llvm/llvm-project/pull/96386

>From af4dc96c25f32b477337cedaeb0a696f75840ac0 Mon Sep 17 00:00:00 2001
From: sgokhale <sgokhale at nvidia.com>
Date: Sat, 22 Jun 2024 17:16:24 +0530
Subject: [PATCH] [OpenMP] Fix stack corruption due to argument mismatch

While lowering (#pragma omp target update from), clang's generated
.omp_task_entry. is setting up 9 arguments while calling
__tgt_target_data_update_nowait_mapper.

At the same time, in __tgt_target_data_update_nowait_mapper, call to
targetData<TaskAsyncInfoWrapperTy>() is converted to a sibcall assuming
it has the argument count listed in the signature.

AARCH64 asm sequence for this is as follows (removed unrelated insns):

.omp_task_entry..108:
  sub   sp, sp, #32
  stp   x29, x30, sp, #16       // 16-byte Folded Spill
  add   x29, sp, #16
  str   x8, sp, #8. // stack canary
  str   xzr, [sp]
  bl   __tgt_target_data_update_nowait_mapper

__tgt_target_data_update_nowait_mapper:
  sub   sp, sp, #32
  stp   x29, x30, sp, #16       // 16-byte Folded Spill
  add   x29, sp, #16
  str   x8, sp, #8 // stack canary
  // Sibcall argument setup
  adrp  x8, :got:_Z16targetDataUpdateP7ident_tR8DeviceTyiPPvS4_PlS5_S4_S4_R11AsyncInfoTyb
  ldr   x8, [x8, :got_lo12:_Z16targetDataUpdateP7ident_tR8DeviceTyiPPvS4_PlS5_S4_S4_R11AsyncInfoTyb]
  stp   x9, x8, x29, #16
  adrp  x8, .L.str.8
  add   x8, x8, :lo12:.L.str.8
  str   x8, x29, #32. <==. This is the insn that erases $fp

  ldp   x29, x30, sp, #16       // 16-byte Folded Reload
  add   sp, sp, #32
  // Sibcall
  b    ZL10targetDataI22TaskAsyncInfoWrapperTyEvP7ident_tliPPvS4_PlS5_S4_S4_PFiS2_R8DeviceTyiS4_S4_S5_S5_S4_S4_R11AsyncInfoTybEPKcSD

On AArch64, call to __tgt_target_data_update_nowait_mapper in .omp_task_entry.
sets up only single space on stack and this results in ovewriting $fp
and subsequent stack corruption. This issue can be credited to discrepancy of
__tgt_target_data_update_nowait_mapper signature in
openmp/libomptarget/include/omptarget.h taking 13 arguments while
clang/lib/CodeGen/CGOpenMPRuntime.cpp and
llvm/include/llvm/Frontend/OpenMP/OMPKinds.def taking only 9 arguments.

This patch modifies __tgt_target_data_update_nowait_mapper signature
to match .omp_task_entry usage(and other 2 files mentioned above).

Co-authored-by: Kugan Vivekanandarajah <kvivekananda at nvidia.com>
---
 clang/lib/CodeGen/CGOpenMPRuntime.cpp         | 28 +++++++++++++++--
 .../include/llvm/Frontend/OpenMP/OMPKinds.def | 30 ++++++++++++-------
 2 files changed, 44 insertions(+), 14 deletions(-)

diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index f6d12d46cfc07..fc3ad533666ca 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -10343,6 +10343,23 @@ void CGOpenMPRuntime::emitTargetDataStandAloneCall(
         MapNamesArray,
         InputInfo.MappersArray.emitRawPointer(CGF)};
 
+    // Nowait calls have header declarations that take 13 arguments. Hence, the
+    // divergence from the OffloadingArgs definition.
+    llvm::Value *NowaitOffloadingArgs[] = {
+        RTLoc,
+        DeviceID,
+        PointerNum,
+        InputInfo.BasePointersArray.emitRawPointer(CGF),
+        InputInfo.PointersArray.emitRawPointer(CGF),
+        InputInfo.SizesArray.emitRawPointer(CGF),
+        MapTypesArray,
+        MapNamesArray,
+        InputInfo.MappersArray.emitRawPointer(CGF),
+        llvm::Constant::getNullValue(CGF.Int32Ty),
+        llvm::Constant::getNullValue(CGF.VoidPtrTy),
+        llvm::Constant::getNullValue(CGF.Int32Ty),
+        llvm::Constant::getNullValue(CGF.VoidPtrTy)};
+
     // Select the right runtime function call for each standalone
     // directive.
     const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
@@ -10430,9 +10447,14 @@ void CGOpenMPRuntime::emitTargetDataStandAloneCall(
       llvm_unreachable("Unexpected standalone target data directive.");
       break;
     }
-    CGF.EmitRuntimeCall(
-        OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn),
-        OffloadingArgs);
+    if (HasNowait)
+      CGF.EmitRuntimeCall(
+          OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn),
+          NowaitOffloadingArgs);
+    else
+      CGF.EmitRuntimeCall(
+          OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn),
+          OffloadingArgs);
   };
 
   auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
index fe09bb8177c28..ebd928470109a 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
@@ -438,19 +438,22 @@ __OMP_RTL(__tgt_target_kernel_nowait, false, Int32, IdentPtr, Int64, Int32,
           Int32, VoidPtr, KernelArgsPtr, Int32, VoidPtr, Int32, VoidPtr)
 __OMP_RTL(__tgt_target_data_begin_mapper, false, Void, IdentPtr, Int64, Int32, VoidPtrPtr,
           VoidPtrPtr, Int64Ptr, Int64Ptr, VoidPtrPtr, VoidPtrPtr)
-__OMP_RTL(__tgt_target_data_begin_nowait_mapper, false, Void, IdentPtr, Int64, Int32,
-          VoidPtrPtr, VoidPtrPtr, Int64Ptr, Int64Ptr, VoidPtrPtr, VoidPtrPtr)
+__OMP_RTL(__tgt_target_data_begin_nowait_mapper, false, Void, IdentPtr, Int64,
+          Int32, VoidPtrPtr, VoidPtrPtr, Int64Ptr, Int64Ptr, VoidPtrPtr,
+          VoidPtrPtr, Int32, VoidPtr, Int32, VoidPtr)
 __OMP_RTL(__tgt_target_data_begin_mapper_issue, false, Void, IdentPtr, Int64, Int32,
           VoidPtrPtr, VoidPtrPtr, Int64Ptr, Int64Ptr, VoidPtrPtr, VoidPtrPtr, AsyncInfoPtr)
 __OMP_RTL(__tgt_target_data_begin_mapper_wait, false, Void, Int64, AsyncInfoPtr)
 __OMP_RTL(__tgt_target_data_end_mapper, false, Void, IdentPtr, Int64, Int32, VoidPtrPtr,
           VoidPtrPtr, Int64Ptr, Int64Ptr, VoidPtrPtr, VoidPtrPtr)
-__OMP_RTL(__tgt_target_data_end_nowait_mapper, false, Void, IdentPtr, Int64, Int32,
-          VoidPtrPtr, VoidPtrPtr, Int64Ptr, Int64Ptr, VoidPtrPtr, VoidPtrPtr)
+__OMP_RTL(__tgt_target_data_end_nowait_mapper, false, Void, IdentPtr, Int64,
+          Int32, VoidPtrPtr, VoidPtrPtr, Int64Ptr, Int64Ptr, VoidPtrPtr,
+          VoidPtrPtr, Int32, VoidPtr, Int32, VoidPtr)
 __OMP_RTL(__tgt_target_data_update_mapper, false, Void, IdentPtr, Int64, Int32,
           VoidPtrPtr, VoidPtrPtr, Int64Ptr, Int64Ptr, VoidPtrPtr, VoidPtrPtr)
-__OMP_RTL(__tgt_target_data_update_nowait_mapper, false, Void, IdentPtr, Int64, Int32,
-          VoidPtrPtr, VoidPtrPtr, Int64Ptr, Int64Ptr, VoidPtrPtr, VoidPtrPtr)
+__OMP_RTL(__tgt_target_data_update_nowait_mapper, false, Void, IdentPtr, Int64,
+          Int32, VoidPtrPtr, VoidPtrPtr, Int64Ptr, Int64Ptr, VoidPtrPtr,
+          VoidPtrPtr, Int32, VoidPtr, Int32, VoidPtr)
 __OMP_RTL(__tgt_mapper_num_components, false, Int64, VoidPtr)
 __OMP_RTL(__tgt_push_mapper_component, false, Void, VoidPtr, VoidPtr, VoidPtr,
           Int64, Int64, VoidPtr)
@@ -1026,10 +1029,12 @@ __OMP_RTL_ATTRS(__tgt_target_kernel_nowait, ForkAttrs, SExt,
                            SExt))
 __OMP_RTL_ATTRS(__tgt_target_data_begin_mapper, ForkAttrs, AttributeSet(),
                 ParamAttrs(AttributeSet(), AttributeSet(), SExt))
-__OMP_RTL_ATTRS(__tgt_target_data_begin_nowait_mapper, ForkAttrs, AttributeSet(),
+__OMP_RTL_ATTRS(__tgt_target_data_begin_nowait_mapper, ForkAttrs,
+                AttributeSet(),
                 ParamAttrs(AttributeSet(), AttributeSet(), SExt, AttributeSet(),
                            AttributeSet(), AttributeSet(), AttributeSet(),
-                           AttributeSet(), AttributeSet()))
+                           AttributeSet(), AttributeSet(), SExt, AttributeSet(),
+                           SExt, AttributeSet()))
 __OMP_RTL_ATTRS(__tgt_target_data_begin_mapper_issue, AttributeSet(),
                 AttributeSet(),
                 ParamAttrs(AttributeSet(), AttributeSet(), SExt))
@@ -1038,13 +1043,16 @@ __OMP_RTL_ATTRS(__tgt_target_data_end_mapper, ForkAttrs, AttributeSet(),
 __OMP_RTL_ATTRS(__tgt_target_data_end_nowait_mapper, ForkAttrs, AttributeSet(),
                 ParamAttrs(AttributeSet(), AttributeSet(), SExt, AttributeSet(),
                            AttributeSet(), AttributeSet(), AttributeSet(),
-                           AttributeSet(), AttributeSet()))
+                           AttributeSet(), AttributeSet(), SExt, AttributeSet(),
+                           SExt, AttributeSet()))
 __OMP_RTL_ATTRS(__tgt_target_data_update_mapper, ForkAttrs, AttributeSet(),
                 ParamAttrs(AttributeSet(), AttributeSet(), SExt))
-__OMP_RTL_ATTRS(__tgt_target_data_update_nowait_mapper, ForkAttrs, AttributeSet(),
+__OMP_RTL_ATTRS(__tgt_target_data_update_nowait_mapper, ForkAttrs,
+                AttributeSet(),
                 ParamAttrs(AttributeSet(), AttributeSet(), SExt, AttributeSet(),
                            AttributeSet(), AttributeSet(), AttributeSet(),
-                           AttributeSet(), AttributeSet()))
+                           AttributeSet(), AttributeSet(), SExt, AttributeSet(),
+                           SExt, AttributeSet()))
 __OMP_RTL_ATTRS(__tgt_mapper_num_components, ForkAttrs, AttributeSet(),
                 ParamAttrs())
 __OMP_RTL_ATTRS(__tgt_push_mapper_component, ForkAttrs, AttributeSet(),



More information about the cfe-commits mailing list