[Openmp-commits] [PATCH] D65836: Factor architecture dependent code out of loop.cu

Jon Chesterfield via Phabricator via Openmp-commits openmp-commits at lists.llvm.org
Tue Aug 6 17:15:51 PDT 2019


JonChesterfield created this revision.
JonChesterfield added reviewers: jdoerfert, ABataev, bollu, jfb, tra, grokos, Hahnfeld, guansong, xtian, gregrodgers, ronlieb, hfinkel, gtbercea, guraypp, arpith-jacob.
Herald added subscribers: openmp-commits, dexonsmith.
Herald added a project: OpenMP.
JonChesterfield edited the summary of this revision.

[libomptarget] Factor architecture dependent code out of loop.cu

Related to the patch series starting D64217 <https://reviews.llvm.org/D64217>. Added subscribers to said series as reviewers. This effort is smaller in scope.

This patch factors out just enough architecture dependent code from loop.cu to allow the same source to be used with amdgcn, given a different target_impl.h. Testing is that the same bitcode (modulo variable names) is generated for libomptarget before and after the refactor, for nvptx and the out of tree amdgcn.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D65836

Files:
  openmp/libomptarget/deviceRTLs/nvptx/src/loop.cu
  openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h


Index: openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h
===================================================================
--- /dev/null
+++ openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h
@@ -0,0 +1,43 @@
+//===------------ target_impl.h - NVPTX OpenMP GPU options ------- CUDA -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Definitions of target specific functions
+//
+//===----------------------------------------------------------------------===//
+#ifndef _TARGET_IMPL_H_
+#define _TARGET_IMPL_H_
+
+#include <stdint.h>
+
+#include "omptarget-nvptx.h"
+
+#define FORCEINLINE __forceinline__ __device__
+
+FORCEINLINE void __kmpc_impl_unpack(int64_t val, int32_t *lo, int32_t *hi) {
+  asm volatile("mov.b64 {%0,%1}, %2;" : "=r"(*lo), "=r"(*hi) : "l"(val));
+}
+
+FORCEINLINE int64_t __kmpc_impl_pack(int32_t lo, int32_t hi) {
+  int64_t val;
+  asm volatile("mov.b64 %0, {%1,%2};" : "=l"(val) : "r"(lo), "r"(hi));
+  return val;
+}
+
+typedef uint32_t __kmpc_impl_lanemask_t;
+
+FORCEINLINE __kmpc_impl_lanemask_t __kmpc_impl_lanemask_lt() {
+  uint32_t res;
+  asm("mov.u32 %0, %%lanemask_lt;" : "=r"(res));
+  return res;
+}
+
+FORCEINLINE int __kmpc_impl_ffs(uint32_t x) { return __ffs(x); }
+
+FORCEINLINE int __kmpc_impl_popc(uint32_t x) { return __popc(x); }
+
+#endif
Index: openmp/libomptarget/deviceRTLs/nvptx/src/loop.cu
===================================================================
--- openmp/libomptarget/deviceRTLs/nvptx/src/loop.cu
+++ openmp/libomptarget/deviceRTLs/nvptx/src/loop.cu
@@ -13,6 +13,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "omptarget-nvptx.h"
+#include "target_impl.h"
 
 ////////////////////////////////////////////////////////////////////////////////
 ////////////////////////////////////////////////////////////////////////////////
@@ -381,20 +382,18 @@
 
   INLINE static int64_t Shuffle(unsigned active, int64_t val, int leader) {
     int lo, hi;
-    asm volatile("mov.b64 {%0,%1}, %2;" : "=r"(lo), "=r"(hi) : "l"(val));
+    __kmpc_impl_unpack(val, &lo, &hi);
     hi = __SHFL_SYNC(active, hi, leader);
     lo = __SHFL_SYNC(active, lo, leader);
-    asm volatile("mov.b64 %0, {%1,%2};" : "=l"(val) : "r"(lo), "r"(hi));
-    return val;
+    return __kmpc_impl_pack(lo, hi);
   }
 
   INLINE static uint64_t NextIter() {
-    unsigned int active = __ACTIVEMASK();
-    int leader = __ffs(active) - 1;
-    int change = __popc(active);
-    unsigned lane_mask_lt;
-    asm("mov.u32 %0, %%lanemask_lt;" : "=r"(lane_mask_lt));
-    unsigned int rank = __popc(active & lane_mask_lt);
+    __kmpc_impl_lanemask_t active = __ACTIVEMASK();
+    int leader = __kmpc_impl_ffs(active) - 1;
+    int change = __kmpc_impl_popc(active);
+    __kmpc_impl_lanemask_t lane_mask_lt = __kmpc_impl_lanemask_lt();
+    unsigned int rank = __kmpc_impl_popc(active & lane_mask_lt);
     uint64_t warp_res;
     if (rank == 0) {
       warp_res = atomicAdd(


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D65836.213755.patch
Type: text/x-patch
Size: 3221 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/openmp-commits/attachments/20190807/59ac691e/attachment-0001.bin>


More information about the Openmp-commits mailing list