[Openmp-commits] [PATCH] D14031: [OpenMP] Initial implementation of OpenMP offloading library - libomptarget.
Jonas Hahnfeld via Openmp-commits
openmp-commits at lists.llvm.org
Mon Oct 26 07:57:35 PDT 2015
Hahnfeld added a comment.
Great work!
Would it be possible to split this into multiple, smaller reviews (one each for the main library, the plugins, and the nvptx device runtime)?
Greetings
Jonas
================
Comment at: libomptarget/deviceRTLs/nvptx/src/parallel.cu:151-158
@@ +150,10 @@
+ }
+// } else {
+// // not a for with a simd inside: use only one lane
+// // we may have started thread_limit*simd_info CUDA threads
+// // and we need to set the number of threads to thread_limit value
+// // FIXME: is this always the case, even if numLanes > 1?
+//// newTaskDescr->ThreadId() = threadIdx.x;
+// //newTaskDescr->ThreadsInTeam();// = newTaskDescr->ThreadLimit();
+// }
+}
----------------
This block is commented out - should it be committed at all?
================
Comment at: libomptarget/deviceRTLs/nvptx/src/reduction.cu:71-117
@@ +70,49 @@
+ }
+// return 2;
+ /**
+ * Only when all the threads in a block are doing reduction,
+ * the warpBlockRedu is used. Otherwise atomic.
+ * check the data type, too.
+ * A special case: when the size of thread group is one,
+ * do reduction directly.
+ **/
+
+ // Note: this code provokes warning because it follows a "return"
+
+ //since there is no thread interface yet, just infer from the
+ // result of ballot
+#if 0
+ unsigned tnum = __ballot(1);
+ if (tnum != (~0x0)) { //assume swapSize is 32
+ return 2;
+ }
+
+#if 0
+ if (threadIdx.x == 0) {
+ if ((void *)reductFct != (void *)omp_reduction_op_gpu) {
+ printf("function pointer value is not correct\n");
+ } else {
+ printf("function pointer value is correct\n");
+ }
+ }
+#endif
+
+ //printf("function pointer %p %d %p\n", reductFct, reduce_size, omp_reduction_op_gpu);
+ if (reduce_size == 0) {
+ (*reductFct)((char*)reduce_data, (char*)reduce_data);
+ } else {
+ //omp_reduction_op_gpu((char*)reduce_data, (char*)reduce_data);
+ (*gpu_callback)((char*)reduce_data, (char*)reduce_data);
+ }
+
+ //int **myp = (int **) reduce_data;
+ // the results are with thread 0. Reduce to the shared one
+ if (threadIdx.x == 0) {
+ //printf("function pointer %p %p\n", reductFct, omp_reduction_op);
+ // printf("my result %d\n", *myp[0]);
+ return 1;
+ } else {
+ return 0;
+ }
+#endif
+}
----------------
This whole section is either commented out or disabled with (nested) `#if 0` - should it be committed?
================
Comment at: libomptarget/deviceRTLs/nvptx/src/reduction.cu:157-178
@@ +156,24 @@
+ // Notice: as above, uncomment if 0 once this code below is ready for shipping
+#if 0
+ unsigned tnum = __ballot(1);
+ if (tnum != (~0x0)) { //assume swapSize is 32
+ return 2;
+ }
+
+ if (threadIdx.x == 0) {
+ printf("choose block reduction\n");
+ }
+
+ (*reductFct)(reduce_data, reduce_data);
+ //omp_reduction_op((char*)reduce_data, (char*)reduce_data);
+
+ int **myp = (int **) reduce_data;
+ // the results are with thread 0. Reduce to the shared one
+ if (threadIdx.x == 0) {
+ printf("my result %d\n", *myp[0]);
+ return 1;
+ } else {
+ return 0;
+ }
+#endif
+}
----------------
The comment above this `#if 0` says the code is not ready for shipping yet - should it be committed in this state?
================
Comment at: libomptarget/deviceRTLs/nvptx/src/reduction.cu:780-795
@@ +779,18 @@
+
+#if 0
+template <
+ omptarget_nvptx_BINOP_t binop // enum describing the operation
+>
+INLINE __device__ float Compute<float, binop>(float a, float b) // a is old value, b is new value
+{
+ OpType res = 0;
+ if (binop == omptarget_nvptx_add) res = a + b;
+ if (binop == omptarget_nvptx_sub) res = a - b;
+ if (binop == omptarget_nvptx_mul) res = a * b;
+ if (binop == omptarget_nvptx_div) res = a / b;
+ if (binop == omptarget_nvptx_min) res = a < b ? a : b;
+ if (binop == omptarget_nvptx_max) res = a > b ? a : b;
+ return res;
+}
+#endif
+
----------------
Another `#if 0` block - should this disabled code be committed?
================
Comment at: libomptarget/deviceRTLs/nvptx/src/reduction.cu:1001-1028
@@ +1000,30 @@
+ return __shfl_down(val, delta, size);
+#if 0
+ T ret = 0;
+ int localv;
+ int remotev;
+ /* not finished */
+ switch(sizeof(T)) {
+ case 1:
+ case 2:
+ localv = reinterpret_cast<int>(val);
+ remotev = __shfl_down(localv, delta, size);
+ ret = reinterpret_cast<T>(remotev);
+ break;
+ break;
+
+ }
+ return ret;
+#endif
+}
+
+#if 0
+template<>
+INLINE __device__ float myshfldown<float>(float val, unsigned int delta, int size) {
+ int t = __float_as_int(val);
+ int t1 = __shfl_down(t, delta, size);
+ float ret = __int_as_float<float>(t1);
+ return ret;
+}
+#endif
+
----------------
Some more `#if 0` blocks, one of them marked "not finished" - should these be committed?
================
Comment at: libomptarget/deviceRTLs/nvptx/src/reduction.cu:1239-1246
@@ +1238,10 @@
+
+#if 1
+ARRAY_GEN_ALLOP_INTEGER(ARRAYATOMIC_GENOP, fixed1, int8_t);
+ARRAY_GEN_ALLOP_INTEGER(ARRAYATOMIC_GENOP, fixed2, int16_t);
+ARRAY_GEN_ALLOP_INTEGER(ARRAYATOMIC_GENOP, fixed4, int32_t);
+ARRAY_GEN_ALLOP_INTEGER(ARRAYATOMIC_GENOP, fixed8, int64_t);
+ARRAY_GEN_ALLOP_FLOAT(ARRAYATOMIC_GENOP,float4, float);
+ARRAY_GEN_ALLOP_FLOAT(ARRAYATOMIC_GENOP,float8, double);
+#endif
+
----------------
Why `#if 1`? The guard is always true, so it could be dropped.
http://reviews.llvm.org/D14031
More information about the Openmp-commits
mailing list