[test-suite] r312482 - Revert "[test-suite] Adding the CLAMR mini-app"
Renato Golin via llvm-commits
llvm-commits at lists.llvm.org
Mon Sep 4 04:27:13 PDT 2017
Removed: test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/mesh.cpp
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C%2B%2B/CLAMR/mesh.cpp?rev=312481&view=auto
==============================================================================
--- test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/mesh.cpp (original)
+++ test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/mesh.cpp (removed)
@@ -1,10456 +0,0 @@
-/*
- * Copyright (c) 2011-2012, Los Alamos National Security, LLC.
- * All rights Reserved.
- *
- * Copyright 2011-2012. Los Alamos National Security, LLC. This software was produced
- * under U.S. Government contract DE-AC52-06NA25396 for Los Alamos National
- * Laboratory (LANL), which is operated by Los Alamos National Security, LLC
- * for the U.S. Department of Energy. The U.S. Government has rights to use,
- * reproduce, and distribute this software. NEITHER THE GOVERNMENT NOR LOS
- * ALAMOS NATIONAL SECURITY, LLC MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR
- * ASSUMES ANY LIABILITY FOR THE USE OF THIS SOFTWARE. If software is modified
- * to produce derivative works, such modified software should be clearly marked,
- * so as not to confuse it with the version available from LANL.
- *
- * Additionally, redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of the Los Alamos National Security, LLC, Los Alamos
- * National Laboratory, LANL, the U.S. Government, nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE LOS ALAMOS NATIONAL SECURITY, LLC AND
- * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT
- * NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL LOS ALAMOS NATIONAL
- * SECURITY, LLC OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
- * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
- * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- *
- * CLAMR -- LA-CC-11-094
- * This research code is being developed as part of the
- * 2011 X Division Summer Workshop for the express purpose
- * of a collaborative code for development of ideas in
- * the implementation of AMR codes for Exascale platforms
- *
- * AMR implementation of the Wave code previously developed
- * as a demonstration code for regular grids on Exascale platforms
- * as part of the Supercomputing Challenge and Los Alamos
- * National Laboratory
- *
- * Authors: Bob Robey XCP-2 brobey at lanl.gov
- * Neal Davis davis68 at lanl.gov, davis68 at illinois.edu
- * David Nicholaeff dnic at lanl.gov, mtrxknight at aol.com
- * Dennis Trujillo dptrujillo at lanl.gov, dptru10 at gmail.com
- *
- */
-#ifdef HAVE_MPI
-#include "mpi.h"
-#endif
-
-#include <algorithm>
-#include <unistd.h>
-#include <limits.h>
-#include <time.h>
-#ifdef _OPENMP
-#include <omp.h>
-#endif
-//#include "hsfc.h"
-#include "KDTree.h"
-#include "mesh.h"
-#ifdef HAVE_OPENCL
-#include "ezcl/ezcl.h"
-#endif
-#include "timer.h"
-#ifdef HAVE_MPI
-#include "l7/l7.h"
-#endif
-#include "reduce.h"
-#include "genmalloc.h"
-#include "hash.h"
-
-// Compile-time debug switch for this file (set to 0 here).
-#define DEBUG 0
-//#define BOUNDS_CHECK 1
-
-// NOTE(review): this fallback can never fire because DEBUG is defined
-// unconditionally a few lines above.
-#ifndef DEBUG
-#define DEBUG 0
-#endif
-#define DEBUG_RESTORE_VALS 1
-
-// Prototype only: scan() takes int input/output buffers and an int length.
-// Its definition is not in the part of the file shown here.
-typedef int scanInt;
-void scan ( scanInt *input , scanInt *output , scanInt length);
-
-// REZONE_NO_OPTIMIZATION is defined only for builds without OpenMP.
-#ifdef _OPENMP
-#undef REZONE_NO_OPTIMIZATION
-#else
-#define REZONE_NO_OPTIMIZATION 1
-#endif
-
-#define TIMING_LEVEL 2
-
-// Function-like macro: each argument may be evaluated twice, so do not pass
-// expressions with side effects.
-#define MIN(a,b) ((a) < (b) ? (a) : (b))
-
-// NOTE(review): expands to 2 << a, i.e. 2^(a+1) rather than 2^a -- confirm
-// that callers expect this.
-#define IPOW2(a) (2 << (a))
-
-// Tolerances chosen by the precision macro. If none of the three precision
-// macros is defined, CONSERVATION_EPS and STATE_EPS stay undefined.
-#if defined(MINIMUM_PRECISION)
-#define CONSERVATION_EPS .1
-#define STATE_EPS 15.0
-
-#elif defined(MIXED_PRECISION) // intermediate values calculated high precision and stored as floats
-#define CONSERVATION_EPS .02
-#define STATE_EPS .025
-
-#elif defined(FULL_PRECISION)
-#define CONSERVATION_EPS .02
-#define STATE_EPS .025
-
-#endif
-
-typedef unsigned int uint;
-#ifdef __APPLE_CC__
-typedef unsigned long ulong;
-#endif
-
-#define TWO 2
-#define HALF 0.5
-
-// Stencil selection: the "new" stencil is enabled, the "old" one and the
-// stencil warning are commented out.
-#define __NEW_STENCIL__
-//#define __OLD_STENCIL__
-//#define STENCIL_WARNING 1
-
-// Run-time mirror of STENCIL_WARNING (0 while the macro above stays disabled).
-#ifdef STENCIL_WARNING
-int do_stencil_warning=1;
-#else
-int do_stencil_warning=0;
-#endif
-
-#ifdef HAVE_OPENCL
-#include "mesh_kernel.inc"
-#endif
-
-// localStencil is defined in another translation unit; the three flags below
-// are defined here with external linkage and no explicit initializer.
-extern bool localStencil;
-int calc_neighbor_type;
-bool dynamic_load_balance_on;
-bool neighbor_remap;
-
-// Only present in OpenMP builds.
-#ifdef _OPENMP
-static bool iversion_flag = false;
-#endif
-
-// Printable names for the mesh timers: one string per slot of a
-// MESH_TIMER_SIZE-element table (24 names are listed).
-// NOTE(review): the order presumably has to match the mesh timer enumeration
-// declared elsewhere -- confirm before inserting or reordering entries.
-static const char *mesh_timer_descriptor[MESH_TIMER_SIZE] = {
- "mesh_timer_count_BCs",
- "mesh_timer_calc_neighbors",
- "mesh_timer_hash_setup",
- "mesh_timer_hash_query",
- "mesh_timer_find_boundary",
- "mesh_timer_push_setup",
- "mesh_timer_push_boundary",
- "mesh_timer_local_list",
- "mesh_timer_layer1",
- "mesh_timer_layer2",
- "mesh_timer_layer_list",
- "mesh_timer_copy_mesh_data",
- "mesh_timer_fill_mesh_ghost",
- "mesh_timer_fill_neigh_ghost",
- "mesh_timer_set_corner_neigh",
- "mesh_timer_neigh_adjust",
- "mesh_timer_setup_comm",
- "mesh_timer_kdtree_setup",
- "mesh_timer_kdtree_query",
- "mesh_timer_refine_smooth",
- "mesh_timer_rezone_all",
- "mesh_timer_partition",
- "mesh_timer_calc_spatial_coordinates",
- "mesh_timer_load_balance"
-};
-
-// File-scope OpenCL kernel handles, compiled in only when HAVE_OPENCL is set.
-// They are declared here without initializers; nothing in this part of the
-// file shows where they are created or released.
-#ifdef HAVE_OPENCL
-cl_kernel kernel_hash_adjust_sizes;
-cl_kernel kernel_hash_setup;
-cl_kernel kernel_hash_setup_local;
-cl_kernel kernel_neighbor_init;
-cl_kernel kernel_calc_neighbors;
-cl_kernel kernel_calc_neighbors_local;
-cl_kernel kernel_calc_border_cells;
-cl_kernel kernel_calc_border_cells2;
-cl_kernel kernel_finish_scan;
-cl_kernel kernel_get_border_data;
-cl_kernel kernel_calc_layer1;
-cl_kernel kernel_calc_layer1_sethash;
-cl_kernel kernel_calc_layer2;
-cl_kernel kernel_get_border_data2;
-cl_kernel kernel_calc_layer2_sethash;
-cl_kernel kernel_copy_mesh_data;
-cl_kernel kernel_fill_mesh_ghost;
-cl_kernel kernel_fill_neighbor_ghost;
-cl_kernel kernel_set_corner_neighbor;
-cl_kernel kernel_adjust_neighbors_local;
-cl_kernel kernel_reduction_scan2;
-cl_kernel kernel_reduction_count;
-cl_kernel kernel_reduction_count2;
-cl_kernel kernel_hash_size;
-cl_kernel kernel_finish_hash_size;
-cl_kernel kernel_calc_spatial_coordinates;
-cl_kernel kernel_count_BCs;
-cl_kernel kernel_do_load_balance_lower;
-cl_kernel kernel_do_load_balance_middle;
-cl_kernel kernel_do_load_balance_upper;
-// The double-precision variant is omitted in MINIMUM_PRECISION builds.
-#ifndef MINIMUM_PRECISION
-cl_kernel kernel_do_load_balance_double;
-#endif
-cl_kernel kernel_do_load_balance_float;
-cl_kernel kernel_refine_smooth;
-cl_kernel kernel_coarsen_smooth;
-cl_kernel kernel_coarsen_check_block;
-cl_kernel kernel_rezone_all;
-cl_kernel kernel_rezone_neighbors;
-// The double-precision variant is omitted in MINIMUM_PRECISION builds.
-#ifndef MINIMUM_PRECISION
-cl_kernel kernel_rezone_one_double;
-#endif
-cl_kernel kernel_rezone_one_float;
-cl_kernel kernel_copy_mpot_ghost_data;
-cl_kernel kernel_set_boundary_refinement;
-#endif
-
-// Defined in another translation unit.
-extern size_t hash_header_size;
-extern int choose_hash_method;
-
-/**
- * Dump the mesh to a text file named "gridNN.gph" in the current directory.
- *
- * The file holds one "viewport" line with the mesh extents, one "rect" line
- * per cell, a polyline through the cell centres in storage order ("line_init"
- * for cell 0, "line" for the rest) and one "text" line per cell carrying the
- * cell index at the cell centre.
- *
- * If the file cannot be opened, a message is written to stderr and nothing
- * else is done.
- *
- * @param ncycle  Cycle number used in the file name; negative values are
- *                treated as 0.
- */
-void Mesh::write_grid(int ncycle)
-{
-   char filename[20];
-
-   if (ncycle < 0) ncycle = 0;
-   // "grid" + at most 10 digits + ".gph" + NUL fits in 20 bytes; snprintf
-   // makes that bound explicit instead of implicit.
-   snprintf(filename, sizeof(filename), "grid%02d.gph", ncycle);
-
-   FILE *fp = fopen(filename, "w");
-   if (fp == NULL) {
-      // Without this check every fprintf below would be handed a NULL stream.
-      fprintf(stderr, "Error -- could not open %s for writing\n", filename);
-      return;
-   }
-
-   fprintf(fp,"viewport %lf %lf %lf %lf\n",xmin,ymin,xmax,ymax);
-   for (uint ic = 0; ic < ncells; ic++) {
-      fprintf(fp,"rect %lf %lf %lf %lf\n",x[ic],y[ic],x[ic]+dx[ic],y[ic]+dy[ic]);
-   }
-
-   // The polyline starts at cell 0, so skip it entirely for an empty mesh
-   // rather than reading x[0]/y[0].
-   if (ncells > 0) {
-      fprintf(fp,"line_init %lf %lf\n",x[0]+0.5*dx[0],y[0]+0.5*dy[0]);
-      for (uint ic = 1; ic < ncells; ic++){
-         fprintf(fp,"line %lf %lf\n",x[ic]+0.5*dx[ic],y[ic]+0.5*dy[ic]);
-      }
-   }
-
-   for (uint ic = 0; ic < ncells; ic++){
-      fprintf(fp,"text %lf %lf %d\n",x[ic]+0.5*dx[ic],y[ic]+0.5*dy[ic],ic);
-   }
-
-   fclose(fp);
-}
-
-/**
- * Construct a mesh from a text description.
- *
- * Expected layout of `fin`, one item per line (each line is read into an
- * 80-byte buffer):
- *   levmax <int>
- *   cells  <count>
- *   numpe  <int>
- *   ndim   <int>
- *   xaxis  <min> <delta>
- *   yaxis  <min> <delta>
- *   zaxis  <min> <delta>      (only when ndim == THREE_DIMENSIONAL)
- *   <one line that is read and ignored>
- *   <index> <i> <j> <level>   (one record per cell, until end of file)
- *
- * A missing header line terminates the program with exit(-1). After reading,
- * spatial coordinates are computed with ibase 0, the KD-tree is initialized
- * and the mesh is printed. A mismatch between the number of records and the
- * "cells" value is reported on stdout.
- *
- * The random generator is seeded from the current time via srand48().
- *
- * @param fin    Open stream positioned at the start of the description.
- * @param numpe  Receives the value of the "numpe" line.
- */
-Mesh::Mesh(FILE *fin, int *numpe)
-{
-   char string[80];
-   ibase = 1;
-
-   time_t trand;
-   time(&trand);
-   srand48((long)trand);
-
-   if(fgets(string, 80, fin) == NULL) exit(-1);
-   sscanf(string,"levmax %d",&levmx);
-   if(fgets(string, 80, fin) == NULL) exit(-1);
-   // ncells is a size_t, so it is read with %zu rather than %ld.
-   sscanf(string,"cells %zu",&ncells);
-   if(fgets(string, 80, fin) == NULL) exit(-1);
-   sscanf(string,"numpe %d",numpe);
-   if(fgets(string, 80, fin) == NULL) exit(-1);
-   sscanf(string,"ndim %d",&ndim);
-   if(fgets(string, 80, fin) == NULL) exit(-1);
-#ifdef MINIMUM_PRECISION
-   sscanf(string,"xaxis %f %f",&xmin, &deltax);
-#else
-   sscanf(string,"xaxis %lf %lf",&xmin, &deltax);
-#endif
-   if(fgets(string, 80, fin) == NULL) exit(-1);
-   // The y and z axes are read the same way as the x axis. The previous
-   // (double*) casts with %lf would store 8 bytes into each target even when
-   // the build uses single precision.
-   // NOTE(review): assumes ymin/deltay/zmin/deltaz share the type of
-   // xmin/deltax -- confirm against the class declaration.
-#ifdef MINIMUM_PRECISION
-   sscanf(string,"yaxis %f %f",&ymin, &deltay);
-#else
-   sscanf(string,"yaxis %lf %lf",&ymin, &deltay);
-#endif
-   if (ndim == THREE_DIMENSIONAL){
-      if(fgets(string, 80, fin) == NULL) exit(-1);
-#ifdef MINIMUM_PRECISION
-      sscanf(string,"zaxis %f %f",&zmin, &deltaz);
-#else
-      sscanf(string,"zaxis %lf %lf",&zmin, &deltaz);
-#endif
-   }
-   // One more line is consumed and ignored before the cell records start.
-   if(fgets(string, 80, fin) == NULL) exit(-1);
-
-   index.resize(ncells);
-
-   allocate(ncells);
-
-   uint ic=0;
-   while(fgets(string, 80, fin)!=NULL){
-      // Store only as many records as were sized for above. Extra lines are
-      // still counted so the mismatch is reported below instead of
-      // overrunning the arrays.
-      if (ic < ncells) {
-         sscanf(string, "%d %d %d %d", &(index[ic]), &(i[ic]), &(j[ic]), &(level[ic]));
-      }
-      ic++;
-   }
-
-   ibase = 0;
-   calc_spatial_coordinates(ibase);
-   KDTree_Initialize(&tree);
-
-
-   print();
-
-   if (ic != ncells) {
-      printf("Error -- cells read does not match number specified\n");
-   }
-   return;
-}
-
-/**
- * Print one row per cell to stdout: running index, original index, i, j,
- * level, the four neighbor indices and the low/high x and y extents.
- *
- * The neighbor, coordinate and index arrays must be allocated before the
- * call; this is asserted.
- */
-void Mesh::print(void)
-{
-   assert(&nlft[0] != NULL);
-   assert(&x[0] != NULL);
-   assert(&index[0] != NULL);
-
-   printf("index orig index i j lev nlft nrht nbot ntop xlow xhigh ylow yhigh\n");
-
-   uint cell = 0;
-   while (cell < ncells) {
-      // Integer columns first, then the extents on the same output line.
-      printf("%6d %6d %4d %4d %4d %4d %4d %4d %4d ",
-             cell, index[cell], i[cell], j[cell], level[cell],
-             nlft[cell], nrht[cell], nbot[cell], ntop[cell]);
-      printf("%8.2lf %8.2lf %8.2lf %8.2lf\n",
-             x[cell], x[cell]+dx[cell], y[cell], y[cell]+dy[cell]);
-      ++cell;
-   }
-}
-
-/**
- * Write this rank's cell table to the `fp` stream, each line prefixed with
- * the rank id (mype).
- *
- * When the nlft allocation holds at least ncells_ghost entries, the long form
- * is written (index, index+noffset, i, j, level and the four neighbors) for
- * the owned cells followed by the cells up to ncells_ghost. Otherwise only
- * index, i, j and level are written for the first ncells_ghost cells.
- */
-void Mesh::print_local()
-{
-   // Short form: the neighbor array does not cover ncells_ghost entries.
-   if (mesh_memory.get_memory_size(nlft) < ncells_ghost) {
-      fprintf(fp,"%d: index i j lev\n",mype);
-      for (uint cell = 0; cell < ncells_ghost; ++cell) {
-         fprintf(fp,"%d: %6d %4d %4d %4d \n", mype, cell, i[cell], j[cell], level[cell]);
-      }
-      return;
-   }
-
-   fprintf(fp,"%d: index global i j lev nlft nrht nbot ntop \n",mype);
-
-   uint cell = 0;
-   // Owned cells ...
-   for (; cell < ncells; ++cell) {
-      fprintf(fp,"%d: %6d %6d %4d %4d %4d %4d %4d %4d %4d \n",
-              mype, cell, cell+noffset, i[cell], j[cell], level[cell],
-              nlft[cell], nrht[cell], nbot[cell], ntop[cell]);
-   }
-   // ... then the remaining entries up to ncells_ghost, continuing the index.
-   for (; cell < ncells_ghost; ++cell) {
-      fprintf(fp,"%d: %6d %6d %4d %4d %4d %4d %4d %4d %4d \n",
-              mype, cell, cell+noffset, i[cell], j[cell], level[cell],
-              nlft[cell], nrht[cell], nbot[cell], ntop[cell]);
-   }
-}
-
-#ifdef HAVE_OPENCL
-/**
- * Read the device copies of i, j, level and the four neighbor arrays back to
- * the host and write them to the `fp` stream in the same table layout as the
- * long form of print_local(), each line prefixed with the rank id.
- *
- * The first six reads are queued non-blocking; the last one is blocking.
- */
-void Mesh::print_dev_local(void)
-{
-   cl_command_queue queue = ezcl_get_command_queue();
-   const size_t nbytes = ncells_ghost*sizeof(cl_int);
-
-   vector<int> host_i(ncells_ghost);
-   vector<int> host_j(ncells_ghost);
-   vector<int> host_level(ncells_ghost);
-   vector<int> host_nlft(ncells_ghost);
-   vector<int> host_nrht(ncells_ghost);
-   vector<int> host_nbot(ncells_ghost);
-   vector<int> host_ntop(ncells_ghost);
-
-   ezcl_enqueue_read_buffer(queue, dev_i,     CL_FALSE, 0, nbytes, &host_i[0],     NULL);
-   ezcl_enqueue_read_buffer(queue, dev_j,     CL_FALSE, 0, nbytes, &host_j[0],     NULL);
-   ezcl_enqueue_read_buffer(queue, dev_level, CL_FALSE, 0, nbytes, &host_level[0], NULL);
-   ezcl_enqueue_read_buffer(queue, dev_nlft,  CL_FALSE, 0, nbytes, &host_nlft[0],  NULL);
-   ezcl_enqueue_read_buffer(queue, dev_nrht,  CL_FALSE, 0, nbytes, &host_nrht[0],  NULL);
-   ezcl_enqueue_read_buffer(queue, dev_nbot,  CL_FALSE, 0, nbytes, &host_nbot[0],  NULL);
-   ezcl_enqueue_read_buffer(queue, dev_ntop,  CL_TRUE,  0, nbytes, &host_ntop[0],  NULL);
-
-   fprintf(fp,"%d: index global i j lev nlft nrht nbot ntop \n",mype);
-
-   // NOTE(review): the bound is MAX(ncells_ghost,ncells) while the host
-   // vectors hold ncells_ghost entries -- confirm ncells never exceeds
-   // ncells_ghost here.
-   uint cell = 0;
-   while (cell < MAX(ncells_ghost,ncells)) {
-      fprintf(fp,"%d: %6d %6d %4d %4d %4d %4d %4d %4d %4d \n",
-              mype, cell, cell+noffset, host_i[cell], host_j[cell], host_level[cell],
-              host_nlft[cell], host_nrht[cell], host_nbot[cell], host_ntop[cell]);
-      ++cell;
-   }
-}
-
-/**
- * Read the device copies of the mesh arrays back to the host and report, on
- * the `fp` stream, every cell below ncells_ghost whose device i, j or level
- * differs from the host value.
- *
- * The four neighbor arrays are read back as well but are not compared.
- */
-void Mesh::compare_dev_local_to_local(void)
-{
-   cl_command_queue queue = ezcl_get_command_queue();
-   const size_t nbytes = ncells_ghost*sizeof(cl_int);
-
-   vector<int> dev_i_copy(ncells_ghost);
-   vector<int> dev_j_copy(ncells_ghost);
-   vector<int> dev_level_copy(ncells_ghost);
-   vector<int> dev_nlft_copy(ncells_ghost);
-   vector<int> dev_nrht_copy(ncells_ghost);
-   vector<int> dev_nbot_copy(ncells_ghost);
-   vector<int> dev_ntop_copy(ncells_ghost);
-
-   // The level read and the final read are blocking; the others are not.
-   ezcl_enqueue_read_buffer(queue, dev_i,     CL_FALSE, 0, nbytes, &dev_i_copy[0],     NULL);
-   ezcl_enqueue_read_buffer(queue, dev_j,     CL_FALSE, 0, nbytes, &dev_j_copy[0],     NULL);
-   ezcl_enqueue_read_buffer(queue, dev_level, CL_TRUE,  0, nbytes, &dev_level_copy[0], NULL);
-   ezcl_enqueue_read_buffer(queue, dev_nlft,  CL_FALSE, 0, nbytes, &dev_nlft_copy[0],  NULL);
-   ezcl_enqueue_read_buffer(queue, dev_nrht,  CL_FALSE, 0, nbytes, &dev_nrht_copy[0],  NULL);
-   ezcl_enqueue_read_buffer(queue, dev_nbot,  CL_FALSE, 0, nbytes, &dev_nbot_copy[0],  NULL);
-   ezcl_enqueue_read_buffer(queue, dev_ntop,  CL_TRUE,  0, nbytes, &dev_ntop_copy[0],  NULL);
-
-   fprintf(fp,"\n%d: Comparing mesh for dev_local to local\n\n",mype);
-
-   uint cell = 0;
-   while (cell < ncells_ghost) {
-      if (dev_i_copy[cell] != i[cell]) {
-         fprintf(fp,"%d: Error: cell %d dev_i %d i %d\n",mype,cell,dev_i_copy[cell],i[cell]);
-      }
-      if (dev_j_copy[cell] != j[cell]) {
-         fprintf(fp,"%d: Error: cell %d dev_j %d j %d\n",mype,cell,dev_j_copy[cell],j[cell]);
-      }
-      if (dev_level_copy[cell] != level[cell]) {
-         fprintf(fp,"%d: Error: cell %d dev_level %d level %d\n",mype,cell,dev_level_copy[cell],level[cell]);
-      }
-      ++cell;
-   }
-
-   fprintf(fp,"\n%d: Finished comparing mesh for dev_local to local\n\n",mype);
-}
-
-/**
- * Read the four device neighbor arrays (ncells entries each) back to the host
- * and print a "DEBUG --" line to stdout for every entry that differs from the
- * corresponding host array.
- */
-void Mesh::compare_neighbors_gpu_global_to_cpu_global()
-{
-   cl_command_queue queue = ezcl_get_command_queue();
-   const size_t nbytes = ncells*sizeof(cl_int);
-
-   vector<int> gpu_nlft(ncells);
-   vector<int> gpu_nrht(ncells);
-   vector<int> gpu_nbot(ncells);
-   vector<int> gpu_ntop(ncells);
-
-   // Only the last read blocks.
-   ezcl_enqueue_read_buffer(queue, dev_nlft, CL_FALSE, 0, nbytes, &gpu_nlft[0], NULL);
-   ezcl_enqueue_read_buffer(queue, dev_nrht, CL_FALSE, 0, nbytes, &gpu_nrht[0], NULL);
-   ezcl_enqueue_read_buffer(queue, dev_nbot, CL_FALSE, 0, nbytes, &gpu_nbot[0], NULL);
-   ezcl_enqueue_read_buffer(queue, dev_ntop, CL_TRUE,  0, nbytes, &gpu_ntop[0], NULL);
-
-   uint cell = 0;
-   while (cell < ncells) {
-      if (nlft[cell] != gpu_nlft[cell]) {
-         printf("DEBUG -- nlft: ic %d nlft %d nlft_check %d\n",cell, nlft[cell], gpu_nlft[cell]);
-      }
-      if (nrht[cell] != gpu_nrht[cell]) {
-         printf("DEBUG -- nrht: ic %d nrht %d nrht_check %d\n",cell, nrht[cell], gpu_nrht[cell]);
-      }
-      if (nbot[cell] != gpu_nbot[cell]) {
-         printf("DEBUG -- nbot: ic %d nbot %d nbot_check %d\n",cell, nbot[cell], gpu_nbot[cell]);
-      }
-      if (ntop[cell] != gpu_ntop[cell]) {
-         printf("DEBUG -- ntop: ic %d ntop %d ntop_check %d\n",cell, ntop[cell], gpu_ntop[cell]);
-      }
-      ++cell;
-   }
-}
-#endif
-
-// Debug cross-check of this rank's neighbor arrays against those of a global mesh.
-//
-// With HAVE_MPI: every owned cell is tagged mype*1000+ic, entries past the owned
-// cells are filled by L7_Update when numpe > 1, and the tags of all ranks are
-// gathered with MPI_Allgatherv. For each direction (left, right, bottom, top),
-// one hop and two hops away, the tag reached through the local neighbor arrays is
-// gathered and compared with the tag reached through mesh_global's neighbor
-// arrays; mismatches are printed to stdout. One-hop mismatches are printed on
-// rank 0 only, two-hop mismatches on every rank.
-// Without HAVE_MPI the function does nothing.
-//
-// ncells_ghost  : size of the local tag array (this parameter hides any
-//                 ncells_ghost visible from the enclosing scope)
-// ncells_global : number of cells in mesh_global
-// mesh_global   : mesh whose nlft/nrht/nbot/ntop serve as the reference
-// nsizes, ndispl: per-rank counts and displacements passed to MPI_Allgatherv
-//
-// NOTE(review): tags are mype*1000+ic, so they stop being unique across ranks
-// once a rank owns 1000 or more cells -- confirm this is acceptable for the
-// mesh sizes this check is used with.
-void Mesh::compare_neighbors_cpu_local_to_cpu_global(uint ncells_ghost, uint ncells_global, Mesh *mesh_global, int *nsizes, int *ndispl)
-{
-
-#ifdef HAVE_MPI
- int *nlft_global = mesh_global->nlft;
- int *nrht_global = mesh_global->nrht;
- int *nbot_global = mesh_global->nbot;
- int *ntop_global = mesh_global->ntop;
-
- // Tag the owned cells; the remaining entries are filled by L7_Update below.
- vector<int> Test(ncells_ghost);
- for(uint ic=0; ic<ncells; ic++){
- Test[ic] = mype*1000 +ic;
- }
- if (numpe > 1) L7_Update(&Test[0], L7_INT, cell_handle);
-
- // Reference: the tags of all ranks, gathered using nsizes/ndispl.
- vector<int> Test_global(ncells_global);
- MPI_Allgatherv(&Test[0], nsizes[mype], MPI_INT, &Test_global[0], &nsizes[0], &ndispl[0], MPI_INT, MPI_COMM_WORLD);
-
- // Scratch buffers reused by every check below.
- vector<int> Test_check(ncells);
- vector<int> Test_check_global(ncells_global);
-
- // ==================== check left value ====================
- for (uint ic=0; ic<ncells; ic++){
- Test_check[ic] = Test[nlft[ic]];
- }
-
- MPI_Allgatherv(&Test_check[0], nsizes[mype], MPI_INT, &Test_check_global[0], &nsizes[0], &ndispl[0], MPI_INT, MPI_COMM_WORLD);
-
- for (uint ic=0; ic<ncells_global; ic++){
- if (Test_global[nlft_global[ic]] != Test_check_global[ic]) {
- if (mype == 0) printf("%d: Error with nlft for cell %d -- nlft %d global %d check %d\n",mype,ic,nlft_global[ic],Test_global[nlft_global[ic]],Test_check_global[ic]);
- }
- }
-
- // ==================== check left left value ====================
- for (uint ic=0; ic<ncells; ic++){
- Test_check[ic] = Test[nlft[nlft[ic]]];
- }
-
- MPI_Allgatherv(&Test_check[0], nsizes[mype], MPI_INT, &Test_check_global[0], &nsizes[0], &ndispl[0], MPI_INT, MPI_COMM_WORLD);
-
- for (uint ic=0; ic<ncells_global; ic++){
- if (Test_global[nlft_global[nlft_global[ic]]] != Test_check_global[ic]) {
- printf("%d: Error with nlft nlft for cell %5d -- nlftg %5d nlftg nlftg %5d global %5d\n",
- mype,ic,nlft_global[ic],nlft_global[nlft_global[ic]],Test_global[nlft_global[nlft_global[ic]]]);
- printf("%d: check %5d -- nlftl %5d nlftl nlftl %5d check %5d\n",
- mype,ic,nlft[ic],nlft[nlft[ic]],Test_check_global[ic]);
- }
- }
-
- // ==================== check right value ====================
- for (uint ic=0; ic<ncells; ic++){
- Test_check[ic] = Test[nrht[ic]];
- }
-
- MPI_Allgatherv(&Test_check[0], nsizes[mype], MPI_INT, &Test_check_global[0], &nsizes[0], &ndispl[0], MPI_INT, MPI_COMM_WORLD);
-
- for (uint ic=0; ic<ncells_global; ic++){
- if (Test_global[nrht_global[ic]] != Test_check_global[ic]) {
- if (mype == 0) printf("%d: Error with nrht for cell %d -- %d %d\n",mype,ic,Test_global[nrht_global[ic]],Test_check_global[ic]);
- }
- }
-
- // ==================== check right right value ====================
- for (uint ic=0; ic<ncells; ic++){
- Test_check[ic] = Test[nrht[nrht[ic]]];
- }
-
- MPI_Allgatherv(&Test_check[0], nsizes[mype], MPI_INT, &Test_check_global[0], &nsizes[0], &ndispl[0], MPI_INT, MPI_COMM_WORLD);
-
- for (uint ic=0; ic<ncells_global; ic++){
- if (Test_global[nrht_global[nrht_global[ic]]] != Test_check_global[ic]) {
- printf("%d: Error with nrht nrht for cell %5d -- nrhtg %5d nrhtg nrhtg %5d global %5d\n",
- mype,ic,nrht_global[ic],nrht_global[nrht_global[ic]],Test_global[nrht_global[nrht_global[ic]]]);
- printf("%d: check %5d -- nrhtl %5d nrhtl nrhtl %5d check %5d\n",
- mype,ic,nrht[ic],nrht[nrht[ic]],Test_check_global[ic]);
- }
- }
-
- // ==================== check bottom value ====================
- for (uint ic=0; ic<ncells; ic++){
- Test_check[ic] = Test[nbot[ic]];
- }
-
- MPI_Allgatherv(&Test_check[0], nsizes[mype], MPI_INT, &Test_check_global[0], &nsizes[0], &ndispl[0], MPI_INT, MPI_COMM_WORLD);
-
- for (uint ic=0; ic<ncells_global; ic++){
- if (Test_global[nbot_global[ic]] != Test_check_global[ic]) {
- if (mype == 0) printf("%d: Error with nbot for cell %d -- %d %d\n",mype,ic,Test_global[nbot_global[ic]],Test_check_global[ic]);
- }
- }
-
- // ==================== check bottom bottom value ====================
- for (uint ic=0; ic<ncells; ic++){
- Test_check[ic] = Test[nbot[nbot[ic]]];
- }
-
- MPI_Allgatherv(&Test_check[0], nsizes[mype], MPI_INT, &Test_check_global[0], &nsizes[0], &ndispl[0], MPI_INT, MPI_COMM_WORLD);
-
- for (uint ic=0; ic<ncells_global; ic++){
- if (Test_global[nbot_global[nbot_global[ic]]] != Test_check_global[ic]) {
- printf("%d: Error with nbot nbot for cell %5d -- nbotg %5d nbotg nbotg %5d global %5d\n",
- mype,ic,nbot_global[ic],nbot_global[nbot_global[ic]],Test_global[nbot_global[nbot_global[ic]]]);
- printf("%d: check %5d -- nbotl %5d nbotl nbotl %5d check %5d\n",
- mype,ic,nbot[ic],nbot[nbot[ic]],Test_check_global[ic]);
- }
- }
-
- // ==================== check top value ====================
- for (uint ic=0; ic<ncells; ic++){
- Test_check[ic] = Test[ntop[ic]];
- }
-
- MPI_Allgatherv(&Test_check[0], nsizes[mype], MPI_INT, &Test_check_global[0], &nsizes[0], &ndispl[0], MPI_INT, MPI_COMM_WORLD);
-
- for (uint ic=0; ic<ncells_global; ic++){
- if (Test_global[ntop_global[ic]] != Test_check_global[ic]) {
- if (mype == 0) printf("%d: Error with ntop for cell %d -- %d %d\n",mype,ic,Test_global[ntop_global[ic]],Test_check_global[ic]);
- }
- }
-
- // ==================== check top top value ====================
- for (uint ic=0; ic<ncells; ic++){
- Test_check[ic] = Test[ntop[ntop[ic]]];
- }
-
- MPI_Allgatherv(&Test_check[0], nsizes[mype], MPI_INT, &Test_check_global[0], &nsizes[0], &ndispl[0], MPI_INT, MPI_COMM_WORLD);
-
- for (uint ic=0; ic<ncells_global; ic++){
- if (Test_global[ntop_global[ntop_global[ic]]] != Test_check_global[ic]) {
- printf("%d: Error with ntop ntop for cell %5d -- ntopg %5d ntopg ntopg %5d global %5d\n",
- mype,ic,ntop_global[ic],ntop_global[ntop_global[ic]],Test_global[ntop_global[ntop_global[ic]]]);
- printf("%d: check %5d -- ntopl %5d ntopl ntopl %5d check %5d\n",
- mype,ic,ntop[ic],ntop[ntop[ic]],Test_check_global[ic]);
- }
- }
-#else
- // Just to get rid of compiler warnings
- if (1 == 2) printf("DEBUG -- ncells_global %d ncells_ghost %d mesh_global %p nsizes[0] %d ndispl[0] %d\n",
- ncells_global,ncells_ghost,mesh_global,nsizes[0],ndispl[0]);
-#endif
-}
-
-#ifdef HAVE_OPENCL
-/**
- * Debug check of this rank's neighbor arrays against a global mesh and
- * against the device copies.
- *
- * With HAVE_MPI: every owned cell is tagged mype*1000+ic, entries past the
- * owned cells are filled by L7_Update when numpe > 1, and the tags of all
- * ranks are gathered. For the left-left, right, right-right, bottom,
- * bottom-bottom, top and top-top neighbors, the tag reached through the local
- * arrays is gathered and compared with the tag reached through mesh_global's
- * arrays; mismatches are printed to stdout (one-hop mismatches on rank 0
- * only). Finally dev_nlft/dev_nrht/dev_nbot/dev_ntop are read back
- * (ncells_ghost entries each) and compared entry by entry with the host
- * arrays.
- * Without HAVE_MPI the function does nothing.
- *
- * Each check runs once. An earlier revision repeated the top-top check and
- * the device comparison, which reported every error twice.
- *
- * @param mesh_global  Mesh whose ncells and nlft/nrht/nbot/ntop are the reference.
- * @param nsizes       Per-rank counts passed to MPI_Allgatherv.
- * @param ndispl       Per-rank displacements passed to MPI_Allgatherv.
- */
-void Mesh::compare_neighbors_all_to_gpu_local(Mesh *mesh_global, int *nsizes, int *ndispl)
-{
-#ifdef HAVE_MPI
-   cl_command_queue command_queue = ezcl_get_command_queue();
-
-   size_t &ncells_global = mesh_global->ncells;
-   int *nlft_global = mesh_global->nlft;
-   int *nrht_global = mesh_global->nrht;
-   int *nbot_global = mesh_global->nbot;
-   int *ntop_global = mesh_global->ntop;
-
-   // Checking CPU parallel to CPU global
-   vector<int> Test(ncells_ghost);
-   for(uint ic=0; ic<ncells; ic++){
-      Test[ic] = mype*1000 +ic;
-   }
-   if (numpe > 1) L7_Update(&Test[0], L7_INT, cell_handle);
-
-   vector<int> Test_global(ncells_global);
-   MPI_Allgatherv(&Test[0], nsizes[mype], MPI_INT, &Test_global[0], &nsizes[0], &ndispl[0], MPI_INT, MPI_COMM_WORLD);
-
-   vector<int> Test_check(ncells);
-   vector<int> Test_check_global(ncells_global);
-
-   // The one-hop left comparison was already disabled (its body was commented
-   // out), so the gather that only fed it is dropped as well.
-
-   // ==================== check left left value ====================
-   for (uint ic=0; ic<ncells; ic++){
-      Test_check[ic] = Test[nlft[nlft[ic]]];
-   }
-
-   MPI_Allgatherv(&Test_check[0], nsizes[mype], MPI_INT, &Test_check_global[0], &nsizes[0], &ndispl[0], MPI_INT, MPI_COMM_WORLD);
-
-   for (uint ic=0; ic<ncells_global; ic++){
-      if (Test_global[nlft_global[nlft_global[ic]]] != Test_check_global[ic]) {
-         printf("%d: Error with nlft nlft for cell %5d -- nlftg %5d nlftg nlftg %5d global %5d\n",
-                mype,ic,nlft_global[ic],nlft_global[nlft_global[ic]],Test_global[nlft_global[nlft_global[ic]]]);
-         printf("%d: check %5d -- nlftl %5d nlftl nlftl %5d check %5d\n",
-                mype,ic,nlft[ic],nlft[nlft[ic]],Test_check_global[ic]);
-      }
-   }
-
-   // ==================== check right value ====================
-   for (uint ic=0; ic<ncells; ic++){
-      Test_check[ic] = Test[nrht[ic]];
-   }
-
-   MPI_Allgatherv(&Test_check[0], nsizes[mype], MPI_INT, &Test_check_global[0], &nsizes[0], &ndispl[0], MPI_INT, MPI_COMM_WORLD);
-
-   for (uint ic=0; ic<ncells_global; ic++){
-      if (Test_global[nrht_global[ic]] != Test_check_global[ic]) {
-         if (mype == 0) printf("%d: Error with nrht for cell %d -- %d %d\n",mype,ic,Test_global[nrht_global[ic]],Test_check_global[ic]);
-      }
-   }
-
-   // ==================== check right right value ====================
-   for (uint ic=0; ic<ncells; ic++){
-      Test_check[ic] = Test[nrht[nrht[ic]]];
-   }
-
-   MPI_Allgatherv(&Test_check[0], nsizes[mype], MPI_INT, &Test_check_global[0], &nsizes[0], &ndispl[0], MPI_INT, MPI_COMM_WORLD);
-
-   for (uint ic=0; ic<ncells_global; ic++){
-      if (Test_global[nrht_global[nrht_global[ic]]] != Test_check_global[ic]) {
-         printf("%d: Error with nrht nrht for cell %5d -- nrhtg %5d nrhtg nrhtg %5d global %5d\n",
-                mype,ic,nrht_global[ic],nrht_global[nrht_global[ic]],Test_global[nrht_global[nrht_global[ic]]]);
-         printf("%d: check %5d -- nrhtl %5d nrhtl nrhtl %5d check %5d\n",
-                mype,ic,nrht[ic],nrht[nrht[ic]],Test_check_global[ic]);
-      }
-   }
-
-   // ==================== check bottom value ====================
-   for (uint ic=0; ic<ncells; ic++){
-      Test_check[ic] = Test[nbot[ic]];
-   }
-
-   MPI_Allgatherv(&Test_check[0], nsizes[mype], MPI_INT, &Test_check_global[0], &nsizes[0], &ndispl[0], MPI_INT, MPI_COMM_WORLD);
-
-   for (uint ic=0; ic<ncells_global; ic++){
-      if (Test_global[nbot_global[ic]] != Test_check_global[ic]) {
-         if (mype == 0) printf("%d: Error with nbot for cell %d -- %d %d\n",mype,ic,Test_global[nbot_global[ic]],Test_check_global[ic]);
-      }
-   }
-
-   // ==================== check bottom bottom value ====================
-   for (uint ic=0; ic<ncells; ic++){
-      Test_check[ic] = Test[nbot[nbot[ic]]];
-   }
-
-   MPI_Allgatherv(&Test_check[0], nsizes[mype], MPI_INT, &Test_check_global[0], &nsizes[0], &ndispl[0], MPI_INT, MPI_COMM_WORLD);
-
-   for (uint ic=0; ic<ncells_global; ic++){
-      if (Test_global[nbot_global[nbot_global[ic]]] != Test_check_global[ic]) {
-         printf("%d: Error with nbot nbot for cell %5d -- nbotg %5d nbotg nbotg %5d global %5d\n",
-                mype,ic,nbot_global[ic],nbot_global[nbot_global[ic]],Test_global[nbot_global[nbot_global[ic]]]);
-         printf("%d: check %5d -- nbotl %5d nbotl nbotl %5d check %5d\n",
-                mype,ic,nbot[ic],nbot[nbot[ic]],Test_check_global[ic]);
-      }
-   }
-
-   // ==================== check top value ====================
-   for (uint ic=0; ic<ncells; ic++){
-      Test_check[ic] = Test[ntop[ic]];
-   }
-
-   MPI_Allgatherv(&Test_check[0], nsizes[mype], MPI_INT, &Test_check_global[0], &nsizes[0], &ndispl[0], MPI_INT, MPI_COMM_WORLD);
-
-   for (uint ic=0; ic<ncells_global; ic++){
-      if (Test_global[ntop_global[ic]] != Test_check_global[ic]) {
-         if (mype == 0) printf("%d: Error with ntop for cell %d -- %d %d\n",mype,ic,Test_global[ntop_global[ic]],Test_check_global[ic]);
-      }
-   }
-
-   // ==================== check top top value ====================
-   for (uint ic=0; ic<ncells; ic++){
-      Test_check[ic] = Test[ntop[ntop[ic]]];
-   }
-
-   MPI_Allgatherv(&Test_check[0], nsizes[mype], MPI_INT, &Test_check_global[0], &nsizes[0], &ndispl[0], MPI_INT, MPI_COMM_WORLD);
-
-   for (uint ic=0; ic<ncells_global; ic++){
-      if (Test_global[ntop_global[ntop_global[ic]]] != Test_check_global[ic]) {
-         printf("%d: Error with ntop ntop for cell %5d -- ntopg %5d ntopg ntopg %5d global %5d\n",
-                mype,ic,ntop_global[ic],ntop_global[ntop_global[ic]],Test_global[ntop_global[ntop_global[ic]]]);
-         printf("%d: check %5d -- ntopl %5d ntopl ntopl %5d check %5d\n",
-                mype,ic,ntop[ic],ntop[ntop[ic]],Test_check_global[ic]);
-      }
-   }
-
-   // checking gpu results: only the last read is blocking
-   vector<int> nlft_check(ncells_ghost); vector<int> nrht_check(ncells_ghost);
-   vector<int> nbot_check(ncells_ghost); vector<int> ntop_check(ncells_ghost);
-   ezcl_enqueue_read_buffer(command_queue, dev_nlft, CL_FALSE, 0, ncells_ghost*sizeof(cl_int), &nlft_check[0], NULL);
-   ezcl_enqueue_read_buffer(command_queue, dev_nrht, CL_FALSE, 0, ncells_ghost*sizeof(cl_int), &nrht_check[0], NULL);
-   ezcl_enqueue_read_buffer(command_queue, dev_nbot, CL_FALSE, 0, ncells_ghost*sizeof(cl_int), &nbot_check[0], NULL);
-   ezcl_enqueue_read_buffer(command_queue, dev_ntop, CL_TRUE, 0, ncells_ghost*sizeof(cl_int), &ntop_check[0], NULL);
-
-   for (uint ic=0; ic<ncells_ghost; ic++){
-      if (nlft[ic] != nlft_check[ic]) printf("%d: Error with gpu calculated nlft for cell %d nlft %d check %d\n",mype,ic,nlft[ic],nlft_check[ic]);
-      if (nrht[ic] != nrht_check[ic]) printf("%d: Error with gpu calculated nrht for cell %d nrht %d check %d\n",mype,ic,nrht[ic],nrht_check[ic]);
-      if (nbot[ic] != nbot_check[ic]) printf("%d: Error with gpu calculated nbot for cell %d nbot %d check %d\n",mype,ic,nbot[ic],nbot_check[ic]);
-      if (ntop[ic] != ntop_check[ic]) printf("%d: Error with gpu calculated ntop for cell %d ntop %d check %d\n",mype,ic,ntop[ic],ntop_check[ic]);
-   }
-#else
-   // Just to get rid of compiler warnings
-   if (1 == 2) printf("DEBUG -- mesh_global %p nsizes[0] %d ndispl[0] %d\n",
-                      mesh_global,nsizes[0],ndispl[0]);
-#endif
-}
-
-/**
- * Read the device copies of i, j, level and celltype (ncells entries each)
- * back to the host and print a "DEBUG --" line to stdout for every entry that
- * differs from the corresponding host array.
- */
-void Mesh::compare_indices_gpu_global_to_cpu_global(void)
-{
-   cl_command_queue queue = ezcl_get_command_queue();
-   const size_t nbytes = ncells*sizeof(cl_int);
-
-   vector<int> gpu_i(ncells);
-   vector<int> gpu_j(ncells);
-   vector<int> gpu_level(ncells);
-   vector<int> gpu_celltype(ncells);
-
-   // Queue the read-backs; only the last one blocks.
-   ezcl_enqueue_read_buffer(queue, dev_i,        CL_FALSE, 0, nbytes, &gpu_i[0],        NULL);
-   ezcl_enqueue_read_buffer(queue, dev_j,        CL_FALSE, 0, nbytes, &gpu_j[0],        NULL);
-   ezcl_enqueue_read_buffer(queue, dev_level,    CL_FALSE, 0, nbytes, &gpu_level[0],    NULL);
-   ezcl_enqueue_read_buffer(queue, dev_celltype, CL_TRUE,  0, nbytes, &gpu_celltype[0], NULL);
-
-   uint cell = 0;
-   while (cell < ncells) {
-      if (i[cell] != gpu_i[cell]) {
-         printf("DEBUG -- i: ic %d i %d i_check %d\n",cell, i[cell], gpu_i[cell]);
-      }
-      if (j[cell] != gpu_j[cell]) {
-         printf("DEBUG -- j: ic %d j %d j_check %d\n",cell, j[cell], gpu_j[cell]);
-      }
-      if (level[cell] != gpu_level[cell]) {
-         printf("DEBUG -- level: ic %d level %d level_check %d\n",cell, level[cell], gpu_level[cell]);
-      }
-      if (celltype[cell] != gpu_celltype[cell]) {
-         printf("DEBUG -- celltype: ic %d celltype %d celltype_check %d\n",cell, celltype[cell], gpu_celltype[cell]);
-      }
-      ++cell;
-   }
-}
-#endif
-
-/**
- * Debug check of the local index arrays against a global reference mesh.
- *
- * With MPI, the local celltype, i, j and level arrays of every rank are
- * gathered (nsizes / ndispl give the per-rank counts and displacements) and
- * each gathered entry is compared with the matching array of mesh_global.
- * Without MPI nothing is gathered, so the zero-initialised buffers are
- * compared as they are. Every mismatch prints a "DEBUG rezone 3" line tagged
- * with the given cycle.
- *
- * A reverse check (scattering the global arrays and comparing them with the
- * local ones) existed here only as commented-out code and is not performed.
- */
-void Mesh::compare_indices_cpu_local_to_cpu_global(uint ncells_global, Mesh *mesh_global, int *nsizes, int *ndispl, int cycle)
-{
-   int *ref_celltype = mesh_global->celltype;
-   int *ref_i        = mesh_global->i;
-   int *ref_j        = mesh_global->j;
-   int *ref_level    = mesh_global->level;
-
-   vector<int> gathered_i(ncells_global);
-   vector<int> gathered_j(ncells_global);
-   vector<int> gathered_level(ncells_global);
-   vector<int> gathered_celltype(ncells_global);
-
-#ifdef HAVE_MPI
-   const int nlocal = nsizes[mype];
-   MPI_Allgatherv(&celltype[0], nlocal, MPI_INT, &gathered_celltype[0], &nsizes[0], &ndispl[0], MPI_INT, MPI_COMM_WORLD);
-   MPI_Allgatherv(&i[0],        nlocal, MPI_INT, &gathered_i[0],        &nsizes[0], &ndispl[0], MPI_INT, MPI_COMM_WORLD);
-   MPI_Allgatherv(&j[0],        nlocal, MPI_INT, &gathered_j[0],        &nsizes[0], &ndispl[0], MPI_INT, MPI_COMM_WORLD);
-   MPI_Allgatherv(&level[0],    nlocal, MPI_INT, &gathered_level[0],    &nsizes[0], &ndispl[0], MPI_INT, MPI_COMM_WORLD);
-#else
-   // Never executed; only references the otherwise unused parameters so the
-   // compiler does not warn about them.
-   if (1 == 2) printf("DEBUG -- nsizes[0] %d ndispl[0] %d\n",
-                      nsizes[0],ndispl[0]);
-#endif
-
-   for (uint cell = 0; cell < ncells_global; cell++) {
-      if (ref_celltype[cell] != gathered_celltype[cell]) {
-         printf("DEBUG rezone 3 at cycle %d celltype_global & celltype_check_global %d %d %d \n",cycle,cell,ref_celltype[cell],gathered_celltype[cell]);
-      }
-      if (ref_i[cell] != gathered_i[cell]) {
-         printf("DEBUG rezone 3 at cycle %d i_global & i_check_global %d %d %d \n",cycle,cell,ref_i[cell],gathered_i[cell]);
-      }
-      if (ref_j[cell] != gathered_j[cell]) {
-         printf("DEBUG rezone 3 at cycle %d j_global & j_check_global %d %d %d \n",cycle,cell,ref_j[cell],gathered_j[cell]);
-      }
-      if (ref_level[cell] != gathered_level[cell]) {
-         printf("DEBUG rezone 3 at cycle %d level_global & level_check_global %d %d %d \n",cycle,cell,ref_level[cell],gathered_level[cell]);
-      }
-   }
-}
-
-#ifdef HAVE_OPENCL
-/**
- * Four-stage debug comparison of the level, celltype, i and j arrays
- * (only active when built with MPI):
- *   1. local device buffers  vs. local host arrays            ("rezone 1")
- *   2. gathered device data  vs. host arrays of mesh_global   ("rezone 2")
- *   3. gathered host data    vs. host arrays of mesh_global   ("rezone 3")
- *   4. device buffers of mesh_global vs. its host arrays      ("rezone 4")
- * nsizes / ndispl are the per-rank counts and displacements handed to
- * MPI_Allgatherv; ncycle only appears in the printed messages.
- */
-void Mesh::compare_indices_all_to_gpu_local(Mesh *mesh_global, uint ncells_global, int *nsizes, int *ndispl, int ncycle)
-{
-#ifdef HAVE_MPI
-   cl_command_queue queue = ezcl_get_command_queue();
-
-   int *ref_level    = mesh_global->level;
-   int *ref_celltype = mesh_global->celltype;
-   int *ref_i        = mesh_global->i;
-   int *ref_j        = mesh_global->j;
-
-   cl_mem &ref_dev_celltype = mesh_global->dev_celltype;
-   cl_mem &ref_dev_i        = mesh_global->dev_i;
-   cl_mem &ref_dev_j        = mesh_global->dev_j;
-   cl_mem &ref_dev_level    = mesh_global->dev_level;
-
-   const size_t local_bytes  = ncells*sizeof(cl_int);
-   const size_t global_bytes = ncells_global*sizeof(cl_int);
-
-   // Stage 1: local device buffers against local host arrays.
-   vector<int> level_dev(ncells);
-   vector<int> celltype_dev(ncells);
-   vector<int> i_dev(ncells);
-   vector<int> j_dev(ncells);
-   ezcl_enqueue_read_buffer(queue, dev_level,    CL_FALSE, 0, local_bytes, &level_dev[0],    NULL);
-   ezcl_enqueue_read_buffer(queue, dev_celltype, CL_FALSE, 0, local_bytes, &celltype_dev[0], NULL);
-   ezcl_enqueue_read_buffer(queue, dev_i,        CL_FALSE, 0, local_bytes, &i_dev[0],        NULL);
-   ezcl_enqueue_read_buffer(queue, dev_j,        CL_TRUE,  0, local_bytes, &j_dev[0],        NULL);
-   for (uint cell = 0; cell < ncells; cell++) {
-      if (level[cell] != level_dev[cell]) {
-         printf("%d: DEBUG rezone 1 cell %d level %d level_check %d\n",mype, cell, level[cell], level_dev[cell]);
-      }
-      if (celltype[cell] != celltype_dev[cell]) {
-         printf("%d: DEBUG rezone 1 cell %d celltype %d celltype_check %d\n",mype, cell, celltype[cell], celltype_dev[cell]);
-      }
-      if (i[cell] != i_dev[cell]) {
-         printf("%d: DEBUG rezone 1 cell %d i %d i_check %d\n",mype, cell, i[cell], i_dev[cell]);
-      }
-      if (j[cell] != j_dev[cell]) {
-         printf("%d: DEBUG rezone 1 cell %d j %d j_check %d\n",mype, cell, j[cell], j_dev[cell]);
-      }
-   }
-
-   // Stage 2: gather what was read from the device and compare with the
-   // host arrays of the global mesh.
-   const int nlocal = nsizes[mype];
-   vector<int> celltype_all(ncells_global);
-   vector<int> i_all(ncells_global);
-   vector<int> j_all(ncells_global);
-   vector<int> level_all(ncells_global);
-   MPI_Allgatherv(&celltype_dev[0], nlocal, MPI_INT, &celltype_all[0], &nsizes[0], &ndispl[0], MPI_INT, MPI_COMM_WORLD);
-   MPI_Allgatherv(&i_dev[0],        nlocal, MPI_INT, &i_all[0],        &nsizes[0], &ndispl[0], MPI_INT, MPI_COMM_WORLD);
-   MPI_Allgatherv(&j_dev[0],        nlocal, MPI_INT, &j_all[0],        &nsizes[0], &ndispl[0], MPI_INT, MPI_COMM_WORLD);
-   MPI_Allgatherv(&level_dev[0],    nlocal, MPI_INT, &level_all[0],    &nsizes[0], &ndispl[0], MPI_INT, MPI_COMM_WORLD);
-   for (uint cell = 0; cell < ncells_global; cell++) {
-      if (ref_level[cell] != level_all[cell]) {
-         printf("%d: DEBUG rezone 2 cell %d level_global %d level_check_global %d\n",mype, cell, ref_level[cell], level_all[cell]);
-      }
-      if (ref_celltype[cell] != celltype_all[cell]) {
-         printf("%d: DEBUG rezone 2 cell %d celltype_global %d celltype_check_global %d\n",mype, cell, ref_celltype[cell], celltype_all[cell]);
-      }
-      if (ref_i[cell] != i_all[cell]) {
-         printf("%d: DEBUG rezone 2 cell %d i_global %d i_check_global %d\n",mype, cell, ref_i[cell], i_all[cell]);
-      }
-      if (ref_j[cell] != j_all[cell]) {
-         printf("%d: DEBUG rezone 2 cell %d j_global %d j_check_global %d\n",mype, cell, ref_j[cell], j_all[cell]);
-      }
-   }
-
-   // Stage 3: gather the local host arrays and compare with the global mesh.
-   MPI_Allgatherv(&celltype[0], nlocal, MPI_INT, &celltype_all[0], &nsizes[0], &ndispl[0], MPI_INT, MPI_COMM_WORLD);
-   MPI_Allgatherv(&i[0],        nlocal, MPI_INT, &i_all[0],        &nsizes[0], &ndispl[0], MPI_INT, MPI_COMM_WORLD);
-   MPI_Allgatherv(&j[0],        nlocal, MPI_INT, &j_all[0],        &nsizes[0], &ndispl[0], MPI_INT, MPI_COMM_WORLD);
-   MPI_Allgatherv(&level[0],    nlocal, MPI_INT, &level_all[0],    &nsizes[0], &ndispl[0], MPI_INT, MPI_COMM_WORLD);
-   for (uint cell = 0; cell < ncells_global; cell++) {
-      if (ref_celltype[cell] != celltype_all[cell]) {
-         printf("DEBUG rezone 3 at cycle %d celltype_global & celltype_check_global %d %d %d \n",ncycle,cell,ref_celltype[cell],celltype_all[cell]);
-      }
-      if (ref_i[cell] != i_all[cell]) {
-         printf("DEBUG rezone 3 at cycle %d i_global & i_check_global %d %d %d \n",ncycle,cell,ref_i[cell],i_all[cell]);
-      }
-      if (ref_j[cell] != j_all[cell]) {
-         printf("DEBUG rezone 3 at cycle %d j_global & j_check_global %d %d %d \n",ncycle,cell,ref_j[cell],j_all[cell]);
-      }
-      if (ref_level[cell] != level_all[cell]) {
-         printf("DEBUG rezone 3 at cycle %d level_global & level_check_global %d %d %d \n",ncycle,cell,ref_level[cell],level_all[cell]);
-      }
-   }
-
-   // Stage 4: device buffers of the global mesh against its host arrays.
-   ezcl_enqueue_read_buffer(queue, ref_dev_celltype, CL_FALSE, 0, global_bytes, &celltype_all[0], NULL);
-   ezcl_enqueue_read_buffer(queue, ref_dev_i,        CL_FALSE, 0, global_bytes, &i_all[0],        NULL);
-   ezcl_enqueue_read_buffer(queue, ref_dev_j,        CL_FALSE, 0, global_bytes, &j_all[0],        NULL);
-   ezcl_enqueue_read_buffer(queue, ref_dev_level,    CL_TRUE,  0, global_bytes, &level_all[0],    NULL);
-   for (uint cell = 0; cell < ncells_global; cell++) {
-      if (ref_celltype[cell] != celltype_all[cell]) {
-         printf("DEBUG rezone 4 at cycle %d celltype_global & celltype_check_global %d %d %d \n",ncycle,cell,ref_celltype[cell],celltype_all[cell]);
-      }
-      if (ref_i[cell] != i_all[cell]) {
-         printf("DEBUG rezone 4 at cycle %d i_global & i_check_global %d %d %d \n",ncycle,cell,ref_i[cell],i_all[cell]);
-      }
-      if (ref_j[cell] != j_all[cell]) {
-         printf("DEBUG rezone 4 at cycle %d j_global & j_check_global %d %d %d \n",ncycle,cell,ref_j[cell],j_all[cell]);
-      }
-      if (ref_level[cell] != level_all[cell]) {
-         printf("DEBUG rezone 4 at cycle %d level_global & level_check_global %d %d %d \n",ncycle,cell,ref_level[cell],level_all[cell]);
-      }
-   }
-#else
-   // Never executed; only references the parameters so the compiler does not
-   // warn about them being unused.
-   if (1 == 2) printf("DEBUG -- mesh_global %p ncells_global %d nsizes[0] %d ndispl[0] %d ncycle %d\n",
-                      mesh_global,ncells_global,nsizes[0],ndispl[0],ncycle);
-#endif
-}
-
-/**
- * Debug check: reads x, dx, y, dy and H back from the given device buffers
- * and compares them with the host arrays (members x, dx, y, dy and the H
- * argument).
- *
- * Spatial coordinates must match exactly; H may differ by at most
- * CONSERVATION_EPS. The first mismatch is printed and the process is
- * terminated with a failure status, so that scripts driving the run can see
- * that the verification failed (the exit status used to be 0).
- */
-void Mesh::compare_coordinates_gpu_global_to_cpu_global_double(cl_mem dev_x, cl_mem dev_dx, cl_mem dev_y, cl_mem dev_dy, cl_mem dev_H, double *H)
-{
-   cl_command_queue command_queue = ezcl_get_command_queue();
-
-   vector<spatial_t>x_check(ncells);
-   vector<spatial_t>dx_check(ncells);
-   vector<spatial_t>y_check(ncells);
-   vector<spatial_t>dy_check(ncells);
-   vector<double>H_check(ncells);
-   // Only the last read passes CL_TRUE; the earlier ones are enqueued with CL_FALSE.
-   ezcl_enqueue_read_buffer(command_queue, dev_x,  CL_FALSE, 0, ncells*sizeof(cl_spatial_t), &x_check[0],  NULL);
-   ezcl_enqueue_read_buffer(command_queue, dev_dx, CL_FALSE, 0, ncells*sizeof(cl_spatial_t), &dx_check[0], NULL);
-   ezcl_enqueue_read_buffer(command_queue, dev_y,  CL_FALSE, 0, ncells*sizeof(cl_spatial_t), &y_check[0],  NULL);
-   ezcl_enqueue_read_buffer(command_queue, dev_dy, CL_FALSE, 0, ncells*sizeof(cl_spatial_t), &dy_check[0], NULL);
-   ezcl_enqueue_read_buffer(command_queue, dev_H,  CL_TRUE,  0, ncells*sizeof(cl_double),    &H_check[0],  NULL);
-
-   for (uint ic = 0; ic < ncells; ic++){
-      if (x[ic] != x_check[ic] || dx[ic] != dx_check[ic] || y[ic] != y_check[ic] || dy[ic] != dy_check[ic] ) {
-         printf("Error -- mismatch in spatial coordinates for cell %d is gpu %lf %lf %lf %lf cpu %lf %lf %lf %lf\n",ic,x_check[ic],dx_check[ic],y_check[ic],dy_check[ic],x[ic],dx[ic],y[ic],dy[ic]);
-         exit(EXIT_FAILURE);
-      }
-   }
-   for (uint ic = 0; ic < ncells; ic++){
-      if (fabs(H[ic] - H_check[ic]) > CONSERVATION_EPS) {
-         printf("Error -- mismatch in H for cell %d is gpu %lf cpu %lf\n",ic,H_check[ic],H[ic]);
-         exit(EXIT_FAILURE);
-      }
-   }
-}
-
-/**
- * Single-precision variant of the device-vs-host coordinate check: reads x,
- * dx, y, dy and H back from the given device buffers and compares them with
- * the host arrays (members x, dx, y, dy and the H argument).
- *
- * Spatial coordinates must match exactly; H may differ by at most
- * CONSERVATION_EPS. The first mismatch is printed and the process is
- * terminated with a failure status, so that scripts driving the run can see
- * that the verification failed (the exit status used to be 0).
- */
-void Mesh::compare_coordinates_gpu_global_to_cpu_global_float(cl_mem dev_x, cl_mem dev_dx, cl_mem dev_y, cl_mem dev_dy, cl_mem dev_H, float *H)
-{
-   cl_command_queue command_queue = ezcl_get_command_queue();
-
-   vector<spatial_t>x_check(ncells);
-   vector<spatial_t>dx_check(ncells);
-   vector<spatial_t>y_check(ncells);
-   vector<spatial_t>dy_check(ncells);
-   vector<float>H_check(ncells);
-   // Only the last read passes CL_TRUE; the earlier ones are enqueued with CL_FALSE.
-   ezcl_enqueue_read_buffer(command_queue, dev_x,  CL_FALSE, 0, ncells*sizeof(cl_spatial_t), &x_check[0],  NULL);
-   ezcl_enqueue_read_buffer(command_queue, dev_dx, CL_FALSE, 0, ncells*sizeof(cl_spatial_t), &dx_check[0], NULL);
-   ezcl_enqueue_read_buffer(command_queue, dev_y,  CL_FALSE, 0, ncells*sizeof(cl_spatial_t), &y_check[0],  NULL);
-   ezcl_enqueue_read_buffer(command_queue, dev_dy, CL_FALSE, 0, ncells*sizeof(cl_spatial_t), &dy_check[0], NULL);
-   ezcl_enqueue_read_buffer(command_queue, dev_H,  CL_TRUE,  0, ncells*sizeof(cl_float),     &H_check[0],  NULL);
-
-   for (uint ic = 0; ic < ncells; ic++){
-      if (x[ic] != x_check[ic] || dx[ic] != dx_check[ic] || y[ic] != y_check[ic] || dy[ic] != dy_check[ic] ) {
-         printf("Error -- mismatch in spatial coordinates for cell %d is gpu %lf %lf %lf %lf cpu %lf %lf %lf %lf\n",ic,x_check[ic],dx_check[ic],y_check[ic],dy_check[ic],x[ic],dx[ic],y[ic],dy[ic]);
-         exit(EXIT_FAILURE);
-      }
-   }
-   for (uint ic = 0; ic < ncells; ic++){
-      if (fabs(H[ic] - H_check[ic]) > CONSERVATION_EPS) {
-         printf("Error -- mismatch in H for cell %d is gpu %lf cpu %lf\n",ic,H_check[ic],H[ic]);
-         exit(EXIT_FAILURE);
-      }
-   }
-}
-#endif
-
-/**
- * Debug check of local coordinate/state arrays against global reference
- * arrays (double-precision H).
- *
- * With MPI, the x, dx, y, dy and H arguments of every rank are gathered using
- * the counts in nsizes and displacements in ndispl; without MPI nothing is
- * gathered and the zero-initialised buffers are compared as they are. Each
- * gathered value that differs from its *_global counterpart by more than
- * STATE_EPS prints a "DEBUG graphics" line tagged with cycle.
- */
-void Mesh::compare_coordinates_cpu_local_to_cpu_global_double(uint ncells_global, int *nsizes, int *ndispl, spatial_t *x, spatial_t *dx, spatial_t *y, spatial_t *dy, double *H, spatial_t *x_global, spatial_t *dx_global, spatial_t *y_global, spatial_t *dy_global, double *H_global, int cycle)
-{
-   vector<spatial_t> x_all(ncells_global);
-   vector<spatial_t> dx_all(ncells_global);
-   vector<spatial_t> y_all(ncells_global);
-   vector<spatial_t> dy_all(ncells_global);
-   vector<double>    H_all(ncells_global);
-
-#ifdef HAVE_MPI
-   const int nlocal = nsizes[mype];
-   MPI_Allgatherv(&x[0],  nlocal, MPI_SPATIAL_T, &x_all[0],  &nsizes[0], &ndispl[0], MPI_SPATIAL_T, MPI_COMM_WORLD);
-   MPI_Allgatherv(&dx[0], nlocal, MPI_SPATIAL_T, &dx_all[0], &nsizes[0], &ndispl[0], MPI_SPATIAL_T, MPI_COMM_WORLD);
-   MPI_Allgatherv(&y[0],  nlocal, MPI_SPATIAL_T, &y_all[0],  &nsizes[0], &ndispl[0], MPI_SPATIAL_T, MPI_COMM_WORLD);
-   MPI_Allgatherv(&dy[0], nlocal, MPI_SPATIAL_T, &dy_all[0], &nsizes[0], &ndispl[0], MPI_SPATIAL_T, MPI_COMM_WORLD);
-   MPI_Allgatherv(&H[0],  nlocal, MPI_DOUBLE,    &H_all[0],  &nsizes[0], &ndispl[0], MPI_DOUBLE,    MPI_COMM_WORLD);
-#else
-   // Never executed; only references the parameters so the compiler does not
-   // warn about them being unused.
-   if (1 == 2) printf("DEBUG -- nsizes[0] %d ndispl[0] %d x %p dx %p y %p dy %p H %p\n",
-                      nsizes[0],ndispl[0],x,dx,y,dy,H);
-#endif
-
-   for (uint cell = 0; cell < ncells_global; cell++) {
-      if (fabs(x_global[cell] -x_all[cell] ) > STATE_EPS) {
-         printf("DEBUG graphics at cycle %d x_global & x_check_global %d %lf %lf \n",cycle,cell,x_global[cell], x_all[cell]);
-      }
-      if (fabs(dx_global[cell]-dx_all[cell]) > STATE_EPS) {
-         printf("DEBUG graphics at cycle %d dx_global & dx_check_global %d %lf %lf \n",cycle,cell,dx_global[cell],dx_all[cell]);
-      }
-      if (fabs(y_global[cell] -y_all[cell] ) > STATE_EPS) {
-         printf("DEBUG graphics at cycle %d y_global & y_check_global %d %lf %lf \n",cycle,cell,y_global[cell], y_all[cell]);
-      }
-      if (fabs(dy_global[cell]-dy_all[cell]) > STATE_EPS) {
-         printf("DEBUG graphics at cycle %d dy_global & dy_check_global %d %lf %lf \n",cycle,cell,dy_global[cell],dy_all[cell]);
-      }
-      if (fabs(H_global[cell] -H_all[cell] ) > STATE_EPS) {
-         printf("DEBUG graphics at cycle %d H_global & H_check_global %d %lf %lf \n",cycle,cell,H_global[cell], H_all[cell]);
-      }
-   }
-}
-
-/**
- * Debug check of local coordinate/state arrays against global reference
- * arrays (single-precision H).
- *
- * With MPI, the x, dx, y, dy and H arguments of every rank are gathered using
- * the counts in nsizes and displacements in ndispl; without MPI nothing is
- * gathered and the zero-initialised buffers are compared as they are. Each
- * gathered value that differs from its *_global counterpart by more than
- * STATE_EPS prints a "DEBUG graphics" line tagged with cycle.
- */
-void Mesh::compare_coordinates_cpu_local_to_cpu_global_float(uint ncells_global, int *nsizes, int *ndispl, spatial_t *x, spatial_t *dx, spatial_t *y, spatial_t *dy, float *H, spatial_t *x_global, spatial_t *dx_global, spatial_t *y_global, spatial_t *dy_global, float *H_global, int cycle)
-{
-   vector<spatial_t> x_all(ncells_global);
-   vector<spatial_t> dx_all(ncells_global);
-   vector<spatial_t> y_all(ncells_global);
-   vector<spatial_t> dy_all(ncells_global);
-   vector<float>     H_all(ncells_global);
-
-#ifdef HAVE_MPI
-   const int nlocal = nsizes[mype];
-   MPI_Allgatherv(&x[0],  nlocal, MPI_SPATIAL_T, &x_all[0],  &nsizes[0], &ndispl[0], MPI_SPATIAL_T, MPI_COMM_WORLD);
-   MPI_Allgatherv(&dx[0], nlocal, MPI_SPATIAL_T, &dx_all[0], &nsizes[0], &ndispl[0], MPI_SPATIAL_T, MPI_COMM_WORLD);
-   MPI_Allgatherv(&y[0],  nlocal, MPI_SPATIAL_T, &y_all[0],  &nsizes[0], &ndispl[0], MPI_SPATIAL_T, MPI_COMM_WORLD);
-   MPI_Allgatherv(&dy[0], nlocal, MPI_SPATIAL_T, &dy_all[0], &nsizes[0], &ndispl[0], MPI_SPATIAL_T, MPI_COMM_WORLD);
-   MPI_Allgatherv(&H[0],  nlocal, MPI_FLOAT,     &H_all[0],  &nsizes[0], &ndispl[0], MPI_FLOAT,     MPI_COMM_WORLD);
-#else
-   // Never executed; only references the parameters so the compiler does not
-   // warn about them being unused.
-   if (1 == 2) printf("DEBUG -- nsizes[0] %d ndispl[0] %d x %p dx %p y %p dy %p H %p\n",
-                      nsizes[0],ndispl[0],x,dx,y,dy,H);
-#endif
-
-   for (uint cell = 0; cell < ncells_global; cell++) {
-      if (fabs(x_global[cell] -x_all[cell] ) > STATE_EPS) {
-         printf("DEBUG graphics at cycle %d x_global & x_check_global %d %lf %lf \n",cycle,cell,x_global[cell], x_all[cell]);
-      }
-      if (fabs(dx_global[cell]-dx_all[cell]) > STATE_EPS) {
-         printf("DEBUG graphics at cycle %d dx_global & dx_check_global %d %lf %lf \n",cycle,cell,dx_global[cell],dx_all[cell]);
-      }
-      if (fabs(y_global[cell] -y_all[cell] ) > STATE_EPS) {
-         printf("DEBUG graphics at cycle %d y_global & y_check_global %d %lf %lf \n",cycle,cell,y_global[cell], y_all[cell]);
-      }
-      if (fabs(dy_global[cell]-dy_all[cell]) > STATE_EPS) {
-         printf("DEBUG graphics at cycle %d dy_global & dy_check_global %d %lf %lf \n",cycle,cell,dy_global[cell],dy_all[cell]);
-      }
-      if (fabs(H_global[cell] -H_all[cell] ) > STATE_EPS) {
-         printf("DEBUG graphics at cycle %d H_global & H_check_global %d %lf %lf \n",cycle,cell,H_global[cell], H_all[cell]);
-      }
-   }
-}
-
-#ifdef HAVE_OPENCL
-/**
- * Debug check: reads ncells entries of dev_mpot back from the device and
- * prints a DEBUG line for every cell whose value differs from mpot.
- */
-void Mesh::compare_mpot_gpu_global_to_cpu_global(int *mpot, cl_mem dev_mpot)
-{
-   cl_command_queue queue = ezcl_get_command_queue();
-
-   vector<int> mpot_gpu(ncells);
-   ezcl_enqueue_read_buffer(queue, dev_mpot, CL_TRUE, 0, ncells*sizeof(cl_int), &mpot_gpu[0], NULL);
-
-   for (uint cell = 0; cell < ncells; cell++) {
-      if (mpot[cell] == mpot_gpu[cell]) continue;
-      printf("DEBUG -- mpot: ic %d mpot %d mpot_check %d\n",cell, mpot[cell], mpot_gpu[cell]);
-   }
-}
-#endif
-
-/**
- * Debug check of the local mpot array against a global reference.
- *
- * With MPI, the local mpot entries of every rank are gathered (nsizes and
- * ndispl give the per-rank counts and displacements) and compared with
- * mpot_global; without MPI nothing is gathered and the zero-initialised
- * buffer is compared as it is. Mismatches are reported by rank 0 only, tagged
- * with cycle.
- */
-void Mesh::compare_mpot_cpu_local_to_cpu_global(uint ncells_global, int *nsizes, int *ndispl, int *mpot, int *mpot_global, int cycle)
-{
-   vector<int>mpot_save_global(ncells_global);
-#ifdef HAVE_MPI
-   // The send count has to agree with the receive count every rank expects
-   // from this rank, i.e. nsizes[mype]. This is the same count every other
-   // gather in this file passes; ncells was used here before.
-   MPI_Allgatherv(&mpot[0], nsizes[mype], MPI_INT, &mpot_save_global[0], &nsizes[0], &ndispl[0], MPI_INT, MPI_COMM_WORLD);
-#else
-   // Never executed; only references the parameters so the compiler does not
-   // warn about them being unused.
-   if (1 == 2) printf("DEBUG -- nsizes[0] %d ndispl[0] %d mpot %p\n",
-                      nsizes[0],ndispl[0],mpot);
-#endif
-   for (uint ic = 0; ic < ncells_global; ic++){
-      if (mpot_global[ic] != mpot_save_global[ic]) {
-         if (mype == 0) printf("%d: DEBUG refine_potential 3 at cycle %d cell %d mpot_global & mpot_save_global %d %d \n",mype,cycle,ic,mpot_global[ic],mpot_save_global[ic]);
-      }
-   }
-
-}
-
-#ifdef HAVE_OPENCL
-/**
- * Four-stage debug comparison of the mpot array (only active when built with
- * MPI):
- *   1. dev_mpot read back           vs. mpot           (every rank reports)
- *   2. gathered dev_mpot data       vs. mpot_global    (rank 0 reports)
- *   3. gathered mpot                vs. mpot_global    (rank 0 reports)
- *   4. dev_mpot_global read back    vs. mpot_global    (rank 0 reports)
- * nsizes / ndispl are the per-rank counts and displacements handed to
- * MPI_Allgatherv; ncycle only appears in the printed messages.
- */
-void Mesh::compare_mpot_all_to_gpu_local(int *mpot, int *mpot_global, cl_mem dev_mpot, cl_mem dev_mpot_global, uint ncells_global, int *nsizes, int *ndispl, int ncycle)
-{
-#ifdef HAVE_MPI
-   cl_command_queue queue = ezcl_get_command_queue();
-   const bool report = (mype == 0);
-
-   // Stage 1: local device buffer against the local host array.
-   vector<int> mpot_dev(ncells);
-   ezcl_enqueue_read_buffer(queue, dev_mpot, CL_TRUE, 0, ncells*sizeof(cl_int), &mpot_dev[0], NULL);
-   for (uint cell = 0; cell < ncells; cell++) {
-      if (mpot[cell] == mpot_dev[cell]) continue;
-      printf("%d: DEBUG refine_potential 1 at cycle %d cell %d mpot & mpot_save %d %d \n",mype,ncycle,cell,mpot[cell],mpot_dev[cell]);
-   }
-
-   // Stage 2: gathered device data against the global host array.
-   vector<int> mpot_all(ncells_global);
-   MPI_Allgatherv(&mpot_dev[0], nsizes[mype], MPI_INT, &mpot_all[0], &nsizes[0], &ndispl[0], MPI_INT, MPI_COMM_WORLD);
-   for (uint cell = 0; cell < ncells_global; cell++) {
-      if (report && mpot_global[cell] != mpot_all[cell]) {
-         printf("%d: DEBUG refine_potential 2 at cycle %d cell %d mpot_global & mpot_save_global %d %d \n",mype,ncycle,cell,mpot_global[cell],mpot_all[cell]);
-      }
-   }
-
-   // Stage 3: gathered host data against the global host array.
-   MPI_Allgatherv(&mpot[0], nsizes[mype], MPI_INT, &mpot_all[0], &nsizes[0], &ndispl[0], MPI_INT, MPI_COMM_WORLD);
-   for (uint cell = 0; cell < ncells_global; cell++) {
-      if (report && mpot_global[cell] != mpot_all[cell]) {
-         printf("%d: DEBUG refine_potential 3 at cycle %d cell %d mpot_global & mpot_save_global %d %d \n",mype,ncycle,cell,mpot_global[cell],mpot_all[cell]);
-      }
-   }
-
-   // Stage 4: global device buffer against the global host array.
-   ezcl_enqueue_read_buffer(queue, dev_mpot_global, CL_TRUE, 0, ncells_global*sizeof(cl_int), &mpot_all[0], NULL);
-   for (uint cell = 0; cell < ncells_global; cell++) {
-      if (report && mpot_global[cell] != mpot_all[cell]) {
-         printf("%d: DEBUG refine_potential 4 at cycle %d cell %u mpot_global & mpot_save_global %d %d \n",mype,ncycle,cell,mpot_global[cell],mpot_all[cell]);
-      }
-   }
-#else
-   // Never executed; only references the parameters so the compiler does not
-   // warn about them being unused.
-   if (1 == 2) printf("DEBUG -- mpot %p mpot_global %p dev_mpot %p dev_mpot_global %p ncells_global %d nsizes[0] %d ndispl[0] %d ncycle %d\n",
-                      mpot,mpot_global,dev_mpot,dev_mpot_global,ncells_global,nsizes[0],ndispl[0],ncycle);
-#endif
-}
-
-/**
- * Debug check of the per-tile offsets stored in dev_ioffset.
- *
- * For every tile of TILE_SIZE cells among the first old_ncells cells, the
- * number of cells the tile will hold after refinement/coarsening is
- * recomputed on the host from mpot and celltype:
- *   - mpot < 0:  the cell counts once if it is the lower-left cell (real
- *                cells) or the lower-left or upper-right cell (other cells);
- *   - mpot == 0: the cell counts once;
- *   - mpot > 0:  a real cell counts 4, any other cell counts 2.
- * The running total before each tile is compared with the value read back
- * from the device, and a DEBUG line is printed on mismatch.
- */
-void Mesh::compare_ioffset_gpu_global_to_cpu_global(uint old_ncells, int *mpot)
-{
-   // With no cells the work-group size below would be 0 and the block count
-   // a division by zero; there is nothing to compare in that case.
-   if (ncells == 0) return;
-
-   cl_command_queue command_queue = ezcl_get_command_queue();
-
-   size_t local_work_size  = MIN(ncells, TILE_SIZE);
-   size_t global_work_size = ((ncells+local_work_size - 1) /local_work_size) * local_work_size;
-
-   //size_t block_size = (ncells + TILE_SIZE - 1) / TILE_SIZE; // For on-device global reduction kernel.
-   size_t block_size = global_work_size/local_work_size;
-
-   vector<int> ioffset_check(block_size);
-   ezcl_enqueue_read_buffer(command_queue, dev_ioffset, CL_TRUE, 0, block_size*sizeof(cl_int), &ioffset_check[0], NULL);
-
-   int mtotal = 0;
-   for (uint ig=0; ig<(old_ncells+TILE_SIZE-1)/TILE_SIZE; ig++){
-      // The tile count comes from old_ncells while the buffer was sized from
-      // ncells; stop instead of reading past the end of what was fetched.
-      if (ig >= ioffset_check.size()) {
-         printf("DEBUG ig %u is beyond the %lu ioffset blocks read from the device\n",ig,(unsigned long)ioffset_check.size());
-         break;
-      }
-
-      int mcount = 0;
-      for (uint ic=ig*TILE_SIZE; ic<(ig+1)*TILE_SIZE; ic++){
-         if (ic >= old_ncells) break;
-
-         if (mpot[ic] < 0) {
-            if (celltype[ic] == REAL_CELL) {
-               // remove all but cell that will remain to get count right when split
-               // across processors
-               if (is_lower_left(i[ic],j[ic]) ) mcount++;
-            } else {
-               // either upper right or lower left will remain for boundary cells
-               if (is_upper_right(i[ic],j[ic]) || is_lower_left(i[ic],j[ic]) ) mcount++;
-            }
-         } else {
-            if (celltype[ic] == REAL_CELL){
-               mcount += mpot[ic] ? 4 : 1;
-            } else {
-               mcount += mpot[ic] ? 2 : 1;
-            }
-         }
-      }
-      // The value printed is the running total, so label it as such (it was
-      // labelled "mcount" before).
-      if (mtotal != ioffset_check[ig]) printf("DEBUG ig %d ioffset %d mtotal %d\n",ig,ioffset_check[ig],mtotal);
-      mtotal += mcount;
-   }
-}
-
-/**
- * Debug check of the per-tile offsets for both the local and the global mesh.
- *
- * dev_ioffset (block_size entries) and dev_ioffset_global (block_size_global
- * entries) are read back into ioffset and ioffset_global. For every tile of
- * TILE_SIZE cells the expected cell count after refinement/coarsening is
- * recomputed on the host:
- *   - mpot < 0:  the cell counts once if it is the lower-left cell (real
- *                cells) or the lower-left or upper-right cell (other cells);
- *   - mpot == 0: the cell counts once;
- *   - mpot > 0:  a real cell counts 4, any other cell counts 2.
- * The running total before each tile is compared with the device value and a
- * DEBUG line is printed on mismatch. After more than 10 mismatches in the
- * global pass the process is terminated with a failure status (the exit
- * status used to be 0, which reported success to the caller).
- */
-void Mesh::compare_ioffset_all_to_gpu_local(uint old_ncells, uint old_ncells_global, int block_size, int block_size_global, int *mpot, int *mpot_global, cl_mem dev_ioffset, cl_mem dev_ioffset_global, int *ioffset, int *ioffset_global, int *celltype_global, int *i_global, int *j_global)
-{
-   cl_command_queue command_queue = ezcl_get_command_queue();
-
-   // This compares ioffset for each block in the calculation
-   ezcl_enqueue_read_buffer(command_queue, dev_ioffset, CL_TRUE, 0, block_size*sizeof(cl_int), &ioffset[0], NULL);
-   int mtotal = 0;
-   for (uint ig=0; ig<(old_ncells+TILE_SIZE-1)/TILE_SIZE; ig++){
-      int mcount = 0;
-      for (uint ic=ig*TILE_SIZE; ic<(ig+1)*TILE_SIZE; ic++){
-         if (ic >= old_ncells) break;
-
-         if (mpot[ic] < 0) {
-            if (celltype[ic] == REAL_CELL) {
-               // remove all but cell that will remain to get count right when split
-               // across processors
-               if (is_lower_left(i[ic],j[ic]) ) mcount++;
-            } else {
-               // either upper right or lower left will remain for boundary cells
-               if (is_upper_right(i[ic],j[ic]) || is_lower_left(i[ic],j[ic]) ) mcount++;
-            }
-         } else {
-            if (celltype[ic] == REAL_CELL){
-               mcount += mpot[ic] ? 4 : 1;
-            } else {
-               mcount += mpot[ic] ? 2 : 1;
-            }
-         }
-      }
-      if (mtotal != ioffset[ig]) printf("%d: DEBUG ig %d ioffset %d mtotal %d\n",mype,ig,ioffset[ig],mtotal);
-      mtotal += mcount;
-   }
-
-   // For global This compares ioffset for each block in the calculation
-   ezcl_enqueue_read_buffer(command_queue, dev_ioffset_global, CL_TRUE, 0, block_size_global*sizeof(cl_int), &ioffset_global[0], NULL);
-   mtotal = 0;
-   int count = 0;
-   for (uint ig=0; ig<(old_ncells_global+TILE_SIZE-1)/TILE_SIZE; ig++){
-      int mcount = 0;
-      for (uint ic=ig*TILE_SIZE; ic<(ig+1)*TILE_SIZE; ic++){
-         if (ic >= old_ncells_global) break;
-
-         if (mpot_global[ic] < 0) {
-            if (celltype_global[ic] == REAL_CELL) {
-               // remove all but cell that will remain to get count right when split
-               // across processors
-               if (is_lower_left(i_global[ic],j_global[ic]) ) mcount++;
-            } else {
-               // either upper right or lower left will remain for boundary cells
-               if (is_upper_right(i_global[ic],j_global[ic]) || is_lower_left(i_global[ic],j_global[ic]) ) mcount++;
-            }
-         } else {
-            if (celltype_global[ic] == REAL_CELL) {
-               mcount += mpot_global[ic] ? 4 : 1;
-            } else {
-               mcount += mpot_global[ic] ? 2 : 1;
-            }
-         }
-      }
-      if (mtotal != ioffset_global[ig]) {
-         printf("DEBUG global ig %d ioffset %d mtotal %d\n",ig,ioffset_global[ig],mtotal);
-         count++;
-      }
-      // Give up once the output is clearly dominated by errors; this is a
-      // failed verification, so do not report success.
-      if (count > 10) exit(EXIT_FAILURE);
-      mtotal += mcount;
-   }
-}
-#endif
-
-/**
- * Constructs an empty mesh description.
- *
- * nx, ny                 coarse cell counts; they fix imax/jmax and, together
- *                        with the cell sizes, the physical extent
- * levmx_in               stored as levmx; the per-level tables get levmx+1 entries
- * ndim_in                stored as ndim
- * deltax_in, deltay_in   level-0 cell sizes
- * boundary               stored as have_boundary; when non-zero the physical
- *                        extent is computed for two additional cells per direction
- * parallel_in            stored as parallel; with MPI it enables the rank/size
- *                        query and the mesh_memory.pinit call
- * do_gpu_calc            when non-zero (and built with OpenCL) device copies of
- *                        the per-level tables are allocated
- *
- * No cells are created here: ncells stays 0 and the neighbor/celltype
- * pointers are left NULL.
- */
-Mesh::Mesh(int nx, int ny, int levmx_in, int ndim_in, double deltax_in, double deltay_in, int boundary, int parallel_in, int do_gpu_calc)
-{
-   lowerBound_Global = NULL;
-   upperBound_Global = NULL;
-
-   // Reset all timers and counters.
-   for (int it = 0; it < MESH_TIMER_SIZE; it++) {
-      cpu_timers[it] = 0.0;
-      gpu_timers[it] = 0L;
-   }
-   for (int ict = 0; ict < MESH_COUNTER_SIZE; ict++) {
-      cpu_counters[ict] = 0;
-      gpu_counters[ict] = 0;
-   }
-
-   ndim  = ndim_in;
-   levmx = levmx_in;
-#ifdef HAVE_OPENCL
-   if (ndim == TWO_DIMENSIONAL) {
-      defines = "-DTWO_DIMENSIONAL -DCARTESIAN";
-   }
-#endif
-
-   offtile_ratio_local = 0;
-   offtile_local_count = 1;
-
-   // Serial defaults; overwritten below when MPI is up and parallel is set.
-   mype         = 0;
-   numpe        = 1;
-   ncells       = 0;
-   ncells_ghost = 0;
-   parallel     = parallel_in;
-   noffset      = 0;
-   mem_factor   = 1.0;
-   //mem_factor = 1.5;
-
-#ifdef HAVE_MPI
-   int mpi_is_up;
-   MPI_Initialized(&mpi_is_up);
-   if (mpi_is_up && parallel) {
-      MPI_Comm_rank(MPI_COMM_WORLD,&mype);
-      MPI_Comm_size(MPI_COMM_WORLD,&numpe);
-   }
-   // TODO add fini
-   if (parallel) {
-      mesh_memory.pinit(MPI_COMM_WORLD, 2L * 1024 * 1024 * 1024);
-   }
-#endif
-   cell_handle = 0;
-
-   if (numpe == 1) {
-      mem_factor = 1.0;
-   }
-
-   deltax = deltax_in;
-   deltay = deltay_in;
-
-   have_boundary = boundary;
-
-   // The index bounds are identical with and without a boundary layer; only
-   // the cell counts used for the physical extent grow by two.
-   imin = 0;
-   jmin = 0;
-   imax = nx + 1;
-   jmax = ny + 1;
-   int nxx = have_boundary ? nx + 2 : nx;
-   int nyy = have_boundary ? ny + 2 : ny;
-
-   // Domain is centred on the origin.
-   xmin = -deltax * 0.5 * (real_t)nxx;
-   ymin = -deltay * 0.5 * (real_t)nyy;
-   xmax =  deltax * 0.5 * (real_t)nxx;
-   ymax =  deltay * 0.5 * (real_t)nyy;
-
-   size_t lvlMxSize = levmx + 1;
-
-   levtable.resize(lvlMxSize);
-   lev_ibegin.resize(lvlMxSize);
-   lev_jbegin.resize(lvlMxSize);
-   lev_iend.resize(lvlMxSize);
-   lev_jend.resize(lvlMxSize);
-   lev_deltax.resize(lvlMxSize);
-   lev_deltay.resize(lvlMxSize);
-
-   // Level 0 entries.
-   lev_ibegin[0] = imin + 1;
-   lev_iend[0]   = imax - 1;
-   lev_jbegin[0] = jmin + 1;
-   lev_jend[0]   = jmax - 1;
-   lev_deltax[0] = deltax;
-   lev_deltay[0] = deltay;
-
-   // Each finer level doubles the index range and halves the cell size.
-   for (int lev = 1; lev <= levmx; lev++) {
-      const int coarser = lev - 1;
-      lev_ibegin[lev] = lev_ibegin[coarser]*2;
-      lev_iend[lev]   = lev_iend[coarser]*2 + 1;
-      lev_jbegin[lev] = lev_jbegin[coarser]*2;
-      lev_jend[lev]   = lev_jend[coarser]*2 + 1;
-      lev_deltax[lev] = lev_deltax[coarser]*0.5;
-      lev_deltay[lev] = lev_deltay[coarser]*0.5;
-   }
-   for (uint lev = 0; lev < lvlMxSize; lev++) {
-      levtable[lev] = IPOW2(lev);
-   }
-
-   if (do_gpu_calc) {
-#ifdef HAVE_OPENCL
-      // The copy host ptr flag will have the data copied to the GPU as part of the allocation
-      const int copy_flags = CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR;
-      dev_levtable = ezcl_malloc(&levtable[0],   const_cast<char *>("dev_levtable"), &lvlMxSize, sizeof(cl_int),    copy_flags, 0);
-      dev_levdx    = ezcl_malloc(&lev_deltax[0], const_cast<char *>("dev_levdx"),    &lvlMxSize, sizeof(cl_real_t), copy_flags, 0);
-      dev_levdy    = ezcl_malloc(&lev_deltay[0], const_cast<char *>("dev_levdy"),    &lvlMxSize, sizeof(cl_real_t), copy_flags, 0);
-      dev_levibeg  = ezcl_malloc(&lev_ibegin[0], const_cast<char *>("dev_levibeg"),  &lvlMxSize, sizeof(cl_int),    copy_flags, 0);
-      dev_leviend  = ezcl_malloc(&lev_iend[0],   const_cast<char *>("dev_leviend"),  &lvlMxSize, sizeof(cl_int),    copy_flags, 0);
-      dev_levjbeg  = ezcl_malloc(&lev_jbegin[0], const_cast<char *>("dev_levjbeg"),  &lvlMxSize, sizeof(cl_int),    copy_flags, 0);
-      dev_levjend  = ezcl_malloc(&lev_jend[0],   const_cast<char *>("dev_levjend"),  &lvlMxSize, sizeof(cl_int),    copy_flags, 0);
-#endif
-   }
-
-   ibase = 0;
-
-   // Record the finest-level indices covered by each of the four corner cells.
-   int ncells_corners = 4;
-   int corner_i[] = { 0, 0,imax,imax};
-   int corner_j[] = { 0,jmax, 0,jmax};
-
-   for (int corner = 0; corner < ncells_corners; corner++) {
-      for (int jfine = corner_j[corner]*IPOW2(levmx); jfine < (corner_j[corner]+1)*IPOW2(levmx); jfine++) {
-         for (int ifine = corner_i[corner]*IPOW2(levmx); ifine < (corner_i[corner]+1)*IPOW2(levmx); ifine++) {
-            corners_i.push_back(ifine);
-            corners_j.push_back(jfine);
-         }
-      }
-   }
-
-   do_rezone     = true;
-   gpu_do_rezone = true;
-
-   celltype = NULL;
-   nlft     = NULL;
-   nrht     = NULL;
-   nbot     = NULL;
-   ntop     = NULL;
-}
-
-void Mesh::init(int nx, int ny, real_t circ_radius, partition_method initial_order, int do_gpu_calc)
-{
- if (do_gpu_calc) {
-#ifdef HAVE_OPENCL
- cl_context context = ezcl_get_context();
-
- hash_lib_init();
- if (mype == 0) printf("Starting compile of kernels in mesh\n");
- char *bothsources = (char *)malloc(strlen(mesh_kern_source)+strlen(get_hash_kernel_source_string())+1);
- strcpy(bothsources, get_hash_kernel_source_string());
- strcat(bothsources, mesh_kern_source);
- strcat(bothsources, "\0");
- const char *defines = NULL;
- cl_program program = ezcl_create_program_wsource(context, defines, bothsources);
- free(bothsources);
-
- kernel_reduction_scan2 = ezcl_create_kernel_wprogram(program, "finish_reduction_scan2_cl");
- kernel_reduction_count = ezcl_create_kernel_wprogram(program, "finish_reduction_count_cl");
- kernel_reduction_count2 = ezcl_create_kernel_wprogram(program, "finish_reduction_count2_cl");
- kernel_hash_adjust_sizes = ezcl_create_kernel_wprogram(program, "hash_adjust_sizes_cl");
- kernel_hash_setup = ezcl_create_kernel_wprogram(program, "hash_setup_cl");
- kernel_hash_setup_local = ezcl_create_kernel_wprogram(program, "hash_setup_local_cl");
- kernel_neighbor_init = ezcl_create_kernel_wprogram(program, "neighbor_init_cl");
- kernel_calc_neighbors = ezcl_create_kernel_wprogram(program, "calc_neighbors_cl");
- kernel_calc_neighbors_local = ezcl_create_kernel_wprogram(program, "calc_neighbors_local_cl");
- kernel_calc_border_cells = ezcl_create_kernel_wprogram(program, "calc_border_cells_cl");
- kernel_calc_border_cells2 = ezcl_create_kernel_wprogram(program, "calc_border_cells2_cl");
- kernel_finish_scan = ezcl_create_kernel_wprogram(program, "finish_scan_cl");
- kernel_get_border_data = ezcl_create_kernel_wprogram(program, "get_border_data_cl");
- kernel_calc_layer1 = ezcl_create_kernel_wprogram(program, "calc_layer1_cl");
- kernel_calc_layer1_sethash = ezcl_create_kernel_wprogram(program, "calc_layer1_sethash_cl");
- kernel_calc_layer2 = ezcl_create_kernel_wprogram(program, "calc_layer2_cl");
- kernel_get_border_data2 = ezcl_create_kernel_wprogram(program, "get_border_data2_cl");
- kernel_calc_layer2_sethash = ezcl_create_kernel_wprogram(program, "calc_layer2_sethash_cl");
- kernel_copy_mesh_data = ezcl_create_kernel_wprogram(program, "copy_mesh_data_cl");
- kernel_fill_mesh_ghost = ezcl_create_kernel_wprogram(program, "fill_mesh_ghost_cl");
- kernel_fill_neighbor_ghost = ezcl_create_kernel_wprogram(program, "fill_neighbor_ghost_cl");
- kernel_set_corner_neighbor = ezcl_create_kernel_wprogram(program, "set_corner_neighbor_cl");
- kernel_adjust_neighbors_local = ezcl_create_kernel_wprogram(program, "adjust_neighbors_local_cl");
- kernel_hash_size = ezcl_create_kernel_wprogram(program, "calc_hash_size_cl");
- kernel_finish_hash_size = ezcl_create_kernel_wprogram(program, "finish_reduction_minmax4_cl");
- kernel_calc_spatial_coordinates = ezcl_create_kernel_wprogram(program, "calc_spatial_coordinates_cl");
- kernel_do_load_balance_lower = ezcl_create_kernel_wprogram(program, "do_load_balance_lower_cl");
- kernel_do_load_balance_middle = ezcl_create_kernel_wprogram(program, "do_load_balance_middle_cl");
- kernel_do_load_balance_upper = ezcl_create_kernel_wprogram(program, "do_load_balance_upper_cl");
-#ifndef MINIMUM_PRECISION
- kernel_do_load_balance_double = ezcl_create_kernel_wprogram(program, "do_load_balance_double_cl");
-#endif
- kernel_do_load_balance_float = ezcl_create_kernel_wprogram(program, "do_load_balance_float_cl");
- kernel_refine_smooth = ezcl_create_kernel_wprogram(program, "refine_smooth_cl");
- kernel_coarsen_smooth = ezcl_create_kernel_wprogram(program, "coarsen_smooth_cl");
- kernel_coarsen_check_block = ezcl_create_kernel_wprogram(program, "coarsen_check_block_cl");
- kernel_rezone_all = ezcl_create_kernel_wprogram(program, "rezone_all_cl");
- kernel_rezone_neighbors = ezcl_create_kernel_wprogram(program, "rezone_neighbors_cl");
-#ifndef MINIMUM_PRECISION
- kernel_rezone_one_double = ezcl_create_kernel_wprogram(program, "rezone_one_double_cl");
-#endif
- kernel_rezone_one_float = ezcl_create_kernel_wprogram(program, "rezone_one_float_cl");
- kernel_copy_mpot_ghost_data = ezcl_create_kernel_wprogram(program, "copy_mpot_ghost_data_cl");
- kernel_set_boundary_refinement = ezcl_create_kernel_wprogram(program, "set_boundary_refinement");
- init_kernel_2stage_sum();
- init_kernel_2stage_sum_int();
- if (! have_boundary){
- kernel_count_BCs = ezcl_create_kernel_wprogram(program, "count_BCs_cl");
- }
-
- ezcl_program_release(program);
- if (mype == 0) printf("Finishing compile of kernels in mesh\n");
-#endif
- }
-
- //KDTree_Initialize(&tree);
- if (ncells > 0) { // this is a restart.
- nsizes.resize (numpe);
- ndispl.resize (numpe);
- if (parallel && numpe > 1) {
-#ifdef HAVE_MPI
- int ncells_int = ncells;
- MPI_Allgather(&ncells_int, 1, MPI_INT, &nsizes[0], 1, MPI_INT, MPI_COMM_WORLD);
- ndispl[0]=0;
- for (int ip=1; ip<numpe; ip++){
- ndispl[ip] = ndispl[ip-1] + nsizes[ip-1];
- }
- noffset=ndispl[mype];
- ncells_global = ndispl[numpe-1] + nsizes[numpe-1];
-#endif
- } else {
- noffset = 0;
- ncells_global = ncells;
- proc.resize (ncells);
- calc_distribution(numpe);
- }
- calc_celltype(ncells);
-
- } else {
- int istart = 1,
- jstart = 1,
- iend = nx,
- jend = ny,
- nxx = nx,
- nyy = ny;
- if (have_boundary) {
- istart = 0;
- jstart = 0;
- iend = nx + 1;
- jend = ny + 1;
- nxx = nx + 2;
- nyy = ny + 2;
- }
-
- if (ndim == TWO_DIMENSIONAL) ncells = nxx * nyy - have_boundary * 4;
- else ncells = nxx * nyy;
-
- noffset = 0;
- if (parallel) {
- ncells_global = ncells;
- nsizes.resize(numpe);
- ndispl.resize(numpe);
-
- for (int ip=0; ip<numpe; ip++){
- nsizes[ip] = ncells_global/numpe;
- if (ip < (int)(ncells_global%numpe)) nsizes[ip]++;
- }
-
- ndispl[0]=0;
- for (int ip=1; ip<numpe; ip++){
- ndispl[ip] = ndispl[ip-1] + nsizes[ip-1];
- }
- ncells= nsizes[mype];
- noffset=ndispl[mype];
- }
-
- allocate(ncells);
- index.resize(ncells);
-
- int ic = 0;
-
- for (int jj = jstart; jj <= jend; jj++) {
- for (int ii = istart; ii <= iend; ii++) {
- if (have_boundary && ii == 0 && jj == 0 ) continue;
- if (have_boundary && ii == 0 && jj == jend) continue;
- if (have_boundary && ii == iend && jj == 0 ) continue;
- if (have_boundary && ii == iend && jj == jend) continue;
-
- if (ic >= (int)noffset && ic < (int)(ncells+noffset)){
- int iclocal = ic-noffset;
- index[iclocal] = ic;
- i[iclocal] = ii;
- j[iclocal] = jj;
- level[iclocal] = 0;
- }
- ic++;
- }
- }
-
- //if (numpe > 1 && (initial_order != HILBERT_SORT && initial_order != HILBERT_PARTITION) ) mem_factor = 2.0;
- partition_cells(numpe, index, initial_order);
-
- calc_celltype(ncells);
- calc_spatial_coordinates(0);
-
- // Start lev loop here
- for (int ilevel=1; ilevel<=levmx; ilevel++) {
-
- //int old_ncells = ncells;
-
- ncells_ghost = ncells;
- calc_neighbors_local();
-
- kdtree_setup();
-
- int nez;
- vector<int> ind(ncells);
-
- #ifdef FULL_PRECISION
- KDTree_QueryCircleIntersect_Double(&tree, &nez, &(ind[0]), circ_radius, ncells, &x[0], &dx[0], &y[0], &dy[0]);
- #else
- KDTree_QueryCircleIntersect_Float(&tree, &nez, &(ind[0]), circ_radius, ncells, &x[0], &dx[0], &y[0], &dy[0]);
- #endif
-
- vector<int> mpot(ncells_ghost,0);
-
- for (int ic=0; ic<nez; ++ic){
- if (level[ind[ic]] < levmx) mpot[ind[ic]] = 1;
- }
-
- KDTree_Destroy(&tree);
- // Refine the cells.
- int icount = 0;
- int jcount = 0;
- int new_ncells = refine_smooth(mpot, icount, jcount);
-
- MallocPlus dummy;
- rezone_all(icount, jcount, mpot, 0, dummy);
-
- ncells = new_ncells;
-
- calc_spatial_coordinates(0);
-
- #ifdef HAVE_MPI
- if (parallel && numpe > 1) {
- int ncells_int = ncells;
- MPI_Allgather(&ncells_int, 1, MPI_INT, &nsizes[0], 1, MPI_INT, MPI_COMM_WORLD);
- ndispl[0]=0;
- for (int ip=1; ip<numpe; ip++){
- ndispl[ip] = ndispl[ip-1] + nsizes[ip-1];
- }
- noffset=ndispl[mype];
- ncells_global = ndispl[numpe-1] + nsizes[numpe-1];
- }
- #endif
- } // End lev loop here
- index.clear();
- ncells_ghost = ncells;
- }
- int ncells_corners = 4;
- int i_corner[] = { 0, 0,imax,imax};
- int j_corner[] = { 0,jmax, 0,jmax};
-
- for(int ic=0; ic<ncells_corners; ic++){
- for (int jj = j_corner[ic]*IPOW2(levmx); jj < (j_corner[ic]+1)*IPOW2(levmx); jj++) {
- for (int ii = i_corner[ic]*IPOW2(levmx); ii < (i_corner[ic]+1)*IPOW2(levmx); ii++) {
- corners_i.push_back(ii);
- corners_j.push_back(jj);
- }
- }
- }
-}
-
-size_t Mesh::refine_smooth(vector<int> &mpot, int &icount, int &jcount)
-{ /*
-   * Smooths the per-cell rezone flags in mpot and reports the resulting
-   * cell count. Entries > 0 are treated as "will be one level finer",
-   * entries < 0 are coarsening candidates, 0 means unchanged.
-   *
-   * Passes, in order:
-   *   1. Refinement smoothing (only if some rank has flags and levmx > 1),
-   *      repeated at most levmx-1 times while any rank still adds flags:
-   *      a cell not already flagged > 0 gets mpot = 1 when a left, right,
-   *      top or bottom neighbor (or the second neighbor looked up behind a
-   *      finer neighbor) would end up more than one level finer than it.
-   *   2. A coarsening candidate is reset to 0 when one of the two
-   *      neighbors chosen by its is_upper/lower_left/right position would
-   *      be finer than the cell.
-   *   3. A coarsening candidate is reset to 0 unless the three other cells
-   *      n1, n2, n3 are all flagged -1 and at the cell's level.
-   *   4. Cells with a negative celltype copy the flag of the neighbor
-   *      selected by their *_BOUNDARY type.
-   *
-   * mpot    in/out: flags, indexed by cell; swapped with a scratch vector
-   *         between passes and ghost-updated through L7_Update when
-   *         HAVE_MPI is defined and numpe > 1.
-   * icount  out: overwritten by rezone_count() on entry and again on exit.
-   * jcount  out: same as icount; rezone_count() only ever decrements it.
-   *
-   * Returns ncells plus the value returned by the final rezone_count()
-   * call. Also sets do_rezone, bumps cpu_counters[MESH_COUNTER_REFINE_SMOOTH]
-   * when pass 1 runs, and accumulates cpu_timers[MESH_TIMER_REFINE_SMOOTH]
-   * when TIMING_LEVEL >= 2.
-   */
- vector<int> mpot_old; // scratch flag vector, swapped with mpot between passes
-
- int newcount;
- int newcount_global;
-
- struct timeval tstart_lev2; // only started/stopped when TIMING_LEVEL >= 2
-
- rezone_count(mpot, icount, jcount); // fills icount/jcount from the incoming flags
-
-#ifdef _OPENMP
-#pragma omp parallel
-{ //START Parallel Region
-#endif
-
-#ifdef _OPENMP
-#pragma omp master
-{//MASTER START
-#endif
- newcount = icount;
- newcount_global = newcount;
-
- if (TIMING_LEVEL >= 2) cpu_timer_start(&tstart_lev2);
-
-#ifdef HAVE_MPI
- if (parallel) {
- MPI_Allreduce(&newcount, &newcount_global, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
- }
-#endif
-
-#ifdef _OPENMP
-}//END MASTER
-#pragma omp barrier
-#endif
-
- if(newcount_global > 0 && levmx > 1) { // pass 1: refinement smoothing
-
- size_t my_ncells=ncells;
- if (parallel) my_ncells=ncells_ghost;
-
-#ifdef _OPENMP
-#pragma omp master
-{//MASTER START
-#endif
- cpu_counters[MESH_COUNTER_REFINE_SMOOTH]++;
-
- mpot_old.resize(my_ncells);
-#ifdef _OPENMP
-}//END MASTER
-#pragma omp barrier
-#endif
-
- int levcount = 1;
-
- while (newcount_global > 0 && levcount < levmx){ // at most levmx-1 passes
-
- levcount++;
-#ifdef _OPENMP
-#pragma omp master
-{//MASTER START
-#endif
-
- mpot.swap(mpot_old); // mpot_old now holds the flags from the previous pass
- newcount=0;
-#ifdef HAVE_MPI
- if (numpe > 1) {
- L7_Update(&mpot_old[0], L7_INT, cell_handle);
- }
-#endif
-
-#ifdef _OPENMP
-}//END MASTER
-#pragma omp barrier
-#endif
-
- int upperBound, lowerBound;
- get_bounds(upperBound, lowerBound); // NOTE(review): values are unused below (bounded loop is commented out); names are in the opposite order to the get_bounds(lowerBounds, upperBounds) call in calc_spatial_coordinates - confirm
- int mynewcount = newcount; //All threads get a mynewcount (newcount was reset to 0 by the master above)
-
-#ifdef _OPENMP
-#pragma omp for reduction(+:newcount)
-#endif
- for(uint ic = 0; ic < ncells; ic++) {
- // for(uint ic = lowerBound; ic < upperBound; ic++){
- int lev = level[ic];
- mpot[ic] = mpot_old[ic];
- if(mpot_old[ic] > 0) continue; // already flagged for refinement
-
- int nl = nlft[ic];
- if (nl >= 0 && nl < (int)ncells_ghost) { // left neighbor index is usable
- int ll = level[nl];
- if(mpot_old[nl] > 0) ll++; // neighbor counts as one level finer if flagged
-
- if(ll - lev > 1) { // neighbor would be more than one level finer
- mpot[ic]=1;
- mynewcount++;
- continue;
- }
-
- ll = level[nl];
- if (ll > lev) { // left neighbor is currently finer: also check the cell above it
- int nlt = ntop[nl];
- if (nlt >= 0 && nlt < (int)ncells_ghost) {
- int llt = level[nlt];
- if(mpot_old[nlt] > 0) llt++;
-
- if(llt - lev > 1) {
- mpot[ic]=1;
- mynewcount++;
- continue;
- }
- }
- }
- }
-
- int nr = nrht[ic];
- if (nr >= 0 && nr < (int)ncells_ghost) { // same test against the right neighbor
- int lr = level[nr];
- if(mpot_old[nr] > 0) lr++;
-
- if(lr - lev > 1) {
- mpot[ic]=1;
- mynewcount++;
- continue;
- }
-
- lr = level[nr];
- if (lr > lev) { // right neighbor is currently finer: also check the cell above it
- int nrt = ntop[nr];
- if (nrt >= 0 && nrt < (int)ncells_ghost) {
- int lrt = level[nrt];
- if(mpot_old[nrt] > 0) lrt++;
-
- if(lrt - lev > 1) {
- mpot[ic]=1;
- mynewcount++;
- continue;
- }
- }
- }
- }
-
- int nt = ntop[ic];
- if (nt >= 0 && nt < (int)ncells_ghost) { // same test against the top neighbor
- int lt = level[nt];
- if(mpot_old[nt] > 0) lt++;
-
- if(lt - lev > 1) {
- mpot[ic]=1;
- mynewcount++;
- continue;
- }
-
- lt = level[nt];
- if (lt > lev) { // top neighbor is currently finer: also check the cell to its right
- int ntr = nrht[nt];
- if (ntr >= 0 && ntr < (int)ncells_ghost) {
- int ltr = level[ntr];
- if(mpot_old[ntr] > 0) ltr++;
-
- if(ltr - lev > 1) {
- mpot[ic]=1;
- mynewcount++;
- continue;
- }
- }
- }
- }
-
- int nb = nbot[ic];
- if (nb >= 0 && nb < (int)ncells_ghost) { // same test against the bottom neighbor
- int lb = level[nb];
- if(mpot_old[nb] > 0) lb++;
-
- if(lb - lev > 1) {
- mpot[ic]=1;
- mynewcount++;
- continue;
- }
-
- lb = level[nb];
- if (lb > lev) { // bottom neighbor is currently finer: also check the cell to its right
- int nbr = nrht[nb];
- if (nbr >= 0 && nbr < (int)ncells_ghost) {
- int lbr = level[nbr];
- if(mpot_old[nbr] > 0) lbr++;
-
- if(lbr - lev > 1) {
- mpot[ic]=1;
- mynewcount++;
- continue;
- }
- }
- }
- }
- }
-#ifdef _OPENMP
-#pragma omp atomic
-#endif
- newcount += mynewcount; // NOTE(review): the loop's reduction(+:newcount) clause contributes nothing, since the loop body only updates mynewcount - confirm it can be dropped
-
-#ifdef _OPENMP
-#pragma omp barrier
-#pragma omp master
-{
-#endif
- icount += newcount;
- newcount_global = newcount;
-
-#ifdef HAVE_MPI
- if (parallel) {
- MPI_Allreduce(&newcount, &newcount_global, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
- }
-#endif
-
-#ifdef _OPENMP
-}//END MASTER
-#pragma omp barrier
-#endif
-
- } // while (newcount_global > 0 && levcount < levmx);
-
- }
-
-
-#ifdef _OPENMP
-#pragma omp master
-{
-#endif
-
-#ifdef HAVE_MPI
- if (numpe > 1) {
- L7_Update(&mpot[0], L7_INT, cell_handle);
- }
-#endif
-
- mpot_old.clear();
- mpot_old.resize(ncells_ghost); // zero-filled, ncells_ghost entries
-
- mpot_old.swap(mpot); // mpot_old: current flags; mpot: the zero-filled vector, rewritten by pass 2
-#ifdef _OPENMP
-}//END MASTER
-#pragma omp barrier
-#endif
-
-#ifdef _OPENMP
-#pragma omp for
-#endif
- for(uint ic=0; ic<ncells; ic++) { // pass 2: cancel coarsening next to finer neighbors
- mpot[ic] = mpot_old[ic];
- if (mpot_old[ic] >= 0) continue; // not a coarsening candidate
- if (mpot_old[ic] <= -1000000) continue; // NOTE(review): presumably a sentinel range that must be left untouched - confirm
- if ( is_upper_right(i[ic],j[ic]) ) { // NOTE(review): neighbor indices in this pass are used without the range check applied in pass 1 - confirm they are always valid here
- int nr = nrht[ic];
- int lr = level[nr];
- if (mpot_old[nr] > 0) lr++;
- int nt = ntop[ic];
- int lt = level[nt];
- if (mpot_old[nt] > 0) lt++;
- if (lr > level[ic] || lt > level[ic]) mpot[ic] = 0;
- } else if ( is_upper_left(i[ic],j[ic] ) ) {
- int nl = nlft[ic];
- int ll = level[nl];
- if (mpot_old[nl] > 0) ll++;
- int nt = ntop[ic];
- int lt = level[nt];
- if (mpot_old[nt] > 0) lt++;
- if (ll > level[ic] || lt > level[ic]) mpot[ic] = 0;
- } else if ( is_lower_right(i[ic],j[ic] ) ) {
- int nr = nrht[ic];
- int lr = level[nr];
- if (mpot_old[nr] > 0) lr++;
- int nb = nbot[ic];
- int lb = level[nb];
- if (mpot_old[nb] > 0) lb++;
- if (lr > level[ic] || lb > level[ic]) mpot[ic] = 0;
- } else if ( is_lower_left(i[ic],j[ic] ) ) {
- int nl = nlft[ic];
- int ll = level[nl];
- if (mpot_old[nl] > 0) ll++;
- int nb = nbot[ic];
- int lb = level[nb];
- if (mpot_old[nb] > 0) lb++;
- if (ll > level[ic] || lb > level[ic]) mpot[ic] = 0;
- }
- }
-
-#ifdef _OPENMP
-#pragma omp master
-{
-#endif
-
-#ifdef HAVE_MPI
- if (numpe > 1) {
- L7_Update(&mpot[0], L7_INT, cell_handle);
- }
-#endif
-
- mpot_old.swap(mpot); // mpot_old: result of pass 2; mpot is rewritten by pass 3
-#ifdef _OPENMP
-}//END MASTER
-#pragma omp barrier
-#endif
-
-#ifdef _OPENMP
-#pragma omp for
-#endif
- for(uint ic=0; ic<ncells; ic++) { // pass 3: coarsen only if the three other cells agree
- int n1=0, n2=0, n3=0;
- mpot[ic] = mpot_old[ic];
- if (mpot_old[ic] >= 0) continue;
- if (mpot_old[ic] <= -1000000) continue;
- if ( is_upper_right(i[ic],j[ic]) ) { // n1/n2: the two adjacent cells, n3: the cell reached through n1
- n1 = nbot[ic];
- n2 = nlft[ic];
- n3 = nlft[n1];
- } else if ( is_upper_left(i[ic],j[ic] ) ) {
- n1 = nbot[ic];
- n2 = nrht[ic];
- n3 = nrht[n1];
- } else if ( is_lower_right(i[ic],j[ic] ) ) {
- n1 = ntop[ic];
- n2 = nlft[ic];
- n3 = nlft[n1];
- } else if ( is_lower_left(i[ic],j[ic] ) ) {
- n1 = ntop[ic];
- n2 = nrht[ic];
- n3 = nrht[n1];
- }
- if (n3 < 0) { // no usable third cell: do not coarsen
- mpot[ic] = 0;
- } else {
- int lev1 = level[n1];
- int lev2 = level[n2];
- int lev3 = level[n3];
- if (mpot_old[n1] > 0) lev1++;
- if (mpot_old[n2] > 0) lev2++;
- if (mpot_old[n3] > 0) lev3++;
-
- if (mpot_old[n1] != -1 || lev1 != level[ic] ||
- mpot_old[n2] != -1 || lev2 != level[ic] ||
- mpot_old[n3] != -1 || lev3 != level[ic]) {
- mpot[ic] = 0;
- }
- }
- }
-
-#ifdef _OPENMP
-#pragma omp master
-{
-#endif
-
-#ifdef HAVE_MPI
- if (numpe > 1) {
- L7_Update(&mpot[0], L7_INT, cell_handle);
- }
-#endif
-
-#ifdef _OPENMP
-}//END MASTER
-#pragma omp barrier
-#endif
-
-#ifdef _OPENMP
-#pragma omp for
-#endif
- for (uint ic=0; ic<ncells; ic++) { // pass 4: cells with negative celltype mirror a neighbor's flag
- if (celltype[ic] < 0) {
- switch (celltype[ic]) {
- case LEFT_BOUNDARY:
- mpot[ic] = mpot[nrht[ic]];
- break;
- case RIGHT_BOUNDARY:
- mpot[ic] = mpot[nlft[ic]];
- break;
- case BOTTOM_BOUNDARY:
- mpot[ic] = mpot[ntop[ic]];
- break;
- case TOP_BOUNDARY:
- mpot[ic] = mpot[nbot[ic]];
- break;
- }
- }
- }
-
-#ifdef _OPENMP
-#pragma omp barrier
-}//END Parallel Region
-#endif
-
- newcount = ncells + rezone_count(mpot, icount, jcount); // recount from the smoothed flags
-
-#ifdef HAVE_MPI
- int icount_global = icount;
- int jcount_global = jcount;
- if (parallel) {
- int count[2], global_count[2];
- count[0] = icount;
- count[1] = jcount;
- MPI_Allreduce(&count, &global_count, 2, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
- icount_global = global_count[0];
- jcount_global = global_count[1];
- }
- do_rezone = (icount_global != 0 || jcount_global != 0) ? true : false; // rezone if any rank has work
-#else
- do_rezone = (icount != 0 || jcount != 0) ? true : false;
-#endif
-
-
- if (TIMING_LEVEL >= 2) cpu_timers[MESH_TIMER_REFINE_SMOOTH] += cpu_timer_stop(tstart_lev2);
-
- return(newcount);
-}
-
-#ifdef HAVE_OPENCL
-int Mesh::gpu_refine_smooth(cl_mem &dev_mpot, int &icount, int &jcount)
-{ /*
-   * Device-side smoothing of the rezone flags held in dev_mpot.
-   *
-   * Stages, each skipped when its global count is zero:
-   *   1. kernel_refine_smooth, repeated while newcount_global > 0 and
-   *      fewer than levmx-1 passes have run; the flagged count is reduced
-   *      with gpu_rezone_count() and read back into icount.
-   *   2. kernel_coarsen_smooth followed by kernel_coarsen_check_block; the
-   *      reduced result is read back into jcount.
-   *   3. kernel_set_boundary_refinement; gpu_rezone_count2() reduces a
-   *      pair that is read back into icount and jcount, then
-   *      gpu_rezone_scan() is run over dev_ioffset.
-   * When HAVE_MPI is defined and numpe > 1, dev_mpot is refreshed with
-   * L7_Dev_Update before the kernels that read it.
-   *
-   * dev_mpot  in/out: device flag buffer. It is swapped with temporary
-   *           buffers, so the handle may change; when both global counts
-   *           are zero it is deleted and set to NULL.
-   * icount    in/out: local count on entry, overwritten by the read-backs.
-   * jcount    in/out: same as icount.
-   *
-   * Returns ncells + icount - jcount. Also sets gpu_do_rezone, allocates
-   * dev_ioffset in stage 3 (not released here), bumps
-   * gpu_counters[MESH_COUNTER_REFINE_SMOOTH] once per stage-1 pass and
-   * accumulates gpu_timers[MESH_TIMER_REFINE_SMOOTH] when TIMING_LEVEL >= 2.
-   */
- struct timeval tstart_lev2;
- if (TIMING_LEVEL >= 2) cpu_timer_start(&tstart_lev2);
-
- cl_command_queue command_queue = ezcl_get_command_queue();
-
- size_t local_work_size = 128;
- size_t global_work_size = ((ncells+local_work_size - 1) /local_work_size) * local_work_size; // ncells rounded up to a multiple of local_work_size
- size_t block_size = global_work_size/local_work_size; // number of work-groups
-
- int icount_global = icount;
- int jcount_global = jcount;
-
-#ifdef HAVE_MPI
- if (parallel) {
- int count[2], count_global[2];
- count[0] = icount;
- count[1] = jcount;
- MPI_Allreduce(&count, &count_global, 2, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
- icount_global = count_global[0];
- jcount_global = count_global[1];
- }
-#endif
-
- int levcount = 1;
- //int which_smooth=0;
-
- if(icount_global > 0 && levcount < levmx) { // stage 1: refinement smoothing
- size_t result_size = 1;
- cl_mem dev_result = ezcl_malloc(NULL, const_cast<char *>("dev_result"), &result_size, sizeof(cl_int), CL_MEM_READ_WRITE, 0);
- cl_mem dev_redscratch = ezcl_malloc(NULL, const_cast<char *>("dev_redscratch"), &block_size, sizeof(cl_int), CL_MEM_READ_WRITE, 0);
- cl_mem dev_mpot_old = ezcl_malloc(NULL, const_cast<char *>("dev_mpot_old"), &ncells_ghost, sizeof(cl_int), CL_MEM_READ_WRITE, 0);
-
- int newcount = icount;
- int newcount_global = icount_global;
- while (newcount_global > 0 && levcount < levmx) {
- levcount++;
-
- gpu_counters[MESH_COUNTER_REFINE_SMOOTH]++;
-
-#ifdef HAVE_MPI
- if (numpe > 1) {
- L7_Dev_Update(dev_mpot, L7_INT, cell_handle);
- }
-#endif
-
- if (icount_global) {
- ezcl_device_memory_swap(&dev_mpot_old, &dev_mpot); // kernel reads dev_mpot_old, writes dev_mpot
-
- ezcl_set_kernel_arg(kernel_refine_smooth, 0, sizeof(cl_int), (void *)&ncells); // NOTE(review): ncells appears to be a size_t elsewhere in this file; only sizeof(cl_int) bytes of it are passed - confirm
- ezcl_set_kernel_arg(kernel_refine_smooth, 1, sizeof(cl_int), (void *)&ncells_ghost);
- ezcl_set_kernel_arg(kernel_refine_smooth, 2, sizeof(cl_int), (void *)&levmx);
- ezcl_set_kernel_arg(kernel_refine_smooth, 3, sizeof(cl_mem), (void *)&dev_nlft);
- ezcl_set_kernel_arg(kernel_refine_smooth, 4, sizeof(cl_mem), (void *)&dev_nrht);
- ezcl_set_kernel_arg(kernel_refine_smooth, 5, sizeof(cl_mem), (void *)&dev_nbot);
- ezcl_set_kernel_arg(kernel_refine_smooth, 6, sizeof(cl_mem), (void *)&dev_ntop);
- ezcl_set_kernel_arg(kernel_refine_smooth, 7, sizeof(cl_mem), (void *)&dev_level);
- ezcl_set_kernel_arg(kernel_refine_smooth, 8, sizeof(cl_mem), (void *)&dev_celltype);
- ezcl_set_kernel_arg(kernel_refine_smooth, 9, sizeof(cl_mem), (void *)&dev_mpot_old);
- ezcl_set_kernel_arg(kernel_refine_smooth,10, sizeof(cl_mem), (void *)&dev_mpot);
- ezcl_set_kernel_arg(kernel_refine_smooth,11, sizeof(cl_mem), (void *)&dev_redscratch);
- ezcl_set_kernel_arg(kernel_refine_smooth,12, sizeof(cl_mem), (void *)&dev_result);
- ezcl_set_kernel_arg(kernel_refine_smooth,13, local_work_size*sizeof(cl_int), NULL); // local scratch, one cl_int per work-item
-
- ezcl_enqueue_ndrange_kernel(command_queue, kernel_refine_smooth, 1, NULL, &global_work_size, &local_work_size, NULL);
-
- gpu_rezone_count(block_size, local_work_size, dev_redscratch, dev_result); // finish the reduction over dev_redscratch into dev_result
-
- int result;
- ezcl_enqueue_read_buffer(command_queue, dev_result, CL_TRUE, 0, sizeof(cl_int), &result, NULL); // blocking read
-
- //printf("result = %d after %d refine smooths\n",result,which_smooth);
- //which_smooth++;
-
- icount = result;
- }
-
- newcount = icount-newcount; // NOTE(review): from the second pass on, newcount holds the previous difference rather than the previous icount - confirm this is the intended loop control
- newcount_global = newcount;
-#ifdef HAVE_MPI
- if (parallel) {
- MPI_Allreduce(&newcount, &newcount_global, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
- }
-#endif
- icount_global += newcount_global;
- //printf("DEBUG -- icount %d icount_global %d newcount %d newcount_global %d\n",icount,icount_global,newcount,newcount_global);
- }
-
- ezcl_device_memory_delete(dev_mpot_old);
- ezcl_device_memory_delete(dev_redscratch);
- ezcl_device_memory_delete(dev_result);
- }
-
- if (jcount_global) { // stage 2: coarsening smoothing
-#ifdef HAVE_MPI
- if (numpe > 1) {
- L7_Dev_Update(dev_mpot, L7_INT, cell_handle);
- }
-#endif
-
- cl_mem dev_mpot_old = ezcl_malloc(NULL, const_cast<char *>("dev_mpot_old"), &ncells_ghost, sizeof(cl_int), CL_MEM_READ_WRITE, 0);
-
- if (jcount) { // kernels run only where this rank has candidates; the updates and collectives below are outside this test
- ezcl_device_memory_swap(&dev_mpot_old, &dev_mpot);
-
- ezcl_set_kernel_arg(kernel_coarsen_smooth, 0, sizeof(cl_int), (void *)&ncells);
- ezcl_set_kernel_arg(kernel_coarsen_smooth, 1, sizeof(cl_mem), (void *)&dev_nlft);
- ezcl_set_kernel_arg(kernel_coarsen_smooth, 2, sizeof(cl_mem), (void *)&dev_nrht);
- ezcl_set_kernel_arg(kernel_coarsen_smooth, 3, sizeof(cl_mem), (void *)&dev_nbot);
- ezcl_set_kernel_arg(kernel_coarsen_smooth, 4, sizeof(cl_mem), (void *)&dev_ntop);
- ezcl_set_kernel_arg(kernel_coarsen_smooth, 5, sizeof(cl_mem), (void *)&dev_i);
- ezcl_set_kernel_arg(kernel_coarsen_smooth, 6, sizeof(cl_mem), (void *)&dev_j);
- ezcl_set_kernel_arg(kernel_coarsen_smooth, 7, sizeof(cl_mem), (void *)&dev_level);
- ezcl_set_kernel_arg(kernel_coarsen_smooth, 8, sizeof(cl_mem), (void *)&dev_mpot_old);
- ezcl_set_kernel_arg(kernel_coarsen_smooth, 9, sizeof(cl_mem), (void *)&dev_mpot);
-
- ezcl_enqueue_ndrange_kernel(command_queue, kernel_coarsen_smooth, 1, NULL, &global_work_size, &local_work_size, NULL);
- }
-
-#ifdef HAVE_MPI
- if (numpe > 1) {
- L7_Dev_Update(dev_mpot, L7_INT, cell_handle);
- }
-#endif
-
- if (jcount) {
- size_t result_size = 1;
- cl_mem dev_result = ezcl_malloc(NULL, const_cast<char *>("dev_result"), &result_size, sizeof(cl_int), CL_MEM_READ_WRITE, 0);
- cl_mem dev_redscratch = ezcl_malloc(NULL, const_cast<char *>("dev_redscratch"), &block_size, sizeof(cl_int), CL_MEM_READ_WRITE, 0);
-
- ezcl_device_memory_swap(&dev_mpot_old, &dev_mpot);
-
- ezcl_set_kernel_arg(kernel_coarsen_check_block, 0, sizeof(cl_int), (void *)&ncells);
- ezcl_set_kernel_arg(kernel_coarsen_check_block, 1, sizeof(cl_mem), (void *)&dev_nlft);
- ezcl_set_kernel_arg(kernel_coarsen_check_block, 2, sizeof(cl_mem), (void *)&dev_nrht);
- ezcl_set_kernel_arg(kernel_coarsen_check_block, 3, sizeof(cl_mem), (void *)&dev_nbot);
- ezcl_set_kernel_arg(kernel_coarsen_check_block, 4, sizeof(cl_mem), (void *)&dev_ntop);
- ezcl_set_kernel_arg(kernel_coarsen_check_block, 5, sizeof(cl_mem), (void *)&dev_i);
- ezcl_set_kernel_arg(kernel_coarsen_check_block, 6, sizeof(cl_mem), (void *)&dev_j);
- ezcl_set_kernel_arg(kernel_coarsen_check_block, 7, sizeof(cl_mem), (void *)&dev_level);
- ezcl_set_kernel_arg(kernel_coarsen_check_block, 8, sizeof(cl_mem), (void *)&dev_celltype);
- ezcl_set_kernel_arg(kernel_coarsen_check_block, 9, sizeof(cl_mem), (void *)&dev_mpot_old);
- ezcl_set_kernel_arg(kernel_coarsen_check_block,10, sizeof(cl_mem), (void *)&dev_mpot);
- ezcl_set_kernel_arg(kernel_coarsen_check_block,11, sizeof(cl_mem), (void *)&dev_redscratch);
- ezcl_set_kernel_arg(kernel_coarsen_check_block,12, sizeof(cl_mem), (void *)&dev_result);
- ezcl_set_kernel_arg(kernel_coarsen_check_block,13, local_work_size*sizeof(cl_int), NULL);
-
- ezcl_enqueue_ndrange_kernel(command_queue, kernel_coarsen_check_block, 1, NULL, &global_work_size, &local_work_size, NULL);
-
- gpu_rezone_count(block_size, local_work_size, dev_redscratch, dev_result);
-
- int result;
- ezcl_enqueue_read_buffer(command_queue, dev_result, CL_TRUE, 0, sizeof(cl_int), &result, NULL);
-
- //printf("result = %d after coarsen smooth\n",result);
-
- jcount = result;
-
- ezcl_device_memory_delete(dev_redscratch);
- ezcl_device_memory_delete(dev_result);
- }
-
- jcount_global = jcount;
-
-#ifdef HAVE_MPI
- if (parallel) {
- MPI_Allreduce(&jcount, &jcount_global, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
- }
-#endif
-
- ezcl_device_memory_delete(dev_mpot_old);
- }
-
- if (icount_global || jcount_global) { // stage 3: boundary flags, final counts and offsets
-#ifdef HAVE_MPI
- if (numpe > 1) {
- L7_Dev_Update(dev_mpot, L7_INT, cell_handle);
- }
-#endif
-
- size_t result_size = 1;
- cl_mem dev_result = ezcl_malloc(NULL, const_cast<char *>("dev_result"), &result_size, sizeof(cl_int2), CL_MEM_READ_WRITE, 0);
- cl_mem dev_redscratch = ezcl_malloc(NULL, const_cast<char *>("dev_redscratch"), &block_size, sizeof(cl_int2), CL_MEM_READ_WRITE, 0);
- dev_ioffset = ezcl_malloc(NULL, const_cast<char *>("dev_ioffset"), &block_size, sizeof(cl_uint), CL_MEM_READ_WRITE, 0); // declared outside this function and not released here
-
- ezcl_set_kernel_arg(kernel_set_boundary_refinement, 0, sizeof(cl_int), (void *)&ncells);
- ezcl_set_kernel_arg(kernel_set_boundary_refinement, 1, sizeof(cl_mem), (void *)&dev_nlft);
- ezcl_set_kernel_arg(kernel_set_boundary_refinement, 2, sizeof(cl_mem), (void *)&dev_nrht);
- ezcl_set_kernel_arg(kernel_set_boundary_refinement, 3, sizeof(cl_mem), (void *)&dev_nbot);
- ezcl_set_kernel_arg(kernel_set_boundary_refinement, 4, sizeof(cl_mem), (void *)&dev_ntop);
- ezcl_set_kernel_arg(kernel_set_boundary_refinement, 5, sizeof(cl_mem), (void *)&dev_i);
- ezcl_set_kernel_arg(kernel_set_boundary_refinement, 6, sizeof(cl_mem), (void *)&dev_j);
- ezcl_set_kernel_arg(kernel_set_boundary_refinement, 7, sizeof(cl_mem), (void *)&dev_celltype);
- ezcl_set_kernel_arg(kernel_set_boundary_refinement, 8, sizeof(cl_mem), (void *)&dev_mpot);
- ezcl_set_kernel_arg(kernel_set_boundary_refinement, 9, sizeof(cl_mem), (void *)&dev_redscratch);
- ezcl_set_kernel_arg(kernel_set_boundary_refinement, 10, sizeof(cl_mem), (void *)&dev_ioffset);
- ezcl_set_kernel_arg(kernel_set_boundary_refinement, 11, sizeof(cl_mem), (void *)&dev_result);
- ezcl_set_kernel_arg(kernel_set_boundary_refinement, 12, local_work_size*sizeof(cl_int2), NULL);
-
- ezcl_enqueue_ndrange_kernel(command_queue, kernel_set_boundary_refinement, 1, NULL, &global_work_size, &local_work_size, NULL);
-
- gpu_rezone_count2(block_size, local_work_size, dev_redscratch, dev_result);
-
- int my_result[2];
- ezcl_enqueue_read_buffer(command_queue, dev_result, CL_TRUE, 0, 1*sizeof(cl_int2), &my_result, NULL); // blocking read of one cl_int2
- //printf("Result is %lu icount %d jcount %d\n", ncells+my_result[0]-my_result[1],my_result[0],my_result[1]);
- icount = my_result[0];
- jcount = my_result[1];
-
- icount_global = icount;
- jcount_global = jcount;
-#ifdef HAVE_MPI
- if (parallel) {
- int count[2], count_global[2];
- count[0] = icount;
- count[1] = jcount;
- MPI_Allreduce(&count, &count_global, 2, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
- icount_global = count_global[0];
- jcount_global = count_global[1];
- }
-#endif
-
- gpu_rezone_scan(block_size, local_work_size, dev_ioffset, dev_result);
-
- //ezcl_enqueue_read_buffer(command_queue, dev_result, CL_TRUE, 0, sizeof(cl_int), &my_result, NULL);
- //printf("After scan, Result is %d\n", my_result[0]);
-
- ezcl_device_memory_delete(dev_result);
- ezcl_device_memory_delete(dev_redscratch);
-
- } else { // nothing to rezone on any rank: drop the flag buffer
- ezcl_device_memory_delete(dev_mpot);
- dev_mpot = NULL;
- }
-
- gpu_do_rezone = (icount_global != 0 || jcount_global != 0) ? true : false;
-
- if (TIMING_LEVEL >= 2) gpu_timers[MESH_TIMER_REFINE_SMOOTH] += (long)(cpu_timer_stop(tstart_lev2)*1.0e9); // NOTE(review): presumably seconds scaled to nanoseconds - confirm the unit of cpu_timer_stop
-
- return ncells+icount-jcount;
-}
-#endif
-
-void Mesh::terminate(void)
-{ /*
-   * Releases the resources owned by the mesh:
-   *   - the i, j, level and celltype arrays registered with mesh_memory,
-   *     plus the four neighbor arrays when neighbor_remap is set;
-   *   - with HAVE_OPENCL: the hash library, the device buffers and every
-   *     kernel handle released below;
-   *   - with HAVE_J7 and HAVE_MPI: mesh_memory.pfini() for parallel runs.
-   * Takes no arguments and returns nothing.
-   */
- mesh_memory.memory_delete(i);
- mesh_memory.memory_delete(j);
- mesh_memory.memory_delete(level);
- mesh_memory.memory_delete(celltype);
- if (neighbor_remap) { // neighbor arrays are only released here when neighbor_remap is set
- mesh_memory.memory_delete(nlft);
- mesh_memory.memory_delete(nrht);
- mesh_memory.memory_delete(nbot);
- mesh_memory.memory_delete(ntop);
- }
-
-#ifdef HAVE_OPENCL
- hash_lib_terminate();
-
- ezcl_device_memory_delete(dev_levtable); // per-level table buffers
- ezcl_device_memory_delete(dev_levdx);
- ezcl_device_memory_delete(dev_levdy);
- ezcl_device_memory_delete(dev_levibeg);
- ezcl_device_memory_delete(dev_leviend);
- ezcl_device_memory_delete(dev_levjbeg);
- ezcl_device_memory_delete(dev_levjend);
-
- ezcl_device_memory_delete(dev_level); // per-cell buffers
- ezcl_device_memory_delete(dev_i);
- ezcl_device_memory_delete(dev_j);
- ezcl_device_memory_delete(dev_celltype);
- if (neighbor_remap && dev_nlft != NULL){ // skip when the device neighbor buffers were never allocated or are already gone
- ezcl_device_memory_delete(dev_nlft);
- ezcl_device_memory_delete(dev_nrht);
- ezcl_device_memory_delete(dev_nbot);
- ezcl_device_memory_delete(dev_ntop);
- }
-
- ezcl_kernel_release(kernel_reduction_scan2);
- ezcl_kernel_release(kernel_reduction_count);
- ezcl_kernel_release(kernel_reduction_count2);
- ezcl_kernel_release(kernel_hash_adjust_sizes);
- ezcl_kernel_release(kernel_hash_setup);
- ezcl_kernel_release(kernel_hash_setup_local);
- ezcl_kernel_release(kernel_neighbor_init);
- ezcl_kernel_release(kernel_calc_neighbors);
- ezcl_kernel_release(kernel_calc_neighbors_local);
- ezcl_kernel_release(kernel_calc_border_cells);
- ezcl_kernel_release(kernel_calc_border_cells2);
- ezcl_kernel_release(kernel_finish_scan);
- ezcl_kernel_release(kernel_get_border_data);
- ezcl_kernel_release(kernel_calc_layer1);
- ezcl_kernel_release(kernel_calc_layer1_sethash);
- ezcl_kernel_release(kernel_calc_layer2);
- ezcl_kernel_release(kernel_get_border_data2);
- ezcl_kernel_release(kernel_calc_layer2_sethash);
- //ezcl_kernel_release(kernel_calc_neighbors_local2);
- ezcl_kernel_release(kernel_copy_mesh_data);
- ezcl_kernel_release(kernel_fill_mesh_ghost);
- ezcl_kernel_release(kernel_fill_neighbor_ghost);
- ezcl_kernel_release(kernel_set_corner_neighbor);
- ezcl_kernel_release(kernel_adjust_neighbors_local);
- //ezcl_kernel_release(kernel_copy_ghost_data);
- //ezcl_kernel_release(kernel_adjust_neighbors);
- ezcl_kernel_release(kernel_hash_size);
- ezcl_kernel_release(kernel_finish_hash_size);
- ezcl_kernel_release(kernel_calc_spatial_coordinates);
- ezcl_kernel_release(kernel_do_load_balance_lower);
- ezcl_kernel_release(kernel_do_load_balance_middle);
- ezcl_kernel_release(kernel_do_load_balance_upper);
-#ifndef MINIMUM_PRECISION
- ezcl_kernel_release(kernel_do_load_balance_double); // double-precision kernels are compiled out under MINIMUM_PRECISION
-#endif
- ezcl_kernel_release(kernel_do_load_balance_float);
- ezcl_kernel_release(kernel_refine_smooth);
- ezcl_kernel_release(kernel_coarsen_smooth);
- ezcl_kernel_release(kernel_coarsen_check_block);
- ezcl_kernel_release(kernel_rezone_all);
- ezcl_kernel_release(kernel_rezone_neighbors);
-#ifndef MINIMUM_PRECISION
- ezcl_kernel_release(kernel_rezone_one_double);
-#endif
- ezcl_kernel_release(kernel_rezone_one_float);
- ezcl_kernel_release(kernel_copy_mpot_ghost_data);
- ezcl_kernel_release(kernel_set_boundary_refinement);
- terminate_kernel_2stage_sum();
- terminate_kernel_2stage_sum_int();
- if (! have_boundary){ // kernel_count_BCs is only created when have_boundary is false
- ezcl_kernel_release(kernel_count_BCs);
- }
-#endif
-#if defined(HAVE_J7) && defined(HAVE_MPI)
- if (parallel) mesh_memory.pfini();
-#endif
-}
-
-int Mesh::rezone_count(vector<int> mpot, int &icount, int &jcount)
-{
-   // Tallies how the cell count changes if the flags in mpot are applied.
-   //   mpot    per-cell flag: > 0 adds cells, < 0 removes cells, 0 is ignored
-   //   icount  out: cells added (3 per flagged REAL_CELL, 1 per other cell)
-   //   jcount  out: cells removed, kept as a non-positive number
-   // Returns icount + jcount, i.e. the net change in the cell count.
-   int cells_added   = 0;
-   int cells_removed = 0;
-
-#ifdef _OPENMP
-#pragma omp parallel for reduction (+:cells_removed,cells_added)
-#endif
-   for (uint cell = 0; cell < ncells; ++cell) {
-      const int flag = mpot[cell];
-
-      if (flag > 0) {
-         // A flagged real cell contributes three extra cells, any other cell one.
-         cells_added += (celltype[cell] == REAL_CELL) ? 3 : 1;
-      } else if (flag < 0) {
-         // Exactly one cell of a real group survives (the lower-left one), so
-         // the count stays right when the group is split across processors;
-         // for other cell types either the upper-right or the lower-left
-         // cell survives.
-         bool survives;
-         if (celltype[cell] == REAL_CELL) {
-            survives = is_lower_left(i[cell], j[cell]);
-         } else {
-            survives = is_upper_right(i[cell], j[cell]) || is_lower_left(i[cell], j[cell]);
-         }
-         if (!survives) cells_removed--;
-      }
-   }
-
-   icount = cells_added;
-   jcount = cells_removed;
-
-   return icount + jcount;
-}
-
-#ifdef HAVE_OPENCL
-void Mesh::gpu_rezone_count2(size_t block_size, size_t local_work_size, cl_mem dev_redscratch, cl_mem &dev_result)
-{
-   /*
-    * Enqueues kernel_reduction_count2 (created from "finish_reduction_count2_cl")
-    * as a single work-group of local_work_size items.
-    *
-    *   block_size       number of entries in dev_redscratch, passed as the
-    *                    kernel's int argument isize
-    *   local_work_size  work-group size; also sizes the local tile
-    *   dev_redscratch   device buffer handed to the kernel as argument 1
-    *   dev_result       device buffer handed to the kernel as argument 2
-    *
-    * Kernel arguments as set below:
-    *   0  const int isize
-    *   1  redscratch buffer
-    *   2  result buffer
-    *   3  local tile of local_work_size cl_int2 elements
-    */
-   cl_command_queue command_queue = ezcl_get_command_queue();
-
-   // Argument 0 is a 32-bit int on the device. Convert explicitly instead of
-   // passing the first sizeof(cl_int) bytes of a size_t, which only yields
-   // the right value on little-endian hosts.
-   cl_int isize = (cl_int)block_size;
-
-   ezcl_set_kernel_arg(kernel_reduction_count2, 0, sizeof(cl_int), (void *)&isize);
-   ezcl_set_kernel_arg(kernel_reduction_count2, 1, sizeof(cl_mem), (void *)&dev_redscratch);
-   ezcl_set_kernel_arg(kernel_reduction_count2, 2, sizeof(cl_mem), (void *)&dev_result);
-   ezcl_set_kernel_arg(kernel_reduction_count2, 3, local_work_size*sizeof(cl_int2), NULL);
-
-   // Global size equals local size: exactly one work-group finishes the reduction.
-   ezcl_enqueue_ndrange_kernel(command_queue, kernel_reduction_count2, 1, NULL, &local_work_size, &local_work_size, NULL);
-}
-
-void Mesh::gpu_rezone_count(size_t block_size, size_t local_work_size, cl_mem dev_redscratch, cl_mem &dev_result)
-{
-   /*
-    * Enqueues kernel_reduction_count (created from "finish_reduction_count_cl")
-    * as a single work-group of local_work_size items.
-    *
-    *   block_size       number of entries in dev_redscratch, passed as the
-    *                    kernel's int argument isize
-    *   local_work_size  work-group size; also sizes the local tile
-    *   dev_redscratch   device buffer handed to the kernel as argument 1
-    *   dev_result       device buffer handed to the kernel as argument 2
-    *
-    * Kernel arguments as set below:
-    *   0  const int isize
-    *   1  redscratch buffer
-    *   2  result buffer
-    *   3  local tile of local_work_size cl_int elements
-    */
-   cl_command_queue command_queue = ezcl_get_command_queue();
-
-   // Argument 0 is a 32-bit int on the device. Convert explicitly instead of
-   // passing the first sizeof(cl_int) bytes of a size_t, which only yields
-   // the right value on little-endian hosts.
-   cl_int isize = (cl_int)block_size;
-
-   ezcl_set_kernel_arg(kernel_reduction_count, 0, sizeof(cl_int), (void *)&isize);
-   ezcl_set_kernel_arg(kernel_reduction_count, 1, sizeof(cl_mem), (void *)&dev_redscratch);
-   ezcl_set_kernel_arg(kernel_reduction_count, 2, sizeof(cl_mem), (void *)&dev_result);
-   ezcl_set_kernel_arg(kernel_reduction_count, 3, local_work_size*sizeof(cl_int), NULL);
-
-   // Global size equals local size: exactly one work-group finishes the reduction.
-   ezcl_enqueue_ndrange_kernel(command_queue, kernel_reduction_count, 1, NULL, &local_work_size, &local_work_size, NULL);
-}
-
-void Mesh::gpu_rezone_scan(size_t block_size, size_t local_work_size, cl_mem dev_ioffset, cl_mem &dev_result)
-{
-   /*
-    * Enqueues kernel_reduction_scan2 (created from "finish_reduction_scan2_cl")
-    * as a single work-group of local_work_size items.
-    *
-    *   block_size       number of entries in dev_ioffset, passed as the
-    *                    kernel's int argument isize
-    *   local_work_size  work-group size; also sizes the local tile
-    *   dev_ioffset      device buffer handed to the kernel as argument 1
-    *   dev_result       device buffer handed to the kernel as argument 2
-    *
-    * Kernel arguments as set below:
-    *   0  const int isize
-    *   1  ioffset buffer
-    *   2  result buffer
-    *   3  local tile of local_work_size cl_uint2 elements
-    */
-   cl_command_queue command_queue = ezcl_get_command_queue();
-
-   // Argument 0 is a 32-bit int on the device. Convert explicitly instead of
-   // passing the first sizeof(cl_int) bytes of a size_t, which only yields
-   // the right value on little-endian hosts.
-   cl_int isize = (cl_int)block_size;
-
-   ezcl_set_kernel_arg(kernel_reduction_scan2, 0, sizeof(cl_int), (void *)&isize);
-   ezcl_set_kernel_arg(kernel_reduction_scan2, 1, sizeof(cl_mem), (void *)&dev_ioffset);
-   ezcl_set_kernel_arg(kernel_reduction_scan2, 2, sizeof(cl_mem), (void *)&dev_result);
-   ezcl_set_kernel_arg(kernel_reduction_scan2, 3, local_work_size*sizeof(cl_uint2), NULL);
-
-   // Global size equals local size: exactly one work-group runs the scan.
-   ezcl_enqueue_ndrange_kernel(command_queue, kernel_reduction_scan2, 1, NULL, &local_work_size, &local_work_size, NULL);
-}
-#endif
-
-void Mesh::kdtree_setup()
-{
-   // Re-initializes the member tree, then adds one bounding box for each of
-   // the ncells cells: [x, x+dx] by [y, y+dy].
-   KDTree_Initialize(&tree);
-
-   TBounds cell_box;
-   uint cell = 0;
-   while (cell < ncells) {
-      cell_box.min.x = x[cell];
-      cell_box.max.x = x[cell] + dx[cell];
-      cell_box.min.y = y[cell];
-      cell_box.max.y = y[cell] + dy[cell];
-      KDTree_AddElement(&tree, &cell_box);
-      ++cell;
-   }
-}
-
-void Mesh::calc_spatial_coordinates(int ibase)
-{
-   /*
-    * Recomputes the x, dx, y, dy arrays for the ncells cells from each
-    * cell's level and (i, j) index.
-    *
-    *   ibase  index offset subtracted from i and j when have_boundary is
-    *          set; otherwise the per-level lev_ibegin/lev_jbegin offsets
-    *          are used and ibase is ignored.
-    *
-    * The elapsed time is added to
-    * cpu_timers[MESH_TIMER_CALC_SPATIAL_COORDINATES] once per call.
-    */
-   struct timeval tstart_cpu;
-   cpu_timer_start(&tstart_cpu);
-
-   x.resize(ncells);
-   dx.resize(ncells);
-   y.resize(ncells);
-   dy.resize(ncells);
-
-#ifdef _OPENMP
-#pragma omp parallel
-   {
-#endif
-
-   // Each thread works on the index range handed back by get_bounds().
-   int lowerBounds, upperBounds;
-   set_bounds(ncells);
-   get_bounds(lowerBounds, upperBounds);
-
-   if (have_boundary) {
-      for (uint ic = lowerBounds; ic < upperBounds; ic++) {
-         int lev = level[ic];
-         x[ic]  = xmin + (lev_deltax[lev] * (i[ic] - ibase));
-         dx[ic] = lev_deltax[lev];
-         y[ic]  = ymin + (lev_deltay[lev] * (j[ic] - ibase));
-         dy[ic] = lev_deltay[lev];
-      }
-   } else {
-      for (uint ic = lowerBounds; ic < upperBounds; ic++) {
-         int lev = level[ic];
-         x[ic]  = xmin + (lev_deltax[lev] * (i[ic] - lev_ibegin[lev]));
-         dx[ic] = lev_deltax[lev];
-         y[ic]  = ymin + (lev_deltay[lev] * (j[ic] - lev_jbegin[lev]));
-         dy[ic] = lev_deltay[lev];
-      }
-   }
-
-#ifdef _OPENMP
-#pragma omp barrier
-   } // end parallel region
-#endif
-
-   // Accumulate the timer after the parallel region: inside it every thread
-   // would update the shared cpu_timers entry without synchronization and
-   // the elapsed time would be added once per thread.
-   cpu_timers[MESH_TIMER_CALC_SPATIAL_COORDINATES] += cpu_timer_stop(tstart_cpu);
-}
-
-#ifdef HAVE_OPENCL
-/*
- * Runs kernel_calc_spatial_coordinates to fill the device x/dx/y/dy buffers
- * and blocks until the kernel has finished.
- *
- * dev_x, dev_dx, dev_y, dev_dy -- device buffers bound to kernel arguments 5..8
- *
- * Per the original note, the kernel is only coded for base 0 and have_boundary.
- * Besides the parameters it is given ncells, xmin, ymin and the dev_levdx,
- * dev_levdy, dev_level, dev_i and dev_j buffers, none of which are parameters
- * of this routine. The elapsed host time, scaled by 1.0e9, is added to
- * gpu_timers[MESH_TIMER_CALC_SPATIAL_COORDINATES].
- *
- * An empty mesh returns immediately without touching the timer.
- */
-void Mesh::gpu_calc_spatial_coordinates(cl_mem dev_x, cl_mem dev_dx, cl_mem dev_y, cl_mem dev_dy)
-{
-    // With no cells the local work size below would be 0 and the global size
-    // computation would divide by zero; there is nothing to compute anyway.
-    if (ncells == 0) return;
-
-    struct timeval tstart_cpu;
-    cpu_timer_start(&tstart_cpu);
-
-    cl_event calc_spatial_coordinates_event;
-
-    cl_command_queue command_queue = ezcl_get_command_queue();
-
-    // Round the global size up to a whole number of work-groups.
-    size_t local_work_size  = MIN(ncells, TILE_SIZE);
-    size_t global_work_size = ((ncells + local_work_size - 1) /local_work_size) * local_work_size;
-
-    // Argument 0 is set with sizeof(cl_int), so stage the cell count in a real
-    // cl_int instead of pointing at the wider host variable.
-    cl_int ncells_arg = (cl_int)ncells;
-
-    // NOTE(review): arguments 1 and 2 assume xmin/ymin have exactly the
-    // representation of cl_real_t -- confirm against the declarations.
-    ezcl_set_kernel_arg(kernel_calc_spatial_coordinates,  0, sizeof(cl_int),    (void *)&ncells_arg);
-    ezcl_set_kernel_arg(kernel_calc_spatial_coordinates,  1, sizeof(cl_real_t), (void *)&xmin);
-    ezcl_set_kernel_arg(kernel_calc_spatial_coordinates,  2, sizeof(cl_real_t), (void *)&ymin);
-    ezcl_set_kernel_arg(kernel_calc_spatial_coordinates,  3, sizeof(cl_mem),    (void *)&dev_levdx);
-    ezcl_set_kernel_arg(kernel_calc_spatial_coordinates,  4, sizeof(cl_mem),    (void *)&dev_levdy);
-    ezcl_set_kernel_arg(kernel_calc_spatial_coordinates,  5, sizeof(cl_mem),    (void *)&dev_x);
-    ezcl_set_kernel_arg(kernel_calc_spatial_coordinates,  6, sizeof(cl_mem),    (void *)&dev_dx);
-    ezcl_set_kernel_arg(kernel_calc_spatial_coordinates,  7, sizeof(cl_mem),    (void *)&dev_y);
-    ezcl_set_kernel_arg(kernel_calc_spatial_coordinates,  8, sizeof(cl_mem),    (void *)&dev_dy);
-    ezcl_set_kernel_arg(kernel_calc_spatial_coordinates,  9, sizeof(cl_mem),    (void *)&dev_level);
-    ezcl_set_kernel_arg(kernel_calc_spatial_coordinates, 10, sizeof(cl_mem),    (void *)&dev_i);
-    ezcl_set_kernel_arg(kernel_calc_spatial_coordinates, 11, sizeof(cl_mem),    (void *)&dev_j);
-    ezcl_enqueue_ndrange_kernel(command_queue, kernel_calc_spatial_coordinates, 1, NULL, &global_work_size, &local_work_size, &calc_spatial_coordinates_event);
-
-    // Wait for completion so the host-side timer covers the kernel run.
-    ezcl_wait_for_events(1, &calc_spatial_coordinates_event);
-    ezcl_event_release(calc_spatial_coordinates_event);
-
-    gpu_timers[MESH_TIMER_CALC_SPATIAL_COORDINATES] += (long)(cpu_timer_stop(tstart_cpu) * 1.0e9);
-}
-#endif
-
-/*
- * Recomputes the extents of the mesh from the cell arrays.
- *
- * xmin/ymin (and zmin when ndim > TWO_DIMENSIONAL) become the smallest cell
- * origin; xmax/ymax (and zmax) become the largest origin-plus-size. All six
- * start from +/-1.0e30 sentinels, which remain in place when there are no cells.
- *
- * With HAVE_MPI and `parallel` set, the x and y extents are then reduced over
- * MPI_COMM_WORLD. The z extents are not part of that reduction.
- */
-void Mesh::calc_minmax(void)
-{
-    const bool has_z = (ndim > TWO_DIMENSIONAL);
-
-    xmin = +1.0e30;
-    ymin = +1.0e30;
-    zmin = +1.0e30;
-    xmax = -1.0e30;
-    ymax = -1.0e30;
-    zmax = -1.0e30;
-
-    // One sweep handles both the lower and the upper bound in x and y.
-    for (uint cell = 0; cell < ncells; cell++) {
-        if (x[cell] < xmin) xmin = x[cell];
-        if (y[cell] < ymin) ymin = y[cell];
-
-        real_t x_upper = x[cell]+dx[cell];
-        if (x_upper > xmax) xmax = x_upper;
-
-        real_t y_upper = y[cell]+dy[cell];
-        if (y_upper > ymax) ymax = y_upper;
-    }
-
-    // The z arrays are only consulted for meshes with more than two dimensions.
-    if (has_z) {
-        for (uint cell = 0; cell < ncells; cell++) {
-            if (z[cell] < zmin) zmin = z[cell];
-
-            real_t z_upper = z[cell]+dz[cell];
-            if (z_upper > zmax) zmax = z_upper;
-        }
-    }
-
-#ifdef HAVE_MPI
-    if (parallel) {
-        // Replace the per-rank extents with the extents over all ranks.
-        real_t global_xlo, global_xhi, global_ylo, global_yhi;
-        MPI_Allreduce(&xmin, &global_xlo, 1, MPI_REAL_T, MPI_MIN, MPI_COMM_WORLD);
-        MPI_Allreduce(&xmax, &global_xhi, 1, MPI_REAL_T, MPI_MAX, MPI_COMM_WORLD);
-        MPI_Allreduce(&ymin, &global_ylo, 1, MPI_REAL_T, MPI_MIN, MPI_COMM_WORLD);
-        MPI_Allreduce(&ymax, &global_yhi, 1, MPI_REAL_T, MPI_MAX, MPI_COMM_WORLD);
-        xmin = global_xlo;
-        xmax = global_xhi;
-        ymin = global_ylo;
-        ymax = global_yhi;
-    }
-#endif
-
-}
-/*
- * Recomputes the extents of the cell centers.
- *
- * A cell center is origin + 0.5*size in each direction. xcentermin/xcentermax
- * and ycentermin/ycentermax (plus the z pair when ndim > TWO_DIMENSIONAL) are
- * reset to +/-1.0e30 sentinels and then tightened over all cells.
- *
- * With HAVE_MPI and `parallel` set, the x and y center extents are reduced over
- * MPI_COMM_WORLD. The z center extents are not part of that reduction.
- */
-void Mesh::calc_centerminmax(void)
-{
-    const bool has_z = (ndim > TWO_DIMENSIONAL);
-
-    xcentermin = +1.0e30;
-    ycentermin = +1.0e30;
-    zcentermin = +1.0e30;
-    xcentermax = -1.0e30;
-    ycentermax = -1.0e30;
-    zcentermax = -1.0e30;
-
-    // x and y centers are handled together in a single pass over the cells.
-    for (uint cell = 0; cell < ncells; cell++) {
-        real_t x_center = x[cell]+0.5*dx[cell];
-        if (x_center < xcentermin) xcentermin = x_center;
-        if (x_center > xcentermax) xcentermax = x_center;
-
-        real_t y_center = y[cell]+0.5*dy[cell];
-        if (y_center < ycentermin) ycentermin = y_center;
-        if (y_center > ycentermax) ycentermax = y_center;
-    }
-
-    // The z arrays are only consulted for meshes with more than two dimensions.
-    if (has_z) {
-        for (uint cell = 0; cell < ncells; cell++) {
-            real_t z_center = z[cell]+0.5*dz[cell];
-            if (z_center < zcentermin) zcentermin = z_center;
-            if (z_center > zcentermax) zcentermax = z_center;
-        }
-    }
-
-#ifdef HAVE_MPI
-    if (parallel) {
-        // Replace the per-rank extents with the extents over all ranks.
-        real_t global_xlo, global_xhi, global_ylo, global_yhi;
-        MPI_Allreduce(&xcentermin, &global_xlo, 1, MPI_REAL_T, MPI_MIN, MPI_COMM_WORLD);
-        MPI_Allreduce(&xcentermax, &global_xhi, 1, MPI_REAL_T, MPI_MAX, MPI_COMM_WORLD);
-        MPI_Allreduce(&ycentermin, &global_ylo, 1, MPI_REAL_T, MPI_MIN, MPI_COMM_WORLD);
-        MPI_Allreduce(&ycentermax, &global_yhi, 1, MPI_REAL_T, MPI_MAX, MPI_COMM_WORLD);
-        xcentermin = global_xlo;
-        xcentermax = global_xhi;
-        ycentermin = global_ylo;
-        ycentermax = global_yhi;
-    }
-#endif
-
-}
-/*
- * Rebuilds the cell arrays (i, j, level), the old-to-new `index` map, the state
- * arrays and the neighbor arrays after refinement/coarsening has been decided.
- *
- * icount, jcount -- the new cell count is ncells + icount - abs(jcount)
- * mpot           -- per-cell decision: 0 keeps the cell, > 0 refines it, < 0
- *                   coarsens it. Taken by value; the -2 / -3 markers written
- *                   here (when have_state is set) are not seen by the caller.
- * have_state     -- when non-zero, celltype is saved first and every entry of
- *                   state_memory with element size 8 or 4 is remapped
- * state_memory   -- state arrays to remap; each one is replaced by a new array
- *                   of the new cell count
- *
- * When do_rezone is false, only `index` is rebuilt, as the identity map.
- *
- * Otherwise, in order:
- *  - new i/j/level arrays are allocated and swapped in; the previous contents
- *    stay reachable through i_old/j_old/level_old until they are deleted;
- *  - with `parallel` and HAVE_MPI, the first and last i/j/level values are
- *    exchanged with ranks mype-1 and mype+1;
- *  - an unchanged cell is copied; a cell with mpot <= -2 yields one cell at
- *    (i/2, j/2, level-1); a refined REAL_CELL yields four children ordered by
- *    set_refinement_order(); a refined boundary cell yields two children;
- *  - state values are copied for unchanged and refined cells, and averaged
- *    over four cells (factor 0.25) for mpot == -2 and mpot == -3;
- *  - with neighbor_remap, the neighbor arrays are rebuilt at the new size with
- *    -1 for links that cannot be carried over; otherwise they are deleted;
- *  - with HAVE_MPI and `parallel`, nsizes, ndispl, noffset and ncells_global
- *    are refreshed from the new per-rank counts.
- *
- * This routine does not itself assign ncells (that assignment is commented
- * out near the end). Time spent is added to cpu_timers[MESH_TIMER_REZONE_ALL].
- *
- * NOTE(review): the orphaned `omp for` / `omp master` / `omp barrier`
- * directives together with the function-static scratch variables suggest the
- * routine is meant to be entered by every thread of an enclosing parallel
- * region -- confirm against the callers.
- * NOTE(review): in the default (non REZONE_NO_OPTIMIZATION) path add_count is
- * 0 for mpot == -3, yet i/j/level and the state arrays are still written at
- * new_ic[ic] for such cells -- confirm this is intended.
- */
-void Mesh::rezone_all(int icount, int jcount, vector<int> mpot, int have_state, MallocPlus &state_memory)
-{
- struct timeval tstart_cpu;
- cpu_timer_start(&tstart_cpu);
-
- if (! do_rezone) {
-
-#ifdef _OPENMP
-#pragma omp barrier
-#pragma omp master
- {
-#endif
- index.clear();
- index.resize(ncells);
-#ifdef _OPENMP
- }
-#pragma omp barrier
-#endif
-
-#ifdef _OPENMP
-#pragma omp for
-#endif
- for (uint ic=0; ic<ncells; ic++){
- index[ic]=ic;
- }
-
-#ifdef _OPENMP
-#pragma omp master
-#endif
- cpu_timers[MESH_TIMER_REZONE_ALL] += cpu_timer_stop(tstart_cpu);
-
- } else {
-
-// sign for jcount is different in GPU and CPU code -- abs is a quick fix
- int add_ncells = icount - abs(jcount);
-
-#ifdef _OPENMP
-#pragma omp master
-#endif
- cpu_counters[MESH_COUNTER_REZONE]++;
-
- static vector<int> celltype_save;
-
- static int new_ncells;
-
- static int *i_old, *j_old, *level_old;
-
- static int ifirst;
- static int ilast;
- static int jfirst;
- static int jlast;
- static int level_first;
- static int level_last;
-
- static vector<int> new_ic;
-
-#ifdef _OPENMP
-#pragma omp master
- {
-#endif
- celltype_save.resize(ncells);
-#ifdef _OPENMP
- }
-#pragma omp barrier
-#endif
-
- if (have_state) {
-#ifdef _OPENMP
-#pragma omp for
-#endif
- for (int ic = 0; ic < (int)ncells; ic++){
- celltype_save[ic] = celltype[ic];
- }
- }
-
-#ifdef _OPENMP
-#pragma omp master
- {
-#endif
- new_ncells = ncells + add_ncells;
-#ifdef _OPENMP
- }
-#pragma omp barrier
-#endif
-
-// int ref_entry_count = 0;
- if (have_state){
-#ifdef _OPENMP
-#pragma omp for
-#endif
- for (uint ic=0; ic<ncells; ic++) {
-// if (mpot[ic] > 0) ref_entry_count++;
- if (mpot[ic] < 0) {
- // Normal cell coarsening
- if (is_lower_left(i[ic],j[ic]) ) mpot[ic] = -2;
- // Boundary cell case
- if (celltype[ic] != REAL_CELL && is_upper_right(i[ic],j[ic]) ) mpot[ic] = -3;
- }
- }
- }
-
- // Initialize new variables
-// int *i_old, *j_old, *level_old;
-
- int flags = RESTART_DATA;
-#ifdef HAVE_J7
- if (parallel) flags = LOAD_BALANCE_MEMORY;
-#endif
-
-#ifdef _OPENMP
-#pragma omp master
- {
-#endif
- i_old = (int *)mesh_memory.memory_malloc(new_ncells, sizeof(int), "i_old", flags);
- j_old = (int *)mesh_memory.memory_malloc(new_ncells, sizeof(int), "j_old", flags);
- level_old = (int *)mesh_memory.memory_malloc(new_ncells, sizeof(int), "level_old", flags);
-
- mesh_memory.memory_swap(&i, &i_old);
- mesh_memory.memory_swap(&j, &j_old);
- mesh_memory.memory_swap(&level, &level_old);
-
- index.clear();
- index.resize(new_ncells);
-#ifdef _OPENMP
- }
-#pragma omp barrier
-#endif
-
- static vector<int> order; // Vector of refined mesh traversal order; set to -1 to indicate errors.
- //
- //vector<int> invorder(4, -1); // Vector mapping location from base index.
-
- //int ref_entry = 0;
-
-#ifdef _OPENMP
-#pragma omp master
- {
-#endif
- // Insert new cells into the mesh at the point of refinement.
- order.resize(4, -1); // Vector of refined mesh traversal order; set to -1 to indicate errors.
-
- ifirst = 0;
- ilast = 0;
- jfirst = 0;
- jlast = 0;
- level_first = 0;
- level_last = 0;
-
- if (parallel) {
-#ifdef HAVE_MPI
- MPI_Request req[12];
- MPI_Status status[12];
-
- static int prev = MPI_PROC_NULL;
- static int next = MPI_PROC_NULL;
-
- if (mype != 0) prev = mype-1;
- if (mype < numpe - 1) next = mype+1;
-
- MPI_Isend(&i_old[ncells-1], 1,MPI_INT,next,1,MPI_COMM_WORLD,req+0);
- MPI_Irecv(&ifirst, 1,MPI_INT,prev,1,MPI_COMM_WORLD,req+1);
-
- MPI_Isend(&i_old[0], 1,MPI_INT,prev,1,MPI_COMM_WORLD,req+2);
- MPI_Irecv(&ilast, 1,MPI_INT,next,1,MPI_COMM_WORLD,req+3);
-
- MPI_Isend(&j_old[ncells-1], 1,MPI_INT,next,1,MPI_COMM_WORLD,req+4);
- MPI_Irecv(&jfirst, 1,MPI_INT,prev,1,MPI_COMM_WORLD,req+5);
-
- MPI_Isend(&j_old[0], 1,MPI_INT,prev,1,MPI_COMM_WORLD,req+6);
- MPI_Irecv(&jlast, 1,MPI_INT,next,1,MPI_COMM_WORLD,req+7);
-
- MPI_Isend(&level_old[ncells-1], 1,MPI_INT,next,1,MPI_COMM_WORLD,req+8);
- MPI_Irecv(&level_first, 1,MPI_INT,prev,1,MPI_COMM_WORLD,req+9);
-
- MPI_Isend(&level_old[0], 1,MPI_INT,prev,1,MPI_COMM_WORLD,req+10);
- MPI_Irecv(&level_last, 1,MPI_INT,next,1,MPI_COMM_WORLD,req+11);
-
- MPI_Waitall(12, req, status);
-#endif
- }
-
-#ifdef _OPENMP
- }
-#pragma omp barrier
-#endif
-
-#ifdef REZONE_NO_OPTIMIZATION
- vector<int> invorder(4, -1); // Vector mapping location from base index.
- for (int ic = 0, nc = 0; ic < (int)ncells; ic++)
- {
- if (mpot[ic] == 0 || mpot[ic] == -1000000)
- { // No change is needed; copy the old cell straight to the new mesh at this location.
- index[ic] = nc;
- i[nc] = i_old[ic];
- j[nc] = j_old[ic];
- level[nc] = level_old[ic];
- nc++;
- } // Complete no change needed.
-
- else if (mpot[ic] < 0)
- { // Coarsening is needed; remove this cell and the other three and replace them with one.
- index[ic] = nc;
- if (mpot[ic] <= -2) {
- //printf(" %d: DEBUG -- coarsening cell %d nc %d\n",mype,ic,nc);
- i[nc] = i_old[ic]/2;
- j[nc] = j_old[ic]/2;
- level[nc] = level_old[ic] - 1;
- nc++;
- }
- } // Coarsening complete.
-
- else if (mpot[ic] > 0)
- { // Refinement is needed; insert four cells where once was one.
- index[ic] = nc;
- if (celltype[ic] == REAL_CELL)
- {
- set_refinement_order(&order[0], ic, ifirst, ilast, jfirst, jlast,
- level_first, level_last, i_old, j_old, level_old);
-
- // Create the cells in the correct order and orientation.
- for (int ii = 0; ii < 4; ii++)
- { level[nc] = level_old[ic] + 1;
- switch (order[ii])
- { case SW:
- // lower left
- invorder[SW] = ii;
- i[nc] = i_old[ic]*2;
- j[nc] = j_old[ic]*2;
- nc++;
- break;
-
- case SE:
- // lower right
- invorder[SE] = ii;
- i[nc] = i_old[ic]*2 + 1;
- j[nc] = j_old[ic]*2;
- nc++;
- break;
-
- case NW:
- // upper left
- invorder[NW] = ii;
- i[nc] = i_old[ic]*2;
- j[nc] = j_old[ic]*2 + 1;
- nc++;
- break;
-
- case NE:
- // upper right
- invorder[NE] = ii;
- i[nc] = i_old[ic]*2 + 1;
- j[nc] = j_old[ic]*2 + 1;
- nc++;
- break; } } // Complete cell refinement.
- } // Complete real cell refinement.
-
- else if (celltype[ic] == LEFT_BOUNDARY) {
- // lower
- i[nc] = i_old[ic]*2 + 1;
- j[nc] = j_old[ic]*2;
- level[nc] = level_old[ic] + 1;
- nc++;
-
- // upper
- i[nc] = i_old[ic]*2 + 1;
- j[nc] = j_old[ic]*2 + 1;
- level[nc] = level_old[ic] + 1;
- nc++;
- }
- else if (celltype[ic] == RIGHT_BOUNDARY) {
- // lower
- i[nc] = i_old[ic]*2;
- j[nc] = j_old[ic]*2;
- level[nc] = level_old[ic] + 1;
- nc++;
-
- // upper
- i[nc] = i_old[ic]*2;
- j[nc] = j_old[ic]*2 + 1;
- level[nc] = level_old[ic] + 1;
- nc++;
- }
- else if (celltype[ic] == BOTTOM_BOUNDARY) {
- // left
- i[nc] = i_old[ic]*2;
- j[nc] = j_old[ic]*2 + 1;
- level[nc] = level_old[ic] + 1;
- nc++;
-
- // right
- i[nc] = i_old[ic]*2 + 1;
- j[nc] = j_old[ic]*2 + 1;
- level[nc] = level_old[ic] + 1;
- nc++;
- }
- else if (celltype[ic] == TOP_BOUNDARY) {
- // right
- i[nc] = i_old[ic]*2 + 1;
- j[nc] = j_old[ic]*2;
- level[nc] = level_old[ic] + 1;
- nc++;
-
- // left
- i[nc] = i_old[ic]*2;
- j[nc] = j_old[ic]*2;
- level[nc] = level_old[ic] + 1;
- nc++;
- }
- } // Complete refinement needed.
- } // Complete addition of new cells to the mesh.
-
- mesh_memory.memory_delete(i_old);
- mesh_memory.memory_delete(j_old);
- mesh_memory.memory_delete(level_old);
-
- calc_celltype(new_ncells);
-
- if (have_state){
- flags = RESTART_DATA;
- MallocPlus state_memory_old = state_memory;
- malloc_plus_memory_entry *memory_item;
-
- for (memory_item = state_memory_old.memory_entry_by_name_begin();
- memory_item != state_memory_old.memory_entry_by_name_end();
- memory_item = state_memory_old.memory_entry_by_name_next() ) {
- //printf("DEBUG -- it.mem_name %s elsize %lu\n",memory_item->mem_name,memory_item->mem_elsize);
- if (memory_item->mem_elsize == 8) {
- double *state_temp_double = (double *)state_memory.memory_malloc(new_ncells, sizeof(double),
- "state_temp_double", flags);
-
- double *mem_ptr_double = (double *)memory_item->mem_ptr;
-
- //ref_entry = 0;
- for (int ic=0, nc=0; ic<(int)ncells; ic++) {
-
- if (mpot[ic] == 0) {
- state_temp_double[nc] = mem_ptr_double[ic];
- nc++;
- } else if (mpot[ic] < 0){
- if (mpot[ic] == -2) {
- int nr = nrht[ic];
- int nt = ntop[ic];
- int nrt = nrht[nt];
- state_temp_double[nc] = (mem_ptr_double[ic] + mem_ptr_double[nr] +
- mem_ptr_double[nt] + mem_ptr_double[nrt])*0.25;
- nc++;
- }
- if (mpot[ic] == -3) {
- int nl = nlft[ic];
- int nb = nbot[ic];
- int nlb = nlft[nb];
- state_temp_double[nc] = (mem_ptr_double[ic] + mem_ptr_double[nl] +
- mem_ptr_double[nb] + mem_ptr_double[nlb])*0.25;
- nc++;
- }
- } else if (mpot[ic] > 0){
- // lower left
- state_temp_double[nc] = mem_ptr_double[ic];
- nc++;
-
- // lower right
- state_temp_double[nc] = mem_ptr_double[ic];
- nc++;
-
- if (celltype_save[ic] == REAL_CELL){
- // upper left
- state_temp_double[nc] = mem_ptr_double[ic];
- nc++;
-
- // upper right
- state_temp_double[nc] = mem_ptr_double[ic];
- nc++;
- }
- }
- }
-
- state_memory.memory_replace(mem_ptr_double, state_temp_double);
- } else if (memory_item->mem_elsize == 4) {
- float *state_temp_float = (float *)state_memory.memory_malloc(new_ncells, sizeof(float),
- "state_temp_float", flags);
-
- float *mem_ptr_float = (float *)memory_item->mem_ptr;
-
- for (int ic=0, nc=0; ic<(int)ncells; ic++) {
-
- if (mpot[ic] == 0) {
- state_temp_float[nc] = mem_ptr_float[ic];
- nc++;
- } else if (mpot[ic] < 0){
- if (mpot[ic] == -2) {
- int nr = nrht[ic];
- int nt = ntop[ic];
- int nrt = nrht[nt];
- state_temp_float[nc] = (mem_ptr_float[ic] + mem_ptr_float[nr] +
- mem_ptr_float[nt] + mem_ptr_float[nrt])*0.25;
- nc++;
- }
- if (mpot[ic] == -3) {
- int nl = nlft[ic];
- int nb = nbot[ic];
- int nlb = nlft[nb];
- state_temp_float[nc] = (mem_ptr_float[ic] + mem_ptr_float[nl] +
- mem_ptr_float[nb] + mem_ptr_float[nlb])*0.25;
- nc++;
- }
- } else if (mpot[ic] > 0){
- // lower left
- state_temp_float[nc] = mem_ptr_float[ic];
- nc++;
-
- // lower right
- state_temp_float[nc] = mem_ptr_float[ic];
- nc++;
-
- if (celltype_save[ic] == REAL_CELL){
- // upper left
- state_temp_float[nc] = mem_ptr_float[ic];
- nc++;
-
- // upper right
- state_temp_float[nc] = mem_ptr_float[ic];
- nc++;
- }
- }
- }
-
- state_memory.memory_replace(mem_ptr_float, state_temp_float);
- }
- }
- }
-#else
- // Data parallel optimizations for thread parallel -- slows down serial
- // code by about 25%
- static vector<int> add_count;
-
-#ifdef _OPENMP
-#pragma omp barrier
-#pragma omp master
- {
-#endif
- add_count.resize(ncells);
- new_ic.resize(ncells+1);
-#ifdef _OPENMP
- } // end master region
-#pragma omp barrier
-#endif
-
-#ifdef _OPENMP
-#pragma omp for
-#endif
- for (int ic = 0; ic < (int)ncells; ic++){
- if (mpot[ic] == 0) {
- add_count[ic] = 1;
- } else if (mpot[ic] < 0) {
- if (mpot[ic] == -2){
- add_count[ic] = 1;
- } else {
- add_count[ic] = 0;
- }
- } else if (mpot[ic] > 0) {
- if (celltype[ic] != REAL_CELL) {
- add_count[ic] = 2;
- } else {
- add_count[ic] = 4;
- }
- }
- }
-
-#ifdef _OPENMP
-#pragma omp barrier
-#endif
- scan (&add_count[0], &new_ic[0], ncells);
-#ifdef _OPENMP
-#pragma omp barrier
-#endif
-
-#ifdef _OPENMP
-#pragma omp for
-#endif
- for (int ic = 0; ic < (int)ncells; ic++) {
- vector<int> invorder(4, -1); // Vector mapping location from base index.
- int nc = new_ic[ic];
- if (mpot[ic] == 0)
- { // No change is needed; copy the old cell straight to the new mesh at this location.
- index[ic] = nc;
- i[nc] = i_old[ic];
- j[nc] = j_old[ic];
- level[nc] = level_old[ic];
- } // Complete no change needed.
-
- else if (mpot[ic] < 0)
- { // Coarsening is needed; remove this cell and the other three and replace them with one.
- index[ic] = nc;
- if (mpot[ic] <= -2) {
- //printf(" %d: DEBUG -- coarsening cell %d nc %d\n",mype,ic,nc);
- i[nc] = i_old[ic]/2;
- j[nc] = j_old[ic]/2;
- level[nc] = level_old[ic] - 1;
- }
- } // Coarsening complete.
-
- else if (mpot[ic] > 0)
- { // Refinement is needed; insert four cells where once was one.
- index[ic] = nc;
- if (celltype[ic] == REAL_CELL)
- {
- int order[4];
- set_refinement_order(&order[0], ic, ifirst, ilast, jfirst, jlast,
- level_first, level_last, i_old, j_old, level_old);
-
- // Create the cells in the correct order and orientation.
- for (int ii = 0; ii < 4; ii++) {
- level[nc] = level_old[ic] + 1;
- switch (order[ii]) {
- case SW:
- // lower left
- invorder[SW] = ii;
- i[nc] = i_old[ic]*2;
- j[nc] = j_old[ic]*2;
- nc++;
- break;
-
- case SE:
- // lower right
- invorder[SE] = ii;
- i[nc] = i_old[ic]*2 + 1;
- j[nc] = j_old[ic]*2;
- nc++;
- break;
-
- case NW:
- // upper left
- invorder[NW] = ii;
- i[nc] = i_old[ic]*2;
- j[nc] = j_old[ic]*2 + 1;
- nc++;
- break;
-
- case NE:
- // upper right
- invorder[NE] = ii;
- i[nc] = i_old[ic]*2 + 1;
- j[nc] = j_old[ic]*2 + 1;
- nc++;
- break;
- }
- } // Complete cell refinement.
- } // Complete real cell refinement.
-
- else if (celltype[ic] == LEFT_BOUNDARY) {
- // lower
- i[nc] = i_old[ic]*2 + 1;
- j[nc] = j_old[ic]*2;
- level[nc] = level_old[ic] + 1;
- nc++;
-
- // upper
- i[nc] = i_old[ic]*2 + 1;
- j[nc] = j_old[ic]*2 + 1;
- level[nc] = level_old[ic] + 1;
- nc++;
- }
- else if (celltype[ic] == RIGHT_BOUNDARY) {
- // lower
- i[nc] = i_old[ic]*2;
- j[nc] = j_old[ic]*2;
- level[nc] = level_old[ic] + 1;
- nc++;
-
- // upper
- i[nc] = i_old[ic]*2;
- j[nc] = j_old[ic]*2 + 1;
- level[nc] = level_old[ic] + 1;
- nc++;
- }
- else if (celltype[ic] == BOTTOM_BOUNDARY) {
- // left
- i[nc] = i_old[ic]*2;
- j[nc] = j_old[ic]*2 + 1;
- level[nc] = level_old[ic] + 1;
- nc++;
-
- // right
- i[nc] = i_old[ic]*2 + 1;
- j[nc] = j_old[ic]*2 + 1;
- level[nc] = level_old[ic] + 1;
- nc++;
- }
- else if (celltype[ic] == TOP_BOUNDARY) {
- // right
- i[nc] = i_old[ic]*2 + 1;
- j[nc] = j_old[ic]*2;
- level[nc] = level_old[ic] + 1;
- nc++;
-
- // left
- i[nc] = i_old[ic]*2;
- j[nc] = j_old[ic]*2;
- level[nc] = level_old[ic] + 1;
- nc++;
- }
- } // Complete refinement needed.
- } // Complete addition of new cells to the mesh.
-
-#ifdef _OPENMP
-#pragma omp barrier
-#pragma omp master
- {
-#endif
- mesh_memory.memory_delete(i_old);
- mesh_memory.memory_delete(j_old);
- mesh_memory.memory_delete(level_old);
-#ifdef _OPENMP
- } // end master region
-#endif
-
- calc_celltype_threaded(new_ncells);
-
- if (have_state){
-
- static MallocPlus state_memory_old;
- static malloc_plus_memory_entry *memory_begin;
- static malloc_plus_memory_entry *memory_end;
- static malloc_plus_memory_entry *memory_next;
-
-#ifdef _OPENMP
-#pragma omp barrier
-#pragma omp master
- {
-#endif
- state_memory_old = state_memory;
-
- memory_begin = state_memory_old.memory_entry_by_name_begin();
- memory_end = state_memory_old.memory_entry_by_name_end();
-#ifdef _OPENMP
- } // end master region
-#pragma omp barrier
-#endif
-
- for (malloc_plus_memory_entry *memory_item = memory_begin;
- memory_item != memory_end;
- memory_item = memory_next ) {
- //ref_entry = 0;
- //printf("DEBUG -- memory_item->mem_name %s elsize %lu\n",memory_item->mem_name,memory_item->mem_elsize);
- if (memory_item->mem_elsize == 8) {
-
- static double *state_temp_double, *mem_ptr_double;
-
-#ifdef _OPENMP
-#pragma omp barrier
-#pragma omp master
- {
-#endif
- state_temp_double = (double *)state_memory.memory_malloc(new_ncells, sizeof(double),
- "state_temp_double", flags);
- mem_ptr_double = (double *)memory_item->mem_ptr;
-#ifdef _OPENMP
- } // end master region
-#pragma omp barrier
-#endif
-
- //ref_entry = 0;
-#ifdef _OPENMP
-#pragma omp for
-#endif
- for (int ic=0; ic<(int)ncells; ic++) {
-
- int nc = new_ic[ic];
- if (mpot[ic] == 0) {
- state_temp_double[nc] = mem_ptr_double[ic];
- } else if (mpot[ic] < 0){
- if (mpot[ic] == -2) {
- int nr = nrht[ic];
- int nt = ntop[ic];
- int nrt = nrht[nt];
- state_temp_double[nc] = (mem_ptr_double[ic] + mem_ptr_double[nr] +
- mem_ptr_double[nt] + mem_ptr_double[nrt])*0.25;
- }
- if (mpot[ic] == -3) {
- int nl = nlft[ic];
- int nb = nbot[ic];
- int nlb = nlft[nb];
- state_temp_double[nc] = (mem_ptr_double[ic] + mem_ptr_double[nl] +
- mem_ptr_double[nb] + mem_ptr_double[nlb])*0.25;
- }
- } else if (mpot[ic] > 0){
- // lower left
- state_temp_double[nc] = mem_ptr_double[ic];
- nc++;
-
- // lower right
- state_temp_double[nc] = mem_ptr_double[ic];
- nc++;
-
- if (celltype_save[ic] == REAL_CELL){
- // upper left
- state_temp_double[nc] = mem_ptr_double[ic];
- nc++;
-
- // upper right
- state_temp_double[nc] = mem_ptr_double[ic];
- nc++;
- }
- }
- } // end cell loop
-
-#ifdef _OPENMP
-#pragma omp barrier
-#pragma omp master
- {
-#endif
- state_memory.memory_replace(mem_ptr_double, state_temp_double);
-#ifdef _OPENMP
- } // end master region
-#pragma omp barrier
-#endif
-
- } else if (memory_item->mem_elsize == 4) {
-
- static float *state_temp_float, *mem_ptr_float;
-
-#ifdef _OPENMP
-#pragma omp barrier
-#pragma omp master
- {
-#endif
- state_temp_float = (float *)state_memory.memory_malloc(new_ncells, sizeof(float),
- "state_temp_float", flags);
- mem_ptr_float = (float *)memory_item->mem_ptr;
-#ifdef _OPENMP
- } // end master region
-#pragma omp barrier
-#endif
-
-#ifdef _OPENMP
-#pragma omp for
-#endif
- for (int ic=0; ic<(int)ncells; ic++) {
-
- int nc = new_ic[ic];
- if (mpot[ic] == 0) {
- state_temp_float[nc] = mem_ptr_float[ic];
- } else if (mpot[ic] < 0){
- if (mpot[ic] == -2) {
- int nr = nrht[ic];
- int nt = ntop[ic];
- int nrt = nrht[nt];
- state_temp_float[nc] = (mem_ptr_float[ic] + mem_ptr_float[nr] +
- mem_ptr_float[nt] + mem_ptr_float[nrt])*0.25;
- }
- if (mpot[ic] == -3) {
- int nl = nlft[ic];
- int nb = nbot[ic];
- int nlb = nlft[nb];
- state_temp_float[nc] = (mem_ptr_float[ic] + mem_ptr_float[nl] +
- mem_ptr_float[nb] + mem_ptr_float[nlb])*0.25;
- }
- } else if (mpot[ic] > 0){
- // lower left
- state_temp_float[nc] = mem_ptr_float[ic];
- nc++;
-
- // lower right
- state_temp_float[nc] = mem_ptr_float[ic];
- nc++;
-
- if (celltype_save[ic] == REAL_CELL){
- // upper left
- state_temp_float[nc] = mem_ptr_float[ic];
- nc++;
-
- // upper right
- state_temp_float[nc] = mem_ptr_float[ic];
- nc++;
- }
- }
- } // end cell loop
-
-#ifdef _OPENMP
-#pragma omp barrier
-#pragma omp master
- {
-#endif
- state_memory.memory_replace(mem_ptr_float, state_temp_float);
-#ifdef _OPENMP
- } // end master region
-#pragma omp barrier
-#endif
- } // mem elem size 4 bytes
-
-#ifdef _OPENMP
-#pragma omp barrier
-#pragma omp master
- {
-#endif
- memory_next = state_memory_old.memory_entry_by_name_next();
-#ifdef _OPENMP
- } // end master region
-#pragma omp barrier
-#endif
-
- } // memory item iteration
-
- } // if have state
- // End of data parallel optimizations
-#endif
-
- if (neighbor_remap) {
- int flags = 0;
- static int *nlft_old, *nrht_old, *nbot_old, *ntop_old;
-#ifdef _OPENMP
-#pragma omp barrier
-#pragma omp master
- {
-#endif
- nlft_old = (int *)mesh_memory.memory_malloc(new_ncells, sizeof(int), "nlft_old", flags);
- nrht_old = (int *)mesh_memory.memory_malloc(new_ncells, sizeof(int), "nrht_old", flags);
- nbot_old = (int *)mesh_memory.memory_malloc(new_ncells, sizeof(int), "nbot_old", flags);
- ntop_old = (int *)mesh_memory.memory_malloc(new_ncells, sizeof(int), "ntop_old", flags);
-#ifdef _OPENMP
- } // end master region
-#pragma omp barrier
-#endif
- flags = RESTART_DATA;
-
-#ifdef _OPENMP
-#pragma omp for
-#endif
- for (int ic = 0; ic < new_ncells; ic++){
- nlft_old[ic] = -1;
- nrht_old[ic] = -1;
- nbot_old[ic] = -1;
- ntop_old[ic] = -1;
- }
-
-#ifdef _OPENMP
-#pragma omp barrier
-#pragma omp master
- {
-#endif
- mesh_memory.memory_swap(&nlft, &nlft_old);
- mesh_memory.memory_swap(&nrht, &nrht_old);
- mesh_memory.memory_swap(&nbot, &nbot_old);
- mesh_memory.memory_swap(&ntop, &ntop_old);
-#ifdef _OPENMP
- } // end master region
-#pragma omp barrier
-#endif
-
-#ifdef _OPENMP
-#pragma omp for
-#endif
- for (int ic = 0; ic < (int)ncells; ic++){
- int nc = index[ic];
-
- if (mpot[ic] == 0){
- if (nlft_old[ic] < (int)ncells && nlft_old[ic] >= 0){
- nlft[nc] = (mpot[nlft_old[ic]] == 0) ? index[nlft_old[ic]] : -1;
- }
- if (nrht_old[ic] < (int)ncells && nrht_old[ic] >= 0){
- nrht[nc] = (mpot[nrht_old[ic]] == 0) ? index[nrht_old[ic]] : -1;
- }
- if (nbot_old[ic] < (int)ncells && nbot_old[ic] >= 0){
- nbot[nc] = (mpot[nbot_old[ic]] == 0) ? index[nbot_old[ic]] : -1;
- }
- if (ntop_old[ic] < (int)ncells && ntop_old[ic] >= 0){
- ntop[nc] = (mpot[ntop_old[ic]] == 0) ? index[ntop_old[ic]] : -1;
- }
- } else if (mpot[ic] <= -2) {
- nlft[nc] = -1;
- nrht[nc] = -1;
- nbot[nc] = -1;
- ntop[nc] = -1;
- } else if (mpot[ic] > 0){
- nlft[nc] = -1;
- nlft[nc+1] = -1;
- nrht[nc] = -1;
- nrht[nc+1] = -1;
- nbot[nc] = -1;
- nbot[nc+1] = -1;
- ntop[nc] = -1;
- ntop[nc+1] = -1;
- if (celltype[nc] == REAL_CELL){
- nlft[nc+2] = -1;
- nlft[nc+3] = -1;
- nrht[nc+2] = -1;
- nrht[nc+3] = -1;
- nbot[nc+2] = -1;
- nbot[nc+3] = -1;
- ntop[nc+2] = -1;
- ntop[nc+3] = -1;
- }
- }
- if (mpot[ic] > 0){
- nc++;
- switch(celltype[nc]){
- case LEFT_BOUNDARY:
- nlft[nc] = nc;
- break;
- case RIGHT_BOUNDARY:
- nrht[nc] = nc;
- break;
- case BOTTOM_BOUNDARY:
- nbot[nc] = nc;
- break;
- case TOP_BOUNDARY:
- ntop[nc] = nc;
- break;
- }
- }
- }
-
-#ifdef _OPENMP
-#pragma omp barrier
-#pragma omp master
- {
-#endif
- nlft_old = (int *)mesh_memory.memory_delete(nlft_old);
- nrht_old = (int *)mesh_memory.memory_delete(nrht_old);
- nbot_old = (int *)mesh_memory.memory_delete(nbot_old);
- ntop_old = (int *)mesh_memory.memory_delete(ntop_old);
-#ifdef _OPENMP
- } // end master region
-#pragma omp barrier
-#endif
- } else {
-#ifdef _OPENMP
-#pragma omp barrier
-#pragma omp master
- {
-#endif
- nlft = (int *)mesh_memory.memory_delete(nlft);
- nrht = (int *)mesh_memory.memory_delete(nrht);
- nbot = (int *)mesh_memory.memory_delete(nbot);
- ntop = (int *)mesh_memory.memory_delete(ntop);
-#ifdef _OPENMP
- } // end master region
-#pragma omp barrier
-#endif
- }
-
-#ifdef _OPENMP
-#pragma omp barrier
-#pragma omp master
- {
-#endif
- //ncells = nc;
-
-#ifdef HAVE_MPI
- if (parallel) {
- MPI_Allgather(&new_ncells, 1, MPI_INT, &nsizes[0], 1, MPI_INT, MPI_COMM_WORLD);
-
- ndispl[0]=0;
- for (int ip=1; ip<numpe; ip++){
- ndispl[ip] = ndispl[ip-1] + nsizes[ip-1];
- }
- noffset=ndispl[mype];
- ncells_global = ndispl[numpe-1]+nsizes[numpe-1];
- }
-#endif
-
- cpu_timers[MESH_TIMER_REZONE_ALL] += cpu_timer_stop(tstart_cpu);
-#ifdef _OPENMP
- } // end master region
-#pragma omp barrier
-#endif
-
- } // if do_rezone
-
-}
-
-#ifdef HAVE_OPENCL
-void Mesh::gpu_rezone_all(int icount, int jcount, cl_mem &dev_mpot, MallocPlus &gpu_state_memory)
-{
- if (! gpu_do_rezone) return;
-
- struct timeval tstart_cpu;
- cpu_timer_start(&tstart_cpu);
-
- gpu_counters[MESH_COUNTER_REZONE]++;
-
- cl_command_queue command_queue = ezcl_get_command_queue();
-
- assert(dev_mpot);
- assert(dev_level);
- assert(dev_i);
- assert(dev_j);
- assert(dev_celltype);
- assert(dev_ioffset);
- assert(dev_levdx);
- assert(dev_levdy);
-
- int add_ncells = icount - jcount;
-
-// int global_icount = icount;
-// int global_jcount = jcount;
-
- size_t old_ncells = ncells;
- size_t new_ncells = ncells + add_ncells;
-
-#ifdef HAVE_MPI
- //int global_add_ncells = add_ncells;
-
-// if (parallel) {
-// int count[2], global_count[2];
-// count[0] = icount;
-// count[1] = jcount;
-// MPI_Allreduce(&count, &global_count, 2, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
-// global_icount = global_count[0];
-// global_jcount = global_count[1];
-// //global_add_ncells = global_icount + global_jcount;
-// }
-#endif
-
- int ifirst = 0;
- int ilast = 0;
- int jfirst = 0;
- int jlast = 0;
- int level_first = 0;
- int level_last = 0;
-
-#ifdef HAVE_MPI
- if (numpe > 1) {
- int i_tmp_first, i_tmp_last;
- int j_tmp_first, j_tmp_last;
- int level_tmp_first, level_tmp_last;
-
- ezcl_enqueue_read_buffer(command_queue, dev_i, CL_FALSE, 0, 1*sizeof(cl_int), &i_tmp_first, NULL);
- ezcl_enqueue_read_buffer(command_queue, dev_j, CL_FALSE, 0, 1*sizeof(cl_int), &j_tmp_first, NULL);
- ezcl_enqueue_read_buffer(command_queue, dev_level, CL_FALSE, 0, 1*sizeof(cl_int), &level_tmp_first, NULL);
- ezcl_enqueue_read_buffer(command_queue, dev_i, CL_FALSE, (old_ncells-1)*sizeof(cl_int), 1*sizeof(cl_int), &i_tmp_last, NULL);
- ezcl_enqueue_read_buffer(command_queue, dev_j, CL_FALSE, (old_ncells-1)*sizeof(cl_int), 1*sizeof(cl_int), &j_tmp_last, NULL);
- ezcl_enqueue_read_buffer(command_queue, dev_level, CL_TRUE, (old_ncells-1)*sizeof(cl_int), 1*sizeof(cl_int), &level_tmp_last, NULL);
-
- MPI_Request req[12];
- MPI_Status status[12];
-
- static int prev = MPI_PROC_NULL;
- static int next = MPI_PROC_NULL;
-
- if (mype != 0) prev = mype-1;
- if (mype < numpe - 1) next = mype+1;
-
- MPI_Isend(&i_tmp_last, 1,MPI_INT,next,1,MPI_COMM_WORLD,req+0);
- MPI_Irecv(&ifirst, 1,MPI_INT,prev,1,MPI_COMM_WORLD,req+1);
-
- MPI_Isend(&i_tmp_first, 1,MPI_INT,prev,1,MPI_COMM_WORLD,req+2);
- MPI_Irecv(&ilast, 1,MPI_INT,next,1,MPI_COMM_WORLD,req+3);
-
- MPI_Isend(&j_tmp_last, 1,MPI_INT,next,1,MPI_COMM_WORLD,req+4);
- MPI_Irecv(&jfirst, 1,MPI_INT,prev,1,MPI_COMM_WORLD,req+5);
-
- MPI_Isend(&j_tmp_first, 1,MPI_INT,prev,1,MPI_COMM_WORLD,req+6);
- MPI_Irecv(&jlast, 1,MPI_INT,next,1,MPI_COMM_WORLD,req+7);
-
- MPI_Isend(&level_tmp_last, 1,MPI_INT,next,1,MPI_COMM_WORLD,req+8);
- MPI_Irecv(&level_first, 1,MPI_INT,prev,1,MPI_COMM_WORLD,req+9);
-
- MPI_Isend(&level_tmp_first, 1,MPI_INT,prev,1,MPI_COMM_WORLD,req+10);
- MPI_Irecv(&level_last, 1,MPI_INT,next,1,MPI_COMM_WORLD,req+11);
-
- MPI_Waitall(12, req, status);
- }
-#endif
-
-/*
- if (new_ncells != old_ncells){
- ncells = new_ncells;
- }
-*/
-
- size_t mem_request = (int)((float)new_ncells*mem_factor);
- cl_mem dev_celltype_new = ezcl_malloc(NULL, const_cast<char *>("dev_celltype_new"), &mem_request, sizeof(cl_int), CL_MEM_READ_WRITE, 0);
- cl_mem dev_level_new = ezcl_malloc(NULL, const_cast<char *>("dev_level_new"), &mem_request, sizeof(cl_int), CL_MEM_READ_WRITE, 0);
- cl_mem dev_i_new = ezcl_malloc(NULL, const_cast<char *>("dev_i_new"), &mem_request, sizeof(cl_int), CL_MEM_READ_WRITE, 0);
- cl_mem dev_j_new = ezcl_malloc(NULL, const_cast<char *>("dev_j_new"), &mem_request, sizeof(cl_int), CL_MEM_READ_WRITE, 0);
-
- cl_mem dev_ijadd;
-
- vector<int>ijadd(6);
- if (numpe > 1) {
- ijadd[0] = ifirst;
- ijadd[1] = ilast;
- ijadd[2] = jfirst;
- ijadd[3] = jlast;
- ijadd[4] = level_first;
- ijadd[5] = level_last;
- }
-
- size_t six = 6;
- dev_ijadd = ezcl_malloc(NULL, const_cast<char *>("dev_ijadd"), &six, sizeof(cl_int), CL_MEM_READ_WRITE, 0);
- ezcl_enqueue_write_buffer(command_queue, dev_ijadd, CL_TRUE, 0, 6*sizeof(cl_int), (void*)&ijadd[0], NULL);
-
- cl_mem dev_indexoffset = ezcl_malloc(NULL, const_cast<char *>("dev_indexoffset"), &old_ncells, sizeof(cl_uint), CL_MEM_READ_WRITE, 0);
-
- int stencil = 0;
- if (localStencil) stencil = 1;
-
- size_t local_work_size = 128;
- size_t global_work_size = ((old_ncells+local_work_size - 1) /local_work_size) * local_work_size;
-
- ezcl_set_kernel_arg(kernel_rezone_all, 0, sizeof(cl_int), (void *)&old_ncells);
- ezcl_set_kernel_arg(kernel_rezone_all, 1, sizeof(cl_int), (void *)&stencil);
- ezcl_set_kernel_arg(kernel_rezone_all, 2, sizeof(cl_int), (void *)&levmx);
- ezcl_set_kernel_arg(kernel_rezone_all, 3, sizeof(cl_mem), (void *)&dev_mpot);
- ezcl_set_kernel_arg(kernel_rezone_all, 4, sizeof(cl_mem), (void *)&dev_level);
- ezcl_set_kernel_arg(kernel_rezone_all, 5, sizeof(cl_mem), (void *)&dev_i);
- ezcl_set_kernel_arg(kernel_rezone_all, 6, sizeof(cl_mem), (void *)&dev_j);
- ezcl_set_kernel_arg(kernel_rezone_all, 7, sizeof(cl_mem), (void *)&dev_celltype);
- ezcl_set_kernel_arg(kernel_rezone_all, 8, sizeof(cl_mem), (void *)&dev_level_new);
- ezcl_set_kernel_arg(kernel_rezone_all, 9, sizeof(cl_mem), (void *)&dev_i_new);
- ezcl_set_kernel_arg(kernel_rezone_all, 10, sizeof(cl_mem), (void *)&dev_j_new);
- ezcl_set_kernel_arg(kernel_rezone_all, 11, sizeof(cl_mem), (void *)&dev_celltype_new);
- ezcl_set_kernel_arg(kernel_rezone_all, 12, sizeof(cl_mem), (void *)&dev_ioffset);
- ezcl_set_kernel_arg(kernel_rezone_all, 13, sizeof(cl_mem), (void *)&dev_indexoffset);
- ezcl_set_kernel_arg(kernel_rezone_all, 14, sizeof(cl_mem), (void *)&dev_levdx);
- ezcl_set_kernel_arg(kernel_rezone_all, 15, sizeof(cl_mem), (void *)&dev_levdy);
- ezcl_set_kernel_arg(kernel_rezone_all, 16, sizeof(cl_mem), (void *)&dev_levtable);
- ezcl_set_kernel_arg(kernel_rezone_all, 17, sizeof(cl_mem), (void *)&dev_ijadd);
- ezcl_set_kernel_arg(kernel_rezone_all, 18, local_work_size * sizeof(cl_uint), NULL);
- //ezcl_set_kernel_arg(kernel_rezone_all, 19, local_work_size * sizeof(cl_real4_t), NULL);
-
- ezcl_enqueue_ndrange_kernel(command_queue, kernel_rezone_all, 1, NULL, &global_work_size, &local_work_size, NULL);
-
- MallocPlus gpu_state_memory_old = gpu_state_memory;
- malloc_plus_memory_entry *memory_item;
-
- for (memory_item = gpu_state_memory_old.memory_entry_by_name_begin();
- memory_item != gpu_state_memory_old.memory_entry_by_name_end();
- memory_item = gpu_state_memory_old.memory_entry_by_name_next() ) {
- //printf("DEBUG -- it.mem_name %s elsize %lu\n",memory_item->mem_name,memory_item->mem_elsize);
- cl_mem dev_state_mem_ptr = (cl_mem)memory_item->mem_ptr;
-
- if (memory_item->mem_elsize == 8){
-#ifndef MINIMUM_PRECISION
- cl_mem dev_state_var_new = (cl_mem)gpu_state_memory.memory_malloc(max(old_ncells,new_ncells), sizeof(cl_double), const_cast<char *>("dev_state_var_new"), DEVICE_REGULAR_MEMORY);
-
- ezcl_set_kernel_arg(kernel_rezone_one_double, 0, sizeof(cl_int), (void *)&old_ncells);
- ezcl_set_kernel_arg(kernel_rezone_one_double, 1, sizeof(cl_mem), (void *)&dev_i);
- ezcl_set_kernel_arg(kernel_rezone_one_double, 2, sizeof(cl_mem), (void *)&dev_j);
- ezcl_set_kernel_arg(kernel_rezone_one_double, 3, sizeof(cl_mem), (void *)&dev_nlft);
- ezcl_set_kernel_arg(kernel_rezone_one_double, 4, sizeof(cl_mem), (void *)&dev_nrht);
- ezcl_set_kernel_arg(kernel_rezone_one_double, 5, sizeof(cl_mem), (void *)&dev_nbot);
- ezcl_set_kernel_arg(kernel_rezone_one_double, 6, sizeof(cl_mem), (void *)&dev_ntop);
- ezcl_set_kernel_arg(kernel_rezone_one_double, 7, sizeof(cl_mem), (void *)&dev_celltype);
- ezcl_set_kernel_arg(kernel_rezone_one_double, 8, sizeof(cl_mem), (void *)&dev_mpot);
- ezcl_set_kernel_arg(kernel_rezone_one_double, 9, sizeof(cl_mem), (void *)&dev_indexoffset);
- ezcl_set_kernel_arg(kernel_rezone_one_double,10, sizeof(cl_mem), (void *)&dev_state_mem_ptr);
- ezcl_set_kernel_arg(kernel_rezone_one_double,11, sizeof(cl_mem), (void *)&dev_state_var_new);
-
- ezcl_enqueue_ndrange_kernel(command_queue, kernel_rezone_one_double, 1, NULL, &global_work_size, &local_work_size, NULL);
-
- gpu_state_memory.memory_replace(dev_state_mem_ptr, dev_state_var_new);
-#else
- printf("ERROR -- can't have double type for state variable\n");
- exit(1);
-#endif
- } else if (memory_item->mem_elsize == 4){
- cl_mem dev_state_var_new = (cl_mem)gpu_state_memory.memory_malloc(max(old_ncells,new_ncells), sizeof(cl_float), const_cast<char *>("dev_state_var_new"), DEVICE_REGULAR_MEMORY);
-
- ezcl_set_kernel_arg(kernel_rezone_one_float, 0, sizeof(cl_int), (void *)&old_ncells);
- ezcl_set_kernel_arg(kernel_rezone_one_float, 1, sizeof(cl_mem), (void *)&dev_i);
- ezcl_set_kernel_arg(kernel_rezone_one_float, 2, sizeof(cl_mem), (void *)&dev_j);
- ezcl_set_kernel_arg(kernel_rezone_one_float, 3, sizeof(cl_mem), (void *)&dev_nlft);
- ezcl_set_kernel_arg(kernel_rezone_one_float, 4, sizeof(cl_mem), (void *)&dev_nrht);
- ezcl_set_kernel_arg(kernel_rezone_one_float, 5, sizeof(cl_mem), (void *)&dev_nbot);
- ezcl_set_kernel_arg(kernel_rezone_one_float, 6, sizeof(cl_mem), (void *)&dev_ntop);
- ezcl_set_kernel_arg(kernel_rezone_one_float, 7, sizeof(cl_mem), (void *)&dev_celltype);
- ezcl_set_kernel_arg(kernel_rezone_one_float, 8, sizeof(cl_mem), (void *)&dev_mpot);
- ezcl_set_kernel_arg(kernel_rezone_one_float, 9, sizeof(cl_mem), (void *)&dev_indexoffset);
- ezcl_set_kernel_arg(kernel_rezone_one_float,10, sizeof(cl_mem), (void *)&dev_state_mem_ptr);
- ezcl_set_kernel_arg(kernel_rezone_one_float,11, sizeof(cl_mem), (void *)&dev_state_var_new);
-
- ezcl_enqueue_ndrange_kernel(command_queue, kernel_rezone_one_float, 1, NULL, &global_work_size, &local_work_size, NULL);
-
- gpu_state_memory.memory_replace(dev_state_mem_ptr, dev_state_var_new);
- }
- }
-
- if (neighbor_remap & ! parallel) {
- size_t mem_request = (int)((float)new_ncells*mem_factor);
- cl_mem dev_nlft_new = ezcl_malloc(NULL, const_cast<char *>("dev_nlft_new"), &mem_request, sizeof(cl_int), CL_MEM_READ_WRITE, 0);
- cl_mem dev_nrht_new = ezcl_malloc(NULL, const_cast<char *>("dev_nrht_new"), &mem_request, sizeof(cl_int), CL_MEM_READ_WRITE, 0);
- cl_mem dev_nbot_new = ezcl_malloc(NULL, const_cast<char *>("dev_nbot_new"), &mem_request, sizeof(cl_int), CL_MEM_READ_WRITE, 0);
- cl_mem dev_ntop_new = ezcl_malloc(NULL, const_cast<char *>("dev_ntop_new"), &mem_request, sizeof(cl_int), CL_MEM_READ_WRITE, 0);
-
- ezcl_set_kernel_arg(kernel_neighbor_init, 0, sizeof(cl_int), (void *)&new_ncells);
- ezcl_set_kernel_arg(kernel_neighbor_init, 1, sizeof(cl_mem), (void *)&dev_nlft_new);
- ezcl_set_kernel_arg(kernel_neighbor_init, 2, sizeof(cl_mem), (void *)&dev_nrht_new);
- ezcl_set_kernel_arg(kernel_neighbor_init, 3, sizeof(cl_mem), (void *)&dev_nbot_new);
- ezcl_set_kernel_arg(kernel_neighbor_init, 4, sizeof(cl_mem), (void *)&dev_ntop_new);
- ezcl_enqueue_ndrange_kernel(command_queue, kernel_neighbor_init, 1, NULL, &global_work_size, &local_work_size, NULL);
-
- ezcl_set_kernel_arg(kernel_rezone_neighbors, 0, sizeof(cl_int), (void *)&old_ncells);
- ezcl_set_kernel_arg(kernel_rezone_neighbors, 1, sizeof(cl_mem), (void *)&dev_mpot);
- ezcl_set_kernel_arg(kernel_rezone_neighbors, 2, sizeof(cl_mem), (void *)&dev_indexoffset);
- ezcl_set_kernel_arg(kernel_rezone_neighbors, 3, sizeof(cl_mem), (void *)&dev_nlft);
- ezcl_set_kernel_arg(kernel_rezone_neighbors, 4, sizeof(cl_mem), (void *)&dev_nrht);
- ezcl_set_kernel_arg(kernel_rezone_neighbors, 5, sizeof(cl_mem), (void *)&dev_nbot);
- ezcl_set_kernel_arg(kernel_rezone_neighbors, 6, sizeof(cl_mem), (void *)&dev_ntop);
- ezcl_set_kernel_arg(kernel_rezone_neighbors, 7, sizeof(cl_mem), (void *)&dev_celltype_new);
- ezcl_set_kernel_arg(kernel_rezone_neighbors, 8, sizeof(cl_mem), (void *)&dev_nlft_new);
- ezcl_set_kernel_arg(kernel_rezone_neighbors, 9, sizeof(cl_mem), (void *)&dev_nrht_new);
- ezcl_set_kernel_arg(kernel_rezone_neighbors, 10, sizeof(cl_mem), (void *)&dev_nbot_new);
- ezcl_set_kernel_arg(kernel_rezone_neighbors, 11, sizeof(cl_mem), (void *)&dev_ntop_new);
- ezcl_enqueue_ndrange_kernel(command_queue, kernel_rezone_neighbors, 1, NULL, &global_work_size, &local_work_size, NULL);
-
- ezcl_device_memory_swap(&dev_nlft, &dev_nlft_new);
- ezcl_device_memory_swap(&dev_nrht, &dev_nrht_new);
- ezcl_device_memory_swap(&dev_nbot, &dev_nbot_new);
- ezcl_device_memory_swap(&dev_ntop, &dev_ntop_new);
-
- ezcl_device_memory_delete(dev_nlft_new);
- ezcl_device_memory_delete(dev_nrht_new);
- ezcl_device_memory_delete(dev_nbot_new);
- ezcl_device_memory_delete(dev_ntop_new);
- } else {
- ezcl_device_memory_delete(dev_nlft);
- ezcl_device_memory_delete(dev_nrht);
- ezcl_device_memory_delete(dev_nbot);
- ezcl_device_memory_delete(dev_ntop);
- dev_nlft = NULL;
- dev_nrht = NULL;
- dev_nbot = NULL;
- dev_ntop = NULL;
- }
-
- ezcl_device_memory_delete(dev_indexoffset);
-
- if (new_ncells != old_ncells){
- resize_old_device_memory(new_ncells);
- }
-
- ezcl_device_memory_swap(&dev_celltype, &dev_celltype_new);
- ezcl_device_memory_swap(&dev_level, &dev_level_new);
- ezcl_device_memory_swap(&dev_i, &dev_i_new);
- ezcl_device_memory_swap(&dev_j, &dev_j_new);
-
- ezcl_device_memory_delete(dev_mpot);
- ezcl_device_memory_delete(dev_ijadd);
- ezcl_device_memory_delete(dev_ioffset);
-
- ezcl_device_memory_delete(dev_i_new);
- ezcl_device_memory_delete(dev_j_new);
- ezcl_device_memory_delete(dev_celltype_new);
- ezcl_device_memory_delete(dev_level_new);
-
-#ifdef HAVE_MPI
- if (parallel) {
- int new_ncells = ncells + add_ncells;
- MPI_Allgather(&new_ncells, 1, MPI_INT, &nsizes[0], 1, MPI_INT, MPI_COMM_WORLD);
-
- ndispl[0]=0;
- for (int ip=1; ip<numpe; ip++){
- ndispl[ip] = ndispl[ip-1] + nsizes[ip-1];
- }
- noffset=ndispl[mype];
- ncells_global = ndispl[numpe-1]+nsizes[numpe-1];
- }
-#endif
-
- gpu_timers[MESH_TIMER_REZONE_ALL] += (long)(cpu_timer_stop(tstart_cpu) * 1.0e9);
-}
-#endif
-
-/*
- * Fill the neighbor index arrays nlft/nrht/nbot/ntop for cells [0, ncells).
- *
- * Nothing but the overall timer is touched unless do_rezone is set.  When it
- * is set the routine:
- *   1. makes sure the four neighbor arrays hold at least ncells entries,
- *      reallocating them through mesh_memory when they are too small and
- *      filling freshly allocated storage with -1 ("unknown");
- *   2. resolves the neighbors with the method selected by calc_neighbor_type
- *      (HASH_TABLE or KDTREE);
- *   3. records ncells in ncells_ghost.
- *
- * ncells -- number of cells to process; also the minimum size required of
- *           the neighbor arrays.
- *
- * The function is written to be called by every thread of an enclosing
- * OpenMP parallel region: shared updates are wrapped in "omp master" and the
- * cell loops of the hash path use "omp for".
- */
-void Mesh::calc_neighbors(int ncells)
-{
-   struct timeval tstart_cpu;
-   cpu_timer_start(&tstart_cpu);
-
-   if (do_rezone) {
-
-      int flags = INDEX_ARRAY_MEMORY;
-
-#if defined (HAVE_J7)
-      if (parallel) flags |= LOAD_BALANCE_MEMORY;
-#endif
-
-      // Static on purpose: the master thread writes it below and every
-      // thread reads it again after the barrier.
-      static int nlft_size = 0;
-
-#ifdef _OPENMP
-#pragma omp master
-      {
-#endif
-      cpu_counters[MESH_COUNTER_CALC_NEIGH]++;
-
-      // Always refresh the cached size.  Leaving a value from an earlier
-      // call in place when nlft has since become NULL could make the test
-      // below skip the allocation and leave the arrays NULL for the loops
-      // that follow.
-      if (nlft != NULL){
-         nlft_size = mesh_memory.get_memory_size(nlft);
-      } else {
-         nlft_size = 0;
-      }
-
-      if (nlft_size < ncells){
-         if (nlft != NULL){
-            nlft = (int *)mesh_memory.memory_delete(nlft);
-            nrht = (int *)mesh_memory.memory_delete(nrht);
-            nbot = (int *)mesh_memory.memory_delete(nbot);
-            ntop = (int *)mesh_memory.memory_delete(ntop);
-         }
-
-         nlft = (int *)mesh_memory.memory_malloc(ncells, sizeof(int), "nlft", flags);
-         nrht = (int *)mesh_memory.memory_malloc(ncells, sizeof(int), "nrht", flags);
-         nbot = (int *)mesh_memory.memory_malloc(ncells, sizeof(int), "nbot", flags);
-         ntop = (int *)mesh_memory.memory_malloc(ncells, sizeof(int), "ntop", flags);
-      }
-#ifdef _OPENMP
-      }
-#pragma omp barrier
-#endif
-
-      // nlft_size still holds the size measured before any reallocation, so
-      // this is true exactly when the arrays were just (re)allocated.  Each
-      // caller initializes the index range handed out by get_bounds().
-      if (nlft_size < ncells){
-         int lowerBounds, upperBounds;
-         get_bounds(lowerBounds, upperBounds);
-
-         for(int ic=lowerBounds; ic<upperBounds; ic++){
-            nlft[ic] = -1;
-            nrht[ic] = -1;
-            nbot[ic] = -1;
-            ntop[ic] = -1;
-         }
-      }
-
-      if (calc_neighbor_type == HASH_TABLE) {
-
-         struct timeval tstart_lev2;
-         if (TIMING_LEVEL >= 2) cpu_timer_start(&tstart_lev2);
-
-         // Extent of the index space after scaling by IPOW2(levmx)
-         // (presumably the finest-level grid -- confirm against IPOW2).
-         int jmaxsize = (jmax+1)*IPOW2(levmx);
-         int imaxsize = (imax+1)*IPOW2(levmx);
-
-         int *hash;
-
-#ifdef _OPENMP
-         hash = compact_hash_init_openmp(ncells, imaxsize, jmaxsize, 0);
-#else
-         hash = compact_hash_init(ncells, imaxsize, jmaxsize, 0);
-#endif
-
-         // Hash setup: a cell is entered when one of its own neighbors is
-         // still -1, or when a neighbor at a higher level has an unset
-         // top/right link of its own.
-#ifdef _OPENMP
-#pragma omp for
-#endif
-         for(int ic=0; ic<ncells; ic++){
-            int lev = level[ic];
-
-            bool need_hash = (nlft[ic] == -1 || nrht[ic] == -1 || nbot[ic] == -1 || ntop[ic] == -1);
-
-            if (! need_hash){
-               if ( (level[nlft[ic]] > lev && ntop[nlft[ic]] == -1) ||
-                    (level[nrht[ic]] > lev && ntop[nrht[ic]] == -1) ||
-                    (level[nbot[ic]] > lev && nrht[nbot[ic]] == -1) ||
-                    (level[ntop[ic]] > lev && nrht[ntop[ic]] == -1) ) need_hash = true;
-            }
-
-            if (need_hash) {
-               int levmult = IPOW2(levmx-lev);
-               int ii = i[ic]*levmult;
-               int jj = j[ic]*levmult;
-
-               write_hash(ic,jj*imaxsize+ii,hash);
-            }
-         }
-
-         if (TIMING_LEVEL >= 2) {
-            // The master pragma covers only the accumulation statement; the
-            // restart of the local timer is executed by every caller.
-#ifdef _OPENMP
-#pragma omp master
-#endif
-            cpu_timers[MESH_TIMER_HASH_SETUP] += cpu_timer_stop(tstart_lev2);
-            cpu_timer_start(&tstart_lev2);
-         }
-
-         // Hash query: resolve every neighbor that is still negative.
-#ifdef _OPENMP
-#pragma omp for
-#endif
-         for (int ic=0; ic<ncells; ic++){
-            int ii = i[ic];
-            int jj = j[ic];
-            int lev = level[ic];
-            int levmult = IPOW2(levmx-lev);
-            int iicur = ii*levmult;
-            int iilft = max( (ii-1)*levmult, 0         );
-            int iirht = min( (ii+1)*levmult, imaxsize-1);
-            int jjcur = jj*levmult;
-            int jjbot = max( (jj-1)*levmult, 0         );
-            int jjtop = min( (jj+1)*levmult, jmaxsize-1);
-
-            int nlftval = nlft[ic];
-            int nrhtval = nrht[ic];
-            int nbotval = nbot[ic];
-            int ntopval = ntop[ic];
-
-            // Taking care of boundary cells
-            // Force each boundary cell to point to itself on its boundary direction
-            if (nlftval < 0 && iicur <    1*IPOW2(levmx)  ) nlftval = ic;
-            if (nbotval < 0 && jjcur <    1*IPOW2(levmx)  ) nbotval = ic;
-            if (nrhtval < 0 && iicur > imax*IPOW2(levmx)-1) nrhtval = ic;
-            if (ntopval < 0 && jjcur > jmax*IPOW2(levmx)-1) ntopval = ic;
-            // Boundary cells next to corner boundary need special checks
-            if (nlftval < 0 && iicur ==    1*IPOW2(levmx) && (jjcur < 1*IPOW2(levmx) || jjcur >= jmax*IPOW2(levmx) ) ) nlftval = ic;
-            if (nbotval < 0 && jjcur ==    1*IPOW2(levmx) && (iicur < 1*IPOW2(levmx) || iicur >= imax*IPOW2(levmx) ) ) nbotval = ic;
-            if (nrhtval < 0 && iirht == imax*IPOW2(levmx) && (jjcur < 1*IPOW2(levmx) || jjcur >= jmax*IPOW2(levmx) ) ) nrhtval = ic;
-            if (ntopval < 0 && jjtop == jmax*IPOW2(levmx) && (iicur < 1*IPOW2(levmx) || iicur >= imax*IPOW2(levmx) ) ) ntopval = ic;
-
-            // need to check for finer neighbor first
-            // Right and top neighbor don't change for finer, so drop through to same size
-            // Left and bottom need to be half of same size index for finer test
-            if (lev != levmx) {
-               int iilftfiner = iicur-(iicur-iilft)/2;
-               int jjbotfiner = jjcur-(jjcur-jjbot)/2;
-               if (nlftval < 0) nlftval = read_hash(jjcur*imaxsize+iilftfiner, hash);
-               if (nbotval < 0) nbotval = read_hash(jjbotfiner*imaxsize+iicur, hash);
-            }
-
-            // same size neighbor
-            if (nlftval < 0) nlftval = read_hash(jjcur*imaxsize+iilft, hash);
-            if (nrhtval < 0) nrhtval = read_hash(jjcur*imaxsize+iirht, hash);
-            if (nbotval < 0) nbotval = read_hash(jjbot*imaxsize+iicur, hash);
-            if (ntopval < 0) ntopval = read_hash(jjtop*imaxsize+iicur, hash);
-
-            // Now we need to take care of special case where bottom and left boundary need adjustment since
-            // expected cell doesn't exist on these boundaries if it is finer than current cell
-            if (lev != levmx) {
-               if (jjcur < 1*IPOW2(levmx)) {
-                  if (nrhtval < 0) {
-                     int jjtopfiner = (jjcur+jjtop)/2;
-                     nrhtval = read_hash(jjtopfiner*imaxsize+iirht, hash);
-                  }
-                  if (nlftval < 0) {
-                     int iilftfiner = iicur-(iicur-iilft)/2;
-                     int jjtopfiner = (jjcur+jjtop)/2;
-                     nlftval = read_hash(jjtopfiner*imaxsize+iilftfiner, hash);
-                  }
-               }
-
-               if (iicur < 1*IPOW2(levmx)) {
-                  if (ntopval < 0) {
-                     int iirhtfiner = (iicur+iirht)/2;
-                     ntopval = read_hash(jjtop*imaxsize+iirhtfiner, hash);
-                  }
-                  if (nbotval < 0) {
-                     int iirhtfiner = (iicur+iirht)/2;
-                     int jjbotfiner = jjcur-(jjcur-jjbot)/2;
-                     nbotval = read_hash(jjbotfiner*imaxsize+iirhtfiner, hash);
-                  }
-               }
-            }
-
-            // coarser neighbor
-            if (lev != 0){
-               if (nlftval < 0) {
-                  // step left by one more cell width of the current level
-                  iilft -= iicur-iilft;
-                  int jjlft = (jj/2)*2*levmult;
-                  nlftval = read_hash(jjlft*imaxsize+iilft, hash);
-               }
-               if (nrhtval < 0) {
-                  int jjrht = (jj/2)*2*levmult;
-                  nrhtval = read_hash(jjrht*imaxsize+iirht, hash);
-               }
-               if (nbotval < 0) {
-                  // step down by one more cell height of the current level
-                  jjbot -= jjcur-jjbot;
-                  int iibot = (ii/2)*2*levmult;
-                  nbotval = read_hash(jjbot*imaxsize+iibot, hash);
-               }
-               if (ntopval < 0) {
-                  int iitop = (ii/2)*2*levmult;
-                  ntopval = read_hash(jjtop*imaxsize+iitop, hash);
-               }
-            }
-
-            nlft[ic] = nlftval;
-            nrht[ic] = nrhtval;
-            nbot[ic] = nbotval;
-            ntop[ic] = ntopval;
-         }
-
-#ifdef _OPENMP
-#pragma omp barrier
-#pragma omp master
-         {
-#endif
-         write_hash_collision_report();
-         read_hash_collision_report();
-
-         compact_hash_delete(hash);
-
-         if (TIMING_LEVEL >= 2) cpu_timers[MESH_TIMER_HASH_QUERY] += cpu_timer_stop(tstart_lev2);
-#ifdef _OPENMP
-         } // master block
-#endif
-
-      } else if (calc_neighbor_type == KDTREE) {
-
-         struct timeval tstart_lev2;
-         if (TIMING_LEVEL >= 2) cpu_timer_start(&tstart_lev2);
-
-         // The whole k-d tree path runs on the master thread only.
-#ifdef _OPENMP
-#pragma omp barrier
-#pragma omp master
-         {
-#endif
-         TBounds box;
-         vector<int> index_list(IPOW2(levmx*levmx) );
-
-         int num;
-
-         ibase = 0;
-         calc_spatial_coordinates(ibase);
-
-         kdtree_setup();
-
-         if (TIMING_LEVEL >= 2) {
-            cpu_timers[MESH_TIMER_KDTREE_SETUP] += cpu_timer_stop(tstart_lev2);
-            cpu_timer_start(&tstart_lev2);
-         }
-
-         // Each query box is a single point (min == max) offset from
-         // (x[ic], y[ic]) by fractions of the cell size.  A neighbor keeps
-         // its default of ic unless the query returns exactly one cell.
-         // NOTE(review): the 0.25/1.25 offsets suggest x/y are the cell's
-         // lower-left corner -- confirm.
-         for (int ic=0; ic<ncells; ic++) {
-
-            //left
-            nlft[ic] = ic;
-            box.min.x = x[ic]-0.25*dx[ic];
-            box.max.x = x[ic]-0.25*dx[ic];
-            box.min.y = y[ic]+0.25*dy[ic];
-            box.max.y = y[ic]+0.25*dy[ic];
-            KDTree_QueryBoxIntersect(&tree, &num, &(index_list[0]), &box);
-            if (num == 1) nlft[ic]=index_list[0];
-
-            //right
-            nrht[ic] = ic;
-            box.min.x = x[ic]+1.25*dx[ic];
-            box.max.x = x[ic]+1.25*dx[ic];
-            box.min.y = y[ic]+0.25*dy[ic];
-            box.max.y = y[ic]+0.25*dy[ic];
-            KDTree_QueryBoxIntersect(&tree, &num, &(index_list[0]), &box);
-            if (num == 1) nrht[ic]=index_list[0];
-
-            //bot
-            nbot[ic] = ic;
-            box.min.x = x[ic]+0.25*dx[ic];
-            box.max.x = x[ic]+0.25*dx[ic];
-            box.min.y = y[ic]-0.25*dy[ic];
-            box.max.y = y[ic]-0.25*dy[ic];
-            KDTree_QueryBoxIntersect(&tree, &num, &(index_list[0]), &box);
-            if (num == 1) nbot[ic]=index_list[0];
-
-            //top
-            ntop[ic] = ic;
-            box.min.x = x[ic]+0.25*dx[ic];
-            box.max.x = x[ic]+0.25*dx[ic];
-            box.min.y = y[ic]+1.25*dy[ic];
-            box.max.y = y[ic]+1.25*dy[ic];
-            KDTree_QueryBoxIntersect(&tree, &num, &(index_list[0]), &box);
-            if (num == 1) ntop[ic]=index_list[0];
-         } // End main loop over cells.
-
-         KDTree_Destroy(&tree);
-
-         if (TIMING_LEVEL >= 2) cpu_timers[MESH_TIMER_KDTREE_QUERY] += cpu_timer_stop(tstart_lev2);
-
-#ifdef _OPENMP
-         }
-#pragma omp barrier
-#endif
-      } // calc_neighbor_type
-
-#ifdef _OPENMP
-#pragma omp master
-#endif
-      ncells_ghost = ncells;
-
-   }
-
-#ifdef _OPENMP
-#pragma omp master
-#endif
-   cpu_timers[MESH_TIMER_CALC_NEIGHBORS] += cpu_timer_stop(tstart_cpu);
-}
-
-void Mesh::calc_neighbors_local(void)
-{
- struct timeval tstart_cpu;
- cpu_timer_start(&tstart_cpu);
-
- if (do_rezone) {
-
- int flags = INDEX_ARRAY_MEMORY;
-
-#if defined (HAVE_J7)
- if (parallel) flags |= LOAD_BALANCE_MEMORY;
-#endif
-
-#ifdef _OPENMP
-#pragma omp master
- {
-#endif
- cpu_counters[MESH_COUNTER_CALC_NEIGH]++;
-
- if (mesh_memory.get_memory_size(nlft) < ncells){
- if (nlft != NULL) nlft = (int *)mesh_memory.memory_delete(nlft);
- if (nrht != NULL) nrht = (int *)mesh_memory.memory_delete(nrht);
- if (nbot != NULL) nbot = (int *)mesh_memory.memory_delete(nbot);
- if (ntop != NULL) ntop = (int *)mesh_memory.memory_delete(ntop);
- nlft = (int *)mesh_memory.memory_malloc(ncells, sizeof(int), "nlft", flags);
- nrht = (int *)mesh_memory.memory_malloc(ncells, sizeof(int), "nrht", flags);
- nbot = (int *)mesh_memory.memory_malloc(ncells, sizeof(int), "nbot", flags);
- ntop = (int *)mesh_memory.memory_malloc(ncells, sizeof(int), "ntop", flags);
- }
-#ifdef _OPENMP
- }
-#pragma omp barrier
-#endif
-
- int lowerBound, upperBound;
- set_bounds(ncells);
- get_bounds(lowerBound, upperBound);
- for (int ic = lowerBound; ic < upperBound; ic++){
- nlft[ic] = -98;
- nrht[ic] = -98;
- nbot[ic] = -98;
- ntop[ic] = -98;
- }
-
- if (calc_neighbor_type == HASH_TABLE) {
-
- struct timeval tstart_lev2;
- if (TIMING_LEVEL >= 2) cpu_timer_start(&tstart_lev2);
-
- ncells_ghost = ncells;
-
- // Find maximum i column and j row for this processor
- static int jmintile, imintile, jmaxtile, imaxtile;
-
-#ifdef _OPENMP
-#pragma omp barrier
-#pragma omp master
- {
-#endif
- jmintile = (jmax+1)*IPOW2(levmx);
- imintile = (imax+1)*IPOW2(levmx);
- jmaxtile = 0;
- imaxtile = 0;
-#ifdef _OPENMP
- }
-#pragma omp barrier
-#endif
-
- int my_jmintile = jmintile;
- int my_imintile = imintile;
- int my_jmaxtile = 0;
- int my_imaxtile = 0;
-
-#ifdef _OPENMP
-#pragma omp for
-#endif
- for(uint ic=0; ic<ncells; ic++){
- int lev = level[ic];
-// if (lev < 0 || lev > levmx) printf("DEBUG -- cell %d lev %d\n",ic,level[ic]);
- if ( j[ic] *IPOW2(levmx-lev) < my_jmintile) my_jmintile = j[ic] *IPOW2(levmx-lev) ;
- if ((j[ic]+1)*IPOW2(levmx-lev)-1 > my_jmaxtile) my_jmaxtile = (j[ic]+1)*IPOW2(levmx-lev)-1;
- if ( i[ic] *IPOW2(levmx-lev) < my_imintile) my_imintile = i[ic] *IPOW2(levmx-lev) ;
- if ((i[ic]+1)*IPOW2(levmx-lev)-1 > my_imaxtile) my_imaxtile = (i[ic]+1)*IPOW2(levmx-lev)-1;
- }
-#ifdef _OPENMP
-#pragma omp critical
- {
-#endif
- if (my_jmintile < jmintile) jmintile = my_jmintile;
- if (my_imintile < imintile) imintile = my_imintile;
- if (my_jmaxtile > jmaxtile) jmaxtile = my_jmaxtile;
- if (my_imaxtile > imaxtile) imaxtile = my_imaxtile;
-#ifdef _OPENMP
- } // end critical region
-#pragma omp barrier
-#endif
-
- //if (DEBUG) fprintf(fp,"%d: Tile Sizes are imin %d imax %d jmin %d jmax %d\n",mype,imintile,imaxtile,jmintile,jmaxtile);
-
- // Expand size by 2*coarse_cells for ghost cells
- int jminsize = max(jmintile-2*IPOW2(levmx),0);
- int jmaxsize = min(jmaxtile+2*IPOW2(levmx),(jmax+1)*IPOW2(levmx));
- int iminsize = max(imintile-2*IPOW2(levmx),0);
- int imaxsize = min(imaxtile+2*IPOW2(levmx),(imax+1)*IPOW2(levmx));
- //if (DEBUG) fprintf(fp,"%d: Sizes are imin %d imax %d jmin %d jmax %d\n",mype,iminsize,imaxsize,jminsize,jmaxsize);
-
- //fprintf(fp,"DEBUG -- ncells %lu\n",ncells);
-
- static int *hash;
-
-#ifdef _OPENMP
- hash = compact_hash_init_openmp(ncells, imaxsize-iminsize, jmaxsize-jminsize, 0);
-#else
- hash = compact_hash_init(ncells, imaxsize-iminsize, jmaxsize-jminsize, 0);
-#endif
-
- //printf("%d: DEBUG -- noffset %d cells %d\n",mype,noffset,ncells);
-
- if (DEBUG) {
-#ifdef _OPENMP
-#pragma omp master
-#endif
- fprintf(fp,"%d: Sizes are imin %d imax %d jmin %d jmax %d\n",mype,iminsize,imaxsize,jminsize,jmaxsize);
- }
-
- static int imaxcalc, jmaxcalc;
-
-#ifdef _OPENMP
-#pragma omp for
-#endif
- for(uint ic=0; ic<ncells; ic++){
- int cellnumber = ic+noffset;
- int lev = level[ic];
- int levmult = IPOW2(levmx-lev);
- int ii = i[ic]*levmult-iminsize;
- int jj = j[ic]*levmult-jminsize;
-
- write_hash(cellnumber, jj*(imaxsize-iminsize)+ii, hash);
- } // end for loop
-
- if (TIMING_LEVEL >= 2) {
-#ifdef _OPENMP
-#pragma omp master
-#endif
- cpu_timers[MESH_TIMER_HASH_SETUP] += cpu_timer_stop(tstart_lev2);
- cpu_timer_start(&tstart_lev2);
- }
-
-#ifdef _OPENMP
-#pragma omp master
- {
-#endif
- // Set neighbors to global cell numbers from hash
- jmaxcalc = (jmax+1)*IPOW2(levmx);
- imaxcalc = (imax+1)*IPOW2(levmx);
-#ifdef _OPENMP
- }
-#pragma omp barrier
-#endif
-
-#ifdef _OPENMP
-#pragma omp for
-#endif
- for (uint ic=0; ic<ncells; ic++){
- int ii = i[ic];
- int jj = j[ic];
- int lev = level[ic];
- int levmult = IPOW2(levmx-lev);
-
- int iicur = ii*levmult-iminsize;
- int iilft = max( (ii-1)*levmult, 0 )-iminsize;
- int iirht = min( (ii+1)*levmult, imaxcalc-1)-iminsize;
- int jjcur = jj*levmult-jminsize;
- int jjbot = max( (jj-1)*levmult, 0 )-jminsize;
- int jjtop = min( (jj+1)*levmult, jmaxcalc-1)-jminsize;
-
- int nlftval = -1;
- int nrhtval = -1;
- int nbotval = -1;
- int ntopval = -1;
-
- // Taking care of boundary cells
- // Force each boundary cell to point to itself on its boundary direction
- if (iicur < 1*IPOW2(levmx) -iminsize) nlftval = ic+noffset;
- if (jjcur < 1*IPOW2(levmx) -jminsize) nbotval = ic+noffset;
- if (iicur > imax*IPOW2(levmx)-1-iminsize) nrhtval = ic+noffset;
- if (jjcur > jmax*IPOW2(levmx)-1-jminsize) ntopval = ic+noffset;
- // Boundary cells next to corner boundary need special checks
- if (iicur == 1*IPOW2(levmx)-iminsize && (jjcur < 1*IPOW2(levmx)-jminsize || jjcur >= jmax*IPOW2(levmx)-jminsize ) ) nlftval = ic+noffset;
- if (jjcur == 1*IPOW2(levmx)-jminsize && (iicur < 1*IPOW2(levmx)-iminsize || iicur >= imax*IPOW2(levmx)-iminsize ) ) nbotval = ic+noffset;
- if (iirht == imax*IPOW2(levmx)-iminsize && (jjcur < 1*IPOW2(levmx)-jminsize || jjcur >= jmax*IPOW2(levmx)-jminsize ) ) nrhtval = ic+noffset;
- if (jjtop == jmax*IPOW2(levmx)-jminsize && (iicur < 1*IPOW2(levmx)-iminsize || iicur >= imax*IPOW2(levmx)-iminsize ) ) ntopval = ic+noffset;
-
- // need to check for finer neighbor first
- // Right and top neighbor don't change for finer, so drop through to same size
- // Left and bottom need to be half of same size index for finer test
- if (lev != levmx) {
- int iilftfiner = iicur-(iicur-iilft)/2;
- int jjbotfiner = jjcur-(jjcur-jjbot)/2;
- if (nlftval < 0) nlftval = read_hash(jjcur *(imaxsize-iminsize)+iilftfiner, hash);
- if (nbotval < 0) nbotval = read_hash(jjbotfiner*(imaxsize-iminsize)+iicur, hash);
- }
-
- // same size neighbor
- if (nlftval < 0) {
- int nlfttry = read_hash(jjcur*(imaxsize-iminsize)+iilft, hash);
- if (nlfttry >= 0 && nlfttry < (int)ncells && level[nlfttry] == lev) nlftval = nlfttry;
- }
- if (nrhtval < 0) nrhtval = read_hash(jjcur*(imaxsize-iminsize)+iirht, hash);
- if (nbotval < 0) {
- int nbottry = read_hash(jjbot*(imaxsize-iminsize)+iicur, hash);
- if (nbottry >= 0 && nbottry < (int)ncells && level[nbottry] == lev) nbotval = nbottry;
- }
- if (ntopval < 0) ntopval = read_hash(jjtop*(imaxsize-iminsize)+iicur, hash);
-
- // Now we need to take care of special case where bottom and left boundary need adjustment since
- // expected cell doesn't exist on these boundaries if it is finer than current cell
- if (lev != levmx) {
- if (jjcur < 1*IPOW2(levmx)) {
- if (nrhtval < 0) {
- int jjtopfiner = (jjcur+jjtop)/2;
- nrhtval = read_hash(jjtopfiner*(imaxsize-iminsize)+iirht, hash);
- }
- if (nlftval < 0) {
- int iilftfiner = iicur-(iicur-iilft)/2;
- int jjtopfiner = (jjcur+jjtop)/2;
- nlftval = read_hash(jjtopfiner*(imaxsize-iminsize)+iilftfiner, hash);
- }
- }
-
- if (iicur < 1*IPOW2(levmx)) {
- if (ntopval < 0) {
- int iirhtfiner = (iicur+iirht)/2;
- ntopval = read_hash(jjtop*(imaxsize-iminsize)+iirhtfiner, hash);
- }
- if (nbotval < 0) {
- int iirhtfiner = (iicur+iirht)/2;
- int jjbotfiner = jjcur-(jjcur-jjbot)/2;
- nbotval = read_hash(jjbotfiner*(imaxsize-iminsize)+iirhtfiner, hash);
- }
- }
- }
-
- // coarser neighbor
- if (lev != 0){
- if (nlftval < 0) {
- iilft -= iicur-iilft;
- int jjlft = (jj/2)*2*levmult-jminsize;
- int nlfttry = read_hash(jjlft*(imaxsize-iminsize)+iilft, hash);
- if (nlfttry >= 0 && nlfttry < (int)ncells && level[nlfttry] == lev-1) nlftval = nlfttry;
- }
- if (nrhtval < 0) {
- int jjrht = (jj/2)*2*levmult-jminsize;
- int nrhttry = read_hash(jjrht*(imaxsize-iminsize)+iirht, hash);
- if (nrhttry >= 0 && nrhttry < (int)ncells && level[nrhttry] == lev-1) nrhtval = nrhttry;
- }
- if (nbotval < 0) {
- jjbot -= jjcur-jjbot;
- int iibot = (ii/2)*2*levmult-iminsize;
- int nbottry = read_hash(jjbot*(imaxsize-iminsize)+iibot, hash);
- if (nbottry >= 0 && nbottry < (int)ncells && level[nbottry] == lev-1) nbotval = nbottry;
- }
- if (ntopval < 0) {
- int iitop = (ii/2)*2*levmult-iminsize;
- int ntoptry = read_hash(jjtop*(imaxsize-iminsize)+iitop, hash);
- if (ntoptry >= 0 && ntoptry < (int)ncells && level[ntoptry] == lev-1) ntopval = ntoptry;
- }
- }
-
- nlft[ic] = nlftval;
- nrht[ic] = nrhtval;
- nbot[ic] = nbotval;
- ntop[ic] = ntopval;
-
- //fprintf(fp,"%d: neighbors[%d] = %d %d %d %d\n",mype,ic,nlft[ic],nrht[ic],nbot[ic],ntop[ic]);
- }
-
- if (DEBUG) {
-#ifdef _OPENMP
-#pragma omp barrier
-#pragma omp master
- {
-#endif
- print_local();
-
- int jmaxglobal = (jmax+1)*IPOW2(levmx);
- int imaxglobal = (imax+1)*IPOW2(levmx);
- fprintf(fp,"\n HASH 0 numbering\n");
- for (int jj = jmaxglobal-1; jj>=0; jj--){
- fprintf(fp,"%2d: %4d:",mype,jj);
- if (jj >= jminsize && jj < jmaxsize) {
- for (int ii = 0; ii<imaxglobal; ii++){
- if (ii >= iminsize && ii < imaxsize) {
- fprintf(fp,"%5d",read_hash((jj-jminsize)*(imaxsize-iminsize)+(ii-iminsize), hash));
- } else {
- fprintf(fp," ");
- }
- }
- }
- fprintf(fp,"\n");
- }
- fprintf(fp,"%2d: ",mype);
- for (int ii = 0; ii<imaxglobal; ii++){
- fprintf(fp,"%4d:",ii);
- }
- fprintf(fp,"\n");
-
- fprintf(fp,"\n nlft numbering\n");
- for (int jj = jmaxglobal-1; jj>=0; jj--){
- fprintf(fp,"%2d: %4d:",mype,jj);
- if (jj >= jminsize && jj < jmaxsize) {
- for (int ii = 0; ii<imaxglobal; ii++){
- if (ii >= iminsize && ii < imaxsize) {
- int hashval = read_hash((jj-jminsize)*(imaxsize-iminsize)+(ii-iminsize), hash)-noffset;
- if (hashval >= 0 && hashval < (int)ncells) {
- fprintf(fp,"%5d",nlft[hashval]);
- } else {
- fprintf(fp," ");
- }
- } else {
- fprintf(fp," ");
- }
- }
- }
- fprintf(fp,"\n");
- }
- fprintf(fp,"%2d: ",mype);
- for (int ii = 0; ii<imaxglobal; ii++){
- fprintf(fp,"%4d:",ii);
- }
- fprintf(fp,"\n");
-
- fprintf(fp,"\n nrht numbering\n");
- for (int jj = jmaxglobal-1; jj>=0; jj--){
- fprintf(fp,"%2d: %4d:",mype,jj);
- if (jj >= jminsize && jj < jmaxsize) {
- for (int ii = 0; ii<imaxglobal; ii++){
- if (ii >= iminsize && ii < imaxsize) {
- int hashval = read_hash((jj-jminsize)*(imaxsize-iminsize)+(ii-iminsize), hash)-noffset;
- if (hashval >= 0 && hashval < (int)ncells) {
- fprintf(fp,"%5d",nrht[hashval]);
- } else {
- fprintf(fp," ");
- }
- } else {
- fprintf(fp," ");
- }
- }
- }
- fprintf(fp,"\n");
- }
- fprintf(fp,"%2d: ",mype);
- for (int ii = 0; ii<imaxglobal; ii++){
- fprintf(fp,"%4d:",ii);
- }
- fprintf(fp,"\n");
-
- fprintf(fp,"\n nbot numbering\n");
- for (int jj = jmaxglobal-1; jj>=0; jj--){
- fprintf(fp,"%2d: %4d:",mype,jj);
- if (jj >= jminsize && jj < jmaxsize) {
- for (int ii = 0; ii<imaxglobal; ii++){
- if (ii >= iminsize && ii < imaxsize) {
- int hashval = read_hash((jj-jminsize)*(imaxsize-iminsize)+(ii-iminsize), hash)-noffset;
- if (hashval >= 0 && hashval < (int)ncells) {
- fprintf(fp,"%5d",nbot[hashval]);
- } else {
- fprintf(fp," ");
- }
- } else {
- fprintf(fp," ");
- }
- }
- }
- fprintf(fp,"\n");
- }
- fprintf(fp,"%2d: ",mype);
- for (int ii = 0; ii<imaxglobal; ii++){
- fprintf(fp,"%4d:",ii);
- }
- fprintf(fp,"\n");
-
- fprintf(fp,"\n ntop numbering\n");
- for (int jj = jmaxglobal-1; jj>=0; jj--){
- fprintf(fp,"%2d: %4d:",mype,jj);
- if (jj >= jminsize && jj < jmaxsize) {
- for (int ii = 0; ii<imaxglobal; ii++){
- if (ii >= iminsize && ii < imaxsize) {
- int hashval = read_hash((jj-jminsize)*(imaxsize-iminsize)+(ii-iminsize), hash)-noffset;
- if (hashval >= 0 && hashval < (int)ncells) {
- fprintf(fp,"%5d",ntop[hashval]);
- } else {
- fprintf(fp," ");
- }
- } else {
- fprintf(fp," ");
- }
- }
- }
- fprintf(fp,"\n");
- }
- fprintf(fp,"%2d: ",mype);
- for (int ii = 0; ii<imaxglobal; ii++){
- fprintf(fp,"%4d:",ii);
- }
- fprintf(fp,"\n");
-#ifdef _OPENMP
- }
-#pragma omp barrier
-#endif
- }
-
- if (TIMING_LEVEL >= 2) {
-#ifdef _OPENMP
-#pragma omp master
-#endif
- cpu_timers[MESH_TIMER_HASH_QUERY] += cpu_timer_stop(tstart_lev2);
- cpu_timer_start(&tstart_lev2);
- }
-
-#ifdef HAVE_MPI
- if (numpe > 1) {
- static int num_comm_partners;
-
- static vector<int> iminsize_global;
- static vector<int> imaxsize_global;
- static vector<int> jminsize_global;
- static vector<int> jmaxsize_global;
- static vector<int> comm_partner;
-
-#ifdef _OPENMP
-#pragma omp barrier
-#pragma omp master
- {
-#endif
- iminsize_global.resize(numpe);
- imaxsize_global.resize(numpe);
- jminsize_global.resize(numpe);
- jmaxsize_global.resize(numpe);
- comm_partner.resize(numpe,-1);
-
- MPI_Allgather(&iminsize, 1, MPI_INT, &iminsize_global[0], 1, MPI_INT, MPI_COMM_WORLD);
- MPI_Allgather(&imaxsize, 1, MPI_INT, &imaxsize_global[0], 1, MPI_INT, MPI_COMM_WORLD);
- MPI_Allgather(&jminsize, 1, MPI_INT, &jminsize_global[0], 1, MPI_INT, MPI_COMM_WORLD);
- MPI_Allgather(&jmaxsize, 1, MPI_INT, &jmaxsize_global[0], 1, MPI_INT, MPI_COMM_WORLD);
-
- num_comm_partners = 0;
- for (int ip = 0; ip < numpe; ip++){
- if (ip == mype) continue;
- if (iminsize_global[ip] > imaxtile) continue;
- if (imaxsize_global[ip] < imintile) continue;
- if (jminsize_global[ip] > jmaxtile) continue;
- if (jmaxsize_global[ip] < jmintile) continue;
- comm_partner[num_comm_partners] = ip;
- num_comm_partners++;
- //if (DEBUG) fprintf(fp,"%d: overlap with processor %d bounding box is %d %d %d %d\n",mype,ip,iminsize_global[ip],imaxsize_global[ip],jminsize_global[ip],jmaxsize_global[ip]);
- }
-#ifdef _OPENMP
- }
-#pragma omp barrier
-#endif
-
- static vector<int> border_cell;
-
-#ifdef _OPENMP
-#pragma omp barrier
-#pragma omp master
- {
-#endif
- border_cell.resize(ncells);
-
-#ifdef BOUNDS_CHECK
- for (uint ic=0; ic<ncells; ic++){
- int nl = nlft[ic];
- if (nl != -1){
- nl -= noffset;
- if (nl<0 || nl>= (int)ncells) printf("%d: Warning at line %d cell %d nlft %d\n",mype,__LINE__,ic,nl);
- }
- int nr = nrht[ic];
- if (nr != -1){
- nr -= noffset;
- if (nr<0 || nr>= (int)ncells) printf("%d: Warning at line %d cell %d nrht %d\n",mype,__LINE__,ic,nr);
- }
- int nb = nbot[ic];
- if (nb != -1){
- nb -= noffset;
- if (nb<0 || nb>= (int)ncells) printf("%d: Warning at line %d cell %d nbot %d\n",mype,__LINE__,ic,nb);
- }
- int nt = ntop[ic];
- if (nt != -1){
- nt -= noffset;
- if (nt<0 || nt>= (int)ncells) printf("%d: Warning at line %d cell %d ntop %d\n",mype,__LINE__,ic,nt);
- }
- }
-#endif
-
-#ifdef _OPENMP
- }
-#pragma omp barrier
-#endif
-
- static vector<int> border_cell_out;
-
-#ifdef _OPENMP
-#pragma omp barrier
-#pragma omp master
- {
-#endif
- border_cell_out.resize(ncells);
-#ifdef _OPENMP
- }
-#pragma omp barrier
-#endif
-
-#ifdef _OPENMP
-#pragma omp for
-#endif
- for (uint ic=0; ic<ncells; ic++){
- int iborder_cell = 0;
-
- // left neighbor is undefined -- or -- if left is at finer level check left top for undefined
- if (nlft[ic] == -1 || (level[nlft[ic]-noffset] > level[ic] && ntop[nlft[ic]-noffset] == -1) ){
- iborder_cell |= 0x0001;
- }
- if (nrht[ic] == -1 || (level[nrht[ic]-noffset] > level[ic] && ntop[nrht[ic]-noffset] == -1) ){
- iborder_cell |= 0x0002;
- }
- if (nbot[ic] == -1 || (level[nbot[ic]-noffset] > level[ic] && nrht[nbot[ic]-noffset] == -1) ) {
- iborder_cell |= 0x0004;
- }
- if (ntop[ic] == -1 || (level[ntop[ic]-noffset] > level[ic] && nrht[ntop[ic]-noffset] == -1) ) {
- iborder_cell |= 0x0008;
- }
-
- border_cell[ic] = iborder_cell;
- }
-
-#ifdef _OPENMP
-#pragma omp for
-#endif
- for (uint ic=0; ic<ncells; ic++){
- int iborder_cell = border_cell[ic];
-
- if (iborder_cell == 0) {
-
- int nl = nlft[ic]-noffset;
- if (nl >= 0 && nl < (int)ncells) {
- if ((border_cell[nl] & 0x0001) == 0x0001) {
- iborder_cell |= 0x0016;
- } else if (level[nl] > level[ic]){
- int ntl = ntop[nl]-noffset;
- if (ntl >= 0 && ntl < (int)ncells && (border_cell[ntl] & 0x0001) == 0x0001) {
- iborder_cell |= 0x0016;
- }
- }
- }
- int nr = nrht[ic]-noffset;
- if (nr >= 0 && nr < (int)ncells) {
- if ((border_cell[nrht[ic]-noffset] & 0x0002) == 0x0002) {
- iborder_cell |= 0x0032;
- } else if (level[nr] > level[ic]){
- int ntr = ntop[nr]-noffset;
- if (ntr >= 0 && ntr < (int)ncells && (border_cell[ntr] & 0x0002) == 0x0002) {
- iborder_cell |= 0x0032;
- }
- }
- }
- int nb = nbot[ic]-noffset;
- if (nb >= 0 && nb < (int)ncells) {
- if ((border_cell[nb] & 0x0004) == 0x0004) {
- iborder_cell |= 0x0064;
- } else if (level[nb] > level[ic]){
- int nrb = nrht[nb]-noffset;
- if (nrb >= 0 && nrb < (int)ncells && (border_cell[nrb] & 0x0004) == 0x0004) {
- iborder_cell |= 0x0064;
- }
- }
- }
- int nt = ntop[ic]-noffset;
- if (nt >= 0 && nt < (int)ncells) {
- if ((border_cell[nt] & 0x0008) == 0x0008) {
- iborder_cell |= 0x0128;
- } else if (level[nt] > level[ic]){
- int nrt = nrht[nt]-noffset;
- if (nrt >= 0 && nrt < (int)ncells && (border_cell[nrt] & 0x0008) == 0x0008) {
- iborder_cell |= 0x0128;
- }
- }
- }
- }
-
- border_cell_out[ic] = iborder_cell;
- }
-// indent offset
-
- vector<int> border_cell_num;
-
- static int nbsize_local;
-
- static vector<int> border_cell_i;
- static vector<int> border_cell_j;
- static vector<int> border_cell_level;
-
-#ifdef _OPENMP
-#pragma omp barrier
-#pragma omp master
- {
-#endif
- for (int ic=0; ic<(int)ncells; ic++){
- if (border_cell_out[ic] > 0) border_cell_num.push_back(ic+noffset);
- }
- //printf("%d: border cell size is %d\n",mype,border_cell_num.size());
-
- nbsize_local = border_cell_num.size();
-
- border_cell_i.resize(nbsize_local);
- border_cell_j.resize(nbsize_local);
- border_cell_level.resize(nbsize_local);
-
- for (int ic = 0; ic <nbsize_local; ic++){
- int cell_num = border_cell_num[ic]-noffset;
- border_cell_i[ic] = i[cell_num];
- border_cell_j[ic] = j[cell_num];
- border_cell_level[ic] = level[cell_num];
- }
-#ifdef _OPENMP
- }
-#pragma omp barrier
-#endif
-
- if (DEBUG) {
-#ifdef _OPENMP
-#pragma omp barrier
-#pragma omp master
- {
-#endif
- fprintf(fp,"%d: Border cell size is %d\n",mype,nbsize_local);
- for (int ib = 0; ib <nbsize_local; ib++){
- fprintf(fp,"%d: Border cell %d is %d i %d j %d level %d\n",mype,ib,border_cell_num[ib],
- border_cell_i[ib],border_cell_j[ib],border_cell_level[ib]);
- }
-#ifdef _OPENMP
- }
-#pragma omp barrier
-#endif
- }
-
- if (TIMING_LEVEL >= 2) {
-#ifdef _OPENMP
-#pragma omp master
-#endif
- cpu_timers[MESH_TIMER_FIND_BOUNDARY] += cpu_timer_stop(tstart_lev2);
- cpu_timer_start(&tstart_lev2);
- }
-
- // Allocate push database
-
- static int **send_database;
-
-#ifdef _OPENMP
-#pragma omp barrier
-#pragma omp master
- {
-#endif
- send_database = (int**)malloc(num_comm_partners*sizeof(int *));
- for (int ip = 0; ip < num_comm_partners; ip++){
- send_database[ip] = (int *)malloc(nbsize_local*sizeof(int));
- }
-#ifdef _OPENMP
- }
-#pragma omp barrier
-#endif
-
- // Compute the overlap between processor bounding boxes and set up push database
-
- static vector<int> send_buffer_count;
-
-#ifdef _OPENMP
-#pragma omp barrier
-#pragma omp master
- {
-#endif
- send_buffer_count.resize(num_comm_partners);
- for (int ip = 0; ip < num_comm_partners; ip++){
- int icount = 0;
- for (int ib = 0; ib <nbsize_local; ib++){
- int lev = border_cell_level[ib];
- int levmult = IPOW2(levmx-lev);
- if (border_cell_i[ib]*levmult >= iminsize_global[comm_partner[ip]] &&
- border_cell_i[ib]*levmult <= imaxsize_global[comm_partner[ip]] &&
- border_cell_j[ib]*levmult >= jminsize_global[comm_partner[ip]] &&
- border_cell_j[ib]*levmult <= jmaxsize_global[comm_partner[ip]] ) {
- // border_cell_i[ib],border_cell_j[ib],border_cell_level[ib]);
- send_database[ip][icount] = ib;
- icount++;
- }
- }
- send_buffer_count[ip]=icount;
- }
-#ifdef _OPENMP
- }
-#pragma omp barrier
-#endif
-
- // Initialize L7_Push_Setup with num_comm_partners, comm_partner, send_database and
- // send_buffer_count. L7_Push_Setup will copy data and determine recv_buffer_counts.
- // It will return receive_count_total for use in allocations
-
- static int receive_count_total;
- int i_push_handle = 0;
-
-#ifdef _OPENMP
-#pragma omp barrier
-#pragma omp master
- {
-#endif
- i_push_handle = 0;
- L7_Push_Setup(num_comm_partners, &comm_partner[0], &send_buffer_count[0],
- send_database, &receive_count_total, &i_push_handle);
-#ifdef _OPENMP
- }
-#pragma omp barrier
-#endif
-
- if (DEBUG) {
-#ifdef _OPENMP
-#pragma omp barrier
-#pragma omp master
- {
-#endif
- fprintf(fp,"DEBUG num_comm_partners %d\n",num_comm_partners);
- for (int ip = 0; ip < num_comm_partners; ip++){
- fprintf(fp,"DEBUG comm partner is %d data count is %d\n",comm_partner[ip],send_buffer_count[ip]);
- for (int ic = 0; ic < send_buffer_count[ip]; ic++){
- int ib = send_database[ip][ic];
- fprintf(fp,"DEBUG \t index %d cell number %d i %d j %d level %d\n",ib,border_cell_num[ib],
- border_cell_i[ib],border_cell_j[ib],border_cell_level[ib]);
- }
- }
-#ifdef _OPENMP
- }
-#endif
- }
-
- // Can now free the send database. The non-static vectors deallocate automatically
- // at scope exit; the static vectors keep their storage between calls
-
-#ifdef _OPENMP
-#pragma omp barrier
-#pragma omp master
- {
-#endif
- for (int ip = 0; ip < num_comm_partners; ip++){
- free(send_database[ip]);
- }
- free(send_database);
-#ifdef _OPENMP
- }
-#endif
-
- if (TIMING_LEVEL >= 2) {
-#ifdef _OPENMP
-#pragma omp master
-#endif
- cpu_timers[MESH_TIMER_PUSH_SETUP] += cpu_timer_stop(tstart_lev2);
- cpu_timer_start(&tstart_lev2);
- }
-
- // Push the data needed to the adjacent processors
- static int *border_cell_num_local;
- static int *border_cell_i_local;
- static int *border_cell_j_local;
- static int *border_cell_level_local;
-
-#ifdef _OPENMP
-#pragma omp barrier
-#pragma omp master
- {
-#endif
- border_cell_num_local = (int *)malloc(receive_count_total*sizeof(int));
- border_cell_i_local = (int *)malloc(receive_count_total*sizeof(int));
- border_cell_j_local = (int *)malloc(receive_count_total*sizeof(int));
- border_cell_level_local = (int *)malloc(receive_count_total*sizeof(int));
-
- L7_Push_Update(&border_cell_num[0], border_cell_num_local, i_push_handle);
- L7_Push_Update(&border_cell_i[0], border_cell_i_local, i_push_handle);
- L7_Push_Update(&border_cell_j[0], border_cell_j_local, i_push_handle);
- L7_Push_Update(&border_cell_level[0], border_cell_level_local, i_push_handle);
-
- L7_Push_Free(&i_push_handle);
-#ifdef _OPENMP
- }
-#pragma omp barrier
-#endif
-
- nbsize_local = receive_count_total;
-
- if (DEBUG) {
-#ifdef _OPENMP
-#pragma omp barrier
-#pragma omp master
- {
-#endif
- for (int ic = 0; ic < nbsize_local; ic++) {
- fprintf(fp,"%d: Local Border cell %d is %d i %d j %d level %d\n",mype,ic,border_cell_num_local[ic],
- border_cell_i_local[ic],border_cell_j_local[ic],border_cell_level_local[ic]);
- }
-#ifdef _OPENMP
- }
-#endif
- }
-
- if (TIMING_LEVEL >= 2) {
-#ifdef _OPENMP
-#pragma omp master
-#endif
- cpu_timers[MESH_TIMER_PUSH_BOUNDARY] += cpu_timer_stop(tstart_lev2);
- cpu_timer_start(&tstart_lev2);
- }
-
- if (TIMING_LEVEL >= 2) {
-#ifdef _OPENMP
-#pragma omp master
-#endif
- cpu_timers[MESH_TIMER_LOCAL_LIST] += cpu_timer_stop(tstart_lev2);
- cpu_timer_start(&tstart_lev2);
- }
-
- if (DEBUG) {
-#ifdef _OPENMP
-#pragma omp barrier
-#pragma omp master
- {
-#endif
- int jmaxglobal = (jmax+1)*IPOW2(levmx);
- int imaxglobal = (imax+1)*IPOW2(levmx);
- fprintf(fp,"\n HASH numbering before layer 1\n");
- for (int jj = jmaxglobal-1; jj>=0; jj--){
- fprintf(fp,"%2d: %4d:",mype,jj);
- if (jj >= jminsize && jj < jmaxsize) {
- for (int ii = 0; ii<imaxglobal; ii++){
- if (ii >= iminsize && ii < imaxsize) {
- fprintf(fp,"%5d",read_hash((jj-jminsize)*(imaxsize-iminsize)+(ii-iminsize), hash));
- } else {
- fprintf(fp," ");
- }
- }
- }
- fprintf(fp,"\n");
- }
- fprintf(fp,"%2d: ",mype);
- for (int ii = 0; ii<imaxglobal; ii++){
- fprintf(fp,"%4d:",ii);
- }
- fprintf(fp,"\n");
-#ifdef _OPENMP
- }
-#endif
- }
-
- vector<int> border_cell_needed_local;
-
-#ifdef _OPENMP
-#pragma omp barrier
-#pragma omp master
- {
-#endif
- border_cell_needed_local.resize(nbsize_local, 0);
-#ifdef _OPENMP
- }
-#pragma omp barrier
-#endif
-
- // Layer 1
-#ifdef _OPENMP
-#pragma omp barrier
-#pragma omp master
- {
-#endif
- for (int ic =0; ic<nbsize_local; ic++){
- int jj = border_cell_j_local[ic];
- int ii = border_cell_i_local[ic];
- int lev = border_cell_level_local[ic];
- int levmult = IPOW2(levmx-lev);
-
- int iicur = ii*levmult-iminsize;
- int iilft = max( (ii-1)*levmult, 0 )-iminsize;
- int iirht = min( (ii+1)*levmult, imaxcalc-1)-iminsize;
- int jjcur = jj*levmult-jminsize;
- int jjbot = max( (jj-1)*levmult, 0 )-jminsize;
- int jjtop = min( (jj+1)*levmult, jmaxcalc-1)-jminsize;
-
- //fprintf(fp,"DEBUG layer ic %d num %d i %d j %d lev %d\n",ic,border_cell_num_local[ic],ii,jj,lev);
-
- int iborder = 0;
-
- // Test for cell to left
- if (iicur-(iicur-iilft)/2 >= 0 && iicur-(iicur-iilft)/2 < imaxsize-iminsize && jjcur >= 0 && (jjcur+jjtop)/2 < jmaxsize-jminsize){
- int nlftval = -1;
- // Check for finer cell left and bottom side
- if (lev != levmx){ // finer neighbor
- int iilftfiner = iicur-(iicur-iilft)/2;
- nlftval = read_hash(jjcur*(imaxsize-iminsize)+iilftfiner, hash);
- // Also check for finer cell left and top side
- if (nlftval < 0) {
- int jjtopfiner = (jjcur+jjtop)/2;
- nlftval = read_hash(jjtopfiner*(imaxsize-iminsize)+iilftfiner, hash);
- }
- }
-
- if (nlftval < 0 && iilft >= 0) { // same size
- int nlfttry = read_hash(jjcur*(imaxsize-iminsize)+iilft, hash);
- // we have to test for same level or it could be a finer cell one cell away that it is matching
- if (nlfttry-noffset >= 0 && nlfttry-noffset < (int)ncells && level[nlfttry-noffset] == lev) {
- nlftval = nlfttry;
- }
- }
-
- if (lev != 0 && nlftval < 0 && iilft-(iicur-iilft) >= 0){ // coarser neighbor
- iilft -= iicur-iilft;
- int jjlft = (jj/2)*2*levmult-jminsize;
- int nlfttry = read_hash(jjlft*(imaxsize-iminsize)+iilft, hash);
- // we have to test for coarser level or it could be a same size cell one or two cells away that it is matching
- if (nlfttry-noffset >= 0 && nlfttry-noffset < (int)ncells && level[nlfttry-noffset] == lev-1) {
- nlftval = nlfttry;
- }
- }
- if (nlftval >= 0) iborder |= 0x0001;
- }
-
- // Test for cell to right
- if (iirht < imaxsize-iminsize && iirht >= 0 && jjcur >= 0 && jjtop < jmaxsize-jminsize) {
- int nrhtval = -1;
- // right neighbor -- finer, same size and coarser
- nrhtval = read_hash(jjcur*(imaxsize-iminsize)+iirht, hash);
- // right neighbor -- finer right top test
- if (nrhtval < 0 && lev != levmx){
- int jjtopfiner = (jjcur+jjtop)/2;
- nrhtval = read_hash(jjtopfiner*(imaxsize-iminsize)+iirht, hash);
- }
- if (nrhtval < 0 && lev != 0) { // test for coarser, but not directly right
- int jjrhtcoarser = (jj/2)*2*levmult-jminsize;
- if (jjrhtcoarser != jjcur) {
- int nrhttry = read_hash(jjrhtcoarser*(imaxsize-iminsize)+iirht, hash);
- if (nrhttry-noffset >= 0 && nrhttry-noffset < (int)ncells && level[nrhttry-noffset] == lev-1) {
- nrhtval = nrhttry;
- }
- }
- }
- if (nrhtval > 0) iborder |= 0x0002;
- }
-
- // Test for cell to bottom
- if (iicur >= 0 && (iicur+iirht)/2 < imaxsize-iminsize && jjcur-(jjcur-jjbot)/2 >= 0 && jjcur-(jjcur-jjbot)/2 < jmaxsize-jminsize){
- int nbotval = -1;
- // Check for finer cell below and left side
- if (lev != levmx){ // finer neighbor
- int jjbotfiner = jjcur-(jjcur-jjbot)/2;
- nbotval = read_hash(jjbotfiner*(imaxsize-iminsize)+iicur, hash);
- // Also check for finer cell below and right side
- if (nbotval < 0) {
- int iirhtfiner = (iicur+iirht)/2;
- nbotval = read_hash(jjbotfiner*(imaxsize-iminsize)+iirhtfiner, hash);
- }
- }
-
- if (nbotval < 0 && jjbot >= 0) { // same size
- int nbottry = read_hash(jjbot*(imaxsize-iminsize)+iicur, hash);
- // we have to test for same level or it could be a finer cell one cell away that it is matching
- if (nbottry-noffset >= 0 && nbottry-noffset < (int)ncells && level[nbottry-noffset] == lev) {
- nbotval = nbottry;
- }
- }
-
- if (lev != 0 && nbotval < 0 && jjbot-(jjcur-jjbot) >= 0){ // coarser neighbor
- jjbot -= jjcur-jjbot;
- int iibot = (ii/2)*2*levmult-iminsize;
- int nbottry = read_hash(jjbot*(imaxsize-iminsize)+iibot, hash);
- // we have to test for coarser level or it could be a same size cell one or two cells away that it is matching
- if (nbottry-noffset >= 0 && nbottry-noffset < (int)ncells && level[nbottry-noffset] == lev-1) {
- nbotval = nbottry;
- }
- }
- if (nbotval >= 0) iborder |= 0x0004;
- }
-
- // Test for cell to top
- if (iirht < imaxsize-iminsize && iicur >= 0 && jjtop >= 0 && jjtop < jmaxsize-jminsize) {
- int ntopval = -1;
- // top neighbor -- finer, same size and coarser
- ntopval = read_hash(jjtop*(imaxsize-iminsize)+iicur, hash);
- // top neighbor -- finer top right test
- if (ntopval < 0 && lev != levmx){
- int iirhtfiner = (iicur+iirht)/2;
- ntopval = read_hash(jjtop*(imaxsize-iminsize)+iirhtfiner, hash);
- }
- if (ntopval < 0 && lev != 0) { // test for coarser, but not directly above
- int iitopcoarser = (ii/2)*2*levmult-iminsize;
- if (iitopcoarser != iicur) {
- int ntoptry = read_hash(jjtop*(imaxsize-iminsize)+iitopcoarser, hash);
- if (ntoptry-noffset >= 0 && ntoptry-noffset < (int)ncells && level[ntoptry-noffset] == lev-1) {
- ntopval = ntoptry;
- }
- }
- }
- if (ntopval > 0) iborder |= 0x0008;
- }
-
- if (iborder) border_cell_needed_local[ic] = iborder;
- }
-#ifdef _OPENMP
- } // end master region
-#pragma omp barrier
-#endif
-
- if (DEBUG) {
-#ifdef _OPENMP
-#pragma omp barrier
-#pragma omp master
- {
-#endif
- for(int ic=0; ic<nbsize_local; ic++){
- if (border_cell_needed_local[ic] == 0) continue;
- fprintf(fp,"%d: First set of needed cells ic %3d cell %3d type %3d\n",mype,ic,border_cell_num_local[ic],border_cell_needed_local[ic]);
- }
-#ifdef _OPENMP
- } // end master region
-#pragma omp barrier
-#endif
- }
-
- // Walk through cell array and set hash to border local index plus ncells+noffset for next pass
- //fprintf(fp,"%d: DEBUG new hash jminsize %d jmaxsize %d iminsize %d imaxsize %d\n",mype,jminsize,jmaxsize,iminsize,imaxsize);
-#ifdef _OPENMP
-#pragma omp barrier
-#pragma omp master
- {
-#endif
- for(int ic=0; ic<nbsize_local; ic++){
- if (border_cell_needed_local[ic] == 0) continue;
- //fprintf(fp,"%d: index %d cell %d i %d j %d\n",mype,ic,border_cell_num_local[ic],border_cell_i_local[ic],border_cell_j_local[ic]);
- int lev = border_cell_level_local[ic];
- int levmult = IPOW2(levmx-lev);
- int ii = border_cell_i_local[ic]*levmult-iminsize;
- int jj = border_cell_j_local[ic]*levmult-jminsize;
-
- write_hash(ncells+noffset+ic, jj*(imaxsize-iminsize)+ii, hash);
- }
-#ifdef _OPENMP
- } // end master region
-#pragma omp barrier
-#endif
-
- if (TIMING_LEVEL >= 2) {
-#ifdef _OPENMP
-#pragma omp master
-#endif
- cpu_timers[MESH_TIMER_LAYER1] += cpu_timer_stop(tstart_lev2);
- cpu_timer_start(&tstart_lev2);
- }
-
- if (DEBUG) {
-#ifdef _OPENMP
-#pragma omp barrier
-#pragma omp master
- {
-#endif
- print_local();
-
- int jmaxglobal = (jmax+1)*IPOW2(levmx);
- int imaxglobal = (imax+1)*IPOW2(levmx);
- fprintf(fp,"\n HASH numbering for 1 layer\n");
- for (int jj = jmaxglobal-1; jj>=0; jj--){
- fprintf(fp,"%2d: %4d:",mype,jj);
- if (jj >= jminsize && jj < jmaxsize) {
- for (int ii = 0; ii<imaxglobal; ii++){
- if (ii >= iminsize && ii < imaxsize) {
- fprintf(fp,"%5d",read_hash((jj-jminsize)*(imaxsize-iminsize)+(ii-iminsize), hash) );
- } else {
- fprintf(fp," ");
- }
- }
- }
- fprintf(fp,"\n");
- }
- fprintf(fp,"%2d: ",mype);
- for (int ii = 0; ii<imaxglobal; ii++){
- fprintf(fp,"%4d:",ii);
- }
- fprintf(fp,"\n");
-#ifdef _OPENMP
- } // end master region
-#pragma omp barrier
-#endif
- }
-
- // Layer 2
-#ifdef _OPENMP
-#pragma omp master
- {
-#endif
- for (int ic =0; ic<nbsize_local; ic++){
- if (border_cell_needed_local[ic] > 0) continue;
- int jj = border_cell_j_local[ic];
- int ii = border_cell_i_local[ic];
- int lev = border_cell_level_local[ic];
- int levmult = IPOW2(levmx-lev);
-
- int iicur = ii*levmult-iminsize;
- int iilft = max( (ii-1)*levmult, 0 )-iminsize;
- int iirht = min( (ii+1)*levmult, imaxcalc-1)-iminsize;
- int jjcur = jj*levmult-jminsize;
- int jjbot = max( (jj-1)*levmult, 0 )-jminsize;
- int jjtop = min( (jj+1)*levmult, jmaxcalc-1)-jminsize;
-
- //fprintf(fp," DEBUG layer2 ic %d num %d i %d j %d lev %d\n",ic,border_cell_num_local[ic],ii,jj,lev);
-
- int iborder = 0;
-
- // Test for cell to left
- if (iicur-(iicur-iilft)/2 >= 0 && iicur-(iicur-iilft)/2 < imaxsize-iminsize && jjcur >= 0 && (jjcur+jjtop)/2 < jmaxsize-jminsize){
- // Check for finer cell left and bottom side
- if (lev != levmx){ // finer neighbor
- int iilftfiner = iicur-(iicur-iilft)/2;
- int nl = read_hash(jjcur*(imaxsize-iminsize)+iilftfiner, hash);
- if (nl >= (int)(ncells+noffset) && (border_cell_needed_local[nl-ncells-noffset] & 0x0001) == 0x0001) {
- iborder = 0x0001;
- } else {
- // Also check for finer cell left and top side
- int jjtopfiner = (jjcur+jjtop)/2;
- int nlt = read_hash(jjtopfiner*(imaxsize-iminsize)+iilftfiner, hash);
- if ( nlt >= (int)(ncells+noffset) && (border_cell_needed_local[nlt-ncells-noffset] & 0x0001) == 0x0001) {
- iborder = 0x0001;
- }
- }
- }
- if ( (iborder & 0x0001) == 0 && iilft >= 0) { //same size
- int nl = read_hash(jjcur*(imaxsize-iminsize)+iilft, hash);
- int levcheck = -1;
- if (nl-noffset >= 0 && nl-noffset < (int)ncells) {
- levcheck = level[nl-noffset];
- } else if (nl >= 0 && (int)(nl-ncells-noffset) >= 0 && (int)(nl-ncells-noffset) < nbsize_local) {
- levcheck = border_cell_level_local[nl-ncells-noffset];
- }
- if (nl >= (int)(ncells+noffset) && levcheck == lev && (border_cell_needed_local[nl-ncells-noffset] & 0x0001) == 0x0001) {
- iborder = 0x0001;
- } else if (lev != 0 && iilft-(iicur-iilft) >= 0){ // coarser neighbor
- iilft -= iicur-iilft;
- int jjlft = (jj/2)*2*levmult-jminsize;
- nl = read_hash(jjlft*(imaxsize-iminsize)+iilft, hash);
- levcheck = -1;
- if (nl-noffset >= 0 && nl-noffset < (int)ncells) {
- levcheck = level[nl-noffset];
- } else if (nl >= 0 && (int)(nl-ncells-noffset) >= 0 && (int)(nl-ncells-noffset) < nbsize_local) {
- levcheck = border_cell_level_local[nl-ncells-noffset];
- }
- // we have to test for coarser level or it could be a same size cell one or two cells away that it is matching
- if (nl >= (int)(ncells+noffset) && levcheck == lev-1 && (border_cell_needed_local[nl-ncells-noffset] & 0x0001) == 0x0001) {
- iborder = 0x0001;
- }
- }
- }
- }
-
- // Test for cell to right
- if (iirht < imaxsize-iminsize && iirht >= 0 && jjcur >= 0 && jjtop < jmaxsize-jminsize) {
- // right neighbor -- finer, same size and coarser
- int nr = read_hash(jjcur*(imaxsize-iminsize)+iirht, hash);
- if (nr >= (int)(ncells+noffset) && (border_cell_needed_local[nr-ncells-noffset] & 0x0002) == 0x0002) {
- iborder = 0x0002;
- } else if (lev != levmx){
- // right neighbor -- finer right top test
- int jjtopfiner = (jjcur+jjtop)/2;
- int nrt = read_hash(jjtopfiner*(imaxsize-iminsize)+iirht, hash);
- if (nrt >= (int)(ncells+noffset) && (border_cell_needed_local[nrt-ncells-noffset] & 0x0002) == 0x0002) {
- iborder = 0x0002;
- }
- }
- if ( (iborder & 0x0002) == 0 && lev != 0) { // test for coarser, but not directly right
- int jjrhtcoarser = (jj/2)*2*levmult-jminsize;
- if (jjrhtcoarser != jjcur) {
- int nr = read_hash(jjrhtcoarser*(imaxsize-iminsize)+iirht, hash);
- int levcheck = -1;
- if (nr-noffset >= 0 && nr-noffset < (int)ncells) {
- levcheck = level[nr-noffset];
- } else if (nr >= 0 && (int)(nr-ncells-noffset) >= 0 && (int)(nr-ncells-noffset) < nbsize_local) {
- levcheck = border_cell_level_local[nr-ncells-noffset];
- }
- if (nr >= (int)(ncells+noffset) && levcheck == lev-1 && (border_cell_needed_local[nr-ncells-noffset] & 0x0002) == 0x0002) {
- iborder = 0x0002;
- }
- }
- }
- }
-
- // Test for cell to bottom
- if (iicur >= 0 && (iicur+iirht)/2 < imaxsize-iminsize && jjcur-(jjcur-jjbot)/2 >= 0 && jjcur-(jjcur-jjbot)/2 < jmaxsize-jminsize){
- // Check for finer cell below and left side
- if (lev != levmx){ // finer neighbor
- int jjbotfiner = jjcur-(jjcur-jjbot)/2;
- int nb = read_hash(jjbotfiner*(imaxsize-iminsize)+iicur, hash);
- if (nb >= (int)(ncells+noffset) && (border_cell_needed_local[nb-ncells-noffset] & 0x0004) == 0x0004) {
- iborder = 0x0004;
- } else {
- // Also check for finer cell below and right side
- int iirhtfiner = (iicur+iirht)/2;
- int nbr = read_hash(jjbotfiner*(imaxsize-iminsize)+iirhtfiner, hash);
- if (nbr >= (int)(ncells+noffset) && (border_cell_needed_local[nbr-ncells-noffset] & 0x0004) == 0x0004) {
- iborder = 0x0004;
- }
- }
- }
- if ( (iborder & 0x0004) == 0 && jjbot >= 0) { //same size
- int nb = read_hash(jjbot*(imaxsize-iminsize)+iicur, hash);
- int levcheck = -1;
- if (nb-noffset >= 0 && nb-noffset < (int)ncells) {
- levcheck = level[nb-noffset];
- } else if (nb >= 0 && (int)(nb-ncells-noffset) >= 0 && (int)(nb-ncells-noffset) < nbsize_local) {
- levcheck = border_cell_level_local[nb-ncells-noffset];
- }
- if (nb >= (int)(ncells+noffset) && levcheck == lev && (border_cell_needed_local[nb-ncells-noffset] & 0x0004) == 0x0004) {
- iborder = 0x0004;
- } else if (lev != 0 && jjbot-(jjcur-jjbot) >= 0){ // coarser neighbor
- jjbot -= jjcur-jjbot;
- int iibot = (ii/2)*2*levmult-iminsize;
- nb = read_hash(jjbot*(imaxsize-iminsize)+iibot, hash);
- levcheck = -1;
- if (nb-noffset >= 0 && nb-noffset < (int)ncells) {
- levcheck = level[nb-noffset];
- } else if (nb >= 0 && (int)(nb-ncells-noffset) >= 0 && (int)(nb-ncells-noffset) < nbsize_local) {
- levcheck = border_cell_level_local[nb-ncells-noffset];
- }
- // we have to test for coarser level or it could be a same size cell one or two cells away that it is matching
- if (nb >= (int)(ncells+noffset) && levcheck == lev-1 && (border_cell_needed_local[nb-ncells-noffset] & 0x0004) == 0x0004) {
- iborder = 0x0004;
- }
- }
- }
- }
-
- // Test for cell to top
- if (iirht < imaxsize-iminsize && iicur >= 0 && jjtop >= 0 && jjtop < jmaxsize-jminsize) {
- // top neighbor -- finer, same size and coarser
- int nt = read_hash(jjtop*(imaxsize-iminsize)+iicur, hash);
- if (nt >= (int)(ncells+noffset) && (border_cell_needed_local[nt-ncells-noffset] & 0x0008) == 0x0008) {
- iborder = 0x0008;
- } else if (lev != levmx){
- int iirhtfiner = (iicur+iirht)/2;
- int ntr = read_hash(jjtop*(imaxsize-iminsize)+iirhtfiner, hash);
- if ( ntr >= (int)(ncells+noffset) && (border_cell_needed_local[ntr-ncells-noffset] & 0x0008) == 0x0008) {
- iborder = 0x0008;
- }
- }
- if ( (iborder & 0x0008) == 0 && lev != 0) { // test for coarser, but not directly above
- int iitopcoarser = (ii/2)*2*levmult-iminsize;
- if (iitopcoarser != iicur) {
- int nb = read_hash(jjtop*(imaxsize-iminsize)+iitopcoarser, hash);
- int levcheck = -1;
- if (nb-noffset >= 0 && nb-noffset < (int)ncells) {
- levcheck = level[nb-noffset];
- } else if (nb >= 0 && (int)(nb-ncells-noffset) >= 0 && (int)(nb-ncells-noffset) < nbsize_local) {
- levcheck = border_cell_level_local[nb-ncells-noffset];
- }
- if (nb-noffset >= (int)(ncells-noffset) && levcheck == lev-1 && (border_cell_needed_local[nb-ncells-noffset] & 0x0008) == 0x0008) {
- iborder = 0x0008;
- }
- }
- }
- }
-
- if (iborder) border_cell_needed_local[ic] = iborder |= 0x0016;
- }
-#ifdef _OPENMP
- } // end master region
-#pragma omp barrier
-#endif
-
- vector<int> indices_needed;
-
-#ifdef _OPENMP
-#pragma omp barrier
-#pragma omp master
- {
-#endif
- if (DEBUG) {
- for(int ic=0; ic<nbsize_local; ic++){
- if (border_cell_needed_local[ic] < 0x0016) fprintf(fp,"%d: First set of needed cells ic %3d cell %3d type %3d\n",mype,ic,border_cell_num_local[ic],border_cell_needed_local[ic]);
- if (border_cell_needed_local[ic] >= 0x0016) fprintf(fp,"%d: Second set of needed cells ic %3d cell %3d type %3d\n",mype,ic,border_cell_num_local[ic],border_cell_needed_local[ic]);
- }
- }
-
- int inew = 0;
- for(int ic=0; ic<nbsize_local; ic++){
- if (border_cell_needed_local[ic] <= 0) continue;
- indices_needed.push_back(border_cell_num_local[ic]);
-
- border_cell_num_local[inew] = border_cell_num_local[ic];
- border_cell_i_local[inew] = border_cell_i_local[ic];
- border_cell_j_local[inew] = border_cell_j_local[ic];
- border_cell_level_local[inew] = border_cell_level_local[ic];
- // border_cell_num_local is not used after -- could be commented out?
- // border_cell_needed_local[inew] = 1;
-
- inew++;
- }
- nbsize_local = inew;
-
- free(border_cell_num_local);
-#ifdef _OPENMP
- } // end master region
-#pragma omp barrier
-#endif
-
- // Walk through cell array and set hash to global cell values
- //fprintf(fp,"%d: DEBUG new hash jminsize %d jmaxsize %d iminsize %d imaxsize %d\n",mype,jminsize,jmaxsize,iminsize,imaxsize);
-#ifdef _OPENMP
-#pragma omp for
-#endif
- for(int ic=0; ic<nbsize_local; ic++){
- int lev = border_cell_level_local[ic];
- int levmult = IPOW2(levmx-lev);
-
- int ii = border_cell_i_local[ic]*levmult-iminsize;
- int jj = border_cell_j_local[ic]*levmult-jminsize;
-
- write_hash(-(ncells+ic), jj*(imaxsize-iminsize)+ii, hash);
- }
-
- if (TIMING_LEVEL >= 2) {
-#ifdef _OPENMP
-#pragma omp master
-#endif
- cpu_timers[MESH_TIMER_LAYER2] += cpu_timer_stop(tstart_lev2);
- cpu_timer_start(&tstart_lev2);
- }
-
- if (DEBUG) {
-#ifdef _OPENMP
-#pragma omp barrier
-#pragma omp master
- {
-#endif
- print_local();
-
- int jmaxglobal = (jmax+1)*IPOW2(levmx);
- int imaxglobal = (imax+1)*IPOW2(levmx);
- fprintf(fp,"\n HASH numbering for 2 layer\n");
- for (int jj = jmaxglobal-1; jj>=0; jj--){
- fprintf(fp,"%2d: %4d:",mype,jj);
- if (jj >= jminsize && jj < jmaxsize) {
- for (int ii = 0; ii<imaxglobal; ii++){
- if (ii >= iminsize && ii < imaxsize) {
- fprintf(fp,"%5d",read_hash((jj-jminsize)*(imaxsize-iminsize)+(ii-iminsize), hash) );
- } else {
- fprintf(fp," ");
- }
- }
- }
- fprintf(fp,"\n");
- }
- fprintf(fp,"%2d: ",mype);
- for (int ii = 0; ii<imaxglobal; ii++){
- fprintf(fp,"%4d:",ii);
- }
- fprintf(fp,"\n");
-#ifdef _OPENMP
- } // end master region
-#endif
- }
-
- if (TIMING_LEVEL >= 2) {
-#ifdef _OPENMP
-#pragma omp master
-#endif
- cpu_timers[MESH_TIMER_LAYER_LIST] += cpu_timer_stop(tstart_lev2);
- cpu_timer_start(&tstart_lev2);
- }
-
- int nghost = nbsize_local;
- ncells_ghost = ncells + nghost;
-
-
-#ifdef _OPENMP
-#pragma omp barrier
-#pragma omp master
- {
-#endif
- celltype = (int *)mesh_memory.memory_realloc(ncells_ghost, celltype);
- i = (int *)mesh_memory.memory_realloc(ncells_ghost, i);
- j = (int *)mesh_memory.memory_realloc(ncells_ghost, j);
- level = (int *)mesh_memory.memory_realloc(ncells_ghost, level);
- nlft = (int *)mesh_memory.memory_realloc(ncells_ghost, nlft);
- nrht = (int *)mesh_memory.memory_realloc(ncells_ghost, nrht);
- nbot = (int *)mesh_memory.memory_realloc(ncells_ghost, nbot);
- ntop = (int *)mesh_memory.memory_realloc(ncells_ghost, ntop);
- memory_reset_ptrs();
-#ifdef _OPENMP
- } // end master region
-#pragma omp barrier
-#endif
-
-#ifdef _OPENMP
-#pragma omp for
-#endif
- for (int ic = ncells; ic < (int)ncells_ghost; ic++){
- nlft[ic] = -1;
- nrht[ic] = -1;
- nbot[ic] = -1;
- ntop[ic] = -1;
- }
-
- if (TIMING_LEVEL >= 2) {
-#ifdef _OPENMP
-#pragma omp master
-#endif
- cpu_timers[MESH_TIMER_COPY_MESH_DATA] += cpu_timer_stop(tstart_lev2);
- cpu_timer_start(&tstart_lev2);
- }
-
-#ifdef _OPENMP
-#pragma omp for
-#endif
- for(int ic=0; ic<nbsize_local; ic++){
- int ii = border_cell_i_local[ic];
- int jj = border_cell_j_local[ic];
- int lev = border_cell_level_local[ic];
- if (ii < lev_ibegin[lev]) celltype[ncells+ic] = LEFT_BOUNDARY;
- if (ii > lev_iend[lev]) celltype[ncells+ic] = RIGHT_BOUNDARY;
- if (jj < lev_jbegin[lev]) celltype[ncells+ic] = BOTTOM_BOUNDARY;
- if (jj > lev_jend[lev]) celltype[ncells+ic] = TOP_BOUNDARY;
- i[ncells+ic] = ii;
- j[ncells+ic] = jj;
- level[ncells+ic] = lev;
- }
-
-#ifdef _OPENMP
-#pragma omp barrier
-#pragma omp master
- {
-#endif
- free(border_cell_i_local);
- free(border_cell_j_local);
- free(border_cell_level_local);
-#ifdef _OPENMP
- } // end master region
-#endif
-
- if (TIMING_LEVEL >= 2) {
-#ifdef _OPENMP
-#pragma omp master
-#endif
- cpu_timers[MESH_TIMER_FILL_MESH_GHOST] += cpu_timer_stop(tstart_lev2);
- cpu_timer_start(&tstart_lev2);
- }
-
- if (DEBUG) {
-#ifdef _OPENMP
-#pragma omp barrier
-#pragma omp master
- {
-#endif
- fprintf(fp,"After copying i,j, level to ghost cells\n");
- print_local();
-#ifdef _OPENMP
- } // end master region
-#endif
- }
-
-#ifdef _OPENMP
-#pragma omp for
-#endif
- for (uint ic=0; ic<ncells_ghost; ic++){
- int ii = i[ic];
- int jj = j[ic];
- int lev = level[ic];
- int levmult = IPOW2(levmx-lev);
-
- int iicur = ii*levmult-iminsize;
- int iilft = max( (ii-1)*levmult, 0 )-iminsize;
- int iirht = min( (ii+1)*levmult, imaxcalc-1)-iminsize;
- int jjcur = jj*levmult-jminsize;
- int jjbot = max( (jj-1)*levmult, 0 )-jminsize;
- int jjtop = min( (jj+1)*levmult, jmaxcalc-1)-jminsize;
-
- //fprintf(fp,"DEBUG neigh ic %d nlft %d ii %d levmult %d iminsize %d icheck %d\n",ic,nlft[ic],ii,levmult,iminsize,(max( ii *levmult-1, 0))-iminsize);
-
- int nlftval = nlft[ic];
- int nrhtval = nrht[ic];
- int nbotval = nbot[ic];
- int ntopval = ntop[ic];
-
- if (nlftval == -1){
- // Taking care of boundary cells
- // Force each boundary cell to point to itself on its boundary direction
- if (iicur < 1*IPOW2(levmx) -iminsize) nlftval = read_hash(jjcur*(imaxsize-iminsize)+iicur, hash);
-
- // Boundary cells next to corner boundary need special checks
- if (iicur == 1*IPOW2(levmx)-iminsize && (jjcur < 1*IPOW2(levmx)-jminsize || jjcur >= jmax*IPOW2(levmx)-jminsize ) ) nlftval = read_hash(jjcur*(imaxsize-iminsize)+iicur, hash);
-
- // need to check for finer neighbor first
- // Right and top neighbor don't change for finer, so drop through to same size
- // Left and bottom need to be half of same size index for finer test
- if (lev != levmx) {
- int iilftfiner = iicur-(iicur-iilft)/2;
- if (nlftval == -1 && iilftfiner >= 0) nlftval = read_hash(jjcur*(imaxsize-iminsize)+iilftfiner, hash);
- }
-
- // same size neighbor
- if (nlftval == -1 && iilft >= 0) nlftval = read_hash(jjcur*(imaxsize-iminsize)+iilft, hash);
-
- // Now we need to take care of special case where bottom and left boundary need adjustment since
- // expected cell doesn't exist on these boundaries if it is finer than current cell
- if (jjcur < 1*IPOW2(levmx) && lev != levmx) {
- if (nlftval == -1) {
- int iilftfiner = iicur-(iicur-iilft)/2;
- int jjtopfiner = (jjcur+jjtop)/2;
- if (jjtopfiner < jmaxsize-jminsize && iilftfiner >= 0) nlftval = read_hash(jjtopfiner*(imaxsize-iminsize)+iilftfiner, hash);
- }
- }
-
- // coarser neighbor
- if (lev != 0){
- if (nlftval == -1) {
- int iilftcoarser = iilft - (iicur-iilft);
- int jjlft = (jj/2)*2*levmult-jminsize;
- if (iilftcoarser >=0) nlftval = read_hash(jjlft*(imaxsize-iminsize)+iilftcoarser, hash);
- }
- }
-
- if (nlftval != -1) nlft[ic] = nlftval;
- }
-
- if (nrhtval == -1) {
- // Taking care of boundary cells
- // Force each boundary cell to point to itself on its boundary direction
- if (iicur > imax*IPOW2(levmx)-1-iminsize) nrhtval = read_hash(jjcur*(imaxsize-iminsize)+iicur, hash);
-
- // Boundary cells next to corner boundary need special checks
- if (iirht == imax*IPOW2(levmx)-iminsize && (jjcur < 1*IPOW2(levmx)-jminsize || jjcur >= jmax*IPOW2(levmx)-jminsize ) ) nrhtval = read_hash(jjcur*(imaxsize-iminsize)+iicur, hash);
-
- // same size neighbor
- if (nrhtval == -1 && iirht < imaxsize-iminsize) nrhtval = read_hash(jjcur*(imaxsize-iminsize)+iirht, hash);
-
- // Now we need to take care of special case where bottom and left boundary need adjustment since
- // expected cell doesn't exist on these boundaries if it is finer than current cell
- if (jjcur < 1*IPOW2(levmx) && lev != levmx) {
- if (nrhtval == -1) {
- int jjtopfiner = (jjcur+jjtop)/2;
- if (jjtopfiner < jmaxsize-jminsize && iirht < imaxsize-iminsize) nrhtval = read_hash(jjtopfiner*(imaxsize-iminsize)+iirht, hash);
- }
- }
-
- // coarser neighbor
- if (lev != 0){
- if (nrhtval == -1) {
- int jjrht = (jj/2)*2*levmult-jminsize;
- if (iirht < imaxsize-iminsize) nrhtval = read_hash(jjrht*(imaxsize-iminsize)+iirht, hash);
- }
- }
- if (nrhtval != -1) nrht[ic] = nrhtval;
- }
-
- if (nbotval == -1) {
- // Taking care of boundary cells
- // Force each boundary cell to point to itself on its boundary direction
- if (jjcur < 1*IPOW2(levmx) -jminsize) nbotval = read_hash(jjcur*(imaxsize-iminsize)+iicur, hash);
- // Boundary cells next to corner boundary need special checks
- if (jjcur == 1*IPOW2(levmx)-jminsize && (iicur < 1*IPOW2(levmx)-iminsize || iicur >= imax*IPOW2(levmx)-iminsize ) ) nbotval = read_hash(jjcur*(imaxsize-iminsize)+iicur, hash);
-
- // need to check for finer neighbor first
- // Right and top neighbor don't change for finer, so drop through to same size
- // Left and bottom need to be half of same size index for finer test
- if (lev != levmx) {
- int jjbotfiner = jjcur-(jjcur-jjbot)/2;
- if (nbotval == -1 && jjbotfiner >= 0) nbotval = read_hash(jjbotfiner*(imaxsize-iminsize)+iicur, hash);
- }
-
- // same size neighbor
- if (nbotval == -1 && jjbot >=0) nbotval = read_hash(jjbot*(imaxsize-iminsize)+iicur, hash);
-
- // Now we need to take care of special case where bottom and left boundary need adjustment since
- // expected cell doesn't exist on these boundaries if it is finer than current cell
- if (iicur < 1*IPOW2(levmx) && lev != levmx) {
- if (nbotval == -1) {
- int iirhtfiner = (iicur+iirht)/2;
- int jjbotfiner = jjcur-(jjcur-jjbot)/2;
- if (jjbotfiner >= 0 && iirhtfiner < imaxsize-iminsize) nbotval = read_hash(jjbotfiner*(imaxsize-iminsize)+iirhtfiner, hash);
- }
- }
-
- // coarser neighbor
- if (lev != 0){
- if (nbotval == -1) {
- int jjbotcoarser = jjbot - (jjcur-jjbot);
- int iibot = (ii/2)*2*levmult-iminsize;
- if (jjbotcoarser >= 0 && iibot >= 0) nbotval = read_hash(jjbotcoarser*(imaxsize-iminsize)+iibot, hash);
- }
- }
- if (nbotval != -1) nbot[ic] = nbotval;
- }
-
- if (ntopval == -1) {
- // Taking care of boundary cells
- // Force each boundary cell to point to itself on its boundary direction
- if (jjcur > jmax*IPOW2(levmx)-1-jminsize) ntopval = read_hash(jjcur*(imaxsize-iminsize)+iicur, hash);
- // Boundary cells next to corner boundary need special checks
- if (jjtop == jmax*IPOW2(levmx)-jminsize && (iicur < 1*IPOW2(levmx)-iminsize || iicur >= imax*IPOW2(levmx)-iminsize ) ) ntopval = read_hash(jjcur*(imaxsize-iminsize)+iicur, hash);
-
- // same size neighbor
- if (ntopval == -1 && jjtop < jmaxsize-jminsize) ntopval = read_hash(jjtop*(imaxsize-iminsize)+iicur, hash);
-
- if (iicur < 1*IPOW2(levmx)) {
- if (ntopval == -1) {
- int iirhtfiner = (iicur+iirht)/2;
- if (jjtop < jmaxsize-jminsize && iirhtfiner < imaxsize-iminsize) ntopval = read_hash(jjtop*(imaxsize-iminsize)+iirhtfiner, hash);
- }
- }
-
- // coarser neighbor
- if (lev != 0){
- if (ntopval == -1) {
- int iitop = (ii/2)*2*levmult-iminsize;
- if (jjtop < jmaxsize-jminsize && iitop < imaxsize-iminsize) ntopval = read_hash(jjtop*(imaxsize-iminsize)+iitop, hash);
- }
- }
- if (ntopval != -1) ntop[ic] = ntopval;
- }
-
- //fprintf(fp,"%d: neighbors[%d] = %d %d %d %d\n",mype,ic,nlft[ic],nrht[ic],nbot[ic],ntop[ic]);
- }
-
- if (TIMING_LEVEL >= 2) {
-#ifdef _OPENMP
-#pragma omp master
-#endif
- cpu_timers[MESH_TIMER_FILL_NEIGH_GHOST] += cpu_timer_stop(tstart_lev2);
- cpu_timer_start(&tstart_lev2);
- }
-
- if (DEBUG) {
-#ifdef _OPENMP
-#pragma omp barrier
-#pragma omp master
- {
-#endif
- fprintf(fp,"After setting neighbors through ghost cells\n");
- print_local();
-#ifdef _OPENMP
- } // end master region
-#endif
- }
-
-/*
- // Set neighbors to global cell numbers from hash
- for (uint ic=0; ic<ncells; ic++){
- ii = i[ic];
- jj = j[ic];
- lev = level[ic];
- levmult = IPOW2(levmx-lev);
- //fprintf(fp,"%d:Neighbors input for ic %d ii %d jj %d levmult %d lev %d\n",mype,ic, ii, jj, levmult,lev);
- //fprintf(fp,"%d:Neighbors befor ic %d nlft %d nrht %d nbot %d ntop %d\n",mype,ic,nlft[ic],nrht[ic],nbot[ic],ntop[ic]);
- if (nlft[ic] == -1) nlft[ic] = hash[( jj *levmult )-jminsize][(max( ii *levmult-1, 0 ))-iminsize];
- if (celltype[ic] == BOTTOM_BOUNDARY && nlft[ic] == -1){
- if (nlft[ic] == -1) nlft[ic] = hash[(jj+1)*levmult-jminsize][(min( (ii+1)*levmult, imaxcalc-1))-iminsize];
- }
- if (nrht[ic] == -1) nrht[ic] = hash[( jj *levmult )-jminsize][(min( (ii+1)*levmult, imaxcalc-1))-iminsize];
- if (celltype[ic] == BOTTOM_BOUNDARY && nrht[ic] == -1){
- if (nrht[ic] == -1) nrht[ic] = hash[(jj+1)*levmult-jminsize][(min( (ii+1)*levmult, imaxcalc-1))-iminsize];
- //if (ic == 3 && mype == 0) printf("DEBUG line %d -- ic %d celltype %d nrht %d\n",__line__,ic,celltype[ic],nrht[ic]);
- //printf("DEBUG line %d -- ic %d celltype %d nrht %d jj %d ii %d\n",__line__,ic,celltype[ic],nrht[ic],(jj+1)*levmult-jminsize,(min( (ii+1)*levmult, imaxcalc-1))-iminsize);
- }
- if (nbot[ic] == -1) nbot[ic] = hash[(max( jj *levmult-1, 0) )-jminsize][( ii *levmult )-iminsize];
- if (celltype[ic] == LEFT_BOUNDARY && nbot[ic] == -1){
- if (nbot[ic] == -1) nbot[ic] = hash[(max( jj *levmult-1, 0) )-jminsize][( ii *levmult+1 )-iminsize];
- }
- if (ntop[ic] == -1) ntop[ic] = hash[(min( (jj+1)*levmult, jmaxcalc-1))-jminsize][( ii *levmult )-iminsize];
- if (celltype[ic] == LEFT_BOUNDARY && ntop[ic] == -1){
- if (ntop[ic] == -1) ntop[ic] = hash[(min( (jj+1)*levmult, jmaxcalc-1))-jminsize][( ii *levmult+1 )-iminsize];
- }
- //fprintf(fp,"%d:Neighbors after ic %d nlft %d nrht %d nbot %d ntop %d\n",mype,ic,nlft[ic],nrht[ic],nbot[ic],ntop[ic]);
- }
-*/
-
- if (TIMING_LEVEL >= 2) {
-#ifdef _OPENMP
-#pragma omp master
-#endif
- cpu_timers[MESH_TIMER_SET_CORNER_NEIGH] += cpu_timer_stop(tstart_lev2);
- cpu_timer_start(&tstart_lev2);
- }
-
- if (DEBUG) {
-#ifdef _OPENMP
-#pragma omp barrier
-#pragma omp master
- {
-#endif
- fprintf(fp,"After setting corner neighbors\n");
- print_local();
-#ifdef _OPENMP
- } // end master region
-#endif
- }
-
- // Adjusting neighbors to local indices
-#ifdef _OPENMP
-#pragma omp for
-#endif
- for (uint ic=0; ic<ncells_ghost; ic++){
- //fprintf(fp,"%d: ic %d nlft %d noffset %d ncells %ld\n",mype,ic,nlft[ic],noffset,ncells);
- if (nlft[ic] <= -(int)ncells && nlft[ic] > -(int)ncells_ghost){
- nlft[ic] = abs(nlft[ic]);
- } else if (nlft[ic] >= noffset && nlft[ic] < (int)(noffset+ncells)) {
- nlft[ic] -= noffset;
- }
- if (nrht[ic] <= -(int)ncells && nrht[ic] > -(int)ncells_ghost){
- nrht[ic] = abs(nrht[ic]);
- } else if (nrht[ic] >= noffset && nrht[ic] < (int)(noffset+ncells)) {
- nrht[ic] -= noffset;
- }
- if (nbot[ic] <= -(int)ncells && nbot[ic] > -(int)ncells_ghost){
- nbot[ic] = abs(nbot[ic]);
- } else if (nbot[ic] >= noffset && nbot[ic] < (int)(noffset+ncells)) {
- nbot[ic] -= noffset;
- }
- if (ntop[ic] <= -(int)ncells && ntop[ic] > -(int)ncells_ghost){
- ntop[ic] = abs(ntop[ic]);
- } else if (ntop[ic] >= noffset && ntop[ic] < (int)(noffset+ncells)) {
- ntop[ic] -= noffset;
- }
- }
-
- if (DEBUG) {
-#ifdef _OPENMP
-#pragma omp barrier
-#pragma omp master
- {
-#endif
- fprintf(fp,"After adjusting neighbors to local indices\n");
- print_local();
-#ifdef _OPENMP
- } // end master region
-#endif
- }
-
- if (TIMING_LEVEL >= 2) {
-#ifdef _OPENMP
-#pragma omp master
-#endif
- cpu_timers[MESH_TIMER_NEIGH_ADJUST] += cpu_timer_stop(tstart_lev2);
- cpu_timer_start(&tstart_lev2);
- }
-
-#ifdef _OPENMP
-#pragma omp barrier
-#pragma omp master
- {
-#endif
- offtile_ratio_local = (offtile_ratio_local*(double)offtile_local_count) + ((double)nghost / (double)ncells);
- offtile_local_count++;
- offtile_ratio_local /= offtile_local_count;
- //printf("%d ncells size is %ld ncells_ghost size is %ld nghost %d\n",mype,ncells,ncells_ghost,nghost);
- //fprintf(fp,"%d ncells_ghost size is %ld nghost %d\n",mype,ncells_ghost,nghost);
-
- if (cell_handle) L7_Free(&cell_handle);
- cell_handle=0;
-
- if (DEBUG) {
- fprintf(fp,"%d: SETUP ncells %ld noffset %d nghost %d\n",mype,ncells,noffset,nghost);
- for (int ig = 0; ig<nghost; ig++){
- fprintf(fp,"%d: indices needed ic %d index %d\n",mype,ig,indices_needed[ig]);
- }
- }
- L7_Setup(0, noffset, ncells, &indices_needed[0], nghost, &cell_handle);
-
- if (TIMING_LEVEL >= 2) cpu_timers[MESH_TIMER_SETUP_COMM] += cpu_timer_stop(tstart_lev2);
-
-#ifdef _OPENMP
- } // end master region
-#endif
-
- if (DEBUG) {
-#ifdef _OPENMP
-#pragma omp barrier
-#pragma omp master
- {
-#endif
- print_local();
-
- int jmaxglobal = (jmax+1)*IPOW2(levmx);
- int imaxglobal = (imax+1)*IPOW2(levmx);
- fprintf(fp,"\n HASH numbering\n");
- for (int jj = jmaxglobal-1; jj>=0; jj--){
- fprintf(fp,"%2d: %4d:",mype,jj);
- if (jj >= jminsize && jj < jmaxsize) {
- for (int ii = 0; ii<imaxglobal; ii++){
- if (ii >= iminsize && ii < imaxsize) {
- fprintf(fp,"%5d",read_hash((jj-jminsize)*(imaxsize-iminsize)+(ii-iminsize), hash) );
- } else {
- fprintf(fp," ");
- }
- }
- }
- fprintf(fp,"\n");
- }
- fprintf(fp,"%2d: ",mype);
- for (int ii = 0; ii<imaxglobal; ii++){
- fprintf(fp,"%4d:",ii);
- }
- fprintf(fp,"\n");
-
- fprintf(fp,"\n nlft numbering\n");
- for (int jj = jmaxglobal-1; jj>=0; jj--){
- fprintf(fp,"%2d: %4d:",mype,jj);
- if (jj >= jminsize && jj < jmaxsize) {
- for (int ii = 0; ii<imaxglobal; ii++){
- if (ii >= iminsize && ii < imaxsize) {
- int hashval = read_hash((jj-jminsize)*(imaxsize-iminsize)+(ii-iminsize), hash) -noffset;
- if ( (hashval >= 0 && hashval < (int)ncells) ) {
- fprintf(fp,"%5d",nlft[hashval]);
- } else {
- fprintf(fp," ");
- }
- }
- }
- }
- fprintf(fp,"\n");
- }
- fprintf(fp,"%2d: ",mype);
- for (int ii = 0; ii<imaxglobal; ii++){
- fprintf(fp,"%4d:",ii);
- }
- fprintf(fp,"\n");
-
- fprintf(fp,"\n nrht numbering\n");
- for (int jj = jmaxglobal-1; jj>=0; jj--){
- fprintf(fp,"%2d: %4d:",mype,jj);
- if (jj >= jminsize && jj < jmaxsize) {
- for (int ii = 0; ii<imaxglobal; ii++){
- if ( ii >= iminsize && ii < imaxsize ) {
- int hashval = read_hash((jj-jminsize)*(imaxsize-iminsize)+(ii-iminsize), hash) -noffset;
- if ( hashval >= 0 && hashval < (int)ncells ) {
- fprintf(fp,"%5d",nrht[hashval]);
- } else {
- fprintf(fp," ");
- }
- }
- }
- }
- fprintf(fp,"\n");
- }
- fprintf(fp,"%2d: ",mype);
- for (int ii = 0; ii<imaxglobal; ii++){
- fprintf(fp,"%4d:",ii);
- }
- fprintf(fp,"\n");
-
- fprintf(fp,"\n nbot numbering\n");
- for (int jj = jmaxglobal-1; jj>=0; jj--){
- fprintf(fp,"%2d: %4d:",mype,jj);
- if (jj >= jminsize && jj < jmaxsize) {
- for (int ii = 0; ii<imaxglobal; ii++){
- if ( ii >= iminsize && ii < imaxsize ) {
- int hashval = read_hash((jj-jminsize)*(imaxsize-iminsize)+(ii-iminsize), hash) -noffset;
- if ( hashval >= 0 && hashval < (int)ncells ) {
- fprintf(fp,"%5d",nbot[hashval]);
- } else {
- fprintf(fp," ");
- }
- }
- }
- }
- fprintf(fp,"\n");
- }
- fprintf(fp,"%2d: ",mype);
- for (int ii = 0; ii<imaxglobal; ii++){
- fprintf(fp,"%4d:",ii);
- }
- fprintf(fp,"\n");
-
- fprintf(fp,"\n ntop numbering\n");
- for (int jj = jmaxglobal-1; jj>=0; jj--){
- fprintf(fp,"%2d: %4d:",mype,jj);
- if (jj >= jminsize && jj < jmaxsize) {
- for (int ii = 0; ii<imaxglobal; ii++){
- if ( ii >= iminsize && ii < imaxsize ) {
- int hashval = read_hash((jj-jminsize)*(imaxsize-iminsize)+(ii-iminsize), hash) -noffset;
- if ( hashval >= 0 && hashval < (int)ncells ) {
- fprintf(fp,"%5d",ntop[hashval]);
- } else {
- fprintf(fp," ");
- }
- }
- }
- }
- fprintf(fp,"\n");
- }
- fprintf(fp,"%2d: ",mype);
- for (int ii = 0; ii<imaxglobal; ii++){
- fprintf(fp,"%4d:",ii);
- }
- fprintf(fp,"\n");
-
-#ifdef _OPENMP
- } // end master region
-#endif
- } // end DEBUG
-
- if (DEBUG) {
-#ifdef _OPENMP
-#pragma omp barrier
-#pragma omp master
- {
-#endif
- print_local();
-
- for (uint ic=0; ic<ncells; ic++){
- fprintf(fp,"%d: before update ic %d i %d j %d lev %d nlft %d nrht %d nbot %d ntop %d\n",
- mype,ic,i[ic],j[ic],level[ic],nlft[ic],nrht[ic],nbot[ic],ntop[ic]);
- }
- int ig=0;
- for (uint ic=ncells; ic<ncells_ghost; ic++, ig++){
- fprintf(fp,"%d: after update ic %d off %d i %d j %d lev %d nlft %d nrht %d nbot %d ntop %d\n",
- mype,ic,indices_needed[ig],i[ic],j[ic],level[ic],nlft[ic],nrht[ic],nbot[ic],ntop[ic]);
- }
-#ifdef _OPENMP
- } // end master region
-#endif
- } // end DEBUG
-
- } // if numpe > 1
-#endif
-
-#ifdef _OPENMP
-#pragma omp barrier
-#pragma omp master
- {
-#endif
- write_hash_collision_report();
- read_hash_collision_report();
- compact_hash_delete(hash);
-
-#ifdef BOUNDS_CHECK
- {
- for (uint ic=0; ic<ncells; ic++){
- int nl = nlft[ic];
- if (nl<0 || nl>= (int)ncells_ghost) printf("%d: Warning at line %d cell %d nlft %d\n",mype,__LINE__,ic,nl);
- if (level[nl] > level[ic]){
- int ntl = ntop[nl];
- if (ntl<0 || ntl>= (int)ncells_ghost) printf("%d: Warning at line %d cell %d global %d nlft %d ntop of nlft %d\n",mype,__LINE__,ic,ic+noffset,nl,ntl);
- }
- int nr = nrht[ic];
- if (nr<0 || nr>= (int)ncells_ghost) printf("%d: Warning at line %d cell %d nrht %d\n",mype,__LINE__,ic,nr);
- if (level[nr] > level[ic]){
- int ntr = ntop[nr];
- if (ntr<0 || ntr>= (int)ncells_ghost) printf("%d: Warning at line %d cell %d ntop of nrht %d\n",mype,__LINE__,ic,ntr);
- }
- int nb = nbot[ic];
- if (nb<0 || nb>= (int)ncells_ghost) printf("%d: Warning at line %d cell %d nbot %d\n",mype,__LINE__,ic,nb);
- if (level[nb] > level[ic]){
- int nrb = nrht[nb];
- if (nrb<0 || nrb>= (int)ncells_ghost) printf("%d: Warning at line %d cell %d nrht of nbot %d\n",mype,__LINE__,ic,nrb);
- }
- int nt = ntop[ic];
- if (nt<0 || nt>= (int)ncells_ghost) printf("%d: Warning at line %d cell %d ntop %d\n",mype,__LINE__,ic,nt);
- if (level[nt] > level[ic]){
- int nrt = nrht[nt];
- if (nrt<0 || nrt>= (int)ncells_ghost) printf("%d: Warning at line %d cell %d nrht of ntop %d\n",mype,__LINE__,ic,nrt);
- }
- }
- }
-#endif
-
-#ifdef _OPENMP
- } // end master region
-#pragma omp barrier
-#endif
-
- } else if (calc_neighbor_type == KDTREE) {
-#ifdef _OPENMP
-#pragma omp barrier
-#pragma omp master
- {
-#endif
- struct timeval tstart_lev2;
- if (TIMING_LEVEL >= 2) cpu_timer_start(&tstart_lev2);
-
- TBounds box;
- vector<int> index_list(IPOW2(levmx*levmx) );
-
- int num;
-
- ibase = 0;
- calc_spatial_coordinates(ibase);
-
- kdtree_setup();
-
- if (TIMING_LEVEL >= 2) {
- cpu_timers[MESH_TIMER_KDTREE_SETUP] += cpu_timer_stop(tstart_lev2);
- cpu_timer_start(&tstart_lev2);
- }
-
- for (uint ic=0; ic<ncells; ic++) {
-
- //left
- nlft[ic] = ic;
- box.min.x = x[ic]-0.25*dx[ic];
- box.max.x = x[ic]-0.25*dx[ic];
- box.min.y = y[ic]+0.25*dy[ic];
- box.max.y = y[ic]+0.25*dy[ic];
- KDTree_QueryBoxIntersect(&tree, &num, &(index_list[0]), &box);
- if (num == 1) nlft[ic]=index_list[0];
-
- //right
- nrht[ic] = ic;
- box.min.x = x[ic]+1.25*dx[ic];
- box.max.x = x[ic]+1.25*dx[ic];
- box.min.y = y[ic]+0.25*dy[ic];
- box.max.y = y[ic]+0.25*dy[ic];
- KDTree_QueryBoxIntersect(&tree, &num, &(index_list[0]), &box);
- if (num == 1) nrht[ic]=index_list[0];
-
- //bot
- nbot[ic] = ic;
- box.min.x = x[ic]+0.25*dx[ic];
- box.max.x = x[ic]+0.25*dx[ic];
- box.min.y = y[ic]-0.25*dy[ic];
- box.max.y = y[ic]-0.25*dy[ic];
- KDTree_QueryBoxIntersect(&tree, &num, &(index_list[0]), &box);
- if (num == 1) nbot[ic]=index_list[0];
-
- //top
- ntop[ic] = ic;
- box.min.x = x[ic]+0.25*dx[ic];
- box.max.x = x[ic]+0.25*dx[ic];
- box.min.y = y[ic]+1.25*dy[ic];
- box.max.y = y[ic]+1.25*dy[ic];
- KDTree_QueryBoxIntersect(&tree, &num, &(index_list[0]), &box);
- if (num == 1) ntop[ic]=index_list[0];
- } // End main loop over cells.
-
- KDTree_Destroy(&tree);
-
- if (TIMING_LEVEL >= 2) cpu_timers[MESH_TIMER_KDTREE_QUERY] += cpu_timer_stop(tstart_lev2);
-
-#ifdef _OPENMP
- }
-#pragma omp barrier
-#endif
- } // calc_neighbor_type
-
- }
-
-#ifdef _OPENMP
-#pragma omp master
-#endif
- cpu_timers[MESH_TIMER_CALC_NEIGHBORS] += cpu_timer_stop(tstart_cpu);
-}
-
-#ifdef HAVE_OPENCL
-void Mesh::gpu_calc_neighbors(void) /* Host-side driver for the device neighbor calculation.
- * (Re)allocates dev_nlft/dev_nrht/dev_nbot/dev_ntop when they are missing or
- * hold fewer than ncells elements, allocates a device hash with
- * gpu_compact_hash_init, launches kernel_hash_setup and then
- * kernel_calc_neighbors over ncells work-items (rounded up to whole
- * work-groups), and releases the hash with gpu_compact_hash_delete.
- * Returns immediately when gpu_do_rezone is false. Elapsed time is added to
- * gpu_timers[MESH_TIMER_CALC_NEIGHBORS], plus the MESH_TIMER_HASH_SETUP and
- * MESH_TIMER_HASH_QUERY entries when TIMING_LEVEL >= 2.
- */
-{
- if (! gpu_do_rezone) return; // nothing to do unless gpu_do_rezone is set
-
- ulong gpu_hash_table_size = 0; // written by gpu_compact_hash_init below; not read again in this function
-
- struct timeval tstart_cpu;
- cpu_timer_start(&tstart_cpu); // whole-routine timer
-
- struct timeval tstart_lev2;
- cpu_timer_start(&tstart_lev2); // per-phase timer; started regardless of TIMING_LEVEL
-
- cl_command_queue command_queue = ezcl_get_command_queue();
-
- gpu_counters[MESH_COUNTER_CALC_NEIGH]++; // count this invocation
-
- assert(dev_levtable); // these device buffers are bound as kernel arguments below and must already exist
- assert(dev_level);
- assert(dev_i);
- assert(dev_j);
-
- size_t mem_request = (int)((float)ncells*mem_factor); // element count per neighbor array: ncells scaled by mem_factor, truncated through int
-
- size_t local_work_size = MIN(ncells, TILE_SIZE); // work-group size, capped at TILE_SIZE
- size_t global_work_size = ((ncells + local_work_size - 1) /local_work_size) * local_work_size; // ncells rounded up to a multiple of local_work_size
-
- //printf("DEBUG file %s line %d dev_nlft %p size %d\n",__FILE__,__LINE__,dev_nlft,ezcl_get_device_mem_nelements(dev_nlft));
-
- if (dev_nlft == NULL || ezcl_get_device_mem_nelements(dev_nlft) < (int)ncells) { // neighbor arrays missing or smaller than ncells
- dev_nlft = ezcl_malloc(NULL, const_cast<char *>("dev_nlft"), &mem_request, sizeof(cl_int), CL_MEM_READ_WRITE, 0); // NOTE(review): existing dev_nlft..dev_ntop handles are overwritten here with no visible release -- confirm ezcl ownership
- dev_nrht = ezcl_malloc(NULL, const_cast<char *>("dev_nrht"), &mem_request, sizeof(cl_int), CL_MEM_READ_WRITE, 0);
- dev_nbot = ezcl_malloc(NULL, const_cast<char *>("dev_nbot"), &mem_request, sizeof(cl_int), CL_MEM_READ_WRITE, 0);
- dev_ntop = ezcl_malloc(NULL, const_cast<char *>("dev_ntop"), &mem_request, sizeof(cl_int), CL_MEM_READ_WRITE, 0);
-
- ezcl_set_kernel_arg(kernel_neighbor_init, 0, sizeof(cl_int), (void *)&ncells);
- ezcl_set_kernel_arg(kernel_neighbor_init, 1, sizeof(cl_mem), (void *)&dev_nlft);
- ezcl_set_kernel_arg(kernel_neighbor_init, 2, sizeof(cl_mem), (void *)&dev_nrht);
- ezcl_set_kernel_arg(kernel_neighbor_init, 3, sizeof(cl_mem), (void *)&dev_nbot);
- ezcl_set_kernel_arg(kernel_neighbor_init, 4, sizeof(cl_mem), (void *)&dev_ntop);
- ezcl_enqueue_ndrange_kernel(command_queue, kernel_neighbor_init, 1, NULL, &global_work_size, &local_work_size, NULL); // init kernel runs only for freshly allocated arrays; no event requested
- }
-
- int imaxsize = (imax+1)*IPOW2(levmx); // hash extents handed to gpu_compact_hash_init and the kernels
- int jmaxsize = (jmax+1)*IPOW2(levmx);
-
- int gpu_hash_method = METHOD_UNSET; // stays METHOD_UNSET unless choose_hash_method overrides it
-// allow input.c to control hash types and methods
- if (choose_hash_method != METHOD_UNSET) gpu_hash_method = choose_hash_method;
-//=========
-
- size_t hashsize; // output of gpu_compact_hash_init; not used afterwards in this function
-
- uint hash_report_level = 1;
- cl_mem dev_hash_header = NULL; // set through the last argument of gpu_compact_hash_init
- cl_mem dev_hash = gpu_compact_hash_init(ncells, imaxsize, jmaxsize, gpu_hash_method, hash_report_level,
- &gpu_hash_table_size, &hashsize, &dev_hash_header);
-
- /*
- const int isize, // 0 (host passes ncells)
- const int levmx, // 1
- const int imaxsize, // 2
- __global const int *levtable, // 3
- __global const int *level, // 4
- __global const int *i, // 5
- __global const int *j, // 6
- __global const ulong *hash_header, // 7
- __global int *hash) // 8
- NOTE(review): the host code below binds 13 arguments -- dev_nlft, dev_nrht,
- dev_nbot and dev_ntop occupy slots 7-10, the hash header is slot 11 and the
- hash is slot 12 -- so this list appears out of date; confirm against the
- kernel source.
- */
-
- cl_event hash_setup_event;
-
- ezcl_set_kernel_arg(kernel_hash_setup, 0, sizeof(cl_int), (void *)&ncells);
- ezcl_set_kernel_arg(kernel_hash_setup, 1, sizeof(cl_int), (void *)&levmx);
- ezcl_set_kernel_arg(kernel_hash_setup, 2, sizeof(cl_int), (void *)&imaxsize);
- ezcl_set_kernel_arg(kernel_hash_setup, 3, sizeof(cl_mem), (void *)&dev_levtable);
- ezcl_set_kernel_arg(kernel_hash_setup, 4, sizeof(cl_mem), (void *)&dev_level);
- ezcl_set_kernel_arg(kernel_hash_setup, 5, sizeof(cl_mem), (void *)&dev_i);
- ezcl_set_kernel_arg(kernel_hash_setup, 6, sizeof(cl_mem), (void *)&dev_j);
- ezcl_set_kernel_arg(kernel_hash_setup, 7, sizeof(cl_mem), (void *)&dev_nlft);
- ezcl_set_kernel_arg(kernel_hash_setup, 8, sizeof(cl_mem), (void *)&dev_nrht);
- ezcl_set_kernel_arg(kernel_hash_setup, 9, sizeof(cl_mem), (void *)&dev_nbot);
- ezcl_set_kernel_arg(kernel_hash_setup, 10, sizeof(cl_mem), (void *)&dev_ntop);
- ezcl_set_kernel_arg(kernel_hash_setup, 11, sizeof(cl_mem), (void *)&dev_hash_header);
- ezcl_set_kernel_arg(kernel_hash_setup, 12, sizeof(cl_mem), (void *)&dev_hash);
- ezcl_enqueue_ndrange_kernel(command_queue, kernel_hash_setup, 1, NULL, &global_work_size, &local_work_size, &hash_setup_event);
-
- ezcl_wait_for_events(1, &hash_setup_event); // block until the hash setup kernel has finished, so the timer below covers it
- ezcl_event_release(hash_setup_event);
-
- if (TIMING_LEVEL >= 2) {
- gpu_timers[MESH_TIMER_HASH_SETUP] += (long)(cpu_timer_stop(tstart_lev2)*1.0e9); // scaled by 1.0e9 before accumulating -- presumably seconds to ns; TODO confirm cpu_timer_stop units
- cpu_timer_start(&tstart_lev2); // restart the phase timer for the query phase
- }
-
- /*
- const int isize, // 0 (host passes ncells)
- const int levmx, // 1
- const int imax, // 2
- const int jmax, // 3
- const int imaxsize, // 4
- const int jmaxsize, // 5
- __global const int *levtable, // 6
- __global const int *level, // 7
- __global const int *i, // 8
- __global const int *j, // 9
- __global int *nlft, // 10
- __global int *nrht, // 11
- __global int *nbot, // 12
- __global int *ntop, // 13
- __global const ulong *hash_header, // 14
- __global int *hash) // 15
- */
-
- cl_event calc_neighbors_event;
-
- ezcl_set_kernel_arg(kernel_calc_neighbors, 0, sizeof(cl_int), (void *)&ncells);
- ezcl_set_kernel_arg(kernel_calc_neighbors, 1, sizeof(cl_int), (void *)&levmx);
- ezcl_set_kernel_arg(kernel_calc_neighbors, 2, sizeof(cl_int), (void *)&imax);
- ezcl_set_kernel_arg(kernel_calc_neighbors, 3, sizeof(cl_int), (void *)&jmax);
- ezcl_set_kernel_arg(kernel_calc_neighbors, 4, sizeof(cl_int), (void *)&imaxsize);
- ezcl_set_kernel_arg(kernel_calc_neighbors, 5, sizeof(cl_int), (void *)&jmaxsize);
- ezcl_set_kernel_arg(kernel_calc_neighbors, 6, sizeof(cl_mem), (void *)&dev_levtable);
- ezcl_set_kernel_arg(kernel_calc_neighbors, 7, sizeof(cl_mem), (void *)&dev_level);
- ezcl_set_kernel_arg(kernel_calc_neighbors, 8, sizeof(cl_mem), (void *)&dev_i);
- ezcl_set_kernel_arg(kernel_calc_neighbors, 9, sizeof(cl_mem), (void *)&dev_j);
- ezcl_set_kernel_arg(kernel_calc_neighbors, 10, sizeof(cl_mem), (void *)&dev_nlft);
- ezcl_set_kernel_arg(kernel_calc_neighbors, 11, sizeof(cl_mem), (void *)&dev_nrht);
- ezcl_set_kernel_arg(kernel_calc_neighbors, 12, sizeof(cl_mem), (void *)&dev_nbot);
- ezcl_set_kernel_arg(kernel_calc_neighbors, 13, sizeof(cl_mem), (void *)&dev_ntop);
- ezcl_set_kernel_arg(kernel_calc_neighbors, 14, sizeof(cl_mem), (void *)&dev_hash_header);
- ezcl_set_kernel_arg(kernel_calc_neighbors, 15, sizeof(cl_mem), (void *)&dev_hash);
- ezcl_enqueue_ndrange_kernel(command_queue, kernel_calc_neighbors, 1, NULL, &global_work_size, &local_work_size, &calc_neighbors_event);
-
- ezcl_wait_for_events(1, &calc_neighbors_event); // the hash is deleted next, so wait for the kernel that reads it
- ezcl_event_release(calc_neighbors_event);
-
- gpu_compact_hash_delete(dev_hash, dev_hash_header); // hash and its header are only needed for this call
-
- if (TIMING_LEVEL >= 2) gpu_timers[MESH_TIMER_HASH_QUERY] += (long)(cpu_timer_stop(tstart_lev2)*1.0e9);
-
- gpu_timers[MESH_TIMER_CALC_NEIGHBORS] += (long)(cpu_timer_stop(tstart_cpu) * 1.0e9); // total time for the routine, accumulated unconditionally
-}
-
-
-void Mesh::gpu_calc_neighbors_local(void)
-{
- if (! gpu_do_rezone) return;
-
- ulong gpu_hash_table_size = 0;
-
- struct timeval tstart_cpu;
- cpu_timer_start(&tstart_cpu);
-
- struct timeval tstart_lev2;
- if (TIMING_LEVEL >= 2) cpu_timer_start(&tstart_lev2);
-
- cl_command_queue command_queue = ezcl_get_command_queue();
-
- gpu_counters[MESH_COUNTER_CALC_NEIGH]++;
-
- ncells_ghost = ncells;
-
- assert(dev_levtable);
- assert(dev_level);
- assert(dev_i);
- assert(dev_j);
-
- size_t one = 1;
- cl_mem dev_check = ezcl_malloc(NULL, const_cast<char *>("dev_check"), &one, sizeof(cl_int), CL_MEM_READ_WRITE, 0);
-
- size_t mem_request = (int)((float)ncells*mem_factor);
- dev_nlft = ezcl_malloc(NULL, const_cast<char *>("dev_nlft"), &mem_request, sizeof(cl_int), CL_MEM_READ_WRITE, 0);
- dev_nrht = ezcl_malloc(NULL, const_cast<char *>("dev_nrht"), &mem_request, sizeof(cl_int), CL_MEM_READ_WRITE, 0);
- dev_nbot = ezcl_malloc(NULL, const_cast<char *>("dev_nbot"), &mem_request, sizeof(cl_int), CL_MEM_READ_WRITE, 0);
- dev_ntop = ezcl_malloc(NULL, const_cast<char *>("dev_ntop"), &mem_request, sizeof(cl_int), CL_MEM_READ_WRITE, 0);
-
- size_t local_work_size = 64;
- size_t global_work_size = ((ncells + local_work_size - 1) /local_work_size) * local_work_size;
- size_t block_size = global_work_size/local_work_size;
-
- //printf("DEBUG file %s line %d lws = %d gws %d bs %d ncells %d\n",__FILE__,__LINE__,
- // local_work_size, global_work_size, block_size, ncells);
- cl_mem dev_redscratch = ezcl_malloc(NULL, const_cast<char *>("dev_redscratch"), &block_size, sizeof(cl_int4), CL_MEM_READ_WRITE, 0);
- cl_mem dev_sizes = ezcl_malloc(NULL, const_cast<char *>("dev_sizes"), &one, sizeof(cl_int4), CL_MEM_READ_WRITE, 0);
-
-#ifdef BOUNDS_CHECK
- if (ezcl_get_device_mem_nelements(dev_i) < (int)ncells ||
- ezcl_get_device_mem_nelements(dev_j) < (int)ncells ||
- ezcl_get_device_mem_nelements(dev_level) < (int)ncells ){
- printf("%d: Warning ncells %ld size dev_i %d dev_j %d dev_level %d\n",mype,ncells,ezcl_get_device_mem_nelements(dev_i),ezcl_get_device_mem_nelements(dev_j),ezcl_get_device_mem_nelements(dev_level));
- }
-#endif
-
- /*
- __kernel void calc_hash_size_cl(
- const int ncells, // 0
- const int levmx, // 1
- __global int *levtable, // 2
- __global int *level, // 3
- __global int *i, // 4
- __global int *j, // 5
- __global int4 *redscratch, // 6
- __global int4 *sizes, // 7
- __local int4 *tile) // 8
- */
-
- ezcl_set_kernel_arg(kernel_hash_size, 0, sizeof(cl_int), (void *)&ncells);
- ezcl_set_kernel_arg(kernel_hash_size, 1, sizeof(cl_int), (void *)&levmx);
- ezcl_set_kernel_arg(kernel_hash_size, 2, sizeof(cl_mem), (void *)&dev_levtable);
- ezcl_set_kernel_arg(kernel_hash_size, 3, sizeof(cl_mem), (void *)&dev_level);
- ezcl_set_kernel_arg(kernel_hash_size, 4, sizeof(cl_mem), (void *)&dev_i);
- ezcl_set_kernel_arg(kernel_hash_size, 5, sizeof(cl_mem), (void *)&dev_j);
- ezcl_set_kernel_arg(kernel_hash_size, 6, sizeof(cl_mem), (void *)&dev_redscratch);
- ezcl_set_kernel_arg(kernel_hash_size, 7, sizeof(cl_mem), (void *)&dev_sizes);
- ezcl_set_kernel_arg(kernel_hash_size, 8, local_work_size*sizeof(cl_int4), NULL);
-
- ezcl_enqueue_ndrange_kernel(command_queue, kernel_hash_size, 1, NULL, &global_work_size, &local_work_size, NULL);
-
- if (block_size > 1) {
- /*
- __kernel void finish_reduction_minmax4_cl(
- const int isize, // 0
- __global int4 *redscratch, // 1
- __global int4 *sizes, // 2
- __local int4 *tile) // 3
- */
- ezcl_set_kernel_arg(kernel_finish_hash_size, 0, sizeof(cl_int), (void *)&block_size);
- ezcl_set_kernel_arg(kernel_finish_hash_size, 1, sizeof(cl_mem), (void *)&dev_redscratch);
- ezcl_set_kernel_arg(kernel_finish_hash_size, 2, sizeof(cl_mem), (void *)&dev_sizes);
- ezcl_set_kernel_arg(kernel_finish_hash_size, 3, local_work_size*sizeof(cl_int4), NULL);
-
- ezcl_enqueue_ndrange_kernel(command_queue, kernel_finish_hash_size, 1, NULL, &local_work_size, &local_work_size, NULL);
- }
-
- ezcl_device_memory_delete(dev_redscratch);
-
- cl_int sizes[4];
- ezcl_enqueue_read_buffer(command_queue, dev_sizes, CL_TRUE, 0, 1*sizeof(cl_int4), &sizes, NULL);
-
- int imintile = sizes[0];
- int imaxtile = sizes[1];
- int jmintile = sizes[2];
- int jmaxtile = sizes[3];
-
- // Expand size by 2*coarse_cells for ghost cells
- // TODO: May want to get fancier here and calc based on cell level
- int jminsize = max(jmintile-2*IPOW2(levmx),0);
- int jmaxsize = min(jmaxtile+2*IPOW2(levmx),(jmax+1)*IPOW2(levmx));
- int iminsize = max(imintile-2*IPOW2(levmx),0);
- int imaxsize = min(imaxtile+2*IPOW2(levmx),(imax+1)*IPOW2(levmx));
- //fprintf(fp,"%d: Sizes are imin %d imax %d jmin %d jmax %d\n",mype,iminsize,imaxsize,jminsize,jmaxsize);
-
- //ezcl_enqueue_write_buffer(command_queue, dev_sizes, CL_TRUE, 0, 1*sizeof(cl_int4), &sizes, NULL);
-
- int gpu_hash_method = METHOD_UNSET;
-// allow input.c to control hash types and methods
- if (choose_hash_method != METHOD_UNSET) gpu_hash_method = choose_hash_method;
-//=========
-
- size_t hashsize;
-
- uint hash_report_level = 1;
- cl_mem dev_hash_header = NULL;
- cl_mem dev_hash = gpu_compact_hash_init(ncells, imaxsize-iminsize, jmaxsize-jminsize, gpu_hash_method, hash_report_level, &gpu_hash_table_size, &hashsize, &dev_hash_header);
-
- int csize = corners_i.size();
-#ifdef BOUNDS_CHECK
- for (int ic=0; ic<csize; ic++){
- if (corners_i[ic] >= iminsize) continue;
- if (corners_j[ic] >= jminsize) continue;
- if (corners_i[ic] < imaxsize) continue;
- if (corners_j[ic] < jmaxsize) continue;
- if ( (corners_j[ic]-jminsize)*(imaxsize-iminsize)+(corners_i[ic]-iminsize) < 0 ||
- (corners_j[ic]-jminsize)*(imaxsize-iminsize)+(corners_i[ic]-iminsize) > (int)hashsize){
- printf("%d: Warning corners i %d j %d hash %d\n",mype,corners_i[ic],corners_j[ic],
- (corners_j[ic]-jminsize)*(imaxsize-iminsize)+(corners_i[ic]-iminsize));
- }
- }
-#endif
-
- size_t corners_local_work_size = MIN(csize, TILE_SIZE);
- size_t corners_global_work_size = ((csize+corners_local_work_size - 1) /corners_local_work_size) * corners_local_work_size;
-
- ezcl_set_kernel_arg(kernel_hash_adjust_sizes, 0, sizeof(cl_int), (void *)&csize);
- ezcl_set_kernel_arg(kernel_hash_adjust_sizes, 1, sizeof(cl_int), (void *)&levmx);
- ezcl_set_kernel_arg(kernel_hash_adjust_sizes, 2, sizeof(cl_int), (void *)&imax);
- ezcl_set_kernel_arg(kernel_hash_adjust_sizes, 3, sizeof(cl_int), (void *)&jmax);
- ezcl_set_kernel_arg(kernel_hash_adjust_sizes, 4, sizeof(cl_mem), (void *)&dev_levtable);
- ezcl_set_kernel_arg(kernel_hash_adjust_sizes, 5, sizeof(cl_mem), (void *)&dev_sizes);
- ezcl_enqueue_ndrange_kernel(command_queue, kernel_hash_adjust_sizes, 1, NULL, &corners_global_work_size, &corners_local_work_size, NULL);
-
- if (DEBUG){
- vector<int> sizes_tmp(4);
- ezcl_enqueue_read_buffer(command_queue, dev_sizes, CL_TRUE, 0, 1*sizeof(cl_int4), &sizes_tmp[0], NULL);
- int iminsize_tmp = sizes_tmp[0];
- int imaxsize_tmp = sizes_tmp[1];
- int jminsize_tmp = sizes_tmp[2];
- int jmaxsize_tmp = sizes_tmp[3];
- fprintf(fp,"%d: Sizes are imin %d imax %d jmin %d jmax %d\n",mype,iminsize_tmp,imaxsize_tmp,jminsize_tmp,jmaxsize_tmp);
- }
-
- local_work_size = 128;
- global_work_size = ((ncells + local_work_size - 1) /local_work_size) * local_work_size;
-
-#ifdef BOUNDS_CHECK
- {
- vector<int> i_tmp(ncells);
- vector<int> j_tmp(ncells);
- vector<int> level_tmp(ncells);
- ezcl_enqueue_read_buffer(command_queue, dev_i, CL_FALSE, 0, ncells*sizeof(cl_int), &i_tmp[0], NULL);
- ezcl_enqueue_read_buffer(command_queue, dev_j, CL_FALSE, 0, ncells*sizeof(cl_int), &j_tmp[0], NULL);
- ezcl_enqueue_read_buffer(command_queue, dev_level, CL_TRUE, 0, ncells*sizeof(cl_int), &level_tmp[0], NULL);
- for (int ic=0; ic<(int)ncells; ic++){
- int lev = level_tmp[ic];
- for ( int jj = j_tmp[ic]*IPOW2(levmx-lev)-jminsize; jj < (j_tmp[ic]+1)*IPOW2(levmx-lev)-jminsize; jj++) {
- for (int ii = i_tmp[ic]*IPOW2(levmx-lev)-iminsize; ii < (i_tmp[ic]+1)*IPOW2(levmx-lev)-iminsize; ii++) {
- if (jj < 0 || jj >= (jmaxsize-jminsize) || ii < 0 || ii >= (imaxsize-iminsize) ) {
- printf("%d: Warning ncell %d writes to hash out-of-bounds at line %d ii %d jj %d iminsize %d imaxsize %d jminsize %d jmaxsize %d\n",mype,ic,__LINE__,ii,jj,iminsize,imaxsize,jminsize,jmaxsize);
- }
- }
- }
- }
- }
-#endif
-
- //printf("%d: lws %d gws %d \n",mype,local_work_size,global_work_size);
- cl_event hash_setup_local_event;
-
- /*
- const int isize, // 0
- const int levmx, // 1
- const int imax, // 2
- const int jmax, // 3
- const int noffset, // 4
- __global int *sizes, // 5
- __global int *levtable, // 6
- __global int *level, // 7
- __global int *i, // 8
- __global int *j, // 9
- __global const ulong *hash_header, // 10
- __global int *hash) // 11
- */
-
- ezcl_set_kernel_arg(kernel_hash_setup_local, 0, sizeof(cl_int), (void *)&ncells);
- ezcl_set_kernel_arg(kernel_hash_setup_local, 1, sizeof(cl_int), (void *)&levmx);
- ezcl_set_kernel_arg(kernel_hash_setup_local, 2, sizeof(cl_int), (void *)&imax);
- ezcl_set_kernel_arg(kernel_hash_setup_local, 3, sizeof(cl_int), (void *)&jmax);
- ezcl_set_kernel_arg(kernel_hash_setup_local, 4, sizeof(cl_int), (void *)&noffset);
- ezcl_set_kernel_arg(kernel_hash_setup_local, 5, sizeof(cl_mem), (void *)&dev_sizes);
- ezcl_set_kernel_arg(kernel_hash_setup_local, 6, sizeof(cl_mem), (void *)&dev_levtable);
- ezcl_set_kernel_arg(kernel_hash_setup_local, 7, sizeof(cl_mem), (void *)&dev_level);
- ezcl_set_kernel_arg(kernel_hash_setup_local, 8, sizeof(cl_mem), (void *)&dev_i);
- ezcl_set_kernel_arg(kernel_hash_setup_local, 9, sizeof(cl_mem), (void *)&dev_j);
- ezcl_set_kernel_arg(kernel_hash_setup_local, 10, sizeof(cl_mem), (void *)&dev_hash_header);
- ezcl_set_kernel_arg(kernel_hash_setup_local, 11, sizeof(cl_mem), (void *)&dev_hash);
- ezcl_enqueue_ndrange_kernel(command_queue, kernel_hash_setup_local, 1, NULL, &global_work_size, &local_work_size, &hash_setup_local_event);
-
- ezcl_wait_for_events(1, &hash_setup_local_event);
- ezcl_event_release(hash_setup_local_event);
-
- if (DEBUG){
- vector<int> sizes_tmp(4);
- ezcl_enqueue_read_buffer(command_queue, dev_sizes, CL_TRUE, 0, 1*sizeof(cl_int4), &sizes_tmp[0], NULL);
- int iminsize_tmp = sizes_tmp[0];
- int imaxsize_tmp = sizes_tmp[1];
- int jminsize_tmp = sizes_tmp[2];
- int jmaxsize_tmp = sizes_tmp[3];
- fprintf(fp,"%d: Sizes are imin %d imax %d jmin %d jmax %d\n",mype,iminsize_tmp,imaxsize_tmp,jminsize_tmp,jmaxsize_tmp);
- }
-
- if (TIMING_LEVEL >= 2) {
- gpu_timers[MESH_TIMER_HASH_SETUP] += (long)(cpu_timer_stop(tstart_lev2)*1.0e9);
- cpu_timer_start(&tstart_lev2);
- }
-
-#ifdef BOUNDS_CHECK
- {
- if (ezcl_get_device_mem_nelements(dev_nlft) < (int)ncells ||
- ezcl_get_device_mem_nelements(dev_nrht) < (int)ncells ||
- ezcl_get_device_mem_nelements(dev_nbot) < (int)ncells ||
- ezcl_get_device_mem_nelements(dev_ntop) < (int)ncells ||
- ezcl_get_device_mem_nelements(dev_i) < (int)ncells ||
- ezcl_get_device_mem_nelements(dev_j) < (int)ncells ||
- ezcl_get_device_mem_nelements(dev_level) < (int)ncells ) {
- printf("%d: Warning -- sizes for dev_neigh too small ncells %ld neigh %d %d %d %d %d %d %d\n",mype,ncells,ezcl_get_device_mem_nelements(dev_nlft),ezcl_get_device_mem_nelements(dev_nrht),ezcl_get_device_mem_nelements(dev_nbot),ezcl_get_device_mem_nelements(dev_ntop),ezcl_get_device_mem_nelements(dev_i),ezcl_get_device_mem_nelements(dev_j),ezcl_get_device_mem_nelements(dev_level));
- }
- vector<int> level_tmp(ncells);
- ezcl_enqueue_read_buffer(command_queue, dev_level, CL_TRUE, 0, ncells*sizeof(cl_int), &level_tmp[0], NULL);
- int iflag = 0;
- for (int ic=0; ic<ncells; ic++){
- if (levmx-level_tmp[ic] < 0 || levmx-level_tmp[ic] > levmx) {
- printf("%d: Warning level value bad ic %d level %d ncells %d\n",mype,ic,level_tmp[ic],ncells);
- iflag++;
- }
- }
- if (ezcl_get_device_mem_nelements(dev_levtable) < levmx+1) printf("%d Warning levtable too small levmx is %d devtable size is %d\n",mype,levmx,ezcl_get_device_mem_nelements(dev_levtable));
-#ifdef HAVE_MPI
- if (iflag > 20) {fflush(stdout); L7_Terminate(); exit(0);}
-#endif
- }
-#endif
-
-#ifdef BOUNDS_CHECK
- {
- int jmaxcalc = (jmax+1)*IPOW2(levmx);
- int imaxcalc = (imax+1)*IPOW2(levmx);
- vector<int> i_tmp(ncells);
- vector<int> j_tmp(ncells);
- vector<int> level_tmp(ncells);
- vector<int> hash_tmp(hashsize);
- ezcl_enqueue_read_buffer(command_queue, dev_i, CL_FALSE, 0, ncells*sizeof(cl_int), &i_tmp[0], NULL);
- ezcl_enqueue_read_buffer(command_queue, dev_j, CL_FALSE, 0, ncells*sizeof(cl_int), &j_tmp[0], NULL);
- ezcl_enqueue_read_buffer(command_queue, dev_level, CL_TRUE, 0, ncells*sizeof(cl_int), &level_tmp[0], NULL);
- ezcl_enqueue_read_buffer(command_queue, dev_hash, CL_TRUE, 0, hashsize*sizeof(cl_int), &hash_tmp[0], NULL);
- for (int ic=0; ic<(int)ncells; ic++){
- int ii = i_tmp[ic];
- int jj = j_tmp[ic];
- int lev = level_tmp[ic];
- int levmult = IPOW2(levmx-lev);
- int jjj=jj *levmult-jminsize;
- int iii=max( ii *levmult-1, 0 )-iminsize;
- if (jjj < 0 || jjj >= (jmaxsize-jminsize) || iii < 0 || iii >= (imaxsize-iminsize) ) printf("%d: Warning at line %d iii %d jjj %d iminsize %d imaxsize %d jminsize %d jmaxsize %d\n",mype,__LINE__,iii,jjj,iminsize,imaxsize,jminsize,jmaxsize);
- jjj=jj *levmult-jminsize;
- iii=min( (ii+1)*levmult, imaxcalc-1)-iminsize;
- if (jjj < 0 || jjj >= (jmaxsize-jminsize) || iii < 0 || iii >= (imaxsize-iminsize) ) printf("%d: Warning at line %d iii %d jjj %d iminsize %d imaxsize %d jminsize %d jmaxsize %d\n",mype,__LINE__,iii,jjj,iminsize,imaxsize,jminsize,jmaxsize);
- jjj=max( jj *levmult-1, 0) -jminsize;
- iii=ii *levmult -iminsize;
- if (jjj < 0 || jjj >= (jmaxsize-jminsize) || iii < 0 || iii >= (imaxsize-iminsize) ) printf("%d: Warning at line %d iii %d jjj %d iminsize %d imaxsize %d jminsize %d jmaxsize %d\n",mype,__LINE__,iii,jjj,iminsize,imaxsize,jminsize,jmaxsize);
- jjj=min( (jj+1)*levmult, jmaxcalc-1)-jminsize;
- iii=ii *levmult -iminsize;
- if (jjj < 0 || jjj >= (jmaxsize-jminsize) || iii < 0 || iii >= (imaxsize-iminsize) ) printf("%d: Warning at line %d iii %d jjj %d iminsize %d imaxsize %d jminsize %d jmaxsize %d\n",mype,__LINE__,iii,jjj,iminsize,imaxsize,jminsize,jmaxsize);
- int nlftval = hash_tmp[(( jj *levmult )-jminsize)*(imaxsize-iminsize)+((max( ii *levmult-1, 0 ))-iminsize)];
- int nrhtval = hash_tmp[(( jj *levmult )-jminsize)*(imaxsize-iminsize)+((min( (ii+1)*levmult, imaxcalc-1))-iminsize)];
- int nbotval = hash_tmp[((max( jj *levmult-1, 0) )-jminsize)*(imaxsize-iminsize)+(( ii *levmult )-iminsize)];
- int ntopval = hash_tmp[((min( (jj+1)*levmult, jmaxcalc-1))-jminsize)*(imaxsize-iminsize)+(( ii *levmult )-iminsize)];
-
- if (nlftval == INT_MIN){
- jjj = jj*levmult-jminsize;
- iii = ii*levmult-iminsize;
- if (jjj < 0 || jjj >= (jmaxsize-jminsize) || iii < 0 || iii >= (imaxsize-iminsize) ) printf("%d: Warning at line %d iii %d jjj %d iminsize %d imaxsize %d jminsize %d jmaxsize %d\n",mype,__LINE__,iii,jjj,iminsize,imaxsize,jminsize,jmaxsize);
- }
- if (nrhtval == INT_MIN){
- jjj = jj*levmult-jminsize;
- iii = ii*levmult-iminsize;
- if (jjj < 0 || jjj >= (jmaxsize-jminsize) || iii < 0 || iii >= (imaxsize-iminsize) ) printf("%d: Warning at line %d iii %d jjj %d iminsize %d imaxsize %d jminsize %d jmaxsize %d\n",mype,__LINE__,iii,jjj,iminsize,imaxsize,jminsize,jmaxsize);
- }
- if (nbotval == INT_MIN) {
- iii = ii*levmult-iminsize;
- jjj = jj*levmult-jminsize;
- if (jjj < 0 || jjj >= (jmaxsize-jminsize) || iii < 0 || iii >= (imaxsize-iminsize) ) printf("%d: Warning at line %d iii %d jjj %d iminsize %d imaxsize %d jminsize %d jmaxsize %d\n",mype,__LINE__,iii,jjj,iminsize,imaxsize,jminsize,jmaxsize);
- }
- if (ntopval == INT_MIN) {
- iii = ii*levmult-iminsize;
- jjj = jj*levmult-jminsize;
- if (jjj < 0 || jjj >= (jmaxsize-jminsize) || iii < 0 || iii >= (imaxsize-iminsize) ) printf("%d: Warning at line %d iii %d jjj %d iminsize %d imaxsize %d jminsize %d jmaxsize %d\n",mype,__LINE__,iii,jjj,iminsize,imaxsize,jminsize,jmaxsize);
- }
- }
- }
-#endif
-
- cl_event calc_neighbors_local_event;
-
- /*
- const int isize, // 0
- const int levmx, // 1
- const int imaxsize, // 2
- const int jmaxsize, // 3
- const int noffset, // 4
- __global int *sizes, // 5
- __global int *levtable, // 6
- __global int *level, // 7
- __global int *i, // 8
- __global int *j, // 9
- __global int *nlft, // 10
- __global int *nrht, // 11
- __global int *nbot, // 12
- __global int *ntop, // 13
- __global const ulong *hash_header, // 14
- __global int *hash) // 15
- */
-
- ezcl_set_kernel_arg(kernel_calc_neighbors_local, 0, sizeof(cl_int), (void *)&ncells);
- ezcl_set_kernel_arg(kernel_calc_neighbors_local, 1, sizeof(cl_int), (void *)&levmx);
- ezcl_set_kernel_arg(kernel_calc_neighbors_local, 2, sizeof(cl_int), (void *)&imax);
- ezcl_set_kernel_arg(kernel_calc_neighbors_local, 3, sizeof(cl_int), (void *)&jmax);
- ezcl_set_kernel_arg(kernel_calc_neighbors_local, 4, sizeof(cl_int), (void *)&noffset);
- ezcl_set_kernel_arg(kernel_calc_neighbors_local, 5, sizeof(cl_mem), (void *)&dev_sizes);
- ezcl_set_kernel_arg(kernel_calc_neighbors_local, 6, sizeof(cl_mem), (void *)&dev_levtable);
- ezcl_set_kernel_arg(kernel_calc_neighbors_local, 7, sizeof(cl_mem), (void *)&dev_level);
- ezcl_set_kernel_arg(kernel_calc_neighbors_local, 8, sizeof(cl_mem), (void *)&dev_i);
- ezcl_set_kernel_arg(kernel_calc_neighbors_local, 9, sizeof(cl_mem), (void *)&dev_j);
- ezcl_set_kernel_arg(kernel_calc_neighbors_local, 10, sizeof(cl_mem), (void *)&dev_nlft);
- ezcl_set_kernel_arg(kernel_calc_neighbors_local, 11, sizeof(cl_mem), (void *)&dev_nrht);
- ezcl_set_kernel_arg(kernel_calc_neighbors_local, 12, sizeof(cl_mem), (void *)&dev_nbot);
- ezcl_set_kernel_arg(kernel_calc_neighbors_local, 13, sizeof(cl_mem), (void *)&dev_ntop);
- ezcl_set_kernel_arg(kernel_calc_neighbors_local, 14, sizeof(cl_mem), (void *)&dev_hash_header);
- ezcl_set_kernel_arg(kernel_calc_neighbors_local, 15, sizeof(cl_mem), (void *)&dev_hash);
- ezcl_enqueue_ndrange_kernel(command_queue, kernel_calc_neighbors_local, 1, NULL, &global_work_size, &local_work_size, &calc_neighbors_local_event);
-
- ezcl_wait_for_events(1, &calc_neighbors_local_event);
- ezcl_event_release(calc_neighbors_local_event);
-
- if (TIMING_LEVEL >= 2) {
- gpu_timers[MESH_TIMER_HASH_QUERY] += (long)(cpu_timer_stop(tstart_lev2)*1.0e9);
- cpu_timer_start(&tstart_lev2);
- }
-
- if (DEBUG) {
- print_dev_local();
-
- vector<int> hash_tmp(hashsize);
- ezcl_enqueue_read_buffer(command_queue, dev_hash, CL_FALSE, 0, hashsize*sizeof(cl_int), &hash_tmp[0], NULL);
-
- cl_mem dev_hash_header_check = gpu_get_hash_header();
- vector<ulong> hash_header_check(hash_header_size);
- ezcl_enqueue_read_buffer(command_queue, dev_hash_header_check, CL_TRUE, 0, hash_header_size*sizeof(cl_ulong), &hash_header_check[0], NULL);
-
- int gpu_hash_method = (int)hash_header_check[0];
- ulong gpu_hash_table_size = hash_header_check[1];
- ulong gpu_AA = hash_header_check[2];
- ulong gpu_BB = hash_header_check[3];
-
- vector<int> nlft_tmp(ncells_ghost);
- vector<int> nrht_tmp(ncells_ghost);
- vector<int> nbot_tmp(ncells_ghost);
- vector<int> ntop_tmp(ncells_ghost);
- ezcl_enqueue_read_buffer(command_queue, dev_nlft, CL_FALSE, 0, ncells_ghost*sizeof(cl_int), &nlft_tmp[0], NULL);
- ezcl_enqueue_read_buffer(command_queue, dev_nrht, CL_FALSE, 0, ncells_ghost*sizeof(cl_int), &nrht_tmp[0], NULL);
- ezcl_enqueue_read_buffer(command_queue, dev_nbot, CL_FALSE, 0, ncells_ghost*sizeof(cl_int), &nbot_tmp[0], NULL);
- ezcl_enqueue_read_buffer(command_queue, dev_ntop, CL_TRUE, 0, ncells_ghost*sizeof(cl_int), &ntop_tmp[0], NULL);
-
- int jmaxglobal = (jmax+1)*IPOW2(levmx);
- int imaxglobal = (imax+1)*IPOW2(levmx);
- fprintf(fp,"\n HASH 0 numbering\n");
- for (int jj = jmaxglobal-1; jj>=0; jj--){
- fprintf(fp,"%2d: %4d:",mype,jj);
- if (jj >= jminsize && jj < jmaxsize) {
- for (int ii = 0; ii<imaxglobal; ii++){
- if (ii >= iminsize && ii < imaxsize) {
- fprintf(fp,"%5d",read_dev_hash(gpu_hash_method, gpu_hash_table_size, gpu_AA, gpu_BB, (jj-jminsize)*(imaxsize-iminsize)+(ii-iminsize), &hash_tmp[0]) );
- } else {
- fprintf(fp," ");
- }
- }
- }
- fprintf(fp,"\n");
- }
- fprintf(fp,"%2d: ",mype);
- for (int ii = 0; ii<imaxglobal; ii++){
- fprintf(fp,"%4d:",ii);
- }
- fprintf(fp,"\n");
-
- fprintf(fp,"\n nlft numbering\n");
- for (int jj = jmaxglobal-1; jj>=0; jj--){
- fprintf(fp,"%2d: %4d:",mype,jj);
- if (jj >= jminsize && jj < jmaxsize) {
- for (int ii = 0; ii<imaxglobal; ii++){
- if (ii >= iminsize && ii < imaxsize) {
- int hashval = read_dev_hash(gpu_hash_method, gpu_hash_table_size, gpu_AA, gpu_BB, (jj-jminsize)*(imaxsize-iminsize)+(ii-iminsize), &hash_tmp[0]) -noffset;
- if (hashval >= 0 && hashval < (int)ncells) {
- fprintf(fp,"%5d",nlft_tmp[hashval]);
- } else {
- fprintf(fp," ");
- }
- } else {
- fprintf(fp," ");
- }
- }
- }
- fprintf(fp,"\n");
- }
- fprintf(fp,"%2d: ",mype);
- for (int ii = 0; ii<imaxglobal; ii++){
- fprintf(fp,"%4d:",ii);
- }
- fprintf(fp,"\n");
-
- fprintf(fp,"\n nrht numbering\n");
- for (int jj = jmaxglobal-1; jj>=0; jj--){
- fprintf(fp,"%2d: %4d:",mype,jj);
- if (jj >= jminsize && jj < jmaxsize) {
- for (int ii = 0; ii<imaxglobal; ii++){
- if (ii >= iminsize && ii < imaxsize) {
- int hashval = read_dev_hash(gpu_hash_method, gpu_hash_table_size, gpu_AA, gpu_BB, (jj-jminsize)*(imaxsize-iminsize)+(ii-iminsize), &hash_tmp[0])-noffset;
- if (hashval >= 0 && hashval < (int)ncells) {
- fprintf(fp,"%5d",nrht_tmp[hashval]);
- } else {
- fprintf(fp," ");
- }
- } else {
- fprintf(fp," ");
- }
- }
- }
- fprintf(fp,"\n");
- }
- fprintf(fp,"%2d: ",mype);
- for (int ii = 0; ii<imaxglobal; ii++){
- fprintf(fp,"%4d:",ii);
- }
- fprintf(fp,"\n");
-
- fprintf(fp,"\n nbot numbering\n");
- for (int jj = jmaxglobal-1; jj>=0; jj--){
- fprintf(fp,"%2d: %4d:",mype,jj);
- if (jj >= jminsize && jj < jmaxsize) {
- for (int ii = 0; ii<imaxglobal; ii++){
- if (ii >= iminsize && ii < imaxsize) {
- int hashval = read_dev_hash(gpu_hash_method, gpu_hash_table_size, gpu_AA, gpu_BB, (jj-jminsize)*(imaxsize-iminsize)+(ii-iminsize), &hash_tmp[0])-noffset;
- if (hashval >= 0 && hashval < (int)ncells) {
- fprintf(fp,"%5d",nbot_tmp[hashval]);
- } else {
- fprintf(fp," ");
- }
- } else {
- fprintf(fp," ");
- }
- }
- }
- fprintf(fp,"\n");
- }
- fprintf(fp,"%2d: ",mype);
- for (int ii = 0; ii<imaxglobal; ii++){
- fprintf(fp,"%4d:",ii);
- }
- fprintf(fp,"\n");
-
- fprintf(fp,"\n ntop numbering\n");
- for (int jj = jmaxglobal-1; jj>=0; jj--){
- fprintf(fp,"%2d: %4d:",mype,jj);
- if (jj >= jminsize && jj < jmaxsize) {
- for (int ii = 0; ii<imaxglobal; ii++){
- if (ii >= iminsize && ii < imaxsize) {
- int hashval = read_dev_hash(gpu_hash_method, gpu_hash_table_size, gpu_AA, gpu_BB, (jj-jminsize)*(imaxsize-iminsize)+(ii-iminsize), &hash_tmp[0])-noffset;
- if (hashval >= 0 && hashval < (int)ncells) {
- fprintf(fp,"%5d",ntop_tmp[hashval]);
- } else {
- fprintf(fp," ");
- }
- } else {
- fprintf(fp," ");
- }
- }
- }
- fprintf(fp,"\n");
- }
- fprintf(fp,"%2d: ",mype);
- for (int ii = 0; ii<imaxglobal; ii++){
- fprintf(fp,"%4d:",ii);
- }
- fprintf(fp,"\n");
- }
-
-#ifdef HAVE_MPI
- if (numpe > 1) {
- vector<int> iminsize_global(numpe);
- vector<int> imaxsize_global(numpe);
- vector<int> jminsize_global(numpe);
- vector<int> jmaxsize_global(numpe);
- vector<int> comm_partner(numpe,-1);
-
- MPI_Allgather(&iminsize, 1, MPI_INT, &iminsize_global[0], 1, MPI_INT, MPI_COMM_WORLD);
- MPI_Allgather(&imaxsize, 1, MPI_INT, &imaxsize_global[0], 1, MPI_INT, MPI_COMM_WORLD);
- MPI_Allgather(&jminsize, 1, MPI_INT, &jminsize_global[0], 1, MPI_INT, MPI_COMM_WORLD);
- MPI_Allgather(&jmaxsize, 1, MPI_INT, &jmaxsize_global[0], 1, MPI_INT, MPI_COMM_WORLD);
-
- int num_comm_partners = 0;
- for (int ip = 0; ip < numpe; ip++){
- if (ip == mype) continue;
- if (iminsize_global[ip] > imaxtile) continue;
- if (imaxsize_global[ip] < imintile) continue;
- if (jminsize_global[ip] > jmaxtile) continue;
- if (jmaxsize_global[ip] < jmintile) continue;
- comm_partner[num_comm_partners] = ip;
- num_comm_partners++;
- //if (DEBUG) fprintf(fp,"%d: overlap with processor %d bounding box is %d %d %d %d\n",mype,ip,iminsize_global[ip],imaxsize_global[ip],jminsize_global[ip],jmaxsize_global[ip]);
- }
-
-#ifdef BOUNDS_CHECK
- {
- vector<int> nlft_tmp(ncells_ghost);
- vector<int> nrht_tmp(ncells_ghost);
- vector<int> nbot_tmp(ncells_ghost);
- vector<int> ntop_tmp(ncells_ghost);
- ezcl_enqueue_read_buffer(command_queue, dev_nlft, CL_FALSE, 0, ncells*sizeof(cl_int), &nlft_tmp[0], NULL);
- ezcl_enqueue_read_buffer(command_queue, dev_nrht, CL_FALSE, 0, ncells*sizeof(cl_int), &nrht_tmp[0], NULL);
- ezcl_enqueue_read_buffer(command_queue, dev_nbot, CL_FALSE, 0, ncells*sizeof(cl_int), &nbot_tmp[0], NULL);
- ezcl_enqueue_read_buffer(command_queue, dev_ntop, CL_TRUE, 0, ncells*sizeof(cl_int), &ntop_tmp[0], NULL);
- for (uint ic=0; ic<ncells; ic++){
- int nl = nlft_tmp[ic];
- if (nl != -1){
- nl -= noffset;
- if (nl<0 || nl>= ncells) printf("%d: Warning at line %d cell %d nlft %d\n",mype,__LINE__,ic,nl);
- }
- int nr = nrht_tmp[ic];
- if (nr != -1){
- nr -= noffset;
- if (nr<0 || nr>= ncells) printf("%d: Warning at line %d cell %d nrht %d\n",mype,__LINE__,ic,nr);
- }
- int nb = nbot_tmp[ic];
- if (nb != -1){
- nb -= noffset;
- if (nb<0 || nb>= ncells) printf("%d: Warning at line %d cell %d nbot %d\n",mype,__LINE__,ic,nb);
- }
- int nt = ntop_tmp[ic];
- if (nt != -1){
- nt -= noffset;
- if (nt<0 || nt>= ncells) printf("%d: Warning at line %d cell %d ntop %d\n",mype,__LINE__,ic,nt);
- }
- }
- }
-#endif
-
- cl_mem dev_border_cell = ezcl_malloc(NULL, const_cast<char *>("dev_border_cell1"), &ncells, sizeof(cl_int), CL_MEM_READ_WRITE, 0);
-
- ezcl_set_kernel_arg(kernel_calc_border_cells, 0, sizeof(cl_int), (void *)&ncells);
- ezcl_set_kernel_arg(kernel_calc_border_cells, 1, sizeof(cl_int), (void *)&noffset);
- ezcl_set_kernel_arg(kernel_calc_border_cells, 2, sizeof(cl_mem), (void *)&dev_nlft);
- ezcl_set_kernel_arg(kernel_calc_border_cells, 3, sizeof(cl_mem), (void *)&dev_nrht);
- ezcl_set_kernel_arg(kernel_calc_border_cells, 4, sizeof(cl_mem), (void *)&dev_nbot);
- ezcl_set_kernel_arg(kernel_calc_border_cells, 5, sizeof(cl_mem), (void *)&dev_ntop);
- ezcl_set_kernel_arg(kernel_calc_border_cells, 6, sizeof(cl_mem), (void *)&dev_level);
- ezcl_set_kernel_arg(kernel_calc_border_cells, 7, sizeof(cl_mem), (void *)&dev_border_cell);
- ezcl_enqueue_ndrange_kernel(command_queue, kernel_calc_border_cells, 1, NULL, &global_work_size, &local_work_size, NULL);
-
- cl_mem dev_border_cell_new = ezcl_malloc(NULL, const_cast<char *>("dev_border_cell2"), &ncells, sizeof(cl_int), CL_MEM_READ_WRITE, 0);
-
- size_t one = 1;
- cl_mem dev_nbsize = ezcl_malloc(NULL, const_cast<char *>("dev_nbsize"), &one, sizeof(cl_int), CL_MEM_READ_WRITE, 0);
- cl_mem dev_ioffset = ezcl_malloc(NULL, const_cast<char *>("dev_ioffset"), &block_size, sizeof(cl_uint), CL_MEM_READ_WRITE, 0);
-
- ezcl_set_kernel_arg(kernel_calc_border_cells2, 0, sizeof(cl_int), (void *)&ncells);
- ezcl_set_kernel_arg(kernel_calc_border_cells2, 1, sizeof(cl_int), (void *)&noffset);
- ezcl_set_kernel_arg(kernel_calc_border_cells2, 2, sizeof(cl_mem), (void *)&dev_nlft);
- ezcl_set_kernel_arg(kernel_calc_border_cells2, 3, sizeof(cl_mem), (void *)&dev_nrht);
- ezcl_set_kernel_arg(kernel_calc_border_cells2, 4, sizeof(cl_mem), (void *)&dev_nbot);
- ezcl_set_kernel_arg(kernel_calc_border_cells2, 5, sizeof(cl_mem), (void *)&dev_ntop);
- ezcl_set_kernel_arg(kernel_calc_border_cells2, 6, sizeof(cl_mem), (void *)&dev_level);
- ezcl_set_kernel_arg(kernel_calc_border_cells2, 7, sizeof(cl_mem), (void *)&dev_border_cell);
- ezcl_set_kernel_arg(kernel_calc_border_cells2, 8, sizeof(cl_mem), (void *)&dev_border_cell_new);
- ezcl_set_kernel_arg(kernel_calc_border_cells2, 9, sizeof(cl_mem), (void *)&dev_ioffset);
- ezcl_set_kernel_arg(kernel_calc_border_cells2, 10, sizeof(cl_mem), (void *)&dev_nbsize);
- ezcl_set_kernel_arg(kernel_calc_border_cells2, 11, local_work_size*sizeof(cl_int), NULL);
- ezcl_enqueue_ndrange_kernel(command_queue, kernel_calc_border_cells2, 1, NULL, &global_work_size, &local_work_size, NULL);
-
- ezcl_device_memory_swap(&dev_border_cell, &dev_border_cell_new);
- ezcl_device_memory_delete(dev_border_cell_new);
-
- int group_size = (int)(global_work_size/local_work_size);
-
- ezcl_set_kernel_arg(kernel_finish_scan, 0, sizeof(cl_int), (void *)&group_size);
- ezcl_set_kernel_arg(kernel_finish_scan, 1, sizeof(cl_mem), (void *)&dev_ioffset);
- ezcl_set_kernel_arg(kernel_finish_scan, 2, sizeof(cl_mem), (void *)&dev_nbsize);
- ezcl_set_kernel_arg(kernel_finish_scan, 3, local_work_size*sizeof(cl_int), NULL);
- ezcl_enqueue_ndrange_kernel(command_queue, kernel_finish_scan, 1, NULL, &local_work_size, &local_work_size, NULL);
-
- int nbsize_local;
- ezcl_enqueue_read_buffer(command_queue, dev_nbsize, CL_TRUE, 0, 1*sizeof(cl_int), &nbsize_local, NULL);
- ezcl_device_memory_delete(dev_nbsize);
-
- //printf("%d: border cell size is %d global is %ld\n",mype,nbsize_local,nbsize_global);
-
- vector<int> border_cell_num(nbsize_local);
- vector<int> border_cell_i(nbsize_local);
- vector<int> border_cell_j(nbsize_local);
- vector<int> border_cell_level(nbsize_local);
-
- // allocate new border memory
- size_t nbsize_long = nbsize_local;
- cl_mem dev_border_cell_i = ezcl_malloc(NULL, const_cast<char *>("dev_border_cell_i"), &nbsize_long, sizeof(cl_int), CL_MEM_READ_WRITE, 0);
- cl_mem dev_border_cell_j = ezcl_malloc(NULL, const_cast<char *>("dev_border_cell_j"), &nbsize_long, sizeof(cl_int), CL_MEM_READ_WRITE, 0);
- cl_mem dev_border_cell_level = ezcl_malloc(NULL, const_cast<char *>("dev_border_cell_level"), &nbsize_long, sizeof(cl_int), CL_MEM_READ_WRITE, 0);
- cl_mem dev_border_cell_num = ezcl_malloc(NULL, const_cast<char *>("dev_border_cell_num"), &nbsize_long, sizeof(cl_int), CL_MEM_READ_WRITE, 0);
-
- ezcl_set_kernel_arg(kernel_get_border_data, 0, sizeof(cl_int), (void *)&ncells);
- ezcl_set_kernel_arg(kernel_get_border_data, 1, sizeof(cl_int), (void *)&noffset);
- ezcl_set_kernel_arg(kernel_get_border_data, 2, sizeof(cl_mem), (void *)&dev_ioffset);
- ezcl_set_kernel_arg(kernel_get_border_data, 3, sizeof(cl_mem), (void *)&dev_border_cell);
- ezcl_set_kernel_arg(kernel_get_border_data, 4, sizeof(cl_mem), (void *)&dev_i);
- ezcl_set_kernel_arg(kernel_get_border_data, 5, sizeof(cl_mem), (void *)&dev_j);
- ezcl_set_kernel_arg(kernel_get_border_data, 6, sizeof(cl_mem), (void *)&dev_level);
- ezcl_set_kernel_arg(kernel_get_border_data, 7, sizeof(cl_mem), (void *)&dev_border_cell_i);
- ezcl_set_kernel_arg(kernel_get_border_data, 8, sizeof(cl_mem), (void *)&dev_border_cell_j);
- ezcl_set_kernel_arg(kernel_get_border_data, 9, sizeof(cl_mem), (void *)&dev_border_cell_level);
- ezcl_set_kernel_arg(kernel_get_border_data, 10, sizeof(cl_mem), (void *)&dev_border_cell_num);
- ezcl_set_kernel_arg(kernel_get_border_data, 11, local_work_size*sizeof(cl_uint), NULL);
- ezcl_enqueue_ndrange_kernel(command_queue, kernel_get_border_data, 1, NULL, &global_work_size, &local_work_size, NULL);
-
- ezcl_device_memory_delete(dev_ioffset);
- ezcl_device_memory_delete(dev_border_cell);
-
- // read gpu border cell data
- ezcl_enqueue_read_buffer(command_queue, dev_border_cell_i, CL_FALSE, 0, nbsize_local*sizeof(cl_int), &border_cell_i[0], NULL);
- ezcl_enqueue_read_buffer(command_queue, dev_border_cell_j, CL_FALSE, 0, nbsize_local*sizeof(cl_int), &border_cell_j[0], NULL);
- ezcl_enqueue_read_buffer(command_queue, dev_border_cell_level, CL_FALSE, 0, nbsize_local*sizeof(cl_int), &border_cell_level[0], NULL);
- ezcl_enqueue_read_buffer(command_queue, dev_border_cell_num, CL_TRUE, 0, nbsize_local*sizeof(cl_int), &border_cell_num[0], NULL);
-
- if (TIMING_LEVEL >= 2) {
- gpu_timers[MESH_TIMER_FIND_BOUNDARY] += (long)(cpu_timer_stop(tstart_lev2)*1.0e9);
- cpu_timer_start(&tstart_lev2);
- }
-
- // Allocate push database
-
- int **send_database = (int**)malloc(num_comm_partners*sizeof(int *));
- for (int ip = 0; ip < num_comm_partners; ip++){
- send_database[ip] = (int *)malloc(nbsize_local*sizeof(int));
- }
-
- // Compute the overlap between processor bounding boxes and set up push database
-
- vector<int> send_buffer_count(num_comm_partners);
- for (int ip = 0; ip < num_comm_partners; ip++){
- int icount = 0;
- for (int ib = 0; ib <nbsize_local; ib++){
- int lev = border_cell_level[ib];
- int levmult = IPOW2(levmx-lev);
- if (border_cell_i[ib]*levmult >= iminsize_global[comm_partner[ip]] &&
- border_cell_i[ib]*levmult <= imaxsize_global[comm_partner[ip]] &&
- border_cell_j[ib]*levmult >= jminsize_global[comm_partner[ip]] &&
- border_cell_j[ib]*levmult <= jmaxsize_global[comm_partner[ip]] ) {
- send_database[ip][icount] = ib;
- icount++;
- }
- }
- send_buffer_count[ip]=icount;
- }
-
- // Initialize L7_Push_Setup with num_comm_partners, comm_partner, send_database and
- // send_buffer_count. L7_Push_Setup will copy data and determine recv_buffer_counts.
- // It will return receive_count_total for use in allocations
-
- int receive_count_total;
- int i_push_handle = 0;
- L7_Push_Setup(num_comm_partners, &comm_partner[0], &send_buffer_count[0],
- send_database, &receive_count_total, &i_push_handle);
-
- if (DEBUG) {
- fprintf(fp,"DEBUG num_comm_partners %d\n",num_comm_partners);
- for (int ip = 0; ip < num_comm_partners; ip++){
- fprintf(fp,"DEBUG comm partner is %d data count is %d\n",comm_partner[ip],send_buffer_count[ip]);
- for (int ic = 0; ic < send_buffer_count[ip]; ic++){
- int ib = send_database[ip][ic];
- fprintf(fp,"DEBUG \t index %d cell number %d i %d j %d level %d\n",ib,border_cell_num[ib],
- border_cell_i[ib],border_cell_j[ib],border_cell_level[ib]);
- }
- }
- }
-
- // Can now free the send database. Other arrays are vectors and will automatically
- // deallocate
-
- for (int ip = 0; ip < num_comm_partners; ip++){
- free(send_database[ip]);
- }
- free(send_database);
-
- if (TIMING_LEVEL >= 2) {
- gpu_timers[MESH_TIMER_PUSH_SETUP] += (long)(cpu_timer_stop(tstart_lev2)*1.0e9);
- cpu_timer_start(&tstart_lev2);
- }
- // Push the data needed to the adjacent processors
-
- int *border_cell_num_local = (int *)malloc(receive_count_total*sizeof(int));
- int *border_cell_i_local = (int *)malloc(receive_count_total*sizeof(int));
- int *border_cell_j_local = (int *)malloc(receive_count_total*sizeof(int));
- int *border_cell_level_local = (int *)malloc(receive_count_total*sizeof(int));
- L7_Push_Update(&border_cell_num[0], border_cell_num_local, i_push_handle);
- L7_Push_Update(&border_cell_i[0], border_cell_i_local, i_push_handle);
- L7_Push_Update(&border_cell_j[0], border_cell_j_local, i_push_handle);
- L7_Push_Update(&border_cell_level[0], border_cell_level_local, i_push_handle);
-
- L7_Push_Free(&i_push_handle);
-
- ezcl_device_memory_delete(dev_border_cell_i);
- ezcl_device_memory_delete(dev_border_cell_j);
- ezcl_device_memory_delete(dev_border_cell_level);
- ezcl_device_memory_delete(dev_border_cell_num);
-
- nbsize_local = receive_count_total;
-
- if (DEBUG) {
- for (int ic = 0; ic < nbsize_local; ic++) {
- fprintf(fp,"%d: Local Border cell %d is %d i %d j %d level %d\n",mype,ic,border_cell_num_local[ic],
- border_cell_i_local[ic],border_cell_j_local[ic],border_cell_level_local[ic]);
- }
- }
-
- if (TIMING_LEVEL >= 2) {
- gpu_timers[MESH_TIMER_PUSH_BOUNDARY] += (long)(cpu_timer_stop(tstart_lev2)*1.0e9);
- cpu_timer_start(&tstart_lev2);
- }
-
- nbsize_long = nbsize_local;
-
- dev_border_cell_num = ezcl_malloc(NULL, const_cast<char *>("dev_border_cell_num"), &nbsize_long, sizeof(cl_int), CL_MEM_READ_WRITE, 0);
- dev_border_cell_i = ezcl_malloc(NULL, const_cast<char *>("dev_border_cell_i"), &nbsize_long, sizeof(cl_int), CL_MEM_READ_WRITE, 0);
- dev_border_cell_j = ezcl_malloc(NULL, const_cast<char *>("dev_border_cell_j"), &nbsize_long, sizeof(cl_int), CL_MEM_READ_WRITE, 0);
- dev_border_cell_level = ezcl_malloc(NULL, const_cast<char *>("dev_border_cell_level"), &nbsize_long, sizeof(cl_int), CL_MEM_READ_WRITE, 0);
- cl_mem dev_border_cell_needed = ezcl_malloc(NULL, const_cast<char *>("dev_border_cell_needed"), &nbsize_long, sizeof(cl_int), CL_MEM_READ_WRITE, 0);
- cl_mem dev_border_cell_needed_out = ezcl_malloc(NULL, const_cast<char *>("dev_border_cell_needed_out"), &nbsize_long, sizeof(cl_int), CL_MEM_READ_WRITE, 0);
-
- ezcl_enqueue_write_buffer(command_queue, dev_border_cell_num, CL_FALSE, 0, nbsize_local*sizeof(cl_int), &border_cell_num_local[0], NULL);
- ezcl_enqueue_write_buffer(command_queue, dev_border_cell_i, CL_FALSE, 0, nbsize_local*sizeof(cl_int), &border_cell_i_local[0], NULL);
- ezcl_enqueue_write_buffer(command_queue, dev_border_cell_j, CL_FALSE, 0, nbsize_local*sizeof(cl_int), &border_cell_j_local[0], NULL);
- ezcl_enqueue_write_buffer(command_queue, dev_border_cell_level, CL_TRUE, 0, nbsize_local*sizeof(cl_int), &border_cell_level_local[0], NULL);
-
- //ezcl_enqueue_write_buffer(command_queue, dev_border_cell_needed, CL_TRUE, 0, nbsize_local*sizeof(cl_int), &border_cell_needed_local[0], NULL);
-
- free(border_cell_i_local);
- free(border_cell_j_local);
- free(border_cell_level_local);
-
- if (TIMING_LEVEL >= 2) {
- gpu_timers[MESH_TIMER_LOCAL_LIST] += (long)(cpu_timer_stop(tstart_lev2)*1.0e9);
- cpu_timer_start(&tstart_lev2);
- }
-
- if (DEBUG) {
- vector<int> hash_tmp(hashsize);
- ezcl_enqueue_read_buffer(command_queue, dev_hash, CL_TRUE, 0, hashsize*sizeof(cl_int), &hash_tmp[0], NULL);
-
- cl_mem dev_hash_header_check = gpu_get_hash_header();
- vector<ulong> hash_header_check(hash_header_size);
- ezcl_enqueue_read_buffer(command_queue, dev_hash_header_check, CL_TRUE, 0, hash_header_size*sizeof(cl_ulong), &hash_header_check[0], NULL);
-
- int gpu_hash_method = (int)hash_header_check[0];
- ulong gpu_hash_table_size = hash_header_check[1];
- ulong gpu_AA = hash_header_check[2];
- ulong gpu_BB = hash_header_check[3];
-
- int jmaxglobal = (jmax+1)*IPOW2(levmx);
- int imaxglobal = (imax+1)*IPOW2(levmx);
- fprintf(fp,"\n HASH numbering before layer 1\n");
- for (int jj = jmaxglobal-1; jj>=0; jj--){
- fprintf(fp,"%2d: %4d:",mype,jj);
- if (jj >= jminsize && jj < jmaxsize) {
- for (int ii = 0; ii<imaxglobal; ii++){
- if (ii >= iminsize && ii < imaxsize) {
- fprintf(fp,"%5d",read_dev_hash(gpu_hash_method, gpu_hash_table_size, gpu_AA, gpu_BB, (jj-jminsize)*(imaxsize-iminsize)+(ii-iminsize), &hash_tmp[0]) );
- } else {
- fprintf(fp," ");
- }
- }
- }
- fprintf(fp,"\n");
- }
- fprintf(fp,"%2d: ",mype);
- for (int ii = 0; ii<imaxglobal; ii++){
- fprintf(fp,"%4d:",ii);
- }
- fprintf(fp,"\n");
- }
-
- size_t nb_local_work_size = 128;
- size_t nb_global_work_size = ((nbsize_local + nb_local_work_size - 1) /nb_local_work_size) * nb_local_work_size;
-
- ezcl_set_kernel_arg(kernel_calc_layer1, 0, sizeof(cl_int), (void *)&nbsize_local);
- ezcl_set_kernel_arg(kernel_calc_layer1, 1, sizeof(cl_int), (void *)&ncells);
- ezcl_set_kernel_arg(kernel_calc_layer1, 2, sizeof(cl_int), (void *)&levmx);
- ezcl_set_kernel_arg(kernel_calc_layer1, 3, sizeof(cl_int), (void *)&imax);
- ezcl_set_kernel_arg(kernel_calc_layer1, 4, sizeof(cl_int), (void *)&jmax);
- ezcl_set_kernel_arg(kernel_calc_layer1, 5, sizeof(cl_int), (void *)&noffset);
- ezcl_set_kernel_arg(kernel_calc_layer1, 6, sizeof(cl_mem), (void *)&dev_sizes);
- ezcl_set_kernel_arg(kernel_calc_layer1, 7, sizeof(cl_mem), (void *)&dev_levtable);
- ezcl_set_kernel_arg(kernel_calc_layer1, 8, sizeof(cl_mem), (void *)&dev_level);
- ezcl_set_kernel_arg(kernel_calc_layer1, 9, sizeof(cl_mem), (void *)&dev_border_cell_i);
- ezcl_set_kernel_arg(kernel_calc_layer1, 10, sizeof(cl_mem), (void *)&dev_border_cell_j);
- ezcl_set_kernel_arg(kernel_calc_layer1, 11, sizeof(cl_mem), (void *)&dev_border_cell_level);
- ezcl_set_kernel_arg(kernel_calc_layer1, 12, sizeof(cl_mem), (void *)&dev_border_cell_needed);
- ezcl_set_kernel_arg(kernel_calc_layer1, 13, sizeof(cl_mem), (void *)&dev_hash_header);
- ezcl_set_kernel_arg(kernel_calc_layer1, 14, sizeof(cl_mem), (void *)&dev_hash);
- ezcl_enqueue_ndrange_kernel(command_queue, kernel_calc_layer1, 1, NULL, &nb_global_work_size, &nb_local_work_size, NULL);
-
- if (DEBUG){
- vector<int> border_cell_needed_local(nbsize_local);
-
- ezcl_enqueue_read_buffer(command_queue, dev_border_cell_needed, CL_TRUE, 0, nbsize_local*sizeof(cl_int), &border_cell_needed_local[0], NULL);
-
- for(int ic=0; ic<nbsize_local; ic++){
- if (border_cell_needed_local[ic] == 0) continue;
- fprintf(fp,"%d: First set of needed cells ic %3d cell %3d type %3d\n",mype,ic,border_cell_num_local[ic],border_cell_needed_local[ic]);
- }
- }
-
- cl_event calc_layer1_sethash_event;
-
- ezcl_set_kernel_arg(kernel_calc_layer1_sethash, 0, sizeof(cl_int), (void *)&nbsize_local);
- ezcl_set_kernel_arg(kernel_calc_layer1_sethash, 1, sizeof(cl_int), (void *)&ncells);
- ezcl_set_kernel_arg(kernel_calc_layer1_sethash, 2, sizeof(cl_int), (void *)&noffset);
- ezcl_set_kernel_arg(kernel_calc_layer1_sethash, 3, sizeof(cl_int), (void *)&levmx);
- ezcl_set_kernel_arg(kernel_calc_layer1_sethash, 4, sizeof(cl_mem), (void *)&dev_sizes);
- ezcl_set_kernel_arg(kernel_calc_layer1_sethash, 5, sizeof(cl_mem), (void *)&dev_levtable);
- ezcl_set_kernel_arg(kernel_calc_layer1_sethash, 6, sizeof(cl_mem), (void *)&dev_border_cell_i);
- ezcl_set_kernel_arg(kernel_calc_layer1_sethash, 7, sizeof(cl_mem), (void *)&dev_border_cell_j);
- ezcl_set_kernel_arg(kernel_calc_layer1_sethash, 8, sizeof(cl_mem), (void *)&dev_border_cell_level);
- ezcl_set_kernel_arg(kernel_calc_layer1_sethash, 9, sizeof(cl_mem), (void *)&dev_border_cell_needed);
- ezcl_set_kernel_arg(kernel_calc_layer1_sethash, 10, sizeof(cl_mem), (void *)&dev_hash_header);
- ezcl_set_kernel_arg(kernel_calc_layer1_sethash, 11, sizeof(cl_mem), (void *)&dev_hash);
- ezcl_enqueue_ndrange_kernel(command_queue, kernel_calc_layer1_sethash, 1, NULL, &nb_global_work_size, &nb_local_work_size, &calc_layer1_sethash_event);
-
- ezcl_wait_for_events(1, &calc_layer1_sethash_event);
- ezcl_event_release(calc_layer1_sethash_event);
-
- if (TIMING_LEVEL >= 2) {
- gpu_timers[MESH_TIMER_LAYER1] += (long)(cpu_timer_stop(tstart_lev2)*1.0e9);
- cpu_timer_start(&tstart_lev2);
- }
-
- if (DEBUG) {
- print_dev_local();
-
- vector<int> hash_tmp(hashsize);
- ezcl_enqueue_read_buffer(command_queue, dev_hash, CL_TRUE, 0, hashsize*sizeof(cl_int), &hash_tmp[0], NULL);
-
- cl_mem dev_hash_header_check = gpu_get_hash_header();
- vector<ulong> hash_header_check(hash_header_size);
- ezcl_enqueue_read_buffer(command_queue, dev_hash_header_check, CL_TRUE, 0, hash_header_size*sizeof(cl_ulong), &hash_header_check[0], NULL);
-
- int gpu_hash_method = (int)hash_header_check[0];
- ulong gpu_hash_table_size = hash_header_check[1];
- ulong gpu_AA = hash_header_check[2];
- ulong gpu_BB = hash_header_check[3];
-
- int jmaxglobal = (jmax+1)*IPOW2(levmx);
- int imaxglobal = (imax+1)*IPOW2(levmx);
- fprintf(fp,"\n HASH numbering for 1 layer\n");
- for (int jj = jmaxglobal-1; jj>=0; jj--){
- fprintf(fp,"%2d: %4d:",mype,jj);
- if (jj >= jminsize && jj < jmaxsize) {
- for (int ii = 0; ii<imaxglobal; ii++){
- if (ii >= iminsize && ii < imaxsize) {
- fprintf(fp,"%5d",read_dev_hash(gpu_hash_method, gpu_hash_table_size, gpu_AA, gpu_BB, (jj-jminsize)*(imaxsize-iminsize)+(ii-iminsize), &hash_tmp[0]) );
- } else {
- fprintf(fp," ");
- }
- }
- }
- fprintf(fp,"\n");
- }
- fprintf(fp,"%2d: ",mype);
- for (int ii = 0; ii<imaxglobal; ii++){
- fprintf(fp,"%4d:",ii);
- }
- fprintf(fp,"\n");
- }
-
- group_size = (int)(nb_global_work_size/nb_local_work_size);
-
- cl_mem dev_nbpacked = ezcl_malloc(NULL, const_cast<char *>("dev_nbpacked"), &one, sizeof(cl_int), CL_MEM_READ_WRITE, 0);
- size_t group_size_long = group_size;
- dev_ioffset = ezcl_malloc(NULL, const_cast<char *>("dev_ioffset"), &group_size_long, sizeof(cl_int), CL_MEM_READ_WRITE, 0);
-
- ezcl_set_kernel_arg(kernel_calc_layer2, 0, sizeof(cl_int), (void *)&nbsize_local);
- ezcl_set_kernel_arg(kernel_calc_layer2, 1, sizeof(cl_int), (void *)&ncells);
- ezcl_set_kernel_arg(kernel_calc_layer2, 2, sizeof(cl_int), (void *)&noffset);
- ezcl_set_kernel_arg(kernel_calc_layer2, 3, sizeof(cl_int), (void *)&levmx);
- ezcl_set_kernel_arg(kernel_calc_layer2, 4, sizeof(cl_int), (void *)&imax);
- ezcl_set_kernel_arg(kernel_calc_layer2, 5, sizeof(cl_int), (void *)&jmax);
- ezcl_set_kernel_arg(kernel_calc_layer2, 6, sizeof(cl_mem), (void *)&dev_sizes);
- ezcl_set_kernel_arg(kernel_calc_layer2, 7, sizeof(cl_mem), (void *)&dev_levtable);
- ezcl_set_kernel_arg(kernel_calc_layer2, 8, sizeof(cl_mem), (void *)&dev_level);
- ezcl_set_kernel_arg(kernel_calc_layer2, 9, sizeof(cl_mem), (void *)&dev_border_cell_i);
- ezcl_set_kernel_arg(kernel_calc_layer2, 10, sizeof(cl_mem), (void *)&dev_border_cell_j);
- ezcl_set_kernel_arg(kernel_calc_layer2, 11, sizeof(cl_mem), (void *)&dev_border_cell_level);
- ezcl_set_kernel_arg(kernel_calc_layer2, 12, sizeof(cl_mem), (void *)&dev_border_cell_needed);
- ezcl_set_kernel_arg(kernel_calc_layer2, 13, sizeof(cl_mem), (void *)&dev_border_cell_needed_out);
- ezcl_set_kernel_arg(kernel_calc_layer2, 14, sizeof(cl_mem), (void *)&dev_hash_header);
- ezcl_set_kernel_arg(kernel_calc_layer2, 15, sizeof(cl_mem), (void *)&dev_hash);
- ezcl_set_kernel_arg(kernel_calc_layer2, 16, sizeof(cl_mem), (void *)&dev_ioffset);
- ezcl_set_kernel_arg(kernel_calc_layer2, 17, sizeof(cl_mem), (void *)&dev_nbpacked);
- ezcl_set_kernel_arg(kernel_calc_layer2, 18, nb_local_work_size*sizeof(cl_mem), NULL);
- ezcl_enqueue_ndrange_kernel(command_queue, kernel_calc_layer2, 1, NULL, &nb_global_work_size, &nb_local_work_size, NULL);
-
- if (DEBUG){
- vector<int> border_cell_needed_local(nbsize_local);
-
- ezcl_enqueue_read_buffer(command_queue, dev_border_cell_needed_out, CL_TRUE, 0, nbsize_local*sizeof(cl_int), &border_cell_needed_local[0], NULL);
- for(int ic=0; ic<nbsize_local; ic++){
- if (border_cell_needed_local[ic] <= 0) continue;
- if (border_cell_needed_local[ic] < 0x0016) fprintf(fp,"%d: First set of needed cells ic %3d cell %3d type %3d\n",mype,ic,border_cell_num_local[ic],border_cell_needed_local[ic]);
- if (border_cell_needed_local[ic] >= 0x0016) fprintf(fp,"%d: Second set of needed cells ic %3d cell %3d type %3d\n",mype,ic,border_cell_num_local[ic],border_cell_needed_local[ic]);
- }
- }
-
- free(border_cell_num_local);
-
- ezcl_device_memory_delete(dev_border_cell_needed);
-
- ezcl_set_kernel_arg(kernel_finish_scan, 0, sizeof(cl_int), (void *)&group_size);
- ezcl_set_kernel_arg(kernel_finish_scan, 1, sizeof(cl_mem), (void *)&dev_ioffset);
- ezcl_set_kernel_arg(kernel_finish_scan, 2, sizeof(cl_mem), (void *)&dev_nbpacked);
- ezcl_set_kernel_arg(kernel_finish_scan, 3, nb_local_work_size*sizeof(cl_int), NULL);
- ezcl_enqueue_ndrange_kernel(command_queue, kernel_finish_scan, 1, NULL, &nb_local_work_size, &nb_local_work_size, NULL);
-
- int nbpacked;
- ezcl_enqueue_read_buffer(command_queue, dev_nbpacked, CL_TRUE, 0, 1*sizeof(cl_int), &nbpacked, NULL);
- ezcl_device_memory_delete(dev_nbpacked);
-
- if (TIMING_LEVEL >= 2) {
- gpu_timers[MESH_TIMER_LAYER2] += (long)(cpu_timer_stop(tstart_lev2)*1.0e9);
- cpu_timer_start(&tstart_lev2);
- }
-
- nbsize_long = nbsize_local;
- cl_mem dev_border_cell_i_new = ezcl_malloc(NULL, const_cast<char *>("dev_border_cell_i_new"), &nbsize_long, sizeof(cl_int), CL_MEM_READ_WRITE, 0);
- cl_mem dev_border_cell_j_new = ezcl_malloc(NULL, const_cast<char *>("dev_border_cell_j_new"), &nbsize_long, sizeof(cl_int), CL_MEM_READ_WRITE, 0);
- cl_mem dev_border_cell_level_new = ezcl_malloc(NULL, const_cast<char *>("dev_border_cell_level_new"), &nbsize_long, sizeof(cl_int), CL_MEM_READ_WRITE, 0);
- cl_mem dev_indices_needed = ezcl_malloc(NULL, const_cast<char *>("dev_indices_needed"), &nbsize_long, sizeof(cl_int), CL_MEM_READ_WRITE, 0);
-
- cl_event get_border_data2_event;
-
- ezcl_set_kernel_arg(kernel_get_border_data2, 0, sizeof(cl_int), (void *)&nbsize_local);
- ezcl_set_kernel_arg(kernel_get_border_data2, 1, sizeof(cl_mem), (void *)&dev_ioffset);
- ezcl_set_kernel_arg(kernel_get_border_data2, 2, sizeof(cl_mem), (void *)&dev_border_cell_needed_out);
- ezcl_set_kernel_arg(kernel_get_border_data2, 3, sizeof(cl_mem), (void *)&dev_border_cell_i);
- ezcl_set_kernel_arg(kernel_get_border_data2, 4, sizeof(cl_mem), (void *)&dev_border_cell_j);
- ezcl_set_kernel_arg(kernel_get_border_data2, 5, sizeof(cl_mem), (void *)&dev_border_cell_level);
- ezcl_set_kernel_arg(kernel_get_border_data2, 6, sizeof(cl_mem), (void *)&dev_border_cell_num);
- ezcl_set_kernel_arg(kernel_get_border_data2, 7, sizeof(cl_mem), (void *)&dev_border_cell_i_new);
- ezcl_set_kernel_arg(kernel_get_border_data2, 8, sizeof(cl_mem), (void *)&dev_border_cell_j_new);
- ezcl_set_kernel_arg(kernel_get_border_data2, 9, sizeof(cl_mem), (void *)&dev_border_cell_level_new);
- ezcl_set_kernel_arg(kernel_get_border_data2, 10, sizeof(cl_mem), (void *)&dev_indices_needed);
- ezcl_set_kernel_arg(kernel_get_border_data2, 11, local_work_size*sizeof(cl_uint), NULL);
- ezcl_enqueue_ndrange_kernel(command_queue, kernel_get_border_data2, 1, NULL, &nb_global_work_size, &nb_local_work_size, &get_border_data2_event);
-
- ezcl_device_memory_delete(dev_border_cell_num);
-
- ezcl_device_memory_swap(&dev_border_cell_i, &dev_border_cell_i_new);
- ezcl_device_memory_swap(&dev_border_cell_j, &dev_border_cell_j_new);
- ezcl_device_memory_swap(&dev_border_cell_level, &dev_border_cell_level_new);
-
- size_t nbp_local_work_size = 128;
- size_t nbp_global_work_size = ((nbpacked + nbp_local_work_size - 1) /nbp_local_work_size) * nbp_local_work_size;
-
- cl_event calc_layer2_sethash_event;
-
- ezcl_set_kernel_arg(kernel_calc_layer2_sethash, 0, sizeof(cl_int), (void *)&nbpacked);
- ezcl_set_kernel_arg(kernel_calc_layer2_sethash, 1, sizeof(cl_int), (void *)&ncells);
- ezcl_set_kernel_arg(kernel_calc_layer2_sethash, 2, sizeof(cl_int), (void *)&noffset);
- ezcl_set_kernel_arg(kernel_calc_layer2_sethash, 3, sizeof(cl_int), (void *)&levmx);
- ezcl_set_kernel_arg(kernel_calc_layer2_sethash, 4, sizeof(cl_int), (void *)&imax);
- ezcl_set_kernel_arg(kernel_calc_layer2_sethash, 5, sizeof(cl_int), (void *)&jmax);
- ezcl_set_kernel_arg(kernel_calc_layer2_sethash, 6, sizeof(cl_mem), (void *)&dev_sizes);
- ezcl_set_kernel_arg(kernel_calc_layer2_sethash, 7, sizeof(cl_mem), (void *)&dev_levtable);
- ezcl_set_kernel_arg(kernel_calc_layer2_sethash, 8, sizeof(cl_mem), (void *)&dev_levibeg);
- ezcl_set_kernel_arg(kernel_calc_layer2_sethash, 9, sizeof(cl_mem), (void *)&dev_leviend);
- ezcl_set_kernel_arg(kernel_calc_layer2_sethash, 10, sizeof(cl_mem), (void *)&dev_levjbeg);
- ezcl_set_kernel_arg(kernel_calc_layer2_sethash, 11, sizeof(cl_mem), (void *)&dev_levjend);
- ezcl_set_kernel_arg(kernel_calc_layer2_sethash, 12, sizeof(cl_mem), (void *)&dev_border_cell_i);
- ezcl_set_kernel_arg(kernel_calc_layer2_sethash, 13, sizeof(cl_mem), (void *)&dev_border_cell_j);
- ezcl_set_kernel_arg(kernel_calc_layer2_sethash, 14, sizeof(cl_mem), (void *)&dev_border_cell_level);
- ezcl_set_kernel_arg(kernel_calc_layer2_sethash, 15, sizeof(cl_mem), (void *)&dev_indices_needed);
- ezcl_set_kernel_arg(kernel_calc_layer2_sethash, 16, sizeof(cl_mem), (void *)&dev_border_cell_needed_out);
- ezcl_set_kernel_arg(kernel_calc_layer2_sethash, 17, sizeof(cl_mem), (void *)&dev_hash_header);
- ezcl_set_kernel_arg(kernel_calc_layer2_sethash, 18, sizeof(cl_mem), (void *)&dev_hash);
- ezcl_enqueue_ndrange_kernel(command_queue, kernel_calc_layer2_sethash, 1, NULL, &nbp_global_work_size, &nbp_local_work_size, &calc_layer2_sethash_event);
-
- ezcl_wait_for_events(1, &calc_layer2_sethash_event);
- ezcl_event_release(calc_layer2_sethash_event);
-
- ezcl_device_memory_delete(dev_ioffset);
-
- ezcl_wait_for_events(1, &get_border_data2_event);
- ezcl_event_release(get_border_data2_event);
-
- if (TIMING_LEVEL >= 2) {
- gpu_timers[MESH_TIMER_LAYER_LIST] += (long)(cpu_timer_stop(tstart_lev2)*1.0e9);
- cpu_timer_start(&tstart_lev2);
- }
-
- vector<int> indices_needed(nbpacked);
-
- // read gpu border cell data
- ezcl_enqueue_read_buffer(command_queue, dev_indices_needed, CL_TRUE, 0, nbpacked*sizeof(cl_int), &indices_needed[0], NULL);
-
- ezcl_device_memory_delete(dev_border_cell_i_new);
- ezcl_device_memory_delete(dev_border_cell_j_new);
- ezcl_device_memory_delete(dev_border_cell_level_new);
-
- if (DEBUG) {
- print_dev_local();
-
- vector<int> hash_tmp(hashsize);
- ezcl_enqueue_read_buffer(command_queue, dev_hash, CL_TRUE, 0, hashsize*sizeof(cl_int), &hash_tmp[0], NULL);
-
- cl_mem dev_hash_header_check = gpu_get_hash_header();
- vector<ulong> hash_header_check(hash_header_size);
- ezcl_enqueue_read_buffer(command_queue, dev_hash_header_check, CL_TRUE, 0, hash_header_size*sizeof(cl_ulong), &hash_header_check[0], NULL);
-
- int gpu_hash_method = (int)hash_header_check[0];
- ulong gpu_hash_table_size = hash_header_check[1];
- ulong gpu_AA = hash_header_check[2];
- ulong gpu_BB = hash_header_check[3];
-
- int jmaxglobal = (jmax+1)*IPOW2(levmx);
- int imaxglobal = (imax+1)*IPOW2(levmx);
- fprintf(fp,"\n HASH numbering for 2 layer\n");
- for (int jj = jmaxglobal-1; jj>=0; jj--){
- fprintf(fp,"%2d: %4d:",mype,jj);
- if (jj >= jminsize && jj < jmaxsize) {
- for (int ii = 0; ii<imaxglobal; ii++){
- if (ii >= iminsize && ii < imaxsize) {
- fprintf(fp,"%5d",read_dev_hash(gpu_hash_method, gpu_hash_table_size, gpu_AA, gpu_BB, (jj-jminsize)*(imaxsize-iminsize)+(ii-iminsize), &hash_tmp[0]) );
- } else {
- fprintf(fp," ");
- }
- }
- }
- fprintf(fp,"\n");
- }
- fprintf(fp,"%2d: ",mype);
- for (int ii = 0; ii<imaxglobal; ii++){
- fprintf(fp,"%4d:",ii);
- }
- fprintf(fp,"\n");
- fflush(fp);
- }
-
- ezcl_device_memory_delete(dev_border_cell_needed_out);
-
- int nghost = nbpacked;
- ncells_ghost = ncells + nghost;
-
- //if (mype == 1) printf("%d: DEBUG before expanding memory ncells %ld ncells_ghost %ld capacity %ld\n",mype,ncells,ncells_ghost,ezcl_get_device_mem_capacity(dev_i));
- if (ezcl_get_device_mem_capacity(dev_celltype) < ncells_ghost ||
- ezcl_get_device_mem_capacity(dev_i) < ncells_ghost ||
- ezcl_get_device_mem_capacity(dev_j) < ncells_ghost ||
- ezcl_get_device_mem_capacity(dev_level) < ncells_ghost ||
- ezcl_get_device_mem_capacity(dev_nlft) < ncells_ghost ||
- ezcl_get_device_mem_capacity(dev_nrht) < ncells_ghost ||
- ezcl_get_device_mem_capacity(dev_nbot) < ncells_ghost ||
- ezcl_get_device_mem_capacity(dev_ntop) < ncells_ghost ) {
-
- //if (mype == 0) printf("%d: DEBUG expanding memory ncells %ld ncells_ghost %ld capacity %ld\n",mype,ncells,ncells_ghost,ezcl_get_device_mem_capacity(dev_i));
- //printf("%d: DEBUG expanding memory ncells %ld ncells_ghost %ld capacity %ld\n",mype,ncells,ncells_ghost,ezcl_get_device_mem_capacity(dev_i));
- mem_factor = (float)(ncells_ghost/ncells);
- cl_mem dev_celltype_old = ezcl_malloc(NULL, const_cast<char *>("dev_celltype_old"), &ncells_ghost, sizeof(cl_int), CL_MEM_READ_WRITE, 0);
- cl_mem dev_i_old = ezcl_malloc(NULL, const_cast<char *>("dev_i_old"), &ncells_ghost, sizeof(cl_int), CL_MEM_READ_WRITE, 0);
- cl_mem dev_j_old = ezcl_malloc(NULL, const_cast<char *>("dev_j_old"), &ncells_ghost, sizeof(cl_int), CL_MEM_READ_WRITE, 0);
- cl_mem dev_level_old = ezcl_malloc(NULL, const_cast<char *>("dev_level_old"), &ncells_ghost, sizeof(cl_int), CL_MEM_READ_WRITE, 0);
- cl_mem dev_nlft_old = ezcl_malloc(NULL, const_cast<char *>("dev_nlft_old"), &ncells_ghost, sizeof(cl_int), CL_MEM_READ_WRITE, 0);
- cl_mem dev_nrht_old = ezcl_malloc(NULL, const_cast<char *>("dev_nrht_old"), &ncells_ghost, sizeof(cl_int), CL_MEM_READ_WRITE, 0);
- cl_mem dev_nbot_old = ezcl_malloc(NULL, const_cast<char *>("dev_nbot_old"), &ncells_ghost, sizeof(cl_int), CL_MEM_READ_WRITE, 0);
- cl_mem dev_ntop_old = ezcl_malloc(NULL, const_cast<char *>("dev_ntop_old"), &ncells_ghost, sizeof(cl_int), CL_MEM_READ_WRITE, 0);
-
- ezcl_device_memory_swap(&dev_celltype_old, &dev_celltype);
- ezcl_device_memory_swap(&dev_i_old, &dev_i );
- ezcl_device_memory_swap(&dev_j_old, &dev_j );
- ezcl_device_memory_swap(&dev_level_old, &dev_level );
- ezcl_device_memory_swap(&dev_nlft_old, &dev_nlft );
- ezcl_device_memory_swap(&dev_nrht_old, &dev_nrht );
- ezcl_device_memory_swap(&dev_nbot_old, &dev_nbot );
- ezcl_device_memory_swap(&dev_ntop_old, &dev_ntop );
-
- cl_event copy_mesh_data_event;
-
- ezcl_set_kernel_arg(kernel_copy_mesh_data, 0, sizeof(cl_int), (void *)&ncells);
- ezcl_set_kernel_arg(kernel_copy_mesh_data, 1, sizeof(cl_mem), (void *)&dev_celltype_old);
- ezcl_set_kernel_arg(kernel_copy_mesh_data, 2, sizeof(cl_mem), (void *)&dev_celltype);
- ezcl_set_kernel_arg(kernel_copy_mesh_data, 3, sizeof(cl_mem), (void *)&dev_i_old);
- ezcl_set_kernel_arg(kernel_copy_mesh_data, 4, sizeof(cl_mem), (void *)&dev_i);
- ezcl_set_kernel_arg(kernel_copy_mesh_data, 5, sizeof(cl_mem), (void *)&dev_j_old);
- ezcl_set_kernel_arg(kernel_copy_mesh_data, 6, sizeof(cl_mem), (void *)&dev_j);
- ezcl_set_kernel_arg(kernel_copy_mesh_data, 7, sizeof(cl_mem), (void *)&dev_level_old);
- ezcl_set_kernel_arg(kernel_copy_mesh_data, 8, sizeof(cl_mem), (void *)&dev_level);
- ezcl_set_kernel_arg(kernel_copy_mesh_data, 9, sizeof(cl_mem), (void *)&dev_nlft_old);
- ezcl_set_kernel_arg(kernel_copy_mesh_data, 10, sizeof(cl_mem), (void *)&dev_nlft);
- ezcl_set_kernel_arg(kernel_copy_mesh_data, 11, sizeof(cl_mem), (void *)&dev_nrht_old);
- ezcl_set_kernel_arg(kernel_copy_mesh_data, 12, sizeof(cl_mem), (void *)&dev_nrht);
- ezcl_set_kernel_arg(kernel_copy_mesh_data, 13, sizeof(cl_mem), (void *)&dev_nbot_old);
- ezcl_set_kernel_arg(kernel_copy_mesh_data, 14, sizeof(cl_mem), (void *)&dev_nbot);
- ezcl_set_kernel_arg(kernel_copy_mesh_data, 15, sizeof(cl_mem), (void *)&dev_ntop_old);
- ezcl_set_kernel_arg(kernel_copy_mesh_data, 16, sizeof(cl_mem), (void *)&dev_ntop);
-
- ezcl_enqueue_ndrange_kernel(command_queue, kernel_copy_mesh_data, 1, NULL, &global_work_size, &local_work_size, &copy_mesh_data_event);
-
- ezcl_device_memory_delete(dev_celltype_old);
- ezcl_device_memory_delete(dev_i_old);
- ezcl_device_memory_delete(dev_j_old);
- ezcl_device_memory_delete(dev_level_old);
- ezcl_device_memory_delete(dev_nlft_old);
- ezcl_device_memory_delete(dev_nrht_old);
- ezcl_device_memory_delete(dev_nbot_old);
- ezcl_device_memory_delete(dev_ntop_old);
-
- ezcl_wait_for_events(1, &copy_mesh_data_event);
- ezcl_event_release(copy_mesh_data_event);
- }
-
- if (TIMING_LEVEL >= 2) {
- gpu_timers[MESH_TIMER_COPY_MESH_DATA] += (long)(cpu_timer_stop(tstart_lev2)*1.0e9);
- cpu_timer_start(&tstart_lev2);
- }
-
- nb_global_work_size = ((nbpacked + nb_local_work_size - 1) /nb_local_work_size) * nb_local_work_size;
-
-#ifdef BOUNDS_CHECK
- if (ezcl_get_device_mem_nelements(dev_i) < (int)ncells_ghost ||
- ezcl_get_device_mem_nelements(dev_j) < (int)ncells_ghost ||
- ezcl_get_device_mem_nelements(dev_level) < (int)ncells_ghost ||
- ezcl_get_device_mem_nelements(dev_celltype) < (int)ncells_ghost ||
- ezcl_get_device_mem_nelements(dev_nlft) < (int)ncells_ghost ||
- ezcl_get_device_mem_nelements(dev_nrht) < (int)ncells_ghost ||
- ezcl_get_device_mem_nelements(dev_nbot) < (int)ncells_ghost ||
- ezcl_get_device_mem_nelements(dev_ntop) < (int)ncells_ghost ){
- printf("DEBUG size issue at %d\n",__LINE__);
- }
- if (ezcl_get_device_mem_nelements(dev_border_cell_i) < nbpacked ||
- ezcl_get_device_mem_nelements(dev_border_cell_j) < nbpacked ||
- ezcl_get_device_mem_nelements(dev_border_cell_level) < nbpacked ){
- printf("DEBUG size issue at %d\n",__LINE__);
- }
-#endif
-
- cl_event fill_mesh_ghost_event;
-
- ezcl_set_kernel_arg(kernel_fill_mesh_ghost, 0, sizeof(cl_int), (void *)&nbpacked);
- ezcl_set_kernel_arg(kernel_fill_mesh_ghost, 1, sizeof(cl_int), (void *)&ncells);
- ezcl_set_kernel_arg(kernel_fill_mesh_ghost, 2, sizeof(cl_mem), (void *)&dev_levibeg);
- ezcl_set_kernel_arg(kernel_fill_mesh_ghost, 3, sizeof(cl_mem), (void *)&dev_leviend);
- ezcl_set_kernel_arg(kernel_fill_mesh_ghost, 4, sizeof(cl_mem), (void *)&dev_levjbeg);
- ezcl_set_kernel_arg(kernel_fill_mesh_ghost, 5, sizeof(cl_mem), (void *)&dev_levjend);
- ezcl_set_kernel_arg(kernel_fill_mesh_ghost, 6, sizeof(cl_mem), (void *)&dev_border_cell_i);
- ezcl_set_kernel_arg(kernel_fill_mesh_ghost, 7, sizeof(cl_mem), (void *)&dev_border_cell_j);
- ezcl_set_kernel_arg(kernel_fill_mesh_ghost, 8, sizeof(cl_mem), (void *)&dev_border_cell_level);
- ezcl_set_kernel_arg(kernel_fill_mesh_ghost, 9, sizeof(cl_mem), (void *)&dev_i);
- ezcl_set_kernel_arg(kernel_fill_mesh_ghost, 10, sizeof(cl_mem), (void *)&dev_j);
- ezcl_set_kernel_arg(kernel_fill_mesh_ghost, 11, sizeof(cl_mem), (void *)&dev_level);
- ezcl_set_kernel_arg(kernel_fill_mesh_ghost, 12, sizeof(cl_mem), (void *)&dev_celltype);
- ezcl_set_kernel_arg(kernel_fill_mesh_ghost, 13, sizeof(cl_mem), (void *)&dev_nlft);
- ezcl_set_kernel_arg(kernel_fill_mesh_ghost, 14, sizeof(cl_mem), (void *)&dev_nrht);
- ezcl_set_kernel_arg(kernel_fill_mesh_ghost, 15, sizeof(cl_mem), (void *)&dev_nbot);
- ezcl_set_kernel_arg(kernel_fill_mesh_ghost, 16, sizeof(cl_mem), (void *)&dev_ntop);
- ezcl_enqueue_ndrange_kernel(command_queue, kernel_fill_mesh_ghost, 1, NULL, &nb_global_work_size, &nb_local_work_size, &fill_mesh_ghost_event);
-
- ezcl_wait_for_events(1, &fill_mesh_ghost_event);
- ezcl_event_release(fill_mesh_ghost_event);
-
- if (TIMING_LEVEL >= 2) {
- gpu_timers[MESH_TIMER_FILL_MESH_GHOST] += (long)(cpu_timer_stop(tstart_lev2)*1.0e9);
- cpu_timer_start(&tstart_lev2);
- }
-
- if (DEBUG){
- fprintf(fp,"After copying i,j, level to ghost cells\n");
- print_dev_local();
- }
-
- ezcl_device_memory_delete(dev_border_cell_i);
- ezcl_device_memory_delete(dev_border_cell_j);
- ezcl_device_memory_delete(dev_border_cell_level);
-
- size_t ghost_local_work_size = 128;
- size_t ghost_global_work_size = ((ncells_ghost + ghost_local_work_size - 1) /ghost_local_work_size) * ghost_local_work_size;
-
- cl_event fill_neighbor_ghost_event;
-
- ezcl_set_kernel_arg(kernel_fill_neighbor_ghost, 0, sizeof(cl_int), (void *)&ncells_ghost);
- ezcl_set_kernel_arg(kernel_fill_neighbor_ghost, 1, sizeof(cl_int), (void *)&levmx);
- ezcl_set_kernel_arg(kernel_fill_neighbor_ghost, 2, sizeof(cl_int), (void *)&imax);
- ezcl_set_kernel_arg(kernel_fill_neighbor_ghost, 3, sizeof(cl_int), (void *)&jmax);
- ezcl_set_kernel_arg(kernel_fill_neighbor_ghost, 4, sizeof(cl_mem), (void *)&dev_sizes);
- ezcl_set_kernel_arg(kernel_fill_neighbor_ghost, 5, sizeof(cl_mem), (void *)&dev_levtable);
- ezcl_set_kernel_arg(kernel_fill_neighbor_ghost, 6, sizeof(cl_mem), (void *)&dev_i);
- ezcl_set_kernel_arg(kernel_fill_neighbor_ghost, 7, sizeof(cl_mem), (void *)&dev_j);
- ezcl_set_kernel_arg(kernel_fill_neighbor_ghost, 8, sizeof(cl_mem), (void *)&dev_level);
- ezcl_set_kernel_arg(kernel_fill_neighbor_ghost, 9, sizeof(cl_mem), (void *)&dev_hash_header);
- ezcl_set_kernel_arg(kernel_fill_neighbor_ghost, 10, sizeof(cl_mem), (void *)&dev_hash);
- ezcl_set_kernel_arg(kernel_fill_neighbor_ghost, 11, sizeof(cl_mem), (void *)&dev_nlft);
- ezcl_set_kernel_arg(kernel_fill_neighbor_ghost, 12, sizeof(cl_mem), (void *)&dev_nrht);
- ezcl_set_kernel_arg(kernel_fill_neighbor_ghost, 13, sizeof(cl_mem), (void *)&dev_nbot);
- ezcl_set_kernel_arg(kernel_fill_neighbor_ghost, 14, sizeof(cl_mem), (void *)&dev_ntop);
- ezcl_enqueue_ndrange_kernel(command_queue, kernel_fill_neighbor_ghost, 1, NULL, &ghost_global_work_size, &ghost_local_work_size, &fill_neighbor_ghost_event);
-
- ezcl_wait_for_events(1, &fill_neighbor_ghost_event);
- ezcl_event_release(fill_neighbor_ghost_event);
-
- if (TIMING_LEVEL >= 2) {
- gpu_timers[MESH_TIMER_FILL_NEIGH_GHOST] += (long)(cpu_timer_stop(tstart_lev2)*1.0e9);
- cpu_timer_start(&tstart_lev2);
- }
-
- if (DEBUG){
- fprintf(fp,"After setting neighbors through ghost cells\n");
- print_dev_local();
- }
-
-#ifdef BOUNDS_CHECK
- if (ezcl_get_device_mem_nelements(dev_nlft) < (int)ncells_ghost ||
- ezcl_get_device_mem_nelements(dev_nrht) < (int)ncells_ghost ||
- ezcl_get_device_mem_nelements(dev_nbot) < (int)ncells_ghost ||
- ezcl_get_device_mem_nelements(dev_ntop) < (int)ncells_ghost ){
- printf("%d: Warning sizes for set_corner_neighbor not right ncells ghost %d nlft size %d\n",mype,ncells_ghost,ezcl_get_device_mem_nelements(dev_nlft));
- }
-#endif
-
- if (TIMING_LEVEL >= 2) {
- gpu_timers[MESH_TIMER_SET_CORNER_NEIGH] += (long)(cpu_timer_stop(tstart_lev2)*1.0e9);
- cpu_timer_start(&tstart_lev2);
- }
-
- if (DEBUG){
- fprintf(fp,"After setting corner neighbors\n");
- print_dev_local();
- }
-
-#ifdef BOUNDS_CHECK
- if (ezcl_get_device_mem_nelements(dev_nlft) < (int)ncells_ghost ||
- ezcl_get_device_mem_nelements(dev_nrht) < (int)ncells_ghost ||
- ezcl_get_device_mem_nelements(dev_nbot) < (int)ncells_ghost ||
- ezcl_get_device_mem_nelements(dev_ntop) < (int)ncells_ghost ){
- printf("%d: Warning sizes for adjust neighbors not right ncells ghost %d nlft size %d\n",mype,ncells_ghost,ezcl_get_device_mem_nelements(dev_nlft));
- }
- if (ezcl_get_device_mem_nelements(dev_indices_needed) < (int)(ncells_ghost-ncells) ){
- printf("%d: Warning indices size wrong nghost %d size indices_needed\n",mype,ncells_ghost-ncells,ezcl_get_device_mem_nelements(dev_indices_needed));
- }
-#endif
-
- cl_event adjust_neighbors_local_event;
-
- ezcl_set_kernel_arg(kernel_adjust_neighbors_local, 0, sizeof(cl_int), (void *)&ncells_ghost);
- ezcl_set_kernel_arg(kernel_adjust_neighbors_local, 1, sizeof(cl_int), (void *)&ncells);
- ezcl_set_kernel_arg(kernel_adjust_neighbors_local, 2, sizeof(cl_int), (void *)&noffset);
- ezcl_set_kernel_arg(kernel_adjust_neighbors_local, 3, sizeof(cl_mem), (void *)&dev_indices_needed);
- ezcl_set_kernel_arg(kernel_adjust_neighbors_local, 4, sizeof(cl_mem), (void *)&dev_nlft);
- ezcl_set_kernel_arg(kernel_adjust_neighbors_local, 5, sizeof(cl_mem), (void *)&dev_nrht);
- ezcl_set_kernel_arg(kernel_adjust_neighbors_local, 6, sizeof(cl_mem), (void *)&dev_nbot);
- ezcl_set_kernel_arg(kernel_adjust_neighbors_local, 7, sizeof(cl_mem), (void *)&dev_ntop);
- ezcl_enqueue_ndrange_kernel(command_queue, kernel_adjust_neighbors_local, 1, NULL, &ghost_global_work_size, &ghost_local_work_size, &adjust_neighbors_local_event);
-
- ezcl_device_memory_delete(dev_indices_needed);
-
- if (DEBUG){
- fprintf(fp,"After adjusting neighbors to local indices\n");
- print_dev_local();
- }
-
- ezcl_wait_for_events(1, &adjust_neighbors_local_event);
- ezcl_event_release(adjust_neighbors_local_event);
-
- if (TIMING_LEVEL >= 2) {
- gpu_timers[MESH_TIMER_NEIGH_ADJUST] += (long)(cpu_timer_stop(tstart_lev2)*1.0e9);
- cpu_timer_start(&tstart_lev2);
- }
-
- offtile_ratio_local = (offtile_ratio_local*(double)offtile_local_count) + ((double)nghost / (double)ncells);
- offtile_local_count++;
- offtile_ratio_local /= offtile_local_count;
-
- if (cell_handle) L7_Free(&cell_handle);
- cell_handle=0;
-
- if (DEBUG){
- fprintf(fp,"%d: SETUP ncells %ld noffset %d nghost %d\n",mype,ncells,noffset,nghost);
- for (int ic=0; ic<nghost; ic++){
- fprintf(fp,"%d: indices needed ic %d index %d\n",mype,ic,indices_needed[ic]);
- }
- }
-
- L7_Dev_Setup(0, noffset, ncells, &indices_needed[0], nghost, &cell_handle);
-
-#ifdef BOUNDS_CHECK
- {
- vector<int> nlft_tmp(ncells_ghost);
- vector<int> nrht_tmp(ncells_ghost);
- vector<int> nbot_tmp(ncells_ghost);
- vector<int> ntop_tmp(ncells_ghost);
- vector<int> level_tmp(ncells_ghost);
- vector<real_t> H_tmp(ncells_ghost);
- ezcl_enqueue_read_buffer(command_queue, dev_nlft, CL_FALSE, 0, ncells_ghost*sizeof(cl_int), &nlft_tmp[0], NULL);
- ezcl_enqueue_read_buffer(command_queue, dev_nrht, CL_FALSE, 0, ncells_ghost*sizeof(cl_int), &nrht_tmp[0], NULL);
- ezcl_enqueue_read_buffer(command_queue, dev_nbot, CL_FALSE, 0, ncells_ghost*sizeof(cl_int), &nbot_tmp[0], NULL);
- ezcl_enqueue_read_buffer(command_queue, dev_ntop, CL_FALSE, 0, ncells_ghost*sizeof(cl_int), &ntop_tmp[0], NULL);
- ezcl_enqueue_read_buffer(command_queue, dev_level, CL_TRUE, 0, ncells_ghost*sizeof(cl_int), &level_tmp[0], NULL);
- for (uint ic=0; ic<ncells; ic++){
- int nl = nlft_tmp[ic];
- if (nl<0 || nl>= (int)ncells_ghost) printf("%d: Warning at line %d cell %d nlft %d\n",mype,__LINE__,ic,nl);
- if (level_tmp[nl] > level_tmp[ic]){
- int ntl = ntop_tmp[nl];
- if (ntl<0 || ntl>= (int)ncells_ghost) printf("%d: Warning at line %d cell %d global %d nlft %d ntop of nlft %d\n",mype,__LINE__,ic,ic+noffset,nl,ntl);
- }
- int nr = nrht_tmp[ic];
- if (nr<0 || nr>= (int)ncells_ghost) printf("%d: Warning at line %d cell %d nrht %d\n",mype,__LINE__,ic,nr);
- if (level_tmp[nr] > level_tmp[ic]){
- int ntr = ntop_tmp[nr];
- if (ntr<0 || ntr>= (int)ncells_ghost) printf("%d: Warning at line %d cell %d ntop of nrht %d\n",mype,__LINE__,ic,ntr);
- }
- int nb = nbot_tmp[ic];
- if (nb<0 || nb>= (int)ncells_ghost) printf("%d: Warning at line %d cell %d nbot %d\n",mype,__LINE__,ic,nb);
- if (level_tmp[nb] > level_tmp[ic]){
- int nrb = nrht_tmp[nb];
- if (nrb<0 || nrb>= (int)ncells_ghost) printf("%d: Warning at line %d cell %d nrht of nbot %d\n",mype,__LINE__,ic,nrb);
- }
- int nt = ntop_tmp[ic];
- if (nt<0 || nt>= (int)ncells_ghost) printf("%d: Warning at line %d cell %d global %d ntop %d ncells %ld ncells_ghost %ld\n",mype,__LINE__,ic,ic+noffset,nt,ncells,ncells_ghost);
- if (level_tmp[nt] > level_tmp[ic]){
- int nrt = nrht_tmp[nt];
- if (nrt<0 || nrt>= (int)ncells_ghost) printf("%d: Warning at line %d cell %d nrht of ntop %d\n",mype,__LINE__,ic,nrt);
- }
- }
- }
-#endif
-
- if (TIMING_LEVEL >= 2) {
- gpu_timers[MESH_TIMER_SETUP_COMM] += (long)(cpu_timer_stop(tstart_lev2)*1.0e9);
- }
-
- if (DEBUG) {
- print_dev_local();
-
- vector<int> hash_tmp(hashsize);
- ezcl_enqueue_read_buffer(command_queue, dev_hash, CL_FALSE, 0, hashsize*sizeof(cl_int), &hash_tmp[0], NULL);
-
- cl_mem dev_hash_header_check = gpu_get_hash_header();
- vector<ulong> hash_header_check(hash_header_size);
- ezcl_enqueue_read_buffer(command_queue, dev_hash_header_check, CL_TRUE, 0, hash_header_size*sizeof(cl_ulong), &hash_header_check[0], NULL);
-
- int gpu_hash_method = (int)hash_header_check[0];
- ulong gpu_hash_table_size = hash_header_check[1];
- ulong gpu_AA = hash_header_check[2];
- ulong gpu_BB = hash_header_check[3];
-
- vector<int> nlft_tmp(ncells_ghost);
- vector<int> nrht_tmp(ncells_ghost);
- vector<int> nbot_tmp(ncells_ghost);
- vector<int> ntop_tmp(ncells_ghost);
- ezcl_enqueue_read_buffer(command_queue, dev_nlft, CL_FALSE, 0, ncells_ghost*sizeof(cl_int), &nlft_tmp[0], NULL);
- ezcl_enqueue_read_buffer(command_queue, dev_nrht, CL_FALSE, 0, ncells_ghost*sizeof(cl_int), &nrht_tmp[0], NULL);
- ezcl_enqueue_read_buffer(command_queue, dev_nbot, CL_FALSE, 0, ncells_ghost*sizeof(cl_int), &nbot_tmp[0], NULL);
- ezcl_enqueue_read_buffer(command_queue, dev_ntop, CL_TRUE, 0, ncells_ghost*sizeof(cl_int), &ntop_tmp[0], NULL);
-
- int jmaxglobal = (jmax+1)*IPOW2(levmx);
- int imaxglobal = (imax+1)*IPOW2(levmx);
- fprintf(fp,"\n HASH numbering\n");
- for (int jj = jmaxglobal-1; jj>=0; jj--){
- fprintf(fp,"%2d: %4d:",mype,jj);
- if (jj >= jminsize && jj < jmaxsize) {
- for (int ii = 0; ii<imaxglobal; ii++){
- if (ii >= iminsize && ii < imaxsize) {
- fprintf(fp,"%5d",read_dev_hash(gpu_hash_method, gpu_hash_table_size, gpu_AA, gpu_BB, (jj-jminsize)*(imaxsize-iminsize)+(ii-iminsize), &hash_tmp[0]) );
- } else {
- fprintf(fp," ");
- }
- }
- }
- fprintf(fp,"\n");
- }
- fprintf(fp,"%2d: ",mype);
- for (int ii = 0; ii<imaxglobal; ii++){
- fprintf(fp,"%4d:",ii);
- }
- fprintf(fp,"\n");
-
- fprintf(fp,"\n nlft numbering\n");
- for (int jj = jmaxglobal-1; jj>=0; jj--){
- fprintf(fp,"%2d: %4d:",mype,jj);
- if (jj >= jminsize && jj < jmaxsize) {
- for (int ii = 0; ii<imaxglobal; ii++){
- int hashval = read_dev_hash(gpu_hash_method, gpu_hash_table_size, gpu_AA, gpu_BB, (jj-jminsize)*(imaxsize-iminsize)+(ii-iminsize), &hash_tmp[0]) -noffset;
- if ( (ii >= iminsize && ii < imaxsize) && (hashval >= 0 && hashval < (int)ncells) ) {
- fprintf(fp,"%5d",nlft_tmp[hashval]);
- } else {
- fprintf(fp," ");
- }
- }
- }
- fprintf(fp,"\n");
- }
- fprintf(fp,"%2d: ",mype);
- for (int ii = 0; ii<imaxglobal; ii++){
- fprintf(fp,"%4d:",ii);
- }
- fprintf(fp,"\n");
-
- fprintf(fp,"\n nrht numbering\n");
- for (int jj = jmaxglobal-1; jj>=0; jj--){
- fprintf(fp,"%2d: %4d:",mype,jj);
- if (jj >= jminsize && jj < jmaxsize) {
- for (int ii = 0; ii<imaxglobal; ii++){
- int hashval = read_dev_hash(gpu_hash_method, gpu_hash_table_size, gpu_AA, gpu_BB, (jj-jminsize)*(imaxsize-iminsize)+(ii-iminsize), &hash_tmp[0]) -noffset;
- if ( (ii >= iminsize && ii < imaxsize) && (hashval >= 0 && hashval < (int)ncells) ) {
- fprintf(fp,"%5d",nrht_tmp[hashval]);
- } else {
- fprintf(fp," ");
- }
- }
- }
- fprintf(fp,"\n");
- }
- fprintf(fp,"%2d: ",mype);
- for (int ii = 0; ii<imaxglobal; ii++){
- fprintf(fp,"%4d:",ii);
- }
- fprintf(fp,"\n");
-
- fprintf(fp,"\n nbot numbering\n");
- for (int jj = jmaxglobal-1; jj>=0; jj--){
- fprintf(fp,"%2d: %4d:",mype,jj);
- if (jj >= jminsize && jj < jmaxsize) {
- for (int ii = 0; ii<imaxglobal; ii++){
- int hashval = read_dev_hash(gpu_hash_method, gpu_hash_table_size, gpu_AA, gpu_BB, (jj-jminsize)*(imaxsize-iminsize)+(ii-iminsize), &hash_tmp[0]) -noffset;
- if ( (ii >= iminsize && ii < imaxsize) && (hashval >= 0 && hashval < (int)ncells) ) {
- fprintf(fp,"%5d",nbot_tmp[hashval]);
- } else {
- fprintf(fp," ");
- }
- }
- }
- fprintf(fp,"\n");
- }
- fprintf(fp,"%2d: ",mype);
- for (int ii = 0; ii<imaxglobal; ii++){
- fprintf(fp,"%4d:",ii);
- }
- fprintf(fp,"\n");
-
- fprintf(fp,"\n ntop numbering\n");
- for (int jj = jmaxglobal-1; jj>=0; jj--){
- fprintf(fp,"%2d: %4d:",mype,jj);
- if (jj >= jminsize && jj < jmaxsize) {
- for (int ii = 0; ii<imaxglobal; ii++){
- int hashval = read_dev_hash(gpu_hash_method, gpu_hash_table_size, gpu_AA, gpu_BB, (jj-jminsize)*(imaxsize-iminsize)+(ii-iminsize), &hash_tmp[0]) -noffset;
- if ( (ii >= iminsize && ii < imaxsize) && (hashval >= 0 && hashval < (int)ncells) ) {
- fprintf(fp,"%5d",ntop_tmp[hashval]);
- } else {
- fprintf(fp," ");
- }
- }
- }
- fprintf(fp,"\n");
- }
- fprintf(fp,"%2d: ",mype);
- for (int ii = 0; ii<imaxglobal; ii++){
- fprintf(fp,"%4d:",ii);
- }
- fprintf(fp,"\n");
- }
-
- if (DEBUG) {
- print_dev_local();
-
- vector<int> i_tmp(ncells_ghost);
- vector<int> j_tmp(ncells_ghost);
- vector<int> level_tmp(ncells_ghost);
- vector<int> nlft_tmp(ncells_ghost);
- vector<int> nrht_tmp(ncells_ghost);
- vector<int> nbot_tmp(ncells_ghost);
- vector<int> ntop_tmp(ncells_ghost);
- ezcl_enqueue_read_buffer(command_queue, dev_i, CL_FALSE, 0, ncells_ghost*sizeof(cl_int), &i_tmp[0], NULL);
- ezcl_enqueue_read_buffer(command_queue, dev_j, CL_FALSE, 0, ncells_ghost*sizeof(cl_int), &j_tmp[0], NULL);
- ezcl_enqueue_read_buffer(command_queue, dev_level, CL_FALSE, 0, ncells_ghost*sizeof(cl_int), &level_tmp[0], NULL);
- ezcl_enqueue_read_buffer(command_queue, dev_nlft, CL_FALSE, 0, ncells_ghost*sizeof(cl_int), &nlft_tmp[0], NULL);
- ezcl_enqueue_read_buffer(command_queue, dev_nrht, CL_FALSE, 0, ncells_ghost*sizeof(cl_int), &nrht_tmp[0], NULL);
- ezcl_enqueue_read_buffer(command_queue, dev_nbot, CL_FALSE, 0, ncells_ghost*sizeof(cl_int), &nbot_tmp[0], NULL);
- ezcl_enqueue_read_buffer(command_queue, dev_ntop, CL_TRUE, 0, ncells_ghost*sizeof(cl_int), &ntop_tmp[0], NULL);
-
- for (uint ic=0; ic<ncells; ic++){
- fprintf(fp,"%d: before update ic %d i %d j %d lev %d nlft %d nrht %d nbot %d ntop %d\n",
- mype,ic,i_tmp[ic],j_tmp[ic],level_tmp[ic],nlft_tmp[ic],nrht_tmp[ic],nbot_tmp[ic],ntop_tmp[ic]);
- }
- int ig=0;
- for (uint ic=ncells; ic<ncells_ghost; ic++, ig++){
- fprintf(fp,"%d: after update ic %d off %d i %d j %d lev %d nlft %d nrht %d nbot %d ntop %d\n",
- mype,ic,indices_needed[ig],i_tmp[ic],j_tmp[ic],level_tmp[ic],nlft_tmp[ic],nrht_tmp[ic],nbot_tmp[ic],ntop_tmp[ic]);
- }
- }
- }
-#endif
-
- ezcl_device_memory_delete(dev_sizes);
- ezcl_device_memory_delete(dev_check);
-
- gpu_compact_hash_delete(dev_hash, dev_hash_header);
-
- gpu_timers[MESH_TIMER_CALC_NEIGHBORS] += (long)(cpu_timer_stop(tstart_cpu) * 1.0e9);
-}
-#endif
-
-/*
- * Print which neighbor-calculation method is selected.
- *
- * For the hash-table method, rank 0 prints the method name and, when running
- * on a single rank, also calls final_hash_collision_report(). For any other
- * method, every rank prints a size estimate and rank 0 prints the k-D tree
- * message.
- */
-void Mesh::print_calc_neighbor_type(void)
-{
-   if ( calc_neighbor_type == HASH_TABLE ) {
-      if (mype == 0) printf("Using hash tables to calculate neighbors\n");
-      if (mype == 0 && numpe == 1) final_hash_collision_report();
-   } else {
-      // The product includes sizeof(int), so its type is size_t. Convert it
-      // explicitly so the argument matches the %lu conversion.
-      unsigned long table_bytes = (unsigned long)(ncells*(int)log(ncells)*sizeof(int));
-      printf("hash table size %lu\n",table_bytes);
-      if (mype == 0) printf("Using k-D tree to calculate neighbors\n");
-   }
-}
-
-// Accessor for the neighbor-calculation method stored in calc_neighbor_type.
-int Mesh::get_calc_neighbor_type(void)
-{
-   return calc_neighbor_type;
-}
-
-/*
- * Classify each of the first ncells cells as REAL_CELL or one of the four
- * boundary types, (re)allocating the celltype array first if it is missing
- * or too small.
- *
- * With OpenMP enabled the allocation is done by the master thread between two
- * barriers and the classification loop is a worksharing "omp for" with no
- * "parallel" of its own -- presumably the caller already is inside a parallel
- * region (confirm against callers).
- */
-void Mesh::calc_celltype_threaded(size_t ncells)
-{
-   int mem_flags = 0;
-#ifdef HAVE_J7
-   if (parallel) mem_flags = LOAD_BALANCE_MEMORY;
-#endif
-
-#ifdef _OPENMP
-#pragma omp barrier
-#pragma omp master
-   {
-#endif
-      bool have_array = (celltype != NULL);
-      // The size query is only made when an array exists (short-circuit).
-      if (! have_array || mesh_memory.get_memory_size(celltype) < ncells) {
-         if (have_array) {
-            celltype = (int *)mesh_memory.memory_delete(celltype);
-         }
-         celltype = (int *)mesh_memory.memory_malloc(ncells, sizeof(int), "celltype", mem_flags);
-      }
-#ifdef _OPENMP
-   }
-#pragma omp barrier
-#endif
-
-#ifdef _OPENMP
-#pragma omp for
-#endif
-   for (uint cell=0; cell<ncells; ++cell) {
-      // Later tests overwrite earlier ones, so top wins over bottom, right, left.
-      celltype[cell] = REAL_CELL;
-      if (is_left_boundary(cell)) {
-         celltype[cell] = LEFT_BOUNDARY;
-      }
-      if (is_right_boundary(cell)) {
-         celltype[cell] = RIGHT_BOUNDARY;
-      }
-      if (is_bottom_boundary(cell)) {
-         celltype[cell] = BOTTOM_BOUNDARY;
-      }
-      if (is_top_boundary(cell)) {
-         celltype[cell] = TOP_BOUNDARY;
-      }
-   }
-}
-
-/*
- * Classify each of the first ncells cells as REAL_CELL or one of the four
- * boundary types. The celltype array is (re)allocated through mesh_memory
- * when it does not exist or holds fewer than ncells entries. With OpenMP
- * enabled the classification loop opens its own parallel region.
- */
-void Mesh::calc_celltype(size_t ncells)
-{
-   int mem_flags = 0;
-#ifdef HAVE_J7
-   if (parallel) mem_flags = LOAD_BALANCE_MEMORY;
-#endif
-
-   bool have_array = (celltype != NULL);
-   // The size query is only made when an array exists (short-circuit).
-   if (! have_array || mesh_memory.get_memory_size(celltype) < ncells) {
-      if (have_array) {
-         celltype = (int *)mesh_memory.memory_delete(celltype);
-      }
-      celltype = (int *)mesh_memory.memory_malloc(ncells, sizeof(int), "celltype", mem_flags);
-   }
-
-#ifdef _OPENMP
-#pragma omp parallel for
-#endif
-   for (uint cell=0; cell<ncells; ++cell) {
-      // Later tests overwrite earlier ones, so top wins over bottom, right, left.
-      celltype[cell] = REAL_CELL;
-      if (is_left_boundary(cell)) {
-         celltype[cell] = LEFT_BOUNDARY;
-      }
-      if (is_right_boundary(cell)) {
-         celltype[cell] = RIGHT_BOUNDARY;
-      }
-      if (is_bottom_boundary(cell)) {
-         celltype[cell] = BOTTOM_BOUNDARY;
-      }
-      if (is_top_boundary(cell)) {
-         celltype[cell] = TOP_BOUNDARY;
-      }
-   }
-}
-
-/*
- * For every cell, look up its mirror partners through the k-D tree.
- *
- * Each output entry defaults to the cell's own index. A degenerate (point)
- * query box is placed at the cell's point (x+dx/2, y+dy/2) reflected through
- * the origin (dsym), with only x negated (xsym), or with only y negated
- * (ysym); the entry is overwritten only when the query returns exactly one
- * hit.
- */
-void Mesh::calc_symmetry(vector<int> &dsym, vector<int> &xsym, vector<int> &ysym)
-{
-   TBounds box;
-   vector<int> index_list( IPOW2(levmx*levmx) );
-   int num;
-
-   for (uint ic=0; ic<ncells; ic++) {
-      dsym[ic] = xsym[ic] = ysym[ic] = ic;
-
-      // Query point and its negated coordinates, computed once per cell
-      const double px = x[ic]+0.5*dx[ic];
-      const double py = y[ic]+0.5*dy[ic];
-      const double neg_px = -1.0*px;
-      const double neg_py = -1.0*py;
-
-      // Diagonal symmetry: both coordinates negated
-      box.min.x = neg_px;
-      box.max.x = neg_px;
-      box.min.y = neg_py;
-      box.max.y = neg_py;
-      KDTree_QueryBoxIntersect(&tree, &num, &(index_list[0]), &box);
-      if (num == 1) {
-         dsym[ic] = index_list[0];
-      }
-
-      // x symmetry: x negated, y unchanged (box x-range is already neg_px)
-      box.min.y = py;
-      box.max.y = py;
-      KDTree_QueryBoxIntersect(&tree, &num, &(index_list[0]), &box);
-      if (num == 1) {
-         xsym[ic] = index_list[0];
-      }
-
-      // y symmetry: x unchanged, y negated
-      box.min.x = px;
-      box.max.x = px;
-      box.min.y = neg_py;
-      box.max.y = neg_py;
-      KDTree_QueryBoxIntersect(&tree, &num, &(index_list[0]), &box);
-      if (num == 1) {
-         ysym[ic] = index_list[0];
-      }
-   }
-}
-
-#ifdef HAVE_MPI
-/*
- * Assemble one rebalanced local array.
- *
- * old_data is read as three consecutive regions: the ncells_old cells this
- * rank held before, then lower_block_size cells, then the upper-block cells
- * (presumably the latter two are what L7_Update appended -- confirm against
- * the L7 documentation). new_data receives at most ncells_new entries in the
- * order: lower block, retained old cells starting at
- * max(noffset_new - noffset_old, 0), upper block.
- */
-template <typename T>
-static void load_balance_assemble(T *new_data, const T *old_data,
-                                  int ncells_new, int ncells_old,
-                                  int noffset_new, int noffset_old,
-                                  int lower_block_size, int upper_block_size,
-                                  int upper_block_start, int indices_needed_count)
-{
-   int in = 0;
-
-   // Cells received below the old range go first
-   if (lower_block_size > 0) {
-      for (; in < MIN(lower_block_size, ncells_new); in++) {
-         new_data[in] = old_data[ncells_old + in];
-      }
-   }
-
-   // Cells this rank keeps from its old range
-   for (int ic = MAX((noffset_new - noffset_old), 0); (ic < ncells_old) && (in < ncells_new); ic++, in++) {
-      new_data[in] = old_data[ic];
-   }
-
-   // Cells received above the old range go last
-   if (upper_block_size > 0) {
-      int ic = ncells_old + lower_block_size;
-      for (int k = max(noffset_new-upper_block_start,0); ((k+ic) < (ncells_old+indices_needed_count)) && (in < ncells_new); k++, in++) {
-         new_data[in] = old_data[ic+k];
-      }
-   }
-}
-
-/*
- * Repartition the cells evenly across ranks and migrate every array held in
- * state_memory and mesh_memory to the new partition.
- *
- * numcells     - number of cells this rank holds on entry
- * weight       - currently unused apart from a DEBUG print
- * state_memory - state arrays to migrate; entries with element size 8 are
- *                handled as double, size 4 as float, other sizes are skipped
- *
- * nsizes/ndispl are recomputed on every call. Data is moved only when some
- * rank's size changed; in that case celltype and the neighbor arrays are
- * deleted, ncells/noffset are updated, member pointers are refreshed through
- * memory_reset_ptrs(), and celltype is recomputed. Elapsed time is added to
- * cpu_timers[MESH_TIMER_LOAD_BALANCE].
- */
-void Mesh::do_load_balance_local(size_t numcells, float *weight, MallocPlus &state_memory)
-{
-   struct timeval tstart_cpu;
-   cpu_timer_start(&tstart_cpu);
-
-   // To get rid of compiler warning
-   if (DEBUG && weight != NULL) printf("DEBUG weight[0] = %f\n",weight[0]);
-
-   int ncells_old = numcells;
-   int noffset_old = ndispl[mype];
-
-// Need to add weight array to load balance if it is not NULL
-// Need to add tolerance to when load balance is done
-
-   int do_load_balance_global = 0;
-   int nsizes_old = 0;
-
-   for (int ip=0; ip<numpe; ip++){
-      nsizes_old = nsizes[ip];
-
-      // Calc new,even partition of data across processors
-      nsizes[ip] = ncells_global/numpe;
-      // Account for leftover cells
-      if (ip < (int)(ncells_global%numpe)) nsizes[ip]++;
-
-      if (nsizes_old != nsizes[ip]) do_load_balance_global = 1;
-   }
-
-   if (do_load_balance_global) {
-      cpu_counters[MESH_COUNTER_LOAD_BALANCE]++;
-
-      mesh_memory.memory_delete(celltype);
-      mesh_memory.memory_delete(nlft);
-      mesh_memory.memory_delete(nrht);
-      mesh_memory.memory_delete(nbot);
-      mesh_memory.memory_delete(ntop);
-
-      ndispl[0]=0;
-      for (int ip=1; ip<numpe; ip++){
-         ndispl[ip] = ndispl[ip-1] + nsizes[ip-1];
-      }
-      ncells = nsizes[mype];
-      noffset=ndispl[mype];
-      int ncells_new = (int)ncells;
-
-      // Indices of blocks to be added to load balance
-      int lower_block_start = noffset;
-      int lower_block_end = min(noffset_old-1, (int)(noffset+ncells-1));
-      int upper_block_start = max((int)(noffset_old+ncells_old), noffset);
-      int upper_block_end = noffset+ncells-1;
-
-      int lower_block_size = max(lower_block_end-lower_block_start+1,0);
-      if(lower_block_end < 0) lower_block_size = 0; // Handles segfault at start of array
-      int upper_block_size = max(upper_block_end-upper_block_start+1,0);
-      int indices_needed_count = lower_block_size + upper_block_size;
-
-      int in = 0;
-
-      vector<int> indices_needed(indices_needed_count);
-      for (int iz = lower_block_start; iz <= lower_block_end; iz++, in++){
-         indices_needed[in]=iz;
-      }
-      for (int iz = upper_block_start; iz <= upper_block_end; iz++, in++){
-         indices_needed[in]=iz;
-      }
-
-      // This rank may need no remote cells even though another rank's size
-      // changed; do not index an empty vector in that case.
-      int *indices_ptr = indices_needed.empty() ? NULL : &indices_needed[0];
-
-      int load_balance_handle = 0;
-      L7_Setup(0, noffset_old, ncells_old, indices_ptr, indices_needed_count, &load_balance_handle);
-
-      state_memory.memory_realloc_all(ncells_old+indices_needed_count);
-
-      // Iterate over a copy of the database because entries are replaced in
-      // state_memory while looping
-      MallocPlus state_memory_old = state_memory;
-
-      malloc_plus_memory_entry *memory_item;
-
-      for (memory_item = state_memory_old.memory_entry_by_name_begin();
-           memory_item != state_memory_old.memory_entry_by_name_end();
-           memory_item = state_memory_old.memory_entry_by_name_next() ) {
-
-         if (memory_item->mem_elsize == 8) {
-            double *mem_ptr_double = (double *)memory_item->mem_ptr;
-
-            int flags = state_memory.get_memory_flags(mem_ptr_double);
-            double *state_temp_double = (double *) state_memory.memory_malloc(ncells, sizeof(double),
-                                                                              "state_temp_double", flags);
-
-            L7_Update(mem_ptr_double, L7_DOUBLE, load_balance_handle);
-            load_balance_assemble(state_temp_double, mem_ptr_double, ncells_new, ncells_old,
-                                  noffset, noffset_old, lower_block_size, upper_block_size,
-                                  upper_block_start, indices_needed_count);
-            state_memory.memory_replace(mem_ptr_double, state_temp_double);
-         } else if (memory_item->mem_elsize == 4) {
-            float *mem_ptr_float = (float *)memory_item->mem_ptr;
-
-            int flags = state_memory.get_memory_flags(mem_ptr_float);
-            float *state_temp_float = (float *) state_memory.memory_malloc(ncells, sizeof(float),
-                                                                           "state_temp_float", flags);
-
-            L7_Update(mem_ptr_float, L7_FLOAT, load_balance_handle);
-            load_balance_assemble(state_temp_float, mem_ptr_float, ncells_new, ncells_old,
-                                  noffset, noffset_old, lower_block_size, upper_block_size,
-                                  upper_block_start, indices_needed_count);
-            state_memory.memory_replace(mem_ptr_float, state_temp_float);
-         }
-      }
-
-      mesh_memory.memory_realloc_all(ncells_old+indices_needed_count);
-
-      MallocPlus mesh_memory_old = mesh_memory;
-
-      for (memory_item = mesh_memory_old.memory_entry_by_name_begin();
-           memory_item != mesh_memory_old.memory_entry_by_name_end();
-           memory_item = mesh_memory_old.memory_entry_by_name_next() ) {
-
-         if (memory_item->mem_elsize == 8) {
-            long long *mem_ptr_long = (long long *)memory_item->mem_ptr;
-
-            int flags = mesh_memory.get_memory_flags(mem_ptr_long);
-            long long *mesh_temp_long = (long long *)mesh_memory.memory_malloc(ncells, sizeof(long long), "mesh_temp_long", flags);
-
-            L7_Update(mem_ptr_long, L7_LONG_LONG_INT, load_balance_handle);
-            load_balance_assemble(mesh_temp_long, mem_ptr_long, ncells_new, ncells_old,
-                                  noffset, noffset_old, lower_block_size, upper_block_size,
-                                  upper_block_start, indices_needed_count);
-            mesh_memory.memory_replace(mem_ptr_long, mesh_temp_long);
-
-         } else {
-            // Every mesh entry that is not 8 bytes wide is treated as int
-            int *mem_ptr_int = (int *)memory_item->mem_ptr;
-
-            int flags = mesh_memory.get_memory_flags(mem_ptr_int);
-            int *mesh_temp_int = (int *)mesh_memory.memory_malloc(ncells, sizeof(int), "mesh_temp_int", flags);
-
-            L7_Update(mem_ptr_int, L7_INT, load_balance_handle);
-            load_balance_assemble(mesh_temp_int, mem_ptr_int, ncells_new, ncells_old,
-                                  noffset, noffset_old, lower_block_size, upper_block_size,
-                                  upper_block_start, indices_needed_count);
-            mesh_memory.memory_replace(mem_ptr_int, mesh_temp_int);
-
-         }
-      }
-
-      L7_Free(&load_balance_handle);
-      load_balance_handle = 0;
-
-      memory_reset_ptrs();
-
-      calc_celltype(ncells);
-   }
-
-
-   cpu_timers[MESH_TIMER_LOAD_BALANCE] += cpu_timer_stop(tstart_cpu);
-}
-#endif
-
-#ifdef HAVE_OPENCL
-#ifdef HAVE_MPI
-int Mesh::gpu_do_load_balance_local(size_t numcells, float *weight, MallocPlus &gpu_state_memory)
-{
- int do_load_balance_global = 0;
-
- if (! gpu_do_rezone) return(do_load_balance_global);
-
- struct timeval tstart_cpu;
- cpu_timer_start(&tstart_cpu);
-
- // To get rid of compiler warning
- if (DEBUG && weight != NULL) printf("DEBUG weight[0] = %f\n",weight[0]);
-
- int ncells_old = numcells;
- int noffset_old = ndispl[mype];
-
-// Need to add weight array to load balance if it is not NULL
-// Need to add tolerance to when load balance is done
-
- int nsizes_old = 0;
- for (int ip=0; ip<numpe; ip++){
- nsizes_old = nsizes[ip];
- nsizes[ip] = ncells_global/numpe;
- if (ip < (int)(ncells_global%numpe)) nsizes[ip]++;
- if (nsizes_old != nsizes[ip]) do_load_balance_global = 1;
- }
-
- if(do_load_balance_global) {
-
- cl_command_queue command_queue = ezcl_get_command_queue();
-
- gpu_counters[MESH_COUNTER_LOAD_BALANCE]++;
-
- ndispl[0]=0;
- for (int ip=1; ip<numpe; ip++){
- ndispl[ip] = ndispl[ip-1] + nsizes[ip-1];
- }
- ncells = nsizes[mype];
- noffset=ndispl[mype];
-
- // Indices of blocks to be added to load balance
- int lower_block_start = noffset;
- int lower_block_end = min(noffset_old-1, (int)(noffset+ncells-1));
- int upper_block_start = max((int)(noffset_old+ncells_old), noffset);
- int upper_block_end = noffset+ncells-1;
- //printf("%d: lbs %d lbe %d ubs %d ube %d\n",mype,lower_block_start-noffset_old,lower_block_end-noffset_old,upper_block_start-noffset_old,upper_block_end-noffset_old);
-
- size_t lower_block_size = max(lower_block_end-lower_block_start+1,0);
- if(lower_block_end < 0) lower_block_size = 0; // Handles segfault at start of array
- size_t upper_block_size = max(upper_block_end-upper_block_start+1,0);
- int indices_needed_count = lower_block_size + upper_block_size;
-
- size_t middle_block_size = ncells - lower_block_size - upper_block_size;
- int middle_block_start = max(noffset - noffset_old, 0);
-
- int lower_segment_size = noffset-noffset_old;
- int do_whole_segment = 0;
- if (lower_segment_size > ncells_old) do_whole_segment = 1;
-
- int upper_segment_size = ( (noffset_old+ncells_old) - (noffset+ncells) );
- int upper_segment_start = (noffset_old+ncells_old) - upper_segment_size - noffset_old;
- if (upper_segment_size > ncells_old) do_whole_segment=1;
-
- int in = 0;
- vector<int> indices_needed(indices_needed_count);
- for (int iz = lower_block_start; iz <= lower_block_end; iz++, in++){
- indices_needed[in]=iz;
- }
- for (int iz = upper_block_start; iz <= upper_block_end; iz++, in++){
- indices_needed[in]=iz;
- }
-
- int load_balance_handle = 0;
- L7_Setup(0, noffset_old, ncells_old, &indices_needed[0], indices_needed_count, &load_balance_handle);
-
- size_t local_work_size = 128;
- size_t global_work_size = ((ncells + local_work_size - 1) / local_work_size) * local_work_size;
-
- // printf("MYPE%d: \t ncells = %d \t ncells_old = %d \t ncells_global = %d \n", mype, ncells, ncells_old, ncells_global);
-
- // Allocate lower block on GPU
- size_t low_block_size = MAX(1, lower_block_size);
- cl_mem dev_state_var_lower = ezcl_malloc(NULL, const_cast<char *>("dev_state_var_lower"), &low_block_size, sizeof(cl_real_t), CL_MEM_READ_WRITE, 0);
-
- // Allocate upper block on GPU
- size_t up_block_size = MAX(1, upper_block_size);
- cl_mem dev_state_var_upper = ezcl_malloc(NULL, const_cast<char *>("dev_state_var_upper"), &up_block_size, sizeof(cl_real_t), CL_MEM_READ_WRITE, 0);
-
- MallocPlus gpu_state_memory_old = gpu_state_memory;
- malloc_plus_memory_entry *memory_item;
-
- for (memory_item = gpu_state_memory_old.memory_entry_by_name_begin();
- memory_item != gpu_state_memory_old.memory_entry_by_name_end();
- memory_item = gpu_state_memory_old.memory_entry_by_name_next() ) {
- //printf("DEBUG -- it.mem_name %s elsize %lu\n",memory_item->mem_name,memory_item->mem_elsize);
- cl_mem dev_state_mem_ptr = (cl_mem)memory_item->mem_ptr;
-
- if (memory_item->mem_elsize == 8){
-#ifndef MINIMUM_PRECISION
- vector<double> state_var_tmp(ncells_old+indices_needed_count,0.0);
-
- // Read current state values from GPU and write to CPU arrays
- if (do_whole_segment) {
- ezcl_enqueue_read_buffer(command_queue, dev_state_mem_ptr, CL_TRUE, 0, ncells_old*sizeof(cl_double), &state_var_tmp[0], NULL);
- } else {
- // Read lower block from GPU
- if (lower_segment_size > 0) {
- ezcl_enqueue_read_buffer(command_queue, dev_state_mem_ptr, CL_TRUE, 0, lower_segment_size*sizeof(cl_double), &state_var_tmp[0], NULL);
- }
- // Read upper block from GPU
- if (upper_segment_size > 0) {
- ezcl_enqueue_read_buffer(command_queue, dev_state_mem_ptr, CL_TRUE, upper_segment_start*sizeof(cl_double), upper_segment_size*sizeof(cl_double), &state_var_tmp[upper_segment_start], NULL);
- }
- }
-
- // Update arrays with L7
- L7_Update(&state_var_tmp[0], L7_DOUBLE, load_balance_handle);
-
- // Set lower block on GPU
- if(lower_block_size > 0) {
- ezcl_enqueue_write_buffer(command_queue, dev_state_var_lower, CL_FALSE, 0, lower_block_size*sizeof(cl_double), &state_var_tmp[ncells_old], NULL);
- }
- // Set upper block on GPU
- if(upper_block_size > 0) {
- ezcl_enqueue_write_buffer(command_queue, dev_state_var_upper, CL_FALSE, 0, upper_block_size*sizeof(cl_double), &state_var_tmp[ncells_old+lower_block_size], NULL);
- }
-
- // Allocate space on GPU for temp arrays (used in double buffering)
- cl_mem dev_state_var_new = ezcl_malloc(NULL, gpu_state_memory.get_memory_name(dev_state_mem_ptr), &ncells, sizeof(cl_double), CL_MEM_READ_WRITE, 0);
- gpu_state_memory.memory_add(dev_state_var_new, ncells, sizeof(cl_double), "dev_state_var_new", DEVICE_REGULAR_MEMORY);
-
- //printf("DEBUG memory for proc %d is %p dev_state_new is %p\n",mype,dev_state_mem_ptr,dev_state_var_new);
-
- ezcl_set_kernel_arg(kernel_do_load_balance_double, 0, sizeof(cl_int), &ncells);
- ezcl_set_kernel_arg(kernel_do_load_balance_double, 1, sizeof(cl_int), &lower_block_size);
- ezcl_set_kernel_arg(kernel_do_load_balance_double, 2, sizeof(cl_int), &middle_block_size);
- ezcl_set_kernel_arg(kernel_do_load_balance_double, 3, sizeof(cl_int), &middle_block_start);
- ezcl_set_kernel_arg(kernel_do_load_balance_double, 4, sizeof(cl_mem), &dev_state_mem_ptr);
- ezcl_set_kernel_arg(kernel_do_load_balance_double, 5, sizeof(cl_mem), &dev_state_var_lower);
- ezcl_set_kernel_arg(kernel_do_load_balance_double, 6, sizeof(cl_mem), &dev_state_var_upper);
- ezcl_set_kernel_arg(kernel_do_load_balance_double, 7, sizeof(cl_mem), &dev_state_var_new);
-
- ezcl_enqueue_ndrange_kernel(command_queue, kernel_do_load_balance_double, 1, NULL, &global_work_size, &local_work_size, NULL);
-
- gpu_state_memory.memory_replace(dev_state_mem_ptr, dev_state_var_new);
-#else
- printf("ERROR -- can't have double type for state variable\n");
- exit(1);
-#endif
- } else if (memory_item->mem_elsize == 4){
- vector<float> state_var_tmp(ncells_old+indices_needed_count,0.0);
-
- // Read current state values from GPU and write to CPU arrays
- if (do_whole_segment) {
- ezcl_enqueue_read_buffer(command_queue, dev_state_mem_ptr, CL_TRUE, 0, ncells_old*sizeof(cl_float), &state_var_tmp[0], NULL);
- } else {
- // Read lower block from GPU
- if (lower_segment_size > 0) {
- ezcl_enqueue_read_buffer(command_queue, dev_state_mem_ptr, CL_TRUE, 0, lower_segment_size*sizeof(cl_float), &state_var_tmp[0], NULL);
- }
- // Read upper block from GPU
- if (upper_segment_size > 0) {
- ezcl_enqueue_read_buffer(command_queue, dev_state_mem_ptr, CL_TRUE, upper_segment_start*sizeof(cl_float), upper_segment_size*sizeof(cl_float), &state_var_tmp[upper_segment_start], NULL);
- }
- }
-
- // Update arrays with L7
- L7_Update(&state_var_tmp[0], L7_FLOAT, load_balance_handle);
-
- // Set lower block on GPU
- if(lower_block_size > 0) {
- ezcl_enqueue_write_buffer(command_queue, dev_state_var_lower, CL_FALSE, 0, lower_block_size*sizeof(cl_float), &state_var_tmp[ncells_old], NULL);
- }
- // Set upper block on GPU
- if(upper_block_size > 0) {
- ezcl_enqueue_write_buffer(command_queue, dev_state_var_upper, CL_FALSE, 0, upper_block_size*sizeof(cl_float), &state_var_tmp[ncells_old+lower_block_size], NULL);
- }
-
- // Allocate space on GPU for temp arrays (used in double buffering)
- cl_mem dev_state_var_new = ezcl_malloc(NULL, gpu_state_memory.get_memory_name(dev_state_mem_ptr), &ncells, sizeof(cl_float), CL_MEM_READ_WRITE, 0);
- gpu_state_memory.memory_add(dev_state_var_new, ncells, sizeof(cl_float), "dev_state_var_new", DEVICE_REGULAR_MEMORY);
-
- //printf("DEBUG memory for proc %d is %p dev_state_new is %p\n",mype,dev_state_mem_ptr,dev_state_var_new);
-
- ezcl_set_kernel_arg(kernel_do_load_balance_float, 0, sizeof(cl_int), &ncells);
- ezcl_set_kernel_arg(kernel_do_load_balance_float, 1, sizeof(cl_int), &lower_block_size);
- ezcl_set_kernel_arg(kernel_do_load_balance_float, 2, sizeof(cl_int), &middle_block_size);
- ezcl_set_kernel_arg(kernel_do_load_balance_float, 3, sizeof(cl_int), &middle_block_start);
- ezcl_set_kernel_arg(kernel_do_load_balance_float, 4, sizeof(cl_mem), &dev_state_mem_ptr);
- ezcl_set_kernel_arg(kernel_do_load_balance_float, 5, sizeof(cl_mem), &dev_state_var_lower);
- ezcl_set_kernel_arg(kernel_do_load_balance_float, 6, sizeof(cl_mem), &dev_state_var_upper);
- ezcl_set_kernel_arg(kernel_do_load_balance_float, 7, sizeof(cl_mem), &dev_state_var_new);
-
- ezcl_enqueue_ndrange_kernel(command_queue, kernel_do_load_balance_float, 1, NULL, &global_work_size, &local_work_size, NULL);
-
- gpu_state_memory.memory_replace(dev_state_mem_ptr, dev_state_var_new);
- }
- }
-
- ezcl_device_memory_delete(dev_state_var_lower);
- ezcl_device_memory_delete(dev_state_var_upper);
-
- vector<int> i_tmp(ncells_old+indices_needed_count,0);
- vector<int> j_tmp(ncells_old+indices_needed_count,0);
- vector<int> level_tmp(ncells_old+indices_needed_count,0);
- vector<int> celltype_tmp(ncells_old+indices_needed_count,0);
-
- if (do_whole_segment) {
- ezcl_enqueue_read_buffer(command_queue, dev_i, CL_FALSE, 0, ncells_old*sizeof(cl_int), &i_tmp[0], NULL);
- ezcl_enqueue_read_buffer(command_queue, dev_j, CL_FALSE, 0, ncells_old*sizeof(cl_int), &j_tmp[0], NULL);
- ezcl_enqueue_read_buffer(command_queue, dev_level, CL_FALSE, 0, ncells_old*sizeof(cl_int), &level_tmp[0], NULL);
- ezcl_enqueue_read_buffer(command_queue, dev_celltype, CL_TRUE, 0, ncells_old*sizeof(cl_int), &celltype_tmp[0], NULL);
- } else {
- if (lower_segment_size > 0) {
- ezcl_enqueue_read_buffer(command_queue, dev_i, CL_FALSE, 0, lower_segment_size*sizeof(cl_int), &i_tmp[0], NULL);
- ezcl_enqueue_read_buffer(command_queue, dev_j, CL_FALSE, 0, lower_segment_size*sizeof(cl_int), &j_tmp[0], NULL);
- ezcl_enqueue_read_buffer(command_queue, dev_level, CL_FALSE, 0, lower_segment_size*sizeof(cl_int), &level_tmp[0], NULL);
- ezcl_enqueue_read_buffer(command_queue, dev_celltype, CL_TRUE, 0, lower_segment_size*sizeof(cl_int), &celltype_tmp[0], NULL);
- }
- if (upper_segment_size > 0) {
- ezcl_enqueue_read_buffer(command_queue, dev_i, CL_FALSE, upper_segment_start*sizeof(cl_int), upper_segment_size*sizeof(cl_int), &i_tmp[upper_segment_start], NULL);
- ezcl_enqueue_read_buffer(command_queue, dev_j, CL_FALSE, upper_segment_start*sizeof(cl_int), upper_segment_size*sizeof(cl_int), &j_tmp[upper_segment_start], NULL);
- ezcl_enqueue_read_buffer(command_queue, dev_level, CL_FALSE, upper_segment_start*sizeof(cl_int), upper_segment_size*sizeof(cl_int), &level_tmp[upper_segment_start], NULL);
- ezcl_enqueue_read_buffer(command_queue, dev_celltype, CL_TRUE, upper_segment_start*sizeof(cl_int), upper_segment_size*sizeof(cl_int), &celltype_tmp[upper_segment_start], NULL);
- }
- }
-
- L7_Update(&i_tmp[0], L7_INT, load_balance_handle);
- L7_Update(&j_tmp[0], L7_INT, load_balance_handle);
- L7_Update(&level_tmp[0], L7_INT, load_balance_handle);
- L7_Update(&celltype_tmp[0], L7_INT, load_balance_handle);
-
- L7_Free(&load_balance_handle);
- load_balance_handle = 0;
-
- // Allocate and set lower block on GPU
- cl_mem dev_i_lower, dev_j_lower, dev_level_lower, dev_celltype_lower;
-
- if(lower_block_size > 0) {
- dev_i_lower = ezcl_malloc(NULL, const_cast<char *>("dev_i_lower"), &lower_block_size, sizeof(cl_int), CL_MEM_READ_WRITE, 0);
- dev_j_lower = ezcl_malloc(NULL, const_cast<char *>("dev_j_lower"), &lower_block_size, sizeof(cl_int), CL_MEM_READ_WRITE, 0);
- dev_level_lower = ezcl_malloc(NULL, const_cast<char *>("dev_level_lower"), &lower_block_size, sizeof(cl_int), CL_MEM_READ_WRITE, 0);
- dev_celltype_lower = ezcl_malloc(NULL, const_cast<char *>("dev_celltype_lower"), &lower_block_size, sizeof(cl_int), CL_MEM_READ_WRITE, 0);
-
- ezcl_enqueue_write_buffer(command_queue, dev_i_lower, CL_FALSE, 0, lower_block_size*sizeof(cl_int), &i_tmp[ncells_old], NULL);
- ezcl_enqueue_write_buffer(command_queue, dev_j_lower, CL_FALSE, 0, lower_block_size*sizeof(cl_int), &j_tmp[ncells_old], NULL);
- ezcl_enqueue_write_buffer(command_queue, dev_level_lower, CL_FALSE, 0, lower_block_size*sizeof(cl_int), &level_tmp[ncells_old], NULL);
- ezcl_enqueue_write_buffer(command_queue, dev_celltype_lower, CL_TRUE, 0, lower_block_size*sizeof(cl_int), &celltype_tmp[ncells_old], NULL);
- }
-
- // Allocate and set upper block on GPU
- cl_mem dev_i_upper, dev_j_upper, dev_level_upper, dev_celltype_upper;
- if(upper_block_size > 0) {
- dev_i_upper = ezcl_malloc(NULL, const_cast<char *>("dev_i_upper"), &upper_block_size, sizeof(cl_int), CL_MEM_READ_WRITE, 0);
- dev_j_upper = ezcl_malloc(NULL, const_cast<char *>("dev_j_upper"), &upper_block_size, sizeof(cl_int), CL_MEM_READ_WRITE, 0);
- dev_level_upper = ezcl_malloc(NULL, const_cast<char *>("dev_level_upper"), &upper_block_size, sizeof(cl_int), CL_MEM_READ_WRITE, 0);
- dev_celltype_upper = ezcl_malloc(NULL, const_cast<char *>("dev_celltype_upper"), &upper_block_size, sizeof(cl_int), CL_MEM_READ_WRITE, 0);
-
- ezcl_enqueue_write_buffer(command_queue, dev_i_upper, CL_FALSE, 0, upper_block_size*sizeof(cl_int), &i_tmp[ncells_old+lower_block_size], NULL);
- ezcl_enqueue_write_buffer(command_queue, dev_j_upper, CL_FALSE, 0, upper_block_size*sizeof(cl_int), &j_tmp[ncells_old+lower_block_size], NULL);
- ezcl_enqueue_write_buffer(command_queue, dev_level_upper, CL_FALSE, 0, upper_block_size*sizeof(cl_int), &level_tmp[ncells_old+lower_block_size], NULL);
- ezcl_enqueue_write_buffer(command_queue, dev_celltype_upper, CL_TRUE, 0, upper_block_size*sizeof(cl_int), &celltype_tmp[ncells_old+lower_block_size], NULL);
- }
-
- local_work_size = 128;
-
- // printf("MYPE%d: \t ncells = %d \t ncells_old = %d \t ncells_global = %d \n", mype, ncells, ncells_old, ncells_global);
- // Allocate space on GPU for temp arrays (used in double buffering)
-
- size_t mem_request = (int)((float)ncells*mem_factor);
- cl_mem dev_i_new = ezcl_malloc(NULL, const_cast<char *>("dev_i_new"), &mem_request, sizeof(cl_int), CL_MEM_READ_WRITE, 0);
- cl_mem dev_j_new = ezcl_malloc(NULL, const_cast<char *>("dev_j_new"), &mem_request, sizeof(cl_int), CL_MEM_READ_WRITE, 0);
- cl_mem dev_level_new = ezcl_malloc(NULL, const_cast<char *>("dev_level_new"), &mem_request, sizeof(cl_int), CL_MEM_READ_WRITE, 0);
- cl_mem dev_celltype_new = ezcl_malloc(NULL, const_cast<char *>("dev_celltype_new"), &mem_request, sizeof(cl_int), CL_MEM_READ_WRITE, 0);
-
- // Set kernel arguments and call lower block kernel
- if(lower_block_size > 0) {
-
- size_t global_work_size = ((lower_block_size + local_work_size - 1) / local_work_size) * local_work_size;
-
- ezcl_set_kernel_arg(kernel_do_load_balance_lower, 0, sizeof(cl_mem), &dev_i_new);
- ezcl_set_kernel_arg(kernel_do_load_balance_lower, 1, sizeof(cl_mem), &dev_j_new);
- ezcl_set_kernel_arg(kernel_do_load_balance_lower, 2, sizeof(cl_mem), &dev_level_new);
- ezcl_set_kernel_arg(kernel_do_load_balance_lower, 3, sizeof(cl_mem), &dev_celltype_new);
- ezcl_set_kernel_arg(kernel_do_load_balance_lower, 4, sizeof(cl_mem), &dev_i_lower);
- ezcl_set_kernel_arg(kernel_do_load_balance_lower, 5, sizeof(cl_mem), &dev_j_lower);
- ezcl_set_kernel_arg(kernel_do_load_balance_lower, 6, sizeof(cl_mem), &dev_level_lower);
- ezcl_set_kernel_arg(kernel_do_load_balance_lower, 7, sizeof(cl_mem), &dev_celltype_lower);
- ezcl_set_kernel_arg(kernel_do_load_balance_lower, 8, sizeof(cl_int), &lower_block_size);
-
- ezcl_enqueue_ndrange_kernel(command_queue, kernel_do_load_balance_lower, 1, NULL, &global_work_size, &local_work_size, NULL);
-
- ezcl_device_memory_delete(dev_i_lower);
- ezcl_device_memory_delete(dev_j_lower);
- ezcl_device_memory_delete(dev_level_lower);
- ezcl_device_memory_delete(dev_celltype_lower);
- }
-
- // Set kernel arguments and call middle block kernel
- if(middle_block_size > 0) {
-
- size_t global_work_size = ((middle_block_size + local_work_size - 1) / local_work_size) * local_work_size;
-
- ezcl_set_kernel_arg(kernel_do_load_balance_middle, 0, sizeof(cl_mem), &dev_i_new);
- ezcl_set_kernel_arg(kernel_do_load_balance_middle, 1, sizeof(cl_mem), &dev_j_new);
- ezcl_set_kernel_arg(kernel_do_load_balance_middle, 2, sizeof(cl_mem), &dev_level_new);
- ezcl_set_kernel_arg(kernel_do_load_balance_middle, 3, sizeof(cl_mem), &dev_celltype_new);
- ezcl_set_kernel_arg(kernel_do_load_balance_middle, 4, sizeof(cl_mem), &dev_i);
- ezcl_set_kernel_arg(kernel_do_load_balance_middle, 5, sizeof(cl_mem), &dev_j);
- ezcl_set_kernel_arg(kernel_do_load_balance_middle, 6, sizeof(cl_mem), &dev_level);
- ezcl_set_kernel_arg(kernel_do_load_balance_middle, 7, sizeof(cl_mem), &dev_celltype);
- ezcl_set_kernel_arg(kernel_do_load_balance_middle, 8, sizeof(cl_int), &lower_block_size);
- ezcl_set_kernel_arg(kernel_do_load_balance_middle, 9, sizeof(cl_int), &middle_block_size);
- ezcl_set_kernel_arg(kernel_do_load_balance_middle, 10, sizeof(cl_int), &middle_block_start);
-
- ezcl_enqueue_ndrange_kernel(command_queue, kernel_do_load_balance_middle, 1, NULL, &global_work_size, &local_work_size, NULL);
- }
-
- // Set kernel arguments and call upper block kernel
- if(upper_block_size > 0) {
-
- size_t global_work_size = ((upper_block_size + local_work_size - 1) / local_work_size) * local_work_size;
-
- ezcl_set_kernel_arg(kernel_do_load_balance_upper, 0, sizeof(cl_mem), &dev_i_new);
- ezcl_set_kernel_arg(kernel_do_load_balance_upper, 1, sizeof(cl_mem), &dev_j_new);
- ezcl_set_kernel_arg(kernel_do_load_balance_upper, 2, sizeof(cl_mem), &dev_level_new);
- ezcl_set_kernel_arg(kernel_do_load_balance_upper, 3, sizeof(cl_mem), &dev_celltype_new);
- ezcl_set_kernel_arg(kernel_do_load_balance_upper, 4, sizeof(cl_mem), &dev_i_upper);
- ezcl_set_kernel_arg(kernel_do_load_balance_upper, 5, sizeof(cl_mem), &dev_j_upper);
- ezcl_set_kernel_arg(kernel_do_load_balance_upper, 6, sizeof(cl_mem), &dev_level_upper);
- ezcl_set_kernel_arg(kernel_do_load_balance_upper, 7, sizeof(cl_mem), &dev_celltype_upper);
- ezcl_set_kernel_arg(kernel_do_load_balance_upper, 8, sizeof(cl_int), &lower_block_size);
- ezcl_set_kernel_arg(kernel_do_load_balance_upper, 9, sizeof(cl_int), &middle_block_size);
- ezcl_set_kernel_arg(kernel_do_load_balance_upper, 10, sizeof(cl_int), &upper_block_size);
-
- ezcl_enqueue_ndrange_kernel(command_queue, kernel_do_load_balance_upper, 1, NULL, &global_work_size, &local_work_size, NULL);
-
- ezcl_device_memory_delete(dev_i_upper);
- ezcl_device_memory_delete(dev_j_upper);
- ezcl_device_memory_delete(dev_level_upper);
- ezcl_device_memory_delete(dev_celltype_upper);
- }
-
- ezcl_device_memory_swap(&dev_i_new, &dev_i);
- ezcl_device_memory_swap(&dev_j_new, &dev_j);
- ezcl_device_memory_swap(&dev_level_new, &dev_level);
- ezcl_device_memory_swap(&dev_celltype_new, &dev_celltype);
-
- ezcl_device_memory_delete(dev_i_new);
- ezcl_device_memory_delete(dev_j_new);
- ezcl_device_memory_delete(dev_level_new);
- ezcl_device_memory_delete(dev_celltype_new);
-
- gpu_timers[MESH_TIMER_LOAD_BALANCE] += (long int)(cpu_timer_stop(tstart_cpu)*1.0e9);
- }
-
- return(do_load_balance_global);
-}
-#endif
-#endif
-
-#ifdef HAVE_OPENCL
-// Runs the count_BCs kernel over all cells and returns the first element of
-// the device scratch buffer after an optional second reduction stage.
-// Returns 0 without touching the device when the mesh is empty or when
-// have_boundary is set.
-int Mesh::gpu_count_BCs(void)
-{
-   // With zero cells local_work_size would be 0 and the global work size
-   // computation below would divide by zero, so handle the empty mesh first.
-   if (ncells == 0) return(0);
-
-   cl_event count_BCs_stage1_event, count_BCs_stage2_event;
-
-   size_t local_work_size = MIN(ncells, TILE_SIZE);
-   size_t global_work_size = ((ncells+local_work_size - 1) /local_work_size) * local_work_size;
-
-   //size_t block_size = (ncells + TILE_SIZE - 1) / TILE_SIZE; // For on-device global reduction kernel.
-   size_t block_size = global_work_size/local_work_size;
-
-   int bcount = 0;
-
-   if (! have_boundary) {
-      cl_command_queue command_queue = ezcl_get_command_queue();
-      cl_mem dev_ioffset = ezcl_malloc(NULL, const_cast<char *>("dev_ioffset"), &block_size, sizeof(cl_int), CL_MEM_READ_WRITE, 0);
-
-      // The kernels take their sizes as int. Pass genuine cl_int objects so
-      // that exactly sizeof(cl_int) valid bytes are copied; handing over the
-      // address of a size_t only works by accident on little-endian hosts.
-      cl_int isize = (cl_int)ncells;
-      cl_int nblocks = (cl_int)block_size;
-
-      /*
-      __kernel void count_BCs(
-      const int isize, // 0
-      __global const int *i, // 1
-      __global const int *j, // 2
-      __global const int *level, // 3
-      __global const int *lev_ibeg, // 4
-      __global const int *lev_iend, // 5
-      __global const int *lev_jbeg, // 6
-      __global const int *lev_jend, // 7
-      __global int *scratch, // 8
-      __local int *tile) // 9
-      */
-      size_t shared_spd_sum_int = local_work_size * sizeof(cl_int);
-      ezcl_set_kernel_arg(kernel_count_BCs, 0, sizeof(cl_int), (void *)&isize);
-      ezcl_set_kernel_arg(kernel_count_BCs, 1, sizeof(cl_mem), (void *)&dev_i);
-      ezcl_set_kernel_arg(kernel_count_BCs, 2, sizeof(cl_mem), (void *)&dev_j);
-      ezcl_set_kernel_arg(kernel_count_BCs, 3, sizeof(cl_mem), (void *)&dev_level);
-      ezcl_set_kernel_arg(kernel_count_BCs, 4, sizeof(cl_mem), (void *)&dev_levibeg);
-      ezcl_set_kernel_arg(kernel_count_BCs, 5, sizeof(cl_mem), (void *)&dev_leviend);
-      ezcl_set_kernel_arg(kernel_count_BCs, 6, sizeof(cl_mem), (void *)&dev_levjbeg);
-      ezcl_set_kernel_arg(kernel_count_BCs, 7, sizeof(cl_mem), (void *)&dev_levjend);
-      ezcl_set_kernel_arg(kernel_count_BCs, 8, sizeof(cl_mem), (void *)&dev_ioffset);
-      ezcl_set_kernel_arg(kernel_count_BCs, 9, shared_spd_sum_int, 0);
-
-      ezcl_enqueue_ndrange_kernel(command_queue, kernel_count_BCs, 1, NULL, &global_work_size, &local_work_size, &count_BCs_stage1_event);
-
-      // More than one work group: run the second stage over the scratch
-      // buffer using a single work group of local_work_size items.
-      if (block_size > 1) {
-         ezcl_set_kernel_arg(kernel_reduce_sum_int_stage2of2, 0, sizeof(cl_int), (void *)&nblocks);
-         ezcl_set_kernel_arg(kernel_reduce_sum_int_stage2of2, 1, sizeof(cl_mem), (void *)&dev_ioffset);
-         ezcl_set_kernel_arg(kernel_reduce_sum_int_stage2of2, 2, shared_spd_sum_int, 0);
-
-         ezcl_enqueue_ndrange_kernel(command_queue, kernel_reduce_sum_int_stage2of2, 1, NULL, &local_work_size, &local_work_size, &count_BCs_stage2_event);
-      }
-
-      // Blocking read of the first scratch element into bcount.
-      ezcl_enqueue_read_buffer(command_queue, dev_ioffset, CL_TRUE, 0, 1*sizeof(cl_int), &bcount, NULL);
-
-      //printf("DEBUG -- bcount is %d\n",bcount);
-      //state->gpu_time_read += ezcl_timer_calc(&start_read_event, &start_read_event);
-
-      ezcl_device_memory_delete(dev_ioffset);
-
-      gpu_timers[MESH_TIMER_COUNT_BCS] += ezcl_timer_calc(&count_BCs_stage1_event, &count_BCs_stage1_event);
-      // The stage 2 event only exists when the second kernel was enqueued.
-      if (block_size > 1) {
-         gpu_timers[MESH_TIMER_COUNT_BCS] += ezcl_timer_calc(&count_BCs_stage2_event, &count_BCs_stage2_event);
-      }
-
-   }
-
-   return(bcount);
-}
-#endif
-
-// Allocates the i, j and level index arrays, ncells ints each, through
-// mesh_memory. The arrays are tagged RESTART_DATA, or LOAD_BALANCE_MEMORY
-// when built with HAVE_J7 and running in parallel.
-void Mesh::allocate(size_t ncells)
-{
-   int mem_flags = RESTART_DATA;
-#ifdef HAVE_J7
-   if (parallel) {
-      mem_flags = LOAD_BALANCE_MEMORY;
-   }
-#endif
-
-   i     = (int *)mesh_memory.memory_malloc(ncells, sizeof(int), "i",     mem_flags);
-   j     = (int *)mesh_memory.memory_malloc(ncells, sizeof(int), "j",     mem_flags);
-   level = (int *)mesh_memory.memory_malloc(ncells, sizeof(int), "level", mem_flags);
-}
-
-
-// Grows every array tracked by mesh_memory to new_ncells when the current
-// size reported for the i array is smaller; never shrinks.
-void Mesh::resize(size_t new_ncells)
-{
-   const size_t allocated = mesh_memory.get_memory_size(i);
-   if (allocated < new_ncells) {
-      mesh_memory.memory_realloc_all(new_ncells);
-   }
-}
-
-// Re-fetches the mesh array pointers from mesh_memory by their registered
-// names and stores them back into the corresponding members.
-void Mesh::memory_reset_ptrs(void)
-{
-   // Cell indices, refinement level and cell type
-   i        = static_cast<int *>(mesh_memory.get_memory_ptr("i"));
-   j        = static_cast<int *>(mesh_memory.get_memory_ptr("j"));
-   level    = static_cast<int *>(mesh_memory.get_memory_ptr("level"));
-   celltype = static_cast<int *>(mesh_memory.get_memory_ptr("celltype"));
-
-   // Neighbor arrays
-   nlft     = static_cast<int *>(mesh_memory.get_memory_ptr("nlft"));
-   nrht     = static_cast<int *>(mesh_memory.get_memory_ptr("nrht"));
-   nbot     = static_cast<int *>(mesh_memory.get_memory_ptr("nbot"));
-   ntop     = static_cast<int *>(mesh_memory.get_memory_ptr("ntop"));
-}
-
-// Deletes the device buffers dev_level, dev_i, dev_j and dev_celltype and
-// allocates new ones holding ncells*mem_factor cl_int elements each.
-// Does nothing when built without HAVE_OPENCL.
-void Mesh::resize_old_device_memory(size_t ncells)
-{
-#ifdef HAVE_OPENCL
-   ezcl_device_memory_delete(dev_level);
-   ezcl_device_memory_delete(dev_i);
-   ezcl_device_memory_delete(dev_j);
-   ezcl_device_memory_delete(dev_celltype);
-   // Scale in double precision and keep the result as size_t. Going through
-   // float cannot represent every cell count above 2^24 (so the request could
-   // round below ncells) and going through int truncates to 32 bits.
-   size_t mem_request = (size_t)((double)ncells*mem_factor);
-   dev_level = ezcl_malloc(NULL, const_cast<char *>("dev_level"), &mem_request, sizeof(cl_int), CL_MEM_READ_ONLY, 0);
-   dev_i = ezcl_malloc(NULL, const_cast<char *>("dev_i"), &mem_request, sizeof(cl_int), CL_MEM_READ_ONLY, 0);
-   dev_j = ezcl_malloc(NULL, const_cast<char *>("dev_j"), &mem_request, sizeof(cl_int), CL_MEM_READ_ONLY, 0);
-   dev_celltype = ezcl_malloc(NULL, const_cast<char *>("dev_celltype"), &mem_request, sizeof(cl_int), CL_MEM_READ_ONLY, 0);
-#else
-   // Nothing to resize without OpenCL; mark the parameter as used to avoid
-   // an unused-parameter warning.
-   (void)ncells;
-#endif
-}
-// Prints a summary of the mesh object to stdout: dimensionality, parallel
-// layout, cell counts, and for each mesh array its address, element count
-// and element size (device buffers first when built with HAVE_OPENCL).
-//
-// Arguments are cast to exactly the types named by the conversion
-// specifiers (long for %ld, void * for %p); passing size_t or typed
-// pointers to those specifiers is undefined behavior.
-void Mesh::print_object_info(void)
-{
-   printf(" ---- Mesh object info -----\n");
-   printf("Dimensionality : %d\n",ndim);
-   printf("Parallel info : mype %d numpe %d noffset %d parallel %d\n",mype,numpe,noffset,parallel);
-   printf("Sizes : ncells %ld ncells_ghost %ld\n\n",(long)ncells,(long)ncells_ghost);
-#ifdef HAVE_OPENCL
-   int num_elements, elsize;
-
-   num_elements = ezcl_get_device_mem_nelements(dev_celltype);
-   elsize = ezcl_get_device_mem_elsize(dev_celltype);
-   printf("dev_celltype ptr : %p nelements %d elsize %d\n",(void *)dev_celltype,num_elements,elsize);
-   num_elements = ezcl_get_device_mem_nelements(dev_level);
-   elsize = ezcl_get_device_mem_elsize(dev_level);
-   printf("dev_level ptr : %p nelements %d elsize %d\n",(void *)dev_level,num_elements,elsize);
-   num_elements = ezcl_get_device_mem_nelements(dev_i);
-   elsize = ezcl_get_device_mem_elsize(dev_i);
-   printf("dev_i ptr : %p nelements %d elsize %d\n",(void *)dev_i,num_elements,elsize);
-   num_elements = ezcl_get_device_mem_nelements(dev_j);
-   elsize = ezcl_get_device_mem_elsize(dev_j);
-   printf("dev_j ptr : %p nelements %d elsize %d\n",(void *)dev_j,num_elements,elsize);
-
-   num_elements = ezcl_get_device_mem_nelements(dev_nlft);
-   elsize = ezcl_get_device_mem_elsize(dev_nlft);
-   printf("dev_nlft ptr : %p nelements %d elsize %d\n",(void *)dev_nlft,num_elements,elsize);
-   num_elements = ezcl_get_device_mem_nelements(dev_nrht);
-   elsize = ezcl_get_device_mem_elsize(dev_nrht);
-   printf("dev_nrht ptr : %p nelements %d elsize %d\n",(void *)dev_nrht,num_elements,elsize);
-   num_elements = ezcl_get_device_mem_nelements(dev_nbot);
-   elsize = ezcl_get_device_mem_elsize(dev_nbot);
-   printf("dev_nbot ptr : %p nelements %d elsize %d\n",(void *)dev_nbot,num_elements,elsize);
-   num_elements = ezcl_get_device_mem_nelements(dev_ntop);
-   elsize = ezcl_get_device_mem_elsize(dev_ntop);
-   printf("dev_ntop ptr : %p nelements %d elsize %d\n",(void *)dev_ntop,num_elements,elsize);
-#endif
-   printf("vector celltype ptr : %p nelements %ld elsize %ld\n",(void *)&celltype[0],(long)mesh_memory.get_memory_size(celltype),(long)sizeof(celltype[0]));
-   printf("vector level ptr : %p nelements %ld elsize %ld\n",(void *)&level[0], (long)mesh_memory.get_memory_size(level), (long)sizeof(level[0]));
-   printf("vector i ptr : %p nelements %ld elsize %ld\n",(void *)&i[0], (long)mesh_memory.get_memory_size(i), (long)sizeof(i[0]));
-   printf("vector j ptr : %p nelements %ld elsize %ld\n",(void *)&j[0], (long)mesh_memory.get_memory_size(j), (long)sizeof(j[0]));
-
-   printf("vector nlft ptr : %p nelements %ld elsize %ld\n",(void *)&nlft[0], (long)mesh_memory.get_memory_size(nlft), (long)sizeof(nlft[0]));
-   printf("vector nrht ptr : %p nelements %ld elsize %ld\n",(void *)&nrht[0], (long)mesh_memory.get_memory_size(nrht), (long)sizeof(nrht[0]));
-   printf("vector nbot ptr : %p nelements %ld elsize %ld\n",(void *)&nbot[0], (long)mesh_memory.get_memory_size(nbot), (long)sizeof(nbot[0]));
-   printf("vector ntop ptr : %p nelements %ld elsize %ld\n",(void *)&ntop[0], (long)mesh_memory.get_memory_size(ntop), (long)sizeof(ntop[0]));
-}
-
-
-// Chooses the order in which the four children (SW, SE, NW, NE) of cell ic
-// are emitted when the cell is refined.
-//
-//   order                - output; receives the four child quadrants in order
-//   ic                   - index of the cell being refined in the old arrays
-//   ifirst, jfirst,
-//   level_first          - i, j, level used as the "previous" cell when ic == 0
-//   ilast, jlast,
-//   level_last           - i, j, level used as the "next" cell when ic is the
-//                          last cell (ncells-1)
-//   i_old, j_old,
-//   level_old            - index and level arrays of the mesh before refinement
-//
-// When localStencil is false the fixed Z-order SW, SE, NW, NE is used.
-// Otherwise the order is chosen from the positions of the previous and next
-// cells relative to this one (old or new stencil, selected at compile time).
-void Mesh::set_refinement_order(int order[4], int ic, int ifirst, int ilast, int jfirst, int jlast,
-                                int level_first, int level_last, int *i_old, int *j_old, int *level_old)
-{
-   if (localStencil) {
-      // Store the coordinates of the cells before and after this one on
-      // the space-filling curve index.
-
-#ifdef __OLD_STENCIL__
-      spatial_t nx[3], // x-coordinates of cells.
-                ny[3]; // y-coordinates of cells.
-      if (ic != 0) {
-         nx[0] = lev_deltax[level_old[ic-1]] * (spatial_t)i[ic-1];
-         ny[0] = lev_deltay[level_old[ic-1]] * (spatial_t)j[ic-1];
-      } else {
-         nx[0] = lev_deltax[level_first] * (spatial_t)ifirst;
-         ny[0] = lev_deltay[level_first] * (spatial_t)jfirst;
-      }
-      nx[1] = lev_deltax[level_old[ic ]] * (spatial_t)i[ic ];
-      ny[1] = lev_deltay[level_old[ic ]] * (spatial_t)j[ic ];
-      if (ic != ncells-1) {
-         nx[2] = lev_deltax[level_old[ic+1]] * (spatial_t)i[ic+1];
-         ny[2] = lev_deltay[level_old[ic+1]] * (spatial_t)j[ic+1];
-      } else {
-         nx[2] = lev_deltax[level_last] * (spatial_t)ilast;
-         ny[2] = lev_deltay[level_last] * (spatial_t)jlast;
-      }
-
-      // Figure out relative orientation of the neighboring cells. We are
-      // are aided in this because the Hilbert curve only has six possible
-      // ways across the cell: four Ls and two straight lines. Then
-      // refine the cell according to the relative orientation and order
-      // according to the four-point Hilbert stencil.
-      if (nx[0] < nx[1] and ny[2] < ny[1]) // southwest L, forward order
-      { order[0] = SW; order[1] = NW; order[2] = NE; order[3] = SE; }
-      else if (nx[2] < nx[1] and ny[0] < ny[1]) // southwest L, reverse order
-      { order[0] = SE; order[1] = NE; order[2] = NW; order[3] = SW; }
-      else if (nx[0] > nx[1] and ny[2] < ny[1]) // southeast L, forward order
-      { order[0] = SE; order[1] = NE; order[2] = NW; order[3] = SW; }
-      else if (nx[2] > nx[1] and ny[0] < ny[1]) // southeast L, reverse order
-      { order[0] = SW; order[1] = NW; order[2] = NE; order[3] = SE; }
-      else if (nx[0] > nx[1] and ny[2] > ny[1]) // northeast L, forward order
-      { order[0] = SE; order[1] = SW; order[2] = NW; order[3] = NE; }
-      else if (nx[2] > nx[1] and ny[0] > ny[1]) // northeast L, reverse order
-      { order[0] = NE; order[1] = NW; order[2] = SW; order[3] = SE; }
-      else if (nx[0] < nx[1] and ny[2] > ny[1]) // northwest L, forward order
-      { order[0] = SW; order[1] = SE; order[2] = NE; order[3] = NW; }
-      else if (nx[2] < nx[1] and ny[0] > ny[1]) // northwest L, reverse order
-      { order[0] = NW; order[1] = NE; order[2] = SE; order[3] = SW; }
-      else if (nx[0] > nx[1] and nx[1] > nx[2]) // straight horizontal, forward order
-      { order[0] = NE; order[1] = SE; order[2] = SW; order[3] = NW; }
-      else if (nx[0] < nx[1] and nx[1] < nx[2]) // straight horizontal, reverse order
-      { order[0] = SW; order[1] = NW; order[2] = NE; order[3] = SE; }
-      else if (ny[0] > ny[1] and ny[1] > ny[2]) // straight vertical, forward order
-      { order[0] = NE; order[1] = NW; order[2] = SW; order[3] = SE; }
-      else if (ny[0] < ny[1] and ny[1] < ny[2]) // straight vertical, reverse order
-      { order[0] = SW; order[1] = SE; order[2] = NE; order[3] = NW; }
-      else // other, default to z-order
-      { order[0] = SW; order[1] = SE; order[2] = NW; order[3] = NE; }
-#endif
-
-#ifdef __NEW_STENCIL__
-      int ir[3], // First i index at finest level of the mesh
-          jr[3]; // First j index at finest level of the mesh
-
-      // Levels of the cells before and after this one on the curve. For the
-      // first/last cell there is no entry in level_old on that side, so the
-      // caller-supplied level_first/level_last are used instead; indexing
-      // level_old[ic-1] or level_old[ic+1] there would read out of bounds.
-      int level_in, level_out;
-
-      // Cell's Radius at the Finest level of the mesh
-      int crf = IPOW2(levmx-level_old[ic]);
-
-      if (ic != 0) {
-         level_in = level_old[ic - 1];
-         ir[0] = i_old[ic - 1] * IPOW2(levmx-level_in);
-         jr[0] = j_old[ic - 1] * IPOW2(levmx-level_in);
-      } else {
-         //printf("%d cell %d is a first\n",mype,ic);
-         level_in = level_first;
-         ir[0] = ifirst * IPOW2(levmx-level_first);
-         jr[0] = jfirst * IPOW2(levmx-level_first);
-      }
-      ir[1] = i_old[ic ] * IPOW2(levmx-level_old[ic ]);
-      jr[1] = j_old[ic ] * IPOW2(levmx-level_old[ic ]);
-      if (ic != (int)ncells-1) {
-         level_out = level_old[ic + 1];
-         ir[2] = i_old[ic + 1] * IPOW2(levmx-level_out);
-         jr[2] = j_old[ic + 1] * IPOW2(levmx-level_out);
-      } else {
-         //printf("%d cell %d is a last\n",mype,ic);
-         level_out = level_last;
-         ir[2] = ilast * IPOW2(levmx-level_last);
-         jr[2] = jlast * IPOW2(levmx-level_last);
-      }
-      //if (parallel) fprintf(fp,"%d: DEBUG rezone top boundary -- ic %d global %d noffset %d nc %d i %d j %d level %d\n",mype,ic,ic+noffset,noffset,nc,i[nc],j[nc],level[nc]);
-
-      // Offsets of this cell from the previous (in) and next (out) cells,
-      // measured in finest-level index units.
-      int dir_in = ir[1] - ir[0];
-      int dir_out = ir[1] - ir[2];
-      int djr_in = jr[1] - jr[0];
-      int djr_out = jr[1] - jr[2];
-
-      // 'X' means "not classified"; it falls through to the default knot.
-      char in_direction = 'X';
-      char out_direction = 'X';
-
-      // Left In
-      if( (djr_in == 0 && (dir_in == crf*HALF || dir_in == crf || dir_in == crf*TWO)) || (djr_in == -crf*HALF && dir_in == crf*HALF) || (djr_in == crf && dir_in == crf*TWO) ) {
-         in_direction = 'L';
-      }
-      // Bottom In
-      else if( (dir_in == 0 && (djr_in == crf*HALF || djr_in == crf || djr_in == crf*TWO)) || (dir_in == -crf*HALF && djr_in == crf*HALF) || (dir_in == crf && djr_in == crf*TWO) ) {
-         in_direction = 'B';
-      }
-      // Right In
-      else if( (dir_in == -crf && (djr_in == -crf*HALF || djr_in == 0 || (djr_in == crf && level_in < level_old[ic]))) ) {
-         in_direction = 'R';
-      }
-      // Top In
-      else if( (djr_in == -crf && (dir_in == -crf*HALF || dir_in == 0 || (dir_in == crf && level_in < level_old[ic]))) ) {
-         in_direction = 'T';
-      }
-      // Further from the left
-      else if( dir_in > 0 && djr_in == 0 ) {
-         in_direction = 'L';
-      }
-      // Further from the right
-      else if( dir_in < 0 && djr_in == 0 ) {
-         in_direction = 'R';
-      }
-      // Further from the bottom
-      else if( djr_in > 0 && dir_in == 0 ) {
-         in_direction = 'B';
-      }
-      // Further from the top
-      else if( djr_in < 0 && dir_in == 0 ) {
-         in_direction = 'T';
-      }
-      // SW in; 'M'
-      else if( dir_in > 0 && djr_in > 0) {
-         in_direction = 'M';
-      }
-      // NW in; 'W'
-      else if( dir_in > 0 && djr_in < 0) {
-         in_direction = 'W';
-      }
-      // SE in; 'F'
-      else if( dir_in < 0 && djr_in > 0) {
-         in_direction = 'F';
-      }
-      // NE in; 'E'
-      else if( dir_in < 0 && djr_in < 0) {
-         in_direction = 'E';
-      }
-
-
-      // Left Out
-      if( (djr_out == 0 && (dir_out == crf*HALF || dir_out == crf || dir_out == crf*TWO)) || (djr_out == -crf*HALF && dir_out == crf*HALF) || (djr_out == crf && dir_out == crf*TWO) ) {
-         out_direction = 'L';
-      }
-      // Bottom Out
-      else if( (dir_out == 0 && (djr_out == crf*HALF || djr_out == crf || djr_out == crf*TWO)) || (dir_out == -crf*HALF && djr_out == crf*HALF) || (dir_out == crf && djr_out == crf*TWO) ) {
-         out_direction = 'B';
-      }
-      // Right Out
-      else if( (dir_out == -crf && (djr_out == -crf*HALF || djr_out == 0 || (djr_out == crf && level_out < level_old[ic]))) ) {
-         out_direction = 'R';
-      }
-      // Top Out
-      else if( (djr_out == -crf && (dir_out == -crf*HALF || dir_out == 0 || (dir_out == crf && level_out < level_old[ic]))) ) {
-         out_direction = 'T';
-      }
-      // Further from the left
-      else if( dir_out > 0 && djr_out == 0 ) {
-         out_direction = 'L';
-      }
-      // Further from the right
-      else if( dir_out < 0 && djr_out == 0 ) {
-         out_direction = 'R';
-      }
-      // Further from the bottom
-      else if( djr_out > 0 && dir_out == 0 ) {
-         out_direction = 'B';
-      }
-      // Further from the top
-      else if( djr_out < 0 && dir_out == 0 ) {
-         out_direction = 'T';
-      }
-      // SW out; 'M'
-      else if( dir_out > 0 && djr_out > 0) {
-         out_direction = 'M';
-      }
-      // NW out; 'W'
-      else if( dir_out > 0 && djr_out < 0) {
-         out_direction = 'W';
-      }
-      // SE out; 'F'
-      else if( dir_out < 0 && djr_out > 0) {
-         out_direction = 'F';
-      }
-      // NE out; 'E'
-      else if( dir_out < 0 && djr_out < 0) {
-         out_direction = 'E';
-      }
-
-      // Set the Stencil
-      if(in_direction == 'L' && (out_direction == 'B' || out_direction == 'R' || out_direction == 'F')) {
-         order[0] = SW; order[1] = NW; order[2] = NE; order[3] = SE;
-      }
-      else if(in_direction == 'L' && (out_direction == 'T' || out_direction == 'W' )) {
-         order[0] = SW; order[1] = SE; order[2] = NE; order[3] = NW;
-      }
-      else if(in_direction == 'L' && out_direction == 'M') {
-         order[0] = NW; order[1] = NE; order[2] = SE; order[3] = SW;
-      }
-      else if(in_direction == 'L' && out_direction == 'E') {
-         order[0] = SW; order[1] = SE; order[2] = NW; order[3] = NE;
-      }
-
-      else if(in_direction == 'B' && (out_direction == 'R' || out_direction == 'F' )) {
-         order[0] = SW; order[1] = NW; order[2] = NE; order[3] = SE;
-      }
-      else if(in_direction == 'B' && (out_direction == 'L' || out_direction == 'T' || out_direction == 'W' )) {
-         order[0] = SW; order[1] = SE; order[2] = NE; order[3] = NW;
-      }
-      else if(in_direction == 'B' && out_direction == 'M') {
-         order[0] = SE; order[1] = NE; order[2] = NW; order[3] = SW;
-      }
-      else if(in_direction == 'B' && out_direction == 'E') {
-         order[0] = SW; order[1] = NW; order[2] = SE; order[3] = NE;
-      }
-
-      else if(in_direction == 'R' && (out_direction == 'T' || out_direction == 'L' || out_direction == 'W' )) {
-         order[0] = NE; order[1] = SE; order[2] = SW; order[3] = NW;
-      }
-      else if(in_direction == 'R' && (out_direction == 'B' || out_direction == 'F' )) {
-         order[0] = NE; order[1] = NW; order[2] = SW; order[3] = SE;
-      }
-      else if(in_direction == 'R' && out_direction == 'M') {
-         order[0] = NE; order[1] = NW; order[2] = SE; order[3] = SW;
-      }
-      else if(in_direction == 'R' && out_direction == 'E') {
-         order[0] = SE; order[1] = SW; order[2] = NW; order[3] = NE;
-      }
-
-      else if(in_direction == 'T' && (out_direction == 'L' || out_direction == 'W' )) {
-         order[0] = NE; order[1] = SE; order[2] = SW; order[3] = NW;
-      }
-      else if(in_direction == 'T' && (out_direction == 'R' || out_direction == 'B' || out_direction == 'F' )) {
-         order[0] = NE; order[1] = NW; order[2] = SW; order[3] = SE;
-      }
-      else if(in_direction == 'T' && out_direction == 'M') {
-         order[0] = NE; order[1] = SE; order[2] = NW; order[3] = SW;
-      }
-      else if(in_direction == 'T' && out_direction == 'E') {
-         order[0] = NW; order[1] = SW; order[2] = SE; order[3] = NE;
-      }
-
-      else if(in_direction == 'M' && (out_direction == 'L' || out_direction == 'W' || out_direction == 'T') ) {
-         order[0] = SW; order[1] = SE; order[2] = NE; order[3] = NW;
-      }
-      else if(in_direction == 'M' && (out_direction == 'R' || out_direction == 'F' || out_direction == 'B') ) {
-         order[0] = SW; order[1] = NW; order[2] = NE; order[3] = SE;
-      }
-      else if(in_direction == 'M' && out_direction == 'E') {
-         order[0] = SW; order[1] = SE; order[2] = NW; order[3] = NE;
-      }
-
-      else if(in_direction == 'W' && (out_direction == 'L' || out_direction == 'M' || out_direction == 'B') ) {
-         order[0] = NW; order[1] = NE; order[2] = SE; order[3] = SW;
-      }
-      else if(in_direction == 'W' && (out_direction == 'R' || out_direction == 'E' || out_direction == 'T') ) {
-         order[0] = NW; order[1] = SW; order[2] = SE; order[3] = NE;
-      }
-      else if(in_direction == 'W' && out_direction == 'F') {
-         order[0] = NW; order[1] = NE; order[2] = SW; order[3] = SE;
-      }
-
-      else if(in_direction == 'F' && (out_direction == 'L' || out_direction == 'M' || out_direction == 'B') ) {
-         order[0] = SE; order[1] = NE; order[2] = NW; order[3] = SW;
-      }
-      else if(in_direction == 'F' && (out_direction == 'R' || out_direction == 'E' || out_direction == 'T') ) {
-         order[0] = SE; order[1] = SW; order[2] = NW; order[3] = NE;
-      }
-      else if(in_direction == 'F' && out_direction == 'W') {
-         order[0] = SE; order[1] = NE; order[2] = SW; order[3] = NW;
-      }
-
-      else if(in_direction == 'E' && (out_direction == 'L' || out_direction == 'W' || out_direction == 'T') ) {
-         order[0] = NE; order[1] = SE; order[2] = SW; order[3] = NW;
-      }
-      else if(in_direction == 'E' && (out_direction == 'R' || out_direction == 'F' || out_direction == 'B') ) {
-         order[0] = NE; order[1] = NW; order[2] = SW; order[3] = SE;
-      }
-      else if(in_direction == 'E' && out_direction == 'M') {
-         order[0] = NE; order[1] = SE; order[2] = NW; order[3] = SW;
-      }
-
-      else { // Default to a knot
-         order[0] = NW; order[1] = SE; order[2] = SW; order[3] = NE;
-         if (do_stencil_warning) {
-            printf("Nonlocal case for the stencil.\n");
-         }
-      }
-      // Determine the relative orientation of the neighboring cells.
-      // There are 12 possible ways across the cell: 4 Ls and 2 straight
-      // lines, each with 2 directions of traversal.
-      // Then the cell is refined and ordered according to the relative
-      // orientation and four-point Hilbert stencil.
-
-      // XXX NOTE that the four-point stencil varies depending upon
-      // the starting and ending point of the global Hilbert curve.
-      // The stencil applied here assumes the start at (0,0) and the end
-      // at (0,y_max). XXX WRONG
-#endif
-
-   } // End local stencil version
-   else // Use Z-ordering for the curve.
-   { order[0] = SW; order[1] = SE; order[2] = NW; order[3] = NE; }
-
-}
-
-// Rebuilds the x-face and y-face lists from the neighbor arrays.
-//
-// An x-face is recorded for every cell whose right neighbor is not itself
-// (two faces when the right neighbor is finer and has a cell above it); a
-// y-face likewise for the top neighbor. Each face stores a level (the larger
-// of the two cell levels) and an i/j index. Afterwards the per-level minimum
-// indices are saved in the *adjust arrays, subtracted from every face index,
-// and the per-level min/max arrays are rebased so that min is 0.
-void Mesh::calc_face_list(void)
-{
-   // ---------------------------- x-faces ----------------------------
-   xface_i.clear();
-   xface_j.clear();
-   xface_level.clear();
-
-   ixmin_level.clear();
-   ixmin_level.resize(levmx+1, 9999999);
-   ixmax_level.clear();
-   ixmax_level.resize(levmx+1, -9999999);
-   jxmin_level.clear();
-   jxmin_level.resize(levmx+1, 9999999);
-   jxmax_level.clear();
-   jxmax_level.resize(levmx+1, -9999999);
-
-   ixadjust.clear();
-   ixadjust.resize(levmx+1);
-   jxadjust.clear();
-   jxadjust.resize(levmx+1);
-
-   int face_count = 0;
-   for (int cell = 0; cell < (int)ncells; cell++) {
-      const int right = nrht[cell];
-      if (right == cell) continue; // cell is its own right neighbor: no face
-
-      const bool right_is_coarser = (level[right] < level[cell]);
-      const bool right_is_finer   = (level[right] > level[cell]);
-      // A coarser neighbor's indices are doubled to the face's level.
-      const int scale = right_is_coarser ? 2 : 1;
-
-      // Face between cell and its right neighbor
-      xface_level.push_back(right_is_coarser ? level[cell] : level[right]);
-      xface_i.push_back(i[right]*scale);
-      int face_j = j[right]*scale;
-      if (right_is_coarser && is_upper(j[cell])) {
-         face_j += 1;
-      }
-      xface_j.push_back(face_j);
-      face_count++;
-
-      // A finer right neighbor may have a second cell above it that also
-      // borders this cell.
-      if (right_is_finer && is_lower(j[right])) {
-         const int upper_right = ntop[right];
-         if (upper_right != right) {
-            xface_level.push_back((level[cell] > level[upper_right]) ? level[cell] : level[upper_right]);
-            xface_i.push_back(i[upper_right]*scale);
-            xface_j.push_back(j[upper_right]*scale);
-            face_count++;
-         }
-      }
-   }
-   nxface = face_count;
-
-   // Per-level index bounds of the x-faces
-   for (int face = 0; face < nxface; face++) {
-      const int lev = xface_level[face];
-      const int fi  = xface_i[face];
-      const int fj  = xface_j[face];
-
-      if (fi < ixmin_level[lev]) ixmin_level[lev] = fi;
-      if (fi > ixmax_level[lev]) ixmax_level[lev] = fi;
-      if (fj < jxmin_level[lev]) jxmin_level[lev] = fj;
-      if (fj > jxmax_level[lev]) jxmax_level[lev] = fj;
-   }
-
-   // Shift face indices so each level starts at zero
-   for (int face = 0; face < nxface; face++) {
-      const int lev = xface_level[face];
-      if (ixmax_level[lev] >= ixmin_level[lev]) {
-         xface_i[face] -= ixmin_level[lev];
-         xface_j[face] -= jxmin_level[lev];
-      }
-   }
-
-   // Remember the shift and rebase the bounds
-   for (int lev = 0; lev <= levmx; lev++) {
-      ixadjust[lev] = ixmin_level[lev];
-      jxadjust[lev] = jxmin_level[lev];
-      ixmax_level[lev] -= ixmin_level[lev];
-      jxmax_level[lev] -= jxmin_level[lev];
-      ixmin_level[lev] = 0;
-      jxmin_level[lev] = 0;
-   }
-
-   // ---------------------------- y-faces ----------------------------
-   yface_i.clear();
-   yface_j.clear();
-   yface_level.clear();
-
-   iymin_level.clear();
-   iymin_level.resize(levmx+1, 9999999);
-   iymax_level.clear();
-   iymax_level.resize(levmx+1, -9999999);
-   jymin_level.clear();
-   jymin_level.resize(levmx+1, 9999999);
-   jymax_level.clear();
-   jymax_level.resize(levmx+1, -9999999);
-
-   iyadjust.clear();
-   iyadjust.resize(levmx+1);
-   jyadjust.clear();
-   jyadjust.resize(levmx+1);
-
-   face_count = 0;
-   for (int cell = 0; cell < (int)ncells; cell++) {
-      const int top = ntop[cell];
-      if (top == cell) continue; // cell is its own top neighbor: no face
-
-      const bool top_is_coarser = (level[top] < level[cell]);
-      const bool top_is_finer   = (level[top] > level[cell]);
-      const int scale = top_is_coarser ? 2 : 1;
-
-      // Face between cell and its top neighbor
-      yface_level.push_back(top_is_coarser ? level[cell] : level[top]);
-      yface_j.push_back(j[top]*scale);
-      int face_i = i[top]*scale;
-      if (top_is_coarser && is_upper(i[cell])) {
-         face_i += 1;
-      }
-      yface_i.push_back(face_i);
-      face_count++;
-
-      // A finer top neighbor may have a second cell to its right that also
-      // borders this cell.
-      if (top_is_finer && is_lower(i[top])) {
-         const int right_top = nrht[top];
-         if (right_top != top) {
-            yface_level.push_back((level[cell] > level[right_top]) ? level[cell] : level[right_top]);
-            yface_j.push_back(j[right_top]*scale);
-            yface_i.push_back(i[right_top]*scale);
-            face_count++;
-         }
-      }
-   }
-   nyface = face_count;
-
-   // Per-level index bounds of the y-faces
-   for (int face = 0; face < nyface; face++) {
-      const int lev = yface_level[face];
-      const int fi  = yface_i[face];
-      const int fj  = yface_j[face];
-
-      if (fi < iymin_level[lev]) iymin_level[lev] = fi;
-      if (fi > iymax_level[lev]) iymax_level[lev] = fi;
-      if (fj < jymin_level[lev]) jymin_level[lev] = fj;
-      if (fj > jymax_level[lev]) jymax_level[lev] = fj;
-   }
-
-   // Shift face indices so each level starts at zero
-   for (int face = 0; face < nyface; face++) {
-      const int lev = yface_level[face];
-      if (iymax_level[lev] >= iymin_level[lev]) {
-         yface_i[face] -= iymin_level[lev];
-         yface_j[face] -= jymin_level[lev];
-      }
-   }
-
-   // Remember the shift and rebase the bounds
-   for (int lev = 0; lev <= levmx; lev++) {
-      iyadjust[lev] = iymin_level[lev];
-      jyadjust[lev] = jymin_level[lev];
-      iymax_level[lev] -= iymin_level[lev];
-      jymax_level[lev] -= jymin_level[lev];
-      iymin_level[lev] = 0;
-      jymin_level[lev] = 0;
-   }
-
-}
-
-// Rebuilds the x-face and y-face lists together with face-to-cell maps.
-//
-// For every face, map_{x,y}face2cell_lower holds the cell the face was
-// generated from and map_{x,y}face2cell_upper the neighbor on the other
-// side (right neighbor for x-faces, top neighbor for y-faces). Face levels
-// and indices, the per-level bounds and the *adjust shifts are filled in as
-// well: indices are shifted so each level's minimum becomes 0.
-void Mesh::calc_face_list_wmap(void)
-{
-   // ---------------------------- x-faces ----------------------------
-   map_xface2cell_lower.clear();
-   map_xface2cell_upper.clear();
-
-   xface_i.clear();
-   xface_j.clear();
-   xface_level.clear();
-
-   ixmin_level.clear();
-   ixmin_level.resize(levmx+1, 9999999);
-   ixmax_level.clear();
-   ixmax_level.resize(levmx+1, -9999999);
-   jxmin_level.clear();
-   jxmin_level.resize(levmx+1, 9999999);
-   jxmax_level.clear();
-   jxmax_level.resize(levmx+1, -9999999);
-
-   ixadjust.clear();
-   ixadjust.resize(levmx+1);
-   jxadjust.clear();
-   jxadjust.resize(levmx+1);
-
-   int face_count = 0;
-   for (int cell = 0; cell < (int)ncells; cell++) {
-      const int right = nrht[cell];
-      if (right == cell) continue; // cell is its own right neighbor: no face
-
-      const bool right_is_coarser = (level[right] < level[cell]);
-      const bool right_is_finer   = (level[right] > level[cell]);
-      // A coarser neighbor's indices are doubled to the face's level.
-      const int scale = right_is_coarser ? 2 : 1;
-
-      // Face between cell and its right neighbor
-      map_xface2cell_lower.push_back(cell);
-      map_xface2cell_upper.push_back(right);
-      xface_level.push_back(right_is_coarser ? level[cell] : level[right]);
-      xface_i.push_back(i[right]*scale);
-      int face_j = j[right]*scale;
-      if (right_is_coarser && is_upper(j[cell])) {
-         face_j += 1;
-      }
-      xface_j.push_back(face_j);
-      face_count++;
-
-      // A finer right neighbor may have a second cell above it that also
-      // borders this cell.
-      if (right_is_finer && is_lower(j[right])) {
-         const int upper_right = ntop[right];
-         if (upper_right != right) {
-            map_xface2cell_lower.push_back(cell);
-            map_xface2cell_upper.push_back(upper_right);
-            xface_level.push_back((level[cell] > level[upper_right]) ? level[cell] : level[upper_right]);
-            xface_i.push_back(i[upper_right]*scale);
-            xface_j.push_back(j[upper_right]*scale);
-            face_count++;
-         }
-      }
-   }
-   nxface = face_count;
-
-   // Per-level index bounds of the x-faces
-   for (int face = 0; face < nxface; face++) {
-      const int lev = xface_level[face];
-      const int fi  = xface_i[face];
-      const int fj  = xface_j[face];
-
-      if (fi < ixmin_level[lev]) ixmin_level[lev] = fi;
-      if (fi > ixmax_level[lev]) ixmax_level[lev] = fi;
-      if (fj < jxmin_level[lev]) jxmin_level[lev] = fj;
-      if (fj > jxmax_level[lev]) jxmax_level[lev] = fj;
-   }
-
-   // Shift face indices so each level starts at zero
-   for (int face = 0; face < nxface; face++) {
-      const int lev = xface_level[face];
-      if (ixmax_level[lev] >= ixmin_level[lev]) {
-         xface_i[face] -= ixmin_level[lev];
-         xface_j[face] -= jxmin_level[lev];
-      }
-   }
-
-   // Remember the shift and rebase the bounds
-   for (int lev = 0; lev <= levmx; lev++) {
-      ixadjust[lev] = ixmin_level[lev];
-      jxadjust[lev] = jxmin_level[lev];
-      ixmax_level[lev] -= ixmin_level[lev];
-      jxmax_level[lev] -= jxmin_level[lev];
-      ixmin_level[lev] = 0;
-      jxmin_level[lev] = 0;
-   }
-
-   // ---------------------------- y-faces ----------------------------
-   map_yface2cell_lower.clear();
-   map_yface2cell_upper.clear();
-
-   yface_i.clear();
-   yface_j.clear();
-   yface_level.clear();
-
-   iymin_level.clear();
-   iymin_level.resize(levmx+1, 9999999);
-   iymax_level.clear();
-   iymax_level.resize(levmx+1, -9999999);
-   jymin_level.clear();
-   jymin_level.resize(levmx+1, 9999999);
-   jymax_level.clear();
-   jymax_level.resize(levmx+1, -9999999);
-
-   iyadjust.clear();
-   iyadjust.resize(levmx+1);
-   jyadjust.clear();
-   jyadjust.resize(levmx+1);
-
-   face_count = 0;
-   for (int cell = 0; cell < (int)ncells; cell++) {
-      const int top = ntop[cell];
-      if (top == cell) continue; // cell is its own top neighbor: no face
-
-      const bool top_is_coarser = (level[top] < level[cell]);
-      const bool top_is_finer   = (level[top] > level[cell]);
-      const int scale = top_is_coarser ? 2 : 1;
-
-      // Face between cell and its top neighbor
-      // printf("DEBUG -- iface %d lower nz %d upper nr %d\n",iface,nz,nt);
-      map_yface2cell_lower.push_back(cell);
-      map_yface2cell_upper.push_back(top);
-      yface_level.push_back(top_is_coarser ? level[cell] : level[top]);
-      yface_j.push_back(j[top]*scale);
-      int face_i = i[top]*scale;
-      if (top_is_coarser && is_upper(i[cell])) {
-         face_i += 1;
-      }
-      yface_i.push_back(face_i);
-      face_count++;
-
-      // A finer top neighbor may have a second cell to its right that also
-      // borders this cell.
-      if (top_is_finer && is_lower(i[top])) {
-         const int right_top = nrht[top];
-         if (right_top != top) {
-            map_yface2cell_lower.push_back(cell);
-            map_yface2cell_upper.push_back(right_top);
-            yface_level.push_back((level[cell] > level[right_top]) ? level[cell] : level[right_top]);
-            yface_j.push_back(j[right_top]*scale);
-            yface_i.push_back(i[right_top]*scale);
-            face_count++;
-         }
-      }
-   }
-   nyface = face_count;
-
-   // Per-level index bounds of the y-faces
-   for (int face = 0; face < nyface; face++) {
-      const int lev = yface_level[face];
-      const int fi  = yface_i[face];
-      const int fj  = yface_j[face];
-
-      if (fi < iymin_level[lev]) iymin_level[lev] = fi;
-      if (fi > iymax_level[lev]) iymax_level[lev] = fi;
-      if (fj < jymin_level[lev]) jymin_level[lev] = fj;
-      if (fj > jymax_level[lev]) jymax_level[lev] = fj;
-   }
-
-   // Shift face indices so each level starts at zero
-   for (int face = 0; face < nyface; face++) {
-      const int lev = yface_level[face];
-      if (iymax_level[lev] >= iymin_level[lev]) {
-         yface_i[face] -= iymin_level[lev];
-         yface_j[face] -= jymin_level[lev];
-      }
-   }
-
-   // Remember the shift and rebase the bounds
-   for (int lev = 0; lev <= levmx; lev++) {
-      iyadjust[lev] = iymin_level[lev];
-      jyadjust[lev] = jymin_level[lev];
-      iymax_level[lev] -= iymin_level[lev];
-      jymax_level[lev] -= jymin_level[lev];
-      iymin_level[lev] = 0;
-      jymin_level[lev] = 0;
-   }
-
-}
-
-void Mesh::calc_face_list_wbidirmap(void)
-{ /* Rebuilds the x-face and y-face lists from the neighbor arrays and fills
-   * the maps in both directions: face -> cell (map_xface2cell_lower/upper,
-   * map_yface2cell_lower/upper) and cell -> face (map_xcell2face_left/right,
-   * map_ycell2face_bot/top, two slots each, -1 when unset).
-   * Face i/j indices are then shifted per level so that each level's minimum
-   * becomes 0; the removed offsets are kept in ixadjust/jxadjust and
-   * iyadjust/jyadjust. Sets nxface and nyface to the number of faces built.
-   */
- map_xface2cell_lower.clear();
- map_xface2cell_upper.clear();
-
- map_xcell2face_left1.clear();
- map_xcell2face_left2.clear();
- map_xcell2face_right1.clear();
- map_xcell2face_right2.clear();
- map_xcell2face_left1.resize(ncells, -1); // -1 marks "no face in this slot"
- map_xcell2face_left2.resize(ncells, -1);
- map_xcell2face_right1.resize(ncells, -1);
- map_xcell2face_right2.resize(ncells, -1);
-
- xface_i.clear();
- xface_j.clear();
- xface_level.clear();
-
- ixmin_level.clear();
- ixmax_level.clear();
- jxmin_level.clear();
- jxmax_level.clear();
- ixmin_level.resize(levmx+1, 9999999); // sentinels: any real index replaces them in the min/max pass below
- ixmax_level.resize(levmx+1, -9999999);
- jxmin_level.resize(levmx+1, 9999999);
- jxmax_level.resize(levmx+1, -9999999);
-
- ixadjust.clear();
- ixadjust.resize(levmx+1);
- jxadjust.clear();
- jxadjust.resize(levmx+1);
-
- int iface=0; // running face id; also the index of the entry being pushed onto the face arrays
- for (int nz=0; nz<(int)ncells; nz++){
- int nr = nrht[nz];
- if (nr == nz) continue; // cell is its own right neighbor: no right face is created
-
- int ifactor = 1;
- if (level[nr] < level[nz]) ifactor = 2; // right neighbor is at a lower level: its i/j are doubled below
-
- // Have right face
- map_xface2cell_lower.push_back(nz);
- map_xface2cell_upper.push_back(nr);
- xface_level.push_back(MAX(level[nz],level[nr]));
- xface_i.push_back(i[nr]*ifactor);
- if (level[nr] < level[nz] && is_upper(j[nz]) ) {
- xface_j.push_back(j[nr]*ifactor+1);
- } else {
- xface_j.push_back(j[nr]*ifactor);
- }
- map_xcell2face_right1[nz] = iface;
-
- iface++;
-
- if (level[nr] > level[nz] && is_lower(j[nr]) ){ // right neighbor is at a higher level: try a second face against ntop[nr]
- int ntr = ntop[nr];
- if (ntr != nr) {
- map_xface2cell_lower.push_back(nz);
- map_xface2cell_upper.push_back(ntr);
- xface_level.push_back(MAX(level[nz],level[ntr]));
- xface_i.push_back(i[ntr]*ifactor);
- xface_j.push_back(j[ntr]*ifactor);
- map_xcell2face_right2[nz] = iface;
-
- iface++;
- }
- }
- }
- nxface=iface;
-
- for (int nz=0; nz<(int)ncells; nz++){ // left-face slots are copied from the left neighbor's right-face slots
- int nl = nlft[nz];
- if (nl == nz) continue;
-
- if (level[nl] < level[nz] && is_upper(j[nz])){
- map_xcell2face_left1[nz] = map_xcell2face_right2[nl];
- } else {
- map_xcell2face_left1[nz] = map_xcell2face_right1[nl];
- if (level[nl] > level[nz]){
- map_xcell2face_left2[nz] = map_xcell2face_right1[ntop[nl]];
- }
- }
-
- }
-
- map_yface2cell_lower.clear();
- map_yface2cell_upper.clear();
-
- map_ycell2face_bot1.clear();
- map_ycell2face_bot2.clear();
- map_ycell2face_top1.clear();
- map_ycell2face_top2.clear();
- map_ycell2face_bot1.resize(ncells, -1);
- map_ycell2face_bot2.resize(ncells, -1);
- map_ycell2face_top1.resize(ncells, -1);
- map_ycell2face_top2.resize(ncells, -1);
-
- yface_i.clear();
- yface_j.clear();
- yface_level.clear();
-
- iymin_level.clear();
- iymax_level.clear();
- jymin_level.clear();
- jymax_level.clear();
- iymin_level.resize(levmx+1, 9999999);
- iymax_level.resize(levmx+1, -9999999);
- jymin_level.resize(levmx+1, 9999999);
- jymax_level.resize(levmx+1, -9999999);
-
- iyadjust.clear();
- iyadjust.resize(levmx+1);
- jyadjust.clear();
- jyadjust.resize(levmx+1);
-
- iface=0; // face ids restart at 0 for the y faces
- for (int nz=0; nz<(int)ncells; nz++){
- int nt = ntop[nz];
- if (nt == nz) continue; // cell is its own top neighbor: no top face is created
-
- int ifactor = 1;
- if (level[nt] < level[nz]) ifactor = 2;
-
- // Have top face
- // printf("DEBUG -- iface %d lower nz %d upper nr %d\n",iface,nz,nt);
- map_yface2cell_lower.push_back(nz);
- map_yface2cell_upper.push_back(nt);
- yface_level.push_back(MAX(level[nz],level[nt]));
- yface_j.push_back(j[nt]*ifactor);
- if (level[nt] < level[nz] && is_upper(i[nz]) ) {
- yface_i.push_back(i[nt]*ifactor+1);
- } else{
- yface_i.push_back(i[nt]*ifactor);
- }
- map_ycell2face_top1[nz] = iface;
-
- iface++;
-
- if (level[nt] > level[nz] &&is_lower(i[nt]) ){ // top neighbor is at a higher level: try a second face against nrht[nt]
- int nrt = nrht[nt];
- if (nrt != nt) {
- map_yface2cell_lower.push_back(nz);
- map_yface2cell_upper.push_back(nrt);
- yface_level.push_back(MAX(level[nz],level[nrt]));
- yface_j.push_back(j[nrt]*ifactor);
- yface_i.push_back(i[nrt]*ifactor);
- map_ycell2face_top2[nz] = iface;
-
- iface++;
- }
- }
- }
- nyface=iface;
-
- for (int nz=0; nz<(int)ncells; nz++){ // bottom-face slots are copied from the bottom neighbor's top-face slots
- int nb = nbot[nz];
- if (nb == nz) continue;
-
- if (level[nb] < level[nz] && is_upper(i[nz])){
- map_ycell2face_bot1[nz] = map_ycell2face_top2[nb];
- } else {
- map_ycell2face_bot1[nz] = map_ycell2face_top1[nb];
- if (level[nb] > level[nz]){
- map_ycell2face_bot2[nz] = map_ycell2face_top1[nrht[nb]];
- }
- }
-
- }
-
- for (int iface=0; iface < nxface; iface++){ // pass 1 (x): per-level min/max of the face indices
- int fl = xface_level[iface];
-
- int fi = xface_i[iface];
- if (fi < ixmin_level[fl]) ixmin_level[fl] = fi;
- if (fi > ixmax_level[fl]) ixmax_level[fl] = fi;
-
- int fj = xface_j[iface];
- if (fj < jxmin_level[fl]) jxmin_level[fl] = fj;
- if (fj > jxmax_level[fl]) jxmax_level[fl] = fj;
- }
-
- for (int iface=0; iface < nxface; iface++){ // pass 2 (x): shift indices so the level minimum becomes 0
- int fl = xface_level[iface];
- if (ixmax_level[fl] < ixmin_level[fl]) continue;
-
- xface_i[iface] -= ixmin_level[fl];
- xface_j[iface] -= jxmin_level[fl];
- }
-
- for (int fl = 0; fl <= levmx; fl++){ // pass 3 (x): remember the offset, rebase max, zero min
- ixadjust[fl] = ixmin_level[fl]; // NOTE(review): a level with no x faces still holds the +/-9999999 sentinels here, so its adjust/max values look meaningless -- confirm callers never use such a level
- jxadjust[fl] = jxmin_level[fl];
- ixmax_level[fl] -= ixmin_level[fl];;
- jxmax_level[fl] -= jxmin_level[fl];
- ixmin_level[fl] = 0;
- jxmin_level[fl] = 0;
- }
-
- for (int iface=0; iface < nyface; iface++){ // pass 1 (y): per-level min/max of the face indices
- int fl = yface_level[iface];
-
- int fi = yface_i[iface];
- if (fi < iymin_level[fl]) iymin_level[fl] = fi;
- if (fi > iymax_level[fl]) iymax_level[fl] = fi;
-
- int fj = yface_j[iface];
- if (fj < jymin_level[fl]) jymin_level[fl] = fj;
- if (fj > jymax_level[fl]) jymax_level[fl] = fj;
- }
-
- for (int iface=0; iface < nyface; iface++){ // pass 2 (y): shift indices so the level minimum becomes 0
- int fl = yface_level[iface];
- if (iymax_level[fl] < iymin_level[fl]) continue;
-
- yface_i[iface] -= iymin_level[fl];
- yface_j[iface] -= jymin_level[fl];
- }
-
- for (int fl = 0; fl <= levmx; fl++){ // pass 3 (y): remember the offset, rebase max, zero min
- iyadjust[fl] = iymin_level[fl];
- jyadjust[fl] = jymin_level[fl];
- iymax_level[fl] -= iymin_level[fl];;
- jymax_level[fl] -= jymin_level[fl];
- iymin_level[fl] = 0;
- jymin_level[fl] = 0;
- }
-
-}
-
-/*
- * Builds a (jxmax_level[lev]+1) x (ixmax_level[lev]+1) int matrix for level
- * lev whose entries are 1 at the (j,i) position of every x face of that
- * level and -1 everywhere else. When DEBUG or print_output is set the matrix
- * is also printed, highest j row first. The matrix is obtained from
- * genmatrix and returned to the caller.
- */
-int **Mesh::get_xface_flag(int lev, bool print_output)
-{
-   const int nrows = jxmax_level[lev]+1;
-   const int ncols = ixmax_level[lev]+1;
-
-   int **xface_flag = (int **)genmatrix(nrows, ncols, sizeof(int));
-
-   // Start with every position unmarked
-   for (int row = 0; row < nrows; row++){
-      int *flags = xface_flag[row];
-      for (int col = 0; col < ncols; col++){
-         flags[col] = -1;
-      }
-   }
-
-   // Mark the positions occupied by faces of the requested level
-   for (int iface = 0; iface < nxface; iface++){
-      if (xface_level[iface] != lev) continue;
-
-      xface_flag[xface_j[iface]][xface_i[iface]] = 1;
-   }
-
-   if (!(DEBUG || print_output)) return(xface_flag);
-
-   printf("DEBUG -- x face_flag for level %d\n",lev);
-   printf("DEBUG -- sizes isize+1 %d jsize+1 %d\n",ncols,nrows);
-
-   // Column header
-   printf(" ");
-   for (int col = 0; col < ncols; col++){
-      printf(" %4d ",col);
-   }
-   printf("\n");
-
-   // One line per row, from the highest j down to 0
-   for (int row = nrows-1; row >= 0; row--){
-      printf("DEBUG -- j %4d: ",row);
-      for (int col = 0; col < ncols; col++){
-         const int flag = xface_flag[row][col];
-         if (flag < 0){
-            printf(" ");
-         } else {
-            printf(" %4d ", flag);
-         }
-      }
-      printf("\n");
-   }
-
-   return(xface_flag);
-}
-
-/*
- * Builds a (jymax_level[lev]+1) x (iymax_level[lev]+1) int matrix for level
- * lev whose entries are 1 at the (j,i) position of every y face of that
- * level and -1 everywhere else. When DEBUG or print_output is set the matrix
- * is also printed, highest j row first. The matrix is obtained from
- * genmatrix and returned to the caller.
- */
-int **Mesh::get_yface_flag(int lev, bool print_output)
-{
-   const int nrows = jymax_level[lev]+1;
-   const int ncols = iymax_level[lev]+1;
-
-   int **yface_flag = (int **)genmatrix(nrows, ncols, sizeof(int));
-
-   // Start with every position unmarked
-   for (int row = 0; row < nrows; row++){
-      int *flags = yface_flag[row];
-      for (int col = 0; col < ncols; col++){
-         flags[col] = -1;
-      }
-   }
-
-   // Mark the positions occupied by faces of the requested level
-   for (int iface = 0; iface < nyface; iface++){
-      if (yface_level[iface] != lev) continue;
-
-      yface_flag[yface_j[iface]][yface_i[iface]] = 1;
-   }
-
-   if (!(DEBUG || print_output)) return(yface_flag);
-
-   printf("DEBUG -- y face_flag for level %d\n",lev);
-   printf("DEBUG -- sizes isize+1 %d jsize+1 %d\n",ncols,nrows);
-
-   // Column header
-   printf(" ");
-   for (int col = 0; col < ncols; col++){
-      printf(" %4d ",col);
-   }
-   printf("\n");
-
-   // One line per row, from the highest j down to 0
-   for (int row = nrows-1; row >= 0; row--){
-      printf("DEBUG -- j %4d: ",row);
-      for (int col = 0; col < ncols; col++){
-         const int flag = yface_flag[row][col];
-         if (flag < 0){
-            printf(" ");
-         } else {
-            printf(" %4d ", flag);
-         }
-      }
-      printf("\n");
-   }
-
-   return(yface_flag);
-}
-
-void Mesh::get_flat_grid(int lev, int ***zone_flag_base, int ***zone_cell_base)
-{ /* Allocates two jsize x isize int matrices with genmatrix and returns them
-   * through *zone_flag_base and *zone_cell_base. Both start filled with -1.
-   * For every x face and y face whose level equals lev, the two cells on
-   * either side of the face are visited: zone_flag gets 1 and zone_cell gets
-   * the cell index at the position computed from that cell's i/j, and one
-   * adjacent position in zone_cell is filled as well. With DEBUG set both
-   * matrices are printed.
-   */
- int isize = ixmax_level[lev]+4; // NOTE(review): width comes from the x-face extents but height (next line) from the y-face extents -- confirm this mix is intended
- int jsize = jymax_level[lev]+4;
- int iadjust = ixadjust[lev]-2; // subtracting 2 moves indices 2 positions into the matrix
- int jadjust = jyadjust[lev]-2;
-
- //printf("DEBUG -- sizes isize %d jsize %d\n",isize,jsize);
- //printf("DEBUG -- adjust ixadjust %d jxadjust %d\n",ixadjust[lev],jxadjust[lev]);
- //printf("DEBUG -- adjust iyadjust %d jyadjust %d\n",iyadjust[lev],jyadjust[lev]);
-
- (*zone_flag_base) = (int **)genmatrix(jsize, isize, sizeof(int));
-
- int **zone_flag = *zone_flag_base;
- for (int jj=0; jj<jsize; jj++){
- for (int ii=0; ii<isize; ii++){
- zone_flag[jj][ii] = -1;
- }
- }
-
- (*zone_cell_base) = (int **)genmatrix(jsize, isize, sizeof(int));
-
- int **zone_cell = *zone_cell_base;
- for (int jj=0; jj<jsize; jj++){
- for (int ii=0; ii<isize; ii++){
- zone_cell[jj][ii] = -1;
- }
- }
-
- for (int iface=0; iface < nxface; iface++){
- if (xface_level[iface] == lev){
- int nz1 = map_xface2cell_lower[iface];
- int nz2 = map_xface2cell_upper[iface];
-
- if (lev == level[nz1]) { // lower cell is at this level: use its own i/j
- int iii = i[nz1]-iadjust;
- int jjj = j[nz1]-jadjust;
- zone_flag[jjj][iii] = 1;
- zone_cell[jjj][iii] = nz1;
- if (nlft[nz1] != REAL_CELL) { // NOTE(review): compares a neighbor index with REAL_CELL; a celltype lookup may have been intended -- confirm
- zone_cell[jjj][iii-1] = nlft[nz1];
- }
- } else { // lower cell is at another level: its i/j are doubled to reach this level's indexing
- int iii = i[nz1]*2-iadjust+1;
- int jjj = j[nz1]*2-jadjust;
- if (is_upper(j[nz2])) jjj += 1;
- zone_flag[jjj][iii] = 1;
- zone_cell[jjj][iii] = nz1;
- zone_cell[jjj][iii-1] = nz1;
- }
- if (lev == level[nz2]) { // same treatment for the upper cell, filling to the right instead of the left
- int iii = i[nz2]-iadjust;
- int jjj = j[nz2]-jadjust;
- zone_flag[jjj][iii] = 1;
- zone_cell[jjj][iii] = nz2;
- if (nrht[nz2] != REAL_CELL) {
- zone_cell[jjj][iii+1] = nrht[nz2];
- }
- } else {
- int iii = i[nz2]*2-iadjust;
- int jjj = j[nz2]*2-jadjust;
- if (is_upper(j[nz1])) jjj += 1;
- zone_flag[jjj][iii] = 1;
- zone_cell[jjj][iii] = nz2;
- zone_cell[jjj][iii+1] = nz2;
- }
- }
- }
-
- for (int iface=0; iface < nyface; iface++){
- if (yface_level[iface] == lev){
- int nz1 = map_yface2cell_lower[iface];
- int nz2 = map_yface2cell_upper[iface];
-
- if (lev == level[nz1]) { // lower cell is at this level: use its own i/j
- int iii = i[nz1]-iadjust;
- int jjj = j[nz1]-jadjust;
- zone_flag[jjj][iii] = 1;
- zone_cell[jjj][iii] = nz1;
- if (nbot[nz1] != REAL_CELL) {
- zone_cell[jjj-1][iii] = nbot[nz1];
- }
- } else { // lower cell is at another level: its i/j are doubled to reach this level's indexing
- int iii = i[nz1]*2-iadjust;
- int jjj = j[nz1]*2-jadjust+1;
- if (is_upper(i[nz2])) iii += 1;
- zone_flag[jjj][iii] = 1;
- zone_cell[jjj][iii] = nz1;
- zone_cell[jjj-1][iii] = nz1;
- }
- if (lev == level[nz2]) { // same treatment for the upper cell, filling above instead of below
- int iii = i[nz2]-iadjust;
- int jjj = j[nz2]-jadjust;
- zone_flag[jjj][iii] = 1;
- zone_cell[jjj][iii] = nz2;
- if (ntop[nz2] != REAL_CELL) {
- zone_cell[jjj+1][iii] = ntop[nz2];
- }
- } else {
- int iii = i[nz2]*2-iadjust;
- int jjj = j[nz2]*2-jadjust;
- if (is_upper(i[nz1])) iii += 1;
- zone_flag[jjj][iii] = 1;
- zone_cell[jjj][iii] = nz2;
- zone_cell[jjj+1][iii] = nz2;
- }
- }
- }
-
- if (DEBUG) {
- printf("DEBUG -- zone_flag for level %d\n",lev);
- printf("DEBUG -- sizes isize %d jsize %d\n",isize,jsize);
- for (int j=jsize-1; j>=0; j--){ // loop variables i and j below hide the mesh arrays i and j used above
- for (int i=0; i<isize; i++){
- if (zone_flag[j][i] >= 0){
- printf(" zone_flag_check[%d][%d] = 1;\n",j,i);
- }
- }
- }
- for (int j=jsize-1; j>=0; j--){
- for (int i=0; i<isize; i++){
- if (zone_cell[j][i] >= 0){
- printf(" zone_cell_check[%d][%d] = %d;\n",j,i,zone_cell[j][i]);
- }
- }
- }
-
- printf(" ");
- for (int i=0; i<isize; i++){
- printf(" %4d ",i);
- }
- printf("\n");
-
- for (int j=jsize-1; j>=0; j--){
-
- printf("DEBUG -- j %4d: ",j);
- for (int i=0; i<isize; i++){
- if (zone_flag[j][i] >= 0){
- printf(" %4d ", zone_flag[j][i]);
- } else {
- printf(" ");
- }
- }
- printf("\n");
- }
-
- printf("DEBUG -- zone_cell for level %d\n",lev);
-
- printf(" ");
- for (int i=0; i<isize; i++){
- printf(" %4d ",i);
- }
- printf("\n");
-
- for (int j=jsize-1; j>=0; j--){
-
- printf("DEBUG -- j %4d: ",j);
- for (int i=0; i<isize; i++){
- if (zone_cell[j][i] >= 0){
- printf(" %4d ", zone_cell[j][i]);
- } else {
- printf(" ");
- }
- }
- printf("\n");
- }
- }
-}
-
-/*
- * Empties every face<->cell map built by the calc_face_list routines.
- * Only the maps are cleared; the face index/level arrays are left alone.
- */
-void Mesh::calc_face_list_clearmaps()
-{
-   // x direction: face -> cell, then cell -> face
-   map_xface2cell_lower.clear();
-   map_xface2cell_upper.clear();
-   map_xcell2face_left1.clear();
-   map_xcell2face_left2.clear();
-   map_xcell2face_right1.clear();
-   map_xcell2face_right2.clear();
-
-   // y direction: face -> cell, then cell -> face
-   map_yface2cell_lower.clear();
-   map_yface2cell_upper.clear();
-   map_ycell2face_bot1.clear();
-   map_ycell2face_bot2.clear();
-   map_ycell2face_top1.clear();
-   map_ycell2face_top2.clear();
-}
-
-/*
- * Reports one mesh timer through parallel_output.
- *
- * category    - timer to report; also indexes mesh_timer_descriptor for the label
- * device_type - MESH_DEVICE_CPU reads the CPU timer, anything else the GPU timer
- * timer_level - indentation level; 2*timer_level characters of padding are
- *               requested in front of the label and passed on to parallel_output
- *
- * The label is only formatted on rank 0 (mype == 0); on other ranks an empty
- * string is passed along.
- */
-void Mesh::timer_output(mesh_timer_category category, mesh_device_types device_type, int timer_level)
-{
-   double local_time = 0.0;
-   if (device_type == MESH_DEVICE_CPU){
-      local_time = get_cpu_timer(category);
-   } else {
-      local_time = get_gpu_timer(category);
-   }
-
-   // Was initialised with "/0" (slash, zero) -- an empty string is what was meant.
-   char string[80] = "";
-
-   if (mype == 0) {
-      const char *blank=" ";
-      const char *device_tag = (device_type == MESH_DEVICE_CPU) ? "CPU" : "GPU";
-
-      // snprintf keeps the formatted label inside the 80-byte buffer whatever
-      // the padding width turns out to be.
-      snprintf(string, sizeof(string), "%s: %.*s%-30.30s\t",
-               device_tag, 2*timer_level, blank, mesh_timer_descriptor[category]);
-   }
-
-   parallel_output(string, local_time, timer_level, "s");
-}
-
-/*
- * Prints one double per rank, or a min/median/max summary, on rank 0.
- *
- * string       - label printed first, followed by a tab
- * local_value  - this rank's value; gathered to rank 0 when HAVE_MPI is
- *                defined and numpe > 1
- * output_level - 2*output_level characters of padding are requested before
- *                each number
- * units        - text printed after the numbers
- *
- * With numpe <= 4 every rank's value is listed. Otherwise the values are
- * sorted and the minimum, median and maximum are printed.
- */
-void Mesh::parallel_output(const char *string, double local_value, int output_level, const char *units)
-{
-   vector<double> global_values(numpe);
-   global_values[0] = local_value;
-#ifdef HAVE_MPI
-   if (numpe > 1) {
-      MPI_Gather(&local_value, 1, MPI_DOUBLE, &global_values[0], 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
-   }
-#endif
-   if (mype != 0) return;   // only rank 0 prints
-
-   const char *blank=" ";
-
-   printf("%s\t",string);
-   if (numpe <= 4) {
-      for(int ip = 0; ip < numpe; ip++){
-         printf("%.*s%8.4f\t", 2*output_level, blank, global_values[ip]);
-      }
-      printf("%s\n",units);
-      return;
-   }
-
-   sort(global_values.begin(),global_values.end());
-
-   // Median of the sorted values. For an odd count the middle element is at
-   // index numpe/2 (the previous code read numpe/2+1, one past the middle);
-   // for an even count the two middle elements are averaged.
-   const int half_value = numpe/2;
-   double median_value = global_values[half_value];
-   if (numpe%2 == 0) {
-      median_value = (global_values[half_value-1]+global_values[half_value])/2.0;
-   }
-
-   printf("%.*s%8.4f\t%.*s%8.4f\t%.*s%8.4f %s min/median/max\n",
-          2*output_level, blank, global_values[0],
-          2*output_level, blank, median_value,
-          2*output_level, blank, global_values[numpe-1],
-          units);
-}
-
-/*
- * Prints one long long per rank, or a min/median/max summary, on rank 0.
- *
- * string       - label printed first, followed by a tab
- * local_value  - this rank's value; gathered to rank 0 when HAVE_MPI is
- *                defined and numpe > 1
- * output_level - 2*output_level characters of padding are requested before
- *                each number
- * units        - text printed after the numbers
- *
- * With numpe <= 4 every rank's value is listed. Otherwise the values are
- * sorted and the minimum, median and maximum are printed.
- */
-void Mesh::parallel_output(const char *string, long long local_value, int output_level, const char *units)
-{
-   vector<long long> global_values(numpe);
-   global_values[0] = local_value;
-#ifdef HAVE_MPI
-   if (numpe > 1) {
-      MPI_Gather(&local_value, 1, MPI_LONG_LONG, &global_values[0], 1, MPI_LONG_LONG, 0, MPI_COMM_WORLD);
-   }
-#endif
-   if (mype != 0) return;   // only rank 0 prints
-
-   const char *blank=" ";
-
-   printf("%s\t",string);
-   if (numpe <= 4) {
-      for(int ip = 0; ip < numpe; ip++){
-         printf("%.*s%10lld\t", 2*output_level, blank, global_values[ip]);
-      }
-      printf("%s\n",units);
-      return;
-   }
-
-   sort(global_values.begin(),global_values.end());
-
-   // Median of the sorted values. For an odd count the middle element is at
-   // index numpe/2 (the previous code read numpe/2+1, one past the middle);
-   // for an even count the two middle elements are averaged (integer division).
-   const int half_value = numpe/2;
-   long long median_value = global_values[half_value];
-   if (numpe%2 == 0) {
-      median_value = (global_values[half_value-1]+global_values[half_value])/2;
-   }
-
-   printf("%.*s%10lld\t%.*s%10lld\t%.*s%10lld %s min/median/max\n",
-          2*output_level, blank, global_values[0],
-          2*output_level, blank, median_value,
-          2*output_level, blank, global_values[numpe-1],
-          units);
-}
-
-/*
- * Prints one int per rank, or a min/median/max summary, on rank 0.
- *
- * string       - label printed first, followed by a tab
- * local_value  - this rank's value; gathered to rank 0 when HAVE_MPI is
- *                defined and numpe > 1
- * output_level - 2*output_level characters of padding are requested before
- *                each number
- * units        - text printed after the numbers
- *
- * With numpe <= 4 every rank's value is listed. Otherwise the values are
- * sorted and the minimum, median and maximum are printed.
- */
-void Mesh::parallel_output(const char *string, int local_value, int output_level, const char *units)
-{
-   vector<int> global_values(numpe);
-   global_values[0] = local_value;
-#ifdef HAVE_MPI
-   if (numpe > 1) {
-      MPI_Gather(&local_value, 1, MPI_INT, &global_values[0], 1, MPI_INT, 0, MPI_COMM_WORLD);
-   }
-#endif
-   if (mype != 0) return;   // only rank 0 prints
-
-   const char *blank=" ";
-
-   printf("%s\t",string);
-   if (numpe <= 4) {
-      for(int ip = 0; ip < numpe; ip++){
-         printf("%.*s%10d\t", 2*output_level, blank, global_values[ip]);
-      }
-      printf("%s\n",units);
-      return;
-   }
-
-   sort(global_values.begin(),global_values.end());
-
-   // Median of the sorted values. For an odd count the middle element is at
-   // index numpe/2 (the previous code read numpe/2+1, one past the middle);
-   // for an even count the two middle elements are averaged (integer division).
-   const int half_value = numpe/2;
-   int median_value = global_values[half_value];
-   if (numpe%2 == 0) {
-      // Sum in long long so two large values cannot overflow int.
-      long long middle_sum = (long long)global_values[half_value-1] + global_values[half_value];
-      median_value = (int)(middle_sum/2);
-   }
-
-   printf("%.*s%10d\t%.*s%10d\t%.*s%10d %s min/median/max\n",
-          2*output_level, blank, global_values[0],
-          2*output_level, blank, median_value,
-          2*output_level, blank, global_values[numpe-1],
-          units);
-}
-
-const int CRUX_MESH_VERSION = 103; // written as int_vals[0] by store_checkpoint and compared against on restore
-const int num_int_dist_vals = 3; // length of int_dist_vals: ncells, ncells_ghost, offtile_local_count
-const int num_int_vals = 3; // length of int_vals: CRUX_MESH_VERSION, ndim, levmx
-const int num_double_vals = 1; // length of double_vals: offtile_ratio_local
-
-/*
- * Returns a byte count for the mesh checkpoint data: the three scalar
- * arrays, two counter arrays, two timer arrays and three ints per cell.
- */
-size_t Mesh::get_checkpoint_size(void)
-{
-   // Scalar header arrays
-   const size_t int_dist_bytes = num_int_dist_vals*sizeof(int);
-   const size_t int_bytes      = num_int_vals*sizeof(int);
-   const size_t double_bytes   = num_double_vals*sizeof(double);
-
-   // Counters (two arrays) and timers (one sized as double, one as long)
-   const size_t counter_bytes      = 2*MESH_COUNTER_SIZE*sizeof(int);
-   const size_t double_timer_bytes = MESH_TIMER_SIZE*sizeof(double);
-   const size_t long_timer_bytes   = MESH_TIMER_SIZE*sizeof(long);
-
-   // Three ints for every cell
-   const size_t cell_bytes = ncells*3*sizeof(int);
-
-   size_t nsize = int_dist_bytes;
-   nsize += int_bytes;
-   nsize += double_bytes;
-   nsize += counter_bytes;
-   nsize += double_timer_bytes;
-   nsize += long_timer_bytes;
-   nsize += cell_bytes;
-   return(nsize);
-}
-
-void Mesh::store_checkpoint(Crux *crux)
-{ /* Writes the mesh state to a checkpoint through crux.
-   * The memory capacity of the level array is stored first under the name
-   * "storage". Scalar members are then copied into small local arrays, which
-   * are registered with mesh_memory together with the counter and timer
-   * arrays so that store_MallocPlus writes them along with whatever else is
-   * in the database. The temporary registrations are removed again before
-   * returning, since the local arrays go out of scope.
-   */
- // Need ncells for memory allocation
- int storage = mesh_memory.get_memory_capacity(level);
- crux->store_named_ints("storage", 7, &storage, 1); // presumably 7 is the name length and 1 the value count -- confirm against Crux
- // Write scalars to arrays for storing in checkpoint
- int int_vals[num_int_vals];
-
- int_vals[ 0] = CRUX_MESH_VERSION;
- int_vals[ 1] = ndim;
- int_vals[ 2] = levmx;
-
- // These are for values that will be different on every processor
- int int_dist_vals[num_int_dist_vals];
- int_dist_vals[ 0] = (int)ncells;
- int_dist_vals[ 1] = (int)ncells_ghost;
- int_dist_vals[ 2] = offtile_local_count;
-
- double double_vals[num_double_vals];
-
- double_vals[0] = offtile_ratio_local;
-
- int flags = RESTART_DATA;
- // Now add memory entries to database for storing checkpoint
- mesh_memory.memory_add(int_dist_vals, (size_t)num_int_dist_vals, 4, "mesh_int_dist_vals", flags); // third argument looks like the element size in bytes (4 for int, 8 below) -- confirm
- flags = RESTART_DATA | REPLICATED_DATA;
- mesh_memory.memory_add(int_vals, (size_t)num_int_vals, 4, "mesh_int_vals", flags);
-
- flags = RESTART_DATA;
- mesh_memory.memory_add(double_vals, (size_t)num_double_vals, 8, "mesh_double_vals", flags); // NOTE(review): restore_checkpoint registers this entry with RESTART_DATA | REPLICATED_DATA -- confirm which is intended
- mesh_memory.memory_add(cpu_counters, (size_t)MESH_COUNTER_SIZE, 4, "mesh_cpu_counters", flags);
- mesh_memory.memory_add(gpu_counters, (size_t)MESH_COUNTER_SIZE, 4, "mesh_gpu_counters", flags);
-
- mesh_memory.memory_add(cpu_timers, (size_t)MESH_TIMER_SIZE, 8, "mesh_cpu_timers", flags);
- mesh_memory.memory_add(gpu_timers, (size_t)MESH_TIMER_SIZE, 8, "mesh_gpu_timers", flags);
-
- // Store MallocPlus memory database
- crux->store_MallocPlus(mesh_memory);
-
- // Remove memory entries from database now that data is stored
- mesh_memory.memory_remove(int_dist_vals);
- mesh_memory.memory_remove(int_vals);
- mesh_memory.memory_remove(double_vals);
- mesh_memory.memory_remove(cpu_counters);
- mesh_memory.memory_remove(gpu_counters);
- mesh_memory.memory_remove(cpu_timers);
- mesh_memory.memory_remove(gpu_timers);
-}
-
-/*
- * Reads the mesh state back from a checkpoint through crux.
- *
- * The stored capacity ("storage") is read first and used to re-allocate the
- * mesh arrays after the neighbor and celltype arrays have been released.
- * Local scalar arrays plus the counter and timer arrays are registered with
- * mesh_memory, filled by restore_MallocPlus, and unregistered again. The
- * scalars are then copied back into the members ncells, ncells_ghost,
- * offtile_local_count, ndim, levmx and offtile_ratio_local.
- *
- * If the version number on file differs from CRUX_MESH_VERSION a message is
- * printed and the program terminates with a failure status.
- */
-void Mesh::restore_checkpoint(Crux *crux)
-{
-   int storage;
-   crux->restore_named_ints("storage", 7, &storage, 1);
-
-   // Create memory for reading data into
-   int int_dist_vals[num_int_dist_vals];
-   int int_vals[num_int_vals];
-   double double_vals[num_double_vals];
-
-   // Release the existing neighbor/celltype arrays before re-allocating
-   mesh_memory.memory_delete(nlft);
-   mesh_memory.memory_delete(nrht);
-   mesh_memory.memory_delete(nbot);
-   mesh_memory.memory_delete(ntop);
-   mesh_memory.memory_delete(celltype);
-
-   nlft = NULL;
-   nrht = NULL;
-   ntop = NULL;
-   nbot = NULL;
-   celltype = NULL;
-
-   // Resize is a mesh method
-   // resize(storage);
-   // memory_reset_ptrs();
-   allocate (storage);
-
-   int flags = RESTART_DATA;
-   // Now add memory entries to database for restoring checkpoint
-   mesh_memory.memory_add(int_dist_vals, (size_t)num_int_dist_vals, 4, "mesh_int_dist_vals", flags);
-   flags = RESTART_DATA | REPLICATED_DATA;
-   mesh_memory.memory_add(int_vals, (size_t)num_int_vals, 4, "mesh_int_vals", flags);
-   // NOTE(review): store_checkpoint registers mesh_double_vals with plain
-   // RESTART_DATA, here it also carries REPLICATED_DATA -- confirm which is intended.
-   mesh_memory.memory_add(double_vals, (size_t)num_double_vals, 8, "mesh_double_vals", flags);
-
-   flags = RESTART_DATA;
-   mesh_memory.memory_add(cpu_counters, (size_t)MESH_COUNTER_SIZE, 4, "mesh_cpu_counters", flags);
-   mesh_memory.memory_add(gpu_counters, (size_t)MESH_COUNTER_SIZE, 4, "mesh_gpu_counters", flags);
-
-   mesh_memory.memory_add(cpu_timers, (size_t)MESH_TIMER_SIZE, 8, "mesh_cpu_timers", flags);
-   mesh_memory.memory_add(gpu_timers, (size_t)MESH_TIMER_SIZE, 8, "mesh_gpu_timers", flags);
-
-   // Restore MallocPlus memory database
-   crux->restore_MallocPlus(mesh_memory);
-
-   // Remove memory entries from database now that data is restored
-   mesh_memory.memory_remove(int_dist_vals);
-   mesh_memory.memory_remove(int_vals);
-   mesh_memory.memory_remove(double_vals);
-   mesh_memory.memory_remove(cpu_counters);
-   mesh_memory.memory_remove(gpu_counters);
-   mesh_memory.memory_remove(cpu_timers);
-   mesh_memory.memory_remove(gpu_timers);
-
-   // Check version number
-   if (int_vals[ 0] != CRUX_MESH_VERSION) {
-      printf("CRUX version mismatch for mesh data, version on file is %d, version in code is %d\n",
-             int_vals[0], CRUX_MESH_VERSION);
-      // An unreadable checkpoint is an error: report failure to the caller's
-      // shell/scripts instead of the success status that exit(0) gave.
-      exit(EXIT_FAILURE);
-   }
-
-   // Copy out scalar values from array
-   ncells = int_dist_vals[ 0];
-   ncells_ghost = int_dist_vals[ 1];
-   offtile_local_count = int_dist_vals[ 2];
-
-   // Copy out scalar values from array
-   ndim = int_vals[ 1];
-   levmx = int_vals[ 2];
-
-#ifdef DEBUG_RESTORE_VALS
-   if (DEBUG_RESTORE_VALS && mype == 0) {
-      const char *int_dist_vals_descriptor[num_int_dist_vals] = {
-         "ncells",
-         "ncells_ghost",
-         "offtile_local_count"
-      };
-      const char *int_vals_descriptor[num_int_vals] = {
-         "CRUX_MESH_VERSION",
-         "ndim",
-         "levmx",
-      };
-      printf("\n");
-      printf(" === Restored mesh int_dist_vals ===\n");
-      for (int i = 0; i < num_int_dist_vals; i++){
-         printf(" %-30s %d\n",int_dist_vals_descriptor[i], int_dist_vals[i]);
-      }
-      printf(" === Restored mesh int_vals ===\n");
-      for (int i = 0; i < num_int_vals; i++){
-         printf(" %-30s %d\n",int_vals_descriptor[i], int_vals[i]);
-      }
-      printf(" === Restored mesh int_vals ===\n");
-      printf("\n");
-   }
-#endif
-
-   offtile_ratio_local = double_vals[0];
-
-#ifdef DEBUG_RESTORE_VALS
-   if (DEBUG_RESTORE_VALS && mype == 0) {
-      const char *double_vals_descriptor[num_double_vals] = {
-         "offtile_ratio_local"
-      };
-      printf("\n");
-      printf(" === Restored mesh double_vals ===\n");
-      for (int i = 0; i < num_double_vals; i++){
-         printf(" %-30s %lf\n",double_vals_descriptor[i], double_vals[i]);
-      }
-      printf(" === Restored mesh double_vals ===\n");
-      printf("\n");
-   }
-#endif
-
-#ifdef DEBUG_RESTORE_VALS
-   if (DEBUG_RESTORE_VALS && mype == 0) {
-      printf(" === Restored mesh cpu counters ===\n");
-      for (int i = 0; i < MESH_COUNTER_SIZE; i++){
-         printf(" %-30s %d\n",mesh_counter_descriptor[i], cpu_counters[i]);
-      }
-      printf(" === Restored mesh cpu counters ===\n");
-      printf(" === Restored mesh gpu counters ===\n");
-      for (int i = 0; i < MESH_COUNTER_SIZE; i++){
-         printf(" %-30s %d\n",mesh_counter_descriptor[i], gpu_counters[i]);
-      }
-      printf(" === Restored mesh gpu counters ===\n");
-      printf("\n");
-   }
-#endif
-
-#ifdef DEBUG_RESTORE_VALS
-   if (DEBUG_RESTORE_VALS && mype == 0) {
-      printf(" === Restored mesh cpu timers ===\n");
-      for (int i = 0; i < MESH_TIMER_SIZE; i++){
-         printf(" %-30s %lf\n",mesh_timer_descriptor[i], cpu_timers[i]);
-      }
-      printf(" === Restored mesh cpu timers ===\n");
-      printf("\n");
-   }
-#endif
-
-#ifdef DEBUG_RESTORE_VALS
-   if (DEBUG_RESTORE_VALS && mype == 0) {
-      printf("\n");
-      printf(" === Restored mesh gpu timers ===\n");
-      for (int i = 0; i < MESH_TIMER_SIZE; i++){
-         // NOTE(review): %lld assumes gpu_timers holds long long values -- confirm
-         // against the member's declared type.
-         printf(" %-30s %lld\n",mesh_timer_descriptor[i], gpu_timers[i]);
-      }
-      printf(" === Restored mesh gpu timers ===\n");
-      printf("\n");
-   }
-#endif
-   //calc_celltype(ncells);
-}
-
-
-// This code due to Matt Calef
-void scan ( scanInt *input , scanInt *output , scanInt length)
-{ /* Running sum of input written to output, starting from 0: in the serial
-   * branch output[0] = 0 and output[k+1] = output[k] + input[k].
-   * With _OPENMP defined the work is split across the threads of the
-   * enclosing parallel region: each thread scans its own slice, one thread
-   * computes the per-slice offsets, then each thread applies its offset.
-   *
-   * NOTE(review): the serial branch writes output[1..length], i.e. it needs
-   * length+1 output elements, while the OpenMP branch only writes indices
-   * below length -- confirm the size callers allocate for output.
-   */
-#ifdef _OPENMP
- // This already assumes it is in a parallel region
-
- // Get the total number of threads
-
- scanInt numThreads = omp_get_num_threads ( );
-
- // Compute the range for which this thread is responsible.
-
- scanInt threadID = omp_get_thread_num ( );
- scanInt start = length * ( threadID ) / numThreads;
- scanInt end = length * ( threadID + 1 ) / numThreads;
-
- // In the case that there are fewer entries than threads, some
- // threads will have no entries. Only perform this operation if
- // there is a positive number of entries.
-
- if ( start < end ) {
-
- // Do a scan over the region for this thread, with an initial
- // value of zero.
-
- output[start] = 0;
- for ( scanInt i = start + 1 ; i < end ; i++ )
- output[i] = output[i-1] + input[i-1];
- }
-
- // Wait until all threads get here.
-
-#pragma omp barrier
-
- // At this point each thread has done an independent scan of its
- // region. All scans, except the first, are off by an
- // offset. Here we have a single thread compute that offset with a
- // serial scan that strides over the regions assigned to each
- // thread.
-
-#pragma omp single
- for ( scanInt i = 1 ; i < numThreads ; i ++ ) {
- scanInt s0 = length * ( i - 1 ) / numThreads; // first index of the previous thread's slice
- scanInt s1 = length * ( i ) / numThreads; // first index of thread i's slice
-
- if ( s0 < s1 )
- output[s1] = output[s0] + input[s1-1];
-
- if ( s0 < s1 - 1 )
- output[s1] += output[s1-1];
- }
-
- // Barrier is implicit from omp single Wait until all threads get here.
-
- // Apply the offset to the range for this thread.
-
- for ( scanInt i = start + 1 ; i < end ; i++ ) // output[start] itself already holds the offset
- output[i] += output[start];
-
-#else
- output[0] = 0;
- for (int ic = 0; ic < length; ic++){
- output[ic+1] = output[ic] + input[ic];
- }
-#endif
-}
-/*
- * Returns the loop bounds for the caller through lowerBound and upperBound.
- * Without OpenMP these are 0 and ncells. With OpenMP they are the entries
- * recorded for the calling thread in lowerBound_Global and upperBound_Global.
- */
-void Mesh::get_bounds(int& lowerBound, int& upperBound){
-#ifndef _OPENMP
-   lowerBound = 0;
-   upperBound = ncells;
-#else
-   const int tid = omp_get_thread_num();
-   lowerBound = lowerBound_Global[tid];
-   upperBound = upperBound_Global[tid];
-#endif
-}
-
-/*
- * Records per-thread loop bounds in lowerBound_Global / upperBound_Global.
- *
- * With OpenMP, n items are divided over the threads of the current team:
- * every thread gets n/nthreads items and the first n%nthreads threads get
- * one extra, so the ranges are contiguous and cover 0..n.
- * Without OpenMP a single range 0..ncells is stored; n is not used there.
- *
- * NOTE(review): the bound arrays are allocated only while the pointers are
- * NULL and are sized by the thread count of that first call; a later call
- * with more threads would index past them. The malloc results are not
- * checked. Confirm the thread count is fixed for the run.
- */
-void Mesh::set_bounds(int n){
-
-#ifdef _OPENMP
- // #pragma omp parallel
- {
- int nthreads = omp_get_num_threads();//Private for each thread
- int threadID = omp_get_thread_num(); //Private for each thread
- #pragma omp master
- {
- if(lowerBound_Global == NULL) lowerBound_Global = (int *)malloc(nthreads*sizeof(int));
- if(upperBound_Global == NULL) upperBound_Global = (int *)malloc(nthreads*sizeof(int));
- }
- //#pragma omp flush (lowerBound_Global, upperBound_Global)
- #pragma omp barrier
-
- int work = n/nthreads; // base share of items for every thread
- if(threadID<(n%nthreads))work++; // the first n%nthreads threads take one leftover item each
- int lowerBound = ((n / nthreads)*threadID) + min(n%nthreads, threadID); // base shares of the lower threads plus their extra items
- int upperBound = lowerBound + work;
-// printf("ThreadID: %d, upperBound: %d, lowerBound: %d \n",threadID, upperBound, lowerBound);
- lowerBound_Global[threadID] = lowerBound;
- upperBound_Global[threadID] = upperBound;
- }
-#else
- if(lowerBound_Global == NULL) lowerBound_Global = (int *)malloc(1*sizeof(int));
- if(upperBound_Global == NULL) upperBound_Global = (int *)malloc(1*sizeof(int));
- int lowerBound = 0;
- int upperBound = ncells; // NOTE(review): uses ncells rather than the argument n -- confirm this is intended
- lowerBound_Global[0] = lowerBound;
- upperBound_Global[0] = upperBound;
-#endif
-
-}
Removed: test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/mesh.h
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C%2B%2B/CLAMR/mesh.h?rev=312481&view=auto
==============================================================================
--- test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/mesh.h (original)
+++ test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/mesh.h (removed)
@@ -1,711 +0,0 @@
-/*
- * Copyright (c) 2011-2012, Los Alamos National Security, LLC.
- * All rights Reserved.
- *
- * Copyright 2011-2012. Los Alamos National Security, LLC. This software was produced
- * under U.S. Government contract DE-AC52-06NA25396 for Los Alamos National
- * Laboratory (LANL), which is operated by Los Alamos National Security, LLC
- * for the U.S. Department of Energy. The U.S. Government has rights to use,
- * reproduce, and distribute this software. NEITHER THE GOVERNMENT NOR LOS
- * ALAMOS NATIONAL SECURITY, LLC MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR
- * ASSUMES ANY LIABILITY FOR THE USE OF THIS SOFTWARE. If software is modified
- * to produce derivative works, such modified software should be clearly marked,
- * so as not to confuse it with the version available from LANL.
- *
- * Additionally, redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of the Los Alamos National Security, LLC, Los Alamos
- * National Laboratory, LANL, the U.S. Government, nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE LOS ALAMOS NATIONAL SECURITY, LLC AND
- * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT
- * NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL LOS ALAMOS NATIONAL
- * SECURITY, LLC OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
- * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
- * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- *
- * CLAMR -- LA-CC-11-094
- * This research code is being developed as part of the
- * 2011 X Division Summer Workshop for the express purpose
- * of a collaborative code for development of ideas in
- * the implementation of AMR codes for Exascale platforms
- *
- * AMR implementation of the Wave code previously developed
- * as a demonstration code for regular grids on Exascale platforms
- * as part of the Supercomputing Challenge and Los Alamos
- * National Laboratory
- *
- * Authors: Bob Robey XCP-2 brobey at lanl.gov
- * Neal Davis davis68 at lanl.gov, davis68 at illinois.edu
- * David Nicholaeff dnic at lanl.gov, mtrxknight at aol.com
- * Dennis Trujillo dptrujillo at lanl.gov, dptru10 at gmail.com
- *
- */
-#ifndef MESH_H_
-#define MESH_H_
-
-#ifdef HAVE_CONFIG_H
-#include "config.h"
-#endif
-
-#include "MallocPlus.h"
-#include <string>
-#include <stdio.h>
-#include <vector>
-#include <math.h>
-#include "KDTree.h"
-#include "crux.h"
-#include "partition.h"
-#ifdef HAVE_OPENCL
-#include "ezcl/ezcl.h"
-#endif
-
-#if !defined(FULL_PRECISION) && !defined(MIXED_PRECISION) && !defined(MINIMUM_PRECISION) // no precision mode was selected by the build
-#define FULL_PRECISION // so FULL_PRECISION becomes the default mode
-#endif
-#ifdef NO_CL_DOUBLE // NO_CL_DOUBLE overrides the other modes and forces MINIMUM_PRECISION
-#undef FULL_PRECISION
-#undef MIXED_PRECISION
-#define MINIMUM_PRECISION
-#endif
-
-#if defined(MINIMUM_PRECISION) // MINIMUM_PRECISION: real_t and spatial_t are both float
- typedef float real_t; // this is used for intermediate calculations
- typedef float spatial_t; // for spatial variables
-#ifdef HAVE_OPENCL
- typedef cl_float cl_real_t; // for intermediate gpu physics state variables
- typedef cl_float cl_spatial_t; // OpenCL counterpart of spatial_t
-#endif
-#ifdef HAVE_MPI
- #define MPI_REAL_T MPI_FLOAT // for MPI communication for physics state variables
- #define MPI_SPATIAL_T MPI_FLOAT // MPI datatype matching spatial_t
-#endif
-
-#elif defined(MIXED_PRECISION) // intermediate values calculated high precision and stored as floats
- typedef double real_t; // intermediate calculations in double
- typedef float spatial_t; // for spatial variables
-#ifdef HAVE_OPENCL
- typedef cl_double cl_real_t; // for intermediate gpu physics state variables
- typedef cl_float cl_spatial_t; // OpenCL counterpart of spatial_t
-#endif
-#ifdef HAVE_MPI
- #define MPI_REAL_T MPI_DOUBLE // MPI datatype matching real_t
- #define MPI_SPATIAL_T MPI_FLOAT // MPI datatype matching spatial_t
-#endif
-
-#elif defined(FULL_PRECISION) // FULL_PRECISION: real_t and spatial_t are both double
- typedef double real_t; // intermediate calculations in double
- typedef double spatial_t; // for spatial variables
-#ifdef HAVE_OPENCL
- typedef cl_double cl_real_t; // for intermediate gpu physics state variables
- typedef cl_double cl_spatial_t; // OpenCL counterpart of spatial_t
-#endif
-#ifdef HAVE_MPI
- #define MPI_REAL_T MPI_DOUBLE // MPI datatype matching real_t
- #define MPI_SPATIAL_T MPI_DOUBLE // MPI datatype matching spatial_t
-#endif
-#endif
-
-#define TILE_SIZE 128 // NOTE(review): not referenced in this header; meaning to be confirmed at its use sites
-
-#define SWAP_PTR(xnew,xold,xtmp) (xtmp=xnew, xnew=xold, xold=xtmp) // exchanges xnew and xold via the scratch variable xtmp; arguments are not parenthesized and are used twice, so pass plain variables
-#define MIN(a,b) ((a) < (b) ? (a) : (b)) // smaller of a and b; the selected argument is evaluated twice, so avoid side effects
-#define MAX(a,b) ((a) > (b) ? (a) : (b)) // larger of a and b; the selected argument is evaluated twice, so avoid side effects
-
-typedef unsigned int uint; // shorthand used for the cell-count arguments of the Mesh::compare_* routines
-
-//float mem_opt_factor = 1.0;
-
-/** Cell-type codes: the positive value marks a real cell, each negative
- *  value marks a ghost cell on the named boundary. */
-enum boundary
-{
-   BACK_BOUNDARY   = -6,   //!< Ghost cell on the back boundary.
-   FRONT_BOUNDARY  = -5,   //!< Ghost cell on the front boundary.
-   TOP_BOUNDARY    = -4,   //!< Ghost cell on the top boundary.
-   BOTTOM_BOUNDARY = -3,   //!< Ghost cell on the bottom boundary.
-   RIGHT_BOUNDARY  = -2,   //!< Ghost cell on the right boundary.
-   LEFT_BOUNDARY   = -1,   //!< Ghost cell on the left boundary.
-   REAL_CELL       =  1    //!< Real cell.
-};
-
-/** Number of spatial dimensions; the values equal the dimension count. */
-enum dimensionality
-{
-   ONE_DIMENSIONAL   = 1,   //!< Numbering starts at 1 for clarity.
-   TWO_DIMENSIONAL   = 2,
-   THREE_DIMENSIONAL = 3
-};
-
-/** Quadrant identifiers, numbered from zero in the order SW, NW, NE, SE. */
-enum orientation
-{
-   SW = 0,   //!< South-west quadrant.
-   NW = 1,   //!< North-west quadrant.
-   NE = 2,   //!< North-east quadrant.
-   SE = 3    //!< South-east quadrant.
-};
-
-/** Selects the data structure used for the neighbor calculation. */
-enum neighbor_calc
-{
-   HASH_TABLE = 0,   //!< Hash table.
-   KDTREE     = 1    //!< k-D tree.
-};
-
-/**
- * Timer slots of the Mesh class. Each value is an index into
- * Mesh::cpu_timers and Mesh::gpu_timers; MESH_TIMER_SIZE is the number of
- * slots and has to remain the last enumerator.
- */
-enum mesh_timers {
-   MESH_TIMER_COUNT_BCS = 0,              //  0
-   MESH_TIMER_CALC_NEIGHBORS,             //  1
-   MESH_TIMER_HASH_SETUP,                 //  2
-   MESH_TIMER_HASH_QUERY,                 //  3
-   MESH_TIMER_FIND_BOUNDARY,              //  4
-   MESH_TIMER_PUSH_SETUP,                 //  5
-   MESH_TIMER_PUSH_BOUNDARY,              //  6
-   MESH_TIMER_LOCAL_LIST,                 //  7
-   MESH_TIMER_LAYER1,                     //  8
-   MESH_TIMER_LAYER2,                     //  9
-   MESH_TIMER_LAYER_LIST,                 // 10
-   MESH_TIMER_COPY_MESH_DATA,             // 11
-   MESH_TIMER_FILL_MESH_GHOST,            // 12
-   MESH_TIMER_FILL_NEIGH_GHOST,           // 13
-   MESH_TIMER_SET_CORNER_NEIGH,           // 14
-   MESH_TIMER_NEIGH_ADJUST,               // 15
-   MESH_TIMER_SETUP_COMM,                 // 16
-   MESH_TIMER_KDTREE_SETUP,               // 17
-   MESH_TIMER_KDTREE_QUERY,               // 18
-   MESH_TIMER_REFINE_SMOOTH,              // 19
-   MESH_TIMER_REZONE_ALL,                 // 20
-   MESH_TIMER_PARTITION,                  // 21
-   MESH_TIMER_CALC_SPATIAL_COORDINATES,   // 22
-   MESH_TIMER_LOAD_BALANCE,               // 23
-   MESH_TIMER_SIZE                        // 24 -- slot count, keep last
-};
-
-/**
- * Counter slots of the Mesh class. Each value is an index into
- * Mesh::cpu_counters and Mesh::gpu_counters; MESH_COUNTER_SIZE is the
- * number of slots and has to remain the last enumerator.
- */
-enum mesh_counters {
-   MESH_COUNTER_REZONE = 0,
-   MESH_COUNTER_REFINE_SMOOTH,
-   MESH_COUNTER_CALC_NEIGH,
-   MESH_COUNTER_LOAD_BALANCE,
-   MESH_COUNTER_SIZE
-};
-
-//#ifdef DEBUG_RESTORE_VALS
-/** Text label of each counter slot, in the same order as enum mesh_counters. */
-static const char *mesh_counter_descriptor[MESH_COUNTER_SIZE] = {
-   /* MESH_COUNTER_REZONE        */ "mesh_counter_rezone",
-   /* MESH_COUNTER_REFINE_SMOOTH */ "mesh_counter_refine_smooth",
-   /* MESH_COUNTER_CALC_NEIGH    */ "mesh_counter_calc_neigh",
-   /* MESH_COUNTER_LOAD_BALANCE  */ "mesh_counter_load_balance"
-};
-//#endif
-
-typedef enum mesh_timers mesh_timer_category; // argument type of Mesh::get_cpu_timer, get_gpu_timer and timer_output
-typedef enum mesh_counters mesh_counter_category; // argument type of Mesh::get_cpu_counter and get_gpu_counter
-
-/** Device selector, taken by Mesh::timer_output as its device_type argument. */
-enum mesh_device_types {
-   MESH_DEVICE_CPU = 0,
-   MESH_DEVICE_GPU = 1
-};
-
-typedef mesh_device_types mesh_device_type;
-
-using namespace std;
-
-/****************************************************************//**
- * Mesh class
- * Contains the cell-based adaptive mesh refinement
- * (AMR) object with its data and methods.
- *
- * The data members are public. Copy construction is blocked by a
- * private, declared-only copy constructor.
- *******************************************************************/
-class Mesh
-{
-
-public:
- int ndim; //!< Dimensionality of mesh (2 or 3).
-
- MallocPlus mesh_memory;
- MallocPlus gpu_mesh_memory;
-
-#ifdef HAVE_OPENCL
- string defines;
-#endif
-
- double cpu_timers[MESH_TIMER_SIZE]; //!< CPU timers, indexed by mesh_timers value (see get_cpu_timer).
- long long gpu_timers[MESH_TIMER_SIZE]; //!< GPU timers, indexed by mesh_timers value (see get_gpu_timer).
-
- int cpu_counters[MESH_COUNTER_SIZE]; //!< CPU counters, indexed by mesh_counters value.
- int gpu_counters[MESH_COUNTER_SIZE]; //!< GPU counters, indexed by mesh_counters value.
-
- bool do_rezone,
- gpu_do_rezone;
-
- int mype,
- numpe,
- parallel,
- cell_handle,
- noffset;
-
- int *lowerBound_Global, //!< Per-thread lower bounds, written by set_bounds and read by get_bounds.
- *upperBound_Global; //!< Per-thread upper bounds, written by set_bounds and read by get_bounds.
-
- float mem_factor;
-
- double offtile_ratio_local;
- int offtile_local_count;
-
- vector<int> corners_i,
- corners_j;
-
- vector<int> nsizes,
- ndispl;
-
- FILE *fp;
-
- TKDTree tree; //!< k-D tree for neighbor search.
- vector<int> proc;
- vector<int> lev_ibegin, //!< Lowest x-index in use at specified level of refinement.
- lev_iend, //!< Highest x-index in use at specified level of refinement.
- lev_jbegin, //!< Lowest y-index in use at specified level of refinement.
- lev_jend, //!< Highest y-index in use at specified level of refinement.
- lev_kbegin, //!< Lowest z-index in use at specified level of refinement.
- lev_kend, //!< Highest z-index in use at specified level of refinement.
- levtable; //!< Powers of two to simplify i,j calculations
- vector<real_t> lev_deltax, //!< Grid spacing along x-axis at specified level of refinement.
- lev_deltay, //!< Grid spacing along y-axis at specified level of refinement.
- lev_deltaz; //!< Grid spacing along z-axis at specified level of refinement.
- int levmx, //!< Maximum level of refinement allowed.
- have_boundary,//!< Mesh includes boundary cells, else creates on the fly
- ibase, //!< Index basis for arrays (0 for C, 1 for Fortran).
- imin, //!< Lowest x-index in use.
- imax, //!< Highest x-index in use.
- jmin, //!< Lowest y-index in use.
- jmax, //!< Highest y-index in use.
- kmin, //!< Lowest z-index in use.
- kmax; //!< Highest z-index in use.
- size_t ncells, //!< Number of cells in mesh.
- ncells_global, //!< Global number of cells for parallel runs
- ncells_ghost; //!< Number of cells in mesh with ghost cells.
- real_t xmin, //!< Lowest x-coordinate in use.
- xmax, //!< Highest x-coordinate in use.
- ymin, //!< Lowest y-coordinate in use.
- ymax, //!< Highest y-coordinate in use.
- zmin, //!< Lowest z-coordinate in use.
- zmax, //!< Highest z-coordinate in use.
- xcentermin, //!< Center of minimum x cell
- xcentermax, //!< Center of maximum x cell
- ycentermin, //!< Center of minimum y cell
- ycentermax, //!< Center of maximum y cell
- zcentermin, //!< Center of minimum z cell
- zcentermax, //!< Center of maximum z cell
- deltax, //!< Grid spacing along x-axis.
- deltay, //!< Grid spacing along y-axis.
- deltaz; //!< Grid spacing along z-axis.
-
- vector<int> index; //!< 1D ordered index of mesh elements.
-
- // mesh state data
- int *i, //!< 1D array of mesh element x-indices.
- *j, //!< 1D array of mesh element y-indices.
- *k, //!< 1D array of mesh element z-indices.
- *level, //!< 1D array of mesh element refinement levels.
- //!< derived data from mesh state data
- *celltype, //!< 1D ordered index of mesh element cell types (ghost or real).
- *nlft, //!< 1D ordered index of mesh element left neighbors.
- *nrht, //!< 1D ordered index of mesh element right neighbors.
- *nbot, //!< 1D ordered index of mesh element bottom neighbors.
- *ntop, //!< 1D ordered index of mesh element top neighbors.
- *nfrt, //!< 1D ordered index of mesh element front neighbors.
- *nbak; //!< 1D ordered index of mesh element back neighbors.
-
- vector<spatial_t> x, //!< 1D ordered index of mesh element x-coordinates.
- dx, //!< 1D ordered index of mesh element x-coordinate spacings.
- y, //!< 1D ordered index of mesh element y-coordinates.
- dy, //!< 1D ordered index of mesh element y-coordinate spacings.
- z, //!< 1D ordered index of mesh element z-coordinates.
- dz; //!< 1D ordered index of mesh element z-coordinate spacings.
-
-#ifdef HAVE_OPENCL
- cl_mem dev_ioffset;
-
- cl_mem dev_celltype,
- dev_i,
- dev_j,
- dev_level,
- dev_nlft,
- dev_nrht,
- dev_nbot,
- dev_ntop;
-
- cl_mem dev_levdx, // corresponds to lev_deltax
- dev_levdy, // corresponds to lev_deltay
- dev_levibeg,
- dev_leviend,
- dev_levjbeg,
- dev_levjend,
- dev_levtable; //
-
- cl_mem dev_corners_i,
- dev_corners_j;
-#endif
-
- int nxface;
- int nyface;
-
- vector<int> xface_i;
- vector<int> xface_j;
- vector<int> xface_level;
- vector<int> map_xface2cell_lower;
- vector<int> map_xface2cell_upper;
-
- vector<int> map_xcell2face_left1;
- vector<int> map_xcell2face_left2;
- vector<int> map_xcell2face_right1;
- vector<int> map_xcell2face_right2;
-
- vector<int> ixmin_level;
- vector<int> ixmax_level;
- vector<int> jxmin_level;
- vector<int> jxmax_level;
- vector<int> ixadjust;
- vector<int> jxadjust;
-
- vector<int> yface_i;
- vector<int> yface_j;
- vector<int> yface_level;
- vector<int> map_yface2cell_lower;
- vector<int> map_yface2cell_upper;
-
- vector<int> map_ycell2face_bot1;
- vector<int> map_ycell2face_bot2;
- vector<int> map_ycell2face_top1;
- vector<int> map_ycell2face_top2;
-
- vector<int> iymin_level;
- vector<int> iymax_level;
- vector<int> jymin_level;
- vector<int> jymax_level;
- vector<int> iyadjust;
- vector<int> jyadjust;
-
- // Public constructors.
- Mesh(FILE *fin, int *numpe);
- Mesh(int nx, int ny, int levmx_in, int ndim_in, double deltax_in, double deltay_in, int boundary, int parallel_in, int do_gpu_calc);
-
- // Member functions.
- void init(int nx, int ny, real_t circ_radius, partition_method initial_order, int do_gpu_calc);
- void terminate(void);
-
- // Per-thread work ranges: set_bounds fills lowerBound_Global and
- // upperBound_Global, get_bounds returns the calling thread's range.
- void set_bounds(int n);
- void get_bounds(int& lowerBound, int& upperBound);
-
-/****************************************************************//**
- * @name Memory routines
- *******************************************************************/
-///@{
-
-/****************************************************************//**
- * \brief
- * Allocates the basic mesh memory, i, j, and level, using the MallocPlus
- * memory database.
- *
- * **Parameters**
- * * size_t ncells -- number of cells in the mesh
- *
- * Typical Usage
- *
- * mesh.allocate(ncells);
- *******************************************************************/
- void allocate(size_t ncells);
-
- void resize(size_t new_ncells);
- void memory_reset_ptrs(void);
- void resize_old_device_memory(size_t ncells);
-///@}
-
-/* inline "macros" */
-
-///@{
-/****************************************************************//**
- * \brief
- * Boundary cell tests
- *
- * Each test compares the index of cell ic with the lowest or highest
- * index in use at that cell's refinement level. The first two take the
- * index array and the per-level limits as arguments; the others use the
- * i, j, k members and the matching lev_*begin / lev_*end vectors.
- *******************************************************************/
- int is_lower_boundary(int *iv, int *lev_begin, int ic) { return (iv[ic] < lev_begin[level[ic]]); }
- int is_upper_boundary(int *iv, int *lev_end, int ic) { return (iv[ic] > lev_end[level[ic]]); }
-
- int is_left_boundary(int ic) { return (i[ic] < lev_ibegin[level[ic]]); }
- int is_right_boundary(int ic) { return (i[ic] > lev_iend[ level[ic]]); }
- int is_bottom_boundary(int ic) { return (j[ic] < lev_jbegin[level[ic]]); }
- int is_top_boundary(int ic) { return (j[ic] > lev_jend[ level[ic]]); }
- int is_front_boundary(int ic) { return (k[ic] < lev_kbegin[level[ic]]); }
- int is_back_boundary(int ic) { return (k[ic] > lev_kend[ level[ic]]); }
-///@}
-
-///@{
-/****************************************************************//**
- * \brief
- * Tests for positioning in set of 4 cells
- *
- * An even index is the lower (or left) position, an odd index the upper
- * (or right) position. The arguments i and j are plain index values that
- * hide the i and j member arrays inside these functions.
- *
- * The odd tests are written "% 2 != 0": C++ integer remainder takes the
- * sign of the dividend, so a negative odd index gives -1 and would fail
- * a comparison with 1. Results for non-negative indices are unchanged.
- *******************************************************************/
- int is_lower(int i) { return(i % 2 == 0); }
- int is_upper(int i) { return(i % 2 != 0); }
-
- int is_lower_left(int i, int j) { return(i % 2 == 0 && j % 2 == 0); }
- int is_lower_right(int i, int j) { return(i % 2 != 0 && j % 2 == 0); }
- int is_upper_left(int i, int j) { return(i % 2 == 0 && j % 2 != 0); }
- int is_upper_right(int i, int j) { return(i % 2 != 0 && j % 2 != 0); }
-///@}
-
-///@{
-/****************************************************************//**
- * \brief
- * Level tests
- *
- * Compare the refinement level of cell nn with that of cell nz; a
- * smaller level value is treated as coarser.
- *******************************************************************/
- int is_same_level_or_coarser(int nn, int nz) { return(level[nn] <= level[nz]); }
- int is_coarser(int nn, int nz) { return(level[nn] < level[nz]); }
- int is_finer(int nn, int nz) { return(level[nn] > level[nz]); }
- int is_same_level(int nn, int nz) { return(level[nn] == level[nz]); }
-///@}
-
-/* accessor routines */
- double get_cpu_timer(mesh_timer_category category) {return(cpu_timers[category]); };
- /* Convert nanoseconds to seconds (factor 1.0e-9) */
- double get_gpu_timer(mesh_timer_category category) {return((double)(gpu_timers[category])*1.0e-9); };
-
- void parallel_output(const char *string, double local_value, int output_level, const char *units);
- void parallel_output(const char *string, long long local_value, int output_level, const char *units);
- void parallel_output(const char *string, int local_value, int output_level, const char *units);
- void timer_output(mesh_timer_category category, mesh_device_types device_type, int timer_level);
-
- int get_cpu_counter(mesh_counter_category category) {return(cpu_counters[category]); };
- int get_gpu_counter(mesh_counter_category category) {return(gpu_counters[category]); };
-
- int get_calc_neighbor_type(void);
-
- void print_partition_measure(void);
- void print_calc_neighbor_type(void);
- void print_partition_type(void);
-/* end accessor routines */
-
-/* Debugging, internal, or not used yet */
-#ifdef HAVE_OPENCL
- int gpu_count_BCs();
-#endif
- void kdtree_setup(void);
- void partition_measure(void);
- void partition_cells(int numpe,
- vector<int> &order,
- enum partition_method method);
- void calc_distribution(int numpe);
- void calc_symmetry(vector<int> &dsym,
- vector<int> &xsym,
- vector<int> &ysym);
-
-/* End of debugging, internal, or not used yet */
-
- //void calc_face_list_test(double *H);
- void calc_face_list(void);
- void calc_face_list_wmap(void);
- void calc_face_list_wbidirmap(void);
- void calc_face_list_clearmaps(void);
-
- int **get_xface_flag(int lev, bool print_output=false);
- int **get_yface_flag(int lev, bool print_output=false);
- void get_flat_grid(int lev, int ***zone_flag, int ***zone_cell);
-
-///@{
-/****************************************************************//**
- * \brief
- * Calculate neighbors
- *
- * **Parameters**
- *
- * Input -- from within the object
- * i, j, level
- * Output -- in the object
- * nlft, nrht, nbot, ntop arrays
- *******************************************************************/
- void calc_neighbors(int ncells);
- void calc_neighbors_local(void);
-#ifdef HAVE_OPENCL
- void gpu_calc_neighbors(void);
- void gpu_calc_neighbors_local(void);
-#endif
- // TODO: Not created yet; overloading for 3D mesh support. (davis68)
- void calc_neighbors(vector<int> &nlft,
- vector<int> &nrht,
- vector<int> &nbot,
- vector<int> &ntop,
- vector<int> &nfrt,
- vector<int> &nbak,
- vector<int> index);
-///@}
-
-///@{
-/****************************************************************//**
- * \brief
- * Calculate rezone count
- *
- * **Parameters**
- *
- * Input
- * mpot -- potential mesh refinement
- * ioffset -- write offset for each cell
- * Output
- * result -- cell count
- *******************************************************************/
- int rezone_count(vector<int> mpot, int &icount, int &jcount);
-#ifdef HAVE_OPENCL
- void gpu_rezone_count2(size_t block_size, size_t local_work_size, cl_mem dev_redscratch, cl_mem &dev_result);
- void gpu_rezone_count(size_t block_size, size_t local_work_size, cl_mem dev_redscratch, cl_mem &dev_result);
- void gpu_rezone_scan(size_t block_size, size_t local_work_size, cl_mem dev_ioffset, cl_mem &dev_result);
-#endif
-///@}
-
-///@{
-/****************************************************************//**
- * \brief
- * Refine Smooth -- smooths jump in refinement level so that only a 1 to 2 jump occurs
- *
- * **Parameters**
- *
- * Input/Output
- * mpot -- potential mesh refinement array, 1 is refine and -1 coarsen
- * ioffset -- write offset for each cell to account for new cells
- * result -- refinement count
- *******************************************************************/
- size_t refine_smooth(vector<int> &mpot, int &icount, int &jcount);
-#ifdef HAVE_OPENCL
- int gpu_refine_smooth(cl_mem &dev_mpot, int &icount, int &jcount);
-#endif
-///@}
-
-///@{
-/****************************************************************//**
- * \brief
- * Rezone mesh
- *
- * **Parameters**
- *
- * Input
- * add_ncells -- for each processor. A global sum will be done and the main part of
- * the rezone will be skipped if no cells are added.
- * mpot -- mesh rezone potential
- * have_state flag -- 0 (false) for setup when physics state has not been allocated
- * ioffset -- partial prefix scan results for starting address to write new cells
- * state_memory -- linked list of arrays for state
- * Output
- * new mesh and state arrays with refinement/coarsening performed
- *******************************************************************/
- void rezone_all(int icount, int jcount, vector<int> mpot, int have_state, MallocPlus &state_memory);
-#ifdef HAVE_OPENCL
- void gpu_rezone_all(int icount, int jcount, cl_mem &dev_mpot, MallocPlus &gpu_state_memory);
-#endif
-///@}
-
-///@{
-/****************************************************************//**
- * \brief
- * Load balance -- only needed for parallel (MPI) runs
- *
- * **Parameters**
- *
- * Input
- * numcells -- ncells from rezone all routine. This is a copy in so that a local
- * value can be used for load_balance and gpu_load_balance without it getting
- * reset for clamr_checkall routine
- * weight -- weighting array per cell for balancing. Currently not used. Null value
- * indicates even weighting of cells for load balance.
- * state_memory or gpu_state_memory -- linked-list of arrays from physics routine
- * to be load balanced.
- * Output -- arrays will be returned load balanced with new sizes. Pointers to arrays
- * will need to be reset
- *******************************************************************/
-#ifdef HAVE_MPI
- void do_load_balance_local(size_t numcells, float *weight, MallocPlus &state_memory);
-#ifdef HAVE_OPENCL
- int gpu_do_load_balance_local(size_t numcells, float *weight, MallocPlus &gpu_state_memory);
-#endif
-#endif
-///@}
-
-///@{
-/****************************************************************//**
- * \brief
- * Calculate spatial coordinates
- *
- * **Parameters**
- *
- * Input -- from within the object
- * i, j, level
- * Output
- * x, y -- coordinates for each cell
- * dx, dy -- size of each cell
- *******************************************************************/
- void calc_spatial_coordinates(int ibase);
-#ifdef HAVE_OPENCL
- void gpu_calc_spatial_coordinates(cl_mem dev_x, cl_mem dev_dx, cl_mem dev_y, cl_mem dev_dy);
-#endif
-///@}
-
-///@{
-/****************************************************************//**
- * \brief
- * Testing routines
- *******************************************************************/
-#ifdef HAVE_OPENCL
- void compare_dev_local_to_local(void); // Not currently called
- void compare_neighbors_gpu_global_to_cpu_global(void);
-#endif
- void compare_neighbors_cpu_local_to_cpu_global(uint ncells_ghost, uint ncells_global, Mesh *mesh_global, int *nsizes, int *ndispl);
-#ifdef HAVE_OPENCL
- void compare_neighbors_all_to_gpu_local(Mesh *mesh_global, int *nsizes, int *ndispl);
- void compare_mpot_gpu_global_to_cpu_global(int *mpot, cl_mem dev_mpot);
-#endif
- void compare_mpot_cpu_local_to_cpu_global(uint ncells_global, int *nsizes, int *displ, int *mpot, int *mpot_global, int cycle);
-#ifdef HAVE_OPENCL
- void compare_mpot_all_to_gpu_local(int *mpot, int *mpot_global, cl_mem dev_mpot, cl_mem dev_mpot_global, uint ncells_global, int *nsizes, int *ndispl, int ncycle);
- void compare_ioffset_gpu_global_to_cpu_global(uint old_ncells, int *mpot);
- void compare_ioffset_all_to_gpu_local(uint old_ncells, uint old_ncells_global, int block_size, int block_size_global, int *mpot, int *mpot_global, cl_mem dev_ioffset, cl_mem dev_ioffset_global, int *ioffset, int *ioffset_global, int *celltype_global, int *i_global, int *j_global);
- void compare_coordinates_gpu_global_to_cpu_global_double(cl_mem dev_x, cl_mem dev_dx, cl_mem dev_y, cl_mem dev_dy, cl_mem dev_H, double *H);
- void compare_coordinates_gpu_global_to_cpu_global_float(cl_mem dev_x, cl_mem dev_dx, cl_mem dev_y, cl_mem dev_dy, cl_mem dev_H, float *H);
-#endif
- void compare_coordinates_cpu_local_to_cpu_global_double(uint ncells_global, int *nsizes, int *ndispl, spatial_t *x, spatial_t *dx, spatial_t *y, spatial_t *dy, double *H, spatial_t *x_global, spatial_t *dx_global, spatial_t *y_global, spatial_t *dy_global, double *H_global, int cycle);
- void compare_coordinates_cpu_local_to_cpu_global_float(uint ncells_global, int *nsizes, int *ndispl, spatial_t *x, spatial_t *dx, spatial_t *y, spatial_t *dy, float *H, spatial_t *x_global, spatial_t *dx_global, spatial_t *y_global, spatial_t *dy_global, float *H_global, int cycle);
-#ifdef HAVE_OPENCL
- void compare_indices_gpu_global_to_cpu_global(void);
-#endif
- void compare_indices_cpu_local_to_cpu_global(uint ncells_global, Mesh *mesh_global, int *nsizes, int *ndispl, int cycle);
-#ifdef HAVE_OPENCL
- void compare_indices_all_to_gpu_local(Mesh *mesh_global, uint ncells_global, int *nsizes, int *ndispl, int ncycle);
-#endif
-///@}
-
- size_t get_checkpoint_size(void);
- void store_checkpoint(Crux *crux);
- void restore_checkpoint(Crux *crux);
-
- void calc_celltype_threaded(size_t ncells);
- void calc_celltype(size_t ncells);
-
-private:
- // Private constructors.
- Mesh(const Mesh&); // Blocks copy constructor so copies are not made inadvertently.
-
- // Member functions.
- void print_object_info();
-
- void set_refinement_order(int order[4], int ic, int ifirst, int ilast, int jfirst, int jlast,
- int level_first, int level_last, int *i, int *j, int *level);
-
- void write_grid(int ncycle);
- void calc_centerminmax(void);
- void calc_minmax(void);
-
- void print(void);
- void print_local(void);
-#ifdef HAVE_OPENCL
- void print_dev_local();
-#endif
-
-};
-
-#endif /* MESH_H_ */
Removed: test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/partition.cpp
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C%2B%2B/CLAMR/partition.cpp?rev=312481&view=auto
==============================================================================
--- test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/partition.cpp (original)
+++ test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/partition.cpp (removed)
@@ -1,764 +0,0 @@
-/*
- * Copyright (c) 2011-2012, Los Alamos National Security, LLC.
- * All rights Reserved.
- *
- * Copyright 2011-2012. Los Alamos National Security, LLC. This software was produced
- * under U.S. Government contract DE-AC52-06NA25396 for Los Alamos National
- * Laboratory (LANL), which is operated by Los Alamos National Security, LLC
- * for the U.S. Department of Energy. The U.S. Government has rights to use,
- * reproduce, and distribute this software. NEITHER THE GOVERNMENT NOR LOS
- * ALAMOS NATIONAL SECURITY, LLC MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR
- * ASSUMES ANY LIABILITY FOR THE USE OF THIS SOFTWARE. If software is modified
- * to produce derivative works, such modified software should be clearly marked,
- * so as not to confuse it with the version available from LANL.
- *
- * Additionally, redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of the Los Alamos National Security, LLC, Los Alamos
- * National Laboratory, LANL, the U.S. Government, nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE LOS ALAMOS NATIONAL SECURITY, LLC AND
- * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT
- * NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL LOS ALAMOS NATIONAL
- * SECURITY, LLC OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
- * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
- * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- *
- * CLAMR -- LA-CC-11-094
- * This research code is being developed as part of the
- * 2011 X Division Summer Workshop for the express purpose
- * of a collaborative code for development of ideas in
- * the implementation of AMR codes for Exascale platforms
- *
- * AMR implementation of the Wave code previously developed
- * as a demonstration code for regular grids on Exascale platforms
- * as part of the Supercomputing Challenge and Los Alamos
- * National Laboratory
- *
- * Authors: Bob Robey XCP-2 brobey at lanl.gov
- * Neal Davis davis68 at lanl.gov, davis68 at illinois.edu
- * David Nicholaeff dnic at lanl.gov, mtrxknight at aol.com
- * Dennis Trujillo dptrujillo at lanl.gov, dptru10 at gmail.com
- *
- */
-#ifdef HAVE_MPI
-#include "mpi.h"
-#endif
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <math.h>
-#include <list>
-#include <algorithm>
-#include "partition.h"
-#include "KDTree.h"
-#include "mesh.h"
-#ifdef HAVE_MPI
-#include "s7.h"
-#endif
-#include "zorder.h"
-#include "timer.h"
-#include "hsfc.h"
-
-#ifndef DEBUG
-#define DEBUG 0 // Compile-time switch; a nonzero value enables the mesh dump at the end of partition_cells().
-#endif
-// File-scope state used by the partition-quality routines in this file.
-typedef unsigned int uint;
-// The three variables below are written by partition_measure() and read by print_partition_measure().
-int measure_type; // Selected metric; compared against the partition_measure enumerators (NO_PARTITION_MEASURE, ...).
-int meas_count = 0; // Number of partition_measure() calls that recorded a measurement.
-double meas_sum_average = 0.0; // Running sum of the per-call value offtile_ratio/num_groups.
-// Defined in another translation unit; only read in this file (by print_partition_type()).
-extern bool localStencil;
-extern enum partition_method initial_order;
-extern enum partition_method cycle_reorder;
-
-// Gathers the neighbor references of cell ic that land outside the tile
-// [start_idx, end_idx). For each of the four faces the direct neighbor is
-// tested; when that neighbor is at a finer level than ic, the neighbor's own
-// top (left/right faces) or right (bottom/top faces) neighbor is tested too.
-// Hits are stored in offtile[] in the order left, right, bottom, top and the
-// number of entries written (0..8) is returned.
-static int offtile_neighbors(int ic, int start_idx, int end_idx,
-                             const int *nlft, const int *nrht,
-                             const int *nbot, const int *ntop,
-                             const int *level, int offtile[8])
-{
-   const int *direct[4] = { nlft, nrht, nbot, ntop };
-   const int *second[4] = { ntop, ntop, nrht, nrht };
-
-   int count = 0;
-   for (int face = 0; face < 4; face++) {
-      const int nbr = direct[face][ic];
-      if (nbr < start_idx || nbr >= end_idx) offtile[count++] = nbr;
-      if (level[nbr] > level[ic]) {
-         const int nbr2 = second[face][nbr];
-         if (nbr2 < start_idx || nbr2 >= end_idx) offtile[count++] = nbr2;
-      }
-   }
-   return count;
-}
-
-// Measures how many neighbor references leave each TILE_SIZE-cell tile of the
-// current cell ordering and adds the per-tile average to meas_sum_average
-// (meas_count is incremented once per call). Does nothing when measure_type
-// is NO_PARTITION_MEASURE.
-//
-// Per-tile contribution, by measure_type:
-//   WITH_DUPLICATES     off-tile references / TILE_SIZE
-//   WITHOUT_DUPLICATES  distinct off-tile cells / TILE_SIZE
-//   CVALUE              distinct off-tile cells / (4*sqrt(TILE_SIZE))
-//   CSTARVALUE          s*q*c / (4*g*(8*g+c-1)) with s = distinct off-tile
-//                       cells, q = distinct cache lines of c = 4 entries
-//                       holding them, g = sqrt(TILE_SIZE)
-//
-// The cell index is derived from group_id rather than from a running counter:
-// under the orphaned "omp for" each thread receives an arbitrary range of
-// group_id values, so a per-thread counter starting at zero would examine the
-// wrong cells. In a serial build the result is unchanged.
-void Mesh::partition_measure(void)
-{
-   if (measure_type == NO_PARTITION_MEASURE) return;
-
-   int ntX = TILE_SIZE;
-   // Static so that the variable is shared, as the reduction clause on the
-   // orphaned worksharing loop requires.
-   // NOTE(review): it is never reset, so every call also re-adds the totals
-   // of all earlier calls to meas_sum_average -- confirm whether intended.
-   static double offtile_ratio = 0.0;
-
-   uint num_groups = (ncells + TILE_SIZE - 1)/TILE_SIZE;
-
-   const int cache_line_size = 4; // Some could be 8, or more? Assumes memory is aligned.
-   const bool known_measure = (measure_type == WITH_DUPLICATES ||
-                               measure_type == WITHOUT_DUPLICATES ||
-                               measure_type == CVALUE ||
-                               measure_type == CSTARVALUE);
-
-   if (known_measure) {
-#ifdef _OPENMP
-#pragma omp for reduction(+:offtile_ratio)
-#endif
-      for (uint group_id = 0; group_id < num_groups; group_id++) {
-         list<int> offtile_list;         // off-tile cells (deduplicated below)
-         list<int> offtile_cache_lines;  // cache lines holding them (CSTARVALUE only)
-         int offtile_refs = 0;           // off-tile references, duplicates included
-
-         int start_idx = group_id * ntX;
-         int end_idx   = (group_id + 1) * ntX;
-
-         // taken from wave_kern_calc.cl 'setup tile' kernel
-         for (uint it = 0; it < TILE_SIZE; it++) {
-            int ic = start_idx + (int)it;
-            if (ic >= ncells) break;   // last tile may be partially filled
-
-            int found[8];
-            int nfound = offtile_neighbors(ic, start_idx, end_idx,
-                                           &nlft[0], &nrht[0], &nbot[0], &ntop[0],
-                                           &level[0], found);
-            offtile_refs += nfound;
-            if (measure_type == WITH_DUPLICATES) continue;
-
-            for (int k = 0; k < nfound; k++) {
-               offtile_list.push_back(found[k]);
-               if (measure_type == CSTARVALUE) {
-                  offtile_cache_lines.push_back(found[k]/cache_line_size);
-               }
-            }
-         }
-
-         double contribution = 0.0;
-         if (measure_type == WITH_DUPLICATES) {
-            contribution = (double)offtile_refs/(double)(TILE_SIZE);
-         } else {
-            offtile_list.sort();
-            offtile_list.unique();
-
-            if (measure_type == WITHOUT_DUPLICATES) {
-               contribution = (double)offtile_list.size()/(double)(TILE_SIZE);
-            } else if (measure_type == CVALUE) {
-               contribution = (double)offtile_list.size()/(4*sqrt((double)(TILE_SIZE)));
-            } else { // CSTARVALUE
-               offtile_cache_lines.sort();
-               offtile_cache_lines.unique();
-
-               double s_ngeom = (double)(offtile_list.size());
-               double q_ngeom = (double)(offtile_cache_lines.size());
-               double cover   = (double)(cache_line_size);
-               double ngeom   = sqrt((double)(TILE_SIZE));
-               contribution = (s_ngeom*q_ngeom*cover) / ( 4 * ngeom * (8*ngeom+cover-1) );
-            }
-         }
-         offtile_ratio += contribution;
-      }
-   }
-
-   // Only one thread updates the file-scope accumulators.
-#ifdef _OPENMP
-#pragma omp master
-#endif
-   {
-      meas_count ++;
-      meas_sum_average += offtile_ratio/(double)num_groups;
-   }
-}
-
-// Prints the partition-quality metric averaged over all recorded
-// partition_measure() calls, labelled according to measure_type, and, when
-// more than one rank is in use, the MPI surface-to-volume ratio held in
-// offtile_ratio_local.
-void Mesh::print_partition_measure()
-{
-   if (meas_count != 0) {
-      const double average = meas_sum_average/(double)meas_count;
-
-      switch (measure_type) {
-      case NO_PARTITION_MEASURE:
-         if (mype == 0) printf("No Partition Measure\n");
-         break;
-      case WITH_DUPLICATES:
-         parallel_output("Average surface area to volume ratio ", average, 0, "with duplicates");
-         break;
-      case WITHOUT_DUPLICATES:
-         parallel_output("Average surface area to volume ratio ", average, 0, "without duplicates");
-         break;
-      case CVALUE:
-         parallel_output("Partition Quality Avg C value ", average, 0, "");
-         break;
-      case CSTARVALUE:
-         parallel_output("Partition Quality Avg C* value ", average, 0, "");
-         break;
-      default:
-         // Unknown measure types print nothing.
-         break;
-      }
-   }
-
-   if (numpe > 1) {
-      parallel_output("The MPI surface area to volume ratio ", offtile_ratio_local, 0, "without duplicates");
-   }
-}
-
-// Prints, on rank 0 only, one line describing the initial cell ordering
-// (initial_order), the per-cycle reordering (cycle_reorder) and whether the
-// local stencil is enabled (localStencil). Unrecognized enum values add
-// nothing to the line.
-void Mesh::print_partition_type()
-{
-   if (mype != 0) return;
-
-   switch (initial_order) {
-   case ORIGINAL_ORDER:
-      printf("Initial order is naive.");
-      break;
-   case HILBERT_SORT:
-      printf("Initial order is Hilbert sort.");
-      break;
-   case HILBERT_PARTITION:
-      // Message previously read "Hilbert partitionr." (typo).
-      printf("Initial order is Hilbert partition.");
-      break;
-   case ZORDER:
-      printf("Initial order is Z order.");
-      break;
-   default:
-      break;
-   }
-
-   switch (cycle_reorder) {
-   case ORIGINAL_ORDER:
-      printf(" No cycle reorder.");
-      break;
-   case HILBERT_SORT:
-      printf(" Cycle reorder is Hilbert sort.");
-      break;
-   case HILBERT_PARTITION:
-      printf(" Cycle reorder is Hilbert partition.");
-      break;
-   case ZORDER:
-      printf(" Cycle reorder is Z order.");
-      break;
-   default:
-      break;
-   }
-
-   // The newline that terminates the line is printed in both cases.
-   if (localStencil) {
-      printf(" Local Stencil is on.\n");
-   } else {
-      printf("\n");
-   }
-}
-#ifdef HAVE_MPI
-// Gathers one distributed int field onto every rank, permutes it into the new
-// global order and scatters the result back from rank 0.
-//   field             local slice (nlocal entries); overwritten with this
-//                     rank's slice of the reordered field
-//   counts, displs    per-rank sizes and offsets passed to the collectives
-//   order             order[k] is the old global index of the cell that is
-//                     placed at new position k
-//   renumber          when non-NULL the gathered values are themselves old
-//                     global cell indices (neighbor arrays) and are
-//                     translated through this old-to-new table
-//   gathered, permuted  caller-provided scratch of order.size() entries each
-static void redistribute_int_field(int *field, int nlocal, int *counts, int *displs,
-                                   const vector<int> &order, const vector<int> *renumber,
-                                   vector<int> &gathered, vector<int> &permuted)
-{
-   MPI_Allgatherv(field, nlocal, MPI_INT, &gathered[0], counts, displs, MPI_INT, MPI_COMM_WORLD);
-
-   const int nglobal = (int)order.size();
-   for (int ic = 0; ic < nglobal; ic++) {
-      const int value = gathered[order[ic]];
-      permuted[ic] = (renumber != NULL) ? (*renumber)[value] : value;
-   }
-
-   MPI_Scatterv(&permuted[0], counts, displs, MPI_INT, field, nlocal, MPI_INT, 0, MPI_COMM_WORLD);
-}
-#endif
-
-// Rescales the i/j cell indices so that the largest index in each direction
-// maps to 16, truncating to int, as input for calc_zorder(). A direction whose
-// largest index is not positive is mapped to 0 instead of dividing by zero.
-static void scale_indices_for_zorder(const int *icell, const int *jcell, uint ncount,
-                                     vector<int> &i_scaled, vector<int> &j_scaled)
-{
-   i_scaled.resize(ncount);
-   j_scaled.resize(ncount);
-
-   int imax = 0;   // Maximum x-index.
-   int jmax = 0;   // Maximum y-index.
-   for (uint ic = 0; ic < ncount; ++ic) {
-      if (icell[ic] > imax) imax = icell[ic];
-      if (jcell[ic] > jmax) jmax = jcell[ic];
-   }
-
-   const double iscale = (imax > 0) ? 16.0 / (double)imax : 0.0;
-   const double jscale = (jmax > 0) ? 16.0 / (double)jmax : 0.0;
-
-   for (uint ic = 0; ic < ncount; ++ic) {
-      i_scaled[ic] = (int) ( (double)icell[ic]*iscale);
-      j_scaled[ic] = (int) ( (double)jcell[ic]*jscale);
-   }
-}
-
-// Reorders field in place so that field[k] becomes the old field[order[k]].
-// scratch must hold at least ncount entries.
-template <typename Field>
-static void permute_spatial_field(Field &field, const vector<int> &order, int ncount,
-                                  vector<spatial_t> &scratch)
-{
-   for (int ic = 0; ic < ncount; ic++) {
-      scratch[ic] = field[ic];
-   }
-   for (int ic = 0; ic < ncount; ic++) {
-      field[ic] = scratch[order[ic]];
-   }
-}
-
-// Computes a new cell ordering with the requested method and reorders the
-// mesh arrays accordingly. The elapsed time is added to
-// cpu_timers[MESH_TIMER_PARTITION].
-//
-//   ORIGINAL_ORDER     z_order is set to the identity and nothing is moved.
-//   HILBERT_SORT       cells are ordered by hsfc2sort() on their normalized
-//                      cell-center coordinates.
-//   ZORDER             cells are ordered by calc_zorder() on scaled i/j indices.
-//   anything else      (including HILBERT_PARTITION) no ordering is computed.
-//
-// In a parallel run the ordering is computed on the gathered global mesh and
-// each field is gathered, permuted and scattered back from rank 0; neighbor
-// arrays are additionally renumbered through the inverse ordering. In a
-// serial run mesh_memory.memory_reorder_all() is used.
-//
-// All integer collectives use MPI_INT; MPI_REAL was used before for some of
-// these int buffers.
-void Mesh::partition_cells(
-      int numpe,                     // Number of ranks; sizes nsizes/ndispl and is passed to calc_distribution().
-      vector<int> &z_order,          // Resulting index ordering.
-      enum partition_method method)  // Assigned partitioning method.
-{
-   struct timeval tstart_cpu;
-   cpu_timer_start(&tstart_cpu);
-
-   // Ordered curve index; only the serial ZORDER path fills it in.
-   vector<int> z_index(ncells, 0);
-
-   if (parallel) {
-#ifdef HAVE_MPI
-      nsizes.resize(numpe);
-      ndispl.resize(numpe);
-      // NOTE(review): ncells is sent as a single MPI_INT -- confirm that its
-      // declared type matches.
-      MPI_Allgather(&ncells, 1, MPI_INT, &nsizes[0], 1, MPI_INT, MPI_COMM_WORLD);
-      ndispl[0]=0;
-      for (int ip=1; ip<numpe; ip++){
-         ndispl[ip] = ndispl[ip-1] + nsizes[ip-1];
-      }
-      noffset=0;
-      for (int ip=0; ip<mype; ip++){
-         noffset += nsizes[ip];
-      }
-#endif
-   } else {
-      // Adjust the number of required work items to the number of cells.
-      proc.resize(ncells);
-      // Decompose the domain equitably.
-      calc_distribution(numpe);
-      noffset = 0;
-   }
-
-   // HILBERT_SORT only: nonzero when x/dx/y/dy were already sized for the
-   // mesh on entry, zero when they were computed temporarily and cleared.
-   int have_spatial_variables = 0;
-#ifdef HAVE_MPI
-   // Parallel runs: new global ordering, identical on every rank.
-   vector<int> z_order_global;
-#endif
-
-   // Step 1: compute the ordering.
-   switch (method) {
-   case ORIGINAL_ORDER:
-      // Set z_order to the current cell order.
-      for (uint ic = 0; ic < ncells; ++ic) {
-         z_order[ic] = ic;
-      }
-      cpu_timers[MESH_TIMER_PARTITION] += cpu_timer_stop(tstart_cpu);
-      return;
-
-   case HILBERT_SORT: {
-      // Resort the curve by Hilbert order.
-      have_spatial_variables = 1;
-      if (x.size() < ncells) {
-         calc_spatial_coordinates(0);
-         have_spatial_variables = 0;
-      }
-      calc_centerminmax();
-
-      // Get the range of values in the x- and y-directions and make the scale square.
-      const double iscale = 1.0 / (xcentermax - xcentermin);
-      const double jscale = 1.0 / (ycentermax - ycentermin);
-
-      // Scale the cell centers to a normalized [0, 1] range for hsfc.
-      vector<double> iunit(ncells);
-      vector<double> junit(ncells);
-      for (uint ic = 0; ic < ncells; ++ic){
-         iunit[ic] = (x[ic] + 0.5 * dx[ic] - xcentermin) * iscale;
-         junit[ic] = (y[ic] + 0.5 * dy[ic] - ycentermin) * jscale;
-      }
-
-      // Drop the coordinates again if they only existed for this computation.
-      if (have_spatial_variables == 0){
-         x.clear();
-         dx.clear();
-         y.clear();
-         dy.clear();
-      }
-
-      if (parallel){
-#ifdef HAVE_MPI
-         vector<double> iunit_global(ncells_global);
-         vector<double> junit_global(ncells_global);
-
-         MPI_Allgatherv(&iunit[0], ncells, MPI_DOUBLE, &iunit_global[0], &nsizes[0], &ndispl[0], MPI_DOUBLE, MPI_COMM_WORLD);
-         MPI_Allgatherv(&junit[0], ncells, MPI_DOUBLE, &junit_global[0], &nsizes[0], &ndispl[0], MPI_DOUBLE, MPI_COMM_WORLD);
-
-         // Sort the mesh into an ordered space-filling curve from hsfc; the
-         // first ncells_global entries of info are the ordering.
-         vector<int> info(3 * ncells_global);
-         hsfc2sort(ncells_global, &iunit_global[0], &junit_global[0], 0, &info[0], 1);
-         z_order_global.assign(info.begin(), info.begin() + ncells_global);
-#endif
-      } else {
-         // Sort the mesh into an ordered space-filling curve from hsfc.
-         vector<int> info(3 * ncells);
-         hsfc2sort(ncells, &iunit[0], &junit[0], 0, &info[0], 1);
-
-         // Copy the cell order information from info into z_order.
-         for (uint ic = 0; ic < ncells; ++ic) {
-            z_order[ic] = info[ic];
-         }
-      }
-      break;
-   }
-
-   case ZORDER: {
-      // Resort the curve by z-order.
-      vector<int> i_scaled;
-      vector<int> j_scaled;
-
-      if (parallel) {
-#ifdef HAVE_MPI
-         vector<int> i_global(ncells_global);
-         vector<int> j_global(ncells_global);
-         vector<int> level_global(ncells_global);
-         vector<int> z_index_global(ncells_global);
-         z_order_global.resize(ncells_global);
-
-         MPI_Allgatherv(&i[0], ncells, MPI_INT, &i_global[0], &nsizes[0], &ndispl[0], MPI_INT, MPI_COMM_WORLD);
-         MPI_Allgatherv(&j[0], ncells, MPI_INT, &j_global[0], &nsizes[0], &ndispl[0], MPI_INT, MPI_COMM_WORLD);
-         MPI_Allgatherv(&level[0], ncells, MPI_INT, &level_global[0], &nsizes[0], &ndispl[0], MPI_INT, MPI_COMM_WORLD);
-
-         scale_indices_for_zorder(&i_global[0], &j_global[0], ncells_global, i_scaled, j_scaled);
-
-         calc_zorder(ncells_global, &i_scaled[0], &j_scaled[0], &level_global[0], levmx, ibase, &z_index_global[0], &z_order_global[0]);
-#endif
-      } else {
-         scale_indices_for_zorder(&i[0], &j[0], ncells, i_scaled, j_scaled);
-
-         calc_zorder(ncells, &i_scaled[0], &j_scaled[0], &level[0], levmx, ibase, &z_index[0], &z_order[0]);
-      }
-      break;
-   }
-
-   default:
-      // Note that HILBERT_PARTITION is not currently supported due to redundancy with HILBERT_SORT.
-      break;
-   }
-
-   // Step 2: order the mesh according to the calculated order.
-   if (method == HILBERT_SORT || method == ZORDER) {
-      if (parallel) {
-#ifdef HAVE_MPI
-         vector<int> int_global(ncells_global);
-         vector<int> int_global_new(ncells_global);
-
-         // gather, reorder and scatter i, j and level
-         redistribute_int_field(&i[0], ncells, &nsizes[0], &ndispl[0], z_order_global, NULL, int_global, int_global_new);
-         redistribute_int_field(&j[0], ncells, &nsizes[0], &ndispl[0], z_order_global, NULL, int_global, int_global_new);
-         redistribute_int_field(&level[0], ncells, &nsizes[0], &ndispl[0], z_order_global, NULL, int_global, int_global_new);
-
-         // It is faster just to recalculate these variables instead of communicating them
-         if (mesh_memory.get_memory_size(celltype) >= ncells) {
-            calc_celltype(mesh_memory.get_memory_size(celltype));
-         }
-
-         // HILBERT_SORT uses the state recorded on entry because x may have
-         // been cleared above; ZORDER checks the current size.
-         const bool refresh_spatial = (method == HILBERT_SORT) ? (have_spatial_variables != 0)
-                                                               : (x.size() >= ncells);
-         if (refresh_spatial) {
-            calc_spatial_coordinates(0);
-         }
-
-         if (mesh_memory.get_memory_size(nlft) >= ncells) {
-            // Old-to-new index table used to renumber the neighbor entries.
-            vector<int> inv_z_order(ncells_global);
-            for (int ic = 0; ic<(int)ncells_global; ic++){
-               inv_z_order[z_order_global[ic]] = ic;
-            }
-
-            redistribute_int_field(&nlft[0], ncells, &nsizes[0], &ndispl[0], z_order_global, &inv_z_order, int_global, int_global_new);
-            redistribute_int_field(&nrht[0], ncells, &nsizes[0], &ndispl[0], z_order_global, &inv_z_order, int_global, int_global_new);
-            redistribute_int_field(&nbot[0], ncells, &nsizes[0], &ndispl[0], z_order_global, &inv_z_order, int_global, int_global_new);
-            redistribute_int_field(&ntop[0], ncells, &nsizes[0], &ndispl[0], z_order_global, &inv_z_order, int_global, int_global_new);
-         }
-
-         // Hand every rank its slice of the global ordering.
-         MPI_Scatterv(&z_order_global[0], &nsizes[0], &ndispl[0], MPI_INT, &z_order[0], ncells, MPI_INT, 0, MPI_COMM_WORLD);
-#endif
-      } else {
-         mesh_memory.set_memory_attribute(nlft, 0x100);
-         mesh_memory.set_memory_attribute(nrht, 0x100);
-         mesh_memory.set_memory_attribute(nbot, 0x100);
-         mesh_memory.set_memory_attribute(ntop, 0x100);
-
-         mesh_memory.memory_reorder_all(&z_order[0]);
-         memory_reset_ptrs();
-
-         // The coordinate arrays are reordered here only when they are populated.
-         if (x.size() >= ncells) {
-            vector<spatial_t> real_local(ncells);
-
-            permute_spatial_field(x,  z_order, (int)ncells, real_local);
-            permute_spatial_field(dx, z_order, (int)ncells, real_local);
-            permute_spatial_field(y,  z_order, (int)ncells, real_local);
-            permute_spatial_field(dy, z_order, (int)ncells, real_local);
-         }
-      }
-   }
-
-   // Output ordered mesh information.
-   // NOTE(review): this dump reads x/dx/y/dy unconditionally, although the
-   // HILBERT_SORT path may have cleared them above; the header labels the
-   // columns "i j" while j[ic] is printed first -- confirm before enabling DEBUG.
-   if (DEBUG) {
-      printf("orig index i j lev nlft nrht nbot ntop xlow xhigh ylow yhigh z index z order\n");
-      for (uint ic=0; ic<ncells; ic++){
-         printf(" %6d %4d %4d %4d %4d %4d %4d %4d ", index[ic], j[ic], i[ic], level[ic], nlft[ic], nrht[ic], nbot[ic], ntop[ic]);
-         printf(" %8.2lf %8.2lf %8.2lf %8.2lf", x[ic], x[ic]+dx[ic], y[ic], y[ic]+dy[ic]);
-         printf(" %6d %5d\n", z_index[ic], z_order[ic]);
-      }
-   }
-
-   cpu_timers[MESH_TIMER_PARTITION] += cpu_timer_stop(tstart_cpu);
-}
-
-// Assigns every entry of proc to one of numpe work items in contiguous,
-// nearly equal slices: each work item receives proc.size()/numpe entries and
-// the first proc.size()%numpe work items receive one extra entry.
-//
-// Each slice now starts where the previous one ended. Before, the inner loop
-// restarted at index 0 for every work item, so each pass overwrote the
-// earlier assignments and every entry ended up as numpe-1.
-// Does nothing when numpe is not positive.
-void Mesh::calc_distribution(int numpe)
-{
-   if (numpe <= 0) return;
-
-   const uint total = (uint)proc.size();
-   const uint base  = total / (uint)numpe;   // entries every work item receives
-   const uint extra = total % (uint)numpe;   // leading work items that receive one more
-
-   uint first = 0;                           // start of the current slice
-   for (int ip = 0; ip < numpe; ++ip) {
-      uint last = first + base;
-      if ((uint)ip < extra) last++;
-      for (uint ic = first; ic < last; ic++) {
-         proc[ic] = ip;
-      }
-      first = last;
-   }
-}
-
Removed: test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/partition.h
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C%2B%2B/CLAMR/partition.h?rev=312481&view=auto
==============================================================================
--- test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/partition.h (original)
+++ test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/partition.h (removed)
@@ -1,113 +0,0 @@
-/*
- * Copyright (c) 2011-2012, Los Alamos National Security, LLC.
- * All rights Reserved.
- *
- * Copyright 2011-2012. Los Alamos National Security, LLC. This software was produced
- * under U.S. Government contract DE-AC52-06NA25396 for Los Alamos National
- * Laboratory (LANL), which is operated by Los Alamos National Security, LLC
- * for the U.S. Department of Energy. The U.S. Government has rights to use,
- * reproduce, and distribute this software. NEITHER THE GOVERNMENT NOR LOS
- * ALAMOS NATIONAL SECURITY, LLC MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR
- * ASSUMES ANY LIABILITY FOR THE USE OF THIS SOFTWARE. If software is modified
- * to produce derivative works, such modified software should be clearly marked,
- * so as not to confuse it with the version available from LANL.
- *
- * Additionally, redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of the Los Alamos National Security, LLC, Los Alamos
- * National Laboratory, LANL, the U.S. Government, nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE LOS ALAMOS NATIONAL SECURITY, LLC AND
- * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT
- * NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL LOS ALAMOS NATIONAL
- * SECURITY, LLC OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
- * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
- * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- *
- * CLAMR -- LA-CC-11-094
- * This research code is being developed as part of the
- * 2011 X Division Summer Workshop for the express purpose
- * of a collaborative code for development of ideas in
- * the implementation of AMR codes for Exascale platforms
- *
- * AMR implementation of the Wave code previously developed
- * as a demonstration code for regular grids on Exascale platforms
- * as part of the Supercomputing Challenge and Los Alamos
- * National Laboratory
- *
- * Authors: Bob Robey XCP-2 brobey at lanl.gov
- * Neal Davis davis68 at lanl.gov, davis68 at illinois.edu
- * David Nicholaeff dnic at lanl.gov, mtrxknight at aol.com
- * Dennis Trujillo dptrujillo at lanl.gov, dptru10 at gmail.com
- *
- */
-#ifndef PARTITION_H
-#define PARTITION_H
-
-#include <vector>
-
-#include "input.h"
-
-using namespace std;
-
-enum partition_method { // Cell-ordering strategies accepted by Mesh::partition_cells().
- ORIGINAL_ORDER, // Keep the current order; z_order becomes the identity.
- HILBERT_SORT, // Order cells along a Hilbert curve using hsfc2sort().
- HILBERT_PARTITION, // Not handled by partition_cells() (noted there as redundant with HILBERT_SORT).
- ZORDER // Order cells with calc_zorder() on scaled i/j indices.
-};
-
-enum partition_measure { // Partition-quality metrics selectable through measure_type in partition.cpp.
- NO_PARTITION_MEASURE, // Mesh::partition_measure() does nothing.
- WITH_DUPLICATES, // Off-tile neighbor references per tile, duplicates counted, divided by TILE_SIZE.
- WITHOUT_DUPLICATES, // Distinct off-tile neighbor cells per tile, divided by TILE_SIZE.
- CVALUE, // Distinct off-tile neighbor cells per tile, divided by 4*sqrt(TILE_SIZE).
- CSTARVALUE // Like CVALUE but also weighted by the number of distinct 4-entry cache lines touched.
-};
-
-
-void calc_distribution(int numpe, vector<int> &proc); // NOTE(review): partition.cpp defines Mesh::calc_distribution(int) rather than this free function -- possibly a stale prototype, confirm.
-//void partition_cells(int numpe, vector<int> &proc, Mesh &mesh, enum partition_method method);
-// maptonorm: pointer to a function taking (double *, double *, void *) and returning void.
-typedef void (*maptonorm)( double * , double * , void * );
-// hsfc2sort: C-linkage Hilbert-curve sort. Mesh::partition_cells() calls it with ibase = 0 and LDInfo = 1 and reads the first N entries of Info as the cell ordering.
-extern "C" void hsfc2sort(
- const int N , /* IN: Number of points */
- const double * X , /* IN: array of X-Coordinates */
- const double * Y , /* IN: array of Y-Coordinates */
- const int ibase , /* IN: Stride for Y array */
- int * Info , /* OUT: (1 <= LDInfo) [ HSFC ordering ]
- (2 <= LDInfo) [ HSFC index, #1 ]
- (3 <= LDInfo) [ HSFC index, #2 ] */
- int LDInfo /* IN: Leading dimension of Info */
- ); // NOTE(review): ibase is described as a stride here but as a 0/1 index base for hsfc2part below -- confirm which description is right.
-// hsfc2part: C-linkage Hilbert-curve partitioner; partition.cpp does not call it.
-extern "C" void hsfc2part(
- const int Level , /* IN: Background grid level of partitioning */
- const int Limit , /* IN: Number of levels to consider for 'gaps' */
- const int NPart , /* IN: Target number of partitions */
- const int N , /* IN: Number of points */
- const double * X , /* IN: array of X-Coordinates */
- const double * Y , /* IN: array of Y-Coordinates */
- const int ibase , /* IN: Base - 0 for C, 1 for Fortran */
- int * Info , /* IN: Array of computational weights,
- OUT: (1 <= LDInfo) [ Partitioning ]
- (2 <= LDInfo) [ Adjusted HSFC ordering ]
- (3 <= LDInfo) [ Original HSFC index, #1 ]
- (4 <= LDInfo) [ Original HSFC index, #2 ] */
- int LDInfo );/* IN: Leading dimension of Info */
-
-
-#endif /* PARTITION_H */
Removed: test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/reduce.c
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C%2B%2B/CLAMR/reduce.c?rev=312481&view=auto
==============================================================================
--- test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/reduce.c (original)
+++ test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/reduce.c (removed)
@@ -1,245 +0,0 @@
-/**
- * Copyright (c) 2011, Los Alamos National Security, LLC.
- * All rights Reserved.
- *
- * Copyright 2011. Los Alamos National Security, LLC. This software was produced
- * under U.S. Government contract DE-AC52-06NA25396 for Los Alamos National
- * Laboratory (LANL), which is operated by Los Alamos National Security, LLC
- * for the U.S. Department of Energy. The U.S. Government has rights to use,
- * reproduce, and distribute this software. NEITHER THE GOVERNMENT NOR LOS
- * ALAMOS NATIONAL SECURITY, LLC MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR
- * ASSUMES ANY LIABILITY FOR THE USE OF THIS SOFTWARE. If software is modified
- * to produce derivative works, such modified software should be clearly marked,
- * so as not to confuse it with the version available from LANL.
- *
- * Additionally, redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of the Los Alamos National Security, LLC, Los Alamos
- * National Laboratory, LANL, the U.S. Government, nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE LOS ALAMOS NATIONAL SECURITY, LLC AND
- * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT
- * NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL LOS ALAMOS NATIONAL
- * SECURITY, LLC OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
- * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
- * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- *
- * CLAMR -- LA-CC-11-094
- * This research code is being developed as part of the
- * 2011 X Division Summer Workshop for the express purpose
- * of a collaborative code for development of ideas in
- * the implementation of AMR codes for Exascale platforms
- *
- * AMR implementation of the Wave code previously developed
- * as a demonstration code for regular grids on Exascale platforms
- * as part of the Supercomputing Challenge and Los Alamos
- * National Laboratory
- *
- * Authors: Bob Robey XCP-2 brobey at lanl.gov
- * Neal Davis davis68 at lanl.gov, davis68 at illinois.edu
- * David Nicholaeff dnic at lanl.gov, mtrxknight at aol.com
- * Dennis Trujillo dptrujillo at lanl.gov, dptru10 at gmail.com
- *
- */
-#include "reduce.h"
-#ifdef HAVE_OPENCL
-#include "ezcl/ezcl.h"
-#endif
-
-#ifdef HAVE_OPENCL
-#include "reduce_kernel.inc"
-#endif
-
-/* Creates all twelve reduction kernel handles in one call.  Each handle is
- * assigned the result of ezcl_create_kernel_wsource() for the current ezcl
- * context, reduce_source and the kernel name shown.  The function body is
- * empty when HAVE_OPENCL is not defined. */
-void init_kernels_reduce(void)
-{
-#ifdef HAVE_OPENCL
-   cl_context ctx = ezcl_get_context();
-
-   /* sum (double and int variants) */
-   kernel_reduce_sum               = ezcl_create_kernel_wsource(ctx, reduce_source, "reduce_sum_cl");
-   kernel_reduce_sum_stage1of2     = ezcl_create_kernel_wsource(ctx, reduce_source, "reduce_sum_stage1of2_cl");
-   kernel_reduce_sum_stage2of2     = ezcl_create_kernel_wsource(ctx, reduce_source, "reduce_sum_stage2of2_cl");
-   kernel_reduce_sum_int_stage1of2 = ezcl_create_kernel_wsource(ctx, reduce_source, "reduce_sum_int_stage1of2_cl");
-   kernel_reduce_sum_int_stage2of2 = ezcl_create_kernel_wsource(ctx, reduce_source, "reduce_sum_int_stage2of2_cl");
-
-   /* product */
-   kernel_reduce_product           = ezcl_create_kernel_wsource(ctx, reduce_source, "reduce_product_cl");
-
-   /* max */
-   kernel_reduce_max               = ezcl_create_kernel_wsource(ctx, reduce_source, "reduce_max_cl");
-   kernel_reduce_max_stage1of2     = ezcl_create_kernel_wsource(ctx, reduce_source, "reduce_max_stage1of2_cl");
-   kernel_reduce_max_stage2of2     = ezcl_create_kernel_wsource(ctx, reduce_source, "reduce_max_stage2of2_cl");
-
-   /* min */
-   kernel_reduce_min               = ezcl_create_kernel_wsource(ctx, reduce_source, "reduce_min_cl");
-   kernel_reduce_min_stage1of2     = ezcl_create_kernel_wsource(ctx, reduce_source, "reduce_min_stage1of2_cl");
-   kernel_reduce_min_stage2of2     = ezcl_create_kernel_wsource(ctx, reduce_source, "reduce_min_stage2of2_cl");
-#endif
-}
-
-/* Creates kernel_reduce_sum from reduce_source under the name
- * "reduce_sum_cl".  Empty when HAVE_OPENCL is not defined. */
-void init_kernel_sum(void)
-{
-#ifdef HAVE_OPENCL
-   cl_context ctx = ezcl_get_context();
-   kernel_reduce_sum = ezcl_create_kernel_wsource(ctx, reduce_source, "reduce_sum_cl");
-#endif
-}
-
-/* Creates the two sum-stage kernel handles (stage1of2, then stage2of2) from
- * reduce_source, sharing one ezcl context lookup.  Empty when HAVE_OPENCL is
- * not defined. */
-void init_kernel_2stage_sum(void)
-{
-#ifdef HAVE_OPENCL
-   cl_context ctx = ezcl_get_context();
-   kernel_reduce_sum_stage1of2 = ezcl_create_kernel_wsource(ctx, reduce_source, "reduce_sum_stage1of2_cl");
-   kernel_reduce_sum_stage2of2 = ezcl_create_kernel_wsource(ctx, reduce_source, "reduce_sum_stage2of2_cl");
-#endif
-}
-
-/* Releases the two sum-stage kernel handles.  This performs exactly the same
- * two releases as release_kernel_2stage_sum(), so it simply forwards to it.
- * Empty when HAVE_OPENCL is not defined. */
-void terminate_kernel_2stage_sum(void)
-{
-#ifdef HAVE_OPENCL
-   release_kernel_2stage_sum();
-#endif
-}
-
-/* Creates the two integer sum-stage kernel handles (stage1of2, then
- * stage2of2) from reduce_source, sharing one ezcl context lookup.  Empty when
- * HAVE_OPENCL is not defined. */
-void init_kernel_2stage_sum_int(void)
-{
-#ifdef HAVE_OPENCL
-   cl_context ctx = ezcl_get_context();
-   kernel_reduce_sum_int_stage1of2 = ezcl_create_kernel_wsource(ctx, reduce_source, "reduce_sum_int_stage1of2_cl");
-   kernel_reduce_sum_int_stage2of2 = ezcl_create_kernel_wsource(ctx, reduce_source, "reduce_sum_int_stage2of2_cl");
-#endif
-}
-
-/* Releases the two integer sum-stage kernel handles.  This performs exactly
- * the same two releases as release_kernel_2stage_sum_int(), so it simply
- * forwards to it.  Empty when HAVE_OPENCL is not defined. */
-void terminate_kernel_2stage_sum_int(void)
-{
-#ifdef HAVE_OPENCL
-   release_kernel_2stage_sum_int();
-#endif
-}
-
-/* Creates kernel_reduce_product from reduce_source under the name
- * "reduce_product_cl".  Empty when HAVE_OPENCL is not defined. */
-void init_kernel_product(void)
-{
-#ifdef HAVE_OPENCL
-   cl_context ctx = ezcl_get_context();
-   kernel_reduce_product = ezcl_create_kernel_wsource(ctx, reduce_source, "reduce_product_cl");
-#endif
-}
-
-/* Creates kernel_reduce_max from reduce_source under the name
- * "reduce_max_cl".  Empty when HAVE_OPENCL is not defined. */
-void init_kernel_max(void)
-{
-#ifdef HAVE_OPENCL
-   cl_context ctx = ezcl_get_context();
-   kernel_reduce_max = ezcl_create_kernel_wsource(ctx, reduce_source, "reduce_max_cl");
-#endif
-}
-
-/* Creates the two max-stage kernel handles (stage1of2, then stage2of2) from
- * reduce_source, sharing one ezcl context lookup.  Empty when HAVE_OPENCL is
- * not defined. */
-void init_kernel_2stage_max(void)
-{
-#ifdef HAVE_OPENCL
-   cl_context ctx = ezcl_get_context();
-   kernel_reduce_max_stage1of2 = ezcl_create_kernel_wsource(ctx, reduce_source, "reduce_max_stage1of2_cl");
-   kernel_reduce_max_stage2of2 = ezcl_create_kernel_wsource(ctx, reduce_source, "reduce_max_stage2of2_cl");
-#endif
-}
-
-/* Creates kernel_reduce_min from reduce_source under the name
- * "reduce_min_cl".  Empty when HAVE_OPENCL is not defined. */
-void init_kernel_min(void)
-{
-#ifdef HAVE_OPENCL
-   cl_context ctx = ezcl_get_context();
-   kernel_reduce_min = ezcl_create_kernel_wsource(ctx, reduce_source, "reduce_min_cl");
-#endif
-}
-
-/* Creates the two min-stage kernel handles (stage1of2, then stage2of2) from
- * reduce_source, sharing one ezcl context lookup.  Empty when HAVE_OPENCL is
- * not defined. */
-void init_kernel_2stage_min(void)
-{
-#ifdef HAVE_OPENCL
-   cl_context ctx = ezcl_get_context();
-   kernel_reduce_min_stage1of2 = ezcl_create_kernel_wsource(ctx, reduce_source, "reduce_min_stage1of2_cl");
-   kernel_reduce_min_stage2of2 = ezcl_create_kernel_wsource(ctx, reduce_source, "reduce_min_stage2of2_cl");
-#endif
-}
-
-/* Releases all twelve reduction kernel handles, in the same order in which
- * init_kernels_reduce() creates them.  Empty when HAVE_OPENCL is not defined.
- * Declared with (void) so the definition is a real prototype in C, matching
- * the init_* functions in this file. */
-void release_kernels_reduce(void)
-{
-#ifdef HAVE_OPENCL
-   ezcl_kernel_release(kernel_reduce_sum);
-   ezcl_kernel_release(kernel_reduce_sum_stage1of2);
-   ezcl_kernel_release(kernel_reduce_sum_stage2of2);
-   ezcl_kernel_release(kernel_reduce_sum_int_stage1of2);
-   ezcl_kernel_release(kernel_reduce_sum_int_stage2of2);
-   ezcl_kernel_release(kernel_reduce_product);
-   ezcl_kernel_release(kernel_reduce_max);
-   ezcl_kernel_release(kernel_reduce_max_stage1of2);
-   ezcl_kernel_release(kernel_reduce_max_stage2of2);
-   ezcl_kernel_release(kernel_reduce_min);
-   ezcl_kernel_release(kernel_reduce_min_stage1of2);
-   ezcl_kernel_release(kernel_reduce_min_stage2of2);
-#endif
-}
-
-/* Releases kernel_reduce_sum.  Empty when HAVE_OPENCL is not defined.
- * (void) makes the definition a real prototype in C. */
-void release_kernel_sum(void)
-{
-#ifdef HAVE_OPENCL
-   ezcl_kernel_release(kernel_reduce_sum);
-#endif
-}
-
-/* Releases the two sum-stage kernel handles (stage1of2, then stage2of2).
- * Empty when HAVE_OPENCL is not defined.  (void) makes the definition a real
- * prototype in C. */
-void release_kernel_2stage_sum(void)
-{
-#ifdef HAVE_OPENCL
-   ezcl_kernel_release(kernel_reduce_sum_stage1of2);
-   ezcl_kernel_release(kernel_reduce_sum_stage2of2);
-#endif
-}
-
-/* Releases the two integer sum-stage kernel handles (stage1of2, then
- * stage2of2).  Empty when HAVE_OPENCL is not defined.  (void) makes the
- * definition a real prototype in C. */
-void release_kernel_2stage_sum_int(void)
-{
-#ifdef HAVE_OPENCL
-   ezcl_kernel_release(kernel_reduce_sum_int_stage1of2);
-   ezcl_kernel_release(kernel_reduce_sum_int_stage2of2);
-#endif
-}
-
-/* Releases kernel_reduce_product.  Empty when HAVE_OPENCL is not defined.
- * (void) makes the definition a real prototype in C. */
-void release_kernel_product(void)
-{
-#ifdef HAVE_OPENCL
-   ezcl_kernel_release(kernel_reduce_product);
-#endif
-}
-
-/* Releases kernel_reduce_max.  Empty when HAVE_OPENCL is not defined.
- * (void) makes the definition a real prototype in C. */
-void release_kernel_max(void)
-{
-#ifdef HAVE_OPENCL
-   ezcl_kernel_release(kernel_reduce_max);
-#endif
-}
-
-/* Releases the two max-stage kernel handles (stage1of2, then stage2of2).
- * Empty when HAVE_OPENCL is not defined.  (void) makes the definition a real
- * prototype in C. */
-void release_kernel_2stage_max(void)
-{
-#ifdef HAVE_OPENCL
-   ezcl_kernel_release(kernel_reduce_max_stage1of2);
-   ezcl_kernel_release(kernel_reduce_max_stage2of2);
-#endif
-}
-
-/* Releases kernel_reduce_min.  Empty when HAVE_OPENCL is not defined.
- * (void) makes the definition a real prototype in C. */
-void release_kernel_min(void)
-{
-#ifdef HAVE_OPENCL
-   ezcl_kernel_release(kernel_reduce_min);
-#endif
-}
-
-/* Releases the two min-stage kernel handles (stage1of2, then stage2of2).
- * Empty when HAVE_OPENCL is not defined.  (void) makes the definition a real
- * prototype in C. */
-void release_kernel_2stage_min(void)
-{
-#ifdef HAVE_OPENCL
-   ezcl_kernel_release(kernel_reduce_min_stage1of2);
-   ezcl_kernel_release(kernel_reduce_min_stage2of2);
-#endif
-}
-
Removed: test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/reduce.h
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C%2B%2B/CLAMR/reduce.h?rev=312481&view=auto
==============================================================================
--- test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/reduce.h (original)
+++ test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/reduce.h (removed)
@@ -1,115 +0,0 @@
-/*
- * Copyright (c) 2011-2012, Los Alamos National Security, LLC.
- * All rights Reserved.
- *
- * Copyright 2011-2012. Los Alamos National Security, LLC. This software was produced
- * under U.S. Government contract DE-AC52-06NA25396 for Los Alamos National
- * Laboratory (LANL), which is operated by Los Alamos National Security, LLC
- * for the U.S. Department of Energy. The U.S. Government has rights to use,
- * reproduce, and distribute this software. NEITHER THE GOVERNMENT NOR LOS
- * ALAMOS NATIONAL SECURITY, LLC MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR
- * ASSUMES ANY LIABILITY FOR THE USE OF THIS SOFTWARE. If software is modified
- * to produce derivative works, such modified software should be clearly marked,
- * so as not to confuse it with the version available from LANL.
- *
- * Additionally, redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of the Los Alamos National Security, LLC, Los Alamos
- * National Laboratory, LANL, the U.S. Government, nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE LOS ALAMOS NATIONAL SECURITY, LLC AND
- * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT
- * NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL LOS ALAMOS NATIONAL
- * SECURITY, LLC OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
- * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
- * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- *
- * CLAMR -- LA-CC-11-094
- * This research code is being developed as part of the
- * 2011 X Division Summer Workshop for the express purpose
- * of a collaborative code for development of ideas in
- * the implementation of AMR codes for Exascale platforms
- *
- * AMR implementation of the Wave code previously developed
- * as a demonstration code for regular grids on Exascale platforms
- * as part of the Supercomputing Challenge and Los Alamos
- * National Laboratory
- *
- * Authors: Bob Robey XCP-2 brobey at lanl.gov
- * Neal Davis davis68 at lanl.gov, davis68 at illinois.edu
- * David Nicholaeff dnic at lanl.gov, mtrxknight at aol.com
- * Dennis Trujillo dptrujillo at lanl.gov, dptru10 at gmail.com
- *
- */
-#ifndef _REDUCE_H_
-#define _REDUCE_H_
-
-#ifdef HAVE_OPENCL
-#ifdef __APPLE_CC__
-#include <OpenCL/OpenCL.h>
-#else
-#include "CL/cl.h"
-#endif
-#endif
-
-#ifdef __cplusplus
-extern "C"
-{
-#endif
-
-#ifdef HAVE_OPENCL /* Kernel handles assigned by the init_* functions and released by the
- * release_* / terminate_* functions declared below.
- * NOTE(review): there is no `extern` here, so every file that
- * includes this header defines these objects itself (tentative
- * definitions in C, full definitions in C++ inside the extern "C"
- * block).  Toolchains that do not merge common symbols may report
- * duplicate definitions at link time; consider `extern` here plus a
- * single definition in reduce.c -- TODO confirm against the build. */
-cl_kernel kernel_reduce_sum,
- kernel_reduce_sum_stage1of2,
- kernel_reduce_sum_stage2of2,
- kernel_reduce_sum_int_stage1of2,
- kernel_reduce_sum_int_stage2of2,
- kernel_reduce_product,
- kernel_reduce_max,
- kernel_reduce_max_stage1of2,
- kernel_reduce_max_stage2of2,
- kernel_reduce_min,
- kernel_reduce_min_stage1of2,
- kernel_reduce_min_stage2of2;
-#endif
-
-/* Kernel creation: each function assigns the kernel handle(s) named in its
- * identifier; init_kernels_reduce() assigns all of them. */
-void init_kernels_reduce(void);
-void init_kernel_sum(void);
-void init_kernel_2stage_sum(void);
-void init_kernel_2stage_sum_int(void);
-void init_kernel_product(void);
-void init_kernel_max(void);
-void init_kernel_2stage_max(void);
-void init_kernel_min(void);
-void init_kernel_2stage_min(void);
-
-/* Release the two-stage sum kernels (same effect as the matching release_*). */
-void terminate_kernel_2stage_sum(void);
-void terminate_kernel_2stage_sum_int(void);
-
-/* Kernel release.  Declared with (void) so that C callers get a real
- * prototype; an empty parameter list "()" declares no prototype in C. */
-void release_kernels_reduce(void);
-void release_kernel_sum(void);
-void release_kernel_2stage_sum(void);
-void release_kernel_2stage_sum_int(void);
-void release_kernel_product(void);
-void release_kernel_max(void);
-void release_kernel_2stage_max(void);
-void release_kernel_min(void);
-void release_kernel_2stage_min(void);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _REDUCE_H_ */
-
Removed: test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/s7.c
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C%2B%2B/CLAMR/s7.c?rev=312481&view=auto
==============================================================================
--- test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/s7.c (original)
+++ test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/s7.c (removed)
@@ -1,977 +0,0 @@
-/*
- * Copyright (c) 2011-2012, Los Alamos National Security, LLC.
- * All rights Reserved.
- *
- * Copyright 2011-2012. Los Alamos National Security, LLC. This software was produced
- * under U.S. Government contract DE-AC52-06NA25396 for Los Alamos National
- * Laboratory (LANL), which is operated by Los Alamos National Security, LLC
- * for the U.S. Department of Energy. The U.S. Government has rights to use,
- * reproduce, and distribute this software. NEITHER THE GOVERNMENT NOR LOS
- * ALAMOS NATIONAL SECURITY, LLC MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR
- * ASSUMES ANY LIABILITY FOR THE USE OF THIS SOFTWARE. If software is modified
- * to produce derivative works, such modified software should be clearly marked,
- * so as not to confuse it with the version available from LANL.
- *
- * Additionally, redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of the Los Alamos National Security, LLC, Los Alamos
- * National Laboratory, LANL, the U.S. Government, nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE LOS ALAMOS NATIONAL SECURITY, LLC AND
- * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT
- * NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL LOS ALAMOS NATIONAL
- * SECURITY, LLC OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
- * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
- * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- *
- * CLAMR -- LA-CC-11-094
- * This research code is being developed as part of the
- * 2011 X Division Summer Workshop for the express purpose
- * of a collaborative code for development of ideas in
- * the implementation of AMR codes for Exascale platforms
- *
- * AMR implementation of the Wave code previously developed
- * as a demonstration code for regular grids on Exascale platforms
- * as part of the Supercomputing Challenge and Los Alamos
- * National Laboratory
- *
- * Authors: Bob Robey XCP-2 brobey at lanl.gov
- * Neal Davis davis68 at lanl.gov, davis68 at illinois.edu
- * David Nicholaeff dnic at lanl.gov, mtrxknight at aol.com
- * Dennis Trujillo dptrujillo at lanl.gov, dptru10 at gmail.com
- *
- */
-#include <stdio.h>
-#include <stdlib.h>
-#include "s7.h"
-
-void S7_Sort(
- void * array_in,
- const int nsize,
- const enum S7_Datatype S7_datatype
- )
-{
- int n, child, parent, i;
-
- int qint;
- long qlong;
- long long qlonglong;
- float qfloat;
- double qdouble;
-
- int
- *int_data_ptr;
- long
- *long_data_ptr;
- long long
- *longlong_data_ptr;
- float
- *float_data_ptr;
- double
- *double_data_ptr;
-
- // Heapsort
-
- i=nsize/2;
- n = nsize;
-
- switch (S7_datatype){
- case S7_INTEGER4:
- case S7_INT:
- int_data_ptr = (int *)array_in;
-
- for (;;) {
- if (i > 0) {
- qint=int_data_ptr[--i];
- } // if i > 0
- else {
- n--;
- if (n == 0) {
- break; // End the sort here!
- } // if n
- qint=int_data_ptr[n];
- int_data_ptr[n]=int_data_ptr[0];
- } // else
-
- parent=i;
- child = i*2 + 1;
- while (child < n) {
- if (child +1 < n && int_data_ptr[child+1] > int_data_ptr[child]) child++;
- if (int_data_ptr[child] > qint) {
- int_data_ptr[parent] = int_data_ptr[child];
- parent=child;
- child = parent*2 + 1;
- } // if q
- else {
- break; // Break out of sift while loop
- } // else
- } // while
- int_data_ptr[parent]=qint;
- } // for
-
- break;
-
- case S7_LONG:
- long_data_ptr = (long *)array_in;
-
- for (;;) {
- if (i > 0) {
- qlong=long_data_ptr[--i];
- } // if i > 0
- else {
- n--;
- if (n == 0) {
- break; // End the sort here!
- } // if n
- qlong=long_data_ptr[n];
- long_data_ptr[n]=long_data_ptr[0];
- } // else
-
- parent=i;
- child = i*2 + 1;
- while (child < n) {
- if (child +1 < n && long_data_ptr[child+1] > long_data_ptr[child]) child++;
- if (long_data_ptr[child] > qlong) {
- long_data_ptr[parent] = long_data_ptr[child];
- parent=child;
- child = parent*2 + 1;
- } // if q
- else {
- break; // Break out of sift while loop
- } // else
- } // while
- long_data_ptr[parent]=qlong;
- } // for
-
- break;
-
- case S7_LONG_LONG_INT:
- case S7_INTEGER8:
- longlong_data_ptr = (long long *)array_in;
-
- for (;;) {
- if (i > 0) {
- qlonglong=longlong_data_ptr[--i];
- } // if i > 0
- else {
- n--;
- if (n == 0) {
- break; // End the sort here!
- } // if n
- qlonglong=longlong_data_ptr[n];
- longlong_data_ptr[n]=longlong_data_ptr[0];
- } // else
-
- parent=i;
- child = i*2 + 1;
- while (child < n) {
- if (child +1 < n && longlong_data_ptr[child+1] > longlong_data_ptr[child]) child++;
- if (longlong_data_ptr[child] > qlonglong) {
- longlong_data_ptr[parent] = longlong_data_ptr[child];
- parent=child;
- child = parent*2 + 1;
- } // if q
- else {
- break; // Break out of sift while loop
- } // else
- } // while
- longlong_data_ptr[parent]=qlonglong;
- } // for
-
- break;
-
- case S7_FLOAT:
- case S7_REAL4:
- float_data_ptr = (float *)array_in;
-
- for (;;) {
- if (i > 0) {
- qfloat=float_data_ptr[--i];
- } // if i > 0
- else {
- n--;
- if (n == 0) {
- break; // End the sort here!
- } // if n
- qfloat=float_data_ptr[n];
- float_data_ptr[n]=float_data_ptr[0];
- } // else
-
- parent=i;
- child = i*2 + 1;
- while (child < n) {
- if (child +1 < n && float_data_ptr[child+1] > float_data_ptr[child]) child++;
- if (float_data_ptr[child] > qfloat) {
- float_data_ptr[parent] = float_data_ptr[child];
- parent=child;
- child = parent*2 + 1;
- } // if q
- else {
- break; // Break out of sift while loop
- } // else
- } // while
- float_data_ptr[parent]=qfloat;
- } // for
-
- break;
-
- case S7_DOUBLE:
- case S7_REAL8:
- double_data_ptr = (double *)array_in;
-
- for (;;) {
- if (i > 0) {
- qdouble=double_data_ptr[--i];
- } // if i > 0
- else {
- n--;
- if (n == 0) {
- break; // End the sort here!
- } // if n
- qdouble=double_data_ptr[n];
- double_data_ptr[n]=double_data_ptr[0];
- } // else
-
- parent=i;
- child = i*2 + 1;
- while (child < n) {
- if (child +1 < n && double_data_ptr[child+1] > double_data_ptr[child]) child++;
- if (double_data_ptr[child] > qdouble) {
- double_data_ptr[parent] = double_data_ptr[child];
- parent=child;
- child = parent*2 + 1;
- } // if q
- else {
- break; // Break out of sift while loop
- } // else
- } // while
- double_data_ptr[parent]=qdouble;
- } // for
-
- break;
-
- default:
- printf("Error -- S7_Datatype not supported in S7_Sort\n");
- exit(1);
- break;
-
- }
-}
-
-
-void S7_Sort_2Arrays(
- void * array_in1,
- void * array_in2,
- const int nsize,
- const enum S7_Datatype S7_datatype
- )
-{
- int n, child, parent, i;
-
- int qint1, qint2;
- long qlong1, qlong2;
- long long qlonglong1, qlonglong2;
- float qfloat1, qfloat2;
- double qdouble1, qdouble2;
-
- int
- *int_data_ptr1, *int_data_ptr2;
- long
- *long_data_ptr1, *long_data_ptr2;
- long long
- *longlong_data_ptr1, *longlong_data_ptr2;
- float
- *float_data_ptr1, *float_data_ptr2;
- double
- *double_data_ptr1, *double_data_ptr2;
-
- // Heapsort
-
- i=nsize/2;
- n = nsize;
-
- switch (S7_datatype){
- case S7_INTEGER4:
- case S7_INT:
- int_data_ptr1 = (int *)array_in1;
- int_data_ptr2 = (int *)array_in2;
-
- for (;;) {
- if (i > 0) {
- qint1=int_data_ptr1[--i];
- qint2=int_data_ptr2[i];
- } // if i > 0
- else {
- n--;
- if (n == 0) {
-
- return; // End of sort
- } // if n
- qint1=int_data_ptr1[n];
- qint2=int_data_ptr2[n];
- int_data_ptr1[n]=int_data_ptr1[0];
- int_data_ptr2[n]=int_data_ptr2[0];
- } // else
-
- parent=i;
- child = i*2 + 1;
- while (child < n) {
- if (child +1 < n && int_data_ptr1[child+1] > int_data_ptr1[child]) child++;
- if (int_data_ptr1[child] > qint1) {
- int_data_ptr1[parent] = int_data_ptr1[child];
- int_data_ptr2[parent] = int_data_ptr2[child];
- parent=child;
- child = parent*2 + 1;
- } // if q
- else {
- break;
- } // else
- } // while
- int_data_ptr1[parent]=qint1;
- int_data_ptr2[parent]=qint2;
- } // for
- break;
-
- case S7_LONG:
- long_data_ptr1 = (long *)array_in1;
- long_data_ptr2 = (long *)array_in2;
-
- for (;;) {
- if (i > 0) {
- qlong1=long_data_ptr1[--i];
- qlong2=long_data_ptr2[i];
- } // if i > 0
- else {
- n--;
- if (n == 0) {
-
- return; // End of sort
- } // if n
- qlong1=long_data_ptr1[n];
- qlong2=long_data_ptr2[n];
- long_data_ptr1[n]=long_data_ptr1[0];
- long_data_ptr2[n]=long_data_ptr2[0];
- } // else
-
- parent=i;
- child = i*2 + 1;
- while (child < n) {
- if (child +1 < n && long_data_ptr1[child+1] > long_data_ptr1[child]) child++;
- if (long_data_ptr1[child] > qlong1) {
- long_data_ptr1[parent] = long_data_ptr1[child];
- long_data_ptr2[parent] = long_data_ptr2[child];
- parent=child;
- child = parent*2 + 1;
- } // if q
- else {
- break;
- } // else
- } // while
- long_data_ptr1[parent]=qlong1;
- long_data_ptr2[parent]=qlong2;
- } // for
- break;
-
- case S7_LONG_LONG_INT:
- case S7_INTEGER8:
- longlong_data_ptr1 = (long long *)array_in1;
- longlong_data_ptr2 = (long long *)array_in2;
-
- for (;;) {
- if (i > 0) {
- qlonglong1=longlong_data_ptr1[--i];
- qlonglong2=longlong_data_ptr2[i];
- } // if i > 0
- else {
- n--;
- if (n == 0) {
-
- return; // End of sort
- } // if n
- qlonglong1=longlong_data_ptr1[n];
- qlonglong2=longlong_data_ptr2[n];
- longlong_data_ptr1[n]=longlong_data_ptr1[0];
- longlong_data_ptr2[n]=longlong_data_ptr2[0];
- } // else
-
- parent=i;
- child = i*2 + 1;
- while (child < n) {
- if (child +1 < n && longlong_data_ptr1[child+1] > longlong_data_ptr1[child]) child++;
- if (longlong_data_ptr1[child] > qlonglong1) {
- longlong_data_ptr1[parent] = longlong_data_ptr1[child];
- longlong_data_ptr2[parent] = longlong_data_ptr2[child];
- parent=child;
- child = parent*2 + 1;
- } // if q
- else {
- break;
- } // else
- } // while
- longlong_data_ptr1[parent]=qlonglong1;
- longlong_data_ptr2[parent]=qlonglong2;
- } // for
- break;
-
- case S7_FLOAT:
- case S7_REAL4:
- float_data_ptr1 = (float *)array_in1;
- float_data_ptr2 = (float *)array_in2;
-
- for (;;) {
- if (i > 0) {
- qfloat1=float_data_ptr1[--i];
- qfloat2=float_data_ptr2[i];
- } // if i > 0
- else {
- n--;
- if (n == 0) {
-
- return; // End of sort
- } // if n
- qfloat1=float_data_ptr1[n];
- qfloat2=float_data_ptr2[n];
- float_data_ptr1[n]=float_data_ptr1[0];
- float_data_ptr2[n]=float_data_ptr2[0];
- } // else
-
- parent=i;
- child = i*2 + 1;
- while (child < n) {
- if (child +1 < n && float_data_ptr1[child+1] > float_data_ptr1[child]) child++;
- if (float_data_ptr1[child] > qfloat1) {
- float_data_ptr1[parent] = float_data_ptr1[child];
- float_data_ptr2[parent] = float_data_ptr2[child];
- parent=child;
- child = parent*2 + 1;
- } // if q
- else {
- break;
- } // else
- } // while
- float_data_ptr1[parent]=qfloat1;
- float_data_ptr2[parent]=qfloat2;
- } // for
- break;
-
- case S7_DOUBLE:
- case S7_REAL8:
- double_data_ptr1 = (double *)array_in1;
- double_data_ptr2 = (double *)array_in2;
-
- for (;;) {
- if (i > 0) {
- qdouble1=double_data_ptr1[--i];
- qdouble2=double_data_ptr2[i];
- } // if i > 0
- else {
- n--;
- if (n == 0) {
-
- return; // End of sort
- } // if n
- qdouble1=double_data_ptr1[n];
- qdouble2=double_data_ptr2[n];
- double_data_ptr1[n]=double_data_ptr1[0];
- double_data_ptr2[n]=double_data_ptr2[0];
- } // else
-
- parent=i;
- child = i*2 + 1;
- while (child < n) {
- if (child +1 < n && double_data_ptr1[child+1] > double_data_ptr1[child]) child++;
- if (double_data_ptr1[child] > qdouble1) {
- double_data_ptr1[parent] = double_data_ptr1[child];
- double_data_ptr2[parent] = double_data_ptr2[child];
- parent=child;
- child = parent*2 + 1;
- } // if q
- else {
- break;
- } // else
- } // while
- double_data_ptr1[parent]=qdouble1;
- double_data_ptr2[parent]=qdouble2;
- } // for
- break;
-
- default:
- printf("Error -- S7_Datatype not supported in S7_Sort\n");
- exit(1);
- break;
- }
-}
-
-
-
-
-
-void S7_Index_Sort(
- void * array_in,
- const int nsize,
- const enum S7_Datatype S7_datatype,
- int * index
- )
-{
- int n, j, child, parent, i;
- int indext;
-
- int qint;
- long qlong;
- long long qlonglong;
- float qfloat;
- double qdouble;
-
- int
- *int_data_ptr;
- long
- *long_data_ptr;
- long long
- *longlong_data_ptr;
- float
- *float_data_ptr;
- double
- *double_data_ptr;
-
- // Heapsort
-
- // Initialize array with consecutive integers
- for (j=0; j<nsize; j++) index[j]=j;
-
- i=nsize/2;
- n = nsize;
-
- switch (S7_datatype){
- case S7_INTEGER4:
- case S7_INT:
- int_data_ptr = (int *)array_in;
-
- for (;;) {
- if (i > 0) {
- indext=index[--i];
- qint=int_data_ptr[indext];
- } // if i > 0
- else {
- n--;
- if (n == 0) {
-
- return;
- } // if n
- indext=index[n];
- qint=int_data_ptr[indext];
- index[n]=index[0];
- } // else
-
- parent=i;
- child = i*2 + 1;
- while (child < n) {
- if (child +1 < n && int_data_ptr[index[child+1]] > int_data_ptr[index[child]]) child++;
- if (int_data_ptr[index[child]] > qint) {
- index[parent] = index[child];
- parent=child;
- child = parent*2 + 1;
- } // if q
- else {
- break;
- } // else
- } // while
- index[parent]=indext;
- } // for
- break;
-
- case S7_LONG:
- long_data_ptr = (long *)array_in;
-
- for (;;) {
- if (i > 0) {
- indext=index[--i];
- qlong=long_data_ptr[indext];
- } // if i > 0
- else {
- n--;
- if (n == 0) {
-
- return;
- } // if n
- indext=index[n];
- qlong=long_data_ptr[indext];
- index[n]=index[0];
- } // else
-
- parent=i;
- child = i*2 + 1;
- while (child < n) {
- if (child +1 < n && long_data_ptr[index[child+1]] > long_data_ptr[index[child]]) child++;
- if (long_data_ptr[index[child]] > qlong) {
- index[parent] = index[child];
- parent=child;
- child = parent*2 + 1;
- } // if q
- else {
- break;
- } // else
- } // while
- index[parent]=indext;
- } // for
- break;
-
- case S7_LONG_LONG_INT:
- case S7_INTEGER8:
- longlong_data_ptr = (long long *)array_in;
-
- for (;;) {
- if (i > 0) {
- indext=index[--i];
- qlonglong=longlong_data_ptr[indext];
- } // if i > 0
- else {
- n--;
- if (n == 0) {
-
- return;
- } // if n
- indext=index[n];
- qlonglong=longlong_data_ptr[indext];
- index[n]=index[0];
- } // else
-
- parent=i;
- child = i*2 + 1;
- while (child < n) {
- if (child +1 < n && longlong_data_ptr[index[child+1]] > longlong_data_ptr[index[child]]) child++;
- if (longlong_data_ptr[index[child]] > qlonglong) {
- index[parent] = index[child];
- parent=child;
- child = parent*2 + 1;
- } // if q
- else {
- break;
- } // else
- } // while
- index[parent]=indext;
- } // for
- break;
-
- case S7_FLOAT:
- case S7_REAL4:
- float_data_ptr = (float *)array_in;
-
- for (;;) {
- if (i > 0) {
- indext=index[--i];
- qfloat=float_data_ptr[indext];
- } // if i > 0
- else {
- n--;
- if (n == 0) {
-
- return;
- } // if n
- indext=index[n];
- qfloat=float_data_ptr[indext];
- index[n]=index[0];
- } // else
-
- parent=i;
- child = i*2 + 1;
- while (child < n) {
- if (child +1 < n && float_data_ptr[index[child+1]] > float_data_ptr[index[child]]) child++;
- if (float_data_ptr[index[child]] > qfloat) {
- index[parent] = index[child];
- parent=child;
- child = parent*2 + 1;
- } // if q
- else {
- break;
- } // else
- } // while
- index[parent]=indext;
- } // for
- break;
-
-
- case S7_DOUBLE:
- case S7_REAL8:
- double_data_ptr = (double *)array_in;
-
- for (;;) {
- if (i > 0) {
- indext=index[--i];
- qdouble=double_data_ptr[indext];
- } // if i > 0
- else {
- n--;
- if (n == 0) {
-
- return;
- } // if n
- indext=index[n];
- qdouble=double_data_ptr[indext];
- index[n]=index[0];
- } // else
-
- parent=i;
- child = i*2 + 1;
- while (child < n) {
- if (child +1 < n && double_data_ptr[index[child+1]] > double_data_ptr[index[child]]) child++;
- if (double_data_ptr[index[child]] > qdouble) {
- index[parent] = index[child];
- parent=child;
- child = parent*2 + 1;
- } // if q
- else {
- break;
- } // else
- } // while
- index[parent]=indext;
- } // for
- break;
-
- default:
- printf("Error -- S7_Datatype not supported in S7_Index_Sort\n");
- exit(1);
- break;
-
- }
-}
-
-void S7_Indexi8_Sort(
- void * array_in,
- const int nsize,
- const enum S7_Datatype S7_datatype,
- long * index
- )
-{
- int n, j, child, parent, i;
- long indext;
-
- int qint;
- long qlong;
- long long qlonglong;
- float qfloat;
- double qdouble;
-
- int
- *int_data_ptr;
- long
- *long_data_ptr;
- long long
- *longlong_data_ptr;
- float
- *float_data_ptr;
- double
- *double_data_ptr;
-
- // Heapsort
-
- // Initialize array with consecutive integers
- for (j=0; j<nsize; j++) index[j]=j;
-
- i=nsize/2;
- n = nsize;
-
- switch (S7_datatype){
- case S7_INTEGER4:
- case S7_INT:
- int_data_ptr = (int *)array_in;
-
- for (;;) {
- if (i > 0) {
- indext=index[--i];
- qint=int_data_ptr[indext];
- } // if i > 0
- else {
- n--;
- if (n == 0) {
-
- return;
- } // if n
- indext=index[n];
- qint=int_data_ptr[indext];
- index[n]=index[0];
- } // else
-
- parent=i;
- child = i*2 + 1;
- while (child < n) {
- if (child +1 < n && int_data_ptr[index[child+1]] > int_data_ptr[index[child]]) child++;
- if (int_data_ptr[index[child]] > qint) {
- index[parent] = index[child];
- parent=child;
- child = parent*2 + 1;
- } // if q
- else {
- break;
- } // else
- } // while
- index[parent]=indext;
- } // for
- break;
-
- case S7_LONG:
- long_data_ptr = (long *)array_in;
-
- for (;;) {
- if (i > 0) {
- indext=index[--i];
- qlong=long_data_ptr[indext];
- } // if i > 0
- else {
- n--;
- if (n == 0) {
-
- return;
- } // if n
- indext=index[n];
- qlong=long_data_ptr[indext];
- index[n]=index[0];
- } // else
-
- parent=i;
- child = i*2 + 1;
- while (child < n) {
- if (child +1 < n && long_data_ptr[index[child+1]] > long_data_ptr[index[child]]) child++;
- if (long_data_ptr[index[child]] > qlong) {
- index[parent] = index[child];
- parent=child;
- child = parent*2 + 1;
- } // if q
- else {
- break;
- } // else
- } // while
- index[parent]=indext;
- } // for
- break;
-
- case S7_LONG_LONG_INT:
- case S7_INTEGER8:
- longlong_data_ptr = (long long *)array_in;
-
- for (;;) {
- if (i > 0) {
- indext=index[--i];
- qlonglong=longlong_data_ptr[indext];
- } // if i > 0
- else {
- n--;
- if (n == 0) {
-
- return;
- } // if n
- indext=index[n];
- qlonglong=longlong_data_ptr[indext];
- index[n]=index[0];
- } // else
-
- parent=i;
- child = i*2 + 1;
- while (child < n) {
- if (child +1 < n && longlong_data_ptr[index[child+1]] > longlong_data_ptr[index[child]]) child++;
- if (longlong_data_ptr[index[child]] > qlonglong) {
- index[parent] = index[child];
- parent=child;
- child = parent*2 + 1;
- } // if q
- else {
- break;
- } // else
- } // while
- index[parent]=indext;
- } // for
- break;
-
- case S7_FLOAT:
- case S7_REAL4:
- float_data_ptr = (float *)array_in;
-
- for (;;) {
- if (i > 0) {
- indext=index[--i];
- qfloat=float_data_ptr[indext];
- } // if i > 0
- else {
- n--;
- if (n == 0) {
-
- return;
- } // if n
- indext=index[n];
- qfloat=float_data_ptr[indext];
- index[n]=index[0];
- } // else
-
- parent=i;
- child = i*2 + 1;
- while (child < n) {
- if (child +1 < n && float_data_ptr[index[child+1]] > float_data_ptr[index[child]]) child++;
- if (float_data_ptr[index[child]] > qfloat) {
- index[parent] = index[child];
- parent=child;
- child = parent*2 + 1;
- } // if q
- else {
- break;
- } // else
- } // while
- index[parent]=indext;
- } // for
- break;
-
-
- case S7_DOUBLE:
- case S7_REAL8:
- double_data_ptr = (double *)array_in;
-
- for (;;) {
- if (i > 0) {
- indext=index[--i];
- qdouble=double_data_ptr[indext];
- } // if i > 0
- else {
- n--;
- if (n == 0) {
-
- return;
- } // if n
- indext=index[n];
- qdouble=double_data_ptr[indext];
- index[n]=index[0];
- } // else
-
- parent=i;
- child = i*2 + 1;
- while (child < n) {
- if (child +1 < n && double_data_ptr[index[child+1]] > double_data_ptr[index[child]]) child++;
- if (double_data_ptr[index[child]] > qdouble) {
- index[parent] = index[child];
- parent=child;
- child = parent*2 + 1;
- } // if q
- else {
- break;
- } // else
- } // while
- index[parent]=indext;
- } // for
- break;
-
- default:
- printf("Error -- S7_Datatype not supported in S7_Indexi8_Sort\n");
- exit(1);
- break;
-
- }
-}
Removed: test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/s7.h
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C%2B%2B/CLAMR/s7.h?rev=312481&view=auto
==============================================================================
--- test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/s7.h (original)
+++ test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/s7.h (removed)
@@ -1,151 +0,0 @@
-/*
- * Copyright (c) 2011-2012, Los Alamos National Security, LLC.
- * All rights Reserved.
- *
- * Copyright 2011-2012. Los Alamos National Security, LLC. This software was produced
- * under U.S. Government contract DE-AC52-06NA25396 for Los Alamos National
- * Laboratory (LANL), which is operated by Los Alamos National Security, LLC
- * for the U.S. Department of Energy. The U.S. Government has rights to use,
- * reproduce, and distribute this software. NEITHER THE GOVERNMENT NOR LOS
- * ALAMOS NATIONAL SECURITY, LLC MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR
- * ASSUMES ANY LIABILITY FOR THE USE OF THIS SOFTWARE. If software is modified
- * to produce derivative works, such modified software should be clearly marked,
- * so as not to confuse it with the version available from LANL.
- *
- * Additionally, redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of the Los Alamos National Security, LLC, Los Alamos
- * National Laboratory, LANL, the U.S. Government, nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE LOS ALAMOS NATIONAL SECURITY, LLC AND
- * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT
- * NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL LOS ALAMOS NATIONAL
- * SECURITY, LLC OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
- * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
- * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- *
- * CLAMR -- LA-CC-11-094
- * This research code is being developed as part of the
- * 2011 X Division Summer Workshop for the express purpose
- * of a collaborative code for development of ideas in
- * the implementation of AMR codes for Exascale platforms
- *
- * AMR implementation of the Wave code previously developed
- * as a demonstration code for regular grids on Exascale platforms
- * as part of the Supercomputing Challenge and Los Alamos
- * National Laboratory
- *
- * Authors: Bob Robey XCP-2 brobey at lanl.gov
- * Neal Davis davis68 at lanl.gov, davis68 at illinois.edu
- * David Nicholaeff dnic at lanl.gov, mtrxknight at aol.com
- * Dennis Trujillo dptrujillo at lanl.gov, dptru10 at gmail.com
- *
- */
-#ifndef S7_H_
-#define S7_H_
-
-//#define _S7_DEBUG
-
-/* Declare everything below with C linkage when compiled as C++. */
-#ifdef __cplusplus
-extern "C"
-{
-#endif
-
-/* Status code: successful return. */
-#define S7_OK 0
-
-/*
- * Element types accepted by the generic S7 entry points.  The numeric value
- * of each enumerator follows from S7_GENERIC8 = 0 and the declaration order,
- * so the order must not change.
- */
-enum S7_Datatype
-{
-   S7_GENERIC8 = 0,
-   S7_BYTE,
-   S7_PACKED,
-
-   S7_CHAR,
-   S7_INT,
-   S7_LONG,
-   S7_LONG_LONG_INT,
-   S7_FLOAT,
-   S7_DOUBLE,
-
-   S7_CHARACTER,
-   S7_LOGICAL,
-   S7_INTEGER4,
-   S7_INTEGER8,
-   S7_REAL4,
-   S7_REAL8,
-
-   /* Bounds of the enumeration. */
-   S7_DATATYPE_MIN = S7_GENERIC8,
-   S7_DATATYPE_MAX = S7_REAL8
-};
-
-/*
- * Generic entry points: untyped array(s), element count, and the
- * S7_Datatype describing the elements.
- */
-void S7_Sort(void *array_in, const int nsize, const enum S7_Datatype S7_datatype);
-
-void S7_Sort_2Arrays(void *array_in1, void *array_in2, const int nsize,
-                     const enum S7_Datatype S7_datatype);
-
-void S7_Index_Sort(void *array_in, const int nsize,
-                   const enum S7_Datatype S7_datatype, int *index);
-
-/*
- * NOTE(review): index is `long *` here, while the *_int8 helpers below take
- * `long long` index arrays -- confirm the intended width.
- */
-void S7_Indexi8_Sort(void *array_in, const int nsize,
-                     const enum S7_Datatype S7_datatype, long *index);
-
-/* Typed index-sort prototypes with an int index array. */
-void S7_Index_sort_real8(const int n, double array_in[], int index[]);
-void S7_Index_sort_int8(const int n, long long iarray_in[], int index[]);
-void S7_Index_sort_int4(const int n, int iarray_in[], int index[]);
-
-/* Typed index-sort prototypes with a long long index array. */
-void S7_Index_sort_real8_int8(const int n, double array_in[], long long index[]);
-void S7_Index_sort_int8_int8(const int n, long long iarray_in[], long long index[]);
-void S7_Index_sort_int4_int8(const int n, int iarray_in[], long long index[]);
-
-/* Typed prototypes taking a single array. */
-void S7_Sort_real8(const int n, double array_in[]);
-void S7_Sort_int8(const int n, long long array_in[]);
-void S7_Sort_int4(const int n, int array_in[]);
-
-/* Typed prototypes taking two arrays of the same element type. */
-void S7_Sort_real8_real8(const int n, double array_in[], double array_in2[]);
-void S7_Sort_int8_int8(const int n, long long array_in[], long long array_in2[]);
-void S7_Sort_int4_int4(const int n, int array_in[], int array_in2[]);
-
-/*
- * End prototypes.
- */
-
-/* Close the extern "C" block opened above. */
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* S7_H_ */
Removed: test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/state.cpp
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C%2B%2B/CLAMR/state.cpp?rev=312481&view=auto
==============================================================================
--- test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/state.cpp (original)
+++ test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/state.cpp (removed)
@@ -1,3966 +0,0 @@
-/*
- * Copyright (c) 2011-2013, Los Alamos National Security, LLC.
- * All rights Reserved.
- *
- * Copyright 2011-2012. Los Alamos National Security, LLC. This software was produced
- * under U.S. Government contract DE-AC52-06NA25396 for Los Alamos National
- * Laboratory (LANL), which is operated by Los Alamos National Security, LLC
- * for the U.S. Department of Energy. The U.S. Government has rights to use,
- * reproduce, and distribute this software. NEITHER THE GOVERNMENT NOR LOS
- * ALAMOS NATIONAL SECURITY, LLC MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR
- * ASSUMES ANY LIABILITY FOR THE USE OF THIS SOFTWARE. If software is modified
- * to produce derivative works, such modified software should be clearly marked,
- * so as not to confuse it with the version available from LANL.
- *
- * Additionally, redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of the Los Alamos National Security, LLC, Los Alamos
- * National Laboratory, LANL, the U.S. Government, nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE LOS ALAMOS NATIONAL SECURITY, LLC AND
- * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT
- * NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL LOS ALAMOS NATIONAL
- * SECURITY, LLC OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
- * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
- * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- *
- * CLAMR -- LA-CC-11-094
- * This research code is being developed as part of the
- * 2011 X Division Summer Workshop for the express purpose
- * of a collaborative code for development of ideas in
- * the implementation of AMR codes for Exascale platforms
- *
- * AMR implementation of the Wave code previously developed
- * as a demonstration code for regular grids on Exascale platforms
- * as part of the Supercomputing Challenge and Los Alamos
- * National Laboratory
- *
- * Authors: Bob Robey XCP-2 brobey at lanl.gov
- * Neal Davis davis68 at lanl.gov, davis68 at illinois.edu
- * David Nicholaeff dnic at lanl.gov, mtrxknight at aol.com
- * Dennis Trujillo dptrujillo at lanl.gov, dptru10 at gmail.com
- *
- */
-#include "mesh.h"
-#include <unistd.h>
-#include <stdio.h>
-#include <assert.h>
-#include <algorithm>
-#include <queue>
-#include "state.h"
-#include "timer.h"
-#ifdef HAVE_MPI
-#include <mpi.h>
-#endif
-
-#undef DEBUG
-//#define DEBUG 0
-#define DEBUG_RESTORE_VALS 1
-#define TIMING_LEVEL 2
-
-// Numeric constants for the three precision modes.  Exactly one of
-// MINIMUM_PRECISION, MIXED_PRECISION or FULL_PRECISION is expected to be
-// defined by the build; no constants are defined otherwise.
-#if defined(MINIMUM_PRECISION)
-#define ZERO 0.0f
-#define ONE 1.0f
-#define HALF 0.5f
-// Was written "1.0f-30", which parses as the expression (1.0f - 30), i.e.
-// -29.0f, and so never acted as a small positive floor.  1.0e-30f is the
-// intended value and is representable as a normal float.
-#define EPSILON 1.0e-30f
-#define STATE_EPS 15.0
-// calc refine is done in single precision
-#define REFINE_GRADIENT 0.10f
-#define COARSEN_GRADIENT 0.05f
-#define REFINE_HALF 0.5f
-#define REFINE_NEG_THOUSAND -1000.0f
-
-#elif defined(MIXED_PRECISION) // intermediate values calculated high precision and stored as floats
-#define ZERO 0.0
-#define ONE 1.0
-#define HALF 0.5
-#define EPSILON 1.0e-30
-#define STATE_EPS .02
-// calc refine is done in single precision
-#define REFINE_GRADIENT 0.10f
-#define COARSEN_GRADIENT 0.05f
-#define REFINE_HALF 0.5f
-#define REFINE_NEG_THOUSAND -1000.0f
-
-#elif defined(FULL_PRECISION)
-#define ZERO 0.0
-#define ONE 1.0
-#define HALF 0.5
-#define EPSILON 1.0e-30
-#define STATE_EPS .02
-// the refine constants are double precision literals in this mode
-#define REFINE_GRADIENT 0.10
-#define COARSEN_GRADIENT 0.05
-#define REFINE_HALF 0.5
-#define REFINE_NEG_THOUSAND -1000.0
-
-#endif
-
-// File-local flag that only exists in OpenMP builds.
-#ifdef _OPENMP
-static bool iversion_flag = false;
-#endif
-
-typedef unsigned uint;
-
-// Printable label for each of the STATE_TIMER_SIZE state timers
-// (presumably listed in the same order as the STATE_TIMER_* indices used
-// with cpu_timers/gpu_timers -- confirm against state.h).
-static const char *state_timer_descriptor[STATE_TIMER_SIZE] = {
-   "state_timer_apply_BCs",
-   "state_timer_set_timestep",
-   "state_timer_finite_difference",
-   "state_timer_refine_potential",
-   "state_timer_calc_mpot",
-   "state_timer_rezone_all",
-   "state_timer_mass_sum",
-   "state_timer_read",
-   "state_timer_write"
-};
-
-#ifdef HAVE_OPENCL
-#include "state_kernel.inc"
-#endif
-
-// A running sum together with its accumulated rounding correction; this is
-// the element type combined by knuth_sum().
-struct esum_type{
-   double sum;
-   double correction;
-};
-#ifdef HAVE_MPI
-// MPI datatype and reduction operator for esum_type; both are created in the
-// State constructor once MPI has been initialized.
-MPI_Datatype MPI_TWO_DOUBLES;
-MPI_Op KNUTH_SUM;
-// Passed as the "commute" argument of MPI_Op_create.
-int commutative = 1;
-void knuth_sum(struct esum_type *in, struct esum_type *inout, int *len, MPI_Datatype *MPI_TWO_DOUBLES);
-#endif
-
-// Cell count recorded by State::add_boundary_cells() before it appends the
-// boundary cells; State::remove_boundary_cells() shrinks back to it.
-int save_ncells;
-
-#define CONSERVED_EQNS
-
-// Arguments are parenthesized so that compound expressions expand correctly,
-// e.g. SQR(a+b) is (a+b)*(a+b) rather than a+b*a+b.  Both macros may
-// evaluate an argument more than once: do not pass expressions with side
-// effects.
-#define SQR(x) ( (x)*(x) )
-#define MIN3(x,y,z) ( min( min((x),(y)), (z)) )
-
-#ifdef HAVE_OPENCL
-// OpenCL kernel handles.  State::init() creates them from the state kernel
-// program and State::terminate() releases them.
-
-// Timestep calculation and its min-reduction
-cl_kernel kernel_set_timestep;
-cl_kernel kernel_reduction_min;
-
-// State data copies
-cl_kernel kernel_copy_state_data;
-cl_kernel kernel_copy_state_ghost_data;
-
-// Boundary conditions
-cl_kernel kernel_apply_boundary_conditions;
-cl_kernel kernel_apply_boundary_conditions_local;
-cl_kernel kernel_apply_boundary_conditions_ghost;
-
-// Finite difference update and refinement potential
-cl_kernel kernel_calc_finite_difference;
-cl_kernel kernel_refine_potential;
-
-// Two-stage mass sum reductions
-cl_kernel kernel_reduce_sum_mass_stage1of2;
-cl_kernel kernel_reduce_sum_mass_stage2of2;
-cl_kernel kernel_reduce_epsum_mass_stage1of2;
-cl_kernel kernel_reduce_epsum_mass_stage2of2;
-#endif
-
-// Half-step value of a state variable at the face between a cell ("_i") and
-// its neighbor ("_n"): a distance-weighted average of the two cell values
-// minus HALF*deltaT times the area-weighted flux difference divided by a
-// volume term.
-//
-//   deltaT     Timestep
-//   U_i, U_n   State variable in the initial (downwind) / next (upwind) cell
-//   F_i, F_n   State variable flux in the initial / next cell
-//   r_i, r_n   Center to face distance of the initial / next cell
-//   A_i, A_n   Face surface area of the cell / of its neighbor
-//   V_i, V_n   Volume of the cell / of its neighbor
-//
-// XXX Fix the subindices to be more intuitive XXX
-inline real_t U_halfstep(
-      real_t deltaT,
-      real_t U_i,
-      real_t U_n,
-      real_t F_i,
-      real_t F_n,
-      real_t r_i,
-      real_t r_n,
-      real_t A_i,
-      real_t A_n,
-      real_t V_i,
-      real_t V_n)
-{
-   // Kept as one expression so the evaluation order and intermediate
-   // precision are exactly those of the formula as written.
-   return (( r_i*U_n + r_n*U_i ) / ( r_i + r_n ))
-          - HALF*deltaT*(( F_n*A_n*min(ONE, A_i/A_n) - F_i*A_i*min(ONE, A_n/A_i) )
-                         / ( V_n*min(HALF, V_i/V_n) + V_i*min(HALF, V_n/V_i) ));
-}
-
-// Full-step update of a state variable: subtracts (deltaT/dr) times the net
-// of the two flux differences (F_plus - F_minus) and (G_plus - G_minus)
-// from the current value U.
-inline real_t U_fullstep(
-      real_t deltaT,
-      real_t dr,
-      real_t U,
-      real_t F_plus,
-      real_t F_minus,
-      real_t G_plus,
-      real_t G_minus)
-{
-   return (U - (deltaT / dr)*(F_plus - F_minus + G_plus - G_minus));
-}
-
-
-// Correction weight computed from the local speed and three gradients.
-//
-//   deltaT      Timestep
-//   dr          Cell's center to face distance
-//   U_eigen     State variable's eigenvalue (speed)
-//   grad_half   Centered gradient
-//   grad_minus  Downwind gradient
-//   grad_plus   Upwind gradient
-//
-// Returns HALF*nu*(ONE - r), where nu is built from HALF*U_eigen*deltaT/dr
-// and r is the smallest of ONE and the two gradient ratios, floored at ZERO.
-inline real_t w_corrector(
-      real_t deltaT,
-      real_t dr,
-      real_t U_eigen,
-      real_t grad_half,
-      real_t grad_minus,
-      real_t grad_plus)
-{
-   real_t nu = HALF * U_eigen * deltaT / dr;
-   nu *= (ONE - nu);
-
-   // EPSILON bounds the denominator from below before taking the reciprocal
-   const real_t rdenom = ONE / max(SQR(grad_half), EPSILON);
-   const real_t rplus  = (grad_plus  * grad_half) * rdenom;
-   const real_t rminus = (grad_minus * grad_half) * rdenom;
-
-   return HALF*nu*(ONE- max(MIN3(ONE, rplus, rminus), ZERO));
-}
-
-// Construct the state for the given mesh: zero all timers, remember the mesh
-// and, when MPI is already initialized, register the MPI datatype and
-// reduction operator used with esum_type.
-State::State(Mesh *mesh_in)
-{
-   // Clear the CPU and GPU accumulators of every timer slot
-   for (int itimer = 0; itimer < STATE_TIMER_SIZE; itimer++){
-      cpu_timers[itimer] = 0.0;
-      gpu_timers[itimer] = 0L;
-   }
-
-   mesh = mesh_in;
-
-#ifdef HAVE_MPI
-   int mpi_is_initialized;
-   MPI_Initialized(&mpi_is_initialized);
-   if (mpi_is_initialized){
-      // Two contiguous doubles describe one esum_type element
-      MPI_Type_contiguous(2, MPI_DOUBLE, &MPI_TWO_DOUBLES);
-      MPI_Type_commit(&MPI_TWO_DOUBLES);
-      MPI_Op_create((MPI_User_function *)knuth_sum, commutative, &KNUTH_SUM);
-      // FIXME add fini and set size
-      if (mesh->parallel) {
-         state_memory.pinit(MPI_COMM_WORLD, 2L * 1024 * 1024 * 1024);
-      }
-   }
-#endif
-}
-
-// Initialize the state.  When do_gpu_calc is non-zero and OpenCL support is
-// compiled in, the state kernel program is built and one kernel object is
-// created per entry point.  In every case the H, U and V arrays are then
-// allocated for mesh->ncells cells.
-void State::init(int do_gpu_calc)
-{
-   if (do_gpu_calc) {
-#ifdef HAVE_OPENCL
-      cl_context cl_ctx = ezcl_get_context();
-
-      if (mesh->mype == 0) printf("Starting compile of kernels in state\n");
-      const char *build_defines = NULL;
-      cl_program state_program = ezcl_create_program_wsource(cl_ctx, build_defines, state_kern_source);
-
-      kernel_set_timestep                    = ezcl_create_kernel_wprogram(state_program, "set_timestep_cl");
-      kernel_reduction_min                   = ezcl_create_kernel_wprogram(state_program, "finish_reduction_min_cl");
-      kernel_copy_state_data                 = ezcl_create_kernel_wprogram(state_program, "copy_state_data_cl");
-      kernel_copy_state_ghost_data           = ezcl_create_kernel_wprogram(state_program, "copy_state_ghost_data_cl");
-      kernel_apply_boundary_conditions       = ezcl_create_kernel_wprogram(state_program, "apply_boundary_conditions_cl");
-      kernel_apply_boundary_conditions_local = ezcl_create_kernel_wprogram(state_program, "apply_boundary_conditions_local_cl");
-      kernel_apply_boundary_conditions_ghost = ezcl_create_kernel_wprogram(state_program, "apply_boundary_conditions_ghost_cl");
-      kernel_calc_finite_difference          = ezcl_create_kernel_wprogram(state_program, "calc_finite_difference_cl");
-      kernel_refine_potential                = ezcl_create_kernel_wprogram(state_program, "refine_potential_cl");
-      kernel_reduce_sum_mass_stage1of2       = ezcl_create_kernel_wprogram(state_program, "reduce_sum_mass_stage1of2_cl");
-      kernel_reduce_sum_mass_stage2of2       = ezcl_create_kernel_wprogram(state_program, "reduce_sum_mass_stage2of2_cl");
-      kernel_reduce_epsum_mass_stage1of2     = ezcl_create_kernel_wprogram(state_program, "reduce_epsum_mass_stage1of2_cl");
-      kernel_reduce_epsum_mass_stage2of2     = ezcl_create_kernel_wprogram(state_program, "reduce_epsum_mass_stage2of2_cl");
-
-      // The kernels have been created; the program handle is released here
-      ezcl_program_release(state_program);
-      if (mesh->mype == 0) printf("Finishing compile of kernels in state\n");
-#endif
-   }
-
-   allocate(mesh->ncells);
-}
-
-// Allocate the H, U and V state arrays with ncells entries each through
-// state_memory.  The arrays are registered with RESTART_DATA, or with
-// LOAD_BALANCE_MEMORY instead when J7 support is compiled in and the mesh
-// is parallel.
-void State::allocate(size_t ncells)
-{
-   int flags = RESTART_DATA;
-#ifdef HAVE_J7
-   if (mesh->parallel) flags = LOAD_BALANCE_MEMORY;
-#endif
-
-   H = (state_t *)state_memory.memory_malloc(ncells, sizeof(state_t), "H", flags);
-   U = (state_t *)state_memory.memory_malloc(ncells, sizeof(state_t), "U", flags);
-   V = (state_t *)state_memory.memory_malloc(ncells, sizeof(state_t), "V", flags);
-}
-
-// Grow the arrays managed by state_memory when new_ncells exceeds the size
-// currently reported for H.  Never shrinks.
-void State::resize(size_t new_ncells){
-   const size_t allocated = state_memory.get_memory_size(H);
-   if (allocated < new_ncells) {
-      state_memory.memory_realloc_all(new_ncells);
-   }
-}
-
-// Re-read the H, U and V pointers from state_memory by their registered
-// names "H", "U" and "V".
-void State::memory_reset_ptrs(void){
-   H = (state_t *)state_memory.get_memory_ptr("H");
-   U = (state_t *)state_memory.get_memory_ptr("U");
-   V = (state_t *)state_memory.get_memory_ptr("V");
-}
-
-// Release what the state owns: the host H/U/V arrays; with OpenCL also the
-// device buffers and every kernel handle; with MPI and a parallel mesh,
-// finally state_memory.pfini() is called.
-void State::terminate(void)
-{
-   // Host arrays
-   state_memory.memory_delete(H);
-   state_memory.memory_delete(U);
-   state_memory.memory_delete(V);
-
-#ifdef HAVE_OPENCL
-   // Device buffers
-   ezcl_device_memory_delete(dev_deltaT);
-
-   gpu_state_memory.memory_delete(dev_H);
-   gpu_state_memory.memory_delete(dev_U);
-   gpu_state_memory.memory_delete(dev_V);
-
-   // Kernel handles, in the order they were created in init()
-   ezcl_kernel_release(kernel_set_timestep);
-   ezcl_kernel_release(kernel_reduction_min);
-   ezcl_kernel_release(kernel_copy_state_data);
-   ezcl_kernel_release(kernel_copy_state_ghost_data);
-   ezcl_kernel_release(kernel_apply_boundary_conditions);
-   ezcl_kernel_release(kernel_apply_boundary_conditions_local);
-   ezcl_kernel_release(kernel_apply_boundary_conditions_ghost);
-   ezcl_kernel_release(kernel_calc_finite_difference);
-   ezcl_kernel_release(kernel_refine_potential);
-   ezcl_kernel_release(kernel_reduce_sum_mass_stage1of2);
-   ezcl_kernel_release(kernel_reduce_sum_mass_stage2of2);
-   ezcl_kernel_release(kernel_reduce_epsum_mass_stage1of2);
-   ezcl_kernel_release(kernel_reduce_epsum_mass_stage2of2);
-#endif
-#ifdef HAVE_MPI
-   if (mesh->parallel) state_memory.pfini();
-#endif
-}
-
-#ifdef HAVE_MPI
-// User-defined MPI reduction (registered through MPI_Op_create) that adds
-// esum_type values while carrying the rounding error of each addition in the
-// correction field.
-//
-//   in               Incoming elements (read only)
-//   inout            Local elements; each is replaced by the combined result
-//   len              Number of elements in both arrays
-//   MPI_TWO_DOUBLES  Datatype handle supplied by MPI (unused)
-//
-// MPI may call a user function with more than one element, so all *len
-// elements are combined pairwise; for *len == 1 the arithmetic is exactly
-// the single-element sum.
-void knuth_sum(struct esum_type *in, struct esum_type *inout, int *len, MPI_Datatype *MPI_TWO_DOUBLES)
-{
-   for (int ielem = 0; ielem < *len; ielem++) {
-      // Fold both corrections into the incoming addend
-      double u = inout[ielem].sum;
-      double v = in[ielem].sum + (in[ielem].correction + inout[ielem].correction);
-
-      // Add, then recover what each operand lost to rounding
-      double upt = u + v;
-      double up  = upt - v;
-      double vpp = upt - up;
-
-      inout[ielem].sum = upt;
-      inout[ielem].correction = (u - up) + (v - vpp);
-   }
-
-   // The handle is not needed: every element is one esum_type
-   (void)MPI_TWO_DOUBLES;
-}
-#endif
-
-// Append boundary cells to a mesh that has none.  One new cell is created
-// for every domain edge touched by an existing cell; the mesh and state
-// arrays are enlarged, the new cells are filled with mirrored state and the
-// neighbor indices are linked up.  The previous cell count is kept in
-// save_ncells and the elapsed time is added to the apply-BCs timer.
-void State::add_boundary_cells(void)
-{
-   struct timeval tstart_cpu;
-   cpu_timer_start(&tstart_cpu);
-
-   // Aliases into the mesh; ncells is a reference and is updated at the end
-   size_t &ncells = mesh->ncells;
-   vector<int>       &index = mesh->index;
-   vector<spatial_t> &x     = mesh->x;
-   vector<spatial_t> &dx    = mesh->dx;
-   vector<spatial_t> &y     = mesh->y;
-   vector<spatial_t> &dy    = mesh->dy;
-
-   int *i        = mesh->i;
-   int *j        = mesh->j;
-   int *level    = mesh->level;
-   int *celltype = mesh->celltype;
-   int *nlft     = mesh->nlft;
-   int *nrht     = mesh->nrht;
-   int *nbot     = mesh->nbot;
-   int *ntop     = mesh->ntop;
-
-   vector<int> &lev_ibegin = mesh->lev_ibegin;
-   vector<int> &lev_iend   = mesh->lev_iend;
-   vector<int> &lev_jbegin = mesh->lev_jbegin;
-   vector<int> &lev_jend   = mesh->lev_jend;
-
-   // Pre-count the cells to add: one for each edge a cell sits on
-   int icount = 0;
-   for (uint ic=0; ic<ncells; ic++) {
-      const int lev = level[ic];
-      if (i[ic] == lev_ibegin[lev]) icount++; // Left boundary
-      if (i[ic] == lev_iend[lev])   icount++; // Right boundary
-      if (j[ic] == lev_jbegin[lev]) icount++; // Bottom boundary
-      if (j[ic] == lev_jend[lev])   icount++; // Top boundary
-   }
-
-   int new_ncells = ncells + icount;
-
-   // Enlarge the state arrays for the new boundary cells
-   H=(state_t *)state_memory.memory_realloc(new_ncells, H);
-   U=(state_t *)state_memory.memory_realloc(new_ncells, U);
-   V=(state_t *)state_memory.memory_realloc(new_ncells, V);
-
-   // Enlarge the mesh arrays; the local aliases still name the old
-   // allocations at this point and are refreshed immediately afterwards
-   mesh->i        =(int *)mesh->mesh_memory.memory_realloc(new_ncells, i);
-   mesh->j        =(int *)mesh->mesh_memory.memory_realloc(new_ncells, j);
-   mesh->level    =(int *)mesh->mesh_memory.memory_realloc(new_ncells, level);
-   mesh->celltype =(int *)mesh->mesh_memory.memory_realloc(new_ncells, celltype);
-   mesh->nlft     =(int *)mesh->mesh_memory.memory_realloc(new_ncells, nlft);
-   mesh->nrht     =(int *)mesh->mesh_memory.memory_realloc(new_ncells, nrht);
-   mesh->nbot     =(int *)mesh->mesh_memory.memory_realloc(new_ncells, nbot);
-   mesh->ntop     =(int *)mesh->mesh_memory.memory_realloc(new_ncells, ntop);
-   i        = mesh->i;
-   j        = mesh->j;
-   level    = mesh->level;
-   celltype = mesh->celltype;
-   nlft     = mesh->nlft;
-   nrht     = mesh->nrht;
-   nbot     = mesh->nbot;
-   ntop     = mesh->ntop;
-
-   index.resize(new_ncells);
-   x.resize(new_ncells);
-   dx.resize(new_ncells);
-   y.resize(new_ncells);
-   dy.resize(new_ncells);
-
-   // Mark every neighbor of the new cells as not yet set
-   for (int nc=ncells; nc<new_ncells; nc++) {
-      nlft[nc] = -1;
-      nrht[nc] = -1;
-      nbot[nc] = -1;
-      ntop[nc] = -1;
-   }
-
-   // First pass: create the new cells.  Two of the neighbor indices and all
-   // the other data are set here.  The velocity component normal to the
-   // edge is negated to enforce the reflective boundary condition.
-   uint nc=ncells;
-   for (uint ic=0; ic<ncells; ic++) {
-      // Only entries at nc >= ncells are written below, so the level of
-      // cell ic can be read once
-      const int lev = level[ic];
-
-      if (i[ic] == lev_ibegin[lev]) {        // new cell to the left
-         nlft[ic] = nc;
-         nlft[nc] = nc;
-         nrht[nc] = ic;
-         i[nc] = lev_ibegin[lev]-1;
-         j[nc] = j[ic];
-         level[nc] = lev;
-         dx[nc] = dx[ic];
-         dy[nc] = dy[ic];
-         x[nc] = x[ic]-dx[ic];
-         y[nc] = y[ic];
-         H[nc] = H[ic];
-         U[nc] = -U[ic];
-         V[nc] = V[ic];
-         nc++;
-      }
-      if (i[ic] == lev_iend[lev]) {          // new cell to the right
-         nrht[ic] = nc;
-         nrht[nc] = nc;
-         nlft[nc] = ic;
-         i[nc] = lev_iend[lev]+1;
-         j[nc] = j[ic];
-         level[nc] = lev;
-         dx[nc] = dx[ic];
-         dy[nc] = dy[ic];
-         x[nc] = x[ic]+dx[ic];
-         y[nc] = y[ic];
-         H[nc] = H[ic];
-         U[nc] = -U[ic];
-         V[nc] = V[ic];
-         nc++;
-      }
-      if (j[ic] == lev_jbegin[lev]) {        // new cell below
-         nbot[ic] = nc;
-         nbot[nc] = nc;
-         ntop[nc] = ic;
-         i[nc] = i[ic];
-         j[nc] = lev_jbegin[lev]-1;
-         level[nc] = lev;
-         dx[nc] = dx[ic];
-         dy[nc] = dy[ic];
-         x[nc] = x[ic];
-         y[nc] = y[ic]-dy[ic];
-         H[nc] = H[ic];
-         U[nc] = U[ic];
-         V[nc] = -V[ic];
-         nc++;
-      }
-      if (j[ic] == lev_jend[lev]) {          // new cell above
-         ntop[ic] = nc;
-         ntop[nc] = nc;
-         nbot[nc] = ic;
-         i[nc] = i[ic];
-         j[nc] = lev_jend[lev]+1;
-         level[nc] = lev;
-         dx[nc] = dx[ic];
-         dy[nc] = dy[ic];
-         x[nc] = x[ic];
-         y[nc] = y[ic]+dy[ic];
-         H[nc] = H[ic];
-         U[nc] = U[ic];
-         V[nc] = -V[ic];
-         nc++;
-      }
-   }
-
-   // Second pass: set the other two neighbor indices of every new cell by
-   // walking through the neighbors of the cell it was created from
-   for (int nc=ncells; nc<new_ncells; nc++) {
-      const int lev = level[nc];
-
-      if (i[nc] == lev_ibegin[lev]-1) {      // left boundary cell
-         // A cell in the bottom/top row is its own bottom/top neighbor
-         nbot[nc] = (j[nc] == lev_jbegin[lev]) ? nc : nlft[nbot[nrht[nc]]];
-         ntop[nc] = (j[nc] == lev_jend[lev])   ? nc : nlft[ntop[nrht[nc]]];
-      }
-      if (i[nc] == lev_iend[lev]+1) {        // right boundary cell
-         if (lev <= level[nbot[nlft[nc]]]){
-            nbot[nc] = (j[nc] == lev_jbegin[lev]) ? nc : nrht[nbot[nlft[nc]]];
-            ntop[nc] = (j[nc] == lev_jend[lev])   ? nc : nrht[ntop[nlft[nc]]];
-         } else {
-            // calculation is a little different if going through a
-            // finer zoned region
-            nbot[nc] = nrht[nrht[nbot[nlft[nc]]]];
-            ntop[nc] = nrht[nrht[ntop[nlft[nc]]]];
-         }
-      }
-      if (j[nc] == lev_jbegin[lev]-1) {      // bottom boundary cell
-         nlft[nc] = (i[nc] == lev_ibegin[lev]) ? nc : nbot[nlft[ntop[nc]]];
-         nrht[nc] = (i[nc] == lev_iend[lev])   ? nc : nbot[nrht[ntop[nc]]];
-      }
-      if (j[nc] == lev_jend[lev]+1) {        // top boundary cell
-         if (lev <= level[nlft[nbot[nc]]]){
-            nlft[nc] = (i[nc] == lev_ibegin[lev]) ? nc : ntop[nlft[nbot[nc]]];
-            nrht[nc] = (i[nc] == lev_iend[lev])   ? nc : ntop[nrht[nbot[nc]]];
-         } else {
-            nlft[nc] = ntop[ntop[nlft[nbot[nc]]]];
-            nrht[nc] = ntop[ntop[nrht[nbot[nc]]]];
-         }
-      }
-   }
-
-   // Remember the original size so remove_boundary_cells() can restore it
-   save_ncells = ncells;
-   ncells = new_ncells;
-
-   cpu_timers[STATE_TIMER_APPLY_BCS] += cpu_timer_stop(tstart_cpu);
-}
-
-// Reflective boundary update for a mesh with boundary cells, restricted to
-// boundary cells whose source neighbor index is below mesh->ncells.  H is
-// copied from the neighbor and the velocity component normal to the edge
-// (U for left/right, V for bottom/top) is copied with its sign flipped.
-void State::apply_boundary_conditions_local(void)
-{
-   // NOTE(review): these pointers have static storage and are reassigned on
-   // every call -- confirm that this is intended.
-   static int *nlft, *nrht, *nbot, *ntop;
-
-   size_t &ncells = mesh->ncells;
-   nlft = mesh->nlft;
-   nrht = mesh->nrht;
-   nbot = mesh->nbot;
-   ntop = mesh->ntop;
-
-   int lowerBound, upperBound;
-   mesh->get_bounds(lowerBound, upperBound);
-   for (uint ic=lowerBound; ic<upperBound; ic++) {
-      if (mesh->is_left_boundary(ic) && nrht[ic] < (int)ncells) {
-         const int nr = nrht[ic];
-         H[ic] =  H[nr];
-         U[ic] = -U[nr];
-         V[ic] =  V[nr];
-      }
-      if (mesh->is_right_boundary(ic) && nlft[ic] < (int)ncells) {
-         const int nl = nlft[ic];
-         H[ic] =  H[nl];
-         U[ic] = -U[nl];
-         V[ic] =  V[nl];
-      }
-      if (mesh->is_bottom_boundary(ic) && ntop[ic] < (int)ncells) {
-         const int nt = ntop[ic];
-         H[ic] =  H[nt];
-         U[ic] =  U[nt];
-         V[ic] = -V[nt];
-      }
-      if (mesh->is_top_boundary(ic) && nbot[ic] < (int)ncells) {
-         const int nb = nbot[ic];
-         H[ic] =  H[nb];
-         U[ic] =  U[nb];
-         V[ic] = -V[nb];
-      }
-   }
-}
-
-// Reflective boundary update for a mesh with boundary cells, restricted to
-// boundary cells whose source neighbor index is at or beyond mesh->ncells.
-// H is copied from the neighbor and the velocity component normal to the
-// edge (U for left/right, V for bottom/top) is copied with its sign flipped.
-void State::apply_boundary_conditions_ghost(void)
-{
-   // NOTE(review): these pointers have static storage and are reassigned on
-   // every call -- confirm that this is intended.
-   static int *nlft, *nrht, *nbot, *ntop;
-
-   size_t &ncells = mesh->ncells;
-   nlft = mesh->nlft;
-   nrht = mesh->nrht;
-   nbot = mesh->nbot;
-   ntop = mesh->ntop;
-
-   int lowerBound, upperBound;
-   mesh->get_bounds(lowerBound, upperBound);
-   for (uint ic=lowerBound; ic<upperBound; ic++) {
-      if (mesh->is_left_boundary(ic) && nrht[ic] >= (int)ncells) {
-         const int nr = nrht[ic];
-         H[ic] =  H[nr];
-         U[ic] = -U[nr];
-         V[ic] =  V[nr];
-      }
-      if (mesh->is_right_boundary(ic) && nlft[ic] >= (int)ncells) {
-         const int nl = nlft[ic];
-         H[ic] =  H[nl];
-         U[ic] = -U[nl];
-         V[ic] =  V[nl];
-      }
-      if (mesh->is_bottom_boundary(ic) && ntop[ic] >= (int)ncells) {
-         const int nt = ntop[ic];
-         H[ic] =  H[nt];
-         U[ic] =  U[nt];
-         V[ic] = -V[nt];
-      }
-      if (mesh->is_top_boundary(ic) && nbot[ic] >= (int)ncells) {
-         const int nb = nbot[ic];
-         H[ic] =  H[nb];
-         U[ic] =  U[nb];
-         V[ic] = -V[nb];
-      }
-   }
-}
-
-// Reflective boundary update for a mesh with boundary cells.  Every boundary
-// cell in this caller's bounds takes H from its inward neighbor; the velocity
-// component normal to the edge (U for left/right, V for bottom/top) is
-// copied with its sign flipped, the other component unchanged.
-void State::apply_boundary_conditions(void)
-{
-   int *nlft = mesh->nlft;
-   int *nrht = mesh->nrht;
-   int *nbot = mesh->nbot;
-   int *ntop = mesh->ntop;
-
-   int lowerBound, upperBound;
-   mesh->get_bounds(lowerBound, upperBound);
-   for (uint ic=lowerBound; ic<upperBound; ic++) {
-      if (mesh->is_left_boundary(ic)) {
-         const int src = nrht[ic];
-         H[ic] =  H[src];
-         U[ic] = -U[src];
-         V[ic] =  V[src];
-      }
-      if (mesh->is_right_boundary(ic)) {
-         const int src = nlft[ic];
-         H[ic] =  H[src];
-         U[ic] = -U[src];
-         V[ic] =  V[src];
-      }
-      if (mesh->is_bottom_boundary(ic)) {
-         const int src = ntop[ic];
-         H[ic] =  H[src];
-         U[ic] =  U[src];
-         V[ic] = -V[src];
-      }
-      if (mesh->is_top_boundary(ic)) {
-         const int src = nbot[ic];
-         H[ic] =  H[src];
-         U[ic] =  U[src];
-         V[ic] = -V[src];
-      }
-   }
-}
-
-// Undo add_boundary_cells(): shrink the state and mesh arrays back to
-// save_ncells entries and make every cell on a domain edge its own neighbor
-// in that direction.  Does nothing when mesh->have_boundary is set.
-void State::remove_boundary_cells(void)
-{
-   if (mesh->have_boundary) return;
-
-   // With OpenMP only the master thread resizes; the barriers keep the other
-   // threads out of the arrays while that happens.
-#ifdef _OPENMP
-#pragma omp barrier
-#pragma omp master
-   {
-#endif
-      size_t &ncells = mesh->ncells;
-
-      // Resize to drop all the boundary cells
-      ncells = save_ncells;
-      H=(state_t *)state_memory.memory_realloc(save_ncells, H);
-      U=(state_t *)state_memory.memory_realloc(save_ncells, U);
-      V=(state_t *)state_memory.memory_realloc(save_ncells, V);
-
-      mesh->i        = (int *)mesh->mesh_memory.memory_realloc(save_ncells, mesh->i);
-      mesh->j        = (int *)mesh->mesh_memory.memory_realloc(save_ncells, mesh->j);
-      mesh->level    = (int *)mesh->mesh_memory.memory_realloc(save_ncells, mesh->level);
-      mesh->celltype = (int *)mesh->mesh_memory.memory_realloc(save_ncells, mesh->celltype);
-      mesh->nlft     = (int *)mesh->mesh_memory.memory_realloc(save_ncells, mesh->nlft);
-      mesh->nrht     = (int *)mesh->mesh_memory.memory_realloc(save_ncells, mesh->nrht);
-      mesh->nbot     = (int *)mesh->mesh_memory.memory_realloc(save_ncells, mesh->nbot);
-      mesh->ntop     = (int *)mesh->mesh_memory.memory_realloc(save_ncells, mesh->ntop);
-
-      mesh->index.resize(save_ncells);
-      mesh->x.resize(save_ncells);
-      mesh->dx.resize(save_ncells);
-      mesh->y.resize(save_ncells);
-      mesh->dy.resize(save_ncells);
-#ifdef _OPENMP
-   }
-#pragma omp barrier
-#endif
-
-   mesh->set_bounds(mesh->ncells);
-
-   // Reset the neighbors that pointed at the dropped boundary cells
-   int lowerBound, upperBound;
-   mesh->get_bounds(lowerBound, upperBound);
-   for (uint ic=lowerBound; ic<upperBound; ic++) {
-      const int lev = mesh->level[ic];
-      if (mesh->i[ic] == mesh->lev_ibegin[lev]) mesh->nlft[ic] = ic;
-      if (mesh->i[ic] == mesh->lev_iend[lev])   mesh->nrht[ic] = ic;
-      if (mesh->j[ic] == mesh->lev_jbegin[lev]) mesh->nbot[ic] = ic;
-      if (mesh->j[ic] == mesh->lev_jend[lev])   mesh->ntop[ic] = ic;
-   }
-}
-
-// Compute the timestep as the minimum over all REAL_CELL cells of
-// sigma / (xspeed + yspeed), where each speed is (|velocity| + sqrt(g*H))
-// divided by the cell size of the cell's level.  With MPI and a parallel
-// mesh the minimum is further reduced across ranks.
-//
-// NOTE(review): in an OpenMP build only the master thread assigns
-// globalmindeltaT before the return -- confirm that callers only use the
-// value returned on the master thread.
-double State::set_timestep(double g, double sigma)
-{
-   double globalmindeltaT;
-
-   struct timeval tstart_cpu;
-   cpu_timer_start(&tstart_cpu);
-
-   // Static, so that there is a single copy that all threads reduce into
-   static double mindeltaT;
-
-   int lowerBounds, upperBounds;
-   mesh->set_bounds(mesh->ncells);
-   mesh->get_bounds(lowerBounds, upperBounds);
-
-   // Reset the shared minimum once, before any thread contributes
-#ifdef _OPENMP
-#pragma omp barrier
-#pragma omp master
-   {
-#endif
-      mindeltaT = 1000;
-#ifdef _OPENMP
-   }
-#pragma omp barrier
-#endif
-
-   // Minimum over this thread's range of cells
-   double mymindeltaT = 1000.0;
-
-   for (int ic=lowerBounds; ic<upperBounds; ic++) {
-      if (mesh->celltype[ic] != REAL_CELL) continue;
-
-      const int lev = mesh->level[ic];
-      const double wavespeed = sqrt(g*H[ic]);
-      const double xspeed = (fabs(U[ic])+wavespeed)/mesh->lev_deltax[lev];
-      const double yspeed = (fabs(V[ic])+wavespeed)/mesh->lev_deltay[lev];
-      const double deltaT = sigma/(xspeed+yspeed);
-      if (deltaT < mymindeltaT) mymindeltaT = deltaT;
-   }
-
-   // Fold the per-thread minimum into the shared one
-#ifdef _OPENMP
-#pragma omp critical
-   {
-#endif
-      if (mymindeltaT < mindeltaT) mindeltaT = mymindeltaT;
-#ifdef _OPENMP
-   } // End critical region
-#pragma omp barrier
-#endif
-
-#ifdef _OPENMP
-#pragma omp master
-   {
-#endif
-      globalmindeltaT = mindeltaT;
-#ifdef HAVE_MPI
-      if (mesh->parallel) MPI_Allreduce(&mindeltaT, &globalmindeltaT, 1, MPI_DOUBLE, MPI_MIN, MPI_COMM_WORLD);
-#endif
-
-      cpu_timers[STATE_TIMER_SET_TIMESTEP] += cpu_timer_stop(tstart_cpu);
-#ifdef _OPENMP
-   } // End master region
-#pragma omp barrier
-#endif
-
-   return(globalmindeltaT);
-}
-
-#ifdef HAVE_OPENCL
-double State::gpu_set_timestep(double sigma)
-{
- double deltaT, globalmindeltaT;
-
- struct timeval tstart_cpu;
- cpu_timer_start(&tstart_cpu);
-
- cl_command_queue command_queue = ezcl_get_command_queue();
-
- size_t &ncells = mesh->ncells;
-#ifdef HAVE_MPI
- int &parallel = mesh->parallel;
-#endif
- cl_mem &dev_level = mesh->dev_level;
- cl_mem &dev_celltype = mesh->dev_celltype;
- cl_mem &dev_levdx = mesh->dev_levdx;
- cl_mem &dev_levdy = mesh->dev_levdy;
-
- assert(dev_H);
- assert(dev_U);
- assert(dev_V);
- assert(dev_level);
- assert(dev_celltype);
- assert(dev_levdx);
- assert(dev_levdy);
-
- size_t local_work_size = 128;
- size_t global_work_size = ((ncells+local_work_size - 1) /local_work_size) * local_work_size;
- size_t block_size = global_work_size/local_work_size;
-
- cl_mem dev_redscratch = ezcl_malloc(NULL, const_cast<char *>("dev_redscratch"), &block_size, sizeof(cl_real_t), CL_MEM_READ_WRITE, 0);
-
- /*
- __kernel void set_timestep_cl(
- const int ncells, // 0 Total number of cells.
- const real_t sigma, // 1
- __global const state_t *H, // 2
- __global const state_t *U, // 3
- __global const state_t *V, // 4
- __global const int *level, // 5 Array of level information.
- __global const int *celltype, // 6
- __global const real_t *lev_dx, // 7
- __global const real_t *lev_dy, // 8
- __global real_t *redscratch, // 9
- __global real_t *deltaT, // 10
- __local real_t *tile) // 11
- */
-
- real_t sigma_local = sigma;
- ezcl_set_kernel_arg(kernel_set_timestep, 0, sizeof(cl_int), (void *)&ncells);
- ezcl_set_kernel_arg(kernel_set_timestep, 1, sizeof(cl_real_t), (void *)&sigma_local);
- ezcl_set_kernel_arg(kernel_set_timestep, 2, sizeof(cl_mem), (void *)&dev_H);
- ezcl_set_kernel_arg(kernel_set_timestep, 3, sizeof(cl_mem), (void *)&dev_U);
- ezcl_set_kernel_arg(kernel_set_timestep, 4, sizeof(cl_mem), (void *)&dev_V);
- ezcl_set_kernel_arg(kernel_set_timestep, 5, sizeof(cl_mem), (void *)&dev_level);
- ezcl_set_kernel_arg(kernel_set_timestep, 6, sizeof(cl_mem), (void *)&dev_celltype);
- ezcl_set_kernel_arg(kernel_set_timestep, 7, sizeof(cl_mem), (void *)&dev_levdx);
- ezcl_set_kernel_arg(kernel_set_timestep, 8, sizeof(cl_mem), (void *)&dev_levdy);
- ezcl_set_kernel_arg(kernel_set_timestep, 9, sizeof(cl_mem), (void *)&dev_redscratch);
- ezcl_set_kernel_arg(kernel_set_timestep, 10, sizeof(cl_mem), (void *)&dev_deltaT);
- ezcl_set_kernel_arg(kernel_set_timestep, 11, local_work_size*sizeof(cl_real_t), NULL);
-
- ezcl_enqueue_ndrange_kernel(command_queue, kernel_set_timestep, 1, NULL, &global_work_size, &local_work_size, NULL);
-
- if (block_size > 1){
- /*
- __kernel void finish_reduction_min_cl(
- const int isize,
- __global real_t *redscratch,
- __global real_t *deltaT,
- __local real_t *tile)
- */
- ezcl_set_kernel_arg(kernel_reduction_min, 0, sizeof(cl_int), (void *)&block_size);
- ezcl_set_kernel_arg(kernel_reduction_min, 1, sizeof(cl_mem), (void *)&dev_redscratch);
- ezcl_set_kernel_arg(kernel_reduction_min, 2, sizeof(cl_mem), (void *)&dev_deltaT);
- ezcl_set_kernel_arg(kernel_reduction_min, 3, local_work_size*sizeof(cl_real_t), NULL);
-
- ezcl_enqueue_ndrange_kernel(command_queue, kernel_reduction_min, 1, NULL, &local_work_size, &local_work_size, NULL);
- }
-
- real_t deltaT_local;
- ezcl_enqueue_read_buffer(command_queue, dev_deltaT, CL_TRUE, 0, sizeof(cl_real_t), &deltaT_local, NULL);
- deltaT = deltaT_local;
-
- globalmindeltaT = deltaT;
-#ifdef HAVE_MPI
- if (parallel) MPI_Allreduce(&deltaT, &globalmindeltaT, 1, MPI_DOUBLE, MPI_MIN, MPI_COMM_WORLD);
-#endif
-
- ezcl_device_memory_delete(dev_redscratch);
-
- gpu_timers[STATE_TIMER_SET_TIMESTEP] += (long)(cpu_timer_stop(tstart_cpu)*1.0e9);
-
- return(globalmindeltaT);
-}
-#endif
-
-/**
- * Initialises the state with a circular region of raised height.
- *
- * Every cell is first set to `background` with zero U and V. Cells reported
- * by the k-D tree "circle interior" query receive `fill_value`; cells
- * reported by the "circle intersect weighted" query receive a blend of the
- * two heights using the weight returned for each of them.
- */
-void State::fill_circle(double  circ_radius,// Radius of circle in grid units.
-                        double  fill_value, // Circle height for shallow water.
-                        double  background) // Background height for shallow water.
-{
-   size_t &ncells = mesh->ncells;
-   vector<spatial_t> &x  = mesh->x;
-   vector<spatial_t> &dx = mesh->dx;
-   vector<spatial_t> &y  = mesh->y;
-   vector<spatial_t> &dy = mesh->dy;
-
-   // Flat surface at rest everywhere before the circle is painted in.
-   for (uint cell = 0; cell < ncells; cell++) {
-      H[cell] = background;
-      V[cell] = 0.0;
-      U[cell] = V[cell];
-   }
-
-   // Build a fresh k-D tree for the current mesh (slow but necessary here);
-   // it is destroyed again at the end of this routine.
-   mesh->kdtree_setup();
-
-   int nfound;                      // number of cells returned by a query
-   vector<int>    hits(ncells);     // indices of the returned cells
-   vector<double> blend(ncells);    // per-hit weight from the weighted query
-
-   // Cells inside the circle take the fill height.
-#ifdef FULL_PRECISION
-   KDTree_QueryCircleInterior_Double(&mesh->tree, &nfound, &(hits[0]), circ_radius, ncells,
-                                     &x[0], &dx[0],
-                                     &y[0], &dy[0]);
-#else
-   KDTree_QueryCircleInterior_Float(&mesh->tree, &nfound, &(hits[0]), circ_radius, ncells,
-                                    &x[0], &dx[0],
-                                    &y[0], &dy[0]);
-#endif
-   for (int k = 0; k < nfound; ++k) {
-      H[hits[k]] = fill_value;
-   }
-
-   // Cells returned by the intersect query get a weighted mix of both heights.
-#ifdef FULL_PRECISION
-   KDTree_QueryCircleIntersectWeighted_Double(&mesh->tree, &nfound, &(hits[0]), &(blend[0]),
-                                              circ_radius, ncells,
-                                              &x[0], &dx[0],
-                                              &y[0], &dy[0]);
-#else
-   KDTree_QueryCircleIntersectWeighted_Float(&mesh->tree, &nfound, &(hits[0]), &(blend[0]),
-                                             circ_radius, ncells,
-                                             &x[0], &dx[0],
-                                             &y[0], &dy[0]);
-#endif
-
-   for (int k = 0; k < nfound; ++k) {
-      H[hits[k]] = background + (fill_value - background) * blend[k];
-   }
-
-   KDTree_Destroy(&mesh->tree);
-}
-
-/**
- * Reorders the H, U and V state arrays through state_memory.memory_reorder
- * using the ordering given in `iorder`, and stores the returned pointers.
- */
-void State::state_reorder(vector<int> iorder)
-{
-   int *order = &iorder[0];
-
-   H = state_memory.memory_reorder(H, order);
-   U = state_memory.memory_reorder(U, order);
-   V = state_memory.memory_reorder(V, order);
-}
-
-/**
- * Forwards the rezone request to the mesh (passing state_memory along), then
- * refreshes the state pointers via memory_reset_ptrs() and adds the elapsed
- * time to the STATE_TIMER_REZONE_ALL timer. Under OpenMP the pointer refresh
- * and the timer update are executed by the master thread only.
- */
-void State::rezone_all(int icount, int jcount, vector<int> mpot)
-{
-   struct timeval rezone_start;
-   cpu_timer_start(&rezone_start);
-
-   mesh->rezone_all(icount, jcount, mpot, 1, state_memory);
-
-#ifdef _OPENMP
-#pragma omp master
-#endif
-   {
-      memory_reset_ptrs();
-
-      cpu_timers[STATE_TIMER_REZONE_ALL] += cpu_timer_stop(rezone_start);
-   } // end master region
-}
-
-
-#ifdef HAVE_OPENCL
-/**
- * Device-side rezone: forwards to mesh->gpu_rezone_all() with dev_mpot and
- * gpu_state_memory, then re-fetches the dev_H/dev_U/dev_V buffer handles by
- * name from gpu_state_memory and accumulates the elapsed time into
- * gpu_timers[STATE_TIMER_REZONE_ALL].
- *
- * @param icount, jcount  forwarded unchanged to the mesh.
- * @param localStencil    not used by this routine.
- */
-void State::gpu_rezone_all(int icount, int jcount, bool localStencil)
-{
-   struct timeval tstart_cpu;
-   cpu_timer_start(&tstart_cpu);
-
-   // The parameter is intentionally unused; the cast silences the
-   // unused-parameter warning without leaving dead, never-taken code behind.
-   (void)localStencil;
-
-   mesh->gpu_rezone_all(icount, jcount, dev_mpot, gpu_state_memory);
-   dev_H = (cl_mem)gpu_state_memory.get_memory_ptr("dev_H");
-   dev_U = (cl_mem)gpu_state_memory.get_memory_ptr("dev_U");
-   dev_V = (cl_mem)gpu_state_memory.get_memory_ptr("dev_V");
-
-   // cpu_timer_stop() is scaled by 1.0e9 before being accumulated as a long.
-   gpu_timers[STATE_TIMER_REZONE_ALL] += (long)(cpu_timer_stop(tstart_cpu)*1.0e9);
-}
-#endif
-
-// Flux helper macros used by the finite difference routines below.
-//
-// None of these macros is self-contained: each expands to an expression over
-// names that must already be in scope where it is used -- the state arrays
-// H, U and V for the (ic) forms, locals such as Hic/Uic/Vic, Hl/Ul/Vl, ... for
-// the suffixed forms, and `ghalf` for every form containing a ghalf term.
-//
-// Square of x. The argument is expanded twice, so it must not have side effects.
-#define SQ(x) ((x)*(x))
-//define macro to find minimum of 3 values
-//#define MIN3(a,b,c) (min(min((a),(b)),(c)))
-
-// X-direction flux expressions for the cell with index ic.
-#define HXFLUX(ic) ( U[ic] )
-#define UXFLUX(ic) ( SQ(U[ic])/H[ic] + ghalf*SQ(H[ic]) )
-#define UVFLUX(ic) ( U[ic]*V[ic]/H[ic] )
-
-// Same x-direction expressions written over local copies of the state:
-// suffix IC reads Hic/Uic/Vic, NL reads Hl/Ul/Vl, NR reads Hr/Ur/Vr,
-// NB reads Hb/Ub/Vb and NT reads Ht/Ut/Vt.
-#define HXFLUXIC ( Uic )
-#define HXFLUXNL ( Ul )
-#define HXFLUXNR ( Ur )
-#define HXFLUXNB ( Ub )
-#define HXFLUXNT ( Ut )
-
-#define UXFLUXIC ( SQ(Uic)/Hic + ghalf*SQ(Hic) )
-#define UXFLUXNL ( SQ(Ul)/Hl + ghalf*SQ(Hl) )
-#define UXFLUXNR ( SQ(Ur)/Hr + ghalf*SQ(Hr) )
-#define UXFLUXNB ( SQ(Ub)/Hb + ghalf*SQ(Hb) )
-#define UXFLUXNT ( SQ(Ut)/Ht + ghalf*SQ(Ht) )
-
-#define UVFLUXIC ( Uic*Vic/Hic )
-#define UVFLUXNL ( Ul*Vl/Hl )
-#define UVFLUXNR ( Ur*Vr/Hr )
-#define UVFLUXNB ( Ub*Vb/Hb )
-#define UVFLUXNT ( Ut*Vt/Ht )
-
-// Y-direction flux expressions for the cell with index ic.
-#define HYFLUX(ic) ( V[ic] )
-#define VUFLUX(ic) ( V[ic]*U[ic]/H[ic] )
-#define VYFLUX(ic) ( SQ(V[ic])/H[ic] + ghalf*SQ(H[ic]) )
-
-// Y-direction expressions over the same local names as the x-direction set.
-#define HYFLUXIC ( Vic )
-#define HYFLUXNL ( Vl )
-#define HYFLUXNR ( Vr )
-#define HYFLUXNB ( Vb )
-#define HYFLUXNT ( Vt )
-
-#define VUFLUXIC ( Vic*Uic/Hic )
-#define VUFLUXNL ( Vl*Ul/Hl )
-#define VUFLUXNR ( Vr*Ur/Hr )
-#define VUFLUXNB ( Vb*Ub/Hb )
-#define VUFLUXNT ( Vt*Ut/Ht )
-
-#define VYFLUXIC ( SQ(Vic)/Hic + ghalf*SQ(Hic) )
-#define VYFLUXNL ( SQ(Vl)/Hl + ghalf*SQ(Hl) )
-#define VYFLUXNR ( SQ(Vr)/Hr + ghalf*SQ(Hr) )
-#define VYFLUXNB ( SQ(Vb)/Hb + ghalf*SQ(Hb) )
-#define VYFLUXNT ( SQ(Vt)/Ht + ghalf*SQ(Ht) )
-
-
-// Flux expressions over the x-direction locals Hxminus/Uxminus/Vxminus and
-// Hxplus/Uxplus/Vxplus.
-#define HNEWXFLUXMINUS ( Uxminus )
-#define HNEWXFLUXPLUS ( Uxplus )
-#define UNEWXFLUXMINUS ( SQ(Uxminus)/Hxminus + ghalf*SQ(Hxminus) )
-#define UNEWXFLUXPLUS ( SQ(Uxplus) /Hxplus + ghalf*SQ(Hxplus) )
-#define UVNEWFLUXMINUS ( Uxminus*Vxminus/Hxminus )
-#define UVNEWFLUXPLUS ( Uxplus *Vxplus /Hxplus )
-
-// Flux expressions over the y-direction locals Hyminus/Uyminus/Vyminus and
-// Hyplus/Uyplus/Vyplus.
-#define HNEWYFLUXMINUS ( Vyminus )
-#define HNEWYFLUXPLUS ( Vyplus )
-#define VNEWYFLUXMINUS ( SQ(Vyminus)/Hyminus + ghalf*SQ(Hyminus) )
-#define VNEWYFLUXPLUS ( SQ(Vyplus) /Hyplus + ghalf*SQ(Hyplus) )
-#define VUNEWFLUXMINUS ( Vyminus*Uyminus/Hyminus )
-#define VUNEWFLUXPLUS ( Vyplus *Uyplus /Hyplus )
-
-// XXX ADDED XXX
-// Variants over the second-neighbour locals (Hlt/Ult/Vlt, Hrt/Urt/Vrt,
-// Hbr/Ubr/Vbr, Htr/Utr/Vtr) and the "...2" locals (Hxminus2, Uxplus2, ...).
-// NOTE(review): these use SQR rather than the SQ macro defined above; SQR is
-// not defined in this part of the file -- confirm where it comes from.
-#define HXFLUXNLT ( Ult )
-#define HXFLUXNRT ( Urt )
-#define UXFLUXNLT ( SQR(Ult)/Hlt + ghalf*SQR(Hlt) )
-#define UXFLUXNRT ( SQR(Urt)/Hrt + ghalf*SQR(Hrt) )
-#define UVFLUXNLT ( Ult*Vlt/Hlt )
-#define UVFLUXNRT ( Urt*Vrt/Hrt )
-#define HYFLUXNBR ( Vbr )
-#define HYFLUXNTR ( Vtr )
-#define VUFLUXNBR ( Vbr*Ubr/Hbr )
-#define VUFLUXNTR ( Vtr*Utr/Htr )
-#define VYFLUXNBR ( SQR(Vbr)/Hbr + ghalf*SQR(Hbr) )
-#define VYFLUXNTR ( SQR(Vtr)/Htr + ghalf*SQR(Htr) )
-#define HNEWXFLUXMINUS2 ( Uxminus2 )
-#define HNEWXFLUXPLUS2 ( Uxplus2 )
-#define UNEWXFLUXMINUS2 ( SQR(Uxminus2)/Hxminus2 + ghalf*SQR(Hxminus2) )
-#define UNEWXFLUXPLUS2 ( SQR(Uxplus2) /Hxplus2 + ghalf*SQR(Hxplus2) )
-#define UVNEWFLUXMINUS2 ( Uxminus2*Vxminus2/Hxminus2 )
-#define UVNEWFLUXPLUS2 ( Uxplus2 *Vxplus2 /Hxplus2 )
-#define HNEWYFLUXMINUS2 ( Vyminus2 )
-#define HNEWYFLUXPLUS2 ( Vyplus2 )
-#define VNEWYFLUXMINUS2 ( SQR(Vyminus2)/Hyminus2 + ghalf*SQR(Hyminus2) )
-#define VNEWYFLUXPLUS2 ( SQR(Vyplus2) /Hyplus2 + ghalf*SQR(Hyplus2) )
-#define VUNEWFLUXMINUS2 ( Vyminus2*Uyminus2/Hyminus2 )
-#define VUNEWFLUXPLUS2 ( Vyplus2 *Uyplus2 /Hyplus2 )
-
-/**
- * Advances the state arrays H, U and V by one step of size deltaT.
- *
- * Outline, as implemented below:
- *   1. apply boundary conditions (with a ghost-cell exchange through
- *      L7_Update when built with MPI and running on more than one rank);
- *   2. allocate H_new/U_new/V_new with ncells_ghost entries each;
- *   3. for every cell in [lowerBound, upperBound) from mesh->get_bounds():
- *        - compute half-step values on the four faces with U_halfstep(),
- *        - turn them into face fluxes with the *NEW*FLUX* macros,
- *        - where a neighbour has a higher level number than the cell, compute
- *          a second set of half-step values against that neighbour's
- *          companion cell and average the two fluxes,
- *        - compute the artificial viscosity corrections with w_corrector(),
- *        - combine everything with U_fullstep() into the *_new arrays;
- *   4. replace H/U/V by the new arrays through state_memory.memory_replace().
- *
- * Under OpenMP the allocation, replacement and timer updates are done by the
- * master thread, separated from the cell loop by barriers.
- *
- * @param deltaT  time step size passed to U_halfstep, w_corrector and
- *                U_fullstep.
- */
-void State::calc_finite_difference(double deltaT){
-   real_t g     = 9.80;   // gravitational constant
-   real_t ghalf = 0.5*g;  // referenced by name inside the flux macros
-
-   struct timeval tstart_cpu;
-   cpu_timer_start(&tstart_cpu);
-
-   size_t ncells        = mesh->ncells;
-   size_t &ncells_ghost = mesh->ncells_ghost;
-#ifdef _OPENMP
-#pragma omp master
-#endif
-   if (ncells_ghost < ncells) ncells_ghost = ncells;
-
-#ifdef HAVE_MPI
-   // We need to populate the ghost regions since the calc neighbors has just been
-   // established for the mesh shortly before
-   if (mesh->numpe > 1) {
-      apply_boundary_conditions_local();
-
-      // NOTE(review): calc_finite_difference_via_faces() places an
-      // `omp barrier` in front of the equivalent master region; confirm
-      // whether one is needed here as well before H/U/V are reallocated.
-#ifdef _OPENMP
-#pragma omp master
-      {
-#endif
-         H=(state_t *)state_memory.memory_realloc(ncells_ghost, H);
-         U=(state_t *)state_memory.memory_realloc(ncells_ghost, U);
-         V=(state_t *)state_memory.memory_realloc(ncells_ghost, V);
-
-         L7_Update(&H[0], L7_STATE_T, mesh->cell_handle);
-         L7_Update(&U[0], L7_STATE_T, mesh->cell_handle);
-         L7_Update(&V[0], L7_STATE_T, mesh->cell_handle);
-#ifdef _OPENMP
-      }
-#pragma omp barrier
-#endif
-
-      apply_boundary_conditions_ghost();
-   } else {
-      apply_boundary_conditions();
-   }
-#else
-   apply_boundary_conditions();
-#endif
-
-   // Function-level statics: the pointers are assigned inside a master region
-   // below and then dereferenced by the cell loop (presumably so that all
-   // OpenMP threads see the same allocation -- confirm).
-   static state_t *H_new, *U_new, *V_new;
-   int *nlft, *nrht, *nbot, *ntop, *level;
-
-   nlft  = mesh->nlft;
-   nrht  = mesh->nrht;
-   nbot  = mesh->nbot;
-   ntop  = mesh->ntop;
-   level = mesh->level;
-
-   vector<real_t> &lev_deltax = mesh->lev_deltax;
-   vector<real_t> &lev_deltay = mesh->lev_deltay;
-
-   int flags = RESTART_DATA;
-#if defined (HAVE_J7)
-   if (mesh->parallel) flags = LOAD_BALANCE_MEMORY;
-#endif
-
-#ifdef _OPENMP
-#pragma omp master
-#endif
-   {
-      H_new = (state_t *)state_memory.memory_malloc(ncells_ghost,
-                                                    sizeof(state_t),
-                                                    "H_new", flags);
-      U_new = (state_t *)state_memory.memory_malloc(ncells_ghost,
-                                                    sizeof(state_t),
-                                                    "U_new", flags);
-      V_new = (state_t *)state_memory.memory_malloc(ncells_ghost,
-                                                    sizeof(state_t),
-                                                    "V_new", flags);
-   }
-#ifdef _OPENMP
-#pragma omp barrier
-#endif
-
-   int lowerBound, upperBound;
-   mesh->get_bounds(lowerBound, upperBound);
-
-   for(int gix = lowerBound; gix < upperBound; gix++) {
-#if DEBUG >= 3
-      printf("%d: DEBUG gix is %d at line %d in file %s\n",mesh->mype,gix,__LINE__,__FILE__);
-#endif
-
-      // Level and the four direct neighbours of the current cell.
-      int lvl = level[gix];
-      int nl  = nlft[gix];
-      int nr  = nrht[gix];
-      int nt  = ntop[gix];
-      int nb  = nbot[gix];
-
-      real_t Hic = H[gix];
-      real_t Uic = U[gix];
-      real_t Vic = V[gix];
-
-#if DEBUG >= 3
-      if (nl < 0 || nl >= (int)ncells_ghost ) printf("%d: Problem at file %s line %d with nl %d\n",mesh->mype,__FILE__,__LINE__,nl);
-#endif
-      int nll = nlft[nl];
-      real_t Hl = H[nl];
-      real_t Ul = U[nl];
-      real_t Vl = V[nl];
-
-#if DEBUG >= 3
-      if (nr < 0 || nr >= (int)ncells_ghost ) printf("%d: Problem at file %s line %d with nr %d\n",mesh->mype,__FILE__,__LINE__,nr);
-#endif
-      int nrr = nrht[nr];
-      real_t Hr = H[nr];
-      real_t Ur = U[nr];
-      real_t Vr = V[nr];
-
-#if DEBUG >= 3
-      if (nt < 0 || nt >= (int)ncells_ghost ) printf("%d: Problem at file %s line %d with nt %d\n",mesh->mype,__FILE__,__LINE__,nt);
-#endif
-      int ntt = ntop[nt];
-      real_t Ht = H[nt];
-      real_t Ut = U[nt];
-      real_t Vt = V[nt];
-
-#if DEBUG >= 3
-      if (nb < 0 || nb >= (int)ncells_ghost ) printf("%d: Problem at file %s line %d with nb %d\n",mesh->mype,__FILE__,__LINE__,nb);
-#endif
-      int nbb = nbot[nb];
-      real_t Hb = H[nb];
-      real_t Ub = U[nb];
-      real_t Vb = V[nb];
-
-      // Companion cells: top of the left/right neighbours, right of the
-      // top/bottom neighbours. They are only read when the corresponding
-      // neighbour has a higher level number than the current cell.
-      int nlt = ntop[nl];
-      int nrt = ntop[nr];
-      int ntr = nrht[nt];
-      int nbr = nrht[nb];
-
-      // Second neighbours. Only H and the velocity component along the
-      // respective direction are needed (U for left/right, V for top/bottom).
-#if DEBUG >= 3
-      if (nll < 0 || nll >= (int)ncells_ghost ) printf("%d: Problem at file %s line %d with nll %d\n",mesh->mype,__FILE__,__LINE__,nll);
-#endif
-      real_t Hll = H[nll];
-      real_t Ull = U[nll];
-
-#if DEBUG >= 3
-      if (nrr < 0 || nrr >= (int)ncells_ghost ) printf("%d: Problem at file %s line %d with nrr %d\n",mesh->mype,__FILE__,__LINE__,nrr);
-#endif
-      real_t Hrr = H[nrr];
-      real_t Urr = U[nrr];
-
-#if DEBUG >= 3
-      if (ntt < 0 || ntt >= (int)ncells_ghost ) printf("%d: Problem at file %s line %d with ntt %d\n",mesh->mype,__FILE__,__LINE__,ntt);
-#endif
-      real_t Htt = H[ntt];
-      real_t Vtt = V[ntt];
-
-#if DEBUG >= 3
-      if (nbb < 0 || nbb >= (int)ncells_ghost ) {printf("%d: Problem at file %s line %d ic %d %d with nbb %d\n",mesh->mype,__FILE__,__LINE__,gix,gix+mesh->noffset,nbb); sleep(15); }
-#endif
-      real_t Hbb = H[nbb];
-      real_t Vbb = V[nbb];
-
-#if DEBUG >= 3
-      if (lvl < 0 || lvl >= (int)lev_deltax.size() ) printf("%d: Problem at file %s line %d with lvl %d\n",mesh->mype,__FILE__,__LINE__,lvl);
-#endif
-      // Cell sizes of the current cell and of its neighbours, looked up by level.
-      real_t dxic = lev_deltax[lvl];
-      real_t dyic = lev_deltay[lvl];
-
-      real_t dxl = lev_deltax[level[nl]];
-      real_t dxr = lev_deltax[level[nr]];
-
-      real_t dyt = lev_deltay[level[nt]];
-      real_t dyb = lev_deltay[level[nb]];
-
-      real_t drl = dxl;
-      real_t drr = dxr;
-      real_t drt = dyt;
-      real_t drb = dyb;
-
-      real_t dric = dxic;
-
-      // For each side whose neighbour has a higher level number than the
-      // current cell, also load the companion cell and the cell beyond it.
-      int nltl = 0;
-      real_t Hlt = 0.0, Ult = 0.0, Vlt = 0.0;
-      real_t Hll2 = 0.0;
-      real_t Ull2 = 0.0;
-      if(lvl < level[nl]) {
-#if DEBUG >= 3
-         if (nlt < 0 || nlt >= (int)ncells_ghost ) printf("%d: Problem at file %s line %d with nlt %d\n",mesh->mype,__FILE__,__LINE__,nlt);
-#endif
-         Hlt = H[ ntop[nl] ];
-         Ult = U[ ntop[nl] ];
-         Vlt = V[ ntop[nl] ];
-         nltl = nlft[nlt];
-#if DEBUG >= 3
-         if (nltl < 0 || nltl >= (int)ncells_ghost ) printf("%d: Problem at file %s line %d with nltl %d\n",mesh->mype,__FILE__,__LINE__,nltl);
-#endif
-         Hll2 = H[nltl];
-         Ull2 = U[nltl];
-      }
-
-      int nrtr = 0;
-      real_t Hrt = 0.0, Urt = 0.0, Vrt = 0.0;
-      real_t Hrr2 = 0.0;
-      real_t Urr2 = 0.0;
-      if(lvl < level[nr]) {
-#if DEBUG >= 3
-         if (nrt < 0 || nrt >= (int)ncells_ghost ) printf("%d: Problem at file %s line %d with nrt %d\n",mesh->mype,__FILE__,__LINE__,nrt);
-#endif
-         Hrt = H[ ntop[nr] ];
-         Urt = U[ ntop[nr] ];
-         Vrt = V[ ntop[nr] ];
-         nrtr = nrht[nrt];
-#if DEBUG >= 3
-         if (nrtr < 0 || nrtr >= (int)ncells_ghost ) printf("%d: Problem at file %s line %d with nrtr %d\n",mesh->mype,__FILE__,__LINE__,nrtr);
-#endif
-         Hrr2 = H[nrtr];
-         Urr2 = U[nrtr];
-      }
-
-      int nbrb = 0;
-      real_t Hbr = 0.0, Ubr = 0.0, Vbr = 0.0;
-      real_t Hbb2 = 0.0;
-      real_t Vbb2 = 0.0;
-      if(lvl < level[nb]) {
-#if DEBUG >= 3
-         if (nbr < 0 || nbr >= (int)ncells_ghost ) printf("%d: Problem at file %s line %d with nbr %d\n",mesh->mype,__FILE__,__LINE__,nbr);
-#endif
-         Hbr = H[ nrht[nb] ];
-         Ubr = U[ nrht[nb] ];
-         Vbr = V[ nrht[nb] ];
-         nbrb = nbot[nbr];
-#if DEBUG >= 3
-         if (nbrb < 0 || nbrb >= (int)ncells_ghost ) {printf("%d: Problem at file %s line %d ic %d %d with nbrb %d\n",mesh->mype,__FILE__,__LINE__,gix,gix+mesh->noffset,nbrb); sleep(20);}
-#endif
-         Hbb2 = H[nbrb];
-         Vbb2 = V[nbrb];
-      }
-
-      int ntrt = 0;
-      real_t Htr = 0.0, Utr = 0.0, Vtr = 0.0;
-      real_t Htt2 = 0.0;
-      real_t Vtt2 = 0.0;
-      if(lvl < level[nt]) {
-#if DEBUG >= 3
-         if (ntr < 0 || ntr >= (int)ncells_ghost ) printf("%d: Problem at file %s line %d with ntr %d\n",mesh->mype,__FILE__,__LINE__,ntr);
-#endif
-         Htr = H[ nrht[nt] ];
-         Utr = U[ nrht[nt] ];
-         Vtr = V[ nrht[nt] ];
-         ntrt = ntop[ntr];
-#if DEBUG >= 3
-         if (ntrt < 0 || ntrt >= (int)ncells_ghost ) {printf("%d: Problem at file %s line %d ic %d %d with ntrt %d\n",mesh->mype,__FILE__,__LINE__,gix,gix+mesh->noffset,ntrt); sleep(20); }
-#endif
-         Htt2 = H[ntrt];
-         Vtt2 = V[ntrt];
-      }
-
-
-      // Half-step values on the left (minus) and right (plus) x faces and on
-      // the bottom (minus) and top (plus) y faces.
-      real_t Hxminus = U_halfstep(deltaT, Hl, Hic, HXFLUXNL, HXFLUXIC,
-                                  dxl, dxic, dxl, dxic, SQR(dxl), SQR(dxic));
-      real_t Uxminus = U_halfstep(deltaT, Ul, Uic, UXFLUXNL, UXFLUXIC,
-                                  dxl, dxic, dxl, dxic, SQR(dxl), SQR(dxic));
-      real_t Vxminus = U_halfstep(deltaT, Vl, Vic, UVFLUXNL, UVFLUXIC,
-                                  dxl, dxic, dxl, dxic, SQR(dxl), SQR(dxic));
-
-      real_t Hxplus = U_halfstep(deltaT, Hic, Hr, HXFLUXIC, HXFLUXNR,
-                                 dxic, dxr, dxic, dxr, SQR(dxic), SQR(dxr));
-      real_t Uxplus = U_halfstep(deltaT, Uic, Ur, UXFLUXIC, UXFLUXNR,
-                                 dxic, dxr, dxic, dxr, SQR(dxic), SQR(dxr));
-      real_t Vxplus = U_halfstep(deltaT, Vic, Vr, UVFLUXIC, UVFLUXNR,
-                                 dxic, dxr, dxic, dxr, SQR(dxic), SQR(dxr));
-
-      real_t Hyminus = U_halfstep(deltaT, Hb, Hic, HYFLUXNB, HYFLUXIC,
-                                  dyb, dyic, dyb, dyic, SQR(dyb), SQR(dyic));
-      real_t Uyminus = U_halfstep(deltaT, Ub, Uic, VUFLUXNB, VUFLUXIC,
-                                  dyb, dyic, dyb, dyic, SQR(dyb), SQR(dyic));
-      real_t Vyminus = U_halfstep(deltaT, Vb, Vic, VYFLUXNB, VYFLUXIC,
-                                  dyb, dyic, dyb, dyic, SQR(dyb), SQR(dyic));
-
-      real_t Hyplus = U_halfstep(deltaT, Hic, Ht, HYFLUXIC, HYFLUXNT,
-                                 dyic, dyt, dyic, dyt, SQR(dyic), SQR(dyt));
-      real_t Uyplus = U_halfstep(deltaT, Uic, Ut, VUFLUXIC, VUFLUXNT,
-                                 dyic, dyt, dyic, dyt, SQR(dyic), SQR(dyt));
-      real_t Vyplus = U_halfstep(deltaT, Vic, Vt, VYFLUXIC, VYFLUXNT,
-                                 dyic, dyt, dyic, dyt, SQR(dyic), SQR(dyt));
-
-      // Face fluxes evaluated from the half-step values (the macros read the
-      // H/U/V x/y minus/plus locals defined just above, and ghalf).
-      real_t Hxfluxminus = HNEWXFLUXMINUS;
-      real_t Uxfluxminus = UNEWXFLUXMINUS;
-      real_t Vxfluxminus = UVNEWFLUXMINUS;
-
-      real_t Hxfluxplus = HNEWXFLUXPLUS;
-      real_t Uxfluxplus = UNEWXFLUXPLUS;
-      real_t Vxfluxplus = UVNEWFLUXPLUS;
-
-      real_t Hyfluxminus = HNEWYFLUXMINUS;
-      real_t Uyfluxminus = VUNEWFLUXMINUS;
-      real_t Vyfluxminus = VNEWYFLUXMINUS;
-
-      real_t Hyfluxplus = HNEWYFLUXPLUS;
-      real_t Uyfluxplus = VUNEWFLUXPLUS;
-      real_t Vyfluxplus = VNEWYFLUXPLUS;
-
-      // Where the neighbour has a higher level number, a second half-step is
-      // taken against the companion cell and the two face fluxes are averaged.
-      real_t Hxminus2 = 0.0;
-      real_t Uxminus2 = 0.0;
-      real_t Vxminus2 = 0.0;
-      if(lvl < level[nl]) {
-
-         Hxminus2 = U_halfstep(deltaT, Hlt, Hic, HXFLUXNLT, HXFLUXIC,
-                               drl, dric, drl, dric, SQR(drl), SQR(dric));
-         Uxminus2 = U_halfstep(deltaT, Ult, Uic, UXFLUXNLT, UXFLUXIC,
-                               drl, dric, drl, dric, SQR(drl), SQR(dric));
-         Vxminus2 = U_halfstep(deltaT, Vlt, Vic, UVFLUXNLT, UVFLUXIC,
-                               drl, dric, drl, dric, SQR(drl), SQR(dric));
-
-         Hxfluxminus = (Hxfluxminus + HNEWXFLUXMINUS2) * HALF;
-         Uxfluxminus = (Uxfluxminus + UNEWXFLUXMINUS2) * HALF;
-         Vxfluxminus = (Vxfluxminus + UVNEWFLUXMINUS2) * HALF;
-
-      }
-
-      real_t Hxplus2 = 0.0;
-      real_t Uxplus2 = 0.0;
-      real_t Vxplus2 = 0.0;
-      if(lvl < level[nr]) {
-
-         Hxplus2 = U_halfstep(deltaT, Hic, Hrt, HXFLUXIC, HXFLUXNRT,
-                              dric, drr, dric, drr, SQR(dric), SQR(drr));
-         Uxplus2 = U_halfstep(deltaT, Uic, Urt, UXFLUXIC, UXFLUXNRT,
-                              dric, drr, dric, drr, SQR(dric), SQR(drr));
-         Vxplus2 = U_halfstep(deltaT, Vic, Vrt, UVFLUXIC, UVFLUXNRT,
-                              dric, drr, dric, drr, SQR(dric), SQR(drr));
-
-         Hxfluxplus = (Hxfluxplus + HNEWXFLUXPLUS2) * HALF;
-         Uxfluxplus = (Uxfluxplus + UNEWXFLUXPLUS2) * HALF;
-         Vxfluxplus = (Vxfluxplus + UVNEWFLUXPLUS2) * HALF;
-
-      }
-
-      real_t Hyminus2 = 0.0;
-      real_t Uyminus2 = 0.0;
-      real_t Vyminus2 = 0.0;
-      if(lvl < level[nb]) {
-
-         Hyminus2 = U_halfstep(deltaT, Hbr, Hic, HYFLUXNBR, HYFLUXIC,
-                               drb, dric, drb, dric, SQR(drb), SQR(dric));
-         Uyminus2 = U_halfstep(deltaT, Ubr, Uic, VUFLUXNBR, VUFLUXIC,
-                               drb, dric, drb, dric, SQR(drb), SQR(dric));
-         Vyminus2 = U_halfstep(deltaT, Vbr, Vic, VYFLUXNBR, VYFLUXIC,
-                               drb, dric, drb, dric, SQR(drb), SQR(dric));
-
-         Hyfluxminus = (Hyfluxminus + HNEWYFLUXMINUS2) * HALF;
-         Uyfluxminus = (Uyfluxminus + VUNEWFLUXMINUS2) * HALF;
-         Vyfluxminus = (Vyfluxminus + VNEWYFLUXMINUS2) * HALF;
-
-      }
-
-      real_t Hyplus2 = 0.0;
-      real_t Uyplus2 = 0.0;
-      real_t Vyplus2 = 0.0;
-      if(lvl < level[nt]) {
-
-         Hyplus2 = U_halfstep(deltaT, Hic, Htr, HYFLUXIC, HYFLUXNTR,
-                              dric, drt, dric, drt, SQR(dric), SQR(drt));
-         Uyplus2 = U_halfstep(deltaT, Uic, Utr, VUFLUXIC, VUFLUXNTR,
-                              dric, drt, dric, drt, SQR(dric), SQR(drt));
-         Vyplus2 = U_halfstep(deltaT, Vic, Vtr, VYFLUXIC, VYFLUXNTR,
-                              dric, drt, dric, drt, SQR(dric), SQR(drt));
-
-         Hyfluxplus = (Hyfluxplus + HNEWYFLUXPLUS2) * HALF;
-         Uyfluxplus = (Uyfluxplus + VUNEWFLUXPLUS2) * HALF;
-         Vyfluxplus = (Vyfluxplus + VNEWYFLUXPLUS2) * HALF;
-
-      }
-
-      ////////////////////////////////////////
-      /// Artificial Viscosity corrections ///
-      ////////////////////////////////////////
-
-      // Wherever a cell beyond a neighbour has a higher level number than that
-      // neighbour, its value is averaged with its own companion cell before it
-      // enters w_corrector().
-
-      if(level[nl] < level[nll]) {
-#if DEBUG >= 3
-         int nllt = ntop[nll];
-         if (nllt < 0 || nllt >= (int)ncells_ghost ) printf("%d: Problem at file %s line %d with nllt %d\n",mesh->mype,__FILE__,__LINE__,nllt);
-#endif
-         Hll = (Hll + H[ ntop[nll] ]) * HALF;
-         Ull = (Ull + U[ ntop[nll] ]) * HALF;
-      }
-
-      real_t Hr2 = Hr;
-      real_t Ur2 = Ur;
-      if(lvl < level[nr]) {
-         Hr2 = (Hr2 + Hrt) * HALF;
-         Ur2 = (Ur2 + Urt) * HALF;
-      }
-
-      real_t wminusx_H = w_corrector(deltaT, (dric+dxl)*HALF, fabs(Uxminus/Hxminus) + sqrt(g*Hxminus),
-                                     Hic-Hl, Hl-Hll, Hr2-Hic);
-
-      wminusx_H *= Hic - Hl;
-
-      if(lvl < level[nl]) {
-         if(level[nlt] < level[nltl]) {
-            Hll2 = (Hll2 + H[ ntop[nltl] ]) * HALF;
-         }
-         wminusx_H = ((w_corrector(deltaT, (dric+dxl)*HALF, fabs(Uxminus2/Hxminus2) +
-                                   sqrt(g*Hxminus2), Hic-Hlt, Hlt-Hll2, Hr2-Hic) *
-                       (Hic - Hlt)) + wminusx_H)*HALF*HALF;
-      }
-
-
-      if(level[nr] < level[nrr]) {
-#if DEBUG >= 3
-         int nrrt = ntop[nrr];
-         if (nrrt < 0 || nrrt >= (int)ncells_ghost ) printf("%d: Problem at file %s line %d with nrrt %d\n",mesh->mype,__FILE__,__LINE__,nrrt);
-#endif
-         Hrr = (Hrr + H[ ntop[nrr] ]) * HALF;
-         Urr = (Urr + U[ ntop[nrr] ]) * HALF;
-      }
-
-      real_t Hl2 = Hl;
-      real_t Ul2 = Ul;
-      if(lvl < level[nl]) {
-         Hl2 = (Hl2 + Hlt) * HALF;
-         Ul2 = (Ul2 + Ult) * HALF;
-      }
-
-      real_t wplusx_H = w_corrector(deltaT, (dric+dxr)*HALF, fabs(Uxplus/Hxplus) + sqrt(g*Hxplus),
-                                    Hr-Hic, Hic-Hl2, Hrr-Hr);
-
-      wplusx_H *= Hr - Hic;
-
-      if(lvl < level[nr]) {
-         if(level[nrt] < level[nrtr]) {
-            Hrr2 = (Hrr2 + H[ ntop[nrtr] ]) * HALF;
-         }
-         wplusx_H = ((w_corrector(deltaT, (dric+dxr)*HALF, fabs(Uxplus2/Hxplus2) +
-                                  sqrt(g*Hxplus2), Hrt-Hic, Hic-Hl2, Hrr2-Hrt) *
-                      (Hrt - Hic))+wplusx_H)*HALF*HALF;
-      }
-
-
-      real_t wminusx_U = w_corrector(deltaT, (dric+dxl)*HALF, fabs(Uxminus/Hxminus) + sqrt(g*Hxminus),
-                                     Uic-Ul, Ul-Ull, Ur2-Uic);
-
-      wminusx_U *= Uic - Ul;
-
-      if(lvl < level[nl]) {
-         if(level[nlt] < level[nltl]) {
-            Ull2 = (Ull2 + U[ ntop[nltl] ]) * HALF;
-         }
-         wminusx_U = ((w_corrector(deltaT, (dric+dxl)*HALF, fabs(Uxminus2/Hxminus2) +
-                                   sqrt(g*Hxminus2), Uic-Ult, Ult-Ull2, Ur2-Uic) *
-                       (Uic - Ult))+wminusx_U)*HALF*HALF;
-      }
-
-
-      real_t wplusx_U = w_corrector(deltaT, (dric+dxr)*HALF, fabs(Uxplus/Hxplus) + sqrt(g*Hxplus),
-                                    Ur-Uic, Uic-Ul2, Urr-Ur);
-
-      wplusx_U *= Ur - Uic;
-
-      if(lvl < level[nr]) {
-         if(level[nrt] < level[nrtr]) {
-            Urr2 = (Urr2 + U[ ntop[nrtr] ]) * HALF;
-         }
-         wplusx_U = ((w_corrector(deltaT, (dric+dxr)*HALF, fabs(Uxplus2/Hxplus2) +
-                                  sqrt(g*Hxplus2), Urt-Uic, Uic-Ul2, Urr2-Urt) *
-                      (Urt - Uic))+wplusx_U)*HALF*HALF;
-      }
-
-
-      if(level[nb] < level[nbb]) {
-#if DEBUG >= 3
-         int nbbr = nrht[nbb];
-         if (nbbr < 0 || nbbr >= (int)ncells_ghost ) printf("%d: Problem at file %s line %d gix %d %d with nbbr %d\n",mesh->mype,__FILE__,__LINE__,gix,gix+mesh->noffset,nbbr);
-#endif
-         Hbb = (Hbb + H[ nrht[nbb] ]) * HALF;
-         Vbb = (Vbb + V[ nrht[nbb] ]) * HALF;
-      }
-
-      real_t Ht2 = Ht;
-      real_t Vt2 = Vt;
-      if(lvl < level[nt]) {
-         Ht2 = (Ht2 + Htr) * HALF;
-         Vt2 = (Vt2 + Vtr) * HALF;
-      }
-
-      real_t wminusy_H = w_corrector(deltaT, (dric+dyb)*HALF, fabs(Vyminus/Hyminus) + sqrt(g*Hyminus),
-                                     Hic-Hb, Hb-Hbb, Ht2-Hic);
-
-      wminusy_H *= Hic - Hb;
-
-      if(lvl < level[nb]) {
-         if(level[nbr] < level[nbrb]) {
-            Hbb2 = (Hbb2 + H[ nrht[nbrb] ]) * HALF;
-         }
-         wminusy_H = ((w_corrector(deltaT, (dric+dyb)*HALF, fabs(Vyminus2/Hyminus2) +
-                                   sqrt(g*Hyminus2), Hic-Hbr, Hbr-Hbb2, Ht2-Hic) *
-                       (Hic - Hbr))+wminusy_H)*HALF*HALF;
-      }
-
-
-      if(level[nt] < level[ntt]) {
-#if DEBUG >= 3
-         int nttr = nrht[ntt];
-         if (nttr < 0 || nttr >= (int)ncells_ghost ) printf("%d: Problem at file %s line %d with nttr %d\n",mesh->mype,__FILE__,__LINE__,nttr);
-#endif
-         Htt = (Htt + H[ nrht[ntt] ]) * HALF;
-         Vtt = (Vtt + V[ nrht[ntt] ]) * HALF;
-      }
-
-      real_t Hb2 = Hb;
-      real_t Vb2 = Vb;
-      if(lvl < level[nb]) {
-         Hb2 = (Hb2 + Hbr) * HALF;
-         Vb2 = (Vb2 + Vbr) * HALF;
-      }
-
-      real_t wplusy_H = w_corrector(deltaT, (dric+dyt)*HALF, fabs(Vyplus/Hyplus) + sqrt(g*Hyplus),
-                                    Ht-Hic, Hic-Hb2, Htt-Ht);
-
-      wplusy_H *= Ht - Hic;
-
-      if(lvl < level[nt]) {
-         if(level[ntr] < level[ntrt]) {
-            Htt2 = (Htt2 + H[ nrht[ntrt] ]) * HALF;
-         }
-         wplusy_H = ((w_corrector(deltaT, (dric+dyt)*HALF, fabs(Vyplus2/Hyplus2) +
-                                  sqrt(g*Hyplus2), Htr-Hic, Hic-Hb2, Htt2-Htr) *
-                      (Htr - Hic))+wplusy_H)*HALF*HALF;
-      }
-
-      real_t wminusy_V = w_corrector(deltaT, (dric+dyb)*HALF, fabs(Vyminus/Hyminus) + sqrt(g*Hyminus),
-                                     Vic-Vb, Vb-Vbb, Vt2-Vic);
-
-      wminusy_V *= Vic - Vb;
-
-      if(lvl < level[nb]) {
-         if(level[nbr] < level[nbrb]) {
-            Vbb2 = (Vbb2 + V[ nrht[nbrb] ]) * HALF;
-         }
-         wminusy_V = ((w_corrector(deltaT, (dric+dyb)*HALF, fabs(Vyminus2/Hyminus2) +
-                                   sqrt(g*Hyminus2), Vic-Vbr, Vbr-Vbb2, Vt2-Vic) *
-                       (Vic - Vbr))+wminusy_V)*HALF*HALF;
-      }
-
-      real_t wplusy_V = w_corrector(deltaT, (dric+dyt)*HALF, fabs(Vyplus/Hyplus) + sqrt(g*Hyplus),
-                                    Vt-Vic, Vic-Vb2, Vtt-Vt);
-
-      wplusy_V *= Vt - Vic;
-
-      if(lvl < level[nt]) {
-         if(level[ntr] < level[ntrt]) {
-            Vtt2 = (Vtt2 + V[ nrht[ntrt] ]) * HALF;
-         }
-         wplusy_V = ((w_corrector(deltaT, (dric+dyt)*HALF, fabs(Vyplus2/Hyplus2) +
-                                  sqrt(g*Hyplus2), Vtr-Vic, Vic-Vb2, Vtt2-Vtr) *
-                      (Vtr - Vic))+wplusy_V)*HALF*HALF;
-      }
-
-      // Full step: flux differences plus the viscosity corrections. H receives
-      // the x and y corrections, U only the x ones and V only the y ones.
-      H_new[gix] = U_fullstep(deltaT, dxic, Hic,
-                      Hxfluxplus, Hxfluxminus, Hyfluxplus, Hyfluxminus)
-                 - wminusx_H + wplusx_H - wminusy_H + wplusy_H;
-      U_new[gix] = U_fullstep(deltaT, dxic, Uic,
-                      Uxfluxplus, Uxfluxminus, Uyfluxplus, Uyfluxminus)
-                 - wminusx_U + wplusx_U;
-      V_new[gix] = U_fullstep(deltaT, dxic, Vic,
-                      Vxfluxplus, Vxfluxminus, Vyfluxplus, Vyfluxminus)
-                 - wminusy_V + wplusy_V;
-
-#if DEBUG >= 1
-      if (DEBUG >= 1) {
-         real_t U_tmp = U_new[gix];
-         real_t V_tmp = V_new[gix];
-         // A negative zero compares equal to 0.0, so this replaces it by a
-         // positive zero before printing.
-         if (U_tmp == 0.0) U_tmp = 0.0;
-         if (V_tmp == 0.0) V_tmp = 0.0;
-         printf("DEBUG ic %d H_new %lf U_new %lf V_new %lf\n",gix,H_new[gix],U_tmp,V_tmp);
-      }
-#endif
-   } // cell loop
-
-#ifdef _OPENMP
-#pragma omp barrier
-#pragma omp master
-   {
-#endif
-      // Replace H with H_new and deallocate H. New memory will have the characteristics
-      // of the new memory and the name of the old. Both return and arg1 will be reset to new memory
-      H = (state_t *)state_memory.memory_replace(H, H_new);
-      U = (state_t *)state_memory.memory_replace(U, U_new);
-      V = (state_t *)state_memory.memory_replace(V, V_new);
-#ifdef _OPENMP
-   }
-#pragma omp barrier
-#endif
-
-#ifdef _OPENMP
-#pragma omp master
-#endif
-   cpu_timers[STATE_TIMER_FINITE_DIFFERENCE] += cpu_timer_stop(tstart_cpu);
-}
-
-void State::calc_finite_difference_via_faces(double deltaT){
- real_t g = 9.80; // gravitational constant
- real_t ghalf = HALF*g;
-
- struct timeval tstart_cpu;
- cpu_timer_start(&tstart_cpu);
-
- size_t ncells = mesh->ncells;
- size_t &ncells_ghost = mesh->ncells_ghost;
-#ifdef _OPENMP
-#pragma omp master
-#endif
- if (ncells_ghost < ncells) ncells_ghost = ncells;
-
- //printf("\nDEBUG finite diff\n");
-
-#ifdef HAVE_MPI
- // We need to populate the ghost regions since the calc neighbors has just been
- // established for the mesh shortly before
- if (mesh->numpe > 1) {
- apply_boundary_conditions_local();
-
-#ifdef _OPENMP
-#pragma omp barrier
-#pragma omp master
- {
-#endif
- H=(state_t *)state_memory.memory_realloc(ncells_ghost, H);
- U=(state_t *)state_memory.memory_realloc(ncells_ghost, U);
- V=(state_t *)state_memory.memory_realloc(ncells_ghost, V);
-
- L7_Update(&H[0], L7_STATE_T, mesh->cell_handle);
- L7_Update(&U[0], L7_STATE_T, mesh->cell_handle);
- L7_Update(&V[0], L7_STATE_T, mesh->cell_handle);
-#ifdef _OPENMP
- }
-#pragma omp barrier
-#endif
-
- apply_boundary_conditions_ghost();
- } else {
- apply_boundary_conditions();
- }
-#else
- apply_boundary_conditions();
-#endif
-
- int *nlft, *nrht, *nbot, *ntop, *level;
-
- nlft = mesh->nlft;
- nrht = mesh->nrht;
- nbot = mesh->nbot;
- ntop = mesh->ntop;
- level = mesh->level;
-
- vector<real_t> &lev_deltax = mesh->lev_deltax;
- vector<real_t> &lev_deltay = mesh->lev_deltay;
-
- int flags = 0;
- flags = RESTART_DATA;
-#if defined (HAVE_J7)
- if (mesh->parallel) flags = LOAD_BALANCE_MEMORY;
-#endif
-
-#ifdef _OPENMP
-#pragma omp barrier
-#pragma omp master
- {
-#endif
- mesh->calc_face_list_wbidirmap();
-#ifdef _OPENMP
- }
-#endif
-
- static vector<state_t> Hx, Ux, Vx;
-
-#ifdef _OPENMP
-#pragma omp barrier
-#pragma omp master
- {
-#endif
- Hx.resize(mesh->nxface);
- Ux.resize(mesh->nxface);
- Vx.resize(mesh->nxface);
-#ifdef _OPENMP
- }
-#pragma omp barrier
-#endif
-
-#ifdef _OPENMP
-#pragma omp for
-#endif
- for (int iface = 0; iface < mesh->nxface; iface++){
- int cell_lower = mesh->map_xface2cell_lower[iface];
- int cell_upper = mesh->map_xface2cell_upper[iface];
- int level_lower = level[cell_lower];
- int level_upper = level[cell_upper];
- if (level_lower == level_upper) {
- int lev = level_upper;
- real_t Cxhalf = 0.5*deltaT/mesh->lev_deltax[lev];
- Hx[iface]=HALF*(H[cell_upper]+H[cell_lower]) - Cxhalf*( HXFLUX(cell_upper)-HXFLUX(cell_lower) );
- Ux[iface]=HALF*(U[cell_upper]+U[cell_lower]) - Cxhalf*( UXFLUX(cell_upper)-UXFLUX(cell_lower) );
- Vx[iface]=HALF*(V[cell_upper]+V[cell_lower]) - Cxhalf*( UVFLUX(cell_upper)-UVFLUX(cell_lower) );
- } else {
- real_t dx_lower = mesh->lev_deltax[level[cell_lower]];
- real_t dx_upper = mesh->lev_deltax[level[cell_upper]];
-
- real_t FA_lower = dx_lower;
- real_t FA_upper = dx_upper;
- real_t FA_lolim = FA_lower*min(ONE, FA_upper/FA_lower);
- real_t FA_uplim = FA_upper*min(ONE, FA_lower/FA_upper);
-
- real_t CV_lower = SQ(dx_lower);
- real_t CV_upper = SQ(dx_upper);
- real_t CV_lolim = CV_lower*min(HALF, CV_upper/CV_lower);
- real_t CV_uplim = CV_upper*min(HALF, CV_lower/CV_upper);
-
- // Weighted half-step calculation
- //
- // (dx_lower*H[cell_upper]+dx_upper*H[cell_lower])
- // ----------------------------------------------- -
- // (dx_lower+dx_upper)
- //
- // ( (FA_uplim*HXFLUX(cell_upper))-(FA_lolim*HXFLUX(cell_lower)) )
- // 0.5*deltaT * ----------------------------------------------------------------
- // (CV_uplim+CV_lolim)
- //
-
- Hx[iface]=(dx_lower*H[cell_upper]+dx_upper*H[cell_lower])/(dx_lower+dx_upper) -
- HALF*deltaT*( (FA_uplim*HXFLUX(cell_upper))-(FA_lolim*HXFLUX(cell_lower)) )/
- (CV_uplim+CV_lolim);
- Ux[iface]=(dx_lower*U[cell_upper]+dx_upper*U[cell_lower])/(dx_lower+dx_upper) -
- HALF*deltaT*( (FA_uplim*UXFLUX(cell_upper))-(FA_lolim*UXFLUX(cell_lower)) )/
- (CV_uplim+CV_lolim);
- Vx[iface]=(dx_lower*V[cell_upper]+dx_upper*V[cell_lower])/(dx_lower+dx_upper) -
- HALF*deltaT*( (FA_uplim*UVFLUX(cell_upper))-(FA_lolim*UVFLUX(cell_lower)) )/
- (CV_uplim+CV_lolim);
- }
-#if DEBUG >= 2
- if (DEBUG >= 2) {
- printf("1st pass x direction iface %d i %d j %d lev %d nzlower %d nzupper %d %lf %lf %lf %lf %lf %lf %lf %lf %lf\n",
- iface, mesh->xface_i[iface], mesh->xface_j[iface], mesh->xface_level[iface],
- mesh->map_xface2cell_lower[iface], mesh->map_xface2cell_upper[iface],
- Hx[iface],Ux[iface],Vx[iface],
- H[cell_upper],H[cell_lower],U[cell_upper],U[cell_lower],V[cell_upper],V[cell_lower]);
- }
-#endif
- }
-#if DEBUG >= 2
- if (DEBUG >= 2) {
- printf("\n");
- }
-#endif
-
- static vector<state_t> Hy, Uy, Vy;
-
-#ifdef _OPENMP
-#pragma omp barrier
-#pragma omp master
- {
-#endif
- Hy.resize(mesh->nyface);
- Uy.resize(mesh->nyface);
- Vy.resize(mesh->nyface);
-#ifdef _OPENMP
- }
-#pragma omp barrier
-#endif
-
-#ifdef _OPENMP
-#pragma omp for
-#endif
- for (int iface = 0; iface < mesh->nyface; iface++){
- int cell_lower = mesh->map_yface2cell_lower[iface];
- int cell_upper = mesh->map_yface2cell_upper[iface];
- int level_lower = level[cell_lower];
- int level_upper = level[cell_upper];
- if (level_lower == level_upper) {
- int lev = level_upper;
- real_t Cyhalf = 0.5*deltaT/mesh->lev_deltay[lev];
- Hy[iface]=HALF*(H[cell_upper]+H[cell_lower]) - Cyhalf*( HYFLUX(cell_upper)-HYFLUX(cell_lower) );
- Uy[iface]=HALF*(U[cell_upper]+U[cell_lower]) - Cyhalf*( UVFLUX(cell_upper)-UVFLUX(cell_lower) );
- Vy[iface]=HALF*(V[cell_upper]+V[cell_lower]) - Cyhalf*( VYFLUX(cell_upper)-VYFLUX(cell_lower) );
- } else {
- real_t dy_lower = mesh->lev_deltay[level[cell_lower]];
- real_t dy_upper = mesh->lev_deltay[level[cell_upper]];
-
- real_t FA_lower = dy_lower;
- real_t FA_upper = dy_upper;
- real_t FA_lolim = FA_lower*min(ONE, FA_upper/FA_lower);
- real_t FA_uplim = FA_upper*min(ONE, FA_lower/FA_upper);
-
- real_t CV_lower = SQ(dy_lower);
- real_t CV_upper = SQ(dy_upper);
- real_t CV_lolim = CV_lower*min(HALF, CV_upper/CV_lower);
- real_t CV_uplim = CV_upper*min(HALF, CV_lower/CV_upper);
-
- // Weighted half-step calculation
- //
- // (dy_lower*H[cell_upper]+dy_upper*H[cell_lower])
- // ----------------------------------------------- -
- // (dy_lower+dy_upper)
- //
- // ( (FA_uplim*HYFLUX(cell_upper))-(FA_lolim*HYFLUX(cell_lower)) )
- // 0.5*deltaT * ----------------------------------------------------------------
- // (CV_uplim+CV_lolim)
- //
-
- Hy[iface]=(dy_lower*H[cell_upper]+dy_upper*H[cell_lower])/(dy_lower+dy_upper) -
- HALF*deltaT*( (FA_uplim*HYFLUX(cell_upper))-(FA_lolim*HYFLUX(cell_lower)) )/
- (CV_uplim+CV_lolim);
- Uy[iface]=(dy_lower*U[cell_upper]+dy_upper*U[cell_lower])/(dy_lower+dy_upper) -
- HALF*deltaT*( (FA_uplim*UVFLUX(cell_upper))-(FA_lolim*UVFLUX(cell_lower)) )/
- (CV_uplim+CV_lolim);
- Vy[iface]=(dy_lower*V[cell_upper]+dy_upper*V[cell_lower])/(dy_lower+dy_upper) -
- HALF*deltaT*( (FA_uplim*VYFLUX(cell_upper))-(FA_lolim*VYFLUX(cell_lower)) )/
- (CV_uplim+CV_lolim);
-
- }
-
-#if DEBUG >= 2
- if (DEBUG >= 2) {
- printf("1st pass y direction iface %d i %d j %d lev %d nzlower %d nzupper %d %lf %lf %lf %lf %lf %lf %lf %lf %lf\n",
- iface, mesh->yface_i[iface], mesh->yface_j[iface], mesh->yface_level[iface],
- mesh->map_yface2cell_lower[iface], mesh->map_yface2cell_upper[iface],
- Hy[iface],Uy[iface],Vy[iface],
- H[cell_upper],H[cell_lower],U[cell_upper],U[cell_lower],V[cell_upper],V[cell_lower]);
- }
-#endif
- }
-#if DEBUG >= 2
- if (DEBUG >= 2) {
- printf("\n");
- }
-#endif
-
- static state_t *H_new, *U_new, *V_new;
-
-#ifdef _OPENMP
-#pragma omp barrier
-#pragma omp master
- {
-#endif
- H_new = (state_t *)state_memory.memory_malloc(mesh->ncells_ghost, sizeof(state_t), "H_new", flags);
- U_new = (state_t *)state_memory.memory_malloc(mesh->ncells_ghost, sizeof(state_t), "U_new", flags);
- V_new = (state_t *)state_memory.memory_malloc(mesh->ncells_ghost, sizeof(state_t), "V_new", flags);
-#ifdef _OPENMP
- }
-#pragma omp barrier
-#endif
-
- int lowerBound, upperBound;
-
- mesh->get_bounds(lowerBound, upperBound);
- for (int ic = lowerBound; ic < upperBound; ic++){
-
- int lvl = level[ic];
- int nl = nlft[ic];
- int nr = nrht[ic];
- int nt = ntop[ic];
- int nb = nbot[ic];
-
- real_t Hic = H[ic];
- real_t Uic = U[ic];
- real_t Vic = V[ic];
-
- int nll = nlft[nl];
- real_t Hl = H[nl];
- real_t Ul = U[nl];
- //real_t Vl = V[nl];
-
- int nrr = nrht[nr];
- real_t Hr = H[nr];
- real_t Ur = U[nr];
- //real_t Vr = V[nr];
-
- int ntt = ntop[nt];
- real_t Ht = H[nt];
- //real_t Ut = U[nt];
- real_t Vt = V[nt];
-
- int nbb = nbot[nb];
- real_t Hb = H[nb];
- //real_t Ub = U[nb];
- real_t Vb = V[nb];
-
- int nlt = ntop[nl];
- int nrt = ntop[nr];
- int ntr = nrht[nt];
- int nbr = nrht[nb];
-
- real_t Hll = H[nll];
- real_t Ull = U[nll];
- //real_t Vll = V[nll];
-
- real_t Hrr = H[nrr];
- real_t Urr = U[nrr];
- //real_t Vrr = V[nrr];
-
- real_t Htt = H[ntt];
- //real_t Utt = U[ntt];
- real_t Vtt = V[ntt];
-
- real_t Hbb = H[nbb];
- //real_t Ubb = U[nbb];
- real_t Vbb = V[nbb];
-
- real_t dxic = lev_deltax[lvl];
- //real_t dyic = lev_deltay[lvl];
-
- real_t dxl = lev_deltax[level[nl]];
- real_t dxr = lev_deltax[level[nr]];
-
- real_t dyt = lev_deltay[level[nt]];
- real_t dyb = lev_deltay[level[nb]];
-
- //real_t drl = dxl;
- //real_t drr = dxr;
- //real_t drt = dyt;
- //real_t drb = dyb;
-
- real_t dric = dxic;
-
- int nltl = 0;
- real_t Hlt = 0.0, Ult = 0.0; // Vlt = 0.0;
- real_t Hll2 = 0.0;
- real_t Ull2 = 0.0;
- if(lvl < level[nl]) {
- Hlt = H[ ntop[nl] ];
- Ult = U[ ntop[nl] ];
- //Vlt = V[ ntop[nl] ];
-
- nltl = nlft[nlt];
- Hll2 = H[nltl];
- Ull2 = U[nltl];
- }
-
- int nrtr = 0;
- real_t Hrt = 0.0, Urt = 0.0; // Vrt = 0.0;
- real_t Hrr2 = 0.0;
- real_t Urr2 = 0.0;
- if(lvl < level[nr]) {
- Hrt = H[ ntop[nr] ];
- Urt = U[ ntop[nr] ];
- //Vrt = V[ ntop[nr] ];
-
- nrtr = nrht[nrt];
- Hrr2 = H[nrtr];
- Urr2 = U[nrtr];
- }
-
- int nbrb = 0;
- real_t Hbr = 0.0, Vbr = 0.0; // Ubr = 0.0
- real_t Hbb2 = 0.0;
- real_t Vbb2 = 0.0;
- if(lvl < level[nb]) {
- Hbr = H[ nrht[nb] ];
- //Ubr = U[ nrht[nb] ];
- Vbr = V[ nrht[nb] ];
-
- nbrb = nbot[nbr];
- Hbb2 = H[nbrb];
- Vbb2 = V[nbrb];
- }
-
- int ntrt = 0;
- real_t Htr = 0.0, Vtr = 0.0; // Utr = 0.0
- real_t Htt2 = 0.0;
- real_t Vtt2 = 0.0;
- if(lvl < level[nt]) {
- Htr = H[ nrht[nt] ];
- //Utr = U[ nrht[nt] ];
- Vtr = V[ nrht[nt] ];
-
- ntrt = ntop[ntr];
- Htt2 = H[ntrt];
- Vtt2 = V[ntrt];
- }
-
- ////////////////////////////////////////
- /// Artificial Viscosity corrections ///
- ////////////////////////////////////////
-
- real_t Hxminus = H[ic];
- real_t Uxminus = 0.0;
- real_t Vxminus = 0.0;
- if (mesh->map_xcell2face_left1[ic] >= 0){
- Hxminus = Hx[mesh->map_xcell2face_left1[ic]];
- Uxminus = Ux[mesh->map_xcell2face_left1[ic]];
- Vxminus = Vx[mesh->map_xcell2face_left1[ic]];
- }
-
- real_t Hxminus2 = 0.0;
- if(lvl < level[nl]) Hxminus2 = H[ic];
- real_t Uxminus2 = 0.0;
- real_t Vxminus2 = 0.0;
- if (mesh->map_xcell2face_left2[ic] >= 0) {
- Hxminus2 = Hx[mesh->map_xcell2face_left2[ic]];
- Uxminus2 = Ux[mesh->map_xcell2face_left2[ic]];
- Vxminus2 = Vx[mesh->map_xcell2face_left2[ic]];
- }
-
- real_t Hxplus = H[ic];
- real_t Uxplus = 0.0;
- real_t Vxplus = 0.0;
- if (mesh->map_xcell2face_right1[ic] >= 0){
- Hxplus = Hx[mesh->map_xcell2face_right1[ic]];
- Uxplus = Ux[mesh->map_xcell2face_right1[ic]];
- Vxplus = Vx[mesh->map_xcell2face_right1[ic]];
- }
-
- real_t Hxplus2 = 0.0;
- if(lvl < level[nr]) Hxplus2 = H[ic];
- real_t Uxplus2 = 0.0;
- real_t Vxplus2 = 0.0;
- if (mesh->map_xcell2face_right2[ic] >= 0){
- Hxplus2 = Hx[mesh->map_xcell2face_right2[ic]];
- Uxplus2 = Ux[mesh->map_xcell2face_right2[ic]];
- Vxplus2 = Vx[mesh->map_xcell2face_right2[ic]];
- }
-
- if(level[nl] < level[nll]) {
- Hll = (Hll + H[ ntop[nll] ]) * HALF;
- Ull = (Ull + U[ ntop[nll] ]) * HALF;
- }
-
- real_t Hr2 = Hr;
- real_t Ur2 = Ur;
- if(lvl < level[nr]) {
- Hr2 = (Hr2 + Hrt) * HALF;
- Ur2 = (Ur2 + Urt) * HALF;
- }
-
- real_t wminusx_H = w_corrector(deltaT, (dric+dxl)*HALF, fabs(Uxminus/Hxminus) + sqrt(g*Hxminus),
- Hic-Hl, Hl-Hll, Hr2-Hic);
-
- wminusx_H *= Hic - Hl;
-
- if(lvl < level[nl]) {
- if(level[nlt] < level[nltl])
- Hll2 = (Hll2 + H[ ntop[nltl] ]) * HALF;
- wminusx_H = ((w_corrector(deltaT, (dric+dxl)*HALF, fabs(Uxminus2/Hxminus2) +
- sqrt(g*Hxminus2), Hic-Hlt, Hlt-Hll2, Hr2-Hic) *
- (Hic - Hlt)) + wminusx_H)*HALF*HALF;
- }
-
- if(level[nr] < level[nrr]) {
- Hrr = (Hrr + H[ ntop[nrr] ]) * HALF;
- Urr = (Urr + U[ ntop[nrr] ]) * HALF;
- }
-
- real_t Hl2 = Hl;
- real_t Ul2 = Ul;
- if(lvl < level[nl]) {
- Hl2 = (Hl2 + Hlt) * HALF;
- Ul2 = (Ul2 + Ult) * HALF;
- }
-
- real_t wplusx_H = w_corrector(deltaT, (dric+dxr)*HALF, fabs(Uxplus/Hxplus) + sqrt(g*Hxplus),
- Hr-Hic, Hic-Hl2, Hrr-Hr);
-
- wplusx_H *= Hr - Hic;
-
- if(lvl < level[nr]) {
- if(level[nrt] < level[nrtr])
- Hrr2 = (Hrr2 + H[ ntop[nrtr] ]) * HALF;
- wplusx_H = ((w_corrector(deltaT, (dric+dxr)*HALF, fabs(Uxplus2/Hxplus2) +
- sqrt(g*Hxplus2), Hrt-Hic, Hic-Hl2, Hrr2-Hrt) *
- (Hrt - Hic))+wplusx_H)*HALF*HALF;
- }
-
-
- real_t wminusx_U = w_corrector(deltaT, (dric+dxl)*HALF, fabs(Uxminus/Hxminus) + sqrt(g*Hxminus),
- Uic-Ul, Ul-Ull, Ur2-Uic);
-
- wminusx_U *= Uic - Ul;
-
- if(lvl < level[nl]) {
- if(level[nlt] < level[nltl])
- Ull2 = (Ull2 + U[ ntop[nltl] ]) * HALF;
- wminusx_U = ((w_corrector(deltaT, (dric+dxl)*HALF, fabs(Uxminus2/Hxminus2) +
- sqrt(g*Hxminus2), Uic-Ult, Ult-Ull2, Ur2-Uic) *
- (Uic - Ult))+wminusx_U)*HALF*HALF;
- }
-
-
- real_t wplusx_U = w_corrector(deltaT, (dric+dxr)*HALF, fabs(Uxplus/Hxplus) + sqrt(g*Hxplus),
- Ur-Uic, Uic-Ul2, Urr-Ur);
-
- wplusx_U *= Ur - Uic;
-
- if(lvl < level[nr]) {
- if(level[nrt] < level[nrtr])
- Urr2 = (Urr2 + U[ ntop[nrtr] ]) * HALF;
- wplusx_U = ((w_corrector(deltaT, (dric+dxr)*HALF, fabs(Uxplus2/Hxplus2) +
- sqrt(g*Hxplus2), Urt-Uic, Uic-Ul2, Urr2-Urt) *
- (Urt - Uic))+wplusx_U)*HALF*HALF;
- }
-
-
- if(level[nb] < level[nbb]) {
- Hbb = (Hbb + H[ nrht[nbb] ]) * HALF;
- Vbb = (Vbb + V[ nrht[nbb] ]) * HALF;
- }
-
- real_t Ht2 = Ht;
- real_t Vt2 = Vt;
- if(lvl < level[nt]) {
- Ht2 = (Ht2 + Htr) * HALF;
- Vt2 = (Vt2 + Vtr) * HALF;
- }
-
- real_t Hyminus = H[ic];
- real_t Uyminus = 0.0;
- real_t Vyminus = 0.0;
- if (mesh->map_ycell2face_bot1[ic] >= 0){
- Hyminus = Hy[mesh->map_ycell2face_bot1[ic]];
- Uyminus = Uy[mesh->map_ycell2face_bot1[ic]];
- Vyminus = Vy[mesh->map_ycell2face_bot1[ic]];
- }
-
- real_t Hyminus2 = 0.0;
- if(lvl < level[nb]) Hyminus2 = H[ic];
- real_t Uyminus2 = 0.0;
- real_t Vyminus2 = 0.0;
- if (mesh->map_ycell2face_bot2[ic] >= 0){
- Hyminus2 = Hy[mesh->map_ycell2face_bot2[ic]];
- Uyminus2 = Uy[mesh->map_ycell2face_bot2[ic]];
- Vyminus2 = Vy[mesh->map_ycell2face_bot2[ic]];
- }
-
- real_t Hyplus = H[ic];
- real_t Uyplus = 0.0;
- real_t Vyplus = 0.0;
- if (mesh->map_ycell2face_top1[ic] >= 0){
- Hyplus = Hy[mesh->map_ycell2face_top1[ic]];
- Uyplus = Uy[mesh->map_ycell2face_top1[ic]];
- Vyplus = Vy[mesh->map_ycell2face_top1[ic]];
- }
-
- real_t Hyplus2 = 0.0;
- if(lvl < level[nt]) Hyplus2 = H[ic];
- real_t Uyplus2 = 0.0;
- real_t Vyplus2 = 0.0;
- if (mesh->map_ycell2face_top2[ic] >= 0){
- Hyplus2 = Hy[mesh->map_ycell2face_top2[ic]];
- Uyplus2 = Uy[mesh->map_ycell2face_top2[ic]];
- Vyplus2 = Vy[mesh->map_ycell2face_top2[ic]];
- }
-
- real_t wminusy_H = w_corrector(deltaT, (dric+dyb)*HALF, fabs(Vyminus/Hyminus) + sqrt(g*Hyminus),
- Hic-Hb, Hb-Hbb, Ht2-Hic);
-
- wminusy_H *= Hic - Hb;
-
- if(lvl < level[nb]) {
- if(level[nbr] < level[nbrb])
- Hbb2 = (Hbb2 + H[ nrht[nbrb] ]) * HALF;
- wminusy_H = ((w_corrector(deltaT, (dric+dyb)*HALF, fabs(Vyminus2/Hyminus2) +
- sqrt(g*Hyminus2), Hic-Hbr, Hbr-Hbb2, Ht2-Hic) *
- (Hic - Hbr))+wminusy_H)*HALF*HALF;
- }
-
-
- if(level[nt] < level[ntt]) {
- Htt = (Htt + H[ nrht[ntt] ]) * HALF;
- Vtt = (Vtt + V[ nrht[ntt] ]) * HALF;
- }
-
- real_t Hb2 = Hb;
- real_t Vb2 = Vb;
- if(lvl < level[nb]) {
- Hb2 = (Hb2 + Hbr) * HALF;
- Vb2 = (Vb2 + Vbr) * HALF;
- }
-
- real_t wplusy_H = w_corrector(deltaT, (dric+dyt)*HALF, fabs(Vyplus/Hyplus) + sqrt(g*Hyplus),
- Ht-Hic, Hic-Hb2, Htt-Ht);
-
- wplusy_H *= Ht - Hic;
-
- if(lvl < level[nt]) {
- if(level[ntr] < level[ntrt])
- Htt2 = (Htt2 + H[ nrht[ntrt] ]) * HALF;
- wplusy_H = ((w_corrector(deltaT, (dric+dyt)*HALF, fabs(Vyplus2/Hyplus2) +
- sqrt(g*Hyplus2), Htr-Hic, Hic-Hb2, Htt2-Htr) *
- (Htr - Hic))+wplusy_H)*HALF*HALF;
- }
-
- real_t wminusy_V = w_corrector(deltaT, (dric+dyb)*HALF, fabs(Vyminus/Hyminus) + sqrt(g*Hyminus),
- Vic-Vb, Vb-Vbb, Vt2-Vic);
-
- wminusy_V *= Vic - Vb;
-
- if(lvl < level[nb]) {
- if(level[nbr] < level[nbrb])
- Vbb2 = (Vbb2 + V[ nrht[nbrb] ]) * HALF;
- wminusy_V = ((w_corrector(deltaT, (dric+dyb)*HALF, fabs(Vyminus2/Hyminus2) +
- sqrt(g*Hyminus2), Vic-Vbr, Vbr-Vbb2, Vt2-Vic) *
- (Vic - Vbr))+wminusy_V)*HALF*HALF;
- }
-
- real_t wplusy_V = w_corrector(deltaT, (dric+dyt)*HALF, fabs(Vyplus/Hyplus) + sqrt(g*Hyplus),
- Vt-Vic, Vic-Vb2, Vtt-Vt);
-
- wplusy_V *= Vt - Vic;
-
- if(lvl < level[nt]) {
- if(level[ntr] < level[ntrt])
- Vtt2 = (Vtt2 + V[ nrht[ntrt] ]) * HALF;
- wplusy_V = ((w_corrector(deltaT, (dric+dyt)*HALF, fabs(Vyplus2/Hyplus2) +
- sqrt(g*Hyplus2), Vtr-Vic, Vic-Vb2, Vtt2-Vtr) *
- (Vtr - Vic))+wplusy_V)*HALF*HALF;
- }
-
- real_t Hxfluxminus = HNEWXFLUXMINUS;
- real_t Uxfluxminus = UNEWXFLUXMINUS;
- real_t Vxfluxminus = UVNEWFLUXMINUS;
-
- real_t Hxfluxplus = HNEWXFLUXPLUS;
- real_t Uxfluxplus = UNEWXFLUXPLUS;
- real_t Vxfluxplus = UVNEWFLUXPLUS;
-
- real_t Hyfluxminus = HNEWYFLUXMINUS;
- real_t Uyfluxminus = VUNEWFLUXMINUS;
- real_t Vyfluxminus = VNEWYFLUXMINUS;
-
- real_t Hyfluxplus = HNEWYFLUXPLUS;
- real_t Uyfluxplus = VUNEWFLUXPLUS;
- real_t Vyfluxplus = VNEWYFLUXPLUS;
-
- if(lvl < level[nl]) {
- Hxfluxminus = (Hxfluxminus + HNEWXFLUXMINUS2) * HALF;
- Uxfluxminus = (Uxfluxminus + UNEWXFLUXMINUS2) * HALF;
- Vxfluxminus = (Vxfluxminus + UVNEWFLUXMINUS2) * HALF;
- }
-
- if(lvl < level[nr]) {
- Hxfluxplus = (Hxfluxplus + HNEWXFLUXPLUS2) * HALF;
- Uxfluxplus = (Uxfluxplus + UNEWXFLUXPLUS2) * HALF;
- Vxfluxplus = (Vxfluxplus + UVNEWFLUXPLUS2) * HALF;
- }
-
- if(lvl < level[nb]) {
- Hyfluxminus = (Hyfluxminus + HNEWYFLUXMINUS2) * HALF;
- Uyfluxminus = (Uyfluxminus + VUNEWFLUXMINUS2) * HALF;
- Vyfluxminus = (Vyfluxminus + VNEWYFLUXMINUS2) * HALF;
- }
-
- if(lvl < level[nt]) {
- Hyfluxplus = (Hyfluxplus + HNEWYFLUXPLUS2) * HALF;
- Uyfluxplus = (Uyfluxplus + VUNEWFLUXPLUS2) * HALF;
- Vyfluxplus = (Vyfluxplus + VNEWYFLUXPLUS2) * HALF;
- }
-
- H_new[ic] = U_fullstep(deltaT, dxic, Hic,
- Hxfluxplus, Hxfluxminus, Hyfluxplus, Hyfluxminus)
- - wminusx_H + wplusx_H - wminusy_H + wplusy_H;
- U_new[ic] = U_fullstep(deltaT, dxic, Uic,
- Uxfluxplus, Uxfluxminus, Uyfluxplus, Uyfluxminus)
- - wminusx_U + wplusx_U;
- V_new[ic] = U_fullstep(deltaT, dxic, Vic,
- Vxfluxplus, Vxfluxminus, Vyfluxplus, Vyfluxminus)
- - wminusy_V + wplusy_V;
-
-#if DEBUG >= 1
- if (DEBUG >= 1) {
- real_t U_tmp = U_new[ic];
- real_t V_tmp = V_new[ic];
- if (U_tmp == 0.0) U_tmp = 0.0;
- if (V_tmp == 0.0) V_tmp = 0.0;
- printf("DEBUG ic %d H_new %lf U_new %lf V_new %lf\n",ic,H_new[ic],U_tmp,V_tmp);
- }
-#endif
-
-/*
- printf("DEBUG ic %d deltaT, %lf dxic, %lf Hic, %lf Hxfluxplus, %lf Hxfluxminus, %lf Hyfluxplus, %lf Hyfluxminus %lf\n",
- ic, deltaT, dxic, Hic, Hxfluxplus, Hxfluxminus, Hyfluxplus, Hyfluxminus);
- printf("DEBUG ic %d wminusx_H %lf wplusx_H %lf wminusy_H %lf wplusy_H %lf\n",ic, wminusx_H, wplusx_H, wminusy_H, wplusy_H);
- printf("DEBUG ic %d deltaT, %lf dxic, %lf Vic, %lf Vxfluxplus, %lf Vxfluxminus, %lf Vyfluxplus, %lf Vyfluxminus %lf\n",
- ic, deltaT, dxic, Vic, Vxfluxplus, Vxfluxminus, Vyfluxplus, Vyfluxminus);
- printf("DEBUG ic %d wminusy_V %lf wplusy_V %lf\n",ic, wminusy_V, wplusy_V);
-*/
- }//end forloop
-
-#ifdef _OPENMP
-#pragma omp barrier
-#pragma omp master
- {
-#endif
- // Replace H with H_new and deallocate H. New memory will have the characteristics
- // of the new memory and the name of the old. Both return and arg1 will be reset to new memory
- H = (state_t *)state_memory.memory_replace(H, H_new);
- U = (state_t *)state_memory.memory_replace(U, U_new);
- V = (state_t *)state_memory.memory_replace(V, V_new);
-
- //state_memory.memory_report();
- //printf("DEBUG end finite diff\n\n");
-#ifdef _OPENMP
- }
-#pragma omp barrier
-#endif
-
-#ifdef _OPENMP
-#pragma omp master
-#endif
- cpu_timers[STATE_TIMER_FINITE_DIFFERENCE] += cpu_timer_stop(tstart_cpu);
-}
-
-#ifdef HAVE_OPENCL
-// Advance the device-resident state arrays (dev_H, dev_U, dev_V) by one time step.
-//
-// Steps, in order:
-//   1. allocate device buffers for the new state,
-//   2. apply boundary conditions on the device (with HAVE_MPI and more than one
-//      process: local boundaries, L7 device update, then ghost boundaries),
-//   3. run the finite-difference kernel and wait for it to finish,
-//   4. replace dev_H/dev_U/dev_V with the freshly computed buffers,
-//   5. add the elapsed time to gpu_timers[STATE_TIMER_FINITE_DIFFERENCE]
-//      (cpu_timer_stop() result scaled by 1.0e9 -- presumably seconds to
-//      nanoseconds; confirm against the timer implementation).
-//
-// deltaT is converted to real_t before being handed to the kernel.
-void State::gpu_calc_finite_difference(double deltaT)
-{
-   struct timeval tstart_cpu;
-   cpu_timer_start(&tstart_cpu);
-
-   cl_command_queue command_queue = ezcl_get_command_queue();
-
-   size_t &ncells       = mesh->ncells;
-   size_t &ncells_ghost = mesh->ncells_ghost;
-   // Note: this writes through the reference, i.e. it updates mesh->ncells_ghost.
-   if (ncells_ghost < ncells) ncells_ghost = ncells;
-   int &levmx           = mesh->levmx;
-   cl_mem &dev_celltype = mesh->dev_celltype;
-   cl_mem &dev_nlft     = mesh->dev_nlft;
-   cl_mem &dev_nrht     = mesh->dev_nrht;
-   cl_mem &dev_nbot     = mesh->dev_nbot;
-   cl_mem &dev_ntop     = mesh->dev_ntop;
-   cl_mem &dev_level    = mesh->dev_level;
-   cl_mem &dev_levdx    = mesh->dev_levdx;
-   cl_mem &dev_levdy    = mesh->dev_levdy;
-
-   assert(dev_H);
-   assert(dev_U);
-   assert(dev_V);
-   assert(dev_nlft);
-   assert(dev_nrht);
-   assert(dev_nbot);
-   assert(dev_ntop);
-   assert(dev_level);
-   assert(dev_levdx);
-   assert(dev_levdy);
-
-   // The kernels declare their cell count as a 32-bit int. Passing the address of
-   // the size_t with sizeof(cl_int) only picks up the right bytes on little-endian
-   // hosts, so hand the kernels a genuine cl_int instead.
-   cl_int ncells_arg = (cl_int)ncells;
-
-   cl_mem dev_H_new = (cl_mem)gpu_state_memory.memory_malloc(ncells_ghost, sizeof(cl_state_t), const_cast<char *>("dev_H_new"), DEVICE_REGULAR_MEMORY);
-   cl_mem dev_U_new = (cl_mem)gpu_state_memory.memory_malloc(ncells_ghost, sizeof(cl_state_t), const_cast<char *>("dev_U_new"), DEVICE_REGULAR_MEMORY);
-   cl_mem dev_V_new = (cl_mem)gpu_state_memory.memory_malloc(ncells_ghost, sizeof(cl_state_t), const_cast<char *>("dev_V_new"), DEVICE_REGULAR_MEMORY);
-
-   // Round the global size up to a whole number of work groups.
-   size_t local_work_size  = 128;
-   size_t global_work_size = ((ncells+local_work_size - 1) /local_work_size) * local_work_size;
-
-#ifdef HAVE_MPI
-   if (mesh->numpe > 1) {
-      ezcl_set_kernel_arg(kernel_apply_boundary_conditions_local, 0, sizeof(cl_int), &ncells_arg);
-      ezcl_set_kernel_arg(kernel_apply_boundary_conditions_local, 1, sizeof(cl_mem), &dev_celltype);
-      ezcl_set_kernel_arg(kernel_apply_boundary_conditions_local, 2, sizeof(cl_mem), &dev_nlft);
-      ezcl_set_kernel_arg(kernel_apply_boundary_conditions_local, 3, sizeof(cl_mem), &dev_nrht);
-      ezcl_set_kernel_arg(kernel_apply_boundary_conditions_local, 4, sizeof(cl_mem), &dev_ntop);
-      ezcl_set_kernel_arg(kernel_apply_boundary_conditions_local, 5, sizeof(cl_mem), &dev_nbot);
-      ezcl_set_kernel_arg(kernel_apply_boundary_conditions_local, 6, sizeof(cl_mem), &dev_H);
-      ezcl_set_kernel_arg(kernel_apply_boundary_conditions_local, 7, sizeof(cl_mem), &dev_U);
-      ezcl_set_kernel_arg(kernel_apply_boundary_conditions_local, 8, sizeof(cl_mem), &dev_V);
-      ezcl_enqueue_ndrange_kernel(command_queue, kernel_apply_boundary_conditions_local, 1, NULL, &global_work_size, &local_work_size, NULL);
-
-      /*
-      __kernel void copy_state_data_cl(
-                       const int isize,         // 0
-              __global state_t *H,              // 1
-              __global state_t *U,              // 2
-              __global state_t *V,              // 3
-              __global state_t *H_new,          // 4
-              __global state_t *U_new,          // 5
-              __global state_t *V_new)          // 6
-      */
-
-      ezcl_set_kernel_arg(kernel_copy_state_data, 0, sizeof(cl_int), (void *)&ncells_arg);
-      ezcl_set_kernel_arg(kernel_copy_state_data, 1, sizeof(cl_mem), (void *)&dev_H);
-      ezcl_set_kernel_arg(kernel_copy_state_data, 2, sizeof(cl_mem), (void *)&dev_U);
-      ezcl_set_kernel_arg(kernel_copy_state_data, 3, sizeof(cl_mem), (void *)&dev_V);
-      ezcl_set_kernel_arg(kernel_copy_state_data, 4, sizeof(cl_mem), (void *)&dev_H_new);
-      ezcl_set_kernel_arg(kernel_copy_state_data, 5, sizeof(cl_mem), (void *)&dev_U_new);
-      ezcl_set_kernel_arg(kernel_copy_state_data, 6, sizeof(cl_mem), (void *)&dev_V_new);
-
-      // Event-tracking variant, kept for reference:
-      //ezcl_enqueue_ndrange_kernel(command_queue, kernel_copy_state_data, 1, NULL, &global_work_size, &local_work_size, &copy_state_data_event);
-      ezcl_enqueue_ndrange_kernel(command_queue, kernel_copy_state_data, 1, NULL, &global_work_size, &local_work_size, NULL);
-
-      dev_H = (cl_mem)gpu_state_memory.memory_replace(dev_H, dev_H_new);
-      dev_U = (cl_mem)gpu_state_memory.memory_replace(dev_U, dev_U_new);
-      dev_V = (cl_mem)gpu_state_memory.memory_replace(dev_V, dev_V_new);
-
-      L7_Dev_Update(dev_H, L7_STATE_T, mesh->cell_handle);
-      L7_Dev_Update(dev_U, L7_STATE_T, mesh->cell_handle);
-      L7_Dev_Update(dev_V, L7_STATE_T, mesh->cell_handle);
-
-      // The buffers allocated at the top were handed over by memory_replace above,
-      // so fresh "new" buffers are needed for the finite-difference kernel.
-      dev_H_new = (cl_mem)gpu_state_memory.memory_malloc(ncells_ghost, sizeof(cl_state_t), const_cast<char *>("dev_H_new"), DEVICE_REGULAR_MEMORY);
-      dev_U_new = (cl_mem)gpu_state_memory.memory_malloc(ncells_ghost, sizeof(cl_state_t), const_cast<char *>("dev_U_new"), DEVICE_REGULAR_MEMORY);
-      dev_V_new = (cl_mem)gpu_state_memory.memory_malloc(ncells_ghost, sizeof(cl_state_t), const_cast<char *>("dev_V_new"), DEVICE_REGULAR_MEMORY);
-
-      ezcl_set_kernel_arg(kernel_apply_boundary_conditions_ghost, 0, sizeof(cl_int), &ncells_arg);
-      ezcl_set_kernel_arg(kernel_apply_boundary_conditions_ghost, 1, sizeof(cl_mem), &dev_celltype);
-      ezcl_set_kernel_arg(kernel_apply_boundary_conditions_ghost, 2, sizeof(cl_mem), &dev_nlft);
-      ezcl_set_kernel_arg(kernel_apply_boundary_conditions_ghost, 3, sizeof(cl_mem), &dev_nrht);
-      ezcl_set_kernel_arg(kernel_apply_boundary_conditions_ghost, 4, sizeof(cl_mem), &dev_ntop);
-      ezcl_set_kernel_arg(kernel_apply_boundary_conditions_ghost, 5, sizeof(cl_mem), &dev_nbot);
-      ezcl_set_kernel_arg(kernel_apply_boundary_conditions_ghost, 6, sizeof(cl_mem), &dev_H);
-      ezcl_set_kernel_arg(kernel_apply_boundary_conditions_ghost, 7, sizeof(cl_mem), &dev_U);
-      ezcl_set_kernel_arg(kernel_apply_boundary_conditions_ghost, 8, sizeof(cl_mem), &dev_V);
-      ezcl_enqueue_ndrange_kernel(command_queue, kernel_apply_boundary_conditions_ghost, 1, NULL, &global_work_size, &local_work_size, NULL);
-   } else {
-      ezcl_set_kernel_arg(kernel_apply_boundary_conditions, 0, sizeof(cl_int), &ncells_arg);
-      ezcl_set_kernel_arg(kernel_apply_boundary_conditions, 1, sizeof(cl_mem), &dev_celltype);
-      ezcl_set_kernel_arg(kernel_apply_boundary_conditions, 2, sizeof(cl_mem), &dev_nlft);
-      ezcl_set_kernel_arg(kernel_apply_boundary_conditions, 3, sizeof(cl_mem), &dev_nrht);
-      ezcl_set_kernel_arg(kernel_apply_boundary_conditions, 4, sizeof(cl_mem), &dev_ntop);
-      ezcl_set_kernel_arg(kernel_apply_boundary_conditions, 5, sizeof(cl_mem), &dev_nbot);
-      ezcl_set_kernel_arg(kernel_apply_boundary_conditions, 6, sizeof(cl_mem), &dev_H);
-      ezcl_set_kernel_arg(kernel_apply_boundary_conditions, 7, sizeof(cl_mem), &dev_U);
-      ezcl_set_kernel_arg(kernel_apply_boundary_conditions, 8, sizeof(cl_mem), &dev_V);
-      ezcl_enqueue_ndrange_kernel(command_queue, kernel_apply_boundary_conditions, 1, NULL, &global_work_size, &local_work_size, NULL);
-   }
-#else
-   ezcl_set_kernel_arg(kernel_apply_boundary_conditions, 0, sizeof(cl_int), &ncells_arg);
-   ezcl_set_kernel_arg(kernel_apply_boundary_conditions, 1, sizeof(cl_mem), &dev_celltype);
-   ezcl_set_kernel_arg(kernel_apply_boundary_conditions, 2, sizeof(cl_mem), &dev_nlft);
-   ezcl_set_kernel_arg(kernel_apply_boundary_conditions, 3, sizeof(cl_mem), &dev_nrht);
-   ezcl_set_kernel_arg(kernel_apply_boundary_conditions, 4, sizeof(cl_mem), &dev_ntop);
-   ezcl_set_kernel_arg(kernel_apply_boundary_conditions, 5, sizeof(cl_mem), &dev_nbot);
-   ezcl_set_kernel_arg(kernel_apply_boundary_conditions, 6, sizeof(cl_mem), &dev_H);
-   ezcl_set_kernel_arg(kernel_apply_boundary_conditions, 7, sizeof(cl_mem), &dev_U);
-   ezcl_set_kernel_arg(kernel_apply_boundary_conditions, 8, sizeof(cl_mem), &dev_V);
-   ezcl_enqueue_ndrange_kernel(command_queue, kernel_apply_boundary_conditions, 1, NULL, &global_work_size, &local_work_size, NULL);
-#endif
-
-   /*
-   __kernel void calc_finite_difference_cl(
-                    const int ncells,         // 0  Total number of cells.
-                    const int lvmax,          // 1  Maximum level
-           __global state_t *H,               // 2
-           __global state_t *U,               // 3
-           __global state_t *V,               // 4
-           __global state_t *H_new,           // 5
-           __global state_t *U_new,           // 6
-           __global state_t *V_new,           // 7
-           __global const int *nlft,          // 8  Array of left neighbors.
-           __global const int *nrht,          // 9  Array of right neighbors.
-           __global const int *ntop,          // 10 Array of top neighbors.
-           __global const int *nbot,          // 11 Array of bottom neighbors.
-           __global const int *level,         // 12 Array of level information.
-                    const real_t deltaT,      // 13 Size of time step.
-           __global const real_t *lev_dx,     // 14
-           __global const real_t *lev_dy,     // 15
-           __local  state4_t *tile,           // 16 Tile size in state4.
-           __local  int8 *itile)              // 17 Tile size in int8.
-   */
-   cl_event calc_finite_difference_event;
-
-   // Argument 13 is passed with sizeof(cl_real_t), so convert the double first.
-   real_t deltaT_local = deltaT;
-   ezcl_set_kernel_arg(kernel_calc_finite_difference, 0, sizeof(cl_int), (void *)&ncells_arg);
-   ezcl_set_kernel_arg(kernel_calc_finite_difference, 1, sizeof(cl_int), (void *)&levmx);
-   ezcl_set_kernel_arg(kernel_calc_finite_difference, 2, sizeof(cl_mem), (void *)&dev_H);
-   ezcl_set_kernel_arg(kernel_calc_finite_difference, 3, sizeof(cl_mem), (void *)&dev_U);
-   ezcl_set_kernel_arg(kernel_calc_finite_difference, 4, sizeof(cl_mem), (void *)&dev_V);
-   ezcl_set_kernel_arg(kernel_calc_finite_difference, 5, sizeof(cl_mem), (void *)&dev_H_new);
-   ezcl_set_kernel_arg(kernel_calc_finite_difference, 6, sizeof(cl_mem), (void *)&dev_U_new);
-   ezcl_set_kernel_arg(kernel_calc_finite_difference, 7, sizeof(cl_mem), (void *)&dev_V_new);
-   ezcl_set_kernel_arg(kernel_calc_finite_difference, 8, sizeof(cl_mem), (void *)&dev_nlft);
-   ezcl_set_kernel_arg(kernel_calc_finite_difference, 9, sizeof(cl_mem), (void *)&dev_nrht);
-   ezcl_set_kernel_arg(kernel_calc_finite_difference,10, sizeof(cl_mem), (void *)&dev_ntop);
-   ezcl_set_kernel_arg(kernel_calc_finite_difference,11, sizeof(cl_mem), (void *)&dev_nbot);
-   ezcl_set_kernel_arg(kernel_calc_finite_difference,12, sizeof(cl_mem), (void *)&dev_level);
-   ezcl_set_kernel_arg(kernel_calc_finite_difference,13, sizeof(cl_real_t), (void *)&deltaT_local);
-   ezcl_set_kernel_arg(kernel_calc_finite_difference,14, sizeof(cl_mem), (void *)&dev_levdx);
-   ezcl_set_kernel_arg(kernel_calc_finite_difference,15, sizeof(cl_mem), (void *)&dev_levdy);
-   // Arguments 16 and 17 are local-memory scratch: size only, no host pointer.
-   ezcl_set_kernel_arg(kernel_calc_finite_difference,16, local_work_size*sizeof(cl_state4_t), NULL);
-   ezcl_set_kernel_arg(kernel_calc_finite_difference,17, local_work_size*sizeof(cl_int8), NULL);
-
-   ezcl_enqueue_ndrange_kernel(command_queue, kernel_calc_finite_difference, 1, NULL, &global_work_size, &local_work_size, &calc_finite_difference_event);
-
-   // Wait for the kernel before the old state buffers are replaced.
-   ezcl_wait_for_events(1, &calc_finite_difference_event);
-   ezcl_event_release(calc_finite_difference_event);
-
-   dev_H = (cl_mem)gpu_state_memory.memory_replace(dev_H, dev_H_new);
-   dev_U = (cl_mem)gpu_state_memory.memory_replace(dev_U, dev_U_new);
-   dev_V = (cl_mem)gpu_state_memory.memory_replace(dev_V, dev_V_new);
-
-   gpu_timers[STATE_TIMER_FINITE_DIFFERENCE] += (long)(cpu_timer_stop(tstart_cpu)*1.0e9);
-}
-#endif
-
-// Compare every cell's H, U and V against the cell named by sym_index and
-// report each difference larger than eps.
-//
-//   string     label printed at the start of every report line
-//   sym_index  for cell ic, sym_index[ic] is the cell it is compared with
-//   eps        tolerance on the absolute difference
-//   sign_rule  X_RULE negates the partner's U, Y_RULE negates the partner's V,
-//              DIAG_RULE negates both; any other value negates neither
-//   flag       incremented once per reported mismatch (never reset here)
-void State::symmetry_check(const char *string, vector<int> sym_index, double eps,
-                           SIGN_RULE sign_rule, int &flag)
-{
-   size_t &ncells = mesh->ncells;
-
-   double xsign = (sign_rule == DIAG_RULE || sign_rule == X_RULE) ? -1.0 : 1.0;
-   double ysign = (sign_rule == DIAG_RULE || sign_rule == Y_RULE) ? -1.0 : 1.0;
-
-   for (uint ic=0; ic<ncells; ic++) {
-      int isym = sym_index[ic];
-
-      /* Height must match its partner exactly (no sign flip) */
-      double Hdiff = fabs(H[ic] - H[isym]);
-      if (Hdiff > eps) {
-         printf("%s ic %d sym %d H[ic] %lf Hsym %lf diff %lf\n",
-                string,ic,isym,H[ic],H[isym],Hdiff);
-         flag++;
-      }
-
-      /* x-momentum, with the partner value signed by xsign */
-      double Udiff = fabs(U[ic] - xsign*U[isym]);
-      if (Udiff > eps) {
-         printf("%s ic %d sym %d U[ic] %lf Usym %lf diff %lf\n",
-                string,ic,isym,U[ic],U[isym],Udiff);
-         flag++;
-      }
-
-      /* y-momentum, with the partner value signed by ysign */
-      double Vdiff = fabs(V[ic] - ysign*V[isym]);
-      if (Vdiff > eps) {
-         printf("%s ic %d sym %d V[ic] %lf Vsym %lf diff %lf\n",
-                string,ic,isym,V[ic],V[isym],Vdiff);
-         flag++;
-      }
-   }
-}
-
-// Fill mpot with a refinement vote for each real cell in this thread's range
-// (+1 refine, -1 coarsen, 0 leave alone), then let mesh->refine_smooth()
-// post-process the votes.
-//
-// The vote is based on the relative jump in H between a cell and its four
-// neighbors. A neighbor at a higher level than the cell is averaged with its
-// adjacent sibling (top sibling for left/right neighbors, right sibling for
-// bottom/top neighbors) before the jump is taken.
-//
-//   mpot            output votes, indexed by cell; only real cells are written
-//   icount, jcount  zeroed here, then passed on to refine_smooth()
-//   returns         the value returned by mesh->refine_smooth()
-//
-// When built with OpenMP the body runs inside a parallel region; work that must
-// happen once (timers, counter reset, L7 updates) is done by the master thread.
-size_t State::calc_refine_potential(vector<int> &mpot,int &icount, int &jcount)
-{
-   struct timeval tstart_cpu;
-#ifdef _OPENMP
-#pragma omp parallel
-{
-#endif
-
-   struct timeval tstart_lev2;
-
-   // One-time setup: start the timers and clear the counters.
-#ifdef _OPENMP
-#pragma omp master
-{
-#endif
-   cpu_timer_start(&tstart_cpu);
-   if (TIMING_LEVEL >= 2) cpu_timer_start(&tstart_lev2);
-   icount=0;
-   jcount=0;
-#ifdef _OPENMP
-}
-#pragma omp barrier
-#endif
-
-   int *nlft  = mesh->nlft;
-   int *nrht  = mesh->nrht;
-   int *nbot  = mesh->nbot;
-   int *ntop  = mesh->ntop;
-   int *level = mesh->level;
-
-#ifdef HAVE_MPI
-   // We need to update the ghost regions and boundary regions for the state
-   // variables since they were changed in the finite difference routine. We
-   // want to use the updated values for refinement decisions
-   if (mesh->numpe > 1) {
-      apply_boundary_conditions_local();
-#ifdef _OPENMP
-#pragma omp barrier
-#pragma omp master
-{
-#endif
-      L7_Update(&H[0], L7_STATE_T, mesh->cell_handle);
-      L7_Update(&U[0], L7_STATE_T, mesh->cell_handle);
-      L7_Update(&V[0], L7_STATE_T, mesh->cell_handle);
-#ifdef _OPENMP
-}
-#pragma omp barrier
-#endif
-      apply_boundary_conditions_ghost();
-   } else {
-      apply_boundary_conditions();
-   }
-#else
-   apply_boundary_conditions();
-#endif
-
-#ifdef _OPENMP
-#pragma omp barrier
-#endif
-/*****HIGH LEVEL OMP******/
-
-   int lowerBound, upperBound;
-   mesh->get_bounds(lowerBound,upperBound);
-   for (int ic=lowerBound; ic<upperBound; ic++) {
-
-      // Cells that are not real cells keep whatever mpot already holds.
-      if (mesh->celltype[ic] != REAL_CELL) continue;
-
-      int lev_ic  = level[ic];
-      state_t Hic = H[ic];
-
-      // Left neighbor
-      int nl = nlft[ic];
-      state_t Hl = H[nl];
-      if (level[nl] > lev_ic) Hl = REFINE_HALF * (Hl + H[ ntop[nl] ]);
-
-      // Right neighbor
-      int nr = nrht[ic];
-      state_t Hr = H[nr];
-      if (level[nr] > lev_ic) Hr = REFINE_HALF * (Hr + H[ ntop[nr] ]);
-
-      // Bottom neighbor
-      int nb = nbot[ic];
-      state_t Hb = H[nb];
-      if (level[nb] > lev_ic) Hb = REFINE_HALF * (Hb + H[ nrht[nb] ]);
-
-      // Top neighbor
-      int nt = ntop[ic];
-      state_t Ht = H[nt];
-      if (level[nt] > lev_ic) Ht = REFINE_HALF * (Ht + H[ nrht[nt] ]);
-
-      // One-sided differences of H in each direction.
-      state_t dH_rht = Hr-Hic;
-      state_t dH_lft = Hic-Hl;
-      state_t dH_top = Ht-Hic;
-      state_t dH_bot = Hic-Hb;
-
-      // Largest relative jump seen so far. Each direction is evaluated twice with
-      // the operands of max() swapped, exactly as before; the order is kept on
-      // purpose because it can affect the result if an operand is NaN.
-      state_t qmax = REFINE_NEG_THOUSAND;
-      state_t qpot;
-
-      qpot = max(fabs(dH_rht/Hic), fabs(dH_lft/Hic));
-      if (qpot > qmax) qmax = qpot;
-
-      qpot = max(fabs(dH_lft/Hic), fabs(dH_rht/Hic));
-      if (qpot > qmax) qmax = qpot;
-
-      qpot = max(fabs(dH_top/Hic), fabs(dH_bot/Hic));
-      if (qpot > qmax) qmax = qpot;
-
-      qpot = max(fabs(dH_bot/Hic), fabs(dH_top/Hic));
-      if (qpot > qmax) qmax = qpot;
-
-      // Refine on a steep jump unless already at the maximum level; coarsen on a
-      // shallow one unless already at level 0.
-      int vote = 0;
-      if (qmax > REFINE_GRADIENT && lev_ic < mesh->levmx) {
-         vote = 1;
-      } else if (qmax < COARSEN_GRADIENT && lev_ic > 0) {
-         vote = -1;
-      }
-      mpot[ic] = vote;
-   }
-
-#ifdef _OPENMP
-#pragma omp master
-{
-#endif
-   if (TIMING_LEVEL >= 2) {
-      cpu_timers[STATE_TIMER_CALC_MPOT] += cpu_timer_stop(tstart_lev2);
-   }
-#ifdef _OPENMP
-}
-}
-#pragma omp barrier
-#endif
-   int newcount = mesh->refine_smooth(mpot, icount, jcount);
-
-   cpu_timers[STATE_TIMER_REFINE_POTENTIAL] += cpu_timer_stop(tstart_cpu);
-
-   return(newcount);
-}
-
-#ifdef HAVE_OPENCL
-/**
- * Computes the per-cell refinement potential (mpot) on the GPU.
- *
- * Steps, in order:
- *   1. apply boundary conditions to dev_H/dev_U/dev_V (with a ghost-cell
- *      update in between when running on more than one MPI rank),
- *   2. allocate dev_mpot and run the refine_potential kernel,
- *   3. reduce the per-work-group counts with mesh->gpu_rezone_count2() and
- *      read the two resulting integers into icount/jcount,
- *   4. pass dev_mpot and the counts to mesh->gpu_refine_smooth().
- *
- * @param icount  receives the first component of the reduced kernel result
- *                (presumably the number of cells flagged for refinement --
- *                confirm against the kernel); it is then handed by reference
- *                to gpu_refine_smooth()
- * @param jcount  receives the second component of the reduced kernel result
- *                (presumably the number flagged for coarsening -- confirm)
- * @return the value returned by mesh->gpu_refine_smooth(), cast to size_t
- */
-size_t State::gpu_calc_refine_potential(int &icount, int &jcount)
-{
-   struct timeval tstart_cpu;
-   cpu_timer_start(&tstart_cpu);
-
-   struct timeval tstart_lev2;
-   if (TIMING_LEVEL >= 2) cpu_timer_start(&tstart_lev2);
-
-   cl_command_queue command_queue = ezcl_get_command_queue();
-
-   size_t &ncells       = mesh->ncells;
-   int    &levmx        = mesh->levmx;
-   cl_mem &dev_nlft     = mesh->dev_nlft;
-   cl_mem &dev_nrht     = mesh->dev_nrht;
-   cl_mem &dev_nbot     = mesh->dev_nbot;
-   cl_mem &dev_ntop     = mesh->dev_ntop;
-   cl_mem &dev_i        = mesh->dev_i;
-   cl_mem &dev_j        = mesh->dev_j;
-   cl_mem &dev_level    = mesh->dev_level;
-   cl_mem &dev_celltype = mesh->dev_celltype;
-   cl_mem &dev_levdx    = mesh->dev_levdx;
-   cl_mem &dev_levdy    = mesh->dev_levdy;
-
-   assert(dev_H);
-   assert(dev_U);
-   assert(dev_V);
-   assert(dev_nlft);
-   assert(dev_nrht);
-   assert(dev_nbot);
-   assert(dev_ntop);
-   assert(dev_i);
-   assert(dev_j);
-   assert(dev_level);
-   assert(dev_levdx);
-   assert(dev_levdy);
-
-   icount = 0;
-   jcount = 0;
-
-   size_t local_work_size  = 128;
-   // Round the cell count up to a whole number of work-groups.
-   size_t global_work_size = ((ncells+local_work_size - 1) /local_work_size) * local_work_size;
-   size_t block_size       = global_work_size/local_work_size;
-
-   // The kernels receive the cell count as a cl_int. Passing the address of
-   // the size_t with sizeof(cl_int) only copies the right bytes on
-   // little-endian hosts, so hand over an explicit cl_int copy instead.
-   cl_int ncells_arg = (cl_int)ncells;
-
-#ifdef HAVE_MPI
-   if (mesh->numpe > 1) {
-      ezcl_set_kernel_arg(kernel_apply_boundary_conditions_local, 0, sizeof(cl_int), &ncells_arg);
-      ezcl_set_kernel_arg(kernel_apply_boundary_conditions_local, 1, sizeof(cl_mem), &dev_celltype);
-      ezcl_set_kernel_arg(kernel_apply_boundary_conditions_local, 2, sizeof(cl_mem), &dev_nlft);
-      ezcl_set_kernel_arg(kernel_apply_boundary_conditions_local, 3, sizeof(cl_mem), &dev_nrht);
-      ezcl_set_kernel_arg(kernel_apply_boundary_conditions_local, 4, sizeof(cl_mem), &dev_ntop);
-      ezcl_set_kernel_arg(kernel_apply_boundary_conditions_local, 5, sizeof(cl_mem), &dev_nbot);
-      ezcl_set_kernel_arg(kernel_apply_boundary_conditions_local, 6, sizeof(cl_mem), &dev_H);
-      ezcl_set_kernel_arg(kernel_apply_boundary_conditions_local, 7, sizeof(cl_mem), &dev_U);
-      ezcl_set_kernel_arg(kernel_apply_boundary_conditions_local, 8, sizeof(cl_mem), &dev_V);
-      ezcl_enqueue_ndrange_kernel(command_queue, kernel_apply_boundary_conditions_local, 1, NULL, &global_work_size, &local_work_size, NULL);
-
-      // Exchange state between ranks before the ghost pass.
-      L7_Dev_Update(dev_H, L7_STATE_T, mesh->cell_handle);
-      L7_Dev_Update(dev_U, L7_STATE_T, mesh->cell_handle);
-      L7_Dev_Update(dev_V, L7_STATE_T, mesh->cell_handle);
-
-      ezcl_set_kernel_arg(kernel_apply_boundary_conditions_ghost, 0, sizeof(cl_int), &ncells_arg);
-      ezcl_set_kernel_arg(kernel_apply_boundary_conditions_ghost, 1, sizeof(cl_mem), &dev_celltype);
-      ezcl_set_kernel_arg(kernel_apply_boundary_conditions_ghost, 2, sizeof(cl_mem), &dev_nlft);
-      ezcl_set_kernel_arg(kernel_apply_boundary_conditions_ghost, 3, sizeof(cl_mem), &dev_nrht);
-      ezcl_set_kernel_arg(kernel_apply_boundary_conditions_ghost, 4, sizeof(cl_mem), &dev_ntop);
-      ezcl_set_kernel_arg(kernel_apply_boundary_conditions_ghost, 5, sizeof(cl_mem), &dev_nbot);
-      ezcl_set_kernel_arg(kernel_apply_boundary_conditions_ghost, 6, sizeof(cl_mem), &dev_H);
-      ezcl_set_kernel_arg(kernel_apply_boundary_conditions_ghost, 7, sizeof(cl_mem), &dev_U);
-      ezcl_set_kernel_arg(kernel_apply_boundary_conditions_ghost, 8, sizeof(cl_mem), &dev_V);
-      ezcl_enqueue_ndrange_kernel(command_queue, kernel_apply_boundary_conditions_ghost, 1, NULL, &global_work_size, &local_work_size, NULL);
-   } else {
-      ezcl_set_kernel_arg(kernel_apply_boundary_conditions, 0, sizeof(cl_int), &ncells_arg);
-      ezcl_set_kernel_arg(kernel_apply_boundary_conditions, 1, sizeof(cl_mem), &dev_celltype);
-      ezcl_set_kernel_arg(kernel_apply_boundary_conditions, 2, sizeof(cl_mem), &dev_nlft);
-      ezcl_set_kernel_arg(kernel_apply_boundary_conditions, 3, sizeof(cl_mem), &dev_nrht);
-      ezcl_set_kernel_arg(kernel_apply_boundary_conditions, 4, sizeof(cl_mem), &dev_ntop);
-      ezcl_set_kernel_arg(kernel_apply_boundary_conditions, 5, sizeof(cl_mem), &dev_nbot);
-      ezcl_set_kernel_arg(kernel_apply_boundary_conditions, 6, sizeof(cl_mem), &dev_H);
-      ezcl_set_kernel_arg(kernel_apply_boundary_conditions, 7, sizeof(cl_mem), &dev_U);
-      ezcl_set_kernel_arg(kernel_apply_boundary_conditions, 8, sizeof(cl_mem), &dev_V);
-      ezcl_enqueue_ndrange_kernel(command_queue, kernel_apply_boundary_conditions, 1, NULL, &global_work_size, &local_work_size, NULL);
-   }
-#else
-   ezcl_set_kernel_arg(kernel_apply_boundary_conditions, 0, sizeof(cl_int), &ncells_arg);
-   ezcl_set_kernel_arg(kernel_apply_boundary_conditions, 1, sizeof(cl_mem), &dev_celltype);
-   ezcl_set_kernel_arg(kernel_apply_boundary_conditions, 2, sizeof(cl_mem), &dev_nlft);
-   ezcl_set_kernel_arg(kernel_apply_boundary_conditions, 3, sizeof(cl_mem), &dev_nrht);
-   ezcl_set_kernel_arg(kernel_apply_boundary_conditions, 4, sizeof(cl_mem), &dev_ntop);
-   ezcl_set_kernel_arg(kernel_apply_boundary_conditions, 5, sizeof(cl_mem), &dev_nbot);
-   ezcl_set_kernel_arg(kernel_apply_boundary_conditions, 6, sizeof(cl_mem), &dev_H);
-   ezcl_set_kernel_arg(kernel_apply_boundary_conditions, 7, sizeof(cl_mem), &dev_U);
-   ezcl_set_kernel_arg(kernel_apply_boundary_conditions, 8, sizeof(cl_mem), &dev_V);
-   ezcl_enqueue_ndrange_kernel(command_queue, kernel_apply_boundary_conditions, 1, NULL, &global_work_size, &local_work_size, NULL);
-#endif
-
-#ifdef BOUNDS_CHECK
-   {
-      // Diagnostic pass: copy the neighbor/level/H arrays back to the host and
-      // warn about neighbor indices outside [0, ncells_ghost) and H below 1.0.
-      const int nghost = (int)mesh->ncells_ghost;
-
-      vector<int> nlft_tmp(mesh->ncells_ghost);
-      vector<int> nrht_tmp(mesh->ncells_ghost);
-      vector<int> nbot_tmp(mesh->ncells_ghost);
-      vector<int> ntop_tmp(mesh->ncells_ghost);
-      vector<int> level_tmp(mesh->ncells_ghost);
-      vector<state_t> H_tmp(mesh->ncells_ghost);
-      ezcl_enqueue_read_buffer(command_queue, dev_nlft,  CL_FALSE, 0, mesh->ncells_ghost*sizeof(cl_int), &nlft_tmp[0],  NULL);
-      ezcl_enqueue_read_buffer(command_queue, dev_nrht,  CL_FALSE, 0, mesh->ncells_ghost*sizeof(cl_int), &nrht_tmp[0],  NULL);
-      ezcl_enqueue_read_buffer(command_queue, dev_nbot,  CL_FALSE, 0, mesh->ncells_ghost*sizeof(cl_int), &nbot_tmp[0],  NULL);
-      ezcl_enqueue_read_buffer(command_queue, dev_ntop,  CL_TRUE,  0, mesh->ncells_ghost*sizeof(cl_int), &ntop_tmp[0],  NULL);
-      ezcl_enqueue_read_buffer(command_queue, dev_level, CL_TRUE,  0, mesh->ncells_ghost*sizeof(cl_int), &level_tmp[0], NULL);
-      // H holds state_t elements, so the transfer must be sized with
-      // cl_state_t rather than cl_int.
-      ezcl_enqueue_read_buffer(command_queue, dev_H,     CL_TRUE,  0, mesh->ncells_ghost*sizeof(cl_state_t), &H_tmp[0], NULL);
-
-      for (uint ic=0; ic<ncells; ic++){
-         // A neighbor index that failed the range test must not be used to
-         // index the host copies, so the finer-neighbor check is skipped then.
-         int nl = nlft_tmp[ic];
-         if (nl<0 || nl>= nghost) {
-            printf("%d: Warning at line %d cell %d nlft %d\n",mesh->mype,__LINE__,ic,nl);
-         } else if (level_tmp[nl] > level_tmp[ic]){
-            int ntl = ntop_tmp[nl];
-            if (ntl<0 || ntl>= nghost) printf("%d: Warning at line %d cell %d global %d nlft %d ntop of nlft %d\n",mesh->mype,__LINE__,ic,ic+mesh->noffset,nl,ntl);
-         }
-
-         int nr = nrht_tmp[ic];
-         if (nr<0 || nr>= nghost) {
-            printf("%d: Warning at line %d cell %d nrht %d\n",mesh->mype,__LINE__,ic,nr);
-         } else if (level_tmp[nr] > level_tmp[ic]){
-            int ntr = ntop_tmp[nr];
-            if (ntr<0 || ntr>= nghost) printf("%d: Warning at line %d cell %d ntop of nrht %d\n",mesh->mype,__LINE__,ic,ntr);
-         }
-
-         int nb = nbot_tmp[ic];
-         if (nb<0 || nb>= nghost) {
-            printf("%d: Warning at line %d cell %d nbot %d\n",mesh->mype,__LINE__,ic,nb);
-         } else if (level_tmp[nb] > level_tmp[ic]){
-            int nrb = nrht_tmp[nb];
-            if (nrb<0 || nrb>= nghost) printf("%d: Warning at line %d cell %d nrht of nbot %d\n",mesh->mype,__LINE__,ic,nrb);
-         }
-
-         int nt = ntop_tmp[ic];
-         if (nt<0 || nt>= nghost) {
-            printf("%d: Warning at line %d cell %d ntop %d\n",mesh->mype,__LINE__,ic,nt);
-         } else if (level_tmp[nt] > level_tmp[ic]){
-            int nrt = nrht_tmp[nt];
-            if (nrt<0 || nrt>= nghost) printf("%d: Warning at line %d cell %d nrht of ntop %d\n",mesh->mype,__LINE__,ic,nrt);
-         }
-      }
-      for (uint ic=0; ic<mesh->ncells_ghost; ic++){
-         if (H_tmp[ic] < 1.0) printf("%d: Warning at line %d cell %d H %lf\n",mesh->mype,__LINE__,ic,H_tmp[ic]);
-      }
-   }
-#endif
-
-   size_t result_size = 1;
-   cl_mem dev_result     = ezcl_malloc(NULL, const_cast<char *>("dev_result"),     &result_size, sizeof(cl_int2), CL_MEM_READ_WRITE, 0);
-   cl_mem dev_redscratch = ezcl_malloc(NULL, const_cast<char *>("dev_redscratch"), &block_size,  sizeof(cl_int2), CL_MEM_READ_WRITE, 0);
-
-   dev_mpot = ezcl_malloc(NULL, const_cast<char *>("dev_mpot"), &mesh->ncells_ghost, sizeof(cl_int), CL_MEM_READ_WRITE, 0);
-
-   /*
-    * Arguments passed to kernel_refine_potential, in the order set below:
-    *    0  ncells       (cl_int)
-    *    1  levmx        (cl_int)
-    *    2  dev_H
-    *    3  dev_U
-    *    4  dev_V
-    *    5  dev_nlft
-    *    6  dev_nrht
-    *    7  dev_ntop
-    *    8  dev_nbot
-    *    9  dev_i
-    *   10  dev_j
-    *   11  dev_level
-    *   12  dev_celltype
-    *   13  dev_levdx
-    *   14  dev_levdy
-    *   15  dev_mpot
-    *   16  dev_redscratch
-    *   17  dev_result
-    *   18  local scratch, local_work_size * sizeof(cl_state_t)
-    *   19  local scratch, local_work_size * sizeof(cl_int8)
-    */
-   ezcl_set_kernel_arg(kernel_refine_potential, 0, sizeof(cl_int), (void *)&ncells_arg);
-   ezcl_set_kernel_arg(kernel_refine_potential, 1, sizeof(cl_int), (void *)&levmx);
-   ezcl_set_kernel_arg(kernel_refine_potential, 2, sizeof(cl_mem), (void *)&dev_H);
-   ezcl_set_kernel_arg(kernel_refine_potential, 3, sizeof(cl_mem), (void *)&dev_U);
-   ezcl_set_kernel_arg(kernel_refine_potential, 4, sizeof(cl_mem), (void *)&dev_V);
-   ezcl_set_kernel_arg(kernel_refine_potential, 5, sizeof(cl_mem), (void *)&dev_nlft);
-   ezcl_set_kernel_arg(kernel_refine_potential, 6, sizeof(cl_mem), (void *)&dev_nrht);
-   ezcl_set_kernel_arg(kernel_refine_potential, 7, sizeof(cl_mem), (void *)&dev_ntop);
-   ezcl_set_kernel_arg(kernel_refine_potential, 8, sizeof(cl_mem), (void *)&dev_nbot);
-   ezcl_set_kernel_arg(kernel_refine_potential, 9, sizeof(cl_mem), (void *)&dev_i);
-   ezcl_set_kernel_arg(kernel_refine_potential,10, sizeof(cl_mem), (void *)&dev_j);
-   ezcl_set_kernel_arg(kernel_refine_potential,11, sizeof(cl_mem), (void *)&dev_level);
-   ezcl_set_kernel_arg(kernel_refine_potential,12, sizeof(cl_mem), (void *)&dev_celltype);
-   ezcl_set_kernel_arg(kernel_refine_potential,13, sizeof(cl_mem), (void *)&dev_levdx);
-   ezcl_set_kernel_arg(kernel_refine_potential,14, sizeof(cl_mem), (void *)&dev_levdy);
-   ezcl_set_kernel_arg(kernel_refine_potential,15, sizeof(cl_mem), (void *)&dev_mpot);
-   ezcl_set_kernel_arg(kernel_refine_potential,16, sizeof(cl_mem), (void *)&dev_redscratch);
-   ezcl_set_kernel_arg(kernel_refine_potential,17, sizeof(cl_mem), (void *)&dev_result);
-   ezcl_set_kernel_arg(kernel_refine_potential,18, local_work_size*sizeof(cl_state_t), NULL);
-   ezcl_set_kernel_arg(kernel_refine_potential,19, local_work_size*sizeof(cl_int8), NULL);
-
-   ezcl_enqueue_ndrange_kernel(command_queue, kernel_refine_potential, 1, NULL, &global_work_size, &local_work_size, NULL);
-
-   // Second reduction stage: collapse the per-work-group counts into dev_result.
-   mesh->gpu_rezone_count2(block_size, local_work_size, dev_redscratch, dev_result);
-
-   int count[2] = {0, 0};
-   ezcl_enqueue_read_buffer(command_queue, dev_result, CL_TRUE, 0, sizeof(cl_int2), count, NULL);
-   icount = count[0];
-   jcount = count[1];
-
-   ezcl_device_memory_delete(dev_redscratch);
-   ezcl_device_memory_delete(dev_result);
-
-   if (TIMING_LEVEL >= 2) {
-      gpu_timers[STATE_TIMER_CALC_MPOT] += (long)(cpu_timer_stop(tstart_lev2)*1.0e9);
-   }
-
-   int my_result = mesh->gpu_refine_smooth(dev_mpot, icount, jcount);
-
-   gpu_timers[STATE_TIMER_REFINE_POTENTIAL] += (long)(cpu_timer_stop(tstart_cpu)*1.0e9);
-
-   return((size_t)my_result);
-}
-#endif
-
-/**
- * Sums H * dx * dy over all cells whose celltype is REAL_CELL.
- *
- * @param enhanced_precision_sum  SUM_KAHAN selects the compensated
- *        (sum, correction) accumulation; SUM_REGULAR selects a plain running
- *        sum. Any other value leaves the result at 0.0.
- * @return the total; reduced over MPI_COMM_WORLD when built with MPI and
- *         mesh->parallel is set, otherwise the local total
- */
-double State::mass_sum(int enhanced_precision_sum)
-{
-   size_t &ncells = mesh->ncells;
-   int *celltype  = mesh->celltype;
-   int *level     = mesh->level;
-
-   struct timeval tstart_cpu;
-   cpu_timer_start(&tstart_cpu);
-
-   double partial = 0.0;
-   double result  = 0.0;
-
-   if (enhanced_precision_sum == SUM_KAHAN) {
-      struct esum_type local;
-#ifdef HAVE_MPI
-      struct esum_type global;
-#endif
-
-      local.sum        = 0.0;
-      local.correction = 0.0;
-
-      for (int cell = 0; cell < (int)ncells; cell++) {
-         // Boundary cells do not contribute.
-         if (celltype[cell] != REAL_CELL) continue;
-
-         const int lev = level[cell];
-         // The arithmetic below is kept in exactly this form and order; the
-         // reported total is always local.sum + local.correction.
-         double next_term = H[cell]*mesh->lev_deltax[lev]*mesh->lev_deltay[lev] + local.correction;
-         double updated   = local.sum + local.correction;
-         local.correction = next_term - (updated - local.sum);
-         local.sum        = updated;
-      }
-
-#ifdef HAVE_MPI
-      if (mesh->parallel) {
-         MPI_Allreduce(&local, &global, 1, MPI_TWO_DOUBLES, KNUTH_SUM, MPI_COMM_WORLD);
-         result = global.sum + global.correction;
-      } else {
-         result = local.sum + local.correction;
-      }
-#else
-      result = local.sum + local.correction;
-#endif
-
-   } else if (enhanced_precision_sum == SUM_REGULAR) {
-      for (uint cell = 0; cell < ncells; cell++){
-         if (celltype[cell] != REAL_CELL) continue;
-
-         const int lev = level[cell];
-         partial += H[cell]*mesh->lev_deltax[lev]*mesh->lev_deltay[lev];
-      }
-#ifdef HAVE_MPI
-      if (mesh->parallel) {
-         MPI_Allreduce(&partial, &result, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
-      } else {
-         result = partial;
-      }
-#else
-      result = partial;
-#endif
-   }
-
-   cpu_timers[STATE_TIMER_MASS_SUM] += cpu_timer_stop(tstart_cpu);
-
-   return(result);
-}
-
-#ifdef HAVE_OPENCL
-/**
- * GPU counterpart of mass_sum(): reduces the mass over the cells on the
- * device with a two-stage reduction and reads the result back to the host.
- *
- * @param enhanced_precision_sum  non-zero selects the "epsum" kernels, which
- *        work on (sum, correction) pairs; zero selects the plain sum kernels.
- *        NOTE(review): mass_sum() compares against SUM_KAHAN / SUM_REGULAR
- *        while this routine only tests for non-zero -- confirm the two agree
- *        for every value callers pass.
- * @return the total; reduced over MPI_COMM_WORLD when built with MPI and
- *         mesh->parallel is set, otherwise the local device total
- */
-double State::gpu_mass_sum(int enhanced_precision_sum)
-{
-   struct timeval tstart_cpu;
-   cpu_timer_start(&tstart_cpu);
-
-   cl_command_queue command_queue = ezcl_get_command_queue();
-
-   size_t &ncells       = mesh->ncells;
-   cl_mem &dev_levdx    = mesh->dev_levdx;
-   cl_mem &dev_levdy    = mesh->dev_levdy;
-   cl_mem &dev_celltype = mesh->dev_celltype;
-   cl_mem &dev_level    = mesh->dev_level;
-
-   assert(dev_H);
-   assert(dev_level);
-   assert(dev_levdx);
-   assert(dev_levdy);
-   assert(dev_celltype);
-
-   size_t one = 1;
-   cl_mem dev_mass_sum, dev_redscratch;
-   double gpu_mass_sum_total;
-
-   size_t local_work_size  = 128;
-   // Round the cell count up to a whole number of work-groups.
-   size_t global_work_size = ((ncells+local_work_size - 1) /local_work_size) * local_work_size;
-   size_t block_size       = global_work_size/local_work_size;
-
-   // The kernels take their sizes as cl_int. Passing the address of a size_t
-   // with sizeof(cl_int) only copies the right bytes on little-endian hosts,
-   // so hand over explicit cl_int copies instead.
-   cl_int ncells_arg  = (cl_int)ncells;
-   cl_int nblocks_arg = (cl_int)block_size;
-
-   if (enhanced_precision_sum) {
-      dev_mass_sum   = ezcl_malloc(NULL, const_cast<char *>("dev_mass_sum"),   &one,        sizeof(cl_real2_t), CL_MEM_READ_WRITE, 0);
-      dev_redscratch = ezcl_malloc(NULL, const_cast<char *>("dev_redscratch"), &block_size, sizeof(cl_real2_t), CL_MEM_READ_WRITE, 0);
-
-      /*
-       * Stage 1 arguments, in the order set below:
-       *    0 ncells (cl_int)   1 dev_H       2 dev_level      3 dev_levdx
-       *    4 dev_levdy         5 dev_celltype 6 dev_mass_sum  7 dev_redscratch
-       *    8 local scratch, local_work_size * sizeof(cl_real2_t)
-       */
-      ezcl_set_kernel_arg(kernel_reduce_epsum_mass_stage1of2, 0, sizeof(cl_int), (void *)&ncells_arg);
-      ezcl_set_kernel_arg(kernel_reduce_epsum_mass_stage1of2, 1, sizeof(cl_mem), (void *)&dev_H);
-      ezcl_set_kernel_arg(kernel_reduce_epsum_mass_stage1of2, 2, sizeof(cl_mem), (void *)&dev_level);
-      ezcl_set_kernel_arg(kernel_reduce_epsum_mass_stage1of2, 3, sizeof(cl_mem), (void *)&dev_levdx);
-      ezcl_set_kernel_arg(kernel_reduce_epsum_mass_stage1of2, 4, sizeof(cl_mem), (void *)&dev_levdy);
-      ezcl_set_kernel_arg(kernel_reduce_epsum_mass_stage1of2, 5, sizeof(cl_mem), (void *)&dev_celltype);
-      ezcl_set_kernel_arg(kernel_reduce_epsum_mass_stage1of2, 6, sizeof(cl_mem), (void *)&dev_mass_sum);
-      ezcl_set_kernel_arg(kernel_reduce_epsum_mass_stage1of2, 7, sizeof(cl_mem), (void *)&dev_redscratch);
-      ezcl_set_kernel_arg(kernel_reduce_epsum_mass_stage1of2, 8, local_work_size*sizeof(cl_real2_t), NULL);
-
-      ezcl_enqueue_ndrange_kernel(command_queue, kernel_reduce_epsum_mass_stage1of2, 1, NULL, &global_work_size, &local_work_size, NULL);
-
-      if (block_size > 1) {
-         /*
-          * Stage 2 arguments, in the order set below:
-          *    0 block_size (cl_int)   1 dev_mass_sum   2 dev_redscratch
-          *    3 local scratch, local_work_size * sizeof(cl_real2_t)
-          * Launched as a single work-group.
-          */
-         ezcl_set_kernel_arg(kernel_reduce_epsum_mass_stage2of2, 0, sizeof(cl_int), (void *)&nblocks_arg);
-         ezcl_set_kernel_arg(kernel_reduce_epsum_mass_stage2of2, 1, sizeof(cl_mem), (void *)&dev_mass_sum);
-         ezcl_set_kernel_arg(kernel_reduce_epsum_mass_stage2of2, 2, sizeof(cl_mem), (void *)&dev_redscratch);
-         ezcl_set_kernel_arg(kernel_reduce_epsum_mass_stage2of2, 3, local_work_size*sizeof(cl_real2_t), NULL);
-
-         ezcl_enqueue_ndrange_kernel(command_queue, kernel_reduce_epsum_mass_stage2of2, 1, NULL, &local_work_size, &local_work_size, NULL);
-      }
-
-      struct esum_type local, global;
-      real2_t mass_sum;
-
-      ezcl_enqueue_read_buffer(command_queue, dev_mass_sum, CL_TRUE, 0, 1*sizeof(cl_real2_t), &mass_sum, NULL);
-
-      local.sum         = mass_sum.s0;
-      local.correction  = mass_sum.s1;
-      global.sum        = local.sum;
-      global.correction = local.correction;
-#ifdef HAVE_MPI
-      // Only combine across ranks for a distributed mesh, matching mass_sum().
-      if (mesh->parallel) {
-         MPI_Allreduce(&local, &global, 1, MPI_TWO_DOUBLES, KNUTH_SUM, MPI_COMM_WORLD);
-      }
-#endif
-      gpu_mass_sum_total = global.sum + global.correction;
-   } else {
-      dev_mass_sum   = ezcl_malloc(NULL, const_cast<char *>("dev_mass_sum"),   &one,        sizeof(cl_real_t), CL_MEM_READ_WRITE, 0);
-      dev_redscratch = ezcl_malloc(NULL, const_cast<char *>("dev_redscratch"), &block_size, sizeof(cl_real_t), CL_MEM_READ_WRITE, 0);
-
-      /*
-       * Stage 1 arguments, in the order set below:
-       *    0 ncells (cl_int)   1 dev_H       2 dev_level      3 dev_levdx
-       *    4 dev_levdy         5 dev_celltype 6 dev_mass_sum  7 dev_redscratch
-       *    8 local scratch, local_work_size * sizeof(cl_real_t)
-       */
-      ezcl_set_kernel_arg(kernel_reduce_sum_mass_stage1of2, 0, sizeof(cl_int), (void *)&ncells_arg);
-      ezcl_set_kernel_arg(kernel_reduce_sum_mass_stage1of2, 1, sizeof(cl_mem), (void *)&dev_H);
-      ezcl_set_kernel_arg(kernel_reduce_sum_mass_stage1of2, 2, sizeof(cl_mem), (void *)&dev_level);
-      ezcl_set_kernel_arg(kernel_reduce_sum_mass_stage1of2, 3, sizeof(cl_mem), (void *)&dev_levdx);
-      ezcl_set_kernel_arg(kernel_reduce_sum_mass_stage1of2, 4, sizeof(cl_mem), (void *)&dev_levdy);
-      ezcl_set_kernel_arg(kernel_reduce_sum_mass_stage1of2, 5, sizeof(cl_mem), (void *)&dev_celltype);
-      ezcl_set_kernel_arg(kernel_reduce_sum_mass_stage1of2, 6, sizeof(cl_mem), (void *)&dev_mass_sum);
-      ezcl_set_kernel_arg(kernel_reduce_sum_mass_stage1of2, 7, sizeof(cl_mem), (void *)&dev_redscratch);
-      ezcl_set_kernel_arg(kernel_reduce_sum_mass_stage1of2, 8, local_work_size*sizeof(cl_real_t), NULL);
-
-      ezcl_enqueue_ndrange_kernel(command_queue, kernel_reduce_sum_mass_stage1of2, 1, NULL, &global_work_size, &local_work_size, NULL);
-
-      if (block_size > 1) {
-         /*
-          * Stage 2 arguments, in the order set below:
-          *    0 block_size (cl_int)   1 dev_mass_sum   2 dev_redscratch
-          *    3 local scratch, local_work_size * sizeof(cl_real_t)
-          * Launched as a single work-group.
-          */
-         ezcl_set_kernel_arg(kernel_reduce_sum_mass_stage2of2, 0, sizeof(cl_int), (void *)&nblocks_arg);
-         ezcl_set_kernel_arg(kernel_reduce_sum_mass_stage2of2, 1, sizeof(cl_mem), (void *)&dev_mass_sum);
-         ezcl_set_kernel_arg(kernel_reduce_sum_mass_stage2of2, 2, sizeof(cl_mem), (void *)&dev_redscratch);
-         ezcl_set_kernel_arg(kernel_reduce_sum_mass_stage2of2, 3, local_work_size*sizeof(cl_real_t), NULL);
-
-         ezcl_enqueue_ndrange_kernel(command_queue, kernel_reduce_sum_mass_stage2of2, 1, NULL, &local_work_size, &local_work_size, NULL);
-      }
-
-      double local_sum, global_sum;
-      real_t mass_sum;
-
-      ezcl_enqueue_read_buffer(command_queue, dev_mass_sum, CL_TRUE, 0, 1*sizeof(cl_real_t), &mass_sum, NULL);
-
-      local_sum  = mass_sum;
-      global_sum = local_sum;
-#ifdef HAVE_MPI
-      // Only combine across ranks for a distributed mesh, matching mass_sum().
-      if (mesh->parallel) {
-         MPI_Allreduce(&local_sum, &global_sum, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
-      }
-#endif
-      gpu_mass_sum_total = global_sum;
-   }
-
-   ezcl_device_memory_delete(dev_redscratch);
-   ezcl_device_memory_delete(dev_mass_sum);
-
-   gpu_timers[STATE_TIMER_MASS_SUM] += (long)(cpu_timer_stop(tstart_cpu)*1.0e9);
-
-   return(gpu_mass_sum_total);
-}
-#endif
-
-#ifdef HAVE_OPENCL
-/**
- * Allocates the device buffers dev_H, dev_U and dev_V through
- * gpu_state_memory, each with ncells elements of sizeof(cl_state_t) bytes.
- *
- * @param ncells  number of elements requested for each buffer
- */
-void State::allocate_device_memory(size_t ncells)
-{
-   const size_t elem_bytes = sizeof(cl_state_t);
-
-   dev_H = (cl_mem)gpu_state_memory.memory_malloc(
-              ncells, elem_bytes, const_cast<char *>("dev_H"), DEVICE_REGULAR_MEMORY);
-   dev_U = (cl_mem)gpu_state_memory.memory_malloc(
-              ncells, elem_bytes, const_cast<char *>("dev_U"), DEVICE_REGULAR_MEMORY);
-   dev_V = (cl_mem)gpu_state_memory.memory_malloc(
-              ncells, elem_bytes, const_cast<char *>("dev_V"), DEVICE_REGULAR_MEMORY);
-}
-#endif
-
-/**
- * Releases the current dev_H/dev_U/dev_V buffers and allocates new ones with
- * ncells elements each. Without OpenCL support this is a no-op.
- *
- * @param ncells  number of elements for each re-allocated buffer
- */
-void State::resize_old_device_memory(size_t ncells)
-{
-#ifdef HAVE_OPENCL
-   gpu_state_memory.memory_delete(dev_H);
-   gpu_state_memory.memory_delete(dev_U);
-   gpu_state_memory.memory_delete(dev_V);
-   // Same three allocations as allocate_device_memory(); share the code.
-   allocate_device_memory(ncells);
-#else
-   (void)ncells; // unused without OpenCL
-#endif
-}
-
-#ifdef HAVE_MPI
-/**
- * Delegates load balancing to the mesh, passing it state_memory, and then
- * calls memory_reset_ptrs().
- *
- * @param numcells  forwarded by reference to mesh->do_load_balance_local()
- */
-void State::do_load_balance_local(size_t &numcells)
-{
-   mesh->do_load_balance_local(numcells, NULL, state_memory);
-
-   // Runs unconditionally after the mesh call.
-   memory_reset_ptrs();
-}
-#endif
-#ifdef HAVE_OPENCL
-#ifdef HAVE_MPI
-/**
- * Delegates GPU load balancing to the mesh, passing it gpu_state_memory.
- * When the mesh call returns non-zero, dev_H/dev_U/dev_V are looked up again
- * by name in gpu_state_memory.
- *
- * @param numcells  forwarded by reference to mesh->gpu_do_load_balance_local()
- */
-void State::gpu_do_load_balance_local(size_t &numcells)
-{
-   if (! mesh->gpu_do_load_balance_local(numcells, NULL, gpu_state_memory)) {
-      return;
-   }
-
-   dev_H = (cl_mem)gpu_state_memory.get_memory_ptr("dev_H");
-   dev_U = (cl_mem)gpu_state_memory.get_memory_ptr("dev_U");
-   dev_V = (cl_mem)gpu_state_memory.get_memory_ptr("dev_V");
-}
-#endif
-#endif
-
-// Elapsed time of the most recent serial (non-parallel) CPU run reported
-// through output_timing_info(); used as the baseline for speed-up ratios.
-static double reference_time = 0.0;
-
-/**
- * Aggregates the state and mesh timers for the CPU and/or GPU and prints one
- * timing block per requested device through output_timer_block().
- *
- * A serial CPU call records its elapsed time in reference_time. Later calls
- * report reference_time divided by their own elapsed time as the speed-up:
- * the CPU ratio only for parallel CPU runs, the GPU ratio for any GPU run.
- * Each device gets its own ratio, so a call that requests both devices no
- * longer prints the GPU ratio in the CPU block.
- *
- * @param do_cpu_calc         non-zero to report CPU timers
- * @param do_gpu_calc         non-zero to report GPU timers
- * @param total_elapsed_time  overall time, passed through to the timer block
- */
-void State::output_timing_info(int do_cpu_calc, int do_gpu_calc, double total_elapsed_time)
-{
-   int parallel = mesh->parallel;
-
-   double cpu_time_compute = 0.0;
-   double gpu_time_compute = 0.0;
-
-   double cpu_elapsed_time = 0.0;
-   double gpu_elapsed_time = 0.0;
-
-   double cpu_mesh_time = 0.0;
-   double gpu_mesh_time = 0.0;
-
-   if (do_cpu_calc) {
-      cpu_time_compute = get_cpu_timer(STATE_TIMER_SET_TIMESTEP) +
-                         get_cpu_timer(STATE_TIMER_FINITE_DIFFERENCE) +
-                         get_cpu_timer(STATE_TIMER_REFINE_POTENTIAL) +
-                         get_cpu_timer(STATE_TIMER_REZONE_ALL) +
-                         mesh->get_cpu_timer(MESH_TIMER_CALC_NEIGHBORS) +
-                         mesh->get_cpu_timer(MESH_TIMER_LOAD_BALANCE) +
-                         get_cpu_timer(STATE_TIMER_MASS_SUM) +
-                         mesh->get_cpu_timer(MESH_TIMER_CALC_SPATIAL_COORDINATES) +
-                         mesh->get_cpu_timer(MESH_TIMER_PARTITION);
-      cpu_elapsed_time = cpu_time_compute;
-      cpu_mesh_time = mesh->get_cpu_timer(MESH_TIMER_CALC_NEIGHBORS) +
-                      get_cpu_timer(STATE_TIMER_REZONE_ALL) +
-                      mesh->get_cpu_timer(MESH_TIMER_REFINE_SMOOTH) +
-                      mesh->get_cpu_timer(MESH_TIMER_LOAD_BALANCE);
-   }
-   if (do_gpu_calc) {
-      gpu_time_compute = get_gpu_timer(STATE_TIMER_APPLY_BCS) +
-                         get_gpu_timer(STATE_TIMER_SET_TIMESTEP) +
-                         get_gpu_timer(STATE_TIMER_FINITE_DIFFERENCE) +
-                         get_gpu_timer(STATE_TIMER_REFINE_POTENTIAL) +
-                         get_gpu_timer(STATE_TIMER_REZONE_ALL) +
-                         mesh->get_gpu_timer(MESH_TIMER_CALC_NEIGHBORS) +
-                         mesh->get_gpu_timer(MESH_TIMER_LOAD_BALANCE) +
-                         get_gpu_timer(STATE_TIMER_MASS_SUM) +
-                         mesh->get_gpu_timer(MESH_TIMER_CALC_SPATIAL_COORDINATES) +
-                         mesh->get_gpu_timer(MESH_TIMER_COUNT_BCS);
-      // GPU elapsed time additionally includes host<->device transfers.
-      gpu_elapsed_time = get_gpu_timer(STATE_TIMER_WRITE) + gpu_time_compute + get_gpu_timer(STATE_TIMER_READ);
-      gpu_mesh_time = mesh->get_gpu_timer(MESH_TIMER_CALC_NEIGHBORS) +
-                      get_gpu_timer(STATE_TIMER_REZONE_ALL) +
-                      mesh->get_gpu_timer(MESH_TIMER_REFINE_SMOOTH) +
-                      mesh->get_gpu_timer(MESH_TIMER_LOAD_BALANCE);
-   }
-
-   if (! parallel && do_cpu_calc) reference_time = cpu_elapsed_time;
-
-   // A ratio of 0.0 means "no speed-up to report"; output_timer_block()
-   // only prints ratios greater than zero.
-   double cpu_speedup_ratio = 0.0;
-   double gpu_speedup_ratio = 0.0;
-   if (reference_time > 0.0){
-      if (do_cpu_calc && parallel && cpu_elapsed_time > 0.0) {
-         cpu_speedup_ratio = reference_time/cpu_elapsed_time;
-      }
-      if (do_gpu_calc && gpu_elapsed_time > 0.0) {
-         gpu_speedup_ratio = reference_time/gpu_elapsed_time;
-      }
-   }
-
-   if (do_cpu_calc) {
-      output_timer_block(MESH_DEVICE_CPU, cpu_elapsed_time, cpu_mesh_time, cpu_time_compute, total_elapsed_time, cpu_speedup_ratio);
-   }
-   if (do_gpu_calc) {
-      output_timer_block(MESH_DEVICE_GPU, gpu_elapsed_time, gpu_mesh_time, gpu_time_compute, total_elapsed_time, gpu_speedup_ratio);
-   }
-}
-
-/**
- * Prints the timing report for one device. The report body is only compiled
- * in when TIMING is defined.
- *
- * For a non-parallel mesh only rank 0 reports (the rank is queried from MPI
- * when available); every other rank returns immediately.
- *
- * @param device_type         MESH_DEVICE_CPU or MESH_DEVICE_GPU; also used as
- *                            an index into two-entry label tables
- * @param elapsed_time        total device time in seconds
- * @param mesh_time           time spent in mesh operations, in seconds
- * @param compute_time        device compute time in seconds
- * @param total_elapsed_time  overall run time in seconds
- * @param speedup_ratio       printed only when greater than zero
- */
-void State::output_timer_block(mesh_device_types device_type, double elapsed_time,
-   double mesh_time, double compute_time, double total_elapsed_time, double speedup_ratio)
-{
-   int mype = mesh->mype;
-   int parallel = mesh->parallel;
-
-   int rank = mype;
-   if (! parallel) {
-      // We need to get rank info for check routines
-#ifdef HAVE_MPI
-      MPI_Comm_rank(MPI_COMM_WORLD, &rank);
-#endif
-   }
-
-   if (! parallel && rank) return;
-
-#ifdef TIMING
-   const char *device_string = (device_type == MESH_DEVICE_CPU) ? "CPU" : "GPU";
-
-   if (rank == 0) {
-      printf("\n");
-      printf("~~~~~~~~~~~~~~~~ Device timing information ~~~~~~~~~~~~~~~~~~\n");
-   }
-
-   if (rank == 0 && parallel) {
-      printf("\n%3s: Parallel timings\n\n",device_string);
-   }
-
-   if (device_type == MESH_DEVICE_GPU) {
-      mesh->parallel_output("GPU: Write to device time was", get_gpu_timer(STATE_TIMER_WRITE), 0, "s");
-      mesh->parallel_output("GPU: Read from device time was", get_gpu_timer(STATE_TIMER_READ), 0, "s");
-   }
-
-   const char *device_compute_string[2] = {
-      "CPU: Device compute time was",
-      "GPU: Device compute time was"
-   };
-   mesh->parallel_output(device_compute_string[device_type], compute_time, 0, "s");
-
-   timer_output(STATE_TIMER_SET_TIMESTEP, device_type, 1);
-   timer_output(STATE_TIMER_FINITE_DIFFERENCE, device_type, 1);
-   timer_output(STATE_TIMER_REFINE_POTENTIAL, device_type, 1);
-   timer_output(STATE_TIMER_CALC_MPOT, device_type, 2);
-   mesh->timer_output(MESH_TIMER_REFINE_SMOOTH, device_type, 2);
-   timer_output(STATE_TIMER_REZONE_ALL, device_type, 1);
-   mesh->timer_output(MESH_TIMER_PARTITION, device_type, 1);
-   mesh->timer_output(MESH_TIMER_CALC_NEIGHBORS, device_type, 1);
-   if (mesh->get_calc_neighbor_type() == HASH_TABLE) {
-      mesh->timer_output(MESH_TIMER_HASH_SETUP, device_type, 2);
-      mesh->timer_output(MESH_TIMER_HASH_QUERY, device_type, 2);
-      if (parallel) {
-         mesh->timer_output(MESH_TIMER_FIND_BOUNDARY, device_type, 2);
-         mesh->timer_output(MESH_TIMER_PUSH_SETUP, device_type, 2);
-         mesh->timer_output(MESH_TIMER_PUSH_BOUNDARY, device_type, 2);
-         mesh->timer_output(MESH_TIMER_LOCAL_LIST, device_type, 2);
-         mesh->timer_output(MESH_TIMER_LAYER1, device_type, 2);
-         mesh->timer_output(MESH_TIMER_LAYER2, device_type, 2);
-         mesh->timer_output(MESH_TIMER_LAYER_LIST, device_type, 2);
-         mesh->timer_output(MESH_TIMER_COPY_MESH_DATA, device_type, 2);
-         mesh->timer_output(MESH_TIMER_FILL_MESH_GHOST, device_type, 2);
-         mesh->timer_output(MESH_TIMER_FILL_NEIGH_GHOST, device_type, 2);
-         mesh->timer_output(MESH_TIMER_SET_CORNER_NEIGH, device_type, 2);
-         mesh->timer_output(MESH_TIMER_NEIGH_ADJUST, device_type, 2);
-         mesh->timer_output(MESH_TIMER_SETUP_COMM, device_type, 2);
-      }
-   } else {
-      mesh->timer_output(MESH_TIMER_KDTREE_SETUP, device_type, 2);
-      mesh->timer_output(MESH_TIMER_KDTREE_QUERY, device_type, 2);
-   }
-   timer_output(STATE_TIMER_MASS_SUM, device_type, 1);
-   if (parallel) {
-      mesh->timer_output(MESH_TIMER_LOAD_BALANCE, device_type, 1);
-   }
-   mesh->timer_output(MESH_TIMER_CALC_SPATIAL_COORDINATES, device_type, 1);
-   if (! mesh->have_boundary) {
-      mesh->timer_output(MESH_TIMER_COUNT_BCS, device_type, 1);
-   }
-   if (rank == 0) printf("=============================================================\n");
-
-   const char *profile_string[2] = {
-      "Profiling: Total CPU time was",
-      "Profiling: Total GPU time was"
-   };
-   mesh->parallel_output(profile_string[device_type], elapsed_time, 0, "s");
-   if (elapsed_time > 600.0){
-      mesh->parallel_output(" or ", elapsed_time/60.0, 0, "min");
-   }
-
-   // Avoid a NaN percentage when no device time was recorded.
-   double mesh_percent = (elapsed_time > 0.0) ? mesh_time/elapsed_time*100.0 : 0.0;
-
-   if (rank == 0) printf("-------------------------------------------------------------\n");
-   mesh->parallel_output("Mesh Ops (Neigh+rezone+smooth+balance) ",mesh_time, 0, "s");
-   mesh->parallel_output("Mesh Ops Percentage ",mesh_percent, 0, "percent");
-   if (rank == 0) printf("=============================================================\n");
-
-   mesh->parallel_output("Profiling: Total time was",total_elapsed_time, 0, "s");
-   // The minutes line belongs to the total time, so test the total time
-   // rather than the per-device elapsed time.
-   if (total_elapsed_time > 600.0){
-      mesh->parallel_output(" or ",total_elapsed_time/60.0, 0, "min");
-   }
-
-   if (speedup_ratio > 0.0) {
-      mesh->parallel_output("Parallel Speed-up: ",speedup_ratio, 0, "Reference Serial CPU");
-   }
-
-   if (rank == 0) printf("=============================================================\n");
-#endif
-}
-
-/**
- * Prints one state timer through mesh->parallel_output().
- *
- * The label is only formatted on rank 0 (mesh->mype == 0); every other rank
- * passes an empty string.
- *
- * @param category     which state timer to report; also indexes
- *                     state_timer_descriptor[] for the label text
- * @param device_type  MESH_DEVICE_CPU reads the CPU timer, anything else the
- *                     GPU timer; also indexes the two-entry device label table
- * @param timer_level  nesting level: sets the label indent precision
- *                     (2 * timer_level) and is forwarded to parallel_output()
- */
-void State::timer_output(state_timer_category category, mesh_device_types device_type, int timer_level)
-{
-   int mype = mesh->mype;
-
-   double local_time = 0.0;
-   if (device_type == MESH_DEVICE_CPU){
-      local_time = get_cpu_timer(category);
-   } else {
-      local_time = get_gpu_timer(category);
-   }
-
-   // Start from an empty label (the previous initializer "/0" was a
-   // two-character string, not a terminator).
-   char string[80] = "";
-
-   if (mype == 0) {
-      const char *blank=" ";
-
-      const char *device_string[2] = {
-         "CPU",
-         "GPU"
-      };
-
-      // Bounded formatting so a long descriptor can never overrun the buffer.
-      snprintf(string, sizeof(string), "%3s: %.*s%-30.30s\t", device_string[device_type],
-               2*timer_level, blank, state_timer_descriptor[category]);
-   }
-
-   mesh->parallel_output(string, local_time, timer_level, "s");
-}
-
-#ifdef HAVE_OPENCL
-/**
- * Debug aid: reads dev_H/dev_U/dev_V back from the device and prints a DEBUG
- * line for every cell whose host value differs from the device value by more
- * than STATE_EPS.
- *
- * @param string  label included in each DEBUG line
- * @param cycle   cycle number included in each DEBUG line
- * @param ncells  number of cells to read back and compare
- */
-void State::compare_state_gpu_global_to_cpu_global(const char* string, int cycle, uint ncells)
-{
-   // Nothing to compare; also avoids taking &vec[0] of an empty vector.
-   if (ncells == 0) return;
-
-   cl_command_queue command_queue = ezcl_get_command_queue();
-
-   vector<state_t>H_check(ncells);
-   vector<state_t>U_check(ncells);
-   vector<state_t>V_check(ncells);
-   // Only the last read is blocking (CL_TRUE).
-   ezcl_enqueue_read_buffer(command_queue, dev_H, CL_FALSE, 0, ncells*sizeof(cl_state_t), &H_check[0], NULL);
-   ezcl_enqueue_read_buffer(command_queue, dev_U, CL_FALSE, 0, ncells*sizeof(cl_state_t), &U_check[0], NULL);
-   ezcl_enqueue_read_buffer(command_queue, dev_V, CL_TRUE,  0, ncells*sizeof(cl_state_t), &V_check[0], NULL);
-
-   // The cell index is unsigned, so it is printed with %u.
-   for (uint ic = 0; ic < ncells; ic++){
-      if (fabs(H[ic]-H_check[ic]) > STATE_EPS) printf("DEBUG %s at cycle %d H & H_check %u %lf %lf\n",string,cycle,ic,H[ic],H_check[ic]);
-      if (fabs(U[ic]-U_check[ic]) > STATE_EPS) printf("DEBUG %s at cycle %d U & U_check %u %lf %lf\n",string,cycle,ic,U[ic],U_check[ic]);
-      if (fabs(V[ic]-V_check[ic]) > STATE_EPS) printf("DEBUG %s at cycle %d V & V_check %u %lf %lf\n",string,cycle,ic,V[ic],V_check[ic]);
-   }
-}
-#endif
-
-/**
- * Debug aid: gathers the local H/U/V arrays from all ranks and prints a DEBUG
- * line for every global cell whose gathered value differs from state_global
- * by more than STATE_EPS.
- *
- * NOTE(review): without HAVE_MPI nothing is gathered, so the check vectors
- * stay value-initialized and are compared as-is -- confirm this routine is
- * only meant to be called in MPI builds.
- *
- * @param state_global   state holding the reference H/U/V arrays
- * @param string         label included in each DEBUG line
- * @param cycle          cycle number included in each DEBUG line
- * @param ncells         number of local cells sent by this rank
- * @param ncells_global  number of cells in the gathered arrays
- * @param nsizes         per-rank receive counts for MPI_Allgatherv
- * @param ndispl         per-rank displacements for MPI_Allgatherv
- */
-void State::compare_state_cpu_local_to_cpu_global(State *state_global, const char* string, int cycle, uint ncells, uint ncells_global, int *nsizes, int *ndispl)
-{
-   state_t *H_global = state_global->H;
-   state_t *U_global = state_global->U;
-   state_t *V_global = state_global->V;
-
-   vector<state_t>H_check(ncells_global);
-   vector<state_t>U_check(ncells_global);
-   vector<state_t>V_check(ncells_global);
-#ifdef HAVE_MPI
-   MPI_Allgatherv(&H[0], ncells, MPI_STATE_T, &H_check[0], &nsizes[0], &ndispl[0], MPI_STATE_T, MPI_COMM_WORLD);
-   MPI_Allgatherv(&U[0], ncells, MPI_STATE_T, &U_check[0], &nsizes[0], &ndispl[0], MPI_STATE_T, MPI_COMM_WORLD);
-   MPI_Allgatherv(&V[0], ncells, MPI_STATE_T, &V_check[0], &nsizes[0], &ndispl[0], MPI_STATE_T, MPI_COMM_WORLD);
-#else
-   // Unused without MPI.
-   (void)ncells;
-   (void)nsizes;
-   (void)ndispl;
-#endif
-
-   // The cell index is unsigned, so it is printed with %u.
-   for (uint ic = 0; ic < ncells_global; ic++){
-      if (fabs(H_global[ic]-H_check[ic]) > STATE_EPS) printf("DEBUG %s at cycle %d H & H_check %u %lf %lf\n",string,cycle,ic,H_global[ic],H_check[ic]);
-      if (fabs(U_global[ic]-U_check[ic]) > STATE_EPS) printf("DEBUG %s at cycle %d U & U_check %u %lf %lf\n",string,cycle,ic,U_global[ic],U_check[ic]);
-      if (fabs(V_global[ic]-V_check[ic]) > STATE_EPS) printf("DEBUG %s at cycle %d V & V_check %u %lf %lf\n",string,cycle,ic,V_global[ic],V_check[ic]);
-   }
-}
-
-#ifdef HAVE_OPENCL
-/*
- * Debug aid for MPI + OpenCL builds.  Runs four comparisons and prints a
- * DEBUG line for every cell that differs by more than STATE_EPS:
- *   1. device-local dev_H/U/V read back to the host vs. host-local H/U/V
- *   2. the read-back device values gathered over all ranks vs. state_global
- *   3. host-local H/U/V gathered over all ranks vs. state_global
- *   4. state_global's device buffers read back vs. state_global's host arrays
- * Comparisons 2-4 only print on rank 0 (mype == 0).
- *
- *   state_global   -- state object holding the global host and device arrays
- *   ncells         -- number of local cells
- *   ncells_global  -- number of cells in the global arrays
- *   mype, ncycle   -- rank and cycle number echoed in the messages
- *   nsizes, ndispl -- per-rank counts and displacements for MPI_Allgatherv
- *
- * Without HAVE_MPI the routine does nothing.
- */
-void State::compare_state_all_to_gpu_local(State *state_global, uint ncells, uint ncells_global, int mype, int ncycle, int *nsizes, int *ndispl)
-{
-#ifdef HAVE_MPI
-   cl_command_queue command_queue = ezcl_get_command_queue();
-
-   state_t *H_global = state_global->H;
-   state_t *U_global = state_global->U;
-   state_t *V_global = state_global->V;
-   cl_mem &dev_H_global = state_global->dev_H;
-   cl_mem &dev_U_global = state_global->dev_U;
-   cl_mem &dev_V_global = state_global->dev_V;
-
-   // Need to compare dev_H to H, etc
-   // NOTE(review): the CL_FALSE/CL_TRUE argument is presumably the blocking
-   // flag, with only the last read of each group waiting -- confirm in ezcl.
-   vector<state_t>H_save(ncells);
-   vector<state_t>U_save(ncells);
-   vector<state_t>V_save(ncells);
-   ezcl_enqueue_read_buffer(command_queue, dev_H, CL_FALSE, 0, ncells*sizeof(cl_state_t), &H_save[0], NULL);
-   ezcl_enqueue_read_buffer(command_queue, dev_U, CL_FALSE, 0, ncells*sizeof(cl_state_t), &U_save[0], NULL);
-   ezcl_enqueue_read_buffer(command_queue, dev_V, CL_TRUE,  0, ncells*sizeof(cl_state_t), &V_save[0], NULL);
-   for (uint ic = 0; ic < ncells; ic++){
-      if (fabs(H[ic]-H_save[ic]) > STATE_EPS) printf("%d: DEBUG finite_difference 1 at cycle %d H & H_save %u %lf %lf \n",mype,ncycle,ic,H[ic],H_save[ic]);
-      if (fabs(U[ic]-U_save[ic]) > STATE_EPS) printf("%d: DEBUG finite_difference 1 at cycle %d U & U_save %u %lf %lf \n",mype,ncycle,ic,U[ic],U_save[ic]);
-      if (fabs(V[ic]-V_save[ic]) > STATE_EPS) printf("%d: DEBUG finite_difference 1 at cycle %d V & V_save %u %lf %lf \n",mype,ncycle,ic,V[ic],V_save[ic]);
-   }
-
-   // And compare dev_H gathered to H_global, etc
-   vector<state_t>H_save_global(ncells_global);
-   vector<state_t>U_save_global(ncells_global);
-   vector<state_t>V_save_global(ncells_global);
-   MPI_Allgatherv(&H_save[0], nsizes[mype], MPI_STATE_T, &H_save_global[0], &nsizes[0], &ndispl[0], MPI_STATE_T, MPI_COMM_WORLD);
-   MPI_Allgatherv(&U_save[0], nsizes[mype], MPI_STATE_T, &U_save_global[0], &nsizes[0], &ndispl[0], MPI_STATE_T, MPI_COMM_WORLD);
-   MPI_Allgatherv(&V_save[0], nsizes[mype], MPI_STATE_T, &V_save_global[0], &nsizes[0], &ndispl[0], MPI_STATE_T, MPI_COMM_WORLD);
-   if (mype == 0) {
-      for (uint ic = 0; ic < ncells_global; ic++){
-         if (fabs(H_global[ic]-H_save_global[ic]) > STATE_EPS) printf("%d: DEBUG finite_difference 2 at cycle %d H_global & H_save_global %u %lf %lf \n",mype,ncycle,ic,H_global[ic],H_save_global[ic]);
-         if (fabs(U_global[ic]-U_save_global[ic]) > STATE_EPS) printf("%d: DEBUG finite_difference 2 at cycle %d U_global & U_save_global %u %lf %lf \n",mype,ncycle,ic,U_global[ic],U_save_global[ic]);
-         if (fabs(V_global[ic]-V_save_global[ic]) > STATE_EPS) printf("%d: DEBUG finite_difference 2 at cycle %d V_global & V_save_global %u %lf %lf \n",mype,ncycle,ic,V_global[ic],V_save_global[ic]);
-      }
-   }
-
-   // And compare H gathered to H_global, etc
-   MPI_Allgatherv(&H[0], nsizes[mype], MPI_STATE_T, &H_save_global[0], &nsizes[0], &ndispl[0], MPI_STATE_T, MPI_COMM_WORLD);
-   MPI_Allgatherv(&U[0], nsizes[mype], MPI_STATE_T, &U_save_global[0], &nsizes[0], &ndispl[0], MPI_STATE_T, MPI_COMM_WORLD);
-   MPI_Allgatherv(&V[0], nsizes[mype], MPI_STATE_T, &V_save_global[0], &nsizes[0], &ndispl[0], MPI_STATE_T, MPI_COMM_WORLD);
-   if (mype == 0) {
-      for (uint ic = 0; ic < ncells_global; ic++){
-         if (fabs(H_global[ic]-H_save_global[ic]) > STATE_EPS) printf("DEBUG finite_difference 3 at cycle %d H_global & H_save_global %u %lf %lf \n",ncycle,ic,H_global[ic],H_save_global[ic]);
-         if (fabs(U_global[ic]-U_save_global[ic]) > STATE_EPS) printf("DEBUG finite_difference 3 at cycle %d U_global & U_save_global %u %lf %lf \n",ncycle,ic,U_global[ic],U_save_global[ic]);
-         if (fabs(V_global[ic]-V_save_global[ic]) > STATE_EPS) printf("DEBUG finite_difference 3 at cycle %d V_global & V_save_global %u %lf %lf \n",ncycle,ic,V_global[ic],V_save_global[ic]);
-      }
-   }
-
-   // Now the global dev_H_global to H_global, etc
-   ezcl_enqueue_read_buffer(command_queue, dev_H_global, CL_FALSE, 0, ncells_global*sizeof(cl_state_t), &H_save_global[0], NULL);
-   ezcl_enqueue_read_buffer(command_queue, dev_U_global, CL_FALSE, 0, ncells_global*sizeof(cl_state_t), &U_save_global[0], NULL);
-   ezcl_enqueue_read_buffer(command_queue, dev_V_global, CL_TRUE,  0, ncells_global*sizeof(cl_state_t), &V_save_global[0], NULL);
-   if (mype == 0) {
-      for (uint ic = 0; ic < ncells_global; ic++){
-         if (fabs(H_global[ic]-H_save_global[ic]) > STATE_EPS) printf("%d: DEBUG finite_difference 4 at cycle %d H_global & H_save_global %u %lf %lf \n",mype,ncycle,ic,H_global[ic],H_save_global[ic]);
-         if (fabs(U_global[ic]-U_save_global[ic]) > STATE_EPS) printf("%d: DEBUG finite_difference 4 at cycle %d U_global & U_save_global %u %lf %lf \n",mype,ncycle,ic,U_global[ic],U_save_global[ic]);
-         if (fabs(V_global[ic]-V_save_global[ic]) > STATE_EPS) printf("%d: DEBUG finite_difference 4 at cycle %d V_global & V_save_global %u %lf %lf \n",mype,ncycle,ic,V_global[ic],V_save_global[ic]);
-      }
-   }
-#else
-   // Just to get rid of compiler warnings; the branch is never taken.
-   if (1 == 2) printf("%d: DEBUG -- ncells %u ncells_global %u ncycle %d nsizes[0] %d ndispl %d state_global %p\n",
-      mype,ncells,ncells_global,ncycle,nsizes[0],ndispl[0],(void *)state_global);
-#endif
-}
-#endif
-
-/*
- * Prints a short description of the state object to stdout: for OpenCL
- * builds the handle, element count and element size of dev_H, dev_U, dev_V
- * and dev_mpot, followed by the report produced by state_memory.
- */
-void State::print_object_info(void)
-{
-   printf(" ---- State object info -----\n");
-
-#ifdef HAVE_OPENCL
-   int num_elements, elsize;
-
-   // %p expects a void pointer, so the cl_mem handles are cast explicitly.
-   num_elements = ezcl_get_device_mem_nelements(dev_H);
-   elsize = ezcl_get_device_mem_elsize(dev_H);
-   printf("dev_H ptr : %p nelements %d elsize %d\n",(void *)dev_H,num_elements,elsize);
-   num_elements = ezcl_get_device_mem_nelements(dev_U);
-   elsize = ezcl_get_device_mem_elsize(dev_U);
-   printf("dev_U ptr : %p nelements %d elsize %d\n",(void *)dev_U,num_elements,elsize);
-   num_elements = ezcl_get_device_mem_nelements(dev_V);
-   elsize = ezcl_get_device_mem_elsize(dev_V);
-   printf("dev_V ptr : %p nelements %d elsize %d\n",(void *)dev_V,num_elements,elsize);
-   num_elements = ezcl_get_device_mem_nelements(dev_mpot);
-   elsize = ezcl_get_device_mem_elsize(dev_mpot);
-   printf("dev_mpot ptr : %p nelements %d elsize %d\n",(void *)dev_mpot,num_elements,elsize);
-#endif
-   state_memory.memory_report();
-}
-
-/*
- * Writes a per-cell table to mesh->fp, opening the per-rank file "out<mype>"
- * first if no stream is open yet.  When the nlft array is at least
- * ncells_ghost long the table lists indices and neighbors; otherwise it
- * lists H, U, V together with i, j and level.
- *
- * If the output file cannot be opened a message goes to stderr and nothing
- * is written.
- */
-void State::print(void)
-{
-   if (mesh->fp == NULL) {
-      // "out" plus any int value needs up to 15 bytes including the NUL;
-      // the previous 10-byte buffer overflowed for ranks of 7+ digits.
-      char filename[32];
-      snprintf(filename, sizeof(filename), "out%1d", mesh->mype);
-      mesh->fp = fopen(filename, "w");
-      if (mesh->fp == NULL) {
-         fprintf(stderr, "%d: State::print could not open %s for writing\n", mesh->mype, filename);
-         return;
-      }
-   }
-
-   if (mesh->mesh_memory.get_memory_size(mesh->nlft) >= mesh->ncells_ghost){
-      fprintf(mesh->fp,"%d: index global i j lev nlft nrht nbot ntop \n",mesh->mype);
-      // Real cells first, then the ghost cells that follow them.
-      for (uint ic=0; ic<mesh->ncells; ic++) {
-         fprintf(mesh->fp,"%d: %6d %6d %4d %4d %4d %4d %4d %4d %4d \n", mesh->mype,ic, ic+mesh->noffset,mesh->i[ic], mesh->j[ic], mesh->level[ic], mesh->nlft[ic], mesh->nrht[ic], mesh->nbot[ic], mesh->ntop[ic]);
-      }
-      for (uint ic=mesh->ncells; ic<mesh->ncells_ghost; ic++) {
-         fprintf(mesh->fp,"%d: %6d %6d %4d %4d %4d %4d %4d %4d %4d \n", mesh->mype,ic, ic+mesh->noffset,mesh->i[ic], mesh->j[ic], mesh->level[ic], mesh->nlft[ic], mesh->nrht[ic], mesh->nbot[ic], mesh->ntop[ic]);
-      }
-   } else {
-      fprintf(mesh->fp,"%d: index H U V i j lev\n",mesh->mype);
-      for (uint ic=0; ic<mesh->ncells_ghost; ic++) {
-         fprintf(mesh->fp,"%d: %6d %lf %lf %lf %4d %4d %4d \n", mesh->mype,ic, H[ic], U[ic], V[ic], mesh->i[ic], mesh->j[ic], mesh->level[ic]);
-      }
-   }
-}
-
-// Version tag stored with the state checkpoint data and checked on restore.
-const int CRUX_STATE_VERSION = 102;
-// Number of integer header values stored with the state checkpoint.
-const int num_int_vals = 1;
-
-/*
- * Returns the checkpoint size in bytes: three values per cell at the
- * configured precision, the integer header values, and whatever the mesh
- * reports for its own checkpoint.
- */
-size_t State::get_checkpoint_size(void)
-{
-#ifdef FULL_PRECISION
-   size_t total_bytes = mesh->ncells*3*sizeof(double);
-#else
-   size_t total_bytes = mesh->ncells*3*sizeof(float);
-#endif
-   total_bytes = total_bytes + num_int_vals*sizeof(int);
-   total_bytes = total_bytes + mesh->get_checkpoint_size();
-
-   return total_bytes;
-}
-
-/*
- * Writes a checkpoint through crux: the mesh stores its data first, then the
- * state version header and both timer arrays are registered with
- * state_memory just long enough for crux to store that database.
- */
-void State::store_checkpoint(Crux *crux)
-{
-   // Mesh data goes into the checkpoint ahead of the state data.
-   mesh->store_checkpoint(crux);
-
-   // Scalar header values; slot 0 carries the state format version.
-   int int_vals[num_int_vals];
-   int_vals[0] = CRUX_STATE_VERSION;
-
-   // Register the header and timers so they are stored with the database.
-   state_memory.memory_add(int_vals,   (size_t)num_int_vals,     4, "state_int_vals",   RESTART_DATA | REPLICATED_DATA);
-   state_memory.memory_add(cpu_timers, (size_t)STATE_TIMER_SIZE, 8, "state_cpu_timers", RESTART_DATA);
-   state_memory.memory_add(gpu_timers, (size_t)STATE_TIMER_SIZE, 8, "state_gpu_timers", RESTART_DATA);
-
-   crux->store_MallocPlus(state_memory);
-
-   // The temporary entries must not outlive this call (int_vals is a local).
-   state_memory.memory_remove(int_vals);
-   state_memory.memory_remove(cpu_timers);
-   state_memory.memory_remove(gpu_timers);
-}
-
-/*
- * Restores a checkpoint through crux: the mesh is restored first, the state
- * arrays are allocated from the stored "storage" value, and the version
- * header and both timer arrays are then read back via state_memory.
- *
- * A version mismatch between the file and CRUX_STATE_VERSION is fatal; the
- * process exits with a failure status so that callers and job scripts can
- * tell the restart did not succeed.
- */
-void State::restore_checkpoint(Crux *crux)
-{
-   int storage;
-   // Restore mesh data first
-   mesh->restore_checkpoint(crux);
-   // NOTE(review): 7 is presumably the length of the name "storage" -- confirm.
-   crux->restore_named_ints("storage", 7, &storage, 1);
-
-   // Create memory for restoring data into
-   int int_vals[num_int_vals];
-
-   // allocate is a state method
-   allocate(storage);
-
-   // Add to memory database for restoring checkpoint
-   state_memory.memory_add(int_vals, (size_t)num_int_vals, 4, "state_int_vals", RESTART_DATA | REPLICATED_DATA);
-   state_memory.memory_add(cpu_timers, (size_t)STATE_TIMER_SIZE, 8, "state_cpu_timers", RESTART_DATA);
-   state_memory.memory_add(gpu_timers, (size_t)STATE_TIMER_SIZE, 8, "state_gpu_timers", RESTART_DATA);
-
-   // Restore memory database
-   crux->restore_MallocPlus(state_memory);
-
-   // Check version number
-   if (int_vals[0] != CRUX_STATE_VERSION) {
-      printf("CRUX version mismatch for state data, version on file is %d, version in code is %d\n",
-         int_vals[0], CRUX_STATE_VERSION);
-      // Was exit(0), which reported success for a failed restore.
-      exit(EXIT_FAILURE);
-   }
-
-   // NOTE(review): the cpu dump is guarded by DEBUG_RESTORE_VALS and the gpu
-   // dump by DEBUG_RESTORED_VALS; one of the two names may be a typo -- confirm.
-#ifdef DEBUG_RESTORE_VALS
-   if (DEBUG_RESTORE_VALS) {
-      printf("\n");
-      printf(" === Restored state cpu timers ===\n");
-      for (int i = 0; i < STATE_TIMER_SIZE; i++){
-         printf(" %-30s %lg\n",state_timer_descriptor[i], cpu_timers[i]);
-      }
-      printf(" === Restored state cpu timers ===\n");
-      printf("\n");
-   }
-#endif
-
-#ifdef DEBUG_RESTORED_VALS
-   if (DEBUG_RESTORED_VALS) {
-      printf("\n");
-      printf(" === Restored state gpu timers ===\n");
-      for (int i = 0; i < STATE_TIMER_SIZE; i++){
-         printf(" %-30s %lld\n",state_timer_descriptor[i], gpu_timers[i]);
-      }
-      printf(" === Restored state gpu_timers ===\n");
-      printf("\n");
-   }
-#endif
-
-   // The temporary entries must not outlive this call (int_vals is a local).
-   state_memory.memory_remove(int_vals);
-   state_memory.memory_remove(cpu_timers);
-   state_memory.memory_remove(gpu_timers);
-
-   memory_reset_ptrs();
-}
-
-// Added overloaded print to get mesh information to print in each cycle
-// Brian Atkinson (5-29-14)
-/*
- * Opens the file "iteration<iteration>", stores the stream in mesh->fp and
- * writes a summary header followed by the per-cell table.  When
- * iteration_mass is exactly 0.0 only the initial mass is reported; otherwise
- * the iteration mass, the mass difference and its percentage are included.
- *
- * If the file cannot be opened a message goes to stderr, mesh->fp is left
- * untouched and nothing is written.
- */
-void State::print(int iteration, double simTime, double initial_mass, double iteration_mass, double mass_diff_percentage)
-{
-   char filename[40];
-   snprintf(filename, sizeof(filename), "iteration%d", iteration);
-
-   // Open into a local first so a failed fopen is never dereferenced.
-   FILE *out = fopen(filename,"w");
-   if (out == NULL) {
-      fprintf(stderr, "%d: State::print could not open %s for writing\n", mesh->mype, filename);
-      return;
-   }
-   // NOTE(review): a stream already held in mesh->fp is replaced here without
-   // being closed, as before -- confirm who owns and closes it.
-   mesh->fp = out;
-
-   // These two header lines are common to both report variants.
-   fprintf(mesh->fp,"Iteration = %d\t\tSimuation Time = %lf\n", iteration, simTime);
-   fprintf(mesh->fp,"mesh->ncells = %lu\t\tmesh->ncells_ghost = %lu\n", mesh->ncells, mesh->ncells_ghost);
-   if (iteration_mass == 0.0){
-      fprintf(mesh->fp,"Initial Mass: %14.12lg\t\tSimulation Time: %lf\n", initial_mass, simTime);
-   }
-   else{
-      double mass_diff = iteration_mass - initial_mass;
-      fprintf(mesh->fp,"Initial Mass: %14.12lg\t\tIteration Mass: %14.12lg\n", initial_mass, iteration_mass);
-      fprintf(mesh->fp,"Mass Difference: %12.6lg\t\tMass Difference Percentage: %12.6lg%%\n", mass_diff, mass_diff_percentage);
-   }
-
-   if (mesh->mesh_memory.get_memory_size(mesh->nlft) >= mesh->ncells_ghost){
-      fprintf(mesh->fp,"%d: index global i j lev nlft nrht nbot ntop \n",mesh->mype);
-      // Real cells first, then the ghost cells that follow them.
-      for (uint ic=0; ic<mesh->ncells; ic++) {
-         fprintf(mesh->fp,"%d: %6d %6d %4d %4d %4d %4d %4d %4d %4d \n", mesh->mype,ic, ic+mesh->noffset,mesh->i[ic], mesh->j[ic], mesh->level[ic], mesh->nlft[ic], mesh->nrht[ic], mesh->nbot[ic], mesh->ntop[ic]);
-      }
-      for (uint ic=mesh->ncells; ic<mesh->ncells_ghost; ic++) {
-         fprintf(mesh->fp,"%d: %6d %6d %4d %4d %4d %4d %4d %4d %4d \n", mesh->mype,ic, ic+mesh->noffset,mesh->i[ic], mesh->j[ic], mesh->level[ic], mesh->nlft[ic], mesh->nrht[ic], mesh->nbot[ic], mesh->ntop[ic]);
-      }
-   } else {
-      fprintf(mesh->fp,"%d: index H U V i j lev\n",mesh->mype);
-      for (uint ic=0; ic<mesh->ncells_ghost; ic++) {
-         fprintf(mesh->fp,"%d: %6d %lf %lf %lf %4d %4d %4d \n", mesh->mype,ic, H[ic], U[ic], V[ic], mesh->i[ic], mesh->j[ic], mesh->level[ic]);
-      }
-   }
-}
-
-/*
- * Writes a DEBUG banner with ncycle and a per-cell table to mesh->fp,
- * opening the per-rank file "out<mype>" first if no stream is open yet.
- * When mesh->nlft is set the table includes the neighbor indices, and cells
- * at or beyond the size of the H array are listed without H, U, V.
- *
- * If the output file cannot be opened a message goes to stderr and nothing
- * is written.
- */
-void State::print_local(int ncycle)
-{
-   if (mesh->fp == NULL) {
-      // "out" plus any int value needs up to 15 bytes including the NUL;
-      // the previous 10-byte buffer overflowed for ranks of 7+ digits.
-      char filename[32];
-      snprintf(filename, sizeof(filename), "out%1d", mesh->mype);
-      mesh->fp = fopen(filename, "w");
-      if (mesh->fp == NULL) {
-         fprintf(stderr, "%d: State::print_local could not open %s for writing\n", mesh->mype, filename);
-         return;
-      }
-   }
-
-   fprintf(mesh->fp,"DEBUG in print_local ncycle is %d\n",ncycle);
-   if (mesh->nlft != NULL){
-      fprintf(mesh->fp,"%d: index H U V i j lev nlft nrht nbot ntop\n",mesh->mype);
-      uint state_size = state_memory.get_memory_size(H);
-      for (uint ic=0; ic<mesh->ncells_ghost; ic++) {
-         if (ic >= state_size){
-            // No state values exist for this cell; print mesh data only.
-            fprintf(mesh->fp,"%d: %6d %4d %4d %4d %4d %4d %4d %4d\n", mesh->mype,ic, mesh->i[ic], mesh->j[ic], mesh->level[ic], mesh->nlft[ic], mesh->nrht[ic], mesh->nbot[ic], mesh->ntop[ic]);
-         } else {
-            fprintf(mesh->fp,"%d: %6d %lf %lf %lf %4d %4d %4d %4d %4d %4d %4d\n", mesh->mype,ic, H[ic], U[ic], V[ic], mesh->i[ic], mesh->j[ic], mesh->level[ic], mesh->nlft[ic], mesh->nrht[ic], mesh->nbot[ic], mesh->ntop[ic]);
-         }
-      }
-   } else {
-      fprintf(mesh->fp,"%d: index H U V i j lev\n",mesh->mype);
-      for (uint ic=0; ic<mesh->ncells_ghost; ic++) {
-         fprintf(mesh->fp,"%d: %6d %lf %lf %lf %4d %4d %4d\n", mesh->mype,ic, H[ic], U[ic], V[ic], mesh->i[ic], mesh->j[ic], mesh->level[ic]);
-      }
-   }
-}
-
-/*
- * Opens "failure.log", stores the stream in mesh->fp and writes the failure
- * reason (NaN when got_nan is set, mass difference otherwise), the mass
- * summary and the per-cell table.
- *
- * If the file cannot be opened a message goes to stderr, mesh->fp is left
- * untouched and nothing is written.
- */
-void State::print_failure_log(int iteration, double simTime, double initial_mass, double iteration_mass, double mass_diff_percentage, bool got_nan){
-   const char *filename = "failure.log";
-
-   // Open into a local first so a failed fopen is never dereferenced.
-   FILE *out = fopen(filename,"w");
-   if (out == NULL) {
-      fprintf(stderr, "%d: State::print_failure_log could not open %s for writing\n", mesh->mype, filename);
-      return;
-   }
-   // NOTE(review): a stream already held in mesh->fp is replaced here without
-   // being closed, as before -- confirm who owns and closes it.
-   mesh->fp = out;
-
-   double mass_diff = iteration_mass - initial_mass;
-   if(got_nan){
-      fprintf(mesh->fp,"Failed because of nan for H_sum was equal to NAN\n");
-   }
-   else{
-      fprintf(mesh->fp,"Failed because mass difference is outside of accepted percentage\n");
-   }
-   fprintf(mesh->fp,"Iteration = %d\t\tSimuation Time = %lf\n", iteration, simTime);
-   fprintf(mesh->fp,"mesh->ncells = %lu\t\tmesh->ncells_ghost = %lu\n", mesh->ncells, mesh->ncells_ghost);
-   fprintf(mesh->fp,"Initial Mass: %14.12lg\t\tIteration Mass: %14.12lg\n", initial_mass, iteration_mass);
-   fprintf(mesh->fp,"Mass Difference: %12.6lg\t\tMass Difference Percentage: %12.6lg%%\n", mass_diff, mass_diff_percentage);
-
-   if (mesh->mesh_memory.get_memory_size(mesh->nlft) >= mesh->ncells_ghost){
-      fprintf(mesh->fp,"%d: index global i j lev nlft nrht nbot ntop \n",mesh->mype);
-      // Real cells first, then the ghost cells that follow them.
-      for (uint ic=0; ic<mesh->ncells; ic++) {
-         fprintf(mesh->fp,"%d: %6d %6d %4d %4d %4d %4d %4d %4d %4d \n", mesh->mype,ic, ic+mesh->noffset,mesh->i[ic], mesh->j[ic], mesh->level[ic], mesh->nlft[ic], mesh->nrht[ic], mesh->nbot[ic], mesh->ntop[ic]);
-      }
-      for (uint ic=mesh->ncells; ic<mesh->ncells_ghost; ic++) {
-         fprintf(mesh->fp,"%d: %6d %6d %4d %4d %4d %4d %4d %4d %4d \n", mesh->mype,ic, ic+mesh->noffset,mesh->i[ic], mesh->j[ic], mesh->level[ic], mesh->nlft[ic], mesh->nrht[ic], mesh->nbot[ic], mesh->ntop[ic]);
-      }
-   } else {
-      fprintf(mesh->fp,"%d: index H U V i j lev\n",mesh->mype);
-      for (uint ic=0; ic<mesh->ncells_ghost; ic++) {
-         fprintf(mesh->fp,"%d: %6d %lf %lf %lf %4d %4d %4d \n", mesh->mype,ic, H[ic], U[ic], V[ic], mesh->i[ic], mesh->j[ic], mesh->level[ic]);
-      }
-   }
-}
-
-/*
- * Opens "rollback<backup_attempt>.log", stores the stream in mesh->fp and
- * writes the rollback reason (NaN when error_status is STATUS_NAN, mass
- * difference otherwise), the attempt counters, the mass summary and the
- * per-cell table.
- *
- * If the file cannot be opened a message goes to stderr, mesh->fp is left
- * untouched and nothing is written.
- */
-void State::print_rollback_log(int iteration, double simTime, double initial_mass, double iteration_mass, double mass_diff_percentage, int backup_attempt, int num_of_attempts, int error_status){
-   char filename[40];
-   snprintf(filename, sizeof(filename), "rollback%d.log", backup_attempt);
-
-   // Open into a local first so a failed fopen is never dereferenced.
-   FILE *out = fopen(filename,"w");
-   if (out == NULL) {
-      fprintf(stderr, "%d: State::print_rollback_log could not open %s for writing\n", mesh->mype, filename);
-      return;
-   }
-   // NOTE(review): a stream already held in mesh->fp is replaced here without
-   // being closed, as before -- confirm who owns and closes it.
-   mesh->fp = out;
-
-   double mass_diff = iteration_mass - initial_mass;
-   if(error_status == STATUS_NAN){
-      fprintf(mesh->fp,"Rolling back because of nan for H_sum was equal to NAN\n");
-   }
-   else{
-      fprintf(mesh->fp,"Rolling back because mass difference is outside of accepted percentage\n");
-   }
-   fprintf(mesh->fp,"Rollback attempt %d of %d ---> Number of attempts left:%d\n", backup_attempt, num_of_attempts, num_of_attempts - backup_attempt);
-   fprintf(mesh->fp,"Iteration = %d\t\tSimuation Time = %lf\n", iteration, simTime);
-   fprintf(mesh->fp,"mesh->ncells = %lu\t\tmesh->ncells_ghost = %lu\n", mesh->ncells, mesh->ncells_ghost);
-   fprintf(mesh->fp,"Initial Mass: %14.12lg\t\tIteration Mass: %14.12lg\n", initial_mass, iteration_mass);
-   fprintf(mesh->fp,"Mass Difference: %12.6lg\t\tMass Difference Percentage: %12.6lg%%\n", mass_diff, mass_diff_percentage);
-
-   if (mesh->mesh_memory.get_memory_size(mesh->nlft) >= mesh->ncells_ghost){
-      fprintf(mesh->fp,"%d: index global i j lev nlft nrht nbot ntop \n",mesh->mype);
-      // Real cells first, then the ghost cells that follow them.
-      for (uint ic=0; ic<mesh->ncells; ic++) {
-         fprintf(mesh->fp,"%d: %6d %6d %4d %4d %4d %4d %4d %4d %4d \n", mesh->mype,ic, ic+mesh->noffset,mesh->i[ic], mesh->j[ic], mesh->level[ic], mesh->nlft[ic], mesh->nrht[ic], mesh->nbot[ic], mesh->ntop[ic]);
-      }
-      for (uint ic=mesh->ncells; ic<mesh->ncells_ghost; ic++) {
-         fprintf(mesh->fp,"%d: %6d %6d %4d %4d %4d %4d %4d %4d %4d \n", mesh->mype,ic, ic+mesh->noffset,mesh->i[ic], mesh->j[ic], mesh->level[ic], mesh->nlft[ic], mesh->nrht[ic], mesh->nbot[ic], mesh->ntop[ic]);
-      }
-   } else {
-      fprintf(mesh->fp,"%d: index H U V i j lev\n",mesh->mype);
-      for (uint ic=0; ic<mesh->ncells_ghost; ic++) {
-         fprintf(mesh->fp,"%d: %6d %lf %lf %lf %4d %4d %4d \n", mesh->mype,ic, H[ic], U[ic], V[ic], mesh->i[ic], mesh->j[ic], mesh->level[ic]);
-      }
-   }
-}
Removed: test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/state.h
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C%2B%2B/CLAMR/state.h?rev=312481&view=auto
==============================================================================
--- test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/state.h (original)
+++ test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/state.h (removed)
@@ -1,364 +0,0 @@
-/*
- * Copyright (c) 2011-2013, Los Alamos National Security, LLC.
- * All rights Reserved.
- *
- * Copyright 2011-2012. Los Alamos National Security, LLC. This software was produced
- * under U.S. Government contract DE-AC52-06NA25396 for Los Alamos National
- * Laboratory (LANL), which is operated by Los Alamos National Security, LLC
- * for the U.S. Department of Energy. The U.S. Government has rights to use,
- * reproduce, and distribute this software. NEITHER THE GOVERNMENT NOR LOS
- * ALAMOS NATIONAL SECURITY, LLC MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR
- * ASSUMES ANY LIABILITY FOR THE USE OF THIS SOFTWARE. If software is modified
- * to produce derivative works, such modified software should be clearly marked,
- * so as not to confuse it with the version available from LANL.
- *
- * Additionally, redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of the Los Alamos National Security, LLC, Los Alamos
- * National Laboratory, LANL, the U.S. Government, nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE LOS ALAMOS NATIONAL SECURITY, LLC AND
- * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT
- * NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL LOS ALAMOS NATIONAL
- * SECURITY, LLC OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
- * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
- * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- *
- * CLAMR -- LA-CC-11-094
- * This research code is being developed as part of the
- * 2011 X Division Summer Workshop for the express purpose
- * of a collaborative code for development of ideas in
- * the implementation of AMR codes for Exascale platforms
- *
- * AMR implementation of the Wave code previously developed
- * as a demonstration code for regular grids on Exascale platforms
- * as part of the Supercomputing Challenge and Los Alamos
- * National Laboratory
- *
- * Authors: Bob Robey XCP-2 brobey at lanl.gov
- * Neal Davis davis68 at lanl.gov, davis68 at illinois.edu
- * David Nicholaeff dnic at lanl.gov, mtrxknight at aol.com
- * Dennis Trujillo dptrujillo at lanl.gov, dptru10 at gmail.com
- *
- */
-#ifndef STATE_H_
-#define STATE_H_
-
-#include <list>
-#include "MallocPlus.h"
-#include "mesh.h"
-#include "crux.h"
-#ifdef HAVE_OPENCL
-#include "ezcl/ezcl.h"
-#endif
-//#include "l7/l7.h"
-
-#define STATUS_OK 0
-#define STATUS_NAN 1
-#define STATUS_MASS_LOSS 2
-
-#if !defined(FULL_PRECISION) && !defined(MIXED_PRECISION) && !defined(MINIMUM_PRECISION)
-#define FULL_PRECISION
-#endif
-#ifdef NO_CL_DOUBLE
-#undef FULL_PRECISION
-#undef MIXED_PRECISION
-#define MINIMUM_PRECISION
-#endif
-
-#if defined(MINIMUM_PRECISION)
- typedef float state_t; // this is for physics state variables ncell in size
- typedef float real_t; // this is used for intermediate calculations
- typedef struct
- {
- float s0;
- float s1;
- } real2_t;
-#define CONSERVATION_EPS 15.0
-#ifdef HAVE_OPENCL
- typedef cl_float cl_state_t; // for gpu physics state variables
- typedef cl_float4 cl_state4_t; // for gpu physics state variables
- typedef cl_float cl_real_t; // for intermediate gpu physics state variables
- typedef cl_float2 cl_real2_t; // for intermediate gpu physics state variables
- typedef cl_float4 cl_real4_t; // for intermediate gpu physics state variables
-#endif
-#ifdef HAVE_MPI
- #define MPI_STATE_T MPI_FLOAT // for MPI communication for physics state variables
- #define MPI_REAL_T MPI_FLOAT // for MPI communication for physics state variables
- #define L7_STATE_T L7_FLOAT
- #define L7_REAL_T L7_FLOAT
-#endif
-
-#elif defined(MIXED_PRECISION) // intermediate values calculated high precision and stored as floats
- typedef float state_t;
- typedef double real_t;
- typedef struct
- {
- double s0;
- double s1;
- } real2_t;
-#define CONSERVATION_EPS .02
-#ifdef HAVE_OPENCL
- typedef cl_float cl_state_t;
- typedef cl_float4 cl_state4_t;
- typedef cl_double cl_real_t; // for intermediate gpu physics state variables
- typedef cl_double2 cl_real2_t; // for intermediate gpu physics state variables
- typedef cl_double4 cl_real4_t; // for intermediate gpu physics state variables
-#endif
-#ifdef HAVE_MPI
- #define MPI_STATE_T MPI_FLOAT
- #define MPI_REAL_T MPI_DOUBLE
- #define L7_STATE_T L7_FLOAT
- #define L7_REAL_T L7_DOUBLE
-#endif
-
-#elif defined(FULL_PRECISION)
- typedef double state_t;
- typedef double real_t;
- typedef struct
- {
- double s0;
- double s1;
- } real2_t;
-#define CONSERVATION_EPS .02
-#ifdef HAVE_OPENCL
- typedef cl_double cl_state_t;
- typedef cl_double4 cl_state4_t;
- typedef cl_double cl_real_t; // for intermediate gpu physics state variables
- typedef cl_double2 cl_real2_t; // for intermediate gpu physics state variables
- typedef cl_double4 cl_real4_t; // for intermediate gpu physics state variables
-#endif
-#ifdef HAVE_MPI
- #define MPI_STATE_T MPI_DOUBLE
- #define MPI_REAL_T MPI_DOUBLE
- #define L7_STATE_T L7_DOUBLE
- #define L7_REAL_T L7_DOUBLE
-#endif
-#endif
-
-extern "C" void do_calc(void);
-
-enum SUM_TYPE {
- SUM_REGULAR,
- SUM_KAHAN
-};
-
-
-enum SIGN_RULE {
- DIAG_RULE,
- X_RULE,
- Y_RULE,
-};
-
-enum state_timers
-{
- STATE_TIMER_APPLY_BCS,
- STATE_TIMER_SET_TIMESTEP,
- STATE_TIMER_FINITE_DIFFERENCE,
- STATE_TIMER_REFINE_POTENTIAL,
- STATE_TIMER_CALC_MPOT,
- STATE_TIMER_REZONE_ALL,
- STATE_TIMER_MASS_SUM,
- STATE_TIMER_READ,
- STATE_TIMER_WRITE,
- STATE_TIMER_SIZE
-};
-
-typedef enum state_timers state_timer_category;
-
-using namespace std;
-
-class State {
-
-public:
- MallocPlus state_memory;
- MallocPlus gpu_state_memory;
- Mesh *mesh;
- state_t *H;
- state_t *U;
- state_t *V;
-
-#ifdef HAVE_OPENCL
- cl_mem dev_H;
- cl_mem dev_U;
- cl_mem dev_V;
-
- cl_mem dev_mass_sum;
- cl_mem dev_deltaT;
-
- cl_event apply_BCs_event;
-
- cl_mem dev_mpot;
- //cl_mem dev_ioffset;
- cl_mem dev_result;
-#endif
-
- double cpu_timers[STATE_TIMER_SIZE];
- long long gpu_timers[STATE_TIMER_SIZE];
-
- // constructor -- allocates state arrays to size ncells
- State(Mesh *mesh_in);
-
- void init(int do_gpu_calc);
- void terminate(void);
-
- /* Memory routines for linked list of state arrays */
- void allocate(size_t ncells);
- void allocate_from_backup_file(FILE *fp);
- void allocate_for_rollback(State *state_to_copy);
- void resize(size_t ncells);
- void memory_reset_ptrs(void);
-#ifdef HAVE_OPENCL
- void allocate_device_memory(size_t ncells);
-#endif
- void resize_old_device_memory(size_t ncells);
-
- /* Accessor routines */
- double get_cpu_timer(state_timer_category category) {return(cpu_timers[category]); };
- /* Convert nanoseconds to seconds (scale factor is 1.0e-9) */
- double get_gpu_timer(state_timer_category category) {return((double)(gpu_timers[category])*1.0e-9); };
-
- /* Boundary routines -- not currently used */
- void add_boundary_cells(void);
- void apply_boundary_conditions(void);
- void apply_boundary_conditions_local(void);
- void apply_boundary_conditions_ghost(void);
- void remove_boundary_cells(void);
-
- /*******************************************************************
- * set_timestep
- * Input
- * H, U, V -- from state object
- * celltype, level, lev_delta
- * Output
- * mindeltaT returned
- *******************************************************************/
- double set_timestep(double g, double sigma);
-#ifdef HAVE_OPENCL
- double gpu_set_timestep(double sigma);
-#endif
-
- /*******************************************************************
- * calc finite difference
- * will add ghost region to H, U, V and fill at start of routine
- * Input
- * H, U, V -- from state object
- * nlft, nrht, nbot, ntop, level, celltype -- from mesh object
- * Output
- * H, U, V
- *******************************************************************/
- void calc_finite_difference(double deltaT);
- void calc_finite_difference_via_faces(double deltaT);
-#ifdef HAVE_OPENCL
- void gpu_calc_finite_difference(double deltaT);
-#endif
-
- /*******************************************************************
- * calc refine potential -- state has responsibility to calc initial
- * refinement potential array that is then passed to mesh for
- * smoothing and enforcing refinement rules
- * Input
- * H, U, V -- from state object
- * Output
- * mpot
- * ioffset
- * count
- *******************************************************************/
- size_t calc_refine_potential(vector<int> &mpot, int &icount, int &jcount);
-#ifdef HAVE_OPENCL
- size_t gpu_calc_refine_potential(int &icount, int &jcount);
-#endif
-
- /*******************************************************************
- * rezone all -- most of call is done in mesh
- * Input
- * Mesh and state variables
- * Output
- * New mesh and state variables on refined mesh
- *******************************************************************/
- void rezone_all(int icount, int jcount, vector<int> mpot);
-#ifdef HAVE_OPENCL
- void gpu_rezone_all(int icount, int jcount, bool localStencil);
-#endif
-
- /*******************************************************************
- * load balance -- most of call is done in mesh, but pointers are
- * reset to newly allocated state arrays
- * Input
- * Mesh and state variables
- * Output
- * New mesh and state variables on refined mesh
- *******************************************************************/
-#ifdef HAVE_MPI
- void do_load_balance_local(size_t &numcells);
-#ifdef HAVE_OPENCL
- void gpu_do_load_balance_local(size_t &numcells);
-#endif
-#endif
-
- /*******************************************************************
- * mass sum -- Conservation of mass check
- * Input
- * H from state object
- * Precision type for sum
- * Output
- * total mass is returned
- *******************************************************************/
- double mass_sum(int enhanced_precision_sum);
-#ifdef HAVE_OPENCL
- double gpu_mass_sum(int enhanced_precision_sum);
-#endif
-
- void fill_circle(double circ_radius, double fill_value, double background);
- void state_reorder(vector<int> iorder);
-
- void symmetry_check(const char *string, vector<int> sym_index, double eps,
- SIGN_RULE sign_rule, int &flag);
-
- void output_timing_info(int do_cpu_calc, int do_gpu_calc, double total_elapsed_time);
-
- /* state comparison routines */
-#ifdef HAVE_OPENCL
- void compare_state_gpu_global_to_cpu_global(const char* string, int cycle, uint ncells);
-#endif
- void compare_state_cpu_local_to_cpu_global(State *state_global, const char* string, int cycle, uint ncells, uint ncells_global, int *nsizes, int *ndispl);
-#ifdef HAVE_OPENCL
- void compare_state_all_to_gpu_local(State *state_global, uint ncells, uint ncells_global, int mype, int ncycle, int *nsizes, int *ndispl);
-#endif
-
- void output_timer_block(mesh_device_types device_type, double elapsed_time,
- double mesh_time, double compute_time, double total_elapsed_time, double speedup_ratio);
-
- void timer_output(state_timer_category category, mesh_device_types device_type, int timer_level);
-
- void print(void);
-
- size_t get_checkpoint_size(void);
- void store_checkpoint(Crux *crux);
- void restore_checkpoint(Crux *crux);
- //Added for a second print on every iteration: Brian Atkinson (5-29-14)
- void print(int iteration, double simTime, double initial_mass, double iteration_mass, double mass_diff_percentage);
- void print_local(int ncycle);
- void print_failure_log(int iteration, double simTime, double initial_mass, double iteration_mass, double mass_diff_percentage, bool got_nan);
- void print_rollback_log(int iteration, double simTime, double initial_mass, double iteration_mass, double mass_diff_percentage, int backup_attempt, int num_of_attempts, int error_status);
-
-private:
- State(const State&); // To block copy constructor so copies are not made inadvertently
-
- void print_object_info(void);
-};
-
-#endif // ifndef STATE_H_
-
Removed: test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/timer.c
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C%2B%2B/CLAMR/timer.c?rev=312481&view=auto
==============================================================================
--- test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/timer.c (original)
+++ test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/timer.c (removed)
@@ -1,114 +0,0 @@
-/*
- * Copyright (c) 2011-2012, Los Alamos National Security, LLC.
- * All rights Reserved.
- *
- * Copyright 2011-2012. Los Alamos National Security, LLC. This software was produced
- * under U.S. Government contract DE-AC52-06NA25396 for Los Alamos National
- * Laboratory (LANL), which is operated by Los Alamos National Security, LLC
- * for the U.S. Department of Energy. The U.S. Government has rights to use,
- * reproduce, and distribute this software. NEITHER THE GOVERNMENT NOR LOS
- * ALAMOS NATIONAL SECURITY, LLC MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR
- * ASSUMES ANY LIABILITY FOR THE USE OF THIS SOFTWARE. If software is modified
- * to produce derivative works, such modified software should be clearly marked,
- * so as not to confuse it with the version available from LANL.
- *
- * Additionally, redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of the Los Alamos National Security, LLC, Los Alamos
- * National Laboratory, LANL, the U.S. Government, nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE LOS ALAMOS NATIONAL SECURITY, LLC AND
- * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT
- * NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL LOS ALAMOS NATIONAL
- * SECURITY, LLC OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
- * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
- * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- *
- * CLAMR -- LA-CC-11-094
- * This research code is being developed as part of the
- * 2011 X Division Summer Workshop for the express purpose
- * of a collaborative code for development of ideas in
- * the implementation of AMR codes for Exascale platforms
- *
- * AMR implementation of the Wave code previously developed
- * as a demonstration code for regular grids on Exascale platforms
- * as part of the Supercomputing Challenge and Los Alamos
- * National Laboratory
- *
- * Authors: Bob Robey XCP-2 brobey at lanl.gov
- * Neal Davis davis68 at lanl.gov, davis68 at illinois.edu
- * David Nicholaeff dnic at lanl.gov, mtrxknight at aol.com
- * Dennis Trujillo dptrujillo at lanl.gov, dptru10 at gmail.com
- *
- */
-#include <sys/time.h>
-#include <stdlib.h>
-#include <stdio.h>
-#include <sys/types.h>
-#include <unistd.h>
-#include <string.h>
-#ifdef _OPENMP
-#include <omp.h>
-#endif
-
-#ifdef HAVE_CONFIG_H
-#include "config.h"
-#endif
-
-#include "timer.h"
-
-void cpu_timer_start(struct timeval *tstart_cpu){ // Stores the current gettimeofday() reading in *tstart_cpu.
-#ifdef _OPENMP
- if ( omp_in_parallel() ) { // called from inside an active OpenMP parallel region
-#pragma omp master
- {
- gettimeofday(tstart_cpu, NULL); // only the master thread writes the timestamp; the return value is not checked
- }
- } else {
- gettimeofday(tstart_cpu, NULL); // outside a parallel region: the calling thread takes the timestamp
- }
-#else
- gettimeofday(tstart_cpu, NULL); // build without OpenMP: unconditional timestamp
-#endif
-}
-
-double cpu_timer_stop(struct timeval tstart_cpu){ // Returns the time elapsed since tstart_cpu, in seconds, as a double.
- double result; // NOTE(review): stays unassigned on non-master threads inside a parallel region, yet is returned below
- struct timeval tstop_cpu, tresult; // tstop_cpu: "now"; tresult: field-wise difference now - start
-
-#ifdef _OPENMP
- if ( omp_in_parallel() ) {
-#pragma omp master
- {
- gettimeofday(&tstop_cpu, NULL); // master thread only; return value is not checked
- tresult.tv_sec = tstop_cpu.tv_sec - tstart_cpu.tv_sec;
- tresult.tv_usec = tstop_cpu.tv_usec - tstart_cpu.tv_usec; // no borrow from tv_sec: may be negative (assumes tv_usec is a signed type, as POSIX specifies)
- result = (double)tresult.tv_sec + (double)tresult.tv_usec*1.0e-6; // whole seconds plus microseconds scaled to seconds; a negative usec part simply subtracts
- }
- } else {
- gettimeofday(&tstop_cpu, NULL); // outside a parallel region: same computation on the calling thread
- tresult.tv_sec = tstop_cpu.tv_sec - tstart_cpu.tv_sec;
- tresult.tv_usec = tstop_cpu.tv_usec - tstart_cpu.tv_usec;
- result = (double)tresult.tv_sec + (double)tresult.tv_usec*1.0e-6;
- }
-#else
- gettimeofday(&tstop_cpu, NULL); // build without OpenMP: same computation, unconditionally
- tresult.tv_sec = tstop_cpu.tv_sec - tstart_cpu.tv_sec;
- tresult.tv_usec = tstop_cpu.tv_usec - tstart_cpu.tv_usec;
- result = (double)tresult.tv_sec + (double)tresult.tv_usec*1.0e-6;
-#endif
- return(result);
-}
-
Removed: test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/timer.h
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C%2B%2B/CLAMR/timer.h?rev=312481&view=auto
==============================================================================
--- test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/timer.h (original)
+++ test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/timer.h (removed)
@@ -1,72 +0,0 @@
-/*
- * Copyright (c) 2011-2012, Los Alamos National Security, LLC.
- * All rights Reserved.
- *
- * Copyright 2011-2012. Los Alamos National Security, LLC. This software was produced
- * under U.S. Government contract DE-AC52-06NA25396 for Los Alamos National
- * Laboratory (LANL), which is operated by Los Alamos National Security, LLC
- * for the U.S. Department of Energy. The U.S. Government has rights to use,
- * reproduce, and distribute this software. NEITHER THE GOVERNMENT NOR LOS
- * ALAMOS NATIONAL SECURITY, LLC MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR
- * ASSUMES ANY LIABILITY FOR THE USE OF THIS SOFTWARE. If software is modified
- * to produce derivative works, such modified software should be clearly marked,
- * so as not to confuse it with the version available from LANL.
- *
- * Additionally, redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of the Los Alamos National Security, LLC, Los Alamos
- * National Laboratory, LANL, the U.S. Government, nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE LOS ALAMOS NATIONAL SECURITY, LLC AND
- * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT
- * NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL LOS ALAMOS NATIONAL
- * SECURITY, LLC OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
- * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
- * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- *
- * CLAMR -- LA-CC-11-094
- * This research code is being developed as part of the
- * 2011 X Division Summer Workshop for the express purpose
- * of a collaborative code for development of ideas in
- * the implementation of AMR codes for Exascale platforms
- *
- * AMR implementation of the Wave code previously developed
- * as a demonstration code for regular grids on Exascale platforms
- * as part of the Supercomputing Challenge and Los Alamos
- * National Laboratory
- *
- * Authors: Bob Robey XCP-2 brobey at lanl.gov
- * Neal Davis davis68 at lanl.gov, davis68 at illinois.edu
- * David Nicholaeff dnic at lanl.gov, mtrxknight at aol.com
- * Dennis Trujillo dptrujillo at lanl.gov, dptru10 at gmail.com
- *
- */
-#ifndef _TIMER_H
-#define _TIMER_H
-
-#ifdef __cplusplus
-extern "C"
-{
-#endif
-
-void cpu_timer_start(struct timeval *tstart_cpu); // Writes the current time of day into *tstart_cpu. NOTE(review): struct timeval is used although this header includes nothing itself; includers must bring in <sys/time.h> first.
-double cpu_timer_stop(struct timeval tstart_cpu); // Returns the time elapsed since tstart_cpu (passed by value), in seconds.
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _TIMER_H */
-
Removed: test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/zorder.c
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C%2B%2B/CLAMR/zorder.c?rev=312481&view=auto
==============================================================================
--- test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/zorder.c (original)
+++ test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/zorder.c (removed)
@@ -1,148 +0,0 @@
-/*
- * Copyright (c) 2011-2012, Los Alamos National Security, LLC.
- * All rights Reserved.
- *
- * Copyright 2011-2012. Los Alamos National Security, LLC. This software was produced
- * under U.S. Government contract DE-AC52-06NA25396 for Los Alamos National
- * Laboratory (LANL), which is operated by Los Alamos National Security, LLC
- * for the U.S. Department of Energy. The U.S. Government has rights to use,
- * reproduce, and distribute this software. NEITHER THE GOVERNMENT NOR LOS
- * ALAMOS NATIONAL SECURITY, LLC MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR
- * ASSUMES ANY LIABILITY FOR THE USE OF THIS SOFTWARE. If software is modified
- * to produce derivative works, such modified software should be clearly marked,
- * so as not to confuse it with the version available from LANL.
- *
- * Additionally, redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of the Los Alamos National Security, LLC, Los Alamos
- * National Laboratory, LANL, the U.S. Government, nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE LOS ALAMOS NATIONAL SECURITY, LLC AND
- * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT
- * NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL LOS ALAMOS NATIONAL
- * SECURITY, LLC OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
- * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
- * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- *
- * CLAMR -- LA-CC-11-094
- * This research code is being developed as part of the
- * 2011 X Division Summer Workshop for the express purpose
- * of a collaborative code for development of ideas in
- * the implementation of AMR codes for Exascale platforms
- *
- * AMR implementation of the Wave code previously developed
- * as a demonstration code for regular grids on Exascale platforms
- * as part of the Supercomputing Challenge and Los Alamos
- * National Laboratory
- *
- * Authors: Bob Robey XCP-2 brobey at lanl.gov
- * Neal Davis davis68 at lanl.gov, davis68 at illinois.edu
- * David Nicholaeff dnic at lanl.gov, mtrxknight at aol.com
- * Dennis Trujillo dptrujillo at lanl.gov, dptru10 at gmail.com
- *
- */
-
-#include <stdio.h>
-#include <math.h>
-#include "s7.h"
-#include "zorder.h"
-
-#define DEBUG 0
-
-void calc_zorder(int size, int *i, int *j, int *level, int levmx, int ibase, int *z_index, int *z_order) // Fills z_index[] with interleaved-bit keys built from i[]/j[] and z_order[] with cell numbers, then hands both to S7_Index_Sort.
-{ unsigned long long ibit, // Bitwise representation of x-index.
- jbit; // Bitwise representation of y-index.
-
- // Convert the indices to a bitwise representation.
- int ic;
- for (ic = 0; ic < size; ic++)
- { if (level[ic] < 0) continue; // cells with a negative level are skipped: their z_index/z_order entries are left as the caller passed them
- ibit = index_to_bit(i[ic], level[ic], levmx, ibase);
- jbit = index_to_bit(j[ic], level[ic], levmx, ibase);
- z_index[ic] = twobit_to_index(ibit, jbit); // the unsigned long long key is narrowed to int on assignment
- z_order[ic] = ic; }
-
- // Sort the z-ordered indices.
- S7_Index_Sort(z_index, size, S7_INT, z_order); // presumably orders z_order by z_index -- S7 semantics are not visible here, confirm. NOTE(review): entries skipped above are passed to the sort as well.
-
- // Output ordered mesh information.
- if (DEBUG) // DEBUG is defined as 0 in this file, so this branch is never taken as written
- { printf("orig index i j lev ibit jbit ijbit z index z order\n");
- for (ic=0; ic<size; ic++){
- printf(" %6d %4d %4d %4d ",ic+1, j[ic], i[ic], level[ic]); // NOTE(review): arguments are j then i, while the header line labels the columns i then j
- printbits(index_to_bit(j[ic], level[ic], levmx, ibase)); // printbits takes int, so the unsigned long long value is converted on the call
- printf(" ");
- printbits(index_to_bit(i[ic], level[ic], levmx, ibase));
- printf(" ");
- printbits( index_to_bit(i[ic], level[ic], levmx, ibase)
- | (index_to_bit(j[ic], level[ic], levmx, ibase)
- << 1));
- printf(" %6d %5d\n",z_index[ic], z_order[ic]); } } }
-
-unsigned long long index_to_bit(unsigned long long index, // Rebases index by ibase, scales it to level levmx, and spreads its bits so a zero bit separates each pair.
- int lev,
- int levmx,
- int ibase)
-{ static const unsigned long long B[] = // masks applied after each shift-and-OR step below
- {0x55555555, /* 01010101010101010101010101010101 */
- 0x33333333, /* 00110011001100110011001100110011 */
- 0x0F0F0F0F, /* 00001111000011110000111100001111 */
- 0x00FF00FF, /* 00000000111111110000000011111111 */
- 0x0000FFFF}; /* 00000000000000001111111111111111 */ // B[4] is never referenced in this function
- static const unsigned long long S[] = {1, 2, 4, 8, 16}; // shift distances paired with B[]; S[4] is never referenced
-
- // Convert the index to a bit representation.
- unsigned long long ii, ibit;
- ii = index - ibase; // ibase is converted to unsigned long long for the subtraction
- if (lev < levmx)
- { ii = ii * pow((double)2, (double)(levmx - lev)); } // multiply by 2^(levmx - lev); computed in double and converted back on assignment
- ibit = ii;
- ibit = (ibit | (ibit << S[3])) & B[3]; // NOTE(review): the masks are 32 bits wide, so only the low 16 bits of ii are spread cleanly
- ibit = (ibit | (ibit << S[2])) & B[2];
- ibit = (ibit | (ibit << S[1])) & B[1];
- ibit = (ibit | (ibit << S[0])) & B[0]; // after this step the surviving bits sit in the even bit positions
-
- return (ibit); }
-
-unsigned long long twobit_to_index(unsigned long long ibit, // Merges two bit patterns into one value: ibit as is, jbit moved up by one bit position.
- unsigned long long jbit)
-{ unsigned long long ijbit; // only holds the value being returned
- return (ijbit = ibit | (jbit << 1)); }
-
-// Print n as a binary number (characters '0'/'1' written to stdout with printf, no trailing newline).
-void printbits(int n)
-{
- int i, step; // i: single-bit mask walked downwards while printing; step: threshold used to skip leading groups
-
- if (0 == n)
- { // For simplicity's sake, treat 0 as a special case.
- printf("00000000"); // zero is always shown as eight '0' characters
- return; }
-
- i = 1 << (sizeof(n) * 8 - 1); // intended: mask for the top bit of int (assumes 8-bit bytes). NOTE(review): 1 is a signed int, so with a 32-bit int this shift overflows (undefined behaviour)
- step = -1; // Only print the relevant digits.
- step >>= 8; // Shifts by eight bits (this comment used to say "groups of four"). NOTE(review): right-shifting a negative int is implementation-defined
- while (step >= n) // NOTE(review): if step is still negative here, this is false on entry for every positive n
- { i >>= 8;
- step >>= 8; }
-
- // Intended: i is now the highest bit worth printing. NOTE(review): the loop above does not obviously establish this -- confirm.
- while (i > 0) // one character per bit from i downwards; the loop does not run at all if i is negative
- { if (n & i)
- printf("1");
- else
- printf("0");
- i >>= 1; } }
-
Removed: test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/zorder.h
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C%2B%2B/CLAMR/zorder.h?rev=312481&view=auto
==============================================================================
--- test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/zorder.h (original)
+++ test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/CLAMR/zorder.h (removed)
@@ -1,74 +0,0 @@
-/*
- * Copyright (c) 2011-2012, Los Alamos National Security, LLC.
- * All rights Reserved.
- *
- * Copyright 2011-2012. Los Alamos National Security, LLC. This software was produced
- * under U.S. Government contract DE-AC52-06NA25396 for Los Alamos National
- * Laboratory (LANL), which is operated by Los Alamos National Security, LLC
- * for the U.S. Department of Energy. The U.S. Government has rights to use,
- * reproduce, and distribute this software. NEITHER THE GOVERNMENT NOR LOS
- * ALAMOS NATIONAL SECURITY, LLC MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR
- * ASSUMES ANY LIABILITY FOR THE USE OF THIS SOFTWARE. If software is modified
- * to produce derivative works, such modified software should be clearly marked,
- * so as not to confuse it with the version available from LANL.
- *
- * Additionally, redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of the Los Alamos National Security, LLC, Los Alamos
- * National Laboratory, LANL, the U.S. Government, nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE LOS ALAMOS NATIONAL SECURITY, LLC AND
- * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT
- * NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL LOS ALAMOS NATIONAL
- * SECURITY, LLC OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
- * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
- * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- *
- * CLAMR -- LA-CC-11-094
- * This research code is being developed as part of the
- * 2011 X Division Summer Workshop for the express purpose
- * of a collaborative code for development of ideas in
- * the implementation of AMR codes for Exascale platforms
- *
- * AMR implementation of the Wave code previously developed
- * as a demonstration code for regular grids on Exascale platforms
- * as part of the Supercomputing Challenge and Los Alamos
- * National Laboratory
- *
- * Authors: Bob Robey XCP-2 brobey at lanl.gov
- * Neal Davis davis68 at lanl.gov, davis68 at illinois.edu
- * David Nicholaeff dnic at lanl.gov, mtrxknight at aol.com
- * Dennis Trujillo dptrujillo at lanl.gov, dptru10 at gmail.com
- *
- */
-#ifndef _ZORDER_H
-#define _ZORDER_H
-
-#ifdef __cplusplus
-extern "C"
-{
-#endif
-
-void calc_zorder(int size, int *i, int *j, int *level, int levmx, int ibase, int *z_index, int *z_order); // Writes a bit-interleaved key per cell into z_index[] and the cell number into z_order[], then sorts via S7_Index_Sort.
-unsigned long long index_to_bit(unsigned long long index, int lev, int levmx, int ibase); // Rebases index by ibase, scales it to level levmx and spreads its bits apart.
-unsigned long long twobit_to_index(unsigned long long ibit, unsigned long long jbit); // Returns ibit | (jbit << 1).
-void printbits(int n); // Prints n to stdout as '0'/'1' characters; 0 is printed as "00000000".
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _ZORDER_H */
-
Modified: test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/CMakeLists.txt
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C%2B%2B/CMakeLists.txt?rev=312482&r1=312481&r2=312482&view=diff
==============================================================================
--- test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/CMakeLists.txt (original)
+++ test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/CMakeLists.txt Mon Sep 4 04:27:13 2017
@@ -1,4 +1,3 @@
add_subdirectory(HPCCG)
add_subdirectory(PENNANT)
add_subdirectory(miniFE)
-add_subdirectory(CLAMR)
Modified: test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/Makefile
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C%2B%2B/Makefile?rev=312482&r1=312481&r2=312482&view=diff
==============================================================================
--- test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/Makefile (original)
+++ test-suite/trunk/MultiSource/Benchmarks/DOE-ProxyApps-C++/Makefile Mon Sep 4 04:27:13 2017
@@ -1,6 +1,6 @@
# MultiSource/DOE-ProxyApps-C++ Makefile: Build all subdirectories automatically
LEVEL = ../../..
-PARALLEL_DIRS = HPCCG PENNANT miniFE CLAMR
+PARALLEL_DIRS = HPCCG PENNANT miniFE
include $(LEVEL)/Makefile.programs
More information about the llvm-commits
mailing list